wasBayesSharp – Blame information for rev 1
?pathlinks?
Rev | Author | Line No. | Line |
---|---|---|---|
1 | office | 1 | using System.Collections.Generic; |
2 | using System.IO; |
||
3 | using System.Linq; |
||
4 | using System.Reflection; |
||
5 | using BayesSharp.Combiners; |
||
6 | using BayesSharp.Tokenizers; |
||
7 | using NUnit.Framework; |
||
8 | |||
9 | namespace BayesSharp.UnitTests |
||
10 | { |
||
11 | [TestFixture] |
||
12 | public class BasicTests |
||
13 | { |
||
/// <summary>
/// Trains the tags "span" and "ham" with one word each, then checks that a
/// sentence containing only the word trained for "span" produces exactly one
/// result, keyed "span", with a score of 0.9999.
/// </summary>
[Test]
public void TestSpanHam()
{
    var classifier = new BayesSimpleTextClassifier();
    classifier.Train("span", "bad");
    classifier.Train("ham", "good");

    var scores = classifier.Classify("this is a bad sentence");

    Assert.AreEqual(1, scores.Count);
    Assert.AreEqual(0.9999, scores["span"]);
}
||
25 | |||
/// <summary>
/// Trains four language tags (french, german, spanish, english) with short
/// word lists, classifies a sentence that mixes words from several of them,
/// and checks that one score is returned per trained tag.
/// </summary>
/// <remarks>
/// Scores are compared with a small absolute tolerance instead of bit-exact
/// equality: the french and spanish expectations are computed doubles, and
/// their last digits may differ between runtimes and platforms.
/// </remarks>
[Test]
public void TestLanguageDiscover()
{
    // Absolute tolerance for score comparisons; far tighter than any
    // meaningful difference between two classification scores.
    const double tolerance = 1e-9;

    var t = new BayesSimpleTextClassifier();
    t.Train("french", "le la les du un une je il elle de en");
    t.Train("german", "der die das ein eine");
    t.Train("spanish", "el uno una las de la en");
    t.Train("english", "the it she he they them are were to");
    t.Train("english", "the rain in spain falls mainly on the plain");

    var res = t.Classify("uno das je de la elle in");

    Assert.AreEqual(4, res.Count);
    Assert.AreEqual(0.9999, res["english"], tolerance);
    Assert.AreEqual(0.9999, res["german"], tolerance);
    Assert.AreEqual(0.67285006523593538, res["french"], tolerance);
    Assert.AreEqual(0.58077905232271598d, res["spanish"], tolerance);
}
||
43 | |||
/// <summary>
/// Adds a tag to a fresh classifier and checks that it can then be looked
/// up by its id.
/// </summary>
[Test]
public void TestNewTag()
{
    const string tagId = "teste";
    var classifier = new BayesSimpleTextClassifier();

    classifier.AddTag(tagId);

    Assert.IsNotNull(classifier.GetTagById(tagId));
}
||
51 | |||
/// <summary>
/// Checks that a tag created through training can be looked up, and that
/// the lookup returns null once the tag has been removed.
/// </summary>
[Test]
public void TestRemoveTag()
{
    const string tagId = "teste";
    var classifier = new BayesSimpleTextClassifier();

    classifier.Train(tagId, "Bla");
    Assert.IsNotNull(classifier.GetTagById(tagId));

    classifier.RemoveTag(tagId);
    Assert.IsNull(classifier.GetTagById(tagId));
}
||
61 | |||
/// <summary>
/// Checks that changing a tag's id makes the tag reachable under the new id
/// only: the new id is unknown before the change, and the old id is unknown
/// after it.
/// </summary>
[Test]
public void TestChangeTag()
{
    const string oldId = "teste";
    const string newId = "teste2";
    var classifier = new BayesSimpleTextClassifier();
    classifier.Train(oldId, "Bla");

    // Before the change the new id must not exist yet.
    Assert.IsNull(classifier.GetTagById(newId));

    classifier.ChangeTagId(oldId, newId);

    Assert.IsNull(classifier.GetTagById(oldId));
    Assert.IsNotNull(classifier.GetTagById(newId));
}
||
72 | |||
/// <summary>
/// Trains two tags with one word each and checks that a text containing
/// both words scores 0.9999 for each tag; then merges "mal" into "bom" via
/// MergeTags("mal", "bom") and checks that the same text yields a single
/// result, keyed "bom", still scored 0.9999.
/// </summary>
[Test]
public void TestMergeTags()
{
    const string sample = "gordo magro";
    var classifier = new BayesSimpleTextClassifier();
    classifier.Train("bom", "gordo");
    classifier.Train("mal", "magro");

    // Both tags are present before the merge.
    var beforeMerge = classifier.Classify(sample);
    Assert.AreEqual(2, beforeMerge.Count);
    Assert.AreEqual(0.9999, beforeMerge["bom"]);
    Assert.AreEqual(0.9999, beforeMerge["mal"]);

    classifier.MergeTags("mal", "bom");

    // Only "bom" is reported after the merge.
    var afterMerge = classifier.Classify(sample);
    Assert.AreEqual(1, afterMerge.Count);
    Assert.AreEqual(0.9999, afterMerge["bom"]);
}
||
91 | |||
92 | #if !MONO |
||
/// <summary>
/// Trains a classifier, saves it to "bayes.json" next to the test assembly,
/// loads that file into a second classifier, and checks that both
/// classifiers produce the same single result for the trained text.
/// </summary>
/// <remarks>
/// The saved file is deleted when the test finishes, whether it passes or
/// fails, so repeated runs do not depend on a leftover file.
/// </remarks>
[Test]
public void TestSaveAndLoad()
{
    // Uri.LocalPath is the unescaped file-system path; AbsolutePath would keep
    // percent-escapes (e.g. "%20" for a space) and point at a non-existent
    // directory whenever the assembly location contains such characters.
    var assemblyFile = new System.Uri(Assembly.GetExecutingAssembly().CodeBase).LocalPath;

    // Path.Combine picks the platform's directory separator instead of a
    // hard-coded backslash.
    var path = Path.Combine(new FileInfo(assemblyFile).Directory.FullName, "bayes.json");

    try
    {
        var t = new BayesSimpleTextClassifier();
        t.Train("teste", "Afonso França");
        t.Save(path);
        var output = t.Classify("Afonso França");
        Assert.AreEqual(1, output.Count);
        Assert.AreEqual(0.9999, output["teste"]);

        var t1 = new BayesSimpleTextClassifier();
        t1.Load(path);
        output = t1.Classify("Afonso França");

        Assert.AreEqual(1, output.Count);
        Assert.AreEqual(0.9999, output["teste"]);
    }
    finally
    {
        // File.Delete is a no-op when the file was never created.
        File.Delete(path);
    }
}
||
111 | #endif |
||
112 | |||
/// <summary>
/// Trains a tag with two words, untrains one of them, and checks that
/// classifying the untrained word alone returns no results.
/// </summary>
[Test]
public void TestUntrain()
{
    const string tagId = "teste";
    const string removedWord = "França";
    var classifier = new BayesSimpleTextClassifier();

    classifier.Train(tagId, "Afonso França");
    classifier.Untrain(tagId, removedWord);

    var scores = classifier.Classify(removedWord);
    Assert.AreEqual(0, scores.Count);
}
||
123 | |||
/// <summary>
/// Trains two tags with the same text and checks that TagIds() reports
/// exactly those two ids, "teste" first and "teste1" second.
/// </summary>
[Test]
public void TestTagIds()
{
    const string sample = "Afonso França";
    var classifier = new BayesSimpleTextClassifier();
    classifier.Train("teste", sample);
    classifier.Train("teste1", sample);

    var tagIds = classifier.TagIds().ToList();

    Assert.AreEqual(2, tagIds.Count);
    Assert.AreEqual("teste", tagIds[0]);
    Assert.AreEqual("teste1", tagIds[1]);
}
||
136 | |||
/// <summary>
/// Classifies the same text against the same three training documents twice:
/// first with a default-constructed classifier, then with a classifier built
/// from a SimpleTextTokenizer and a RobinsonFisherCombiner, and checks the
/// scores each configuration produces for the two tags.
/// </summary>
/// <remarks>
/// All expected scores are computed doubles, so they are compared with a
/// small absolute tolerance rather than bit-exact equality, which may differ
/// in the last digits between runtimes and platforms.
/// </remarks>
[Test]
public void TestRobinsonFisherCombiner()
{
    // Absolute tolerance for score comparisons; far tighter than any
    // meaningful difference between two classification scores.
    const double tolerance = 1e-9;

    // Scenario 1: default-constructed classifier.
    var t = new BayesSimpleTextClassifier();
    t.Train("Alimentação", "Ipiranga AMPM");
    t.Train("Alimentação", "Restaurante Bobs");
    t.Train("Combustível", "Posto Ipiranga");

    var res = t.Classify("Restaurante Ipiranga");
    Assert.AreEqual(2, res.Count);
    Assert.AreEqual(0.84415961583962162, res["Alimentação"], tolerance);
    Assert.AreEqual(0.33333333333333326, res["Combustível"], tolerance);

    // Scenario 2: explicit tokenizer and Robinson-Fisher combiner.
    // NOTE(review): the first training text is upper-cased here ("IPIRANGA")
    // but not in scenario 1 - presumably deliberate; confirm.
    t = new BayesSimpleTextClassifier(new SimpleTextTokenizer(), new RobinsonFisherCombiner());
    t.Train("Alimentação", "IPIRANGA AMPM");
    t.Train("Alimentação", "Restaurante Bobs");
    t.Train("Combustível", "Posto Ipiranga");

    res = t.Classify("Restaurante Ipiranga");
    Assert.AreEqual(2, res.Count);
    Assert.AreEqual(0.99481185089082513, res["Alimentação"], tolerance);
    Assert.AreEqual(0.38128034540863015, res["Combustível"], tolerance);
}
||
161 | |||
/// <summary>
/// Trains a classifier (built with a SimpleTextTokenizer that is given an
/// ignore list) on nine sentences tagged "cat" or "dog", then checks which
/// tag comes first in the classification result for a series of sentences,
/// and finally checks both scores for one sentence.
/// </summary>
[Test]
public void TestCatsAndDogs()
{
    var ignoredWords = new List<string> {"the", "my", "i", "dont"};
    var classifier = new BayesSimpleTextClassifier(new SimpleTextTokenizer(true, ignoredWords));

    classifier.Train("dog", "Dogs are awesome, cats too. I love my dog");
    classifier.Train("cat", "Cats are more preferred by software developers. I never could stand cats. I have a dog");
    classifier.Train("dog", "My dog's name is Willy. He likes to play with my wife's cat all day long. I love dogs");
    classifier.Train("cat", "Cats are difficult animals, unlike dogs, really annoying, I hate them all");
    classifier.Train("dog", "So which one should you choose? A dog, definitely.");
    classifier.Train("cat", "The favorite food for cats is bird meat, although mice are good, but birds are a delicacy");
    classifier.Train("dog", "A dog will eat anything, including birds or whatever meat");
    classifier.Train("cat", "My cat's favorite place to purr is on my keyboard");
    classifier.Train("dog", "My dog's favorite place to take a leak is the tree in front of our house");

    // Each entry is { expected first tag, sentence to classify }; entries are
    // checked in this order and the first mismatch fails the test.
    var expectations = new[]
    {
        new[] { "cat", "This test is about cats." },
        new[] { "cat", "I hate ..." },
        new[] { "cat", "The most annoying animal on earth." },
        new[] { "cat", "My precious, my favorite!" },
        new[] { "cat", "Get off my keyboard!" },
        new[] { "cat", "Kill that bird!" },
        new[] { "dog", "This test is about dogs." },
        new[] { "dog", "Cats or Dogs?" },
        new[] { "dog", "What pet will I love more?" },
        new[] { "cat", "Willy, where the heck are you?" },
        new[] { "dog", "Why is the front door of our house open?" }
    };

    foreach (var expectation in expectations)
    {
        // NOTE(review): presumably the first entry of the result is the
        // best-scoring tag - confirm against Classify's ordering.
        var firstTag = classifier.Classify(expectation[1]).First().Key;
        Assert.AreEqual(expectation[0], firstTag);
    }

    var scores = classifier.Classify("The preferred company of software developers.");
    Assert.AreEqual(2, scores.Count);
    Assert.AreEqual(0.9999, scores["cat"]);
    Assert.AreEqual(0.9999, scores["dog"]);
}
||
194 | } |
||
195 | } |