wasBayesSharp – Rev 1

Subversion Repositories:
Rev:
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Reflection;
using BayesSharp.Combiners;
using BayesSharp.Tokenizers;
using NUnit.Framework;

namespace BayesSharp.UnitTests
{
    [TestFixture]
    public class BasicTests
    {
        [Test]
        public void TestSpanHam()
        {
            var t = new BayesSimpleTextClassifier();
            t.Train("span", "bad");
            t.Train("ham", "good");

            var res = t.Classify("this is a bad sentence");
            Assert.AreEqual(1, res.Count);
            Assert.AreEqual(0.9999, res["span"]);
        }

        [Test]
        public void TestLanguageDiscover()
        {
            var t = new BayesSimpleTextClassifier();
            t.Train("french", "le la les du un une je il elle de en");
            t.Train("german", "der die das ein eine");
            t.Train("spanish", "el uno una las de la en");
            t.Train("english", "the it she he they them are were to");
            t.Train("english", "the rain in spain falls mainly on the plain");
            var res = t.Classify("uno das je de la elle in");

            Assert.AreEqual(4, res.Count);
            Assert.AreEqual(0.9999, res["english"]);
            Assert.AreEqual(0.9999, res["german"]);
            Assert.AreEqual(0.67285006523593538, res["french"]);
            Assert.AreEqual(0.58077905232271598d, res["spanish"]);
        }

        [Test]
        public void TestNewTag()
        {
            var t = new BayesSimpleTextClassifier();
            t.AddTag("teste");
            Assert.IsNotNull(t.GetTagById("teste"));
        }

        [Test]
        public void TestRemoveTag()
        {
            var t = new BayesSimpleTextClassifier();
            t.Train("teste", "Bla");
            Assert.IsNotNull(t.GetTagById("teste"));
            t.RemoveTag("teste");
            Assert.IsNull(t.GetTagById("teste"));
        }

        [Test]
        public void TestChangeTag()
        {
            var t = new BayesSimpleTextClassifier();
            t.Train("teste", "Bla");
            Assert.IsNull(t.GetTagById("teste2"));
            t.ChangeTagId("teste", "teste2");
            Assert.IsNull(t.GetTagById("teste"));
            Assert.IsNotNull(t.GetTagById("teste2"));
        }

        [Test]
        public void TestMergeTags()
        {
            var t = new BayesSimpleTextClassifier();
            t.Train("bom", "gordo");
            t.Train("mal", "magro");
            var output = t.Classify("gordo magro");

            Assert.AreEqual(2, output.Count);
            Assert.AreEqual(0.9999, output["bom"]);
            Assert.AreEqual(0.9999, output["mal"]);

            t.MergeTags("mal", "bom");
            output = t.Classify("gordo magro");

            Assert.AreEqual(1, output.Count);
            Assert.AreEqual(0.9999, output["bom"]);
        }

#if !MONO
        [Test]
        public void TestSaveAndLoad()
        {
            var path = new FileInfo(new System.Uri(Assembly.GetExecutingAssembly().CodeBase).AbsolutePath).Directory.FullName + @"\bayes.json";
            var t = new BayesSimpleTextClassifier();
            t.Train("teste", "Afonso França");
            t.Save(path);
            var output = t.Classify("Afonso França");
            Assert.AreEqual(1, output.Count);
            Assert.AreEqual(0.9999, output["teste"]);

            var t1 = new BayesSimpleTextClassifier();
            t1.Load(path);
            output = t1.Classify("Afonso França");

            Assert.AreEqual(1, output.Count);
            Assert.AreEqual(0.9999, output["teste"]);
        }
#endif

        [Test]
        public void TestUntrain()
        {
            var t = new BayesSimpleTextClassifier();
            t.Train("teste", "Afonso França");
            t.Untrain("teste", "França");

            var res = t.Classify("França");
            Assert.AreEqual(0, res.Count);
        }

        [Test]
        public void TestTagIds()
        {
            var t = new BayesSimpleTextClassifier();
            t.Train("teste", "Afonso França");
            t.Train("teste1", "Afonso França");

            var res = t.TagIds().ToList();
            Assert.AreEqual(2, res.Count());
            Assert.AreEqual("teste", res[0]);
            Assert.AreEqual("teste1", res[1]);
        }

        [Test]
        public void TestRobinsonFisherCombiner()
        {
            var t = new BayesSimpleTextClassifier();
            t.Train("Alimentação", "Ipiranga AMPM");
            t.Train("Alimentação", "Restaurante Bobs");
            t.Train("Combustível", "Posto Ipiranga");

            var res = t.Classify("Restaurante Ipiranga");
            Assert.AreEqual(2, res.Count());
            Assert.AreEqual(0.84415961583962162, res["Alimentação"]);
            Assert.AreEqual(0.33333333333333326, res["Combustível"]);


            t = new BayesSimpleTextClassifier(new SimpleTextTokenizer(), new RobinsonFisherCombiner());
            t.Train("Alimentação", "IPIRANGA AMPM");
            t.Train("Alimentação", "Restaurante Bobs");
            t.Train("Combustível", "Posto Ipiranga");

            res = t.Classify("Restaurante Ipiranga");
            Assert.AreEqual(2, res.Count());
            Assert.AreEqual(0.99481185089082513, res["Alimentação"]);
            Assert.AreEqual(0.38128034540863015, res["Combustível"]);
        }

        [Test]
        public void TestCatsAndDogs()
        {
            var ignoreList = new List<string> {"the", "my", "i", "dont"};
            var cls = new BayesSimpleTextClassifier(new SimpleTextTokenizer(true, ignoreList));
            cls.Train("dog", "Dogs are awesome, cats too. I love my dog");
            cls.Train("cat", "Cats are more preferred by software developers. I never could stand cats. I have a dog");
            cls.Train("dog", "My dog's name is Willy. He likes to play with my wife's cat all day long. I love dogs");
            cls.Train("cat", "Cats are difficult animals, unlike dogs, really annoying, I hate them all");
            cls.Train("dog", "So which one should you choose? A dog, definitely.");
            cls.Train("cat", "The favorite food for cats is bird meat, although mice are good, but birds are a delicacy");
            cls.Train("dog", "A dog will eat anything, including birds or whatever meat");
            cls.Train("cat", "My cat's favorite place to purr is on my keyboard");
            cls.Train("dog", "My dog's favorite place to take a leak is the tree in front of our house");

            Assert.AreEqual("cat", cls.Classify("This test is about cats.").First().Key);
            Assert.AreEqual("cat", cls.Classify("I hate ...").First().Key);
            Assert.AreEqual("cat", cls.Classify("The most annoying animal on earth.").First().Key);
            Assert.AreEqual("cat", cls.Classify("My precious, my favorite!").First().Key);
            Assert.AreEqual("cat", cls.Classify("Get off my keyboard!").First().Key);
            Assert.AreEqual("cat", cls.Classify("Kill that bird!").First().Key);
            Assert.AreEqual("dog", cls.Classify("This test is about dogs.").First().Key);
            Assert.AreEqual("dog",cls.Classify("Cats or Dogs?").First().Key);
            Assert.AreEqual("dog",cls.Classify("What pet will I love more?").First().Key);  
            Assert.AreEqual("cat",cls.Classify("Willy, where the heck are you?").First().Key);
            Assert.AreEqual("dog",cls.Classify("Why is the front door of our house open?").First().Key);

            var res = cls.Classify("The preferred company of software developers.");
            Assert.AreEqual(2, res.Count);
            Assert.AreEqual(0.9999, res["cat"]);
            Assert.AreEqual(0.9999, res["dog"]);
        }
    }
}