Posted to commits@lucenenet.apache.org by di...@apache.org on 2011/05/15 19:51:58 UTC

[Lucene.Net] svn commit: r1103482 [2/2] - in /incubator/lucene.net/branches/Lucene.Net_2_9_4g: src/contrib/Analyzers/ src/contrib/Analyzers/AR/ src/contrib/Analyzers/BR/ src/contrib/Analyzers/CJK/ src/contrib/Analyzers/Cz/ src/contrib/Analyzers/De/ src/contrib/Anal...

Added: incubator/lucene.net/branches/Lucene.Net_2_9_4g/test/contrib/Analyzers/NGram/TestNGramTokenFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/test/contrib/Analyzers/NGram/TestNGramTokenFilter.cs?rev=1103482&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/test/contrib/Analyzers/NGram/TestNGramTokenFilter.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/test/contrib/Analyzers/NGram/TestNGramTokenFilter.cs Sun May 15 17:51:57 2011
@@ -0,0 +1,125 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.IO;
+using System.Collections;
+
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
+using NUnit.Framework;
+
+namespace Lucene.Net.Analysis.NGram
+{
+
+    /// <summary>
+    /// Tests <see cref="NGramTokenFilter"/> for correctness.
+    /// </summary>
+    [TestFixture]
+    public class TestNGramTokenFilter : BaseTokenStreamTestCase
+    {
+        private TokenStream input;
+
+        [SetUp]
+        public override void SetUp()
+        {
+            base.SetUp();
+            input = new WhitespaceTokenizer(new StringReader("abcde"));
+        }
+
+        [Test]
+        public void TestInvalidInput()
+        {
+            bool gotException = false;
+            try
+            {
+                new NGramTokenFilter(input, 2, 1);
+            }
+            catch (System.ArgumentException)
+            {
+                gotException = true;
+            }
+            Assert.IsTrue(gotException);
+        }
+
+        [Test]
+        public void TestInvalidInput2()
+        {
+            bool gotException = false;
+            try
+            {
+                new NGramTokenFilter(input, 0, 1);
+            }
+            catch (System.ArgumentException)
+            {
+                gotException = true;
+            }
+            Assert.IsTrue(gotException);
+        }
+
+        [Test]
+        public void TestUnigrams()
+        {
+            NGramTokenFilter filter = new NGramTokenFilter(input, 1, 1);
+            AssertTokenStreamContents(filter, new String[] { "a", "b", "c", "d", "e" }, new int[] { 0, 1, 2, 3, 4 }, new int[] { 1, 2, 3, 4, 5 });
+        }
+
+        [Test]
+        public void TestBigrams()
+        {
+            NGramTokenFilter filter = new NGramTokenFilter(input, 2, 2);
+            AssertTokenStreamContents(filter, new String[] { "ab", "bc", "cd", "de" }, new int[] { 0, 1, 2, 3 }, new int[] { 2, 3, 4, 5 });
+        }
+
+        [Test]
+        public void TestNgrams()
+        {
+            NGramTokenFilter filter = new NGramTokenFilter(input, 1, 3);
+            AssertTokenStreamContents(filter,
+              new String[] { "a", "b", "c", "d", "e", "ab", "bc", "cd", "de", "abc", "bcd", "cde" },
+              new int[] { 0, 1, 2, 3, 4, 0, 1, 2, 3, 0, 1, 2 },
+              new int[] { 1, 2, 3, 4, 5, 2, 3, 4, 5, 3, 4, 5 }
+            );
+        }
+
+        [Test]
+        public void TestOversizedNgrams()
+        {
+            NGramTokenFilter filter = new NGramTokenFilter(input, 6, 7);
+            AssertTokenStreamContents(filter, new String[0], new int[0], new int[0]);
+        }
+
+        [Test]
+        public void TestSmallTokenInStream()
+        {
+            input = new WhitespaceTokenizer(new StringReader("abc de fgh"));
+            NGramTokenFilter filter = new NGramTokenFilter(input, 3, 3);
+            AssertTokenStreamContents(filter, new String[] { "abc", "fgh" }, new int[] { 0, 7 }, new int[] { 3, 10 });
+        }
+
+        [Test]
+        public void TestReset()
+        {
+            WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(new StringReader("abcde"));
+            NGramTokenFilter filter = new NGramTokenFilter(tokenizer, 1, 1);
+            AssertTokenStreamContents(filter, new String[] { "a", "b", "c", "d", "e" }, new int[] { 0, 1, 2, 3, 4 }, new int[] { 1, 2, 3, 4, 5 });
+            tokenizer.Reset(new StringReader("abcde"));
+            AssertTokenStreamContents(filter, new String[] { "a", "b", "c", "d", "e" }, new int[] { 0, 1, 2, 3, 4 }, new int[] { 1, 2, 3, 4, 5 });
+        }
+    }
+}
\ No newline at end of file
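
The expected arrays asserted in TestNgrams above follow a fixed enumeration
order: all grams of the minimum size come first, then each successively larger
size, scanning the token left to right, with start/end offsets indexing into
the original token text. The standalone sketch below is illustrative only (it
is not part of this commit, and the class name is made up); it reproduces those
expected values for "abcde" with minGram=1 and maxGram=3 in plain C#.

    // Illustrative sketch only -- not committed code. Reproduces the
    // term/offset expectations asserted in TestNgrams for "abcde" with
    // minGram=1, maxGram=3.
    using System;

    static class NGramExpectations
    {
        static void Main()
        {
            string token = "abcde";
            int minGram = 1, maxGram = 3;

            // Smallest grams first, then each larger size, left to right:
            // a b c d e ab bc cd de abc bcd cde
            for (int size = minGram; size <= maxGram; size++)
            {
                for (int start = 0; start + size <= token.Length; start++)
                {
                    Console.WriteLine("{0}  start={1} end={2}",
                        token.Substring(start, size), start, start + size);
                }
            }
        }
    }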

Added: incubator/lucene.net/branches/Lucene.Net_2_9_4g/test/contrib/Analyzers/NGram/TestNGramTokenizer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/test/contrib/Analyzers/NGram/TestNGramTokenizer.cs?rev=1103482&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/test/contrib/Analyzers/NGram/TestNGramTokenizer.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/test/contrib/Analyzers/NGram/TestNGramTokenizer.cs Sun May 15 17:51:57 2011
@@ -0,0 +1,117 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.IO;
+using System.Collections;
+
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
+using NUnit.Framework;
+
+namespace Lucene.Net.Analysis.NGram
+{
+
+    /// <summary>
+    /// Tests <see cref="NGramTokenizer"/> for correctness.
+    /// </summary>
+    [TestFixture]
+    public class TestNGramTokenizer : BaseTokenStreamTestCase
+    {
+        private StringReader input;
+
+        [SetUp]
+        public override void SetUp()
+        {
+            base.SetUp();
+            input = new StringReader("abcde");
+        }
+
+        [Test]
+        public void TestInvalidInput()
+        {
+            bool gotException = false;
+            try
+            {
+                new NGramTokenizer(input, 2, 1);
+            }
+            catch (System.ArgumentException)
+            {
+                gotException = true;
+            }
+            Assert.IsTrue(gotException);
+        }
+
+        [Test]
+        public void TestInvalidInput2()
+        {
+            bool gotException = false;
+            try
+            {
+                new NGramTokenizer(input, 0, 1);
+            }
+            catch (System.ArgumentException)
+            {
+                gotException = true;
+            }
+            Assert.IsTrue(gotException);
+        }
+
+        [Test]
+        public void TestUnigrams()
+        {
+            NGramTokenizer tokenizer = new NGramTokenizer(input, 1, 1);
+            AssertTokenStreamContents(tokenizer, new String[] { "a", "b", "c", "d", "e" }, new int[] { 0, 1, 2, 3, 4 }, new int[] { 1, 2, 3, 4, 5 }, 5 /* abcde */);
+        }
+
+        [Test]
+        public void TestBigrams()
+        {
+            NGramTokenizer tokenizer = new NGramTokenizer(input, 2, 2);
+            AssertTokenStreamContents(tokenizer, new String[] { "ab", "bc", "cd", "de" }, new int[] { 0, 1, 2, 3 }, new int[] { 2, 3, 4, 5 }, 5 /* abcde */);
+        }
+
+        [Test]
+        public void TestNgrams()
+        {
+            NGramTokenizer tokenizer = new NGramTokenizer(input, 1, 3);
+            AssertTokenStreamContents(tokenizer,
+              new String[] { "a", "b", "c", "d", "e", "ab", "bc", "cd", "de", "abc", "bcd", "cde" },
+              new int[] { 0, 1, 2, 3, 4, 0, 1, 2, 3, 0, 1, 2 },
+              new int[] { 1, 2, 3, 4, 5, 2, 3, 4, 5, 3, 4, 5 },
+              5 /* abcde */
+            );
+        }
+
+        [Test]
+        public void TestOversizedNgrams()
+        {
+            NGramTokenizer tokenizer = new NGramTokenizer(input, 6, 7);
+            AssertTokenStreamContents(tokenizer, new String[0], new int[0], new int[0], 5 /* abcde */);
+        }
+
+        [Test]
+        public void TestReset()
+        {
+            NGramTokenizer tokenizer = new NGramTokenizer(input, 1, 1);
+            AssertTokenStreamContents(tokenizer, new String[] { "a", "b", "c", "d", "e" }, new int[] { 0, 1, 2, 3, 4 }, new int[] { 1, 2, 3, 4, 5 }, 5 /* abcde */);
+            tokenizer.Reset(new StringReader("abcde"));
+            AssertTokenStreamContents(tokenizer, new String[] { "a", "b", "c", "d", "e" }, new int[] { 0, 1, 2, 3, 4 }, new int[] { 1, 2, 3, 4, 5 }, 5 /* abcde */);
+        }
+    }
+}
\ No newline at end of file
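
TestSmallTokenInStream above shows the practical difference between the two
classes: NGramTokenizer grams the entire character stream read from the
Reader, while NGramTokenFilter grams each token produced by an upstream
tokenizer separately, keeps offsets that (per the assertions above) line up
with positions in the original text, and emits nothing for tokens shorter
than minGram, which is why the two-character "de" disappears. The extra
trailing argument in the tokenizer tests (5 /* abcde */) appears to be the
expected final offset checked once the stream is exhausted, i.e. the input
length. The plain C# sketch below is illustrative only (not part of this
commit; the hand-rolled whitespace split merely stands in for
WhitespaceTokenizer) and reproduces the expected output of
TestSmallTokenInStream for "abc de fgh" with 3-grams.

    // Illustrative sketch only -- not committed code. Reproduces the
    // expectation that NGramTokenFilter(input, 3, 3) over the whitespace
    // tokens of "abc de fgh" yields "abc" [0,3) and "fgh" [7,10), while the
    // two-character token "de" produces no grams at all.
    using System;

    static class SmallTokenExpectation
    {
        static void Main()
        {
            string text = "abc de fgh";
            int minGram = 3, maxGram = 3;

            int pos = 0;
            while (pos < text.Length)
            {
                // Hand-rolled whitespace split, standing in for WhitespaceTokenizer.
                while (pos < text.Length && char.IsWhiteSpace(text[pos])) pos++;
                int start = pos;
                while (pos < text.Length && !char.IsWhiteSpace(text[pos])) pos++;
                string word = text.Substring(start, pos - start);

                // Tokens shorter than minGram ("de") yield nothing.
                for (int size = minGram; size <= maxGram; size++)
                {
                    for (int i = 0; i + size <= word.Length; i++)
                    {
                        Console.WriteLine("{0}  start={1} end={2}",
                            word.Substring(i, size), start + i, start + i + size);
                    }
                }
            }
        }
    }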