You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by di...@apache.org on 2011/05/15 19:51:58 UTC
[Lucene.Net] svn commit: r1103482 [2/2] - in
/incubator/lucene.net/branches/Lucene.Net_2_9_4g: src/contrib/Analyzers/
src/contrib/Analyzers/AR/ src/contrib/Analyzers/BR/
src/contrib/Analyzers/CJK/ src/contrib/Analyzers/Cz/
src/contrib/Analyzers/De/ src/contrib/Anal...
Added: incubator/lucene.net/branches/Lucene.Net_2_9_4g/test/contrib/Analyzers/NGram/TestNGramTokenFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/test/contrib/Analyzers/NGram/TestNGramTokenFilter.cs?rev=1103482&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/test/contrib/Analyzers/NGram/TestNGramTokenFilter.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/test/contrib/Analyzers/NGram/TestNGramTokenFilter.cs Sun May 15 17:51:57 2011
@@ -0,0 +1,125 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.IO;
+using System.Collections;
+
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
+using NUnit.Framework;
+
namespace Lucene.Net.Analysis.NGram
{
    /// <summary>
    /// Tests <see cref="NGramTokenFilter"/> for correctness: constructor argument
    /// validation, gram generation for several (min, max) gram sizes, offset
    /// bookkeeping, and reset behavior.
    /// </summary>
    [TestFixture]
    public class TestNGramTokenFilter : BaseTokenStreamTestCase
    {
        // Token stream over the single word "abcde"; rebuilt before every test.
        private TokenStream input;

        [SetUp]
        public void SetUp()
        {
            // NOTE(review): this hides rather than overrides any base-class SetUp;
            // NUnit invokes the most-derived [SetUp] via reflection so the tests
            // still run, but confirm whether the base method should be virtual
            // and this one declared 'override'.
            base.SetUp();
            input = new WhitespaceTokenizer(new StringReader("abcde"));
        }

        /// <summary>minGram greater than maxGram must be rejected.</summary>
        [Test]
        public void TestInvalidInput()
        {
            bool gotException = false;
            try
            {
                new NGramTokenFilter(input, 2, 1);
            }
            catch (ArgumentException)
            {
                // Expected: invalid gram-size ordering. The exception object is
                // deliberately not bound (avoids unused-variable warning CS0168).
                gotException = true;
            }
            Assert.IsTrue(gotException);
        }

        /// <summary>minGram of zero must be rejected.</summary>
        [Test]
        public void TestInvalidInput2()
        {
            bool gotException = false;
            try
            {
                new NGramTokenFilter(input, 0, 1);
            }
            catch (ArgumentException)
            {
                // Expected: gram sizes must be >= 1.
                gotException = true;
            }
            Assert.IsTrue(gotException);
        }

        /// <summary>1-grams: one token per character, with per-character offsets.</summary>
        [Test]
        public void TestUnigrams()
        {
            NGramTokenFilter filter = new NGramTokenFilter(input, 1, 1);
            AssertTokenStreamContents(filter, new String[] { "a", "b", "c", "d", "e" }, new int[] { 0, 1, 2, 3, 4 }, new int[] { 1, 2, 3, 4, 5 });
        }

        /// <summary>2-grams: each adjacent character pair, with matching offsets.</summary>
        [Test]
        public void TestBigrams()
        {
            NGramTokenFilter filter = new NGramTokenFilter(input, 2, 2);
            AssertTokenStreamContents(filter, new String[] { "ab", "bc", "cd", "de" }, new int[] { 0, 1, 2, 3 }, new int[] { 2, 3, 4, 5 });
        }

        /// <summary>
        /// 1..3-grams: all grams of each size are emitted, grouped by gram size
        /// (all 1-grams, then all 2-grams, then all 3-grams).
        /// </summary>
        [Test]
        public void TestNgrams()
        {
            NGramTokenFilter filter = new NGramTokenFilter(input, 1, 3);
            AssertTokenStreamContents(filter,
                new String[] { "a", "b", "c", "d", "e", "ab", "bc", "cd", "de", "abc", "bcd", "cde" },
                new int[] { 0, 1, 2, 3, 4, 0, 1, 2, 3, 0, 1, 2 },
                new int[] { 1, 2, 3, 4, 5, 2, 3, 4, 5, 3, 4, 5 }
            );
        }

        /// <summary>Gram sizes longer than the input token yield no output.</summary>
        [Test]
        public void TestOversizedNgrams()
        {
            NGramTokenFilter filter = new NGramTokenFilter(input, 6, 7);
            AssertTokenStreamContents(filter, new String[0], new int[0], new int[0]);
        }

        /// <summary>
        /// Tokens shorter than minGram are skipped; longer tokens still produce
        /// grams with offsets relative to the whole input.
        /// </summary>
        [Test]
        public void TestSmallTokenInStream()
        {
            input = new WhitespaceTokenizer(new StringReader("abc de fgh"));
            NGramTokenFilter filter = new NGramTokenFilter(input, 3, 3);
            AssertTokenStreamContents(filter, new String[] { "abc", "fgh" }, new int[] { 0, 7 }, new int[] { 3, 10 });
        }

        /// <summary>
        /// After resetting the underlying tokenizer with fresh input, the filter
        /// must produce the same token sequence again.
        /// </summary>
        [Test]
        public void TestReset()
        {
            WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(new StringReader("abcde"));
            NGramTokenFilter filter = new NGramTokenFilter(tokenizer, 1, 1);
            AssertTokenStreamContents(filter, new String[] { "a", "b", "c", "d", "e" }, new int[] { 0, 1, 2, 3, 4 }, new int[] { 1, 2, 3, 4, 5 });
            tokenizer.Reset(new StringReader("abcde"));
            AssertTokenStreamContents(filter, new String[] { "a", "b", "c", "d", "e" }, new int[] { 0, 1, 2, 3, 4 }, new int[] { 1, 2, 3, 4, 5 });
        }
    }
}
\ No newline at end of file
Added: incubator/lucene.net/branches/Lucene.Net_2_9_4g/test/contrib/Analyzers/NGram/TestNGramTokenizer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/test/contrib/Analyzers/NGram/TestNGramTokenizer.cs?rev=1103482&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/test/contrib/Analyzers/NGram/TestNGramTokenizer.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/test/contrib/Analyzers/NGram/TestNGramTokenizer.cs Sun May 15 17:51:57 2011
@@ -0,0 +1,117 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.IO;
+using System.Collections;
+
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
+using NUnit.Framework;
+
namespace Lucene.Net.Analysis.NGram
{
    /// <summary>
    /// Tests <see cref="NGramTokenizer"/> for correctness: constructor argument
    /// validation, gram generation for several (min, max) gram sizes, offset and
    /// final-offset bookkeeping, and reset behavior.
    /// </summary>
    [TestFixture]
    public class TestNGramTokenizer : BaseTokenStreamTestCase
    {
        // Reader over the string "abcde"; rebuilt before every test.
        private StringReader input;

        [SetUp]
        public void SetUp()
        {
            // NOTE(review): this hides rather than overrides any base-class SetUp;
            // NUnit invokes the most-derived [SetUp] via reflection so the tests
            // still run, but confirm whether the base method should be virtual
            // and this one declared 'override'.
            base.SetUp();
            input = new StringReader("abcde");
        }

        /// <summary>minGram greater than maxGram must be rejected.</summary>
        [Test]
        public void TestInvalidInput()
        {
            bool gotException = false;
            try
            {
                new NGramTokenizer(input, 2, 1);
            }
            catch (ArgumentException)
            {
                // Expected: invalid gram-size ordering. The exception object is
                // deliberately not bound (avoids unused-variable warning CS0168).
                gotException = true;
            }
            Assert.IsTrue(gotException);
        }

        /// <summary>minGram of zero must be rejected.</summary>
        [Test]
        public void TestInvalidInput2()
        {
            bool gotException = false;
            try
            {
                new NGramTokenizer(input, 0, 1);
            }
            catch (ArgumentException)
            {
                // Expected: gram sizes must be >= 1.
                gotException = true;
            }
            Assert.IsTrue(gotException);
        }

        /// <summary>1-grams: one token per character; final offset is the input length (5).</summary>
        [Test]
        public void TestUnigrams()
        {
            NGramTokenizer tokenizer = new NGramTokenizer(input, 1, 1);
            AssertTokenStreamContents(tokenizer, new String[] { "a", "b", "c", "d", "e" }, new int[] { 0, 1, 2, 3, 4 }, new int[] { 1, 2, 3, 4, 5 }, 5 /* abcde */);
        }

        /// <summary>2-grams: each adjacent character pair, with matching offsets.</summary>
        [Test]
        public void TestBigrams()
        {
            NGramTokenizer tokenizer = new NGramTokenizer(input, 2, 2);
            AssertTokenStreamContents(tokenizer, new String[] { "ab", "bc", "cd", "de" }, new int[] { 0, 1, 2, 3 }, new int[] { 2, 3, 4, 5 }, 5 /* abcde */);
        }

        /// <summary>
        /// 1..3-grams: all grams of each size are emitted, grouped by gram size
        /// (all 1-grams, then all 2-grams, then all 3-grams).
        /// </summary>
        [Test]
        public void TestNgrams()
        {
            NGramTokenizer tokenizer = new NGramTokenizer(input, 1, 3);
            AssertTokenStreamContents(tokenizer,
                new String[] { "a", "b", "c", "d", "e", "ab", "bc", "cd", "de", "abc", "bcd", "cde" },
                new int[] { 0, 1, 2, 3, 4, 0, 1, 2, 3, 0, 1, 2 },
                new int[] { 1, 2, 3, 4, 5, 2, 3, 4, 5, 3, 4, 5 },
                5 /* abcde */
            );
        }

        /// <summary>
        /// Gram sizes longer than the input yield no tokens, but the final
        /// offset still reflects the consumed input length.
        /// </summary>
        [Test]
        public void TestOversizedNgrams()
        {
            NGramTokenizer tokenizer = new NGramTokenizer(input, 6, 7);
            AssertTokenStreamContents(tokenizer, new String[0], new int[0], new int[0], 5 /* abcde */);
        }

        /// <summary>
        /// After Reset with a fresh reader, the tokenizer must produce the same
        /// token sequence again.
        /// </summary>
        [Test]
        public void TestReset()
        {
            NGramTokenizer tokenizer = new NGramTokenizer(input, 1, 1);
            AssertTokenStreamContents(tokenizer, new String[] { "a", "b", "c", "d", "e" }, new int[] { 0, 1, 2, 3, 4 }, new int[] { 1, 2, 3, 4, 5 }, 5 /* abcde */);
            tokenizer.Reset(new StringReader("abcde"));
            AssertTokenStreamContents(tokenizer, new String[] { "a", "b", "c", "d", "e" }, new int[] { 0, 1, 2, 3, 4 }, new int[] { 1, 2, 3, 4, 5 }, 5 /* abcde */);
        }
    }
}
\ No newline at end of file