You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by cc...@apache.org on 2011/11/21 09:41:54 UTC
[Lucene.Net] svn commit: r1204396 [3/3] - in
/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk:
src/contrib/Analyzers/BR/ src/contrib/Analyzers/CJK/
src/contrib/Analyzers/Cn/ src/contrib/Analyzers/Fr/
src/contrib/Analyzers/Miscellaneous/ src/contrib/Analyzers/N...
Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Position/PositionFilterTest.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Position/PositionFilterTest.cs?rev=1204396&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Position/PositionFilterTest.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Position/PositionFilterTest.cs Mon Nov 21 08:41:52 2011
@@ -0,0 +1,151 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Position;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Analyzers.Shingle;
+using NUnit.Framework;
+
+namespace Lucene.Net.Analyzers.Position
+{
+ [TestFixture]
+ public class PositionFilterTest : BaseTokenStreamTestCase
+ {
+ public class TestTokenStream : TokenStream
+ {
+ protected int index = 0;
+ protected String[] testToken;
+ protected TermAttribute termAtt;
+
+ public TestTokenStream(String[] testToken)
+ {
+ this.testToken = testToken;
+ termAtt = AddAttribute<TermAttribute>();
+ }
+
+ public sealed override bool IncrementToken()
+ {
+ ClearAttributes();
+ if (index < testToken.Length)
+ {
+ termAtt.SetTermBuffer(testToken[index++]);
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+ }
+
+ public override void Reset()
+ {
+ index = 0;
+ }
+ }
+
+ public static readonly String[] TEST_TOKEN = new String[]
+ {
+ "please",
+ "divide",
+ "this",
+ "sentence",
+ "into",
+ "shingles",
+ };
+
+ public static readonly int[] TEST_TOKEN_POSITION_INCREMENTS = new int[]
+ {
+ 1, 0, 0, 0, 0, 0
+ };
+
+ public static readonly int[] TEST_TOKEN_NON_ZERO_POSITION_INCREMENTS = new int[]
+ {
+ 1, 5, 5, 5, 5, 5
+ };
+
+ public static readonly String[] SIX_GRAM_NO_POSITIONS_TOKENS = new String[]
+ {
+ "please",
+ "please divide",
+ "please divide this",
+ "please divide this sentence",
+ "please divide this sentence into",
+ "please divide this sentence into shingles"
+ ,
+ "divide",
+ "divide this",
+ "divide this sentence",
+ "divide this sentence into",
+ "divide this sentence into shingles",
+ "this",
+ "this sentence",
+ "this sentence into",
+ "this sentence into shingles",
+ "sentence",
+ "sentence into",
+ "sentence into shingles",
+ "into",
+ "into shingles",
+ "shingles",
+ };
+
+ public static readonly int[] SIX_GRAM_NO_POSITIONS_INCREMENTS = new int[]
+ {
+ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ , 0, 0, 0, 0, 0, 0, 0
+ };
+
+ public static readonly String[] SIX_GRAM_NO_POSITIONS_TYPES = new String[]
+ {
+ "word", "shingle", "shingle", "shingle",
+ "shingle", "shingle",
+ "word", "shingle", "shingle", "shingle",
+ "shingle",
+ "word", "shingle", "shingle", "shingle",
+ "word", "shingle", "shingle",
+ "word", "shingle",
+ "word"
+ };
+
+ [Test]
+ public void TestFilter()
+ {
+ AssertTokenStreamContents(new PositionFilter(new TestTokenStream(TEST_TOKEN)),
+ TEST_TOKEN,
+ TEST_TOKEN_POSITION_INCREMENTS);
+ }
+
+ [Test]
+ public void TestNonZeroPositionIncrement()
+ {
+ AssertTokenStreamContents(new PositionFilter(new TestTokenStream(TEST_TOKEN), 5),
+ TEST_TOKEN,
+ TEST_TOKEN_NON_ZERO_POSITION_INCREMENTS);
+ }
+
+ [Test]
+ public void TestReset()
+ {
+ PositionFilter filter = new PositionFilter(new TestTokenStream(TEST_TOKEN));
+ AssertTokenStreamContents(filter, TEST_TOKEN, TEST_TOKEN_POSITION_INCREMENTS);
+ filter.Reset();
+ // Make sure that the reset filter provides correct position increments
+ AssertTokenStreamContents(filter, TEST_TOKEN, TEST_TOKEN_POSITION_INCREMENTS);
+ }
+
+ /** Tests ShingleFilter up to six shingles against six terms.
+ * Tests PositionFilter setting all but the first positionIncrement to zero.
+ * @throws java.io.IOException @see Token#next(Token)
+ */
+ [Test]
+ public void Test6GramFilterNoPositions()
+ {
+ ShingleFilter filter = new ShingleFilter(new TestTokenStream(TEST_TOKEN), 6);
+ AssertTokenStreamContents(new PositionFilter(filter),
+ SIX_GRAM_NO_POSITIONS_TOKENS,
+ SIX_GRAM_NO_POSITIONS_INCREMENTS);
+ }
+ }
+}
\ No newline at end of file
Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Properties/AssemblyInfo.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Properties/AssemblyInfo.cs?rev=1204396&r1=1204395&r2=1204396&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Properties/AssemblyInfo.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Properties/AssemblyInfo.cs Mon Nov 21 08:41:52 2011
@@ -53,5 +53,5 @@ using System.Runtime.InteropServices;
// You can specify all the values or you can default the Build and Revision Numbers
// by using the '*' as shown below:
// [assembly: AssemblyVersion("1.0.*")]
-[assembly: AssemblyVersion("2.9.2.1")]
-[assembly: AssemblyFileVersion("2.9.2.1")]
+[assembly: AssemblyVersion("3.0.3")]
+[assembly: AssemblyFileVersion("3.0.3")]
Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Query/QueryAutoStopWordAnalyzerTest.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Query/QueryAutoStopWordAnalyzerTest.cs?rev=1204396&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Query/QueryAutoStopWordAnalyzerTest.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Query/QueryAutoStopWordAnalyzerTest.cs Mon Nov 21 08:41:52 2011
@@ -0,0 +1,211 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Query;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Documents;
+using Lucene.Net.Index;
+using Lucene.Net.QueryParsers;
+using Lucene.Net.Search;
+using Lucene.Net.Store;
+using NUnit.Framework;
+using Version=Lucene.Net.Util.Version;
+
+namespace Lucene.Net.Analyzers.Query
+{
+ [TestFixture]
+ public class QueryAutoStopWordAnalyzerTest : BaseTokenStreamTestCase
+ {
+ String[] variedFieldValues = { "the", "quick", "brown", "fox", "jumped", "over", "the", "lazy", "boring", "dog" };
+ String[] repetitiveFieldValues = { "boring", "boring", "vaguelyboring" };
+ RAMDirectory dir;
+ Analyzer appAnalyzer;
+ IndexReader reader;
+ QueryAutoStopWordAnalyzer protectedAnalyzer;
+
+ public override void SetUp()
+ {
+ dir = new RAMDirectory();
+ appAnalyzer = new WhitespaceAnalyzer();
+ IndexWriter writer = new IndexWriter(dir, appAnalyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
+ int numDocs = 200;
+ for (int i = 0; i < numDocs; i++)
+ {
+ Document doc = new Document();
+ String variedFieldValue = variedFieldValues[i % variedFieldValues.Length];
+ String repetitiveFieldValue = repetitiveFieldValues[i % repetitiveFieldValues.Length];
+ doc.Add(new Field("variedField", variedFieldValue, Field.Store.YES, Field.Index.ANALYZED));
+ doc.Add(new Field("repetitiveField", repetitiveFieldValue, Field.Store.YES, Field.Index.ANALYZED));
+ writer.AddDocument(doc);
+ }
+ writer.Close();
+ reader = IndexReader.Open(dir, true);
+ protectedAnalyzer = new QueryAutoStopWordAnalyzer(Version.LUCENE_CURRENT, appAnalyzer);
+ base.SetUp();
+ }
+
+ public override void TearDown()
+ {
+ reader.Close();
+ base.TearDown();
+ }
+
+ //Helper method to query
+ private int Search(Analyzer a, String queryString)
+ {
+ QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "repetitiveField", a);
+ var q = qp.Parse(queryString);
+ return new IndexSearcher(reader).Search(q, null, 1000).totalHits;
+ }
+
+ [Test]
+ public void TestUninitializedAnalyzer()
+ {
+ //Note: no calls to "addStopWord"
+ String query = "variedField:quick repetitiveField:boring";
+ int numHits1 = Search(protectedAnalyzer, query);
+ int numHits2 = Search(appAnalyzer, query);
+ Assert.AreEqual(numHits1, numHits2, "No filtering test");
+ }
+
+ /*
+ * Test method for 'org.apache.lucene.analysis.QueryAutoStopWordAnalyzer.AddStopWords(IndexReader)'
+ */
+ [Test]
+ public void TestDefaultAddStopWordsIndexReader()
+ {
+ protectedAnalyzer.AddStopWords(reader);
+ int numHits = Search(protectedAnalyzer, "repetitiveField:boring");
+ Assert.AreEqual(0, numHits, "Default filter should remove all docs");
+ }
+
+
+ /*
+ * Test method for 'org.apache.lucene.analysis.QueryAutoStopWordAnalyzer.AddStopWords(IndexReader, int)'
+ */
+ [Test]
+ public void TestAddStopWordsIndexReaderInt()
+ {
+ protectedAnalyzer.AddStopWords(reader, 1f / 2f);
+ int numHits = Search(protectedAnalyzer, "repetitiveField:boring");
+ Assert.AreEqual(0, numHits, "A filter on terms in > one half of docs remove boring docs");
+
+ numHits = Search(protectedAnalyzer, "repetitiveField:vaguelyboring");
+ Assert.True(numHits > 1, "A filter on terms in > half of docs should not remove vaguelyBoring docs");
+
+ protectedAnalyzer.AddStopWords(reader, 1f / 4f);
+ numHits = Search(protectedAnalyzer, "repetitiveField:vaguelyboring");
+ Assert.AreEqual(0, numHits, "A filter on terms in > quarter of docs should remove vaguelyBoring docs");
+ }
+
+
+ [Test]
+ public void TestAddStopWordsIndexReaderStringFloat()
+ {
+ protectedAnalyzer.AddStopWords(reader, "variedField", 1f / 2f);
+ int numHits = Search(protectedAnalyzer, "repetitiveField:boring");
+            Assert.True(numHits > 0, "A filter on one Field should not affect queries on another");
+
+ protectedAnalyzer.AddStopWords(reader, "repetitiveField", 1f / 2f);
+ numHits = Search(protectedAnalyzer, "repetitiveField:boring");
+ Assert.AreEqual(numHits, 0, "A filter on the right Field should affect queries on it");
+ }
+
+ [Test]
+ public void TestAddStopWordsIndexReaderStringInt()
+ {
+ int numStopWords = protectedAnalyzer.AddStopWords(reader, "repetitiveField", 10);
+ Assert.True(numStopWords > 0, "Should have identified stop words");
+
+ Term[] t = protectedAnalyzer.GetStopWords();
+ Assert.AreEqual(t.Length, numStopWords, "num terms should = num stopwords returned");
+
+ int numNewStopWords = protectedAnalyzer.AddStopWords(reader, "variedField", 10);
+ Assert.True(numNewStopWords > 0, "Should have identified more stop words");
+ t = protectedAnalyzer.GetStopWords();
+ Assert.AreEqual(t.Length, numStopWords + numNewStopWords, "num terms should = num stopwords returned");
+ }
+
+ [Test]
+ public void TestNoFieldNamePollution()
+ {
+ protectedAnalyzer.AddStopWords(reader, "repetitiveField", 10);
+ int numHits = Search(protectedAnalyzer, "repetitiveField:boring");
+ Assert.AreEqual(0, numHits, "Check filter set up OK");
+
+ numHits = Search(protectedAnalyzer, "variedField:boring");
+ Assert.True(numHits > 0, "Filter should not prevent stopwords in one field being used in another ");
+
+ }
+
+ /**
+ * subclass that acts just like whitespace analyzer for testing
+ */
+ private class QueryAutoStopWordSubclassAnalyzer : QueryAutoStopWordAnalyzer
+ {
+ public QueryAutoStopWordSubclassAnalyzer(Version matchVersion)
+ : base(matchVersion, new WhitespaceAnalyzer())
+ {
+
+ }
+
+
+ public override TokenStream TokenStream(String fieldName, TextReader reader)
+ {
+ return new WhitespaceTokenizer(reader);
+ }
+ }
+
+ [Test]
+ public void TestLucene1678BwComp()
+ {
+ QueryAutoStopWordAnalyzer a = new QueryAutoStopWordSubclassAnalyzer(Version.LUCENE_CURRENT);
+ a.AddStopWords(reader, "repetitiveField", 10);
+ int numHits = Search(a, "repetitiveField:boring");
+ Assert.False(numHits == 0);
+ }
+
+ /*
+ * analyzer that does not support reuse
+ * it is LetterTokenizer on odd invocations, WhitespaceTokenizer on even.
+ */
+ private class NonreusableAnalyzer : Analyzer
+ {
+ int invocationCount = 0;
+
+ public override TokenStream TokenStream(String fieldName, TextReader reader)
+ {
+ if (++invocationCount % 2 == 0)
+ return new WhitespaceTokenizer(reader);
+ else
+ return new LetterTokenizer(reader);
+ }
+ }
+
+ [Test]
+ public void TestWrappingNonReusableAnalyzer()
+ {
+ QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(Version.LUCENE_CURRENT, new NonreusableAnalyzer());
+ a.AddStopWords(reader, 10);
+ int numHits = Search(a, "repetitiveField:boring");
+ Assert.True(numHits == 0);
+ numHits = Search(a, "repetitiveField:vaguelyboring");
+ Assert.True(numHits == 0);
+ }
+
+ [Test]
+ public void TestTokenStream()
+ {
+ QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(Version.LUCENE_CURRENT, new WhitespaceAnalyzer());
+ a.AddStopWords(reader, 10);
+ TokenStream ts = a.TokenStream("repetitiveField", new StringReader("this boring"));
+ TermAttribute termAtt = ts.GetAttribute<TermAttribute>();
+ Assert.True(ts.IncrementToken());
+ Assert.AreEqual("this", termAtt.Term());
+ Assert.False(ts.IncrementToken());
+ }
+ }
+}
\ No newline at end of file
Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Reverse/TestReverseStringFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Reverse/TestReverseStringFilter.cs?rev=1204396&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Reverse/TestReverseStringFilter.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Reverse/TestReverseStringFilter.cs Mon Nov 21 08:41:52 2011
@@ -0,0 +1,72 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Reverse;
+using Lucene.Net.Analysis.Tokenattributes;
+using NUnit.Framework;
+
+namespace Lucene.Net.Analyzers.Reverse
+{
+ [TestFixture]
+ public class TestReverseStringFilter : BaseTokenStreamTestCase
+ {
+ [Test]
+ public void TestFilter()
+ {
+ TokenStream stream = new WhitespaceTokenizer(
+ new StringReader("Do have a nice day")); // 1-4 length string
+ ReverseStringFilter filter = new ReverseStringFilter(stream);
+ TermAttribute text = filter.GetAttribute<TermAttribute>();
+ Assert.True(filter.IncrementToken());
+ Assert.AreEqual("oD", text.Term());
+ Assert.True(filter.IncrementToken());
+ Assert.AreEqual("evah", text.Term());
+ Assert.True(filter.IncrementToken());
+ Assert.AreEqual("a", text.Term());
+ Assert.True(filter.IncrementToken());
+ Assert.AreEqual("ecin", text.Term());
+ Assert.True(filter.IncrementToken());
+ Assert.AreEqual("yad", text.Term());
+ Assert.False(filter.IncrementToken());
+ }
+
+ [Test]
+ public void TestFilterWithMark()
+ {
+ TokenStream stream = new WhitespaceTokenizer(new StringReader(
+ "Do have a nice day")); // 1-4 length string
+ ReverseStringFilter filter = new ReverseStringFilter(stream, '\u0001');
+ TermAttribute text = filter.GetAttribute<TermAttribute>();
+ Assert.True(filter.IncrementToken());
+ Assert.AreEqual("\u0001oD", text.Term());
+ Assert.True(filter.IncrementToken());
+ Assert.AreEqual("\u0001evah", text.Term());
+ Assert.True(filter.IncrementToken());
+ Assert.AreEqual("\u0001a", text.Term());
+ Assert.True(filter.IncrementToken());
+ Assert.AreEqual("\u0001ecin", text.Term());
+ Assert.True(filter.IncrementToken());
+ Assert.AreEqual("\u0001yad", text.Term());
+ Assert.False(filter.IncrementToken());
+ }
+
+ [Test]
+ public void TestReverseString()
+ {
+ Assert.AreEqual("A", ReverseStringFilter.Reverse("A"));
+ Assert.AreEqual("BA", ReverseStringFilter.Reverse("AB"));
+ Assert.AreEqual("CBA", ReverseStringFilter.Reverse("ABC"));
+ }
+
+ [Test]
+ public void TestReverseChar()
+ {
+ char[] buffer = { 'A', 'B', 'C', 'D', 'E', 'F' };
+ ReverseStringFilter.Reverse(buffer, 2, 3);
+ Assert.AreEqual("ABEDCF", new String(buffer));
+ }
+ }
+}
\ No newline at end of file
Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Ru/TestRussianAnalyzer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Ru/TestRussianAnalyzer.cs?rev=1204396&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Ru/TestRussianAnalyzer.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Ru/TestRussianAnalyzer.cs Mon Nov 21 08:41:52 2011
@@ -0,0 +1,90 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Ru;
+using Lucene.Net.Analysis.Tokenattributes;
+using NUnit.Framework;
+using Version=Lucene.Net.Util.Version;
+
+namespace Lucene.Net.Analyzers.Ru
+{
+ /**
+ * Test case for RussianAnalyzer.
+ */
+ [TestFixture]
+ public class TestRussianAnalyzer : BaseTokenStreamTestCase
+ {
+ private StreamReader inWords;
+
+ private StreamReader sampleUnicode;
+
+ protected void setUp()
+ {
+ base.SetUp();
+ }
+
+ [Test]
+ public void TestUnicode()
+ {
+ RussianAnalyzer ra = new RussianAnalyzer(Version.LUCENE_CURRENT);
+
+ using (inWords = new StreamReader(@"ru\testUTF8.txt", Encoding.UTF8))
+ using (sampleUnicode = new StreamReader(@"ru\resUTF8.htm", Encoding.UTF8))
+ {
+
+ TokenStream _in = ra.TokenStream("all", inWords);
+
+ RussianLetterTokenizer sample =
+ new RussianLetterTokenizer(
+ sampleUnicode);
+
+ TermAttribute text = _in.GetAttribute<TermAttribute>();
+ TermAttribute sampleText = sample.GetAttribute<TermAttribute>();
+
+ for (; ; )
+ {
+ if (_in.IncrementToken() == false)
+ break;
+
+ bool nextSampleToken = sample.IncrementToken();
+ Assert.AreEqual(text.Term(), nextSampleToken == false ? null : sampleText.Term(), "Unicode");
+ }
+ }
+ }
+
+ [Test]
+ public void TestDigitsInRussianCharset()
+ {
+ TextReader reader = new StringReader("text 1000");
+ RussianAnalyzer ra = new RussianAnalyzer(Version.LUCENE_CURRENT);
+ TokenStream stream = ra.TokenStream("", reader);
+
+ TermAttribute termText = stream.GetAttribute<TermAttribute>();
+ try
+ {
+ Assert.True(stream.IncrementToken());
+ Assert.AreEqual("text", termText.Term());
+ Assert.True(stream.IncrementToken());
+ Assert.AreEqual("1000", termText.Term(), "RussianAnalyzer's tokenizer skips numbers from input text");
+ Assert.False(stream.IncrementToken());
+ }
+ catch (IOException e)
+ {
+ Assert.Fail("unexpected IOException");
+ }
+ }
+
+ [Test]
+ public void TestReusableTokenStream()
+ {
+ Analyzer a = new RussianAnalyzer(Version.LUCENE_CURRENT);
+            AssertAnalyzesToReuse(a, "Вместе с тем о силе электромагнитной энергии имели представление еще",
+                                  new String[] {"вмест", "сил", "электромагнитн", "энерг", "имел", "представлен"});
+            AssertAnalyzesToReuse(a, "Но знание это хранилось в тайне",
+                                  new String[] {"знан", "хран", "тайн"});
+ }
+ }
+}
Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Ru/TestRussianStem.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Ru/TestRussianStem.cs?rev=1204396&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Ru/TestRussianStem.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Ru/TestRussianStem.cs Mon Nov 21 08:41:52 2011
@@ -0,0 +1,56 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+using Lucene.Net.Analysis.Ru;
+using Lucene.Net.Util;
+using NUnit.Framework;
+
+namespace Lucene.Net.Analyzers.Ru
+{
+ [TestFixture]
+ public class TestRussianStem : LuceneTestCase
+ {
+ private List<string> words = new List<string>();
+ private List<string> stems = new List<string>();
+
+ /**
+ * @see TestCase#setUp()
+ */
+ public override void SetUp()
+ {
+ base.SetUp();
+ //System.out.println(new java.util.Date());
+ String str;
+
+ // open and read words into an array list
+ StreamReader inWords = new StreamReader(@"ru\wordsUTF8.txt", Encoding.UTF8);
+ while ((str = inWords.ReadLine()) != null)
+ {
+ words.Add(str);
+ }
+ inWords.Close();
+
+ // open and read stems into an array list
+ StreamReader inStems = new StreamReader(@"ru\stemsUTF8.txt", Encoding.UTF8);
+ while ((str = inStems.ReadLine()) != null)
+ {
+ stems.Add(str);
+ }
+ inStems.Close();
+ }
+
+ [Test]
+ public void TestStem()
+ {
+ for (int i = 0; i < words.Count; i++)
+ {
+ //if ( (i % 100) == 0 ) System.err.println(i);
+ String realStem =
+ RussianStemmer.StemWord(words[i]);
+ Assert.AreEqual(stems[i], realStem, "unicode");
+ }
+ }
+ }
+}
\ No newline at end of file
Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Ru/resUTF8.htm
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Ru/resUTF8.htm?rev=1204396&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Ru/resUTF8.htm (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Ru/resUTF8.htm Mon Nov 21 08:41:52 2011
@@ -0,0 +1 @@
+[вмест][сил][электромагнитн][энерг][имел][представлен][скаж][жрец][древн][египт][знан][хран][тайн][узк][круг][посвящен][всяк][времен][виток][прин][чтоб][нов][технолог][сам][дел][раскрыва][потаен][знан][прежн][век][говор][нов][информац][станов][доступн][широк][круг][пользовател][сет][случа][сознан][обществ][готов][восприня][воспользова]
\ No newline at end of file
Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Ru/stemsUTF8.txt
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Ru/stemsUTF8.txt?rev=1204396&view=auto
==============================================================================
Binary file - no diff available.
Propchange: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Ru/stemsUTF8.txt
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Ru/testUTF8.txt
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Ru/testUTF8.txt?rev=1204396&view=auto
==============================================================================
Binary file - no diff available.
Propchange: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Ru/testUTF8.txt
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Ru/wordsUTF8.txt
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Ru/wordsUTF8.txt?rev=1204396&view=auto
==============================================================================
Binary file - no diff available.
Propchange: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Ru/wordsUTF8.txt
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Sinks/DateRecognizerSinkTokenizerTest.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Sinks/DateRecognizerSinkTokenizerTest.cs?rev=1204396&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Sinks/DateRecognizerSinkTokenizerTest.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Sinks/DateRecognizerSinkTokenizerTest.cs Mon Nov 21 08:41:52 2011
@@ -0,0 +1,40 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Sinks;
+using NUnit.Framework;
+
+namespace Lucene.Net.Analyzers.Sinks
+{
+ [TestFixture]
+ public class DateRecognizerSinkTokenizerTest : BaseTokenStreamTestCase
+ {
+ [Test]
+ public void Test()
+ {
+ DateRecognizerSinkFilter sinkFilter = new DateRecognizerSinkFilter(System.Globalization.CultureInfo.CurrentCulture);
+ String test = "The quick red fox jumped over the lazy brown dogs on 7/11/2006 The dogs finally reacted on 7/12/2006";
+ TeeSinkTokenFilter tee = new TeeSinkTokenFilter(new WhitespaceTokenizer(new StringReader(test)));
+ TeeSinkTokenFilter.SinkTokenStream sink = tee.NewSinkTokenStream(sinkFilter);
+ int count = 0;
+
+ tee.Reset();
+ while (tee.IncrementToken())
+ {
+ count++;
+ }
+ Assert.True(count == 18, count + " does not equal: " + 18);
+
+ int sinkCount = 0;
+ sink.Reset();
+ while (sink.IncrementToken())
+ {
+ sinkCount++;
+ }
+ Assert.True(sinkCount == 2, "sink Size: " + sinkCount + " is not: " + 2);
+ }
+ }
+}
Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Sinks/TokenRangeSinkTokenizerTest.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Sinks/TokenRangeSinkTokenizerTest.cs?rev=1204396&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Sinks/TokenRangeSinkTokenizerTest.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Sinks/TokenRangeSinkTokenizerTest.cs Mon Nov 21 08:41:52 2011
@@ -0,0 +1,41 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Sinks;
+using NUnit.Framework;
+
+namespace Lucene.Net.Analyzers.Sinks
+{
+ [TestFixture]
+ public class TokenRangeSinkTokenizerTest : BaseTokenStreamTestCase
+ {
+ [Test]
+ public void Test()
+ {
+ TokenRangeSinkFilter sinkFilter = new TokenRangeSinkFilter(2, 4);
+ String test = "The quick red fox jumped over the lazy brown dogs";
+ TeeSinkTokenFilter tee = new TeeSinkTokenFilter(new WhitespaceTokenizer(new StringReader(test)));
+ TeeSinkTokenFilter.SinkTokenStream rangeToks = tee.NewSinkTokenStream(sinkFilter);
+
+ int count = 0;
+ tee.Reset();
+ while (tee.IncrementToken())
+ {
+ count++;
+ }
+
+ int sinkCount = 0;
+ rangeToks.Reset();
+ while (rangeToks.IncrementToken())
+ {
+ sinkCount++;
+ }
+
+ Assert.True(count == 10, count + " does not equal: " + 10);
+ Assert.True(sinkCount == 2, "rangeToks Size: " + sinkCount + " is not: " + 2);
+ }
+ }
+}
\ No newline at end of file
Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Sinks/TokenTypeSinkTokenizerTest.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Sinks/TokenTypeSinkTokenizerTest.cs?rev=1204396&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Sinks/TokenTypeSinkTokenizerTest.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Sinks/TokenTypeSinkTokenizerTest.cs Mon Nov 21 08:41:52 2011
@@ -0,0 +1,78 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Sinks;
+using Lucene.Net.Analysis.Tokenattributes;
+using NUnit.Framework;
+
+namespace Lucene.Net.Analyzers.Sinks
+{
+ [TestFixture]
+ public class TokenTypeSinkTokenizerTest : BaseTokenStreamTestCase
+ {
+ [Test]
+ public void Test()
+ {
+ TokenTypeSinkFilter sinkFilter = new TokenTypeSinkFilter("D");
+ String test = "The quick red fox jumped over the lazy brown dogs";
+
+ TeeSinkTokenFilter ttf = new TeeSinkTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(new StringReader(test))));
+ TeeSinkTokenFilter.SinkTokenStream sink = ttf.NewSinkTokenStream(sinkFilter);
+
+ bool seenDogs = false;
+
+ TermAttribute termAtt = ttf.AddAttribute<TermAttribute>();
+ TypeAttribute typeAtt = ttf.AddAttribute<TypeAttribute>();
+ ttf.Reset();
+ while (ttf.IncrementToken())
+ {
+ if (termAtt.Term().Equals("dogs"))
+ {
+ seenDogs = true;
+ Assert.True(typeAtt.Type().Equals("D") == true, typeAtt.Type() + " is not equal to " + "D");
+ }
+ else
+ {
+ Assert.True(typeAtt.Type().Equals("word"), typeAtt.Type() + " is not null and it should be");
+ }
+ }
+ Assert.True(seenDogs == true, seenDogs + " does not equal: " + true);
+
+ int sinkCount = 0;
+ sink.Reset();
+ while (sink.IncrementToken())
+ {
+ sinkCount++;
+ }
+
+ Assert.True(sinkCount == 1, "sink Size: " + sinkCount + " is not: " + 1);
+ }
+
+ internal class WordTokenFilter : TokenFilter
+ {
+ private TermAttribute termAtt;
+ private TypeAttribute typeAtt;
+
+ internal WordTokenFilter(TokenStream input)
+ : base(input)
+ {
+ termAtt = AddAttribute<TermAttribute>();
+ typeAtt = AddAttribute<TypeAttribute>();
+ }
+
+ public sealed override bool IncrementToken()
+ {
+ if (!input.IncrementToken()) return false;
+
+ if (termAtt.Term().Equals("dogs"))
+ {
+ typeAtt.SetType("D");
+ }
+ return true;
+ }
+ }
+ }
+}
\ No newline at end of file
Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Th/TestThaiAnalyzer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Th/TestThaiAnalyzer.cs?rev=1204396&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Th/TestThaiAnalyzer.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Th/TestThaiAnalyzer.cs Mon Nov 21 08:41:52 2011
@@ -0,0 +1,18 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using NUnit.Framework;
+
+namespace Lucene.Net.Analyzers.Th
+{
+ [TestFixture]
+ public class TestThaiAnalyzer
+ {
+ [Test]
+ public void TestThai()
+ {
+ Assert.Ignore("Need to port ThaiAnalyzer and tests");
+ }
+ }
+}