You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by cc...@apache.org on 2011/11/21 09:41:54 UTC
[Lucene.Net] svn commit: r1204396 [2/3] - in
/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk:
src/contrib/Analyzers/BR/ src/contrib/Analyzers/CJK/
src/contrib/Analyzers/Cn/ src/contrib/Analyzers/Fr/
src/contrib/Analyzers/Miscellaneous/ src/contrib/Analyzers/N...
Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/El/GreekAnalyzerTest.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/El/GreekAnalyzerTest.cs?rev=1204396&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/El/GreekAnalyzerTest.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/El/GreekAnalyzerTest.cs Mon Nov 21 08:41:52 2011
@@ -0,0 +1,99 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using Lucene.Net.Analysis;
+using NUnit.Framework;
+using Version=Lucene.Net.Util.Version;
+
+namespace Lucene.Net.Analyzers.El
+{
+/**
+ * A unit test class for verifying the correct operation of the GreekAnalyzer.
+ *
+ */
+// NOTE(review): Greek text is written as \u escapes; per the per-assertion comments, the
+// expected tokens are the lowercased, accent-stripped forms of the input words.
+// Method names keep the Java original's lowercase "testXxx" style; NUnit discovers them
+// through the [Test] attributes, not the names.
+ [TestFixture]
+public class GreekAnalyzerTest : BaseTokenStreamTestCase {
+
+ /**
+ * Test the analysis of various greek strings.
+ *
+ * @throws Exception in case an error occurs
+ */
+ [Test]
+ public void testAnalyzer(){
+ Analyzer a = new GreekAnalyzer(Version.LUCENE_CURRENT);
+ // Verify the correct analysis of capitals and small accented letters
+ AssertAnalyzesTo(a,
+ "\u039c\u03af\u03b1 \u03b5\u03be\u03b1\u03b9\u03c1\u03b5\u03c4\u03b9\u03ba\u03ac \u03ba\u03b1\u03bb\u03ae \u03ba\u03b1\u03b9 \u03c0\u03bb\u03bf\u03cd\u03c3\u03b9\u03b1 \u03c3\u03b5\u03b9\u03c1\u03ac \u03c7\u03b1\u03c1\u03b1\u03ba\u03c4\u03ae\u03c1\u03c9\u03bd \u03c4\u03b7\u03c2 \u0395\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ae\u03c2 \u03b3\u03bb\u03ce\u03c3\u03c3\u03b1\u03c2",
+ new String[]
+ {
+ "\u03bc\u03b9\u03b1", "\u03b5\u03be\u03b1\u03b9\u03c1\u03b5\u03c4\u03b9\u03ba\u03b1",
+ "\u03ba\u03b1\u03bb\u03b7", "\u03c0\u03bb\u03bf\u03c5\u03c3\u03b9\u03b1",
+ "\u03c3\u03b5\u03b9\u03c1\u03b1",
+ "\u03c7\u03b1\u03c1\u03b1\u03ba\u03c4\u03b7\u03c1\u03c9\u03bd",
+ "\u03b5\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03b7\u03c3",
+ "\u03b3\u03bb\u03c9\u03c3\u03c3\u03b1\u03c3"
+ });
+ // Verify the correct analysis of small letters with diaeresis and the elimination
+ // of punctuation marks
+ AssertAnalyzesTo(a,
+ "\u03a0\u03c1\u03bf\u03ca\u03cc\u03bd\u03c4\u03b1 (\u03ba\u03b1\u03b9) [\u03c0\u03bf\u03bb\u03bb\u03b1\u03c0\u03bb\u03ad\u03c2] - \u0391\u039d\u0391\u0393\u039a\u0395\u03a3",
+ new String[]
+ {
+ "\u03c0\u03c1\u03bf\u03b9\u03bf\u03bd\u03c4\u03b1",
+ "\u03c0\u03bf\u03bb\u03bb\u03b1\u03c0\u03bb\u03b5\u03c3",
+ "\u03b1\u03bd\u03b1\u03b3\u03ba\u03b5\u03c3"
+ });
+ // Verify the correct analysis of capital accented letters and capital letters with diaeresis,
+ // as well as the elimination of stop words
+ AssertAnalyzesTo(a,
+ "\u03a0\u03a1\u039f\u03ab\u03a0\u039f\u0398\u0395\u03a3\u0395\u0399\u03a3 \u0386\u03c8\u03bf\u03b3\u03bf\u03c2, \u03bf \u03bc\u03b5\u03c3\u03c4\u03cc\u03c2 \u03ba\u03b1\u03b9 \u03bf\u03b9 \u03ac\u03bb\u03bb\u03bf\u03b9",
+ new String[]
+ {
+ "\u03c0\u03c1\u03bf\u03c5\u03c0\u03bf\u03b8\u03b5\u03c3\u03b5\u03b9\u03c3",
+ "\u03b1\u03c8\u03bf\u03b3\u03bf\u03c3", "\u03bc\u03b5\u03c3\u03c4\u03bf\u03c3",
+ "\u03b1\u03bb\u03bb\u03bf\u03b9"
+ });
+ }
+
+ // Same inputs and expectations as testAnalyzer, but exercising the reusable
+ // token stream path (AssertAnalyzesToReuse) with a single shared Analyzer.
+ [Test]
+ public void testReusableTokenStream(){
+ Analyzer a = new GreekAnalyzer(Version.LUCENE_CURRENT);
+ // Verify the correct analysis of capitals and small accented letters
+ AssertAnalyzesToReuse(a,
+ "\u039c\u03af\u03b1 \u03b5\u03be\u03b1\u03b9\u03c1\u03b5\u03c4\u03b9\u03ba\u03ac \u03ba\u03b1\u03bb\u03ae \u03ba\u03b1\u03b9 \u03c0\u03bb\u03bf\u03cd\u03c3\u03b9\u03b1 \u03c3\u03b5\u03b9\u03c1\u03ac \u03c7\u03b1\u03c1\u03b1\u03ba\u03c4\u03ae\u03c1\u03c9\u03bd \u03c4\u03b7\u03c2 \u0395\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ae\u03c2 \u03b3\u03bb\u03ce\u03c3\u03c3\u03b1\u03c2",
+ new String[]
+ {
+ "\u03bc\u03b9\u03b1",
+ "\u03b5\u03be\u03b1\u03b9\u03c1\u03b5\u03c4\u03b9\u03ba\u03b1",
+ "\u03ba\u03b1\u03bb\u03b7", "\u03c0\u03bb\u03bf\u03c5\u03c3\u03b9\u03b1",
+ "\u03c3\u03b5\u03b9\u03c1\u03b1",
+ "\u03c7\u03b1\u03c1\u03b1\u03ba\u03c4\u03b7\u03c1\u03c9\u03bd",
+ "\u03b5\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03b7\u03c3",
+ "\u03b3\u03bb\u03c9\u03c3\u03c3\u03b1\u03c3"
+ });
+ // Verify the correct analysis of small letters with diaeresis and the elimination
+ // of punctuation marks
+ AssertAnalyzesToReuse(a,
+ "\u03a0\u03c1\u03bf\u03ca\u03cc\u03bd\u03c4\u03b1 (\u03ba\u03b1\u03b9) [\u03c0\u03bf\u03bb\u03bb\u03b1\u03c0\u03bb\u03ad\u03c2] - \u0391\u039d\u0391\u0393\u039a\u0395\u03a3",
+ new String[]
+ {
+ "\u03c0\u03c1\u03bf\u03b9\u03bf\u03bd\u03c4\u03b1",
+ "\u03c0\u03bf\u03bb\u03bb\u03b1\u03c0\u03bb\u03b5\u03c3",
+ "\u03b1\u03bd\u03b1\u03b3\u03ba\u03b5\u03c3"
+ });
+ // Verify the correct analysis of capital accented letters and capital letters with diaeresis,
+ // as well as the elimination of stop words
+ AssertAnalyzesToReuse(a,
+ "\u03a0\u03a1\u039f\u03ab\u03a0\u039f\u0398\u0395\u03a3\u0395\u0399\u03a3 \u0386\u03c8\u03bf\u03b3\u03bf\u03c2, \u03bf \u03bc\u03b5\u03c3\u03c4\u03cc\u03c2 \u03ba\u03b1\u03b9 \u03bf\u03b9 \u03ac\u03bb\u03bb\u03bf\u03b9",
+ new String[]
+ {
+ "\u03c0\u03c1\u03bf\u03c5\u03c0\u03bf\u03b8\u03b5\u03c3\u03b5\u03b9\u03c3",
+ "\u03b1\u03c8\u03bf\u03b3\u03bf\u03c3", "\u03bc\u03b5\u03c3\u03c4\u03bf\u03c3",
+ "\u03b1\u03bb\u03bb\u03bf\u03b9"
+ });
+ }
+}
+
+}
Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Fa/TestPersianAnalyzer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Fa/TestPersianAnalyzer.cs?rev=1204396&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Fa/TestPersianAnalyzer.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Fa/TestPersianAnalyzer.cs Mon Nov 21 08:41:52 2011
@@ -0,0 +1,212 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using Lucene.Net.Analysis;
+using Version = Lucene.Net.Util.Version;
+
+namespace Lucene.Net.Analyzers.Fa
+{
+/**
+ * Test the Persian Analyzer
+ *
+ */
+// NOTE(review): unlike the other ported test classes in this commit, this class has no
+// [TestFixture] attribute, no [Test] attributes on its methods, and does not import
+// NUnit.Framework — as written, NUnit will not discover or run any of these tests.
+// Confirm against the canonical SVN file and add the attributes there.
+// NOTE(review): the Persian string literals below are mojibake in this archived copy
+// (UTF-8 rendered as Latin-1, with some literals split across physical lines). They are
+// preserved verbatim here; restore the literals from the canonical SVN file before use.
+public class TestPersianAnalyzer : BaseTokenStreamTestCase {
+
+ /**
+ * This test fails with NPE when the stopwords file is missing in classpath
+ */
+ public void testResourcesAvailable() {
+ new PersianAnalyzer(Version.LUCENE_CURRENT);
+ }
+
+ /**
+ * This test shows how the combination of tokenization (breaking on zero-width
+ * non-joiner), normalization (such as treating arabic YEH and farsi YEH the
+ * same), and stopwords creates a light-stemming effect for verbs.
+ *
+ * These verb forms are from http://en.wikipedia.org/wiki/Persian_grammar
+ */
+ public void testBehaviorVerbs(){
+ Analyzer a = new PersianAnalyzer(Version.LUCENE_CURRENT);
+ // active present indicative
+ AssertAnalyzesTo(a, "Ù
ÛâØ®Ùرد", new String[] { "Ø®Ùرد" });
+ // active preterite indicative
+ AssertAnalyzesTo(a, "Ø®Ùرد", new String[] { "Ø®Ùرد" });
+ // active imperfective preterite indicative
+ AssertAnalyzesTo(a, "Ù
ÛâØ®Ùرد", new String[] { "Ø®Ùرد" });
+ // active future indicative
+ AssertAnalyzesTo(a, "Ø®ÙاÙد Ø®Ùرد", new String[] { "Ø®Ùرد" });
+ // active present progressive indicative
+ AssertAnalyzesTo(a, "دارد Ù
ÛâØ®Ùرد", new String[] { "Ø®Ùرد" });
+ // active preterite progressive indicative
+ AssertAnalyzesTo(a, "داشت Ù
ÛâØ®Ùرد", new String[] { "Ø®Ùرد" });
+
+ // active perfect indicative
+ AssertAnalyzesTo(a, "Ø®ÙردÙâاست", new String[] { "Ø®ÙردÙ" });
+ // active imperfective perfect indicative
+ AssertAnalyzesTo(a, "Ù
ÛâØ®ÙردÙâاست", new String[] { "Ø®ÙردÙ" });
+ // active pluperfect indicative
+ AssertAnalyzesTo(a, "Ø®Ùرد٠بÙد", new String[] { "Ø®ÙردÙ" });
+ // active imperfective pluperfect indicative
+ AssertAnalyzesTo(a, "Ù
ÛâØ®Ùرد٠بÙد", new String[] { "Ø®ÙردÙ" });
+ // active preterite subjunctive
+ AssertAnalyzesTo(a, "Ø®Ùرد٠باشد", new String[] { "Ø®ÙردÙ" });
+ // active imperfective preterite subjunctive
+ AssertAnalyzesTo(a, "Ù
ÛâØ®Ùرد٠باشد", new String[] { "Ø®ÙردÙ" });
+ // active pluperfect subjunctive
+ AssertAnalyzesTo(a, "Ø®Ùرد٠بÙد٠باشد", new String[] { "Ø®ÙردÙ" });
+ // active imperfective pluperfect subjunctive
+ AssertAnalyzesTo(a, "Ù
ÛâØ®Ùرد٠بÙد٠باشد", new String[] { "Ø®ÙردÙ" });
+ // passive present indicative
+ AssertAnalyzesTo(a, "Ø®Ùرد٠Ù
ÛâØ´Ùد", new String[] { "Ø®ÙردÙ" });
+ // passive preterite indicative
+ AssertAnalyzesTo(a, "Ø®Ùرد٠شد", new String[] { "Ø®ÙردÙ" });
+ // passive imperfective preterite indicative
+ AssertAnalyzesTo(a, "Ø®Ùرد٠Ù
Ûâشد", new String[] { "Ø®ÙردÙ" });
+ // passive perfect indicative
+ AssertAnalyzesTo(a, "Ø®Ùرد٠شدÙâاست", new String[] { "Ø®ÙردÙ" });
+ // passive imperfective perfect indicative
+ AssertAnalyzesTo(a, "Ø®Ùرد٠Ù
ÛâشدÙâاست", new String[] { "Ø®ÙردÙ" });
+ // passive pluperfect indicative
+ AssertAnalyzesTo(a, "Ø®Ùرد٠شد٠بÙد", new String[] { "Ø®ÙردÙ" });
+ // passive imperfective pluperfect indicative
+ AssertAnalyzesTo(a, "Ø®Ùرد٠Ù
Ûâشد٠بÙد", new String[] { "Ø®ÙردÙ" });
+ // passive future indicative
+ AssertAnalyzesTo(a, "Ø®Ùرد٠خÙاÙد شد", new String[] { "Ø®ÙردÙ" });
+ // passive present progressive indicative
+ AssertAnalyzesTo(a, "دارد Ø®Ùرد٠Ù
ÛâØ´Ùد", new String[] { "Ø®ÙردÙ" });
+ // passive preterite progressive indicative
+ AssertAnalyzesTo(a, "داشت Ø®Ùرد٠Ù
Ûâشد", new String[] { "Ø®ÙردÙ" });
+ // passive present subjunctive
+ AssertAnalyzesTo(a, "Ø®Ùرد٠شÙد", new String[] { "Ø®ÙردÙ" });
+ // passive preterite subjunctive
+ AssertAnalyzesTo(a, "Ø®Ùرد٠شد٠باشد", new String[] { "Ø®ÙردÙ" });
+ // passive imperfective preterite subjunctive
+ AssertAnalyzesTo(a, "Ø®Ùرد٠Ù
Ûâشد٠باشد", new String[] { "Ø®ÙردÙ" });
+ // passive pluperfect subjunctive
+ AssertAnalyzesTo(a, "Ø®Ùرد٠شد٠بÙد٠باشد", new String[] { "Ø®ÙردÙ" });
+ // passive imperfective pluperfect subjunctive
+ AssertAnalyzesTo(a, "Ø®Ùرد٠Ù
Ûâشد٠بÙد٠باشد", new String[] { "Ø®ÙردÙ" });
+
+ // active present subjunctive
+ AssertAnalyzesTo(a, "بخÙرد", new String[] { "بخÙرد" });
+ }
+
+ /**
+ * This test shows how the combination of tokenization and stopwords creates a
+ * light-stemming effect for verbs.
+ *
+ * In this case, these forms are presented with alternative orthography, using
+ * arabic yeh and whitespace. This yeh phenomenon is common for legacy text
+ * due to some previous bugs in Microsoft Windows.
+ *
+ * These verb forms are from http://en.wikipedia.org/wiki/Persian_grammar
+ */
+ public void testBehaviorVerbsDefective(){
+ Analyzer a = new PersianAnalyzer(Version.LUCENE_CURRENT);
+ // active present indicative
+ AssertAnalyzesTo(a, "Ù
Ù Ø®Ùرد", new String[] { "Ø®Ùرد" });
+ // active preterite indicative
+ AssertAnalyzesTo(a, "Ø®Ùرد", new String[] { "Ø®Ùرد" });
+ // active imperfective preterite indicative
+ AssertAnalyzesTo(a, "Ù
Ù Ø®Ùرد", new String[] { "Ø®Ùرد" });
+ // active future indicative
+ AssertAnalyzesTo(a, "Ø®ÙاÙد Ø®Ùرد", new String[] { "Ø®Ùرد" });
+ // active present progressive indicative
+ AssertAnalyzesTo(a, "دارد Ù
Ù Ø®Ùرد", new String[] { "Ø®Ùرد" });
+ // active preterite progressive indicative
+ AssertAnalyzesTo(a, "داشت Ù
Ù Ø®Ùرد", new String[] { "Ø®Ùرد" });
+
+ // active perfect indicative
+ AssertAnalyzesTo(a, "Ø®Ùرد٠است", new String[] { "Ø®ÙردÙ" });
+ // active imperfective perfect indicative
+ AssertAnalyzesTo(a, "Ù
Ù Ø®Ùرد٠است", new String[] { "Ø®ÙردÙ" });
+ // active pluperfect indicative
+ AssertAnalyzesTo(a, "Ø®Ùرد٠بÙد", new String[] { "Ø®ÙردÙ" });
+ // active imperfective pluperfect indicative
+ AssertAnalyzesTo(a, "Ù
Ù Ø®Ùرد٠بÙد", new String[] { "Ø®ÙردÙ" });
+ // active preterite subjunctive
+ AssertAnalyzesTo(a, "Ø®Ùرد٠باشد", new String[] { "Ø®ÙردÙ" });
+ // active imperfective preterite subjunctive
+ AssertAnalyzesTo(a, "Ù
Ù Ø®Ùرد٠باشد", new String[] { "Ø®ÙردÙ" });
+ // active pluperfect subjunctive
+ AssertAnalyzesTo(a, "Ø®Ùرد٠بÙد٠باشد", new String[] { "Ø®ÙردÙ" });
+ // active imperfective pluperfect subjunctive
+ AssertAnalyzesTo(a, "Ù
Ù Ø®Ùرد٠بÙد٠باشد", new String[] { "Ø®ÙردÙ" });
+ // passive present indicative
+ AssertAnalyzesTo(a, "Ø®Ùرد٠Ù
Ù Ø´Ùد", new String[] { "Ø®ÙردÙ" });
+ // passive preterite indicative
+ AssertAnalyzesTo(a, "Ø®Ùرد٠شد", new String[] { "Ø®ÙردÙ" });
+ // passive imperfective preterite indicative
+ AssertAnalyzesTo(a, "Ø®Ùرد٠Ù
٠شد", new String[] { "Ø®ÙردÙ" });
+ // passive perfect indicative
+ AssertAnalyzesTo(a, "Ø®Ùرد٠شد٠است", new String[] { "Ø®ÙردÙ" });
+ // passive imperfective perfect indicative
+ AssertAnalyzesTo(a, "Ø®Ùرد٠Ù
٠شد٠است", new String[] { "Ø®ÙردÙ" });
+ // passive pluperfect indicative
+ AssertAnalyzesTo(a, "Ø®Ùرد٠شد٠بÙد", new String[] { "Ø®ÙردÙ" });
+ // passive imperfective pluperfect indicative
+ AssertAnalyzesTo(a, "Ø®Ùرد٠Ù
٠شد٠بÙد", new String[] { "Ø®ÙردÙ" });
+ // passive future indicative
+ AssertAnalyzesTo(a, "Ø®Ùرد٠خÙاÙد شد", new String[] { "Ø®ÙردÙ" });
+ // passive present progressive indicative
+ AssertAnalyzesTo(a, "دارد Ø®Ùرد٠Ù
Ù Ø´Ùد", new String[] { "Ø®ÙردÙ" });
+ // passive preterite progressive indicative
+ AssertAnalyzesTo(a, "داشت Ø®Ùرد٠Ù
٠شد", new String[] { "Ø®ÙردÙ" });
+ // passive present subjunctive
+ AssertAnalyzesTo(a, "Ø®Ùرد٠شÙد", new String[] { "Ø®ÙردÙ" });
+ // passive preterite subjunctive
+ AssertAnalyzesTo(a, "Ø®Ùرد٠شد٠باشد", new String[] { "Ø®ÙردÙ" });
+ // passive imperfective preterite subjunctive
+ AssertAnalyzesTo(a, "Ø®Ùرد٠Ù
٠شد٠باشد", new String[] { "Ø®ÙردÙ" });
+ // passive pluperfect subjunctive
+ AssertAnalyzesTo(a, "Ø®Ùرد٠شد٠بÙد٠باشد", new String[] { "Ø®ÙردÙ" });
+ // passive imperfective pluperfect subjunctive
+ AssertAnalyzesTo(a, "Ø®Ùرد٠Ù
٠شد٠بÙد٠باشد", new String[] { "Ø®ÙردÙ" });
+
+ // active present subjunctive
+ AssertAnalyzesTo(a, "بخÙرد", new String[] { "بخÙرد" });
+ }
+
+ /**
+ * This test shows how the combination of tokenization (breaking on zero-width
+ * non-joiner or space) and stopwords creates a light-stemming effect for
+ * nouns, removing the plural -ha.
+ */
+ public void testBehaviorNouns(){
+ Analyzer a = new PersianAnalyzer(Version.LUCENE_CURRENT);
+ AssertAnalyzesTo(a, "برگ Ùا", new String[] { "برگ" });
+ AssertAnalyzesTo(a, "برگâÙا", new String[] { "برگ" });
+ }
+
+ /**
+ * Test showing that non-persian text is treated very much like SimpleAnalyzer
+ * (lowercased, etc)
+ */
+ public void testBehaviorNonPersian(){
+ Analyzer a = new PersianAnalyzer(Version.LUCENE_CURRENT);
+ AssertAnalyzesTo(a, "English test.", new String[] { "english", "test" });
+ }
+
+ /**
+ * Basic test ensuring that reusableTokenStream works correctly.
+ */
+ public void testReusableTokenStream(){
+ Analyzer a = new PersianAnalyzer(Version.LUCENE_CURRENT);
+ AssertAnalyzesToReuse(a, "Ø®Ùرد٠Ù
٠شد٠بÙد٠باشد", new String[] { "Ø®ÙردÙ" });
+ AssertAnalyzesToReuse(a, "برگâÙا", new String[] { "برگ" });
+ }
+
+ /**
+ * Test that custom stopwords work, and are not case-sensitive.
+ */
+ public void testCustomStopwords(){
+ PersianAnalyzer a = new PersianAnalyzer(Version.LUCENE_CURRENT, new String[] { "the", "and", "a" });
+ AssertAnalyzesTo(a, "The quick brown fox.", new String[] { "quick",
+ "brown", "fox" });
+ }
+
+}
+
+}
Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Fa/TestPersianNormalizationFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Fa/TestPersianNormalizationFilter.cs?rev=1204396&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Fa/TestPersianNormalizationFilter.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Fa/TestPersianNormalizationFilter.cs Mon Nov 21 08:41:52 2011
@@ -0,0 +1,64 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.AR;
+using NUnit.Framework;
+
+namespace Lucene.Net.Analyzers.Fa
+{
+ /**
+ * Test the Persian Normalization Filter
+ *
+ */
+ [TestFixture]
+ public class TestPersianNormalizationFilter : BaseTokenStreamTestCase
+ {
+ // NOTE(review): the non-ASCII literals in the Check(...) calls below are mojibake in this
+ // archived copy (UTF-8 rendered as Latin-1); they are preserved verbatim. Restore them
+ // from the canonical SVN file before compiling/running.
+ [Test]
+ public void TestFarsiYeh()
+ {
+ Check("ÙاÛ", "ÙاÙ");
+ }
+
+ [Test]
+ public void TestYehBarree()
+ {
+ Check("ÙاÛ", "ÙاÙ");
+ }
+
+ [Test]
+ public void TestKeheh()
+ {
+ Check("کشاÙدÙ", "ÙشاÙدÙ");
+ }
+
+ [Test]
+ public void TestHehYeh()
+ {
+ Check("ÙتابÛ", "ÙتابÙ");
+ }
+
+ [Test]
+ public void TestHehHamzaAbove()
+ {
+ Check("ÙتابÙÙ", "ÙتابÙ");
+ }
+
+ [Test]
+ public void TestHehGoal()
+ {
+ Check("زادÛ", "زادÙ");
+ }
+
+ // Runs the input through ArabicLetterTokenizer + PersianNormalizationFilter and
+ // asserts that the stream yields exactly one token equal to `expected`.
+ private void Check(String input, String expected)
+ {
+ ArabicLetterTokenizer tokenStream = new ArabicLetterTokenizer(
+ new StringReader(input));
+ PersianNormalizationFilter filter = new PersianNormalizationFilter(
+ tokenStream);
+ AssertTokenStreamContents(filter, new String[] { expected });
+ }
+ }
+}
Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Fr/TestElision.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Fr/TestElision.cs?rev=1204396&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Fr/TestElision.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Fr/TestElision.cs Mon Nov 21 08:41:52 2011
@@ -0,0 +1,47 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Standard;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Index;
+using NUnit.Framework;
+using Version=Lucene.Net.Util.Version;
+
+namespace Lucene.Net.Analyzers.Fr
+{
+ /**
+ * Tests ElisionFilter: stripping of French elided articles ("l'", "M'") from
+ * the front of tokens produced by StandardTokenizer.
+ */
+ [TestFixture]
+ public class TestElision : BaseTokenStreamTestCase
+ {
+ [Test]
+ public void TestElision2()
+ {
+ String test = "Plop, juste pour voir l'embrouille avec O'brian. M'enfin.";
+ Tokenizer tokenizer = new StandardTokenizer(Version.LUCENE_CURRENT, new StringReader(test));
+ HashSet<String> articles = new HashSet<String>();
+ articles.Add("l");
+ articles.Add("M");
+ TokenFilter filter = new ElisionFilter(tokenizer, articles);
+ List<string> tas = Filtre(filter);
+ // "l'embrouille" and "M'enfin" lose their articles; "O'brian" is kept whole
+ // because "O" is not in the article set.
+ Assert.AreEqual("embrouille", tas[4]);
+ Assert.AreEqual("O'brian", tas[6]);
+ Assert.AreEqual("enfin", tas[7]);
+ }
+
+ // Drains the filter and returns the term text of every token, in order.
+ private List<string> Filtre(TokenFilter filter)
+ {
+ List<string> tas = new List<string>();
+ TermAttribute termAtt = filter.GetAttribute<TermAttribute>();
+ while (filter.IncrementToken())
+ {
+ tas.Add(termAtt.Term());
+ }
+ return tas;
+ }
+ }
+}
\ No newline at end of file
Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Fr/TestFrenchAnalyzer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Fr/TestFrenchAnalyzer.cs?rev=1204396&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Fr/TestFrenchAnalyzer.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Fr/TestFrenchAnalyzer.cs Mon Nov 21 08:41:52 2011
@@ -0,0 +1,147 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Fr;
+using NUnit.Framework;
+using Version = Lucene.Net.Util.Version;
+
+namespace Lucene.Net.Analyzers.Fr
+{
+ /**
+ * Test case for FrenchAnalyzer.
+ *
+ * @version $version$
+ */
+ // NOTE(review): accented characters in the literals below (e.g. "françois") are mojibake
+ // in this archived copy (UTF-8 rendered as Latin-1) and are preserved verbatim; restore
+ // them from the canonical SVN file before relying on them.
+ [TestFixture]
+ public class TestFrenchAnalyzer : BaseTokenStreamTestCase
+ {
+ [Test]
+ public void TestAnalyzer()
+ {
+ FrenchAnalyzer fa = new FrenchAnalyzer(Version.LUCENE_CURRENT);
+
+ // empty input produces no tokens
+ AssertAnalyzesTo(fa, "", new String[0]);
+
+ AssertAnalyzesTo(
+ fa,
+ "chien chat cheval",
+ new String[] {"chien", "chat", "cheval"});
+
+ // lowercasing
+ AssertAnalyzesTo(
+ fa,
+ "chien CHAT CHEVAL",
+ new String[] {"chien", "chat", "cheval"});
+
+ // punctuation and symbols are dropped
+ AssertAnalyzesTo(
+ fa,
+ " chien ,? + = - CHAT /: > CHEVAL",
+ new String[] {"chien", "chat", "cheval"});
+
+ AssertAnalyzesTo(fa, "chien++", new String[] {"chien"});
+
+ AssertAnalyzesTo(
+ fa,
+ "mot \"entreguillemet\"",
+ new String[] {"mot", "entreguillemet"});
+
+ // let's do some french specific tests now
+
+ /* 1. couldn't resist
+ I would expect this to stay one term as in French the minus
+ sign is often used for composing words */
+ AssertAnalyzesTo(
+ fa,
+ "Jean-François",
+ new String[] {"jean", "françois"});
+
+ // 2. stopwords
+ AssertAnalyzesTo(
+ fa,
+ "le la chien les aux chat du des à cheval",
+ new String[] {"chien", "chat", "cheval"});
+
+ // some nouns and adjectives
+ AssertAnalyzesTo(
+ fa,
+ "lances chismes habitable chiste éléments captifs",
+ new String[]
+ {
+ "lanc",
+ "chism",
+ "habit",
+ "chist",
+ "élément",
+ "captif"
+ });
+
+ // some verbs
+ AssertAnalyzesTo(
+ fa,
+ "finissions souffrirent rugissante",
+ new String[] {"fin", "souffr", "rug"});
+
+ // some everything else
+ // aujourd'hui stays one term which is OK
+ AssertAnalyzesTo(
+ fa,
+ "C3PO aujourd'hui oeuf ïâöûà ä anticonstitutionnellement Java++ ",
+ new String[]
+ {
+ "c3po",
+ "aujourd'hui",
+ "oeuf",
+ "ïâöûà ä",
+ "anticonstitutionnel",
+ "jav"
+ });
+
+ // some more everything else
+ // here 1940-1945 stays as one term, 1940:1945 not ?
+ AssertAnalyzesTo(
+ fa,
+ "33Bis 1940-1945 1940:1945 (---i+++)*",
+ new String[] {"33bis", "1940-1945", "1940", "1945", "i"});
+
+ }
+
+ // Same stopword/stemming expectations as TestAnalyzer, but via the reusable
+ // token-stream path (AssertAnalyzesToReuse) on a single analyzer instance.
+ [Test]
+ public void TestReusableTokenStream()
+ {
+ FrenchAnalyzer fa = new FrenchAnalyzer(Version.LUCENE_CURRENT);
+ // stopwords
+ AssertAnalyzesToReuse(
+ fa,
+ "le la chien les aux chat du des à cheval",
+ new String[] {"chien", "chat", "cheval"});
+
+ // some nouns and adjectives
+ AssertAnalyzesToReuse(
+ fa,
+ "lances chismes habitable chiste éléments captifs",
+ new String[]
+ {
+ "lanc",
+ "chism",
+ "habit",
+ "chist",
+ "élément",
+ "captif"
+ });
+ }
+
+ /*
+ * Test that changes to the exclusion table are applied immediately
+ * when using reusable token streams.
+ */
+ [Test]
+ public void TestExclusionTableReuse()
+ {
+ FrenchAnalyzer fa = new FrenchAnalyzer(Version.LUCENE_CURRENT);
+ AssertAnalyzesToReuse(fa, "habitable", new String[] { "habit" });
+ fa.SetStemExclusionTable(new String[] { "habitable" });
+ // after exclusion, the word must pass through unstemmed
+ AssertAnalyzesToReuse(fa, "habitable", new String[] { "habitable" });
+ }
+ }
+}
\ No newline at end of file
Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Lucene.Net.snk
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Lucene.Net.snk?rev=1204396&view=auto
==============================================================================
Binary file - no diff available.
Propchange: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Lucene.Net.snk
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Miscellaneous/PatternAnalyzerTest.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Miscellaneous/PatternAnalyzerTest.cs?rev=1204396&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Miscellaneous/PatternAnalyzerTest.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Miscellaneous/PatternAnalyzerTest.cs Mon Nov 21 08:41:52 2011
@@ -0,0 +1,149 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Text.RegularExpressions;
+using Lucene.Net.Analysis;
+using NUnit.Framework;
+using Version = Lucene.Net.Util.Version;
+
+namespace Lucene.Net.Analyzers.Miscellaneous
+{
+ /**
+ * Verifies the behavior of PatternAnalyzer.
+ */
+ [TestFixture]
+ public class PatternAnalyzerTest : BaseTokenStreamTestCase
+ {
+ /**
+ * Test PatternAnalyzer when it is configured with a non-word pattern.
+ * Behavior can be similar to SimpleAnalyzer (depending upon options)
+ */
+ [Test]
+ public void TestNonWordPattern()
+ {
+ // Split on non-letter pattern, do not lowercase, no stopwords
+ PatternAnalyzer a = new PatternAnalyzer(Version.LUCENE_CURRENT, PatternAnalyzer.NON_WORD_PATTERN,
+ false, null);
+ // digits split tokens: "abcd1234" -> "abcd" (the digit run and "56.78" drop out)
+ Check(a, "The quick brown Fox,the abcd1234 (56.78) dc.", new String[]
+ {
+ "The", "quick", "brown", "Fox", "the",
+ "abcd", "dc"
+ });
+
+ // split on non-letter pattern, lowercase, english stopwords
+ PatternAnalyzer b = new PatternAnalyzer(Version.LUCENE_CURRENT, PatternAnalyzer.NON_WORD_PATTERN,
+ true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
+ Check(b, "The quick brown Fox,the abcd1234 (56.78) dc.", new String[]
+ {
+ "quick", "brown", "fox", "abcd", "dc"
+ });
+ }
+
+ /**
+ * Test PatternAnalyzer when it is configured with a whitespace pattern.
+ * Behavior can be similar to WhitespaceAnalyzer (depending upon options)
+ */
+ [Test]
+ public void TestWhitespacePattern()
+ {
+ // Split on whitespace patterns, do not lowercase, no stopwords
+ PatternAnalyzer a = new PatternAnalyzer(Version.LUCENE_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN,
+ false, null);
+ Check(a, "The quick brown Fox,the abcd1234 (56.78) dc.", new String[]
+ {
+ "The", "quick", "brown", "Fox,the",
+ "abcd1234", "(56.78)", "dc."
+ });
+
+ // Split on whitespace patterns, lowercase, english stopwords
+ PatternAnalyzer b = new PatternAnalyzer(Version.LUCENE_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN,
+ true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
+ Check(b, "The quick brown Fox,the abcd1234 (56.78) dc.", new String[]
+ {
+ "quick", "brown", "fox,the", "abcd1234",
+ "(56.78)", "dc."
+ });
+ }
+
+ /**
+ * Test PatternAnalyzer when it is configured with a custom pattern. In this
+ * case, text is tokenized on the comma ","
+ */
+ [Test]
+ public void TestCustomPattern()
+ {
+ // Split on comma, do not lowercase, no stopwords
+ PatternAnalyzer a = new PatternAnalyzer(Version.LUCENE_CURRENT, new Regex(",", RegexOptions.Compiled), false, null);
+ Check(a, "Here,Are,some,Comma,separated,words,", new String[]
+ {
+ "Here",
+ "Are", "some", "Comma", "separated", "words"
+ });
+
+ // split on comma, lowercase, english stopwords ("are" is a stopword)
+ PatternAnalyzer b = new PatternAnalyzer(Version.LUCENE_CURRENT, new Regex(",", RegexOptions.Compiled), true,
+ StopAnalyzer.ENGLISH_STOP_WORDS_SET);
+ Check(b, "Here,Are,some,Comma,separated,words,", new String[]
+ {
+ "here",
+ "some", "comma", "separated", "words"
+ });
+ }
+
+ /**
+ * Test PatternAnalyzer against a large document.
+ */
+ [Test]
+ public void TestHugeDocument()
+ {
+ StringBuilder document = new StringBuilder();
+ // 5000 a's
+ char[] largeWord;
+ largeWord = Enumerable.Repeat('a', 5000).ToArray();
+ document.Append(largeWord);
+
+ // a space
+ document.Append(' ');
+
+ // 2000 b's
+ char[] largeWord2;
+ largeWord2 = Enumerable.Repeat('b', 2000).ToArray();
+ document.Append(largeWord2);
+
+ // Split on whitespace patterns, do not lowercase, no stopwords
+ PatternAnalyzer a = new PatternAnalyzer(Version.LUCENE_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN,
+ false, null);
+ Check(a, document.ToString(), new String[]
+ {
+ new String(largeWord),
+ new String(largeWord2)
+ });
+ }
+
+ /**
+ * Verify the analyzer analyzes to the expected contents. For PatternAnalyzer,
+ * several methods are verified:
+ * <ul>
+ * <li>Analysis with a normal Reader
+ * <li>Analysis with a FastStringReader
+ * <li>Analysis with a String
+ * </ul>
+ */
+ private void Check(PatternAnalyzer analyzer, String document,
+ String[] expected)
+ {
+ // ordinary analysis of a Reader
+ AssertAnalyzesTo(analyzer, document, expected);
+
+ // analysis with a "FastStringReader"
+ TokenStream ts = analyzer.TokenStream("dummy",
+ new PatternAnalyzer.FastStringReader(document));
+ AssertTokenStreamContents(ts, expected);
+
+ // analysis of a String, uses PatternAnalyzer.tokenStream(String, String)
+ TokenStream ts2 = analyzer.TokenStream("dummy", document);
+ AssertTokenStreamContents(ts2, expected);
+ }
+ }
+}
\ No newline at end of file
Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Miscellaneous/TestEmptyTokenStream.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Miscellaneous/TestEmptyTokenStream.cs?rev=1204396&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Miscellaneous/TestEmptyTokenStream.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Miscellaneous/TestEmptyTokenStream.cs Mon Nov 21 08:41:52 2011
@@ -0,0 +1,23 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using Lucene.Net.Analysis;
+using Lucene.Net.Util;
+using NUnit.Framework;
+
+namespace Lucene.Net.Analyzers.Miscellaneous
+{
+ /**
+ * Verifies that EmptyTokenStream produces no tokens, both on first use
+ * and again after Reset().
+ */
+ [TestFixture]
+ public class TestEmptyTokenStream : LuceneTestCase
+ {
+ [Test]
+ public void Test()
+ {
+ TokenStream ts = new EmptyTokenStream();
+ Assert.False(ts.IncrementToken());
+ // Reset() must not make tokens appear
+ ts.Reset();
+ Assert.False(ts.IncrementToken());
+ }
+ }
+}
Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Miscellaneous/TestPrefixAndSuffixAwareTokenFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Miscellaneous/TestPrefixAndSuffixAwareTokenFilter.cs?rev=1204396&r1=1204395&r2=1204396&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Miscellaneous/TestPrefixAndSuffixAwareTokenFilter.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Miscellaneous/TestPrefixAndSuffixAwareTokenFilter.cs Mon Nov 21 08:41:52 2011
@@ -22,6 +22,7 @@ using NUnit.Framework;
namespace Lucene.Net.Analyzers.Miscellaneous
{
+ [TestFixture]
public class TestPrefixAndSuffixAwareTokenFilter : BaseTokenStreamTestCase
{
[Test]
Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Miscellaneous/TestSingleTokenTokenFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Miscellaneous/TestSingleTokenTokenFilter.cs?rev=1204396&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Miscellaneous/TestSingleTokenTokenFilter.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Miscellaneous/TestSingleTokenTokenFilter.cs Mon Nov 21 08:41:52 2011
@@ -0,0 +1,37 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
+using NUnit.Framework;
+
+namespace Lucene.Net.Analyzers.Miscellaneous
+{
+ /**
+ * Verifies SingleTokenTokenStream: it must emit exactly one token per reset,
+ * and must pick up a replacement token supplied via SetToken.
+ */
+ [TestFixture]
+ public class TestSingleTokenTokenFilter : LuceneTestCase
+ {
+ [Test]
+ public void Test()
+ {
+ Token token = new Token();
+ SingleTokenTokenStream ts = new SingleTokenTokenStream(token);
+ // The stream's TermAttribute implementation is expected to be a Token
+ // instance itself, so it can be compared against the source token directly.
+ AttributeImpl tokenAtt = (AttributeImpl)ts.AddAttribute<TermAttribute>();
+ Assert.True(tokenAtt is Token);
+ ts.Reset();
+
+ // exactly one token, equal to the one passed to the constructor
+ Assert.True(ts.IncrementToken());
+ Assert.AreEqual(token, tokenAtt);
+ Assert.False(ts.IncrementToken());
+
+ // after SetToken + Reset, the stream emits the new token exactly once
+ token = new Token("hallo", 10, 20, "someType");
+ ts.SetToken(token);
+ ts.Reset();
+
+ Assert.True(ts.IncrementToken());
+ Assert.AreEqual(token, tokenAtt);
+ Assert.False(ts.IncrementToken());
+ }
+ }
+}
Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/NGram/TestEdgeNGramTokenFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/NGram/TestEdgeNGramTokenFilter.cs?rev=1204396&r1=1204395&r2=1204396&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/NGram/TestEdgeNGramTokenFilter.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/NGram/TestEdgeNGramTokenFilter.cs Mon Nov 21 08:41:52 2011
@@ -1,44 +1,24 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-using System;
+using System;
+using System.Collections.Generic;
using System.IO;
-using System.Collections;
-
+using System.Linq;
+using System.Text;
using Lucene.Net.Analysis;
-using Lucene.Net.Analysis.Tokenattributes;
-using Lucene.Net.Util;
+using Lucene.Net.Analysis.NGram;
using NUnit.Framework;
-namespace Lucene.Net.Analysis.NGram
+namespace Lucene.Net.Analyzers.NGram
{
-
/**
* Tests {@link EdgeNGramTokenFilter} for correctness.
*/
[TestFixture]
- public class TestEdgeNGramTokenFilter : BaseTokenStreamTestCase
+ public class EdgeNGramTokenFilterTest : BaseTokenStreamTestCase
{
private TokenStream input;
- [SetUp]
- public void SetUp()
+ public override void SetUp()
{
- base.SetUp();
input = new WhitespaceTokenizer(new StringReader("abcde"));
}
@@ -50,11 +30,11 @@ namespace Lucene.Net.Analysis.NGram
{
new EdgeNGramTokenFilter(input, Side.FRONT, 0, 0);
}
- catch (System.ArgumentException e)
+ catch (ArgumentException e)
{
gotException = true;
}
- Assert.IsTrue(gotException);
+ Assert.True(gotException);
}
[Test]
@@ -65,11 +45,11 @@ namespace Lucene.Net.Analysis.NGram
{
new EdgeNGramTokenFilter(input, Side.FRONT, 2, 1);
}
- catch (System.ArgumentException e)
+ catch (ArgumentException e)
{
gotException = true;
}
- Assert.IsTrue(gotException);
+ Assert.True(gotException);
}
[Test]
@@ -80,11 +60,11 @@ namespace Lucene.Net.Analysis.NGram
{
new EdgeNGramTokenFilter(input, Side.FRONT, -1, 2);
}
- catch (System.ArgumentException e)
+ catch (ArgumentException e)
{
gotException = true;
}
- Assert.IsTrue(gotException);
+ Assert.True(gotException);
}
[Test]
Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/NGram/TestEdgeNGramTokenizer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/NGram/TestEdgeNGramTokenizer.cs?rev=1204396&r1=1204395&r2=1204396&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/NGram/TestEdgeNGramTokenizer.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/NGram/TestEdgeNGramTokenizer.cs Mon Nov 21 08:41:52 2011
@@ -1,47 +1,26 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-using System;
+using System;
+using System.Collections.Generic;
using System.IO;
-using System.Collections;
-
+using System.Linq;
+using System.Text;
using Lucene.Net.Analysis;
-using Lucene.Net.Analysis.Tokenattributes;
-using Lucene.Net.Util;
+using Lucene.Net.Analysis.NGram;
using NUnit.Framework;
-namespace Lucene.Net.Analysis.NGram
+namespace Lucene.Net.Analyzers.NGram
{
-
- /**
- * Tests {@link EdgeNGramTokenizer} for correctness.
+ /**
+ * Tests {@link EdgeNGramTokenizer} for correctness.
*/
[TestFixture]
- public class TestEdgeNGramTokenizer : BaseTokenStreamTestCase
+ public class EdgeNGramTokenizerTest : BaseTokenStreamTestCase
{
private StringReader input;
- [SetUp]
- public void SetUp()
+ public override void SetUp()
{
- base.SetUp();
input = new StringReader("abcde");
}
-
[Test]
public void TestInvalidInput()
{
@@ -50,11 +29,11 @@ namespace Lucene.Net.Analysis.NGram
{
new EdgeNGramTokenizer(input, Side.FRONT, 0, 0);
}
- catch (System.ArgumentException e)
+ catch (ArgumentException e)
{
gotException = true;
}
- Assert.IsTrue(gotException);
+ Assert.True(gotException);
}
[Test]
@@ -65,11 +44,11 @@ namespace Lucene.Net.Analysis.NGram
{
new EdgeNGramTokenizer(input, Side.FRONT, 2, 1);
}
- catch (System.ArgumentException e)
+ catch (ArgumentException e)
{
gotException = true;
}
- Assert.IsTrue(gotException);
+ Assert.True(gotException);
}
[Test]
@@ -80,11 +59,11 @@ namespace Lucene.Net.Analysis.NGram
{
new EdgeNGramTokenizer(input, Side.FRONT, -1, 2);
}
- catch (System.ArgumentException e)
+ catch (ArgumentException e)
{
gotException = true;
}
- Assert.IsTrue(gotException);
+ Assert.True(gotException);
}
[Test]
Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Nl/TestDutchStemmer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Nl/TestDutchStemmer.cs?rev=1204396&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Nl/TestDutchStemmer.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Nl/TestDutchStemmer.cs Mon Nov 21 08:41:52 2011
@@ -0,0 +1,177 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Nl;
+using NUnit.Framework;
+using Version = Lucene.Net.Util.Version;
+
+namespace Lucene.Net.Analyzers.Nl
+{
+ /**
+ * Test the Dutch Stem Filter, which only modifies the term text.
+ *
+ * The code states that it uses the snowball algorithm, but tests reveal some differences.
+ *
+ */
+ [TestFixture]
+ public class TestDutchStemmer : BaseTokenStreamTestCase
+ {
+ FileInfo customDictFile = new FileInfo(@"nl\customStemDict.txt");
+
+ [Test]
+ public void TestWithSnowballExamples()
+ {
+ Check("lichaamsziek", "lichaamsziek");
+ Check("lichamelijk", "licham");
+ Check("lichamelijke", "licham");
+ Check("lichamelijkheden", "licham");
+ Check("lichamen", "licham");
+ Check("lichere", "licher");
+ Check("licht", "licht");
+ Check("lichtbeeld", "lichtbeeld");
+ Check("lichtbruin", "lichtbruin");
+ Check("lichtdoorlatende", "lichtdoorlat");
+ Check("lichte", "licht");
+ Check("lichten", "licht");
+ Check("lichtende", "lichtend");
+ Check("lichtenvoorde", "lichtenvoord");
+ Check("lichter", "lichter");
+ Check("lichtere", "lichter");
+ Check("lichters", "lichter");
+ Check("lichtgevoeligheid", "lichtgevoel");
+ Check("lichtgewicht", "lichtgewicht");
+ Check("lichtgrijs", "lichtgrijs");
+ Check("lichthoeveelheid", "lichthoevel");
+ Check("lichtintensiteit", "lichtintensiteit");
+ Check("lichtje", "lichtj");
+ Check("lichtjes", "lichtjes");
+ Check("lichtkranten", "lichtkrant");
+ Check("lichtkring", "lichtkring");
+ Check("lichtkringen", "lichtkring");
+ Check("lichtregelsystemen", "lichtregelsystem");
+ Check("lichtste", "lichtst");
+ Check("lichtstromende", "lichtstrom");
+ Check("lichtte", "licht");
+ Check("lichtten", "licht");
+ Check("lichttoetreding", "lichttoetred");
+ Check("lichtverontreinigde", "lichtverontreinigd");
+ Check("lichtzinnige", "lichtzinn");
+ Check("lid", "lid");
+ Check("lidia", "lidia");
+ Check("lidmaatschap", "lidmaatschap");
+ Check("lidstaten", "lidstat");
+ Check("lidvereniging", "lidveren");
+ Check("opgingen", "opging");
+ Check("opglanzing", "opglanz");
+ Check("opglanzingen", "opglanz");
+ Check("opglimlachten", "opglimlacht");
+ Check("opglimpen", "opglimp");
+ Check("opglimpende", "opglimp");
+ Check("opglimping", "opglimp");
+ Check("opglimpingen", "opglimp");
+ Check("opgraven", "opgrav");
+ Check("opgrijnzen", "opgrijnz");
+ Check("opgrijzende", "opgrijz");
+ Check("opgroeien", "opgroei");
+ Check("opgroeiende", "opgroei");
+ Check("opgroeiplaats", "opgroeiplat");
+ Check("ophaal", "ophal");
+ Check("ophaaldienst", "ophaaldienst");
+ Check("ophaalkosten", "ophaalkost");
+ Check("ophaalsystemen", "ophaalsystem");
+ Check("ophaalt", "ophaalt");
+ Check("ophaaltruck", "ophaaltruck");
+ Check("ophalen", "ophal");
+ Check("ophalend", "ophal");
+ Check("ophalers", "ophaler");
+ Check("ophef", "ophef");
+ Check("opheffen", "ophef"); // versus snowball 'opheff'
+ Check("opheffende", "ophef"); // versus snowball 'opheff'
+ Check("opheffing", "ophef"); // versus snowball 'opheff'
+ Check("opheldering", "ophelder");
+ Check("ophemelde", "ophemeld");
+ Check("ophemelen", "ophemel");
+ Check("opheusden", "opheusd");
+ Check("ophief", "ophief");
+ Check("ophield", "ophield");
+ Check("ophieven", "ophiev");
+ Check("ophoepelt", "ophoepelt");
+ Check("ophoog", "ophog");
+ Check("ophoogzand", "ophoogzand");
+ Check("ophopen", "ophop");
+ Check("ophoping", "ophop");
+ Check("ophouden", "ophoud");
+ }
+
+ [Test]
+ public void TestReusableTokenStream()
+ {
+ Analyzer a = new DutchAnalyzer(Version.LUCENE_CURRENT);
+ CheckOneTermReuse(a, "lichaamsziek", "lichaamsziek");
+ CheckOneTermReuse(a, "lichamelijk", "licham");
+ CheckOneTermReuse(a, "lichamelijke", "licham");
+ CheckOneTermReuse(a, "lichamelijkheden", "licham");
+ }
+
+ /**
+ * subclass that acts just like whitespace analyzer for testing
+ */
+ private class DutchSubclassAnalyzer : DutchAnalyzer
+ {
+ public DutchSubclassAnalyzer(Version matchVersion)
+ : base(matchVersion)
+ {
+
+ }
+ public override TokenStream TokenStream(String fieldName, TextReader reader)
+ {
+ return new WhitespaceTokenizer(reader);
+ }
+ }
+
+ [Test]
+ public void TestLucene1678BwComp()
+ {
+ Analyzer a = new DutchSubclassAnalyzer(Version.LUCENE_CURRENT);
+ CheckOneTermReuse(a, "lichaamsziek", "lichaamsziek");
+ CheckOneTermReuse(a, "lichamelijk", "lichamelijk");
+ CheckOneTermReuse(a, "lichamelijke", "lichamelijke");
+ CheckOneTermReuse(a, "lichamelijkheden", "lichamelijkheden");
+ }
+
+ /*
+ * Test that changes to the exclusion table are applied immediately
+ * when using reusable token streams.
+ */
+ [Test]
+ public void TestExclusionTableReuse()
+ {
+ DutchAnalyzer a = new DutchAnalyzer(Version.LUCENE_CURRENT);
+ CheckOneTermReuse(a, "lichamelijk", "licham");
+ a.SetStemExclusionTable(new String[] { "lichamelijk" });
+ CheckOneTermReuse(a, "lichamelijk", "lichamelijk");
+ }
+
+ /*
+ * Test that changes to the dictionary stemming table are applied immediately
+ * when using reusable token streams.
+ */
+ [Test]
+ public void TestStemDictionaryReuse()
+ {
+ DutchAnalyzer a = new DutchAnalyzer(Version.LUCENE_CURRENT);
+ CheckOneTermReuse(a, "lichamelijk", "licham");
+ a.SetStemDictionary(customDictFile);
+ CheckOneTermReuse(a, "lichamelijk", "somethingentirelydifferent");
+ }
+
+ private void Check(String input, String expected)
+ {
+ CheckOneTerm(new DutchAnalyzer(Version.LUCENE_CURRENT), input, expected);
+ }
+
+ }
+}
\ No newline at end of file
Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Nl/customStemDict.txt
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Nl/customStemDict.txt?rev=1204396&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Nl/customStemDict.txt (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Nl/customStemDict.txt Mon Nov 21 08:41:52 2011
@@ -0,0 +1,3 @@
+lichamelijk somethingentirelydifferent
+lichamelijke licham
+lichamelijkheden licham
Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Payloads/DelimitedPayloadTokenFilterTest.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Payloads/DelimitedPayloadTokenFilterTest.cs?rev=1204396&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Payloads/DelimitedPayloadTokenFilterTest.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Payloads/DelimitedPayloadTokenFilterTest.cs Mon Nov 21 08:41:52 2011
@@ -0,0 +1,140 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Index;
+using Lucene.Net.Util;
+using NUnit.Framework;
+
+namespace Lucene.Net.Analyzers.Payloads
+{
+ [TestFixture]
+ public class DelimitedPayloadTokenFilterTest : LuceneTestCase
+ {
+ [Test]
+ public void TestPayloads()
+ {
+ var encoding = Encoding.UTF8;
+ String test = "The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN";
+ DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(new StringReader(test)));
+ TermAttribute termAtt = filter.GetAttribute<TermAttribute>();
+ PayloadAttribute payAtt = filter.GetAttribute<PayloadAttribute>();
+ AssertTermEquals("The", filter, termAtt, payAtt, null);
+ AssertTermEquals("quick", filter, termAtt, payAtt, encoding.GetBytes("JJ"));
+ AssertTermEquals("red", filter, termAtt, payAtt, encoding.GetBytes("JJ"));
+ AssertTermEquals("fox", filter, termAtt, payAtt, encoding.GetBytes("NN"));
+ AssertTermEquals("jumped", filter, termAtt, payAtt, encoding.GetBytes("VB"));
+ AssertTermEquals("over", filter, termAtt, payAtt, null);
+ AssertTermEquals("the", filter, termAtt, payAtt, null);
+ AssertTermEquals("lazy", filter, termAtt, payAtt, encoding.GetBytes("JJ"));
+ AssertTermEquals("brown", filter, termAtt, payAtt, encoding.GetBytes("JJ"));
+ AssertTermEquals("dogs", filter, termAtt, payAtt, encoding.GetBytes("NN"));
+ Assert.False(filter.IncrementToken());
+ }
+
+ [Test]
+ public void TestNext()
+ {
+ var encoding = Encoding.UTF8;
+ String test = "The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN";
+ DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(new StringReader(test)));
+ AssertTermEquals("The", filter, null);
+ AssertTermEquals("quick", filter, encoding.GetBytes("JJ"));
+ AssertTermEquals("red", filter, encoding.GetBytes("JJ"));
+ AssertTermEquals("fox", filter, encoding.GetBytes("NN"));
+ AssertTermEquals("jumped", filter, encoding.GetBytes("VB"));
+ AssertTermEquals("over", filter, null);
+ AssertTermEquals("the", filter, null);
+ AssertTermEquals("lazy", filter, encoding.GetBytes("JJ"));
+ AssertTermEquals("brown", filter, encoding.GetBytes("JJ"));
+ AssertTermEquals("dogs", filter, encoding.GetBytes("NN"));
+ Assert.False(filter.IncrementToken());
+ }
+
+
+ [Test]
+ public void TestFloatEncoding()
+ {
+ String test = "The quick|1.0 red|2.0 fox|3.5 jumped|0.5 over the lazy|5 brown|99.3 dogs|83.7";
+ DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(new StringReader(test)), '|', new FloatEncoder());
+ TermAttribute termAtt = filter.GetAttribute<TermAttribute>();
+ PayloadAttribute payAtt = filter.GetAttribute<PayloadAttribute>();
+ AssertTermEquals("The", filter, termAtt, payAtt, null);
+ AssertTermEquals("quick", filter, termAtt, payAtt, PayloadHelper.EncodeFloat(1.0f));
+ AssertTermEquals("red", filter, termAtt, payAtt, PayloadHelper.EncodeFloat(2.0f));
+ AssertTermEquals("fox", filter, termAtt, payAtt, PayloadHelper.EncodeFloat(3.5f));
+ AssertTermEquals("jumped", filter, termAtt, payAtt, PayloadHelper.EncodeFloat(0.5f));
+ AssertTermEquals("over", filter, termAtt, payAtt, null);
+ AssertTermEquals("the", filter, termAtt, payAtt, null);
+ AssertTermEquals("lazy", filter, termAtt, payAtt, PayloadHelper.EncodeFloat(5.0f));
+ AssertTermEquals("brown", filter, termAtt, payAtt, PayloadHelper.EncodeFloat(99.3f));
+ AssertTermEquals("dogs", filter, termAtt, payAtt, PayloadHelper.EncodeFloat(83.7f));
+ Assert.False(filter.IncrementToken());
+ }
+
+ [Test]
+ public void TestIntEncoding()
+ {
+ String test = "The quick|1 red|2 fox|3 jumped over the lazy|5 brown|99 dogs|83";
+ DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(new StringReader(test)), '|', new IntegerEncoder());
+ TermAttribute termAtt = filter.GetAttribute<TermAttribute>();
+ PayloadAttribute payAtt = filter.GetAttribute<PayloadAttribute>();
+ AssertTermEquals("The", filter, termAtt, payAtt, null);
+ AssertTermEquals("quick", filter, termAtt, payAtt, PayloadHelper.EncodeInt(1));
+ AssertTermEquals("red", filter, termAtt, payAtt, PayloadHelper.EncodeInt(2));
+ AssertTermEquals("fox", filter, termAtt, payAtt, PayloadHelper.EncodeInt(3));
+ AssertTermEquals("jumped", filter, termAtt, payAtt, null);
+ AssertTermEquals("over", filter, termAtt, payAtt, null);
+ AssertTermEquals("the", filter, termAtt, payAtt, null);
+ AssertTermEquals("lazy", filter, termAtt, payAtt, PayloadHelper.EncodeInt(5));
+ AssertTermEquals("brown", filter, termAtt, payAtt, PayloadHelper.EncodeInt(99));
+ AssertTermEquals("dogs", filter, termAtt, payAtt, PayloadHelper.EncodeInt(83));
+ Assert.False(filter.IncrementToken());
+ }
+
+ void AssertTermEquals(String expected, TokenStream stream, byte[] expectPay)
+ {
+ TermAttribute termAtt = stream.GetAttribute<TermAttribute>();
+ PayloadAttribute payloadAtt = stream.GetAttribute<PayloadAttribute>();
+ Assert.True(stream.IncrementToken());
+ Assert.AreEqual(expected, termAtt.Term());
+ Payload payload = payloadAtt.GetPayload();
+ if (payload != null)
+ {
+ Assert.True(payload.Length() == expectPay.Length, payload.Length() + " does not equal: " + expectPay.Length);
+ for (int i = 0; i < expectPay.Length; i++)
+ {
+ Assert.True(expectPay[i] == payload.ByteAt(i), expectPay[i] + " does not equal: " + payload.ByteAt(i));
+
+ }
+ }
+ else
+ {
+ Assert.True(expectPay == null, "expectPay is not null and it should be");
+ }
+ }
+
+ void AssertTermEquals(String expected, TokenStream stream, TermAttribute termAtt, PayloadAttribute payAtt, byte[] expectPay)
+ {
+ Assert.True(stream.IncrementToken());
+ Assert.AreEqual(expected, termAtt.Term());
+ Payload payload = payAtt.GetPayload();
+ if (payload != null)
+ {
+ Assert.True(payload.Length() == expectPay.Length, payload.Length() + " does not equal: " + expectPay.Length);
+ for (int i = 0; i < expectPay.Length; i++)
+ {
+ Assert.True(expectPay[i] == payload.ByteAt(i), expectPay[i] + " does not equal: " + payload.ByteAt(i));
+
+ }
+ }
+ else
+ {
+ Assert.True(expectPay == null, "expectPay is not null and it should be");
+ }
+ }
+ }
+}
Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Payloads/NumericPayloadTokenFilterTest.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Payloads/NumericPayloadTokenFilterTest.cs?rev=1204396&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Payloads/NumericPayloadTokenFilterTest.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Payloads/NumericPayloadTokenFilterTest.cs Mon Nov 21 08:41:52 2011
@@ -0,0 +1,73 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Tokenattributes;
+using NUnit.Framework;
+
+namespace Lucene.Net.Analyzers.Payloads
+{
+ [TestFixture]
+ public class NumericPayloadTokenFilterTest : BaseTokenStreamTestCase
+ {
+ [Test]
+ public void Test()
+ {
+ String test = "The quick red fox jumped over the lazy brown dogs";
+
+ NumericPayloadTokenFilter nptf = new NumericPayloadTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(new StringReader(test))), 3, "D");
+ bool seenDogs = false;
+ TermAttribute termAtt = nptf.GetAttribute<TermAttribute>();
+ TypeAttribute typeAtt = nptf.GetAttribute<TypeAttribute>();
+ PayloadAttribute payloadAtt = nptf.GetAttribute<PayloadAttribute>();
+ while (nptf.IncrementToken())
+ {
+ if (termAtt.Term().Equals("dogs"))
+ {
+ seenDogs = true;
+ Assert.True(typeAtt.Type().Equals("D") == true, typeAtt.Type() + " is not equal to " + "D");
+ Assert.True(payloadAtt.GetPayload() != null, "payloadAtt.GetPayload() is null and it shouldn't be");
+ byte[] bytes = payloadAtt.GetPayload().GetData();//safe here to just use the bytes, otherwise we should use offset, length
+ Assert.True(bytes.Length == payloadAtt.GetPayload().Length(), bytes.Length + " does not equal: " + payloadAtt.GetPayload().Length());
+ Assert.True(payloadAtt.GetPayload().GetOffset() == 0, payloadAtt.GetPayload().GetOffset() + " does not equal: " + 0);
+ float pay = PayloadHelper.DecodeFloat(bytes);
+ Assert.True(pay == 3, pay + " does not equal: " + 3);
+ }
+ else
+ {
+ Assert.True(typeAtt.Type().Equals("word"), typeAtt.Type() + " is not null and it should be");
+ }
+ }
+ Assert.True(seenDogs == true, seenDogs + " does not equal: " + true);
+ }
+
+ internal sealed class WordTokenFilter : TokenFilter
+ {
+ private TermAttribute termAtt;
+ private TypeAttribute typeAtt;
+
+ internal WordTokenFilter(TokenStream input)
+ : base(input)
+ {
+ termAtt = AddAttribute<TermAttribute>();
+ typeAtt = AddAttribute<TypeAttribute>();
+ }
+
+ public override bool IncrementToken()
+ {
+ if (input.IncrementToken())
+ {
+ if (termAtt.Term().Equals("dogs"))
+ typeAtt.SetType("D");
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+ }
+ }
+ }
+}
Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Payloads/TokenOffsetPayloadTokenFilterTest.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Payloads/TokenOffsetPayloadTokenFilterTest.cs?rev=1204396&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Payloads/TokenOffsetPayloadTokenFilterTest.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Payloads/TokenOffsetPayloadTokenFilterTest.cs Mon Nov 21 08:41:52 2011
@@ -0,0 +1,40 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Index;
+using NUnit.Framework;
+
+namespace Lucene.Net.Analyzers.Payloads
+{
+ [TestFixture]
+ public class TokenOffsetPayloadTokenFilterTest : BaseTokenStreamTestCase
+ {
+ [Test]
+ public void Test()
+ {
+ String test = "The quick red fox jumped over the lazy brown dogs";
+
+ TokenOffsetPayloadTokenFilter nptf = new TokenOffsetPayloadTokenFilter(new WhitespaceTokenizer(new StringReader(test)));
+ int count = 0;
+ PayloadAttribute payloadAtt = nptf.GetAttribute<PayloadAttribute>();
+ OffsetAttribute offsetAtt = nptf.GetAttribute<OffsetAttribute>();
+
+ while (nptf.IncrementToken())
+ {
+ Payload pay = payloadAtt.GetPayload();
+ Assert.True(pay != null, "pay is null and it shouldn't be");
+ byte[] data = pay.GetData();
+ int start = PayloadHelper.DecodeInt(data, 0);
+ Assert.True(start == offsetAtt.StartOffset(), start + " does not equal: " + offsetAtt.StartOffset());
+ int end = PayloadHelper.DecodeInt(data, 4);
+ Assert.True(end == offsetAtt.EndOffset(), end + " does not equal: " + offsetAtt.EndOffset());
+ count++;
+ }
+ Assert.True(count == 10, count + " does not equal: " + 10);
+ }
+ }
+}
Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Payloads/TypeAsPayloadTokenFilterTest.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Payloads/TypeAsPayloadTokenFilterTest.cs?rev=1204396&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Payloads/TypeAsPayloadTokenFilterTest.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Payloads/TypeAsPayloadTokenFilterTest.cs Mon Nov 21 08:41:52 2011
@@ -0,0 +1,65 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Tokenattributes;
+using NUnit.Framework;
+
+namespace Lucene.Net.Analyzers.Payloads
+{
+ [TestFixture]
+ public class TypeAsPayloadTokenFilterTest : BaseTokenStreamTestCase
+ {
+ [Test]
+ public void Test()
+ {
+ String test = "The quick red fox jumped over the lazy brown dogs";
+
+ TypeAsPayloadTokenFilter nptf = new TypeAsPayloadTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(new StringReader(test))));
+ int count = 0;
+ TermAttribute termAtt = nptf.GetAttribute<TermAttribute>();
+ TypeAttribute typeAtt = nptf.GetAttribute<TypeAttribute>();
+ PayloadAttribute payloadAtt = nptf.GetAttribute<PayloadAttribute>();
+
+ while (nptf.IncrementToken())
+ {
+ Assert.True(typeAtt.Type().Equals(char.ToUpper(termAtt.TermBuffer()[0]).ToString()), typeAtt.Type() + " is not null and it should be");
+ Assert.True(payloadAtt.GetPayload() != null, "nextToken.getPayload() is null and it shouldn't be");
+ String type = Encoding.UTF8.GetString(payloadAtt.GetPayload().GetData());
+ Assert.True(type != null, "type is null and it shouldn't be");
+ Assert.True(type.Equals(typeAtt.Type()) == true, type + " is not equal to " + typeAtt.Type());
+ count++;
+ }
+
+ Assert.True(count == 10, count + " does not equal: " + 10);
+ }
+
+ private sealed class WordTokenFilter : TokenFilter
+ {
+ private TermAttribute termAtt;
+ private TypeAttribute typeAtt;
+
+ internal WordTokenFilter(TokenStream input)
+ : base(input)
+ {
+ termAtt = AddAttribute<TermAttribute>();
+ typeAtt = AddAttribute<TypeAttribute>();
+ }
+
+ public override bool IncrementToken()
+ {
+ if (input.IncrementToken())
+ {
+ typeAtt.SetType(char.ToUpper(termAtt.TermBuffer()[0]).ToString());
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+ }
+ }
+ }
+}
\ No newline at end of file
Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/PortedTests.txt
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/PortedTests.txt?rev=1204396&r1=1204395&r2=1204396&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/PortedTests.txt (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/PortedTests.txt Mon Nov 21 08:41:52 2011
@@ -1,48 +1,3 @@
-
-analysis\ar\TestArabicAnalyzer.java - Text files are different
-analysis\ar\TestArabicNormalizationFilter.java - Text files are identical
-analysis\ar\TestArabicStemFilter.java - Text files are identical
-analysis\br\TestBrazilianStemmer.java - Text files are different
-analysis\cjk\TestCJKTokenizer.java - Text files are different
-analysis\cn\TestChineseTokenizer.java - Text files are different
-analysis\compound\TestCompoundWordTokenFilter.java - Text files are different
-analysis\cz\customStopWordFile.txt - Text files are identical
-analysis\cz\TestCzechAnalyzer.java - Text files are different
-analysis\de\data.txt - Text files are identical
-analysis\de\TestGermanStemFilter.java - Text files are different
-analysis\el\GreekAnalyzerTest.java - Text files are different
-analysis\fa\TestPersianAnalyzer.java - Text files are different
-analysis\fa\TestPersianNormalizationFilter.java - Text files are identical
-analysis\fr\TestElision.java - Text files are different
-analysis\fr\TestFrenchAnalyzer.java - Text files are different
-analysis\miscellaneous\PatternAnalyzerTest.java - Right only: C:\SVN\apache\Lucene\lucene-3.0.3\contrib\analyzers\common\src\test\org\apache\lucene\analysis\miscellaneous
-analysis\miscellaneous\TestEmptyTokenStream.java - Text files are identical
-analysis\miscellaneous\TestPrefixAndSuffixAwareTokenFilter.java - Text files are identical
-analysis\miscellaneous\TestPrefixAwareTokenFilter.java - Text files are identical
-analysis\miscellaneous\TestSingleTokenTokenFilter.java - Text files are different
-analysis\ngram\EdgeNGramTokenFilterTest.java - Text files are different
-analysis\ngram\EdgeNGramTokenizerTest.java - Text files are different
-analysis\ngram\NGramTokenFilterTest.java - Text files are different
-analysis\ngram\NGramTokenizerTest.java - Text files are different
-analysis\nl\customStemDict.txt - Text files are identical
-analysis\nl\TestDutchStemmer.java - Text files are different
-analysis\payloads\DelimitedPayloadTokenFilterTest.java - Text files are different
-analysis\payloads\NumericPayloadTokenFilterTest.java - Text files are different
-analysis\payloads\TokenOffsetPayloadTokenFilterTest.java - Text files are different
-analysis\payloads\TypeAsPayloadTokenFilterTest.java - Text files are different
-analysis\position\PositionFilterTest.java - Text files are different
-analysis\query\QueryAutoStopWordAnalyzerTest.java - Text files are different
-analysis\reverse\TestReverseStringFilter.java - Text files are different
-analysis\ru\resUTF8.htm - Text files are identical
-analysis\ru\stemsUTF8.txt - Text files are identical
-analysis\ru\TestRussianAnalyzer.java - Text files are different
-analysis\ru\TestRussianStem.java - Text files are different
-analysis\ru\testUTF8.txt - Text files are identical
-analysis\ru\wordsUTF8.txt - Text files are identical
-analysis\shingle\ShingleAnalyzerWrapperTest.java - Text files are different
-analysis\shingle\ShingleFilterTest.java - Text files are different
-analysis\shingle\TestShingleMatrixFilter.java - Text files are different
-analysis\sinks\DateRecognizerSinkTokenizerTest.java - Text files are identical
-analysis\sinks\TokenRangeSinkTokenizerTest.java - Text files are identical
-analysis\sinks\TokenTypeSinkTokenizerTest.java - Text files are different
-analysis\th\TestThaiAnalyzer.java - Text files are different
\ No newline at end of file
+TODO: Add tests to make sure that Version numbers match for all contrib assemblies, all test contrib, lucene core and lucene test.
+
+All ported, except ThaiAnalyzer and Hyphenation
\ No newline at end of file