You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by cc...@apache.org on 2011/11/21 09:41:54 UTC

[Lucene.Net] svn commit: r1204396 [2/3] - in /incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk: src/contrib/Analyzers/BR/ src/contrib/Analyzers/CJK/ src/contrib/Analyzers/Cn/ src/contrib/Analyzers/Fr/ src/contrib/Analyzers/Miscellaneous/ src/contrib/Analyzers/N...

Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/El/GreekAnalyzerTest.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/El/GreekAnalyzerTest.cs?rev=1204396&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/El/GreekAnalyzerTest.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/El/GreekAnalyzerTest.cs Mon Nov 21 08:41:52 2011
@@ -0,0 +1,99 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using Lucene.Net.Analysis;
+using NUnit.Framework;
+using Version=Lucene.Net.Util.Version;
+
+namespace Lucene.Net.Analyzers.El
+{
+/**
+ * A unit test class for verifying the correct operation of the GreekAnalyzer.
+ * All Greek input strings and expected tokens are written as \uXXXX escapes
+ * so the source file remains pure ASCII.
+ */
+    [TestFixture]
+public class GreekAnalyzerTest : BaseTokenStreamTestCase {
+
+	/**
+	 * Test the analysis of various greek strings: lowercasing, removal of
+	 * accents and diaeresis, elimination of punctuation and of stop words.
+	 *
+	 * @throws Exception in case an error occurs
+	 */
+    [Test]
+	public void testAnalyzer(){
+		Analyzer a = new GreekAnalyzer(Version.LUCENE_CURRENT);
+		// Verify the correct analysis of capitals and small accented letters
+        AssertAnalyzesTo(a,
+                         "\u039c\u03af\u03b1 \u03b5\u03be\u03b1\u03b9\u03c1\u03b5\u03c4\u03b9\u03ba\u03ac \u03ba\u03b1\u03bb\u03ae \u03ba\u03b1\u03b9 \u03c0\u03bb\u03bf\u03cd\u03c3\u03b9\u03b1 \u03c3\u03b5\u03b9\u03c1\u03ac \u03c7\u03b1\u03c1\u03b1\u03ba\u03c4\u03ae\u03c1\u03c9\u03bd \u03c4\u03b7\u03c2 \u0395\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ae\u03c2 \u03b3\u03bb\u03ce\u03c3\u03c3\u03b1\u03c2",
+                         new String[]
+                             {
+                                 "\u03bc\u03b9\u03b1", "\u03b5\u03be\u03b1\u03b9\u03c1\u03b5\u03c4\u03b9\u03ba\u03b1",
+                                 "\u03ba\u03b1\u03bb\u03b7", "\u03c0\u03bb\u03bf\u03c5\u03c3\u03b9\u03b1",
+                                 "\u03c3\u03b5\u03b9\u03c1\u03b1",
+                                 "\u03c7\u03b1\u03c1\u03b1\u03ba\u03c4\u03b7\u03c1\u03c9\u03bd",
+                                 "\u03b5\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03b7\u03c3",
+                                 "\u03b3\u03bb\u03c9\u03c3\u03c3\u03b1\u03c3"
+                             });
+		// Verify the correct analysis of small letters with diaeresis and the elimination
+		// of punctuation marks
+        AssertAnalyzesTo(a,
+                         "\u03a0\u03c1\u03bf\u03ca\u03cc\u03bd\u03c4\u03b1 (\u03ba\u03b1\u03b9)     [\u03c0\u03bf\u03bb\u03bb\u03b1\u03c0\u03bb\u03ad\u03c2]	-	\u0391\u039d\u0391\u0393\u039a\u0395\u03a3",
+                         new String[]
+                             {
+                                 "\u03c0\u03c1\u03bf\u03b9\u03bf\u03bd\u03c4\u03b1",
+                                 "\u03c0\u03bf\u03bb\u03bb\u03b1\u03c0\u03bb\u03b5\u03c3",
+                                 "\u03b1\u03bd\u03b1\u03b3\u03ba\u03b5\u03c3"
+                             });
+		// Verify the correct analysis of capital accented letters and capital letters with diaeresis,
+		// as well as the elimination of stop words
+        AssertAnalyzesTo(a,
+                         "\u03a0\u03a1\u039f\u03ab\u03a0\u039f\u0398\u0395\u03a3\u0395\u0399\u03a3  \u0386\u03c8\u03bf\u03b3\u03bf\u03c2, \u03bf \u03bc\u03b5\u03c3\u03c4\u03cc\u03c2 \u03ba\u03b1\u03b9 \u03bf\u03b9 \u03ac\u03bb\u03bb\u03bf\u03b9",
+                         new String[]
+                             {
+                                 "\u03c0\u03c1\u03bf\u03c5\u03c0\u03bf\u03b8\u03b5\u03c3\u03b5\u03b9\u03c3",
+                                 "\u03b1\u03c8\u03bf\u03b3\u03bf\u03c3", "\u03bc\u03b5\u03c3\u03c4\u03bf\u03c3",
+                                 "\u03b1\u03bb\u03bb\u03bf\u03b9"
+                             });
+	}
+
+	/**
+	 * Same assertions as testAnalyzer, but driven through the analyzer's
+	 * reusable token stream (AssertAnalyzesToReuse) on a single instance.
+	 */
+    [Test]
+	public void testReusableTokenStream(){
+	    Analyzer a = new GreekAnalyzer(Version.LUCENE_CURRENT);
+	    // Verify the correct analysis of capitals and small accented letters
+        AssertAnalyzesToReuse(a,
+                              "\u039c\u03af\u03b1 \u03b5\u03be\u03b1\u03b9\u03c1\u03b5\u03c4\u03b9\u03ba\u03ac \u03ba\u03b1\u03bb\u03ae \u03ba\u03b1\u03b9 \u03c0\u03bb\u03bf\u03cd\u03c3\u03b9\u03b1 \u03c3\u03b5\u03b9\u03c1\u03ac \u03c7\u03b1\u03c1\u03b1\u03ba\u03c4\u03ae\u03c1\u03c9\u03bd \u03c4\u03b7\u03c2 \u0395\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ae\u03c2 \u03b3\u03bb\u03ce\u03c3\u03c3\u03b1\u03c2",
+                              new String[]
+                                  {
+                                      "\u03bc\u03b9\u03b1",
+                                      "\u03b5\u03be\u03b1\u03b9\u03c1\u03b5\u03c4\u03b9\u03ba\u03b1",
+                                      "\u03ba\u03b1\u03bb\u03b7", "\u03c0\u03bb\u03bf\u03c5\u03c3\u03b9\u03b1",
+                                      "\u03c3\u03b5\u03b9\u03c1\u03b1",
+                                      "\u03c7\u03b1\u03c1\u03b1\u03ba\u03c4\u03b7\u03c1\u03c9\u03bd",
+                                      "\u03b5\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03b7\u03c3",
+                                      "\u03b3\u03bb\u03c9\u03c3\u03c3\u03b1\u03c3"
+                                  });
+	    // Verify the correct analysis of small letters with diaeresis and the elimination
+	    // of punctuation marks
+        AssertAnalyzesToReuse(a,
+                              "\u03a0\u03c1\u03bf\u03ca\u03cc\u03bd\u03c4\u03b1 (\u03ba\u03b1\u03b9)     [\u03c0\u03bf\u03bb\u03bb\u03b1\u03c0\u03bb\u03ad\u03c2] -   \u0391\u039d\u0391\u0393\u039a\u0395\u03a3",
+                              new String[]
+                                  {
+                                      "\u03c0\u03c1\u03bf\u03b9\u03bf\u03bd\u03c4\u03b1",
+                                      "\u03c0\u03bf\u03bb\u03bb\u03b1\u03c0\u03bb\u03b5\u03c3",
+                                      "\u03b1\u03bd\u03b1\u03b3\u03ba\u03b5\u03c3"
+                                  });
+	    // Verify the correct analysis of capital accented letters and capital letters with diaeresis,
+	    // as well as the elimination of stop words
+        AssertAnalyzesToReuse(a,
+                              "\u03a0\u03a1\u039f\u03ab\u03a0\u039f\u0398\u0395\u03a3\u0395\u0399\u03a3  \u0386\u03c8\u03bf\u03b3\u03bf\u03c2, \u03bf \u03bc\u03b5\u03c3\u03c4\u03cc\u03c2 \u03ba\u03b1\u03b9 \u03bf\u03b9 \u03ac\u03bb\u03bb\u03bf\u03b9",
+                              new String[]
+                                  {
+                                      "\u03c0\u03c1\u03bf\u03c5\u03c0\u03bf\u03b8\u03b5\u03c3\u03b5\u03b9\u03c3",
+                                      "\u03b1\u03c8\u03bf\u03b3\u03bf\u03c3", "\u03bc\u03b5\u03c3\u03c4\u03bf\u03c3",
+                                      "\u03b1\u03bb\u03bb\u03bf\u03b9"
+                                  });
+	}
+}
+
+}

Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Fa/TestPersianAnalyzer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Fa/TestPersianAnalyzer.cs?rev=1204396&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Fa/TestPersianAnalyzer.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Fa/TestPersianAnalyzer.cs Mon Nov 21 08:41:52 2011
@@ -0,0 +1,212 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using Lucene.Net.Analysis;
+using NUnit.Framework;
+using Version = Lucene.Net.Util.Version;
+
+namespace Lucene.Net.Analyzers.Fa
+{
+/**
+ * Test the Persian Analyzer.
+ *
+ * NOTE: this class was committed without [TestFixture]/[Test] attributes and
+ * without the NUnit.Framework using, unlike the sibling test files in this
+ * commit, so NUnit never discovered these tests; the attributes are added here.
+ */
+[TestFixture]
+public class TestPersianAnalyzer : BaseTokenStreamTestCase {
+
+  /**
+   * This test fails with NPE when the stopwords file is missing in classpath
+   */
+  [Test]
+  public void testResourcesAvailable() {
+    new PersianAnalyzer(Version.LUCENE_CURRENT);
+  }
+
+  /**
+   * This test shows how the combination of tokenization (breaking on zero-width
+   * non-joiner), normalization (such as treating arabic YEH and farsi YEH the
+   * same), and stopwords creates a light-stemming effect for verbs.
+   * 
+   * These verb forms are from http://en.wikipedia.org/wiki/Persian_grammar
+   */
+  [Test]
+  public void testBehaviorVerbs(){
+    Analyzer a = new PersianAnalyzer(Version.LUCENE_CURRENT);
+    // active present indicative
+    AssertAnalyzesTo(a, "می‌خورد", new String[] { "خورد" });
+    // active preterite indicative
+    AssertAnalyzesTo(a, "خورد", new String[] { "خورد" });
+    // active imperfective preterite indicative
+    AssertAnalyzesTo(a, "می‌خورد", new String[] { "خورد" });
+    // active future indicative
+    AssertAnalyzesTo(a, "خواهد خورد", new String[] { "خورد" });
+    // active present progressive indicative
+    AssertAnalyzesTo(a, "دارد می‌خورد", new String[] { "خورد" });
+    // active preterite progressive indicative
+    AssertAnalyzesTo(a, "داشت می‌خورد", new String[] { "خورد" });
+
+    // active perfect indicative
+    AssertAnalyzesTo(a, "خورده‌است", new String[] { "خورده" });
+    // active imperfective perfect indicative
+    AssertAnalyzesTo(a, "می‌خورده‌است", new String[] { "خورده" });
+    // active pluperfect indicative
+    AssertAnalyzesTo(a, "خورده بود", new String[] { "خورده" });
+    // active imperfective pluperfect indicative
+    AssertAnalyzesTo(a, "می‌خورده بود", new String[] { "خورده" });
+    // active preterite subjunctive
+    AssertAnalyzesTo(a, "خورده باشد", new String[] { "خورده" });
+    // active imperfective preterite subjunctive
+    AssertAnalyzesTo(a, "می‌خورده باشد", new String[] { "خورده" });
+    // active pluperfect subjunctive
+    AssertAnalyzesTo(a, "خورده بوده باشد", new String[] { "خورده" });
+    // active imperfective pluperfect subjunctive
+    AssertAnalyzesTo(a, "می‌خورده بوده باشد", new String[] { "خورده" });
+    // passive present indicative
+    AssertAnalyzesTo(a, "خورده می‌شود", new String[] { "خورده" });
+    // passive preterite indicative
+    AssertAnalyzesTo(a, "خورده شد", new String[] { "خورده" });
+    // passive imperfective preterite indicative
+    AssertAnalyzesTo(a, "خورده می‌شد", new String[] { "خورده" });
+    // passive perfect indicative
+    AssertAnalyzesTo(a, "خورده شده‌است", new String[] { "خورده" });
+    // passive imperfective perfect indicative
+    AssertAnalyzesTo(a, "خورده می‌شده‌است", new String[] { "خورده" });
+    // passive pluperfect indicative
+    AssertAnalyzesTo(a, "خورده شده بود", new String[] { "خورده" });
+    // passive imperfective pluperfect indicative
+    AssertAnalyzesTo(a, "خورده می‌شده بود", new String[] { "خورده" });
+    // passive future indicative
+    AssertAnalyzesTo(a, "خورده خواهد شد", new String[] { "خورده" });
+    // passive present progressive indicative
+    AssertAnalyzesTo(a, "دارد خورده می‌شود", new String[] { "خورده" });
+    // passive preterite progressive indicative
+    AssertAnalyzesTo(a, "داشت خورده می‌شد", new String[] { "خورده" });
+    // passive present subjunctive
+    AssertAnalyzesTo(a, "خورده شود", new String[] { "خورده" });
+    // passive preterite subjunctive
+    AssertAnalyzesTo(a, "خورده شده باشد", new String[] { "خورده" });
+    // passive imperfective preterite subjunctive
+    AssertAnalyzesTo(a, "خورده می‌شده باشد", new String[] { "خورده" });
+    // passive pluperfect subjunctive
+    AssertAnalyzesTo(a, "خورده شده بوده باشد", new String[] { "خورده" });
+    // passive imperfective pluperfect subjunctive
+    AssertAnalyzesTo(a, "خورده می‌شده بوده باشد", new String[] { "خورده" });
+
+    // active present subjunctive
+    AssertAnalyzesTo(a, "بخورد", new String[] { "بخورد" });
+  }
+
+  /**
+   * This test shows how the combination of tokenization and stopwords creates a
+   * light-stemming effect for verbs.
+   * 
+   * In this case, these forms are presented with alternative orthography, using
+   * arabic yeh and whitespace. This yeh phenomenon is common for legacy text
+   * due to some previous bugs in Microsoft Windows.
+   * 
+   * These verb forms are from http://en.wikipedia.org/wiki/Persian_grammar
+   */
+  [Test]
+  public void testBehaviorVerbsDefective(){
+    Analyzer a = new PersianAnalyzer(Version.LUCENE_CURRENT);
+    // active present indicative
+    AssertAnalyzesTo(a, "مي خورد", new String[] { "خورد" });
+    // active preterite indicative
+    AssertAnalyzesTo(a, "خورد", new String[] { "خورد" });
+    // active imperfective preterite indicative
+    AssertAnalyzesTo(a, "مي خورد", new String[] { "خورد" });
+    // active future indicative
+    AssertAnalyzesTo(a, "خواهد خورد", new String[] { "خورد" });
+    // active present progressive indicative
+    AssertAnalyzesTo(a, "دارد مي خورد", new String[] { "خورد" });
+    // active preterite progressive indicative
+    AssertAnalyzesTo(a, "داشت مي خورد", new String[] { "خورد" });
+
+    // active perfect indicative
+    AssertAnalyzesTo(a, "خورده است", new String[] { "خورده" });
+    // active imperfective perfect indicative
+    AssertAnalyzesTo(a, "مي خورده است", new String[] { "خورده" });
+    // active pluperfect indicative
+    AssertAnalyzesTo(a, "خورده بود", new String[] { "خورده" });
+    // active imperfective pluperfect indicative
+    AssertAnalyzesTo(a, "مي خورده بود", new String[] { "خورده" });
+    // active preterite subjunctive
+    AssertAnalyzesTo(a, "خورده باشد", new String[] { "خورده" });
+    // active imperfective preterite subjunctive
+    AssertAnalyzesTo(a, "مي خورده باشد", new String[] { "خورده" });
+    // active pluperfect subjunctive
+    AssertAnalyzesTo(a, "خورده بوده باشد", new String[] { "خورده" });
+    // active imperfective pluperfect subjunctive
+    AssertAnalyzesTo(a, "مي خورده بوده باشد", new String[] { "خورده" });
+    // passive present indicative
+    AssertAnalyzesTo(a, "خورده مي شود", new String[] { "خورده" });
+    // passive preterite indicative
+    AssertAnalyzesTo(a, "خورده شد", new String[] { "خورده" });
+    // passive imperfective preterite indicative
+    AssertAnalyzesTo(a, "خورده مي شد", new String[] { "خورده" });
+    // passive perfect indicative
+    AssertAnalyzesTo(a, "خورده شده است", new String[] { "خورده" });
+    // passive imperfective perfect indicative
+    AssertAnalyzesTo(a, "خورده مي شده است", new String[] { "خورده" });
+    // passive pluperfect indicative
+    AssertAnalyzesTo(a, "خورده شده بود", new String[] { "خورده" });
+    // passive imperfective pluperfect indicative
+    AssertAnalyzesTo(a, "خورده مي شده بود", new String[] { "خورده" });
+    // passive future indicative
+    AssertAnalyzesTo(a, "خورده خواهد شد", new String[] { "خورده" });
+    // passive present progressive indicative
+    AssertAnalyzesTo(a, "دارد خورده مي شود", new String[] { "خورده" });
+    // passive preterite progressive indicative
+    AssertAnalyzesTo(a, "داشت خورده مي شد", new String[] { "خورده" });
+    // passive present subjunctive
+    AssertAnalyzesTo(a, "خورده شود", new String[] { "خورده" });
+    // passive preterite subjunctive
+    AssertAnalyzesTo(a, "خورده شده باشد", new String[] { "خورده" });
+    // passive imperfective preterite subjunctive
+    AssertAnalyzesTo(a, "خورده مي شده باشد", new String[] { "خورده" });
+    // passive pluperfect subjunctive
+    AssertAnalyzesTo(a, "خورده شده بوده باشد", new String[] { "خورده" });
+    // passive imperfective pluperfect subjunctive
+    AssertAnalyzesTo(a, "خورده مي شده بوده باشد", new String[] { "خورده" });
+
+    // active present subjunctive
+    AssertAnalyzesTo(a, "بخورد", new String[] { "بخورد" });
+  }
+
+  /**
+   * This test shows how the combination of tokenization (breaking on zero-width
+   * non-joiner or space) and stopwords creates a light-stemming effect for
+   * nouns, removing the plural -ha.
+   */
+  [Test]
+  public void testBehaviorNouns(){
+    Analyzer a = new PersianAnalyzer(Version.LUCENE_CURRENT);
+    AssertAnalyzesTo(a, "برگ ها", new String[] { "برگ" });
+    AssertAnalyzesTo(a, "برگ‌ها", new String[] { "برگ" });
+  }
+
+  /**
+   * Test showing that non-persian text is treated very much like SimpleAnalyzer
+   * (lowercased, etc)
+   */
+  [Test]
+  public void testBehaviorNonPersian(){
+    Analyzer a = new PersianAnalyzer(Version.LUCENE_CURRENT);
+    AssertAnalyzesTo(a, "English test.", new String[] { "english", "test" });
+  }
+  
+  /**
+   * Basic test ensuring that reusableTokenStream works correctly.
+   */
+  [Test]
+  public void testReusableTokenStream(){
+    Analyzer a = new PersianAnalyzer(Version.LUCENE_CURRENT);
+    AssertAnalyzesToReuse(a, "خورده مي شده بوده باشد", new String[] { "خورده" });
+    AssertAnalyzesToReuse(a, "برگ‌ها", new String[] { "برگ" });
+  }
+  
+  /**
+   * Test that custom stopwords work, and are not case-sensitive.
+   */
+  [Test]
+  public void testCustomStopwords(){
+    PersianAnalyzer a = new PersianAnalyzer(Version.LUCENE_CURRENT, new String[] { "the", "and", "a" });
+    AssertAnalyzesTo(a, "The quick brown fox.", new String[] { "quick",
+        "brown", "fox" });
+  }
+
+}
+
+}

Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Fa/TestPersianNormalizationFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Fa/TestPersianNormalizationFilter.cs?rev=1204396&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Fa/TestPersianNormalizationFilter.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Fa/TestPersianNormalizationFilter.cs Mon Nov 21 08:41:52 2011
@@ -0,0 +1,64 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.AR;
+using NUnit.Framework;
+
+namespace Lucene.Net.Analyzers.Fa
+{
+    /**
+     * Test the Persian Normalization Filter: each case feeds a single token
+     * through the filter and verifies its normalized form.
+     * (The original header said "Arabic Normalization Filter" — a copy-paste
+     * error from the Arabic test; this class tests the Persian filter.)
+     */
+    [TestFixture]
+    public class TestPersianNormalizationFilter : BaseTokenStreamTestCase
+    {
+        [Test]
+        public void TestFarsiYeh()
+        {
+            Check("های", "هاي");
+        }
+
+        [Test]
+        public void TestYehBarree()
+        {
+            Check("هاے", "هاي");
+        }
+
+        [Test]
+        public void TestKeheh()
+        {
+            Check("کشاندن", "كشاندن");
+        }
+
+        [Test]
+        public void TestHehYeh()
+        {
+            Check("كتابۀ", "كتابه");
+        }
+
+        [Test]
+        public void TestHehHamzaAbove()
+        {
+            Check("كتابهٔ", "كتابه");
+        }
+
+        [Test]
+        public void TestHehGoal()
+        {
+            Check("زادہ", "زاده");
+        }
+
+        // Tokenizes 'input' with ArabicLetterTokenizer, applies the
+        // PersianNormalizationFilter, and asserts the single expected token.
+        private void Check(String input, String expected)
+        {
+            ArabicLetterTokenizer tokenStream = new ArabicLetterTokenizer(
+                new StringReader(input));
+            PersianNormalizationFilter filter = new PersianNormalizationFilter(
+                tokenStream);
+            AssertTokenStreamContents(filter, new String[] { expected });
+        }
+    }
+}

Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Fr/TestElision.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Fr/TestElision.cs?rev=1204396&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Fr/TestElision.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Fr/TestElision.cs Mon Nov 21 08:41:52 2011
@@ -0,0 +1,47 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Standard;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Index;
+using NUnit.Framework;
+using Version=Lucene.Net.Util.Version;
+
+namespace Lucene.Net.Analyzers.Fr
+{
+    /**
+     * Tests for ElisionFilter: apostrophe prefixes that appear in the supplied
+     * article set ("l", "M") are stripped from tokens ("l'embrouille" ->
+     * "embrouille", "M'enfin" -> "enfin"), while apostrophes whose prefix is
+     * not in the set ("O'brian") are left intact.
+     */
+    [TestFixture]
+    public class TestElision : BaseTokenStreamTestCase
+    {
+        [Test]
+        public void TestElision2()
+        {
+            String test = "Plop, juste pour voir l'embrouille avec O'brian. M'enfin.";
+            Tokenizer tokenizer = new StandardTokenizer(Version.LUCENE_CURRENT, new StringReader(test));
+            HashSet<String> articles = new HashSet<String>();
+            articles.Add("l");
+            articles.Add("M");
+            TokenFilter filter = new ElisionFilter(tokenizer, articles);
+            List<string> tas = Filtre(filter);
+            // Token positions 4, 6, 7 of the tokenized sentence.
+            Assert.AreEqual("embrouille", tas[4]);
+            Assert.AreEqual("O'brian", tas[6]);
+            Assert.AreEqual("enfin", tas[7]);
+        }
+
+        // Drains the filter and returns the term text of every token produced.
+        private List<string> Filtre(TokenFilter filter)
+        {
+            List<string> tas = new List<string>();
+            TermAttribute termAtt = filter.GetAttribute<TermAttribute>();
+            while (filter.IncrementToken())
+            {
+                tas.Add(termAtt.Term());
+            }
+            return tas;
+        }
+    }
+}
\ No newline at end of file

Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Fr/TestFrenchAnalyzer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Fr/TestFrenchAnalyzer.cs?rev=1204396&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Fr/TestFrenchAnalyzer.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Fr/TestFrenchAnalyzer.cs Mon Nov 21 08:41:52 2011
@@ -0,0 +1,147 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Fr;
+using NUnit.Framework;
+using Version = Lucene.Net.Util.Version;
+
+namespace Lucene.Net.Analyzers.Fr
+{
+    /**
+     * Test case for FrenchAnalyzer.
+     *
+     * @version   $version$
+     */
+    [TestFixture]
+    public class TestFrenchAnalyzer : BaseTokenStreamTestCase
+    {
+        /**
+         * End-to-end checks of FrenchAnalyzer: lowercasing, punctuation and
+         * stop-word removal, and French light stemming of nouns, adjectives
+         * and verbs.
+         */
+        [Test]
+        public void TestAnalyzer()
+        {
+            FrenchAnalyzer fa = new FrenchAnalyzer(Version.LUCENE_CURRENT);
+
+            AssertAnalyzesTo(fa, "", new String[0]);
+
+            AssertAnalyzesTo(
+                fa,
+                "chien chat cheval",
+                new String[] {"chien", "chat", "cheval"});
+
+            AssertAnalyzesTo(
+                fa,
+                "chien CHAT CHEVAL",
+                new String[] {"chien", "chat", "cheval"});
+
+            AssertAnalyzesTo(
+                fa,
+                "  chien  ,? + = -  CHAT /: > CHEVAL",
+                new String[] {"chien", "chat", "cheval"});
+
+            AssertAnalyzesTo(fa, "chien++", new String[] {"chien"});
+
+            AssertAnalyzesTo(
+                fa,
+                "mot \"entreguillemet\"",
+                new String[] {"mot", "entreguillemet"});
+
+            // let's do some french specific tests now
+
+            /* 1. couldn't resist
+             I would expect this to stay one term as in French the minus 
+            sign is often used for composing words */
+            AssertAnalyzesTo(
+                fa,
+                "Jean-François",
+                new String[] {"jean", "françois"});
+
+            // 2. stopwords
+            AssertAnalyzesTo(
+                fa,
+                "le la chien les aux chat du des à cheval",
+                new String[] {"chien", "chat", "cheval"});
+
+            // some nouns and adjectives
+            AssertAnalyzesTo(
+                fa,
+                "lances chismes habitable chiste éléments captifs",
+                new String[]
+                    {
+                        "lanc",
+                        "chism",
+                        "habit",
+                        "chist",
+                        "élément",
+                        "captif"
+                    });
+
+            // some verbs
+            AssertAnalyzesTo(
+                fa,
+                "finissions souffrirent rugissante",
+                new String[] {"fin", "souffr", "rug"});
+
+            // some everything else
+            // aujourd'hui stays one term which is OK
+            AssertAnalyzesTo(
+                fa,
+                "C3PO aujourd'hui oeuf ïâöûàä anticonstitutionnellement Java++ ",
+                new String[]
+                    {
+                        "c3po",
+                        "aujourd'hui",
+                        "oeuf",
+                        "ïâöûàä",
+                        "anticonstitutionnel",
+                        "jav"
+                    });
+
+            // some more everything else
+            // here 1940-1945 stays as one term, 1940:1945 not ?
+            AssertAnalyzesTo(
+                fa,
+                "33Bis 1940-1945 1940:1945 (---i+++)*",
+                new String[] {"33bis", "1940-1945", "1940", "1945", "i"});
+
+        }
+
+        /**
+         * Subset of the TestAnalyzer checks, driven through the analyzer's
+         * reusable token stream path on a single FrenchAnalyzer instance.
+         */
+        [Test]
+        public void TestReusableTokenStream()
+        {
+            FrenchAnalyzer fa = new FrenchAnalyzer(Version.LUCENE_CURRENT);
+            // stopwords
+            AssertAnalyzesToReuse(
+                fa,
+                "le la chien les aux chat du des à cheval",
+                new String[] {"chien", "chat", "cheval"});
+
+            // some nouns and adjectives
+            AssertAnalyzesToReuse(
+                fa,
+                "lances chismes habitable chiste éléments captifs",
+                new String[]
+                    {
+                        "lanc",
+                        "chism",
+                        "habit",
+                        "chist",
+                        "élément",
+                        "captif"
+                    });
+        }
+
+        /* 
+         * Test that changes to the exclusion table are applied immediately
+         * when using reusable token streams.
+         */
+        [Test]
+        public void TestExclusionTableReuse()
+        {
+            FrenchAnalyzer fa = new FrenchAnalyzer(Version.LUCENE_CURRENT);
+            // "habitable" is stemmed until it is added to the exclusion table.
+            AssertAnalyzesToReuse(fa, "habitable", new String[] { "habit" });
+            fa.SetStemExclusionTable(new String[] { "habitable" });
+            AssertAnalyzesToReuse(fa, "habitable", new String[] { "habitable" });
+        }
+    }
+}
\ No newline at end of file

Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Lucene.Net.snk
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Lucene.Net.snk?rev=1204396&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Lucene.Net.snk
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Miscellaneous/PatternAnalyzerTest.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Miscellaneous/PatternAnalyzerTest.cs?rev=1204396&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Miscellaneous/PatternAnalyzerTest.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Miscellaneous/PatternAnalyzerTest.cs Mon Nov 21 08:41:52 2011
@@ -0,0 +1,149 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Text.RegularExpressions;
+using Lucene.Net.Analysis;
+using NUnit.Framework;
+using Version = Lucene.Net.Util.Version;
+
+namespace Lucene.Net.Analyzers.Miscellaneous
+{
+    /**
+     * Verifies the behavior of PatternAnalyzer.
+     */
+    [TestFixture]
+    public class PatternAnalyzerTest : BaseTokenStreamTestCase
+    {
+        /**
+         * Test PatternAnalyzer when it is configured with a non-word pattern.
+         * Behavior can be similar to SimpleAnalyzer (depending upon options)
+         */
+        [Test]
+        public void TestNonWordPattern()
+        {
+            // Split on non-letter pattern, do not lowercase, no stopwords
+            PatternAnalyzer a = new PatternAnalyzer(Version.LUCENE_CURRENT, PatternAnalyzer.NON_WORD_PATTERN,
+                false, null);
+            Check(a, "The quick brown Fox,the abcd1234 (56.78) dc.", new String[]
+                                                                         {
+                                                                             "The", "quick", "brown", "Fox", "the",
+                                                                             "abcd", "dc"
+                                                                         });
+
+            // split on non-letter pattern, lowercase, english stopwords
+            PatternAnalyzer b = new PatternAnalyzer(Version.LUCENE_CURRENT, PatternAnalyzer.NON_WORD_PATTERN,
+                true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
+            Check(b, "The quick brown Fox,the abcd1234 (56.78) dc.", new String[]
+                                                                         {
+                                                                             "quick", "brown", "fox", "abcd", "dc"
+                                                                         });
+        }
+
+        /**
+         * Test PatternAnalyzer when it is configured with a whitespace pattern.
+         * Behavior can be similar to WhitespaceAnalyzer (depending upon options)
+         */
+        [Test]
+        public void TestWhitespacePattern()
+        {
+            // Split on whitespace patterns, do not lowercase, no stopwords
+            PatternAnalyzer a = new PatternAnalyzer(Version.LUCENE_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN,
+                false, null);
+            Check(a, "The quick brown Fox,the abcd1234 (56.78) dc.", new String[]
+                                                                         {
+                                                                             "The", "quick", "brown", "Fox,the",
+                                                                             "abcd1234", "(56.78)", "dc."
+                                                                         });
+
+            // Split on whitespace patterns, lowercase, english stopwords
+            PatternAnalyzer b = new PatternAnalyzer(Version.LUCENE_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN,
+                true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
+            Check(b, "The quick brown Fox,the abcd1234 (56.78) dc.", new String[]
+                                                                         {
+                                                                             "quick", "brown", "fox,the", "abcd1234",
+                                                                             "(56.78)", "dc."
+                                                                         });
+        }
+
+        /**
+         * Test PatternAnalyzer when it is configured with a custom pattern. In this
+         * case, text is tokenized on the comma ","
+         */
+        [Test]
+        public void TestCustomPattern()
+        {
+            // Split on comma, do not lowercase, no stopwords
+            PatternAnalyzer a = new PatternAnalyzer(Version.LUCENE_CURRENT, new Regex(",", RegexOptions.Compiled), false, null);
+            Check(a, "Here,Are,some,Comma,separated,words,", new String[]
+                                                                 {
+                                                                     "Here",
+                                                                     "Are", "some", "Comma", "separated", "words"
+                                                                 });
+
+            // split on comma, lowercase, english stopwords
+            PatternAnalyzer b = new PatternAnalyzer(Version.LUCENE_CURRENT, new Regex(",", RegexOptions.Compiled), true,
+                StopAnalyzer.ENGLISH_STOP_WORDS_SET);
+            Check(b, "Here,Are,some,Comma,separated,words,", new String[]
+                                                                 {
+                                                                     "here",
+                                                                     "some", "comma", "separated", "words"
+                                                                 });
+        }
+
+        /**
+         * Test PatternAnalyzer against a large document.
+         */
+        [Test]
+        public void TestHugeDocument()
+        {
+            StringBuilder document = new StringBuilder();
+            // 5000 a's
+            char[] largeWord;
+            largeWord = Enumerable.Repeat('a', 5000).ToArray();
+            document.Append(largeWord);
+
+            // a space
+            document.Append(' ');
+
+            // 2000 b's
+            char[] largeWord2;
+            largeWord2 = Enumerable.Repeat('b', 2000).ToArray();
+            document.Append(largeWord2);
+
+            // Split on whitespace patterns, do not lowercase, no stopwords
+            PatternAnalyzer a = new PatternAnalyzer(Version.LUCENE_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN,
+                false, null);
+            Check(a, document.ToString(), new String[]
+                                              {
+                                                  new String(largeWord),
+                                                  new String(largeWord2)
+                                              });
+        }
+
+        /**
+         * Verify the analyzer analyzes to the expected contents. For PatternAnalyzer,
+         * several methods are verified:
+         * <ul>
+         * <li>Analysis with a normal Reader
+         * <li>Analysis with a FastStringReader
+         * <li>Analysis with a String
+         * </ul>
+         */
+        private void Check(PatternAnalyzer analyzer, String document,
+            String[] expected)
+        {
+            // ordinary analysis of a Reader
+            AssertAnalyzesTo(analyzer, document, expected);
+
+            // analysis with a "FastStringReader"
+            TokenStream ts = analyzer.TokenStream("dummy",
+                new PatternAnalyzer.FastStringReader(document));
+            AssertTokenStreamContents(ts, expected);
+
+            // analysis of a String, uses PatternAnalyzer.tokenStream(String, String)
+            TokenStream ts2 = analyzer.TokenStream("dummy", document);
+            AssertTokenStreamContents(ts2, expected);
+        }
+    }
+}
\ No newline at end of file

Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Miscellaneous/TestEmptyTokenStream.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Miscellaneous/TestEmptyTokenStream.cs?rev=1204396&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Miscellaneous/TestEmptyTokenStream.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Miscellaneous/TestEmptyTokenStream.cs Mon Nov 21 08:41:52 2011
@@ -0,0 +1,23 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using Lucene.Net.Analysis;
+using Lucene.Net.Util;
+using NUnit.Framework;
+
+namespace Lucene.Net.Analyzers.Miscellaneous
+{
+    [TestFixture]
+    public class TestEmptyTokenStream : LuceneTestCase
+    {
+        [Test]
+        public void Test()
+        {
+            TokenStream ts = new EmptyTokenStream();
+            Assert.False(ts.IncrementToken());
+            ts.Reset();
+            Assert.False(ts.IncrementToken());
+        }
+    }
+}

Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Miscellaneous/TestPrefixAndSuffixAwareTokenFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Miscellaneous/TestPrefixAndSuffixAwareTokenFilter.cs?rev=1204396&r1=1204395&r2=1204396&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Miscellaneous/TestPrefixAndSuffixAwareTokenFilter.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Miscellaneous/TestPrefixAndSuffixAwareTokenFilter.cs Mon Nov 21 08:41:52 2011
@@ -22,6 +22,7 @@ using NUnit.Framework;
 
 namespace Lucene.Net.Analyzers.Miscellaneous
 {
+    [TestFixture]
     public class TestPrefixAndSuffixAwareTokenFilter : BaseTokenStreamTestCase
     {
         [Test]

Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Miscellaneous/TestSingleTokenTokenFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Miscellaneous/TestSingleTokenTokenFilter.cs?rev=1204396&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Miscellaneous/TestSingleTokenTokenFilter.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Miscellaneous/TestSingleTokenTokenFilter.cs Mon Nov 21 08:41:52 2011
@@ -0,0 +1,37 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
+using NUnit.Framework;
+
+namespace Lucene.Net.Analyzers.Miscellaneous
+{
+    [TestFixture]
+    public class TestSingleTokenTokenFilter : LuceneTestCase
+    {
+        [Test]
+        public void Test()
+        {
+            Token token = new Token();
+            SingleTokenTokenStream ts = new SingleTokenTokenStream(token);
+            AttributeImpl tokenAtt = (AttributeImpl)ts.AddAttribute<TermAttribute>();
+            Assert.True(tokenAtt is Token);
+            ts.Reset();
+
+            Assert.True(ts.IncrementToken());
+            Assert.AreEqual(token, tokenAtt);
+            Assert.False(ts.IncrementToken());
+
+            token = new Token("hallo", 10, 20, "someType");
+            ts.SetToken(token);
+            ts.Reset();
+
+            Assert.True(ts.IncrementToken());
+            Assert.AreEqual(token, tokenAtt);
+            Assert.False(ts.IncrementToken());
+        }
+    }
+}

Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/NGram/TestEdgeNGramTokenFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/NGram/TestEdgeNGramTokenFilter.cs?rev=1204396&r1=1204395&r2=1204396&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/NGram/TestEdgeNGramTokenFilter.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/NGram/TestEdgeNGramTokenFilter.cs Mon Nov 21 08:41:52 2011
@@ -1,44 +1,24 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-using System;
+using System;
+using System.Collections.Generic;
 using System.IO;
-using System.Collections;
-
+using System.Linq;
+using System.Text;
 using Lucene.Net.Analysis;
-using Lucene.Net.Analysis.Tokenattributes;
-using Lucene.Net.Util;
+using Lucene.Net.Analysis.NGram;
 using NUnit.Framework;
 
-namespace Lucene.Net.Analysis.NGram
+namespace Lucene.Net.Analyzers.Miscellaneous
 {
-
     /**
      * Tests {@link EdgeNGramTokenFilter} for correctness.
      */
     [TestFixture]
-    public class TestEdgeNGramTokenFilter : BaseTokenStreamTestCase
+    public class EdgeNGramTokenFilterTest : BaseTokenStreamTestCase
     {
         private TokenStream input;
 
-        [SetUp]
-        public void SetUp()
+        public override void SetUp()
         {
-            base.SetUp();
             input = new WhitespaceTokenizer(new StringReader("abcde"));
         }
 
@@ -50,11 +30,11 @@ namespace Lucene.Net.Analysis.NGram
             {
                 new EdgeNGramTokenFilter(input, Side.FRONT, 0, 0);
             }
-            catch (System.ArgumentException e)
+            catch (ArgumentException e)
             {
                 gotException = true;
             }
-            Assert.IsTrue(gotException);
+            Assert.True(gotException);
         }
 
         [Test]
@@ -65,11 +45,11 @@ namespace Lucene.Net.Analysis.NGram
             {
                 new EdgeNGramTokenFilter(input, Side.FRONT, 2, 1);
             }
-            catch (System.ArgumentException e)
+            catch (ArgumentException e)
             {
                 gotException = true;
             }
-            Assert.IsTrue(gotException);
+            Assert.True(gotException);
         }
 
         [Test]
@@ -80,11 +60,11 @@ namespace Lucene.Net.Analysis.NGram
             {
                 new EdgeNGramTokenFilter(input, Side.FRONT, -1, 2);
             }
-            catch (System.ArgumentException e)
+            catch (ArgumentException e)
             {
                 gotException = true;
             }
-            Assert.IsTrue(gotException);
+            Assert.True(gotException);
         }
 
         [Test]

Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/NGram/TestEdgeNGramTokenizer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/NGram/TestEdgeNGramTokenizer.cs?rev=1204396&r1=1204395&r2=1204396&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/NGram/TestEdgeNGramTokenizer.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/NGram/TestEdgeNGramTokenizer.cs Mon Nov 21 08:41:52 2011
@@ -1,47 +1,26 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-using System;
+using System;
+using System.Collections.Generic;
 using System.IO;
-using System.Collections;
-
+using System.Linq;
+using System.Text;
 using Lucene.Net.Analysis;
-using Lucene.Net.Analysis.Tokenattributes;
-using Lucene.Net.Util;
+using Lucene.Net.Analysis.NGram;
 using NUnit.Framework;
 
-namespace Lucene.Net.Analysis.NGram
+namespace Lucene.Net.Analyzers.Miscellaneous
 {
-
-    /**
-     * Tests {@link EdgeNGramTokenizer} for correctness.
+    /**
+     * Tests {@link EdgeNGramTokenizer} for correctness.
      */
     [TestFixture]
-    public class TestEdgeNGramTokenizer : BaseTokenStreamTestCase
+    public class EdgeNGramTokenizerTest : BaseTokenStreamTestCase
     {
         private StringReader input;
 
-        [SetUp]
-        public void SetUp()
+        public override void SetUp()
         {
-            base.SetUp();
             input = new StringReader("abcde");
         }
-
         [Test]
         public void TestInvalidInput()
         {
@@ -50,11 +29,11 @@ namespace Lucene.Net.Analysis.NGram
             {
                 new EdgeNGramTokenizer(input, Side.FRONT, 0, 0);
             }
-            catch (System.ArgumentException e)
+            catch (ArgumentException e)
             {
                 gotException = true;
             }
-            Assert.IsTrue(gotException);
+            Assert.True(gotException);
         }
 
         [Test]
@@ -65,11 +44,11 @@ namespace Lucene.Net.Analysis.NGram
             {
                 new EdgeNGramTokenizer(input, Side.FRONT, 2, 1);
             }
-            catch (System.ArgumentException e)
+            catch (ArgumentException e)
             {
                 gotException = true;
             }
-            Assert.IsTrue(gotException);
+            Assert.True(gotException);
         }
 
         [Test]
@@ -80,11 +59,11 @@ namespace Lucene.Net.Analysis.NGram
             {
                 new EdgeNGramTokenizer(input, Side.FRONT, -1, 2);
             }
-            catch (System.ArgumentException e)
+            catch (ArgumentException e)
             {
                 gotException = true;
             }
-            Assert.IsTrue(gotException);
+            Assert.True(gotException);
         }
 
         [Test]

Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Nl/TestDutchStemmer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Nl/TestDutchStemmer.cs?rev=1204396&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Nl/TestDutchStemmer.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Nl/TestDutchStemmer.cs Mon Nov 21 08:41:52 2011
@@ -0,0 +1,177 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Nl;
+using NUnit.Framework;
+using Version = Lucene.Net.Util.Version;
+
+namespace Lucene.Net.Analyzers.Nl
+{
+    /**
+     * Test the Dutch Stem Filter, which only modifies the term text.
+     * 
+     * The code states that it uses the snowball algorithm, but tests reveal some differences.
+     * 
+     */
+    [TestFixture]
+    public class TestDutchStemmer : BaseTokenStreamTestCase
+    {
+        FileInfo customDictFile = new FileInfo(@"nl\customStemDict.txt");
+
+        [Test]
+        public void TestWithSnowballExamples()
+        {
+            Check("lichaamsziek", "lichaamsziek");
+            Check("lichamelijk", "licham");
+            Check("lichamelijke", "licham");
+            Check("lichamelijkheden", "licham");
+            Check("lichamen", "licham");
+            Check("lichere", "licher");
+            Check("licht", "licht");
+            Check("lichtbeeld", "lichtbeeld");
+            Check("lichtbruin", "lichtbruin");
+            Check("lichtdoorlatende", "lichtdoorlat");
+            Check("lichte", "licht");
+            Check("lichten", "licht");
+            Check("lichtende", "lichtend");
+            Check("lichtenvoorde", "lichtenvoord");
+            Check("lichter", "lichter");
+            Check("lichtere", "lichter");
+            Check("lichters", "lichter");
+            Check("lichtgevoeligheid", "lichtgevoel");
+            Check("lichtgewicht", "lichtgewicht");
+            Check("lichtgrijs", "lichtgrijs");
+            Check("lichthoeveelheid", "lichthoevel");
+            Check("lichtintensiteit", "lichtintensiteit");
+            Check("lichtje", "lichtj");
+            Check("lichtjes", "lichtjes");
+            Check("lichtkranten", "lichtkrant");
+            Check("lichtkring", "lichtkring");
+            Check("lichtkringen", "lichtkring");
+            Check("lichtregelsystemen", "lichtregelsystem");
+            Check("lichtste", "lichtst");
+            Check("lichtstromende", "lichtstrom");
+            Check("lichtte", "licht");
+            Check("lichtten", "licht");
+            Check("lichttoetreding", "lichttoetred");
+            Check("lichtverontreinigde", "lichtverontreinigd");
+            Check("lichtzinnige", "lichtzinn");
+            Check("lid", "lid");
+            Check("lidia", "lidia");
+            Check("lidmaatschap", "lidmaatschap");
+            Check("lidstaten", "lidstat");
+            Check("lidvereniging", "lidveren");
+            Check("opgingen", "opging");
+            Check("opglanzing", "opglanz");
+            Check("opglanzingen", "opglanz");
+            Check("opglimlachten", "opglimlacht");
+            Check("opglimpen", "opglimp");
+            Check("opglimpende", "opglimp");
+            Check("opglimping", "opglimp");
+            Check("opglimpingen", "opglimp");
+            Check("opgraven", "opgrav");
+            Check("opgrijnzen", "opgrijnz");
+            Check("opgrijzende", "opgrijz");
+            Check("opgroeien", "opgroei");
+            Check("opgroeiende", "opgroei");
+            Check("opgroeiplaats", "opgroeiplat");
+            Check("ophaal", "ophal");
+            Check("ophaaldienst", "ophaaldienst");
+            Check("ophaalkosten", "ophaalkost");
+            Check("ophaalsystemen", "ophaalsystem");
+            Check("ophaalt", "ophaalt");
+            Check("ophaaltruck", "ophaaltruck");
+            Check("ophalen", "ophal");
+            Check("ophalend", "ophal");
+            Check("ophalers", "ophaler");
+            Check("ophef", "ophef");
+            Check("opheffen", "ophef"); // versus snowball 'opheff'
+            Check("opheffende", "ophef"); // versus snowball 'opheff'
+            Check("opheffing", "ophef"); // versus snowball 'opheff'
+            Check("opheldering", "ophelder");
+            Check("ophemelde", "ophemeld");
+            Check("ophemelen", "ophemel");
+            Check("opheusden", "opheusd");
+            Check("ophief", "ophief");
+            Check("ophield", "ophield");
+            Check("ophieven", "ophiev");
+            Check("ophoepelt", "ophoepelt");
+            Check("ophoog", "ophog");
+            Check("ophoogzand", "ophoogzand");
+            Check("ophopen", "ophop");
+            Check("ophoping", "ophop");
+            Check("ophouden", "ophoud");
+        }
+
+        [Test]
+        public void TestReusableTokenStream()
+        {
+            Analyzer a = new DutchAnalyzer(Version.LUCENE_CURRENT);
+            CheckOneTermReuse(a, "lichaamsziek", "lichaamsziek");
+            CheckOneTermReuse(a, "lichamelijk", "licham");
+            CheckOneTermReuse(a, "lichamelijke", "licham");
+            CheckOneTermReuse(a, "lichamelijkheden", "licham");
+        }
+
+        /**
+         * subclass that acts just like whitespace analyzer for testing
+         */
+        private class DutchSubclassAnalyzer : DutchAnalyzer
+        {
+            public DutchSubclassAnalyzer(Version matchVersion)
+                : base(matchVersion)
+            {
+
+            }
+            public override TokenStream TokenStream(String fieldName, TextReader reader)
+            {
+                return new WhitespaceTokenizer(reader);
+            }
+        }
+
+        [Test]
+        public void TestLucene1678BwComp()
+        {
+            Analyzer a = new DutchSubclassAnalyzer(Version.LUCENE_CURRENT);
+            CheckOneTermReuse(a, "lichaamsziek", "lichaamsziek");
+            CheckOneTermReuse(a, "lichamelijk", "lichamelijk");
+            CheckOneTermReuse(a, "lichamelijke", "lichamelijke");
+            CheckOneTermReuse(a, "lichamelijkheden", "lichamelijkheden");
+        }
+
+        /* 
+         * Test that changes to the exclusion table are applied immediately
+         * when using reusable token streams.
+         */
+        [Test]
+        public void TestExclusionTableReuse()
+        {
+            DutchAnalyzer a = new DutchAnalyzer(Version.LUCENE_CURRENT);
+            CheckOneTermReuse(a, "lichamelijk", "licham");
+            a.SetStemExclusionTable(new String[] { "lichamelijk" });
+            CheckOneTermReuse(a, "lichamelijk", "lichamelijk");
+        }
+
+        /* 
+         * Test that changes to the dictionary stemming table are applied immediately
+         * when using reusable token streams.
+         */
+        [Test]
+        public void TestStemDictionaryReuse()
+        {
+            DutchAnalyzer a = new DutchAnalyzer(Version.LUCENE_CURRENT);
+            CheckOneTermReuse(a, "lichamelijk", "licham");
+            a.SetStemDictionary(customDictFile);
+            CheckOneTermReuse(a, "lichamelijk", "somethingentirelydifferent");
+        }
+
+        private void Check(String input, String expected)
+        {
+            CheckOneTerm(new DutchAnalyzer(Version.LUCENE_CURRENT), input, expected);
+        }
+
+    }
+}
\ No newline at end of file

Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Nl/customStemDict.txt
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Nl/customStemDict.txt?rev=1204396&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Nl/customStemDict.txt (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Nl/customStemDict.txt Mon Nov 21 08:41:52 2011
@@ -0,0 +1,3 @@
+lichamelijk	somethingentirelydifferent
+lichamelijke	licham
+lichamelijkheden	licham

Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Payloads/DelimitedPayloadTokenFilterTest.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Payloads/DelimitedPayloadTokenFilterTest.cs?rev=1204396&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Payloads/DelimitedPayloadTokenFilterTest.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Payloads/DelimitedPayloadTokenFilterTest.cs Mon Nov 21 08:41:52 2011
@@ -0,0 +1,140 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Index;
+using Lucene.Net.Util;
+using NUnit.Framework;
+
+namespace Lucene.Net.Analyzers.Payloads
+{
+    [TestFixture]
+    public class DelimitedPayloadTokenFilterTest : LuceneTestCase
+    {
+        [Test]
+        public void TestPayloads()
+        {
+            var encoding = Encoding.UTF8;
+            String test = "The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN";
+            DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(new StringReader(test)));
+            TermAttribute termAtt = filter.GetAttribute<TermAttribute>();
+            PayloadAttribute payAtt = filter.GetAttribute<PayloadAttribute>();
+            AssertTermEquals("The", filter, termAtt, payAtt, null);
+            AssertTermEquals("quick", filter, termAtt, payAtt, encoding.GetBytes("JJ"));
+            AssertTermEquals("red", filter, termAtt, payAtt, encoding.GetBytes("JJ"));
+            AssertTermEquals("fox", filter, termAtt, payAtt, encoding.GetBytes("NN"));
+            AssertTermEquals("jumped", filter, termAtt, payAtt, encoding.GetBytes("VB"));
+            AssertTermEquals("over", filter, termAtt, payAtt, null);
+            AssertTermEquals("the", filter, termAtt, payAtt, null);
+            AssertTermEquals("lazy", filter, termAtt, payAtt, encoding.GetBytes("JJ"));
+            AssertTermEquals("brown", filter, termAtt, payAtt, encoding.GetBytes("JJ"));
+            AssertTermEquals("dogs", filter, termAtt, payAtt, encoding.GetBytes("NN"));
+            Assert.False(filter.IncrementToken());
+        }
+
+        [Test]
+        public void TestNext()
+        {
+            var encoding = Encoding.UTF8;
+            String test = "The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN";
+            DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(new StringReader(test)));
+            AssertTermEquals("The", filter, null);
+            AssertTermEquals("quick", filter, encoding.GetBytes("JJ"));
+            AssertTermEquals("red", filter, encoding.GetBytes("JJ"));
+            AssertTermEquals("fox", filter, encoding.GetBytes("NN"));
+            AssertTermEquals("jumped", filter, encoding.GetBytes("VB"));
+            AssertTermEquals("over", filter, null);
+            AssertTermEquals("the", filter, null);
+            AssertTermEquals("lazy", filter, encoding.GetBytes("JJ"));
+            AssertTermEquals("brown", filter, encoding.GetBytes("JJ"));
+            AssertTermEquals("dogs", filter, encoding.GetBytes("NN"));
+            Assert.False(filter.IncrementToken());
+        }
+
+
+        [Test]
+        public void TestFloatEncoding()
+        {
+            String test = "The quick|1.0 red|2.0 fox|3.5 jumped|0.5 over the lazy|5 brown|99.3 dogs|83.7";
+            DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(new StringReader(test)), '|', new FloatEncoder());
+            TermAttribute termAtt = filter.GetAttribute<TermAttribute>();
+            PayloadAttribute payAtt = filter.GetAttribute<PayloadAttribute>();
+            AssertTermEquals("The", filter, termAtt, payAtt, null);
+            AssertTermEquals("quick", filter, termAtt, payAtt, PayloadHelper.EncodeFloat(1.0f));
+            AssertTermEquals("red", filter, termAtt, payAtt, PayloadHelper.EncodeFloat(2.0f));
+            AssertTermEquals("fox", filter, termAtt, payAtt, PayloadHelper.EncodeFloat(3.5f));
+            AssertTermEquals("jumped", filter, termAtt, payAtt, PayloadHelper.EncodeFloat(0.5f));
+            AssertTermEquals("over", filter, termAtt, payAtt, null);
+            AssertTermEquals("the", filter, termAtt, payAtt, null);
+            AssertTermEquals("lazy", filter, termAtt, payAtt, PayloadHelper.EncodeFloat(5.0f));
+            AssertTermEquals("brown", filter, termAtt, payAtt, PayloadHelper.EncodeFloat(99.3f));
+            AssertTermEquals("dogs", filter, termAtt, payAtt, PayloadHelper.EncodeFloat(83.7f));
+            Assert.False(filter.IncrementToken());
+        }
+
+        [Test]
+        public void TestIntEncoding()
+        {
+            String test = "The quick|1 red|2 fox|3 jumped over the lazy|5 brown|99 dogs|83";
+            DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(new StringReader(test)), '|', new IntegerEncoder());
+            TermAttribute termAtt = filter.GetAttribute<TermAttribute>();
+            PayloadAttribute payAtt = filter.GetAttribute<PayloadAttribute>();
+            AssertTermEquals("The", filter, termAtt, payAtt, null);
+            AssertTermEquals("quick", filter, termAtt, payAtt, PayloadHelper.EncodeInt(1));
+            AssertTermEquals("red", filter, termAtt, payAtt, PayloadHelper.EncodeInt(2));
+            AssertTermEquals("fox", filter, termAtt, payAtt, PayloadHelper.EncodeInt(3));
+            AssertTermEquals("jumped", filter, termAtt, payAtt, null);
+            AssertTermEquals("over", filter, termAtt, payAtt, null);
+            AssertTermEquals("the", filter, termAtt, payAtt, null);
+            AssertTermEquals("lazy", filter, termAtt, payAtt, PayloadHelper.EncodeInt(5));
+            AssertTermEquals("brown", filter, termAtt, payAtt, PayloadHelper.EncodeInt(99));
+            AssertTermEquals("dogs", filter, termAtt, payAtt, PayloadHelper.EncodeInt(83));
+            Assert.False(filter.IncrementToken());
+        }
+
+        void AssertTermEquals(String expected, TokenStream stream, byte[] expectPay)
+        {
+            TermAttribute termAtt = stream.GetAttribute<TermAttribute>();
+            PayloadAttribute payloadAtt = stream.GetAttribute<PayloadAttribute>();
+            Assert.True(stream.IncrementToken());
+            Assert.AreEqual(expected, termAtt.Term());
+            Payload payload = payloadAtt.GetPayload();
+            if (payload != null)
+            {
+                Assert.True(payload.Length() == expectPay.Length, payload.Length() + " does not equal: " + expectPay.Length);
+                for (int i = 0; i < expectPay.Length; i++)
+                {
+                    Assert.True(expectPay[i] == payload.ByteAt(i), expectPay[i] + " does not equal: " + payload.ByteAt(i));
+
+                }
+            }
+            else
+            {
+                Assert.True(expectPay == null, "expectPay is not null and it should be");
+            }
+        }
+
+        void AssertTermEquals(String expected, TokenStream stream, TermAttribute termAtt, PayloadAttribute payAtt, byte[] expectPay)
+        {
+            Assert.True(stream.IncrementToken());
+            Assert.AreEqual(expected, termAtt.Term());
+            Payload payload = payAtt.GetPayload();
+            if (payload != null)
+            {
+                Assert.True(payload.Length() == expectPay.Length, payload.Length() + " does not equal: " + expectPay.Length);
+                for (int i = 0; i < expectPay.Length; i++)
+                {
+                    Assert.True(expectPay[i] == payload.ByteAt(i), expectPay[i] + " does not equal: " + payload.ByteAt(i));
+
+                }
+            }
+            else
+            {
+                Assert.True(expectPay == null, "expectPay is not null and it should be");
+            }
+        }
+    }
+}

Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Payloads/NumericPayloadTokenFilterTest.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Payloads/NumericPayloadTokenFilterTest.cs?rev=1204396&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Payloads/NumericPayloadTokenFilterTest.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Payloads/NumericPayloadTokenFilterTest.cs Mon Nov 21 08:41:52 2011
@@ -0,0 +1,73 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Tokenattributes;
+using NUnit.Framework;
+
+namespace Lucene.Net.Analyzers.Payloads
+{
+    /// <summary>
+    /// Verifies that <c>NumericPayloadTokenFilter</c> attaches an encoded float payload
+    /// to every token whose type matches the configured type ("D" here) and leaves the
+    /// remaining tokens untouched.
+    /// </summary>
+    [TestFixture]
+    public class NumericPayloadTokenFilterTest : BaseTokenStreamTestCase
+    {
+        [Test]
+        public void Test()
+        {
+            String test = "The quick red fox jumped over the lazy brown dogs";
+
+            // WordTokenFilter retypes the token "dogs" as "D"; the payload filter should
+            // then attach the float 3 (encoded via PayloadHelper) to that token only.
+            NumericPayloadTokenFilter nptf = new NumericPayloadTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(new StringReader(test))), 3, "D");
+            bool seenDogs = false;
+            TermAttribute termAtt = nptf.GetAttribute<TermAttribute>();
+            TypeAttribute typeAtt = nptf.GetAttribute<TypeAttribute>();
+            PayloadAttribute payloadAtt = nptf.GetAttribute<PayloadAttribute>();
+            while (nptf.IncrementToken())
+            {
+                if (termAtt.Term().Equals("dogs"))
+                {
+                    seenDogs = true;
+                    Assert.True(typeAtt.Type().Equals("D"), typeAtt.Type() + " is not equal to " + "D");
+                    Assert.True(payloadAtt.GetPayload() != null, "payloadAtt.GetPayload() is null and it shouldn't be");
+                    byte[] bytes = payloadAtt.GetPayload().GetData();//safe here to just use the bytes, otherwise we should use offset, length
+                    Assert.True(bytes.Length == payloadAtt.GetPayload().Length(), bytes.Length + " does not equal: " + payloadAtt.GetPayload().Length());
+                    Assert.True(payloadAtt.GetPayload().GetOffset() == 0, payloadAtt.GetPayload().GetOffset() + " does not equal: " + 0);
+                    float pay = PayloadHelper.DecodeFloat(bytes);
+                    Assert.True(pay == 3, pay + " does not equal: " + 3);
+                }
+                else
+                {
+                    // Fixed misleading failure message: the assertion checks that the type is
+                    // still the default "word", not that it is null.
+                    Assert.True(typeAtt.Type().Equals("word"), typeAtt.Type() + " is not equal to " + "word");
+                }
+            }
+            Assert.True(seenDogs == true, seenDogs + " does not equal: " + true);
+        }
+
+        /// <summary>Marks the token "dogs" with type "D"; all other tokens keep their type.</summary>
+        internal sealed class WordTokenFilter : TokenFilter
+        {
+            private TermAttribute termAtt;
+            private TypeAttribute typeAtt;
+
+            internal WordTokenFilter(TokenStream input)
+                : base(input)
+            {
+                termAtt = AddAttribute<TermAttribute>();
+                typeAtt = AddAttribute<TypeAttribute>();
+            }
+
+            public override bool IncrementToken()
+            {
+                if (input.IncrementToken())
+                {
+                    if (termAtt.Term().Equals("dogs"))
+                        typeAtt.SetType("D");
+                    return true;
+                }
+                return false;
+            }
+        }
+    }
+}

Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Payloads/TokenOffsetPayloadTokenFilterTest.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Payloads/TokenOffsetPayloadTokenFilterTest.cs?rev=1204396&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Payloads/TokenOffsetPayloadTokenFilterTest.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Payloads/TokenOffsetPayloadTokenFilterTest.cs Mon Nov 21 08:41:52 2011
@@ -0,0 +1,40 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Index;
+using NUnit.Framework;
+
+namespace Lucene.Net.Analyzers.Payloads
+{
+    /// <summary>
+    /// Verifies that <c>TokenOffsetPayloadTokenFilter</c> stores each token's start and
+    /// end offsets in its payload as two encoded ints.
+    /// </summary>
+    [TestFixture]
+    public class TokenOffsetPayloadTokenFilterTest : BaseTokenStreamTestCase
+    {
+        [Test]
+        public void Test()
+        {
+            String test = "The quick red fox jumped over the lazy brown dogs";
+
+            TokenOffsetPayloadTokenFilter nptf = new TokenOffsetPayloadTokenFilter(new WhitespaceTokenizer(new StringReader(test)));
+            PayloadAttribute payloadAtt = nptf.GetAttribute<PayloadAttribute>();
+            OffsetAttribute offsetAtt = nptf.GetAttribute<OffsetAttribute>();
+            int tokenCount = 0;
+
+            while (nptf.IncrementToken())
+            {
+                Payload pay = payloadAtt.GetPayload();
+                Assert.True(pay != null, "pay is null and it shouldn't be");
+                byte[] data = pay.GetData();
+                // Bytes 0-3 hold the start offset, bytes 4-7 the end offset.
+                int start = PayloadHelper.DecodeInt(data, 0);
+                Assert.True(start == offsetAtt.StartOffset(), start + " does not equal: " + offsetAtt.StartOffset());
+                int end = PayloadHelper.DecodeInt(data, 4);
+                Assert.True(end == offsetAtt.EndOffset(), end + " does not equal: " + offsetAtt.EndOffset());
+                tokenCount++;
+            }
+            Assert.True(tokenCount == 10, tokenCount + " does not equal: " + 10);
+        }
+    }
+}

Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Payloads/TypeAsPayloadTokenFilterTest.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Payloads/TypeAsPayloadTokenFilterTest.cs?rev=1204396&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Payloads/TypeAsPayloadTokenFilterTest.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/Payloads/TypeAsPayloadTokenFilterTest.cs Mon Nov 21 08:41:52 2011
@@ -0,0 +1,65 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Tokenattributes;
+using NUnit.Framework;
+
+namespace Lucene.Net.Analyzers.Payloads
+{
+    /// <summary>
+    /// Verifies that <c>TypeAsPayloadTokenFilter</c> copies each token's type string
+    /// into its payload (UTF-8 encoded).
+    /// </summary>
+    [TestFixture]
+    public class TypeAsPayloadTokenFilterTest : BaseTokenStreamTestCase
+    {
+        [Test]
+        public void test()
+        {
+            String test = "The quick red fox jumped over the lazy brown dogs";
+
+            TypeAsPayloadTokenFilter nptf = new TypeAsPayloadTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(new StringReader(test))));
+            int count = 0;
+            TermAttribute termAtt = nptf.GetAttribute<TermAttribute>();
+            TypeAttribute typeAtt = nptf.GetAttribute<TypeAttribute>();
+            PayloadAttribute payloadAtt = nptf.GetAttribute<PayloadAttribute>();
+
+            while (nptf.IncrementToken())
+            {
+                // WordTokenFilter sets the type to the upper-cased first letter of the term.
+                // Fixed misleading failure message (this is an equality check, not a null check).
+                Assert.True(typeAtt.Type().Equals(char.ToUpper(termAtt.TermBuffer()[0]).ToString()), typeAtt.Type() + " is not equal to the upper-cased first letter of the term");
+                Assert.True(payloadAtt.GetPayload() != null, "nextToken.getPayload() is null and it shouldn't be");
+                String type = Encoding.UTF8.GetString(payloadAtt.GetPayload().GetData()); // removed stray empty statement
+                Assert.True(type != null, "type is null and it shouldn't be");
+                Assert.True(type.Equals(typeAtt.Type()), type + " is not equal to " + typeAtt.Type());
+                count++;
+            }
+
+            Assert.True(count == 10, count + " does not equal: " + 10);
+        }
+
+        /// <summary>Replaces each token's type with the upper-cased first character of its term text.</summary>
+        private sealed class WordTokenFilter : TokenFilter
+        {
+            private TermAttribute termAtt;
+            private TypeAttribute typeAtt;
+
+            internal WordTokenFilter(TokenStream input)
+                : base(input)
+            {
+                termAtt = AddAttribute<TermAttribute>();
+                typeAtt = AddAttribute<TypeAttribute>();
+            }
+
+            public override bool IncrementToken()
+            {
+                if (input.IncrementToken())
+                {
+                    typeAtt.SetType(char.ToUpper(termAtt.TermBuffer()[0]).ToString());
+                    return true;
+                }
+                return false;
+            }
+        }
+    }
+}
\ No newline at end of file

Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/PortedTests.txt
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/PortedTests.txt?rev=1204396&r1=1204395&r2=1204396&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/PortedTests.txt (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Analyzers/PortedTests.txt Mon Nov 21 08:41:52 2011
@@ -1,48 +1,3 @@
-
-analysis\ar\TestArabicAnalyzer.java - Text files are different
-analysis\ar\TestArabicNormalizationFilter.java - Text files are identical
-analysis\ar\TestArabicStemFilter.java - Text files are identical
-analysis\br\TestBrazilianStemmer.java - Text files are different
-analysis\cjk\TestCJKTokenizer.java - Text files are different
-analysis\cn\TestChineseTokenizer.java - Text files are different
-analysis\compound\TestCompoundWordTokenFilter.java - Text files are different
-analysis\cz\customStopWordFile.txt - Text files are identical
-analysis\cz\TestCzechAnalyzer.java - Text files are different
-analysis\de\data.txt - Text files are identical
-analysis\de\TestGermanStemFilter.java - Text files are different
-analysis\el\GreekAnalyzerTest.java - Text files are different
-analysis\fa\TestPersianAnalyzer.java - Text files are different
-analysis\fa\TestPersianNormalizationFilter.java - Text files are identical
-analysis\fr\TestElision.java - Text files are different
-analysis\fr\TestFrenchAnalyzer.java - Text files are different
-analysis\miscellaneous\PatternAnalyzerTest.java - Right only: C:\SVN\apache\Lucene\lucene-3.0.3\contrib\analyzers\common\src\test\org\apache\lucene\analysis\miscellaneous
-analysis\miscellaneous\TestEmptyTokenStream.java - Text files are identical
-analysis\miscellaneous\TestPrefixAndSuffixAwareTokenFilter.java - Text files are identical
-analysis\miscellaneous\TestPrefixAwareTokenFilter.java - Text files are identical
-analysis\miscellaneous\TestSingleTokenTokenFilter.java - Text files are different
-analysis\ngram\EdgeNGramTokenFilterTest.java - Text files are different
-analysis\ngram\EdgeNGramTokenizerTest.java - Text files are different
-analysis\ngram\NGramTokenFilterTest.java - Text files are different
-analysis\ngram\NGramTokenizerTest.java - Text files are different
-analysis\nl\customStemDict.txt - Text files are identical
-analysis\nl\TestDutchStemmer.java - Text files are different
-analysis\payloads\DelimitedPayloadTokenFilterTest.java - Text files are different
-analysis\payloads\NumericPayloadTokenFilterTest.java - Text files are different
-analysis\payloads\TokenOffsetPayloadTokenFilterTest.java - Text files are different
-analysis\payloads\TypeAsPayloadTokenFilterTest.java - Text files are different
-analysis\position\PositionFilterTest.java - Text files are different
-analysis\query\QueryAutoStopWordAnalyzerTest.java - Text files are different
-analysis\reverse\TestReverseStringFilter.java - Text files are different
-analysis\ru\resUTF8.htm - Text files are identical
-analysis\ru\stemsUTF8.txt - Text files are identical
-analysis\ru\TestRussianAnalyzer.java - Text files are different
-analysis\ru\TestRussianStem.java - Text files are different
-analysis\ru\testUTF8.txt - Text files are identical
-analysis\ru\wordsUTF8.txt - Text files are identical
-analysis\shingle\ShingleAnalyzerWrapperTest.java - Text files are different
-analysis\shingle\ShingleFilterTest.java - Text files are different
-analysis\shingle\TestShingleMatrixFilter.java - Text files are different
-analysis\sinks\DateRecognizerSinkTokenizerTest.java - Text files are identical
-analysis\sinks\TokenRangeSinkTokenizerTest.java - Text files are identical
-analysis\sinks\TokenTypeSinkTokenizerTest.java - Text files are different
-analysis\th\TestThaiAnalyzer.java - Text files are different
\ No newline at end of file
+TODO: Add tests to make sure that Version numbers match for all contrib assemblies, all test contrib, lucene core and lucene test.
+
+All ported, except ThaiAnalyzer and Hyphenation
\ No newline at end of file