You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by sy...@apache.org on 2015/12/10 19:39:03 UTC
[14/27] lucenenet git commit: adding converted analysis common tests
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestOptionalCondition.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestOptionalCondition.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestOptionalCondition.cs
new file mode 100644
index 0000000..cb7bb6f
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestOptionalCondition.cs
@@ -0,0 +1,50 @@
+namespace org.apache.lucene.analysis.hunspell
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using BeforeClass = org.junit.BeforeClass;
+
+ public class TestOptionalCondition : StemmerTestBase
+ {
+// One-time fixture setup; the Java @BeforeClass annotation shown below has no .NET attribute applied yet (TODO):
+//ORIGINAL LINE: @BeforeClass public static void beforeClass() throws Exception
+// Hands the "optional-condition.aff" / "condition.dic" resource names to init (presumably a StemmerTestBase helper; verify).
+ public static void beforeClass()
+ {
+ init("optional-condition.aff", "condition.dic");
+ }
+
+ public virtual void testStemming()
+ {
+ assertStemsTo("hello", "hello");
+ assertStemsTo("try", "try");
+ assertStemsTo("tried", "try");
+ assertStemsTo("work", "work");
+ assertStemsTo("worked", "work");
+ assertStemsTo("rework", "work");
+ assertStemsTo("reworked", "work");
+ assertStemsTo("retried");
+ assertStemsTo("workied");
+ assertStemsTo("tryed");
+ assertStemsTo("tryied");
+ assertStemsTo("helloed");
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestStemmer.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestStemmer.cs
new file mode 100644
index 0000000..93df12e
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestStemmer.cs
@@ -0,0 +1,88 @@
+namespace org.apache.lucene.analysis.hunspell
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using BeforeClass = org.junit.BeforeClass;
+
+ public class TestStemmer : StemmerTestBase
+ {
+
+// One-time fixture setup; the Java @BeforeClass annotation shown below has no .NET attribute applied yet (TODO):
+//ORIGINAL LINE: @BeforeClass public static void beforeClass() throws Exception
+// Hands the "simple.aff" / "simple.dic" resource names to init (presumably a StemmerTestBase helper; verify).
+ public static void beforeClass()
+ {
+ init("simple.aff", "simple.dic");
+ }
+
+ public virtual void testSimpleSuffix()
+ {
+ assertStemsTo("lucene", "lucene", "lucen");
+ assertStemsTo("mahoute", "mahout");
+ }
+
+ public virtual void testSimplePrefix()
+ {
+ assertStemsTo("solr", "olr");
+ }
+
+ public virtual void testRecursiveSuffix()
+ {
+ // Must not recurse here: the suffix has no continuation, so no expected stems are passed.
+ assertStemsTo("abcd");
+ }
+
+ // All forms unmunched from the dictionary, each paired with the stem(s) expected for it.
+ public virtual void testAllStems()
+ {
+ assertStemsTo("ab", "ab");
+ assertStemsTo("abc", "ab");
+ assertStemsTo("apach", "apach");
+ assertStemsTo("apache", "apach");
+ assertStemsTo("foo", "foo", "foo");
+ assertStemsTo("food", "foo");
+ assertStemsTo("foos", "foo");
+ assertStemsTo("lucen", "lucen");
+ assertStemsTo("lucene", "lucen", "lucene");
+ assertStemsTo("mahout", "mahout");
+ assertStemsTo("mahoute", "mahout");
+ assertStemsTo("moo", "moo");
+ assertStemsTo("mood", "moo");
+ assertStemsTo("olr", "olr");
+ assertStemsTo("solr", "olr");
+ }
+
+ // Bogus inputs that should not stem: each call passes no expected stems (empty list).
+ public virtual void testBogusStems()
+ {
+ assertStemsTo("abs");
+ assertStemsTo("abe");
+ assertStemsTo("sab");
+ assertStemsTo("sapach");
+ assertStemsTo("sapache");
+ assertStemsTo("apachee");
+ assertStemsTo("sfoo");
+ assertStemsTo("sfoos");
+ assertStemsTo("fooss");
+ assertStemsTo("lucenee");
+ assertStemsTo("solre");
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestTwoFold.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestTwoFold.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestTwoFold.cs
new file mode 100644
index 0000000..3cecced
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestTwoFold.cs
@@ -0,0 +1,45 @@
+namespace org.apache.lucene.analysis.hunspell
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using BeforeClass = org.junit.BeforeClass;
+
+ public class TestTwoFold : StemmerTestBase
+ {
+
+// One-time fixture setup; the Java @BeforeClass annotation shown below has no .NET attribute applied yet (TODO):
+//ORIGINAL LINE: @BeforeClass public static void beforeClass() throws Exception
+// Hands the "twofold.aff" / "morph.dic" resource names to init (presumably a StemmerTestBase helper; verify).
+ public static void beforeClass()
+ {
+ init("twofold.aff", "morph.dic");
+ }
+
+ public virtual void testExamples()
+ {
+ assertStemsTo("drink", "drink");
+ assertStemsTo("drinkable", "drink");
+ assertStemsTo("drinkables", "drink");
+ assertStemsTo("drinksable");
+ assertStemsTo("drinkableable");
+ assertStemsTo("drinks");
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestTwoSuffixes.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestTwoSuffixes.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestTwoSuffixes.cs
new file mode 100644
index 0000000..23141fc
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestTwoSuffixes.cs
@@ -0,0 +1,44 @@
+namespace org.apache.lucene.analysis.hunspell
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using BeforeClass = org.junit.BeforeClass;
+
+ public class TestTwoSuffixes : StemmerTestBase
+ {
+
+// One-time fixture setup; the Java @BeforeClass annotation shown below has no .NET attribute applied yet (TODO):
+//ORIGINAL LINE: @BeforeClass public static void beforeClass() throws Exception
+// Hands the "twosuffixes.aff" / "twosuffixes.dic" resource names to init (presumably a StemmerTestBase helper; verify).
+ public static void beforeClass()
+ {
+ init("twosuffixes.aff", "twosuffixes.dic");
+ }
+
+ public virtual void testExamples()
+ {
+ assertStemsTo("drink", "drink");
+ assertStemsTo("drinkable", "drink");
+ assertStemsTo("drinks", "drink");
+ assertStemsTo("drinkableable");
+ assertStemsTo("drinkss");
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hy/TestArmenianAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hy/TestArmenianAnalyzer.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hy/TestArmenianAnalyzer.cs
new file mode 100644
index 0000000..861a2e3
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hy/TestArmenianAnalyzer.cs
@@ -0,0 +1,70 @@
+namespace org.apache.lucene.analysis.hy
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+
+ public class TestArmenianAnalyzer : BaseTokenStreamTestCase
+ {
+ /// <summary>
+ /// Only constructs an ArmenianAnalyzer. Per the original Java test, construction
+ /// fails with an NPE when the stopwords file is missing from the classpath.
+ /// </summary>
+ public virtual void testResourcesAvailable()
+ {
+ new ArmenianAnalyzer(TEST_VERSION_CURRENT);
+ }
+
+ /// <summary>
+ /// Checks that two word forms stem to the same term and that a stopword yields no tokens. </summary>
+// Converter note: C# has no 'throws' clause; the original Java signature was:
+//ORIGINAL LINE: public void testBasics() throws java.io.IOException
+ public virtual void testBasics()
+ {
+ Analyzer a = new ArmenianAnalyzer(TEST_VERSION_CURRENT);
+ // stemming: both forms are expected to reduce to the same term
+ checkOneTerm(a, "արծիվ", "արծ");
+ checkOneTerm(a, "արծիվներ", "արծ");
+ // stopword: the expected token array is empty
+ assertAnalyzesTo(a, "է", new string[] { });
+ }
+
+ /// <summary>
+ /// Checks that a term placed in the exclusion set comes back unchanged while another term is still stemmed. </summary>
+// Converter note: C# has no 'throws' clause; the original Java signature was:
+//ORIGINAL LINE: public void testExclude() throws java.io.IOException
+ public virtual void testExclude()
+ {
+ CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("արծիվներ"), false);
+ Analyzer a = new ArmenianAnalyzer(TEST_VERSION_CURRENT, ArmenianAnalyzer.DefaultStopSet, exclusionSet);
+ checkOneTerm(a, "արծիվներ", "արծիվներ");
+ checkOneTerm(a, "արծիվ", "արծ");
+ }
+
+ /// <summary>
+ /// Feeds random data through the analyzer via checkRandomData (1000 * RANDOM_MULTIPLIER is its final argument). </summary>
+// Converter note: C# has no 'throws' clause; the original Java signature was:
+//ORIGINAL LINE: public void testRandomStrings() throws Exception
+ public virtual void testRandomStrings()
+ {
+ checkRandomData(random(), new ArmenianAnalyzer(TEST_VERSION_CURRENT), 1000 * RANDOM_MULTIPLIER);
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Id/TestIndonesianAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Id/TestIndonesianAnalyzer.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Id/TestIndonesianAnalyzer.cs
new file mode 100644
index 0000000..e38ea21
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Id/TestIndonesianAnalyzer.cs
@@ -0,0 +1,70 @@
+namespace org.apache.lucene.analysis.id
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+
+ public class TestIndonesianAnalyzer : BaseTokenStreamTestCase
+ {
+ /// <summary>
+ /// Only constructs an IndonesianAnalyzer. Per the original Java test, construction
+ /// fails with an NPE when the stopwords file is missing from the classpath.
+ /// </summary>
+ public virtual void testResourcesAvailable()
+ {
+ new IndonesianAnalyzer(TEST_VERSION_CURRENT);
+ }
+
+ /// <summary>
+ /// Checks stemming of two words and that a stopword yields no tokens. </summary>
+// Converter note: C# has no 'throws' clause; the original Java signature was:
+//ORIGINAL LINE: public void testBasics() throws java.io.IOException
+ public virtual void testBasics()
+ {
+ Analyzer a = new IndonesianAnalyzer(TEST_VERSION_CURRENT);
+ // stemming: each input is expected to come back as the shorter term
+ checkOneTerm(a, "peledakan", "ledak");
+ checkOneTerm(a, "pembunuhan", "bunuh");
+ // stopword: the expected token array is empty
+ assertAnalyzesTo(a, "bahwa", new string[] {});
+ }
+
+ /// <summary>
+ /// Checks that a term placed in the exclusion set comes back unchanged while another term is still stemmed. </summary>
+// Converter note: C# has no 'throws' clause; the original Java signature was:
+//ORIGINAL LINE: public void testExclude() throws java.io.IOException
+ public virtual void testExclude()
+ {
+ CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("peledakan"), false);
+ Analyzer a = new IndonesianAnalyzer(TEST_VERSION_CURRENT, IndonesianAnalyzer.DefaultStopSet, exclusionSet);
+ checkOneTerm(a, "peledakan", "peledakan");
+ checkOneTerm(a, "pembunuhan", "bunuh");
+ }
+
+ /// <summary>
+ /// Feeds random data through the analyzer via checkRandomData (1000 * RANDOM_MULTIPLIER is its final argument). </summary>
+// Converter note: C# has no 'throws' clause; the original Java signature was:
+//ORIGINAL LINE: public void testRandomStrings() throws Exception
+ public virtual void testRandomStrings()
+ {
+ checkRandomData(random(), new IndonesianAnalyzer(TEST_VERSION_CURRENT), 1000 * RANDOM_MULTIPLIER);
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Id/TestIndonesianStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Id/TestIndonesianStemFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Id/TestIndonesianStemFilterFactory.cs
new file mode 100644
index 0000000..5210956
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Id/TestIndonesianStemFilterFactory.cs
@@ -0,0 +1,73 @@
+namespace org.apache.lucene.analysis.id
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using BaseTokenStreamFactoryTestCase = org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+
+ /// <summary>
+ /// Simple tests that build the "IndonesianStem" token filter through tokenFilterFactory and check its output.
+ /// </summary>
+ public class TestIndonesianStemFilterFactory : BaseTokenStreamFactoryTestCase
+ {
+ /// <summary>
+ /// With default arguments, "dibukukannya" is expected to come out as the single token "buku".
+ /// </summary>
+// Converter note: C# has no 'throws' clause; the original Java signature was:
+//ORIGINAL LINE: public void testStemming() throws Exception
+ public virtual void testStemming()
+ {
+ Reader reader = new StringReader("dibukukannya");
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ stream = tokenFilterFactory("IndonesianStem").create(stream);
+ assertTokenStreamContents(stream, new string[] {"buku"});
+ }
+
+ /// <summary>
+ /// Inflectional-only mode: with stemDerivational=false the same input is expected to yield "dibukukan".
+ /// </summary>
+// Converter note: C# has no 'throws' clause; the original Java signature was:
+//ORIGINAL LINE: public void testStemmingInflectional() throws Exception
+ public virtual void testStemmingInflectional()
+ {
+ Reader reader = new StringReader("dibukukannya");
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ stream = tokenFilterFactory("IndonesianStem", "stemDerivational", "false").create(stream);
+ assertTokenStreamContents(stream, new string[] {"dibukukan"});
+ }
+
+ /// <summary>
+ /// Bogus arguments must raise System.ArgumentException with a message containing "Unknown parameters". </summary>
+// fail() runs only if no exception was thrown. The message check uses String.Contains (C# has no lowercase 'contains').
+//ORIGINAL LINE: public void testBogusArguments() throws Exception
+ public virtual void testBogusArguments()
+ {
+ try
+ {
+ tokenFilterFactory("IndonesianStem", "bogusArg", "bogusValue");
+ fail();
+ }
+ catch (System.ArgumentException expected)
+ {
+ assertTrue(expected.Message.Contains("Unknown parameters"));
+ }
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Id/TestIndonesianStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Id/TestIndonesianStemmer.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Id/TestIndonesianStemmer.cs
new file mode 100644
index 0000000..3b9e5f3
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Id/TestIndonesianStemmer.cs
@@ -0,0 +1,187 @@
+namespace org.apache.lucene.analysis.id
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using KeywordTokenizer = org.apache.lucene.analysis.core.KeywordTokenizer;
+
+ /// <summary>
+ /// Tests <seealso cref="IndonesianStemmer"/> through analyzers that wrap a KeywordTokenizer in an IndonesianStemFilter.
+ /// </summary>
+ public class TestIndonesianStemmer : BaseTokenStreamTestCase
+ {
+ /* Full stemming, no stopwords. createComponents is protected internal like the other helper analyzers here: a C# override cannot change accessibility. */
+ internal Analyzer a = new AnalyzerAnonymousInnerClassHelper();
+
+ private class AnalyzerAnonymousInnerClassHelper : Analyzer
+ {
+ public AnalyzerAnonymousInnerClassHelper()
+ {
+ }
+
+ protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+ {
+ Tokenizer tokenizer = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, new IndonesianStemFilter(tokenizer));
+ }
+ }
+
+ /// <summary>
+ /// Some examples from the paper, run through the full-stemming analyzer <c>a</c>. </summary>
+// Converter note: C# has no 'throws' clause; the original Java signature was:
+//ORIGINAL LINE: public void testExamples() throws java.io.IOException
+ public virtual void testExamples()
+ {
+ checkOneTerm(a, "bukukah", "buku");
+ checkOneTerm(a, "adalah", "ada");
+ checkOneTerm(a, "bukupun", "buku");
+ checkOneTerm(a, "bukuku", "buku");
+ checkOneTerm(a, "bukumu", "buku");
+ checkOneTerm(a, "bukunya", "buku");
+ checkOneTerm(a, "mengukur", "ukur");
+ checkOneTerm(a, "menyapu", "sapu");
+ checkOneTerm(a, "menduga", "duga");
+ checkOneTerm(a, "menuduh", "uduh");
+ checkOneTerm(a, "membaca", "baca");
+ checkOneTerm(a, "merusak", "rusak");
+ checkOneTerm(a, "pengukur", "ukur");
+ checkOneTerm(a, "penyapu", "sapu");
+ checkOneTerm(a, "penduga", "duga");
+ checkOneTerm(a, "pembaca", "baca");
+ checkOneTerm(a, "diukur", "ukur");
+ checkOneTerm(a, "tersapu", "sapu");
+ checkOneTerm(a, "kekasih", "kasih");
+ checkOneTerm(a, "berlari", "lari");
+ checkOneTerm(a, "belajar", "ajar");
+ checkOneTerm(a, "bekerja", "kerja");
+ checkOneTerm(a, "perjelas", "jelas");
+ checkOneTerm(a, "pelajar", "ajar");
+ checkOneTerm(a, "pekerja", "kerja");
+ checkOneTerm(a, "tarikkan", "tarik");
+ checkOneTerm(a, "ambilkan", "ambil");
+ checkOneTerm(a, "mengambilkan", "ambil");
+ checkOneTerm(a, "makanan", "makan");
+ checkOneTerm(a, "janjian", "janji");
+ checkOneTerm(a, "perjanjian", "janji");
+ checkOneTerm(a, "tandai", "tanda");
+ checkOneTerm(a, "dapati", "dapat");
+ checkOneTerm(a, "mendapati", "dapat");
+ checkOneTerm(a, "pantai", "panta");
+ }
+
+ /// <summary>
+ /// Some detailed analysis examples (that might not be the best), also run through analyzer <c>a</c>. </summary>
+// Converter note: C# has no 'throws' clause; the original Java signature was:
+//ORIGINAL LINE: public void testIRExamples() throws java.io.IOException
+ public virtual void testIRExamples()
+ {
+ checkOneTerm(a, "penyalahgunaan", "salahguna");
+ checkOneTerm(a, "menyalahgunakan", "salahguna");
+ checkOneTerm(a, "disalahgunakan", "salahguna");
+
+ checkOneTerm(a, "pertanggungjawaban", "tanggungjawab");
+ checkOneTerm(a, "mempertanggungjawabkan", "tanggungjawab");
+ checkOneTerm(a, "dipertanggungjawabkan", "tanggungjawab");
+
+ checkOneTerm(a, "pelaksanaan", "laksana");
+ checkOneTerm(a, "pelaksana", "laksana");
+ checkOneTerm(a, "melaksanakan", "laksana");
+ checkOneTerm(a, "dilaksanakan", "laksana");
+
+ checkOneTerm(a, "melibatkan", "libat");
+ checkOneTerm(a, "terlibat", "libat");
+
+ checkOneTerm(a, "penculikan", "culik");
+ checkOneTerm(a, "menculik", "culik");
+ checkOneTerm(a, "diculik", "culik");
+ checkOneTerm(a, "penculik", "culik");
+
+ checkOneTerm(a, "perubahan", "ubah");
+ checkOneTerm(a, "peledakan", "ledak");
+ checkOneTerm(a, "penanganan", "tangan");
+ checkOneTerm(a, "kepolisian", "polisi");
+ checkOneTerm(a, "kenaikan", "naik");
+ checkOneTerm(a, "bersenjata", "senjata");
+ checkOneTerm(a, "penyelewengan", "seleweng");
+ checkOneTerm(a, "kecelakaan", "celaka");
+ }
+
+ /* Inflectional-only stemming: the filter is built with false as its second argument. createComponents is protected internal, matching the other helpers. */
+ internal Analyzer b = new AnalyzerAnonymousInnerClassHelper2();
+
+ private class AnalyzerAnonymousInnerClassHelper2 : Analyzer
+ {
+ public AnalyzerAnonymousInnerClassHelper2()
+ {
+ }
+
+ protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+ {
+ Tokenizer tokenizer = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, new IndonesianStemFilter(tokenizer, false));
+ }
+ }
+
+ /// <summary>
+ /// Test stemming only inflectional suffixes, using analyzer <c>b</c>. </summary>
+// Converter note: C# has no 'throws' clause; the original Java signature was:
+//ORIGINAL LINE: public void testInflectionalOnly() throws java.io.IOException
+ public virtual void testInflectionalOnly()
+ {
+ checkOneTerm(b, "bukunya", "buku");
+ checkOneTerm(b, "bukukah", "buku");
+ checkOneTerm(b, "bukunyakah", "buku");
+ checkOneTerm(b, "dibukukannya", "dibukukan");
+ }
+
+// NOTE(review): despite the name, only "gigi" is expected back unchanged; the first two terms are expected to be stemmed.
+//ORIGINAL LINE: public void testShouldntStem() throws java.io.IOException
+ public virtual void testShouldntStem()
+ {
+ checkOneTerm(a, "bersenjata", "senjata");
+ checkOneTerm(a, "bukukah", "buku");
+ checkOneTerm(a, "gigi", "gigi");
+ }
+
+// An empty input term is expected to come back as an empty term. The local 'a' shadows the field of the same name.
+//ORIGINAL LINE: public void testEmptyTerm() throws java.io.IOException
+ public virtual void testEmptyTerm()
+ {
+ Analyzer a = new AnalyzerAnonymousInnerClassHelper3(this);
+ checkOneTerm(a, "", "");
+ }
+
+ private class AnalyzerAnonymousInnerClassHelper3 : Analyzer
+ {
+ private readonly TestIndonesianStemmer outerInstance;
+
+ public AnalyzerAnonymousInnerClassHelper3(TestIndonesianStemmer outerInstance)
+ {
+ this.outerInstance = outerInstance;
+ }
+
+ protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+ {
+ Tokenizer tokenizer = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, new IndonesianStemFilter(tokenizer));
+ }
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/In/TestIndicNormalizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/In/TestIndicNormalizer.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/In/TestIndicNormalizer.cs
new file mode 100644
index 0000000..846bd4b
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/In/TestIndicNormalizer.cs
@@ -0,0 +1,80 @@
+namespace org.apache.lucene.analysis.@in
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using KeywordTokenizer = org.apache.lucene.analysis.core.KeywordTokenizer;
+
+ /// <summary>
+ /// Tests IndicNormalizationFilter output for a handful of fixed inputs and for an empty term.
+ /// </summary>
+ public class TestIndicNormalizer : BaseTokenStreamTestCase
+ {
+ /// <summary>
+ /// Test some basic normalization: each call pairs an input string with the single token expected for it.
+ /// </summary>
+// Converter note: C# has no 'throws' clause; the original Java signature was:
+//ORIGINAL LINE: public void testBasics() throws java.io.IOException
+ public virtual void testBasics()
+ {
+ check("अाॅअाॅ", "ऑऑ");
+ check("अाॆअाॆ", "ऒऒ");
+ check("अाेअाे", "ओओ");
+ check("अाैअाै", "औऔ");
+ check("अाअा", "आआ");
+ check("अाैर", "और");
+ // khanda-ta (Bengali input)
+ check("ত্", "ৎ");
+ }
+
+// Helper: wraps a whitespace MockTokenizer over 'input' in an IndicNormalizationFilter and expects exactly one token equal to 'output'.
+//ORIGINAL LINE: private void check(String input, String output) throws java.io.IOException
+ private void check(string input, string output)
+ {
+ Tokenizer tokenizer = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
+ TokenFilter tf = new IndicNormalizationFilter(tokenizer);
+ assertTokenStreamContents(tf, new string[] {output});
+ }
+
+// An empty input term is expected to come back as an empty term (KeywordTokenizer + IndicNormalizationFilter).
+//ORIGINAL LINE: public void testEmptyTerm() throws java.io.IOException
+ public virtual void testEmptyTerm()
+ {
+ Analyzer a = new AnalyzerAnonymousInnerClassHelper(this);
+ checkOneTerm(a, "", "");
+ }
+
+ private class AnalyzerAnonymousInnerClassHelper : Analyzer
+ {
+ private readonly TestIndicNormalizer outerInstance;
+
+ public AnalyzerAnonymousInnerClassHelper(TestIndicNormalizer outerInstance)
+ {
+ this.outerInstance = outerInstance;
+ }
+
+ protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+ {
+ Tokenizer tokenizer = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, new IndicNormalizationFilter(tokenizer));
+ }
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/It/TestItalianAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/It/TestItalianAnalyzer.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/It/TestItalianAnalyzer.cs
new file mode 100644
index 0000000..cedacaf
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/It/TestItalianAnalyzer.cs
@@ -0,0 +1,94 @@
+namespace org.apache.lucene.analysis.it
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+ using Version = org.apache.lucene.util.Version;
+
+ /// <summary>
+ /// Tests for <c>ItalianAnalyzer</c>: construction, stemming, stopwords,
+ /// the stem exclusion set, random input, and contraction handling.
+ /// </summary>
+ public class TestItalianAnalyzer : BaseTokenStreamTestCase
+ {
+     /// <summary>
+     /// Only constructs the analyzer. Per the original note, this fails with an
+     /// NPE when the stopwords file is missing from the classpath.
+     /// </summary>
+     public virtual void testResourcesAvailable()
+     {
+         new ItalianAnalyzer(TEST_VERSION_CURRENT);
+     }
+
+     /// <summary>
+     /// Stopword removal and stemming with the default configuration.
+     /// </summary>
+     public virtual void testBasics()
+     {
+         Analyzer analyzer = new ItalianAnalyzer(TEST_VERSION_CURRENT);
+
+         // stemming: both inflections are expected to yield the same term
+         foreach (string word in new[] {"abbandonata", "abbandonati"})
+         {
+             checkOneTerm(analyzer, word, "abbandonat");
+         }
+
+         // stopword: no tokens expected
+         string[] noTokens = new string[0];
+         assertAnalyzesTo(analyzer, "dallo", noTokens);
+     }
+
+     /// <summary>
+     /// A word placed in the exclusion set is expected back unchanged, while
+     /// a word outside the set is still stemmed.
+     /// </summary>
+     public virtual void testExclude()
+     {
+         CharArraySet protectedWords = new CharArraySet(TEST_VERSION_CURRENT, asSet("abbandonata"), false);
+         Analyzer analyzer = new ItalianAnalyzer(TEST_VERSION_CURRENT, ItalianAnalyzer.DefaultStopSet, protectedWords);
+
+         // in the exclusion set
+         checkOneTerm(analyzer, "abbandonata", "abbandonata");
+         // not in the exclusion set
+         checkOneTerm(analyzer, "abbandonati", "abbandonat");
+     }
+
+     /// <summary>
+     /// Blasts random strings through a freshly constructed analyzer.
+     /// </summary>
+     public virtual void testRandomStrings()
+     {
+         checkRandomData(
+             random(),
+             new ItalianAnalyzer(TEST_VERSION_CURRENT),
+             1000 * RANDOM_MULTIPLIER);
+     }
+
+     /// <summary>
+     /// Checks that the elision filter is working for the current version.
+     /// </summary>
+     public virtual void testContractions()
+     {
+         Analyzer analyzer = new ItalianAnalyzer(TEST_VERSION_CURRENT);
+
+         assertAnalyzesTo(analyzer, "dell'Italia", new[] {"ital"});
+         assertAnalyzesTo(analyzer, "l'Italiano", new[] {"italian"});
+     }
+
+     /// <summary>
+     /// Checks that contraction handling is not enabled before 3.2: with
+     /// <c>Version.LUCENE_31</c> the elided prefix is expected to stay in the term.
+     /// </summary>
+     public virtual void testContractionsBackwards()
+     {
+         Analyzer legacyAnalyzer = new ItalianAnalyzer(Version.LUCENE_31);
+
+         assertAnalyzesTo(legacyAnalyzer, "dell'Italia", new[] {"dell'ital"});
+         assertAnalyzesTo(legacyAnalyzer, "l'Italiano", new[] {"l'ital"});
+     }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/It/TestItalianLightStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/It/TestItalianLightStemFilter.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/It/TestItalianLightStemFilter.cs
new file mode 100644
index 0000000..9710b7b
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/It/TestItalianLightStemFilter.cs
@@ -0,0 +1,90 @@
+namespace org.apache.lucene.analysis.it
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using KeywordTokenizer = org.apache.lucene.analysis.core.KeywordTokenizer;
+
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+// import static org.apache.lucene.analysis.VocabularyAssert.*;
+
+ /// <summary>
+ /// Simple tests for <see cref="ItalianLightStemFilter"/>.
+ /// </summary>
+ public class TestItalianLightStemFilter : BaseTokenStreamTestCase
+ {
+     // Shared by the vocabulary and random-data tests; assigned once at construction.
+     private readonly Analyzer analyzer = new WhitespaceStemAnalyzer();
+
+     /// <summary>
+     /// Analyzer made of a <c>MockTokenizer</c> in <c>MockTokenizer.WHITESPACE</c> mode
+     /// wrapped in an <c>ItalianLightStemFilter</c>.
+     /// </summary>
+     private sealed class WhitespaceStemAnalyzer : Analyzer
+     {
+         protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+         {
+             Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+             return new TokenStreamComponents(source, new ItalianLightStemFilter(source));
+         }
+     }
+
+     /// <summary>
+     /// Analyzer made of a <c>KeywordTokenizer</c> wrapped in an
+     /// <c>ItalianLightStemFilter</c>; used by <c>testEmptyTerm</c>.
+     /// </summary>
+     private sealed class KeywordStemAnalyzer : Analyzer
+     {
+         protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+         {
+             Tokenizer tokenizer = new KeywordTokenizer(reader);
+             return new TokenStreamComponents(tokenizer, new ItalianLightStemFilter(tokenizer));
+         }
+     }
+
+     /// <summary>
+     /// Test against a vocabulary from the reference implementation
+     /// (entry <c>itlight.txt</c> inside <c>itlighttestdata.zip</c>).
+     /// </summary>
+     public virtual void testVocabulary()
+     {
+         assertVocabulary(analyzer, getDataFile("itlighttestdata.zip"), "itlight.txt");
+     }
+
+     /// <summary>
+     /// Blast some random strings through the analyzer.
+     /// </summary>
+     public virtual void testRandomStrings()
+     {
+         checkRandomData(random(), analyzer, 1000 * RANDOM_MULTIPLIER);
+     }
+
+     /// <summary>
+     /// Passes the empty string through the keyword-tokenizer analyzer and
+     /// expects an empty term back.
+     /// </summary>
+     public virtual void testEmptyTerm()
+     {
+         Analyzer keywordAnalyzer = new KeywordStemAnalyzer();
+         checkOneTerm(keywordAnalyzer, "", "");
+     }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/It/TestItalianLightStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/It/TestItalianLightStemFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/It/TestItalianLightStemFilterFactory.cs
new file mode 100644
index 0000000..f09f646
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/It/TestItalianLightStemFilterFactory.cs
@@ -0,0 +1,57 @@
+namespace org.apache.lucene.analysis.it
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using BaseTokenStreamFactoryTestCase = org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+
+ /// <summary>
+ /// Simple tests to ensure the Italian light stem factory is working.
+ /// </summary>
+ public class TestItalianLightStemFilterFactory : BaseTokenStreamFactoryTestCase
+ {
+     /// <summary>
+     /// Creates the "ItalianLightStem" filter through the factory lookup and checks
+     /// that both inflections in the input come back as the same term.
+     /// </summary>
+     public virtual void testStemming()
+     {
+         Reader reader = new StringReader("ragazzo ragazzi");
+         TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+         stream = tokenFilterFactory("ItalianLightStem").create(stream);
+         assertTokenStreamContents(stream, new string[] {"ragazz", "ragazz"});
+     }
+
+     /// <summary>
+     /// Test that bogus arguments result in an exception whose message
+     /// mentions "Unknown parameters".
+     /// </summary>
+     public virtual void testBogusArguments()
+     {
+         try
+         {
+             tokenFilterFactory("ItalianLightStem", "bogusArg", "bogusValue");
+             // Reaching this line means the factory accepted the bogus argument.
+             fail();
+         }
+         catch (System.ArgumentException expected)
+         {
+             // Message is a System.String, whose substring test is Contains (PascalCase);
+             // the Java-style lowercase name is not a member of System.String.
+             assertTrue(expected.Message.Contains("Unknown parameters"));
+         }
+     }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Lv/TestLatvianAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Lv/TestLatvianAnalyzer.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Lv/TestLatvianAnalyzer.cs
new file mode 100644
index 0000000..1f2a61e
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Lv/TestLatvianAnalyzer.cs
@@ -0,0 +1,70 @@
+namespace org.apache.lucene.analysis.lv
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+
+ /// <summary>
+ /// Tests for <c>LatvianAnalyzer</c>: construction, stemming, stopwords,
+ /// the stem exclusion set, and random input.
+ /// </summary>
+ public class TestLatvianAnalyzer : BaseTokenStreamTestCase
+ {
+     /// <summary>
+     /// Only constructs the analyzer. Per the original note, this fails with an
+     /// NPE when the stopwords file is missing from the classpath.
+     /// </summary>
+     public virtual void testResourcesAvailable()
+     {
+         new LatvianAnalyzer(TEST_VERSION_CURRENT);
+     }
+
+     /// <summary>
+     /// Stopword removal and stemming with the default configuration.
+     /// </summary>
+     public virtual void testBasics()
+     {
+         Analyzer analyzer = new LatvianAnalyzer(TEST_VERSION_CURRENT);
+
+         // stemming: both inflections are expected to yield the same term
+         foreach (string word in new[] {"tirgiem", "tirgus"})
+         {
+             checkOneTerm(analyzer, word, "tirg");
+         }
+
+         // stopword: no tokens expected
+         string[] noTokens = new string[0];
+         assertAnalyzesTo(analyzer, "un", noTokens);
+     }
+
+     /// <summary>
+     /// A word placed in the exclusion set is expected back unchanged, while
+     /// a word outside the set is still stemmed.
+     /// </summary>
+     public virtual void testExclude()
+     {
+         CharArraySet protectedWords = new CharArraySet(TEST_VERSION_CURRENT, asSet("tirgiem"), false);
+         Analyzer analyzer = new LatvianAnalyzer(TEST_VERSION_CURRENT, LatvianAnalyzer.DefaultStopSet, protectedWords);
+
+         // in the exclusion set
+         checkOneTerm(analyzer, "tirgiem", "tirgiem");
+         // not in the exclusion set
+         checkOneTerm(analyzer, "tirgus", "tirg");
+     }
+
+     /// <summary>
+     /// Blasts random strings through a freshly constructed analyzer.
+     /// </summary>
+     public virtual void testRandomStrings()
+     {
+         checkRandomData(
+             random(),
+             new LatvianAnalyzer(TEST_VERSION_CURRENT),
+             1000 * RANDOM_MULTIPLIER);
+     }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Lv/TestLatvianStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Lv/TestLatvianStemFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Lv/TestLatvianStemFilterFactory.cs
new file mode 100644
index 0000000..0bb4be7
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Lv/TestLatvianStemFilterFactory.cs
@@ -0,0 +1,57 @@
+namespace org.apache.lucene.analysis.lv
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using BaseTokenStreamFactoryTestCase = org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+
+ /// <summary>
+ /// Simple tests to ensure the Latvian stem factory is working.
+ /// </summary>
+ public class TestLatvianStemFilterFactory : BaseTokenStreamFactoryTestCase
+ {
+     /// <summary>
+     /// Creates the "LatvianStem" filter through the factory lookup and checks
+     /// that both inflections in the input come back as the same term.
+     /// </summary>
+     public virtual void testStemming()
+     {
+         Reader reader = new StringReader("tirgiem tirgus");
+         TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+         stream = tokenFilterFactory("LatvianStem").create(stream);
+         assertTokenStreamContents(stream, new string[] {"tirg", "tirg"});
+     }
+
+     /// <summary>
+     /// Test that bogus arguments result in an exception whose message
+     /// mentions "Unknown parameters".
+     /// </summary>
+     public virtual void testBogusArguments()
+     {
+         try
+         {
+             tokenFilterFactory("LatvianStem", "bogusArg", "bogusValue");
+             // Reaching this line means the factory accepted the bogus argument.
+             fail();
+         }
+         catch (System.ArgumentException expected)
+         {
+             // Message is a System.String, whose substring test is Contains (PascalCase);
+             // the Java-style lowercase name is not a member of System.String.
+             assertTrue(expected.Message.Contains("Unknown parameters"));
+         }
+     }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Lv/TestLatvianStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Lv/TestLatvianStemmer.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Lv/TestLatvianStemmer.cs
new file mode 100644
index 0000000..b3c5610
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Lv/TestLatvianStemmer.cs
@@ -0,0 +1,328 @@
+namespace org.apache.lucene.analysis.lv
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using KeywordTokenizer = org.apache.lucene.analysis.core.KeywordTokenizer;
+
+ /// <summary>
+ /// Basic tests for <see cref="LatvianStemmer"/>, exercised through
+ /// <c>LatvianStemFilter</c>.
+ /// </summary>
+ public class TestLatvianStemmer : BaseTokenStreamTestCase
+ {
+     // Shared by every declension/adjective test below; assigned once at construction.
+     private readonly Analyzer a = new WhitespaceStemAnalyzer();
+
+     /// <summary>
+     /// Analyzer made of a <c>MockTokenizer</c> in <c>MockTokenizer.WHITESPACE</c> mode
+     /// wrapped in a <c>LatvianStemFilter</c>.
+     /// </summary>
+     private sealed class WhitespaceStemAnalyzer : Analyzer
+     {
+         protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+         {
+             Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+             return new TokenStreamComponents(tokenizer, new LatvianStemFilter(tokenizer));
+         }
+     }
+
+     /// <summary>
+     /// Analyzer made of a <c>KeywordTokenizer</c> wrapped in a
+     /// <c>LatvianStemFilter</c>; used by <c>testEmptyTerm</c>.
+     /// </summary>
+     private sealed class KeywordStemAnalyzer : Analyzer
+     {
+         protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+         {
+             Tokenizer tokenizer = new KeywordTokenizer(reader);
+             return new TokenStreamComponents(tokenizer, new LatvianStemFilter(tokenizer));
+         }
+     }
+
+     /// <summary>
+     /// Declension I nouns: every case form is expected to give the same stem.
+     /// </summary>
+     public virtual void testNouns1()
+     {
+         // decl. I
+         checkOneTerm(a, "tēvs", "tēv"); // nom. sing.
+         checkOneTerm(a, "tēvi", "tēv"); // nom. pl.
+         checkOneTerm(a, "tēva", "tēv"); // gen. sing.
+         checkOneTerm(a, "tēvu", "tēv"); // gen. pl.
+         checkOneTerm(a, "tēvam", "tēv"); // dat. sing.
+         checkOneTerm(a, "tēviem", "tēv"); // dat. pl.
+         checkOneTerm(a, "tēvu", "tēv"); // acc. sing.
+         checkOneTerm(a, "tēvus", "tēv"); // acc. pl.
+         checkOneTerm(a, "tēvā", "tēv"); // loc. sing.
+         checkOneTerm(a, "tēvos", "tēv"); // loc. pl.
+         checkOneTerm(a, "tēvs", "tēv"); // voc. sing.
+         checkOneTerm(a, "tēvi", "tēv"); // voc. pl.
+     }
+
+     /// <summary>
+     /// decl II nouns with (s,t) -> š and (d,z) -> ž
+     /// palatalization will generally conflate to two stems
+     /// due to the ambiguity (plural and singular).
+     /// </summary>
+     public virtual void testNouns2()
+     {
+         // decl. II
+
+         // c -> č palatalization
+         checkOneTerm(a, "lācis", "lāc"); // nom. sing.
+         checkOneTerm(a, "lāči", "lāc"); // nom. pl.
+         checkOneTerm(a, "lāča", "lāc"); // gen. sing.
+         checkOneTerm(a, "lāču", "lāc"); // gen. pl.
+         checkOneTerm(a, "lācim", "lāc"); // dat. sing.
+         checkOneTerm(a, "lāčiem", "lāc"); // dat. pl.
+         checkOneTerm(a, "lāci", "lāc"); // acc. sing.
+         checkOneTerm(a, "lāčus", "lāc"); // acc. pl.
+         checkOneTerm(a, "lācī", "lāc"); // loc. sing.
+         checkOneTerm(a, "lāčos", "lāc"); // loc. pl.
+         checkOneTerm(a, "lāci", "lāc"); // voc. sing.
+         checkOneTerm(a, "lāči", "lāc"); // voc. pl.
+
+         // n -> ņ palatalization
+         checkOneTerm(a, "akmens", "akmen"); // nom. sing.
+         checkOneTerm(a, "akmeņi", "akmen"); // nom. pl.
+         checkOneTerm(a, "akmens", "akmen"); // gen. sing.
+         checkOneTerm(a, "akmeņu", "akmen"); // gen. pl.
+         checkOneTerm(a, "akmenim", "akmen"); // dat. sing.
+         checkOneTerm(a, "akmeņiem", "akmen"); // dat. pl.
+         checkOneTerm(a, "akmeni", "akmen"); // acc. sing.
+         checkOneTerm(a, "akmeņus", "akmen"); // acc. pl.
+         checkOneTerm(a, "akmenī", "akmen"); // loc. sing.
+         checkOneTerm(a, "akmeņos", "akmen"); // loc. pl.
+         checkOneTerm(a, "akmens", "akmen"); // voc. sing.
+         checkOneTerm(a, "akmeņi", "akmen"); // voc. pl.
+
+         // no palatalization
+         checkOneTerm(a, "kurmis", "kurm"); // nom. sing.
+         checkOneTerm(a, "kurmji", "kurm"); // nom. pl.
+         checkOneTerm(a, "kurmja", "kurm"); // gen. sing.
+         checkOneTerm(a, "kurmju", "kurm"); // gen. pl.
+         checkOneTerm(a, "kurmim", "kurm"); // dat. sing.
+         checkOneTerm(a, "kurmjiem", "kurm"); // dat. pl.
+         checkOneTerm(a, "kurmi", "kurm"); // acc. sing.
+         checkOneTerm(a, "kurmjus", "kurm"); // acc. pl.
+         checkOneTerm(a, "kurmī", "kurm"); // loc. sing.
+         checkOneTerm(a, "kurmjos", "kurm"); // loc. pl.
+         checkOneTerm(a, "kurmi", "kurm"); // voc. sing.
+         checkOneTerm(a, "kurmji", "kurm"); // voc. pl.
+     }
+
+     /// <summary>
+     /// Declension III nouns: every case form is expected to give the same stem.
+     /// </summary>
+     public virtual void testNouns3()
+     {
+         // decl III
+         checkOneTerm(a, "lietus", "liet"); // nom. sing.
+         checkOneTerm(a, "lieti", "liet"); // nom. pl.
+         checkOneTerm(a, "lietus", "liet"); // gen. sing.
+         checkOneTerm(a, "lietu", "liet"); // gen. pl.
+         checkOneTerm(a, "lietum", "liet"); // dat. sing.
+         checkOneTerm(a, "lietiem", "liet"); // dat. pl.
+         checkOneTerm(a, "lietu", "liet"); // acc. sing.
+         checkOneTerm(a, "lietus", "liet"); // acc. pl.
+         checkOneTerm(a, "lietū", "liet"); // loc. sing.
+         checkOneTerm(a, "lietos", "liet"); // loc. pl.
+         checkOneTerm(a, "lietus", "liet"); // voc. sing.
+         checkOneTerm(a, "lieti", "liet"); // voc. pl.
+     }
+
+     /// <summary>
+     /// Declension IV nouns, checked with two sample words.
+     /// </summary>
+     public virtual void testNouns4()
+     {
+         // decl IV
+         checkOneTerm(a, "lapa", "lap"); // nom. sing.
+         checkOneTerm(a, "lapas", "lap"); // nom. pl.
+         checkOneTerm(a, "lapas", "lap"); // gen. sing.
+         checkOneTerm(a, "lapu", "lap"); // gen. pl.
+         checkOneTerm(a, "lapai", "lap"); // dat. sing.
+         checkOneTerm(a, "lapām", "lap"); // dat. pl.
+         checkOneTerm(a, "lapu", "lap"); // acc. sing.
+         checkOneTerm(a, "lapas", "lap"); // acc. pl.
+         checkOneTerm(a, "lapā", "lap"); // loc. sing.
+         checkOneTerm(a, "lapās", "lap"); // loc. pl.
+         checkOneTerm(a, "lapa", "lap"); // voc. sing.
+         checkOneTerm(a, "lapas", "lap"); // voc. pl.
+
+         checkOneTerm(a, "puika", "puik"); // nom. sing.
+         checkOneTerm(a, "puikas", "puik"); // nom. pl.
+         checkOneTerm(a, "puikas", "puik"); // gen. sing.
+         checkOneTerm(a, "puiku", "puik"); // gen. pl.
+         checkOneTerm(a, "puikam", "puik"); // dat. sing.
+         checkOneTerm(a, "puikām", "puik"); // dat. pl.
+         checkOneTerm(a, "puiku", "puik"); // acc. sing.
+         checkOneTerm(a, "puikas", "puik"); // acc. pl.
+         checkOneTerm(a, "puikā", "puik"); // loc. sing.
+         checkOneTerm(a, "puikās", "puik"); // loc. pl.
+         checkOneTerm(a, "puika", "puik"); // voc. sing.
+         checkOneTerm(a, "puikas", "puik"); // voc. pl.
+     }
+
+     /// <summary>
+     /// Genitive plural forms with (s,t) -> š and (d,z) -> ž
+     /// will not conflate due to ambiguity.
+     /// </summary>
+     public virtual void testNouns5()
+     {
+         // decl V
+         // l -> ļ palatalization
+         checkOneTerm(a, "egle", "egl"); // nom. sing.
+         checkOneTerm(a, "egles", "egl"); // nom. pl.
+         checkOneTerm(a, "egles", "egl"); // gen. sing.
+         checkOneTerm(a, "egļu", "egl"); // gen. pl.
+         checkOneTerm(a, "eglei", "egl"); // dat. sing.
+         checkOneTerm(a, "eglēm", "egl"); // dat. pl.
+         checkOneTerm(a, "egli", "egl"); // acc. sing.
+         checkOneTerm(a, "egles", "egl"); // acc. pl.
+         checkOneTerm(a, "eglē", "egl"); // loc. sing.
+         checkOneTerm(a, "eglēs", "egl"); // loc. pl.
+         checkOneTerm(a, "egle", "egl"); // voc. sing.
+         checkOneTerm(a, "egles", "egl"); // voc. pl.
+     }
+
+     /// <summary>
+     /// Declension VI nouns (no palatalization in the sample word).
+     /// </summary>
+     public virtual void testNouns6()
+     {
+         // decl VI
+
+         // no palatalization
+         checkOneTerm(a, "govs", "gov"); // nom. sing.
+         checkOneTerm(a, "govis", "gov"); // nom. pl.
+         checkOneTerm(a, "govs", "gov"); // gen. sing.
+         checkOneTerm(a, "govju", "gov"); // gen. pl.
+         checkOneTerm(a, "govij", "gov"); // dat. sing.
+         checkOneTerm(a, "govīm", "gov"); // dat. pl.
+         // NOTE(review): the two "govi " inputs below carry a trailing space;
+         // presumably the whitespace-mode tokenizer drops it - confirm it is intentional.
+         checkOneTerm(a, "govi ", "gov"); // acc. sing.
+         checkOneTerm(a, "govis", "gov"); // acc. pl.
+         checkOneTerm(a, "govi ", "gov"); // inst. sing.
+         checkOneTerm(a, "govīm", "gov"); // inst. pl.
+         checkOneTerm(a, "govī", "gov"); // loc. sing.
+         checkOneTerm(a, "govīs", "gov"); // loc. pl.
+         checkOneTerm(a, "govs", "gov"); // voc. sing.
+         checkOneTerm(a, "govis", "gov"); // voc. pl.
+     }
+
+     /// <summary>
+     /// Adjective forms (definite and indefinite, all listed cases, genders and
+     /// numbers) are all expected to give the same stem.
+     /// </summary>
+     public virtual void testAdjectives()
+     {
+         checkOneTerm(a, "zils", "zil"); // indef. nom. masc. sing.
+         checkOneTerm(a, "zilais", "zil"); // def. nom. masc. sing.
+         checkOneTerm(a, "zili", "zil"); // indef. nom. masc. pl.
+         checkOneTerm(a, "zilie", "zil"); // def. nom. masc. pl.
+         checkOneTerm(a, "zila", "zil"); // indef. nom. fem. sing.
+         checkOneTerm(a, "zilā", "zil"); // def. nom. fem. sing.
+         checkOneTerm(a, "zilas", "zil"); // indef. nom. fem. pl.
+         checkOneTerm(a, "zilās", "zil"); // def. nom. fem. pl.
+         checkOneTerm(a, "zila", "zil"); // indef. gen. masc. sing.
+         checkOneTerm(a, "zilā", "zil"); // def. gen. masc. sing.
+         checkOneTerm(a, "zilu", "zil"); // indef. gen. masc. pl.
+         checkOneTerm(a, "zilo", "zil"); // def. gen. masc. pl.
+         checkOneTerm(a, "zilas", "zil"); // indef. gen. fem. sing.
+         checkOneTerm(a, "zilās", "zil"); // def. gen. fem. sing.
+         checkOneTerm(a, "zilu", "zil"); // indef. gen. fem. pl.
+         checkOneTerm(a, "zilo", "zil"); // def. gen. fem. pl.
+         checkOneTerm(a, "zilam", "zil"); // indef. dat. masc. sing.
+         checkOneTerm(a, "zilajam", "zil"); // def. dat. masc. sing.
+         checkOneTerm(a, "ziliem", "zil"); // indef. dat. masc. pl.
+         checkOneTerm(a, "zilajiem", "zil"); // def. dat. masc. pl.
+         checkOneTerm(a, "zilai", "zil"); // indef. dat. fem. sing.
+         checkOneTerm(a, "zilajai", "zil"); // def. dat. fem. sing.
+         checkOneTerm(a, "zilām", "zil"); // indef. dat. fem. pl.
+         checkOneTerm(a, "zilajām", "zil"); // def. dat. fem. pl.
+         checkOneTerm(a, "zilu", "zil"); // indef. acc. masc. sing.
+         checkOneTerm(a, "zilo", "zil"); // def. acc. masc. sing.
+         checkOneTerm(a, "zilus", "zil"); // indef. acc. masc. pl.
+         checkOneTerm(a, "zilos", "zil"); // def. acc. masc. pl.
+         checkOneTerm(a, "zilu", "zil"); // indef. acc. fem. sing.
+         checkOneTerm(a, "zilo", "zil"); // def. acc. fem. sing.
+         checkOneTerm(a, "zilās", "zil"); // indef. acc. fem. pl.
+         checkOneTerm(a, "zilās", "zil"); // def. acc. fem. pl.
+         checkOneTerm(a, "zilā", "zil"); // indef. loc. masc. sing.
+         checkOneTerm(a, "zilajā", "zil"); // def. loc. masc. sing.
+         checkOneTerm(a, "zilos", "zil"); // indef. loc. masc. pl.
+         checkOneTerm(a, "zilajos", "zil"); // def. loc. masc. pl.
+         checkOneTerm(a, "zilā", "zil"); // indef. loc. fem. sing.
+         checkOneTerm(a, "zilajā", "zil"); // def. loc. fem. sing.
+         checkOneTerm(a, "zilās", "zil"); // indef. loc. fem. pl.
+         checkOneTerm(a, "zilajās", "zil"); // def. loc. fem. pl.
+         checkOneTerm(a, "zilais", "zil"); // voc. masc. sing.
+         checkOneTerm(a, "zilie", "zil"); // voc. masc. pl.
+         checkOneTerm(a, "zilā", "zil"); // voc. fem. sing.
+         checkOneTerm(a, "zilās", "zil"); // voc. fem. pl.
+     }
+
+     /// <summary>
+     /// Note: we intentionally don't handle the ambiguous
+     /// (s,t) -> š and (d,z) -> ž
+     /// </summary>
+     public virtual void testPalatalization()
+     {
+         checkOneTerm(a, "krāsns", "krāsn"); // nom. sing.
+         checkOneTerm(a, "krāšņu", "krāsn"); // gen. pl.
+         checkOneTerm(a, "zvaigzne", "zvaigzn"); // nom. sing.
+         checkOneTerm(a, "zvaigžņu", "zvaigzn"); // gen. pl.
+         checkOneTerm(a, "kāpslis", "kāpsl"); // nom. sing.
+         checkOneTerm(a, "kāpšļu", "kāpsl"); // gen. pl.
+         checkOneTerm(a, "zizlis", "zizl"); // nom. sing.
+         checkOneTerm(a, "zižļu", "zizl"); // gen. pl.
+         checkOneTerm(a, "vilnis", "viln"); // nom. sing.
+         checkOneTerm(a, "viļņu", "viln"); // gen. pl.
+         checkOneTerm(a, "lelle", "lell"); // nom. sing.
+         checkOneTerm(a, "leļļu", "lell"); // gen. pl.
+         checkOneTerm(a, "pinne", "pinn"); // nom. sing.
+         checkOneTerm(a, "piņņu", "pinn"); // gen. pl.
+         checkOneTerm(a, "rīkste", "rīkst"); // nom. sing.
+         checkOneTerm(a, "rīkšu", "rīkst"); // gen. pl.
+     }
+
+     /// <summary>
+     /// Test some length restrictions, we require a 3+ char stem,
+     /// with at least one vowel.
+     /// </summary>
+     public virtual void testLength()
+     {
+         checkOneTerm(a, "usa", "usa"); // length
+         checkOneTerm(a, "60ms", "60ms"); // vowel count
+     }
+
+     /// <summary>
+     /// Passes the empty string through the keyword-tokenizer analyzer and
+     /// expects an empty term back.
+     /// </summary>
+     public virtual void testEmptyTerm()
+     {
+         // Named distinctly so it does not shadow the shared field 'a'.
+         Analyzer keywordAnalyzer = new KeywordStemAnalyzer();
+         checkOneTerm(keywordAnalyzer, "", "");
+     }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzerTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzerTest.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzerTest.cs
new file mode 100644
index 0000000..de1db38
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzerTest.cs
@@ -0,0 +1,214 @@
+using System;
+using System.Text;
+using System.Threading;
+
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using UncaughtExceptionHandler = Thread.UncaughtExceptionHandler;
+
+ using StopAnalyzer = org.apache.lucene.analysis.core.StopAnalyzer;
+
+ /// <summary>
+ /// Verifies the behavior of PatternAnalyzer.
+ /// </summary>
+ public class PatternAnalyzerTest : BaseTokenStreamTestCase
+ {
+
+ /// <summary>
+ /// Exercises PatternAnalyzer configured with the non-word pattern, first
+ /// without and then with lowercasing and English stop words.
+ /// Behavior can be similar to SimpleAnalyzer (depending upon options).
+ /// </summary>
+ /// <remarks>Java original: <c>public void testNonWordPattern() throws java.io.IOException</c></remarks>
+ public virtual void testNonWordPattern()
+ {
+     const string text = "The quick brown Fox,the abcd1234 (56.78) dc.";
+     string[] caseSensitiveTokens = {"The", "quick", "brown", "Fox", "the", "abcd", "dc"};
+     string[] lowercasedTokens = {"quick", "brown", "fox", "abcd", "dc"};
+
+     // Non-letter split; case preserved; no stop-word set supplied.
+     PatternAnalyzer caseSensitive = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.NON_WORD_PATTERN, false, null);
+     check(caseSensitive, text, caseSensitiveTokens);
+
+     // Non-letter split; lowercased; English stop words removed.
+     PatternAnalyzer lowercasing = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.NON_WORD_PATTERN, true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
+     check(lowercasing, text, lowercasedTokens);
+ }
+
+ /// <summary>
+ /// Exercises PatternAnalyzer configured with the whitespace pattern, first
+ /// without and then with lowercasing and English stop words.
+ /// Behavior can be similar to WhitespaceAnalyzer (depending upon options).
+ /// </summary>
+ /// <remarks>Java original: <c>public void testWhitespacePattern() throws java.io.IOException</c></remarks>
+ public virtual void testWhitespacePattern()
+ {
+     const string text = "The quick brown Fox,the abcd1234 (56.78) dc.";
+     string[] caseSensitiveTokens = {"The", "quick", "brown", "Fox,the", "abcd1234", "(56.78)", "dc."};
+     string[] lowercasedTokens = {"quick", "brown", "fox,the", "abcd1234", "(56.78)", "dc."};
+
+     // Whitespace split; case preserved; no stop-word set supplied.
+     PatternAnalyzer caseSensitive = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN, false, null);
+     check(caseSensitive, text, caseSensitiveTokens);
+
+     // Whitespace split; lowercased; English stop words removed.
+     PatternAnalyzer lowercasing = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN, true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
+     check(lowercasing, text, lowercasedTokens);
+ }
+
+ /// <summary>
+ /// Exercises PatternAnalyzer configured with a custom pattern: the text is
+ /// tokenized on the comma ",", first without and then with lowercasing and
+ /// English stop words.
+ /// </summary>
+ /// <remarks>Java original: <c>public void testCustomPattern() throws java.io.IOException</c></remarks>
+ public virtual void testCustomPattern()
+ {
+     const string text = "Here,Are,some,Comma,separated,words,";
+     string[] caseSensitiveTokens = {"Here", "Are", "some", "Comma", "separated", "words"};
+     string[] lowercasedTokens = {"here", "some", "comma", "separated", "words"};
+
+     // Comma split; case preserved; no stop-word set supplied.
+     PatternAnalyzer caseSensitive = new PatternAnalyzer(TEST_VERSION_CURRENT, Pattern.compile(","), false, null);
+     check(caseSensitive, text, caseSensitiveTokens);
+
+     // Comma split; lowercased; English stop words removed.
+     PatternAnalyzer lowercasing = new PatternAnalyzer(TEST_VERSION_CURRENT, Pattern.compile(","), true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
+     check(lowercasing, text, lowercasedTokens);
+ }
+
+ /// <summary>
+ /// Test PatternAnalyzer against a large document: a word of 5000 'a'
+ /// characters and a word of 2000 'b' characters separated by one space.
+ /// Both words are expected back as the only two tokens.
+ /// </summary>
+ /// <remarks>Java original: <c>public void testHugeDocument() throws java.io.IOException</c></remarks>
+ public virtual void testHugeDocument()
+ {
+     // java.util.Arrays.fill has no .NET member of that name; the
+     // string(char, int) constructor builds the repeated-character words directly.
+     string largeWord = new string('a', 5000);
+     string largeWord2 = new string('b', 2000);
+
+     // 5000 a's, a space, 2000 b's
+     string document = largeWord + " " + largeWord2;
+
+     // Split on whitespace patterns, do not lowercase, no stopwords
+     PatternAnalyzer a = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN, false, null);
+     check(a, document, new string[] { largeWord, largeWord2 });
+ }
+
+ /// <summary>
+ /// Verify the analyzer analyzes to the expected contents. For PatternAnalyzer,
+ /// three paths are verified, in this order:
+ /// <list type="bullet">
+ /// <item><description>Ordinary analysis via assertAnalyzesTo</description></item>
+ /// <item><description>Analysis with a PatternAnalyzer.FastStringReader</description></item>
+ /// <item><description>Analysis with a StringReader</description></item>
+ /// </list>
+ /// </summary>
+ /// <param name="analyzer">Analyzer under test.</param>
+ /// <param name="document">Text to analyze.</param>
+ /// <param name="expected">Expected token terms, in order.</param>
+ /// <remarks>Java original: <c>private void check(PatternAnalyzer analyzer, String document, String expected[]) throws java.io.IOException</c></remarks>
+ private void check(PatternAnalyzer analyzer, string document, string[] expected)
+ {
+     // Path 1: ordinary analysis.
+     assertAnalyzesTo(analyzer, document, expected);
+
+     // Path 2: the document wrapped in PatternAnalyzer's "FastStringReader".
+     assertTokenStreamContents(analyzer.tokenStream("dummy", new PatternAnalyzer.FastStringReader(document)), expected);
+
+     // Path 3: the document wrapped in a StringReader.
+     assertTokenStreamContents(analyzer.tokenStream("dummy", new StringReader(document)), expected);
+ }
+
+ /// <summary>
+ /// Blast some random strings through the analyzer.
+ /// </summary>
+ /// <remarks>
+ /// A replacement default uncaught-exception handler is installed for the
+ /// duration of the run and the previous one is restored in the finally block.
+ /// Java original: <c>public void testRandomStrings() throws Exception</c>
+ /// </remarks>
+ public virtual void testRandomStrings()
+ {
+     Analyzer a = new PatternAnalyzer(TEST_VERSION_CURRENT, Pattern.compile(","), true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
+
+     // dodge jre bug http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7104012
+     UncaughtExceptionHandler savedHandler = Thread.DefaultUncaughtExceptionHandler;
+     Thread.DefaultUncaughtExceptionHandler = new UncaughtExceptionHandlerAnonymousInnerClassHelper(this, savedHandler);
+
+     try
+     {
+         // The converter left a bare "Thread.DefaultUncaughtExceptionHandler;" expression
+         // here. A property read is not a valid C# statement and its value was unused,
+         // so it has been removed.
+         checkRandomData(random(), a, 10000 * RANDOM_MULTIPLIER);
+     }
+     catch (System.IndexOutOfRangeException ex)
+     {
+         assumeTrue("not failing due to jre bug ", !isJREBug7104012(ex));
+         // Otherwise rethrow. "throw;" keeps the original stack trace, whereas
+         // "throw ex;" would reset it to this frame.
+         throw;
+     }
+     finally
+     {
+         Thread.DefaultUncaughtExceptionHandler = savedHandler;
+     }
+ }
+
+ /// <summary>
+ /// Handler installed by testRandomStrings. It checks the exception against
+ /// isJREBug7104012 through assumeTrue and then forwards it to the handler
+ /// that was in place before.
+ /// </summary>
+ private class UncaughtExceptionHandlerAnonymousInnerClassHelper : UncaughtExceptionHandler
+ {
+     // Enclosing test instance; stored by the constructor and not read inside this class.
+     private readonly PatternAnalyzerTest outerInstance;
+
+     // Handler that was the default before this one was installed.
+     private UncaughtExceptionHandler previousHandler;
+
+     public UncaughtExceptionHandlerAnonymousInnerClassHelper(PatternAnalyzerTest outerInstance, UncaughtExceptionHandler savedHandler)
+     {
+         this.outerInstance = outerInstance;
+         this.previousHandler = savedHandler;
+     }
+
+     public override void uncaughtException(Thread thread, Exception throwable)
+     {
+         bool unrelatedToJreBug = !isJREBug7104012(throwable);
+         assumeTrue("not failing due to jre bug ", unrelatedToJreBug);
+         // otherwise it is some other bug, pass to the previous handler
+         previousHandler.uncaughtException(thread, throwable);
+     }
+ }
+
+ /// <summary>
+ /// Reports whether <paramref name="t"/> (or, failing that, its InnerException)
+ /// is an IndexOutOfRangeException whose stack trace contains a
+ /// RuleBasedBreakIterator frame for the lookupBackwardState method.
+ /// </summary>
+ /// <param name="t">Exception to inspect.</param>
+ /// <returns>true when such a frame is found; otherwise false.</returns>
+ internal static bool isJREBug7104012(Exception t)
+ {
+     if (!(t is System.IndexOutOfRangeException))
+     {
+         // BaseTokenStreamTestCase now wraps exc in a new RuntimeException:
+         t = t.InnerException;
+         if (!(t is System.IndexOutOfRangeException))
+         {
+             return false;
+         }
+     }
+     // NOTE(review): StackTraceElement, ClassName and MethodName are carried over from
+     // the Java original; System.Exception.StackTrace is a string, so this part still
+     // needs porting (e.g. via System.Diagnostics.StackTrace) - confirm.
+     StackTraceElement[] trace = t.StackTrace;
+     foreach (StackTraceElement st in trace)
+     {
+         // Grouped explicitly: && binds tighter than ||, so the unparenthesized form
+         // accepted every java.text.RuleBasedBreakIterator frame regardless of method.
+         bool inBreakIterator = "java.text.RuleBasedBreakIterator".Equals(st.ClassName) || "sun.util.locale.provider.RuleBasedBreakIterator".Equals(st.ClassName);
+         if (inBreakIterator && "lookupBackwardState".Equals(st.MethodName))
+         {
+             return true;
+         }
+     }
+     return false;
+ }
+ }
+
+}
\ No newline at end of file