You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by sy...@apache.org on 2015/12/10 19:39:07 UTC
[18/27] lucenenet git commit: adding converted analysis common tests
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cz/TestCzechAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cz/TestCzechAnalyzer.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cz/TestCzechAnalyzer.cs
new file mode 100644
index 0000000..d71ee2a
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cz/TestCzechAnalyzer.cs
@@ -0,0 +1,91 @@
+using System;
+
+namespace org.apache.lucene.analysis.cz
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+ using Version = org.apache.lucene.util.Version;
+
+ /// <summary>
+ /// Test the CzechAnalyzer
+ ///
+ /// Before Lucene 3.1, CzechAnalyzer was a StandardAnalyzer with a custom
+ /// stopword list. As of 3.1 it also includes a stemmer.
+ ///
+ /// </summary>
+ public class TestCzechAnalyzer : BaseTokenStreamTestCase
+ {
+     /// <summary>
+     /// With a LUCENE_30 analyzer the stopwords are dropped and the remaining
+     /// words come back unstemmed.
+     /// </summary>
+     /// @deprecated (3.1) Remove this test when support for 3.0 indexes is no longer needed.
+     [Obsolete("(3.1) Remove this test when support for 3.0 indexes is no longer needed.")]
+     public virtual void testStopWordLegacy()
+     {
+         CzechAnalyzer legacyAnalyzer = new CzechAnalyzer(Version.LUCENE_30);
+         string[] expectedTokens = new string[] {"mluvime", "volnem"};
+         assertAnalyzesTo(legacyAnalyzer, "Pokud mluvime o volnem", expectedTokens);
+     }
+
+     /// <summary>
+     /// With the current version the same sentence loses its stopwords and the
+     /// remaining words are stemmed.
+     /// </summary>
+     public virtual void testStopWord()
+     {
+         CzechAnalyzer currentAnalyzer = new CzechAnalyzer(TEST_VERSION_CURRENT);
+         string[] expectedTokens = new string[] {"mluvim", "voln"};
+         assertAnalyzesTo(currentAnalyzer, "Pokud mluvime o volnem", expectedTokens);
+     }
+
+     /// <summary>
+     /// Runs two different inputs through one LUCENE_30 analyzer instance.
+     /// </summary>
+     /// @deprecated (3.1) Remove this test when support for 3.0 indexes is no longer needed.
+     [Obsolete("(3.1) Remove this test when support for 3.0 indexes is no longer needed.")]
+     public virtual void testReusableTokenStreamLegacy()
+     {
+         Analyzer shared = new CzechAnalyzer(Version.LUCENE_30);
+
+         string[] firstExpected = new string[] {"mluvime", "volnem"};
+         assertAnalyzesTo(shared, "Pokud mluvime o volnem", firstExpected);
+
+         string[] secondExpected = new string[] {"česká", "republika"};
+         assertAnalyzesTo(shared, "Česká Republika", secondExpected);
+     }
+
+     /// <summary>
+     /// Runs two different inputs through one current-version analyzer instance.
+     /// </summary>
+     public virtual void testReusableTokenStream()
+     {
+         Analyzer shared = new CzechAnalyzer(TEST_VERSION_CURRENT);
+
+         string[] firstExpected = new string[] {"mluvim", "voln"};
+         assertAnalyzesTo(shared, "Pokud mluvime o volnem", firstExpected);
+
+         string[] secondExpected = new string[] {"česk", "republik"};
+         assertAnalyzesTo(shared, "Česká Republika", secondExpected);
+     }
+
+     /// <summary>
+     /// A word placed in the stem exclusion set ("hole") must come through
+     /// unchanged while the other word is still stemmed.
+     /// </summary>
+     public virtual void testWithStemExclusionSet()
+     {
+         CharArraySet stemExclusions = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
+         stemExclusions.add("hole");
+
+         // Empty stopword set, so only the exclusion set influences the output.
+         CzechAnalyzer analyzer = new CzechAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, stemExclusions);
+
+         string[] expectedTokens = new string[] {"hole", "desk"};
+         assertAnalyzesTo(analyzer, "hole desek", expectedTokens);
+     }
+
+     /// <summary>
+     /// Feeds random data through the analyzer; the iteration count is scaled
+     /// by RANDOM_MULTIPLIER.
+     /// </summary>
+     public virtual void testRandomStrings()
+     {
+         checkRandomData(
+             random(),
+             new CzechAnalyzer(TEST_VERSION_CURRENT),
+             1000 * RANDOM_MULTIPLIER);
+     }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cz/TestCzechStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cz/TestCzechStemFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cz/TestCzechStemFilterFactory.cs
new file mode 100644
index 0000000..53e309c
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cz/TestCzechStemFilterFactory.cs
@@ -0,0 +1,60 @@
+namespace org.apache.lucene.analysis.cz
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using BaseTokenStreamFactoryTestCase = org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+
+ /// <summary>
+ /// Simple tests to ensure the Czech stem filter factory is working.
+ /// </summary>
+ public class TestCzechStemFilterFactory : BaseTokenStreamFactoryTestCase
+ {
+     /// <summary>
+     /// Ensure the filter actually stems text: a whitespace-tokenized
+     /// "angličtí" passed through the "CzechStem" factory must yield "anglick".
+     /// </summary>
+     public virtual void testStemming()
+     {
+         Reader reader = new StringReader("angličtí");
+         TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+         stream = tokenFilterFactory("CzechStem").create(stream);
+         assertTokenStreamContents(stream, new string[] {"anglick"});
+     }
+
+     /// <summary>
+     /// Test that bogus arguments result in an exception whose message
+     /// mentions "Unknown parameters".
+     /// </summary>
+     public virtual void testBogusArguments()
+     {
+         try
+         {
+             tokenFilterFactory("CzechStem", "bogusArg", "bogusValue");
+             // Reaching this line means the factory accepted the bogus argument.
+             fail();
+         }
+         catch (System.ArgumentException expected)
+         {
+             // System.String exposes Contains (PascalCase); the Java-style
+             // "contains" left behind by the converter is not a member of string.
+             assertTrue(expected.Message.Contains("Unknown parameters"));
+         }
+     }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cz/TestCzechStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cz/TestCzechStemmer.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cz/TestCzechStemmer.cs
new file mode 100644
index 0000000..c8badda
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cz/TestCzechStemmer.cs
@@ -0,0 +1,333 @@
+namespace org.apache.lucene.analysis.cz
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using KeywordTokenizer = org.apache.lucene.analysis.core.KeywordTokenizer;
+ using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+ using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+
+ /// <summary>
+ /// Test the Czech Stemmer.
+ ///
+ /// Note: it's algorithmic, so some stems are nonsense
+ ///
+ /// </summary>
+ public class TestCzechStemmer : BaseTokenStreamTestCase
+ {
+
+     /// <summary>
+     /// Analyzes every entry of <paramref name="forms"/> in the given order and
+     /// asserts that each one produces exactly the single token
+     /// <paramref name="stem"/>.
+     /// </summary>
+     private void assertFormsStemTo(CzechAnalyzer analyzer, string stem, params string[] forms)
+     {
+         foreach (string form in forms)
+         {
+             assertAnalyzesTo(analyzer, form, new string[] {stem});
+         }
+     }
+
+     /// <summary>
+     /// Test showing how masculine noun forms conflate
+     /// </summary>
+     public virtual void testMasculineNouns()
+     {
+         CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT);
+
+         /* animate ending with a hard consonant */
+         assertFormsStemTo(cz, "pán",
+             "pán", "páni", "pánové", "pána", "pánů", "pánovi",
+             "pánům", "pány", "páne", "pánech", "pánem");
+
+         /* inanimate ending with hard consonant */
+         assertFormsStemTo(cz, "hrad",
+             "hrad", "hradu", "hrade", "hradem", "hrady", "hradech",
+             "hradům", "hradů");
+
+         /* animate ending with a soft consonant */
+         assertFormsStemTo(cz, "muh",
+             "muž", "muži", "muže", "mužů", "mužům", "mužích", "mužem");
+
+         /* inanimate ending with a soft consonant */
+         assertFormsStemTo(cz, "stroj",
+             "stroj", "stroje", "strojů", "stroji", "strojům", "strojích",
+             "strojem");
+
+         /* ending with a */
+         assertFormsStemTo(cz, "předsd",
+             "předseda", "předsedové", "předsedy", "předsedů", "předsedovi",
+             "předsedům", "předsedu", "předsedo", "předsedech", "předsedou");
+
+         /* ending with e */
+         assertFormsStemTo(cz, "soudk",
+             "soudce", "soudci", "soudců", "soudcům", "soudcích", "soudcem");
+     }
+
+     /// <summary>
+     /// Test showing how feminine noun forms conflate
+     /// </summary>
+     public virtual void testFeminineNouns()
+     {
+         CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT);
+
+         /* ending with hard consonant */
+         assertFormsStemTo(cz, "kost",
+             "kost", "kosti", "kostí", "kostem", "kostech", "kostmi");
+
+         /* ending with a soft consonant */
+         // note: in this example the first form ("píseň") keeps its own stem
+         // and does not conflate with the rest
+         assertFormsStemTo(cz, "písň", "píseň");
+         assertFormsStemTo(cz, "písn",
+             "písně", "písni", "písněmi", "písních", "písním");
+
+         /* ending with e */
+         assertFormsStemTo(cz, "růh",
+             "růže", "růží", "růžím", "růžích", "růžemi", "růži");
+
+         /* ending with a */
+         assertFormsStemTo(cz, "žn",
+             "žena", "ženy", "žen", "ženě", "ženám", "ženu", "ženo",
+             "ženách", "ženou", "ženami");
+     }
+
+     /// <summary>
+     /// Test showing how neuter noun forms conflate
+     /// </summary>
+     public virtual void testNeuterNouns()
+     {
+         CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT);
+
+         /* ending with o */
+         assertFormsStemTo(cz, "měst",
+             "město", "města", "měst", "městu", "městům", "městě",
+             "městech", "městem", "městy");
+
+         /* ending with e */
+         assertFormsStemTo(cz, "moř",
+             "moře", "moří", "mořím", "moři", "mořích", "mořem");
+
+         /* ending with ě */
+         assertFormsStemTo(cz, "kuř",
+             "kuře", "kuřata", "kuřete", "kuřat", "kuřeti", "kuřatům",
+             "kuřatech", "kuřetem", "kuřaty");
+
+         /* ending with í */
+         assertFormsStemTo(cz, "stavn",
+             "stavení", "stavením", "staveních", "staveními");
+     }
+
+     /// <summary>
+     /// Test showing how adjectival forms conflate
+     /// </summary>
+     public virtual void testAdjectives()
+     {
+         CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT);
+
+         /* ending with ý/á/é */
+         assertFormsStemTo(cz, "mlad",
+             "mladý", "mladí", "mladého", "mladých", "mladému", "mladým",
+             "mladé", "mladém", "mladými", "mladá", "mladou");
+
+         /* ending with í */
+         assertFormsStemTo(cz, "jarn",
+             "jarní", "jarního", "jarních", "jarnímu", "jarním", "jarními");
+     }
+
+     /// <summary>
+     /// Test some possessive suffixes
+     /// </summary>
+     public virtual void testPossessive()
+     {
+         CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT);
+         assertFormsStemTo(cz, "karl", "Karlův");
+         assertFormsStemTo(cz, "jazyk", "jazykový");
+     }
+
+     /// <summary>
+     /// Test some exceptional rules, implemented as rewrites.
+     /// </summary>
+     public virtual void testExceptions()
+     {
+         CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT);
+
+         /* rewrite of št -> sk */
+         assertFormsStemTo(cz, "česk", "český", "čeští");
+
+         /* rewrite of čt -> ck */
+         assertFormsStemTo(cz, "anglick", "anglický", "angličtí");
+
+         /* rewrite of z -> h */
+         assertFormsStemTo(cz, "knih", "kniha", "knize");
+
+         /* rewrite of ž -> h */
+         assertFormsStemTo(cz, "mah", "mazat", "mažu");
+
+         /* rewrite of c -> k */
+         assertFormsStemTo(cz, "kluk", "kluk", "kluci", "klucích");
+
+         /* rewrite of č -> k */
+         assertFormsStemTo(cz, "hezk", "hezký", "hezčí");
+
+         /* rewrite of *ů* -> *o* */
+         assertFormsStemTo(cz, "hol", "hůl", "hole");
+
+         /* rewrite of e* -> * */
+         assertFormsStemTo(cz, "desk", "deska", "desek");
+     }
+
+     /// <summary>
+     /// Test that very short words are not stemmed.
+     /// </summary>
+     public virtual void testDontStem()
+     {
+         CzechAnalyzer cz = new CzechAnalyzer(TEST_VERSION_CURRENT);
+         assertFormsStemTo(cz, "e", "e");
+         assertFormsStemTo(cz, "zi", "zi");
+     }
+
+     /// <summary>
+     /// A token marked as a keyword by <seealso cref="SetKeywordMarkerFilter"/>
+     /// ("hole") must pass through the stem filter unchanged, while the
+     /// unmarked token is stemmed.
+     /// </summary>
+     public virtual void testWithKeywordAttribute()
+     {
+         CharArraySet keywords = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
+         keywords.add("hole");
+
+         // Build the chain step by step: tokenizer -> keyword marker -> stemmer.
+         MockTokenizer tokenizer = new MockTokenizer(new StringReader("hole desek"), MockTokenizer.WHITESPACE, false);
+         SetKeywordMarkerFilter marked = new SetKeywordMarkerFilter(tokenizer, keywords);
+         CzechStemFilter filter = new CzechStemFilter(marked);
+
+         assertTokenStreamContents(filter, new string[] {"hole", "desk"});
+     }
+
+     /// <summary>
+     /// An empty input term must come back as an empty term.
+     /// </summary>
+     public virtual void testEmptyTerm()
+     {
+         Analyzer keywordStemAnalyzer = new AnalyzerAnonymousInnerClassHelper(this);
+         checkOneTerm(keywordStemAnalyzer, "", "");
+     }
+
+     /// <summary>
+     /// Analyzer that emits the whole input as one token (KeywordTokenizer)
+     /// and passes it through <seealso cref="CzechStemFilter"/>.
+     /// </summary>
+     private class AnalyzerAnonymousInnerClassHelper : Analyzer
+     {
+         // Reference to the enclosing test instance, kept from the converted
+         // anonymous inner class; it is stored here but not read in this class.
+         private readonly TestCzechStemmer outerInstance;
+
+         public AnalyzerAnonymousInnerClassHelper(TestCzechStemmer outerInstance)
+         {
+             this.outerInstance = outerInstance;
+         }
+
+         protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+         {
+             Tokenizer source = new KeywordTokenizer(reader);
+             CzechStemFilter stemmed = new CzechStemFilter(source);
+             return new TokenStreamComponents(source, stemmed);
+         }
+     }
+
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Da/TestDanishAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Da/TestDanishAnalyzer.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Da/TestDanishAnalyzer.cs
new file mode 100644
index 0000000..00509ee
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Da/TestDanishAnalyzer.cs
@@ -0,0 +1,70 @@
+namespace org.apache.lucene.analysis.da
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+
+ public class TestDanishAnalyzer : BaseTokenStreamTestCase
+ {
+     /// <summary>
+     /// Constructing the analyzer with its defaults is the whole test.
+     /// Per the original Java test, this fails with an NPE when the
+     /// stopwords file is missing from the classpath.
+     /// </summary>
+     public virtual void testResourcesAvailable()
+     {
+         new DanishAnalyzer(TEST_VERSION_CURRENT);
+     }
+
+     /// <summary>
+     /// Checks stemming ("undersøgelse" and "undersøg" both give "undersøg")
+     /// and stopword removal ("på" yields no tokens).
+     /// </summary>
+     public virtual void testBasics()
+     {
+         Analyzer danish = new DanishAnalyzer(TEST_VERSION_CURRENT);
+
+         // stemming
+         checkOneTerm(danish, "undersøg", "undersøg");
+         checkOneTerm(danish, "undersøgelse", "undersøg");
+
+         // stopword
+         string[] noTokens = new string[0];
+         assertAnalyzesTo(danish, "på", noTokens);
+     }
+
+     /// <summary>
+     /// A word in the stem exclusion set must come through unstemmed.
+     /// </summary>
+     public virtual void testExclude()
+     {
+         CharArraySet protectedWords = new CharArraySet(TEST_VERSION_CURRENT, asSet("undersøgelse"), false);
+         Analyzer danish = new DanishAnalyzer(TEST_VERSION_CURRENT, DanishAnalyzer.DefaultStopSet, protectedWords);
+
+         checkOneTerm(danish, "undersøgelse", "undersøgelse");
+         checkOneTerm(danish, "undersøg", "undersøg");
+     }
+
+     /// <summary>
+     /// Feeds random data through the analyzer; the iteration count is scaled
+     /// by RANDOM_MULTIPLIER.
+     /// </summary>
+     public virtual void testRandomStrings()
+     {
+         checkRandomData(
+             random(),
+             new DanishAnalyzer(TEST_VERSION_CURRENT),
+             1000 * RANDOM_MULTIPLIER);
+     }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/De/TestGermanAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/De/TestGermanAnalyzer.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/De/TestGermanAnalyzer.cs
new file mode 100644
index 0000000..a5e12f2
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/De/TestGermanAnalyzer.cs
@@ -0,0 +1,85 @@
+namespace org.apache.lucene.analysis.de
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using LowerCaseTokenizer = org.apache.lucene.analysis.core.LowerCaseTokenizer;
+ using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+ using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+ using Version = org.apache.lucene.util.Version;
+
+ public class TestGermanAnalyzer : BaseTokenStreamTestCase
+ {
+     /// <summary>
+     /// Runs three inflections of "Tisch" through one analyzer instance; each
+     /// must reduce to "tisch".
+     /// </summary>
+     public virtual void testReusableTokenStream()
+     {
+         Analyzer german = new GermanAnalyzer(TEST_VERSION_CURRENT);
+         string[] inflections = new string[] {"Tisch", "Tische", "Tischen"};
+         foreach (string word in inflections)
+         {
+             checkOneTerm(german, word, "tisch");
+         }
+     }
+
+     /// <summary>
+     /// A token marked as a keyword by <seealso cref="SetKeywordMarkerFilter"/>
+     /// ("fischen") must pass through the stem filter unchanged, while the
+     /// unmarked token is stemmed.
+     /// </summary>
+     public virtual void testWithKeywordAttribute()
+     {
+         CharArraySet keywords = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
+         keywords.add("fischen");
+
+         // Build the chain step by step: tokenizer -> keyword marker -> stemmer.
+         LowerCaseTokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader("Fischen Trinken"));
+         SetKeywordMarkerFilter marked = new SetKeywordMarkerFilter(tokenizer, keywords);
+         GermanStemFilter filter = new GermanStemFilter(marked);
+
+         assertTokenStreamContents(filter, new string[] {"fischen", "trink"});
+     }
+
+     /// <summary>
+     /// A word supplied in the analyzer's stem exclusion set must not be stemmed.
+     /// </summary>
+     public virtual void testStemExclusionTable()
+     {
+         GermanAnalyzer german = new GermanAnalyzer(
+             TEST_VERSION_CURRENT,
+             CharArraySet.EMPTY_SET,
+             new CharArraySet(TEST_VERSION_CURRENT, asSet("tischen"), false));
+         checkOneTerm(german, "tischen", "tischen");
+     }
+
+     /// <summary>
+     /// test some features of the new snowball filter
+     /// these only pass with LUCENE_CURRENT, not if you use o.a.l.a.de.GermanStemmer
+     /// </summary>
+     public virtual void testGermanSpecials()
+     {
+         // a/o/u + e is equivalent to the umlaut form
+         GermanAnalyzer currentAnalyzer = new GermanAnalyzer(TEST_VERSION_CURRENT);
+         checkOneTerm(currentAnalyzer, "Schaltflächen", "schaltflach");
+         checkOneTerm(currentAnalyzer, "Schaltflaechen", "schaltflach");
+
+         // here they are with the old stemmer
+         GermanAnalyzer legacyAnalyzer = new GermanAnalyzer(Version.LUCENE_30);
+         checkOneTerm(legacyAnalyzer, "Schaltflächen", "schaltflach");
+         checkOneTerm(legacyAnalyzer, "Schaltflaechen", "schaltflaech");
+     }
+
+     /// <summary>
+     /// Feeds random data through the analyzer; the iteration count is scaled
+     /// by RANDOM_MULTIPLIER.
+     /// </summary>
+     public virtual void testRandomStrings()
+     {
+         checkRandomData(
+             random(),
+             new GermanAnalyzer(TEST_VERSION_CURRENT),
+             1000 * RANDOM_MULTIPLIER);
+     }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/De/TestGermanLightStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/De/TestGermanLightStemFilter.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/De/TestGermanLightStemFilter.cs
new file mode 100644
index 0000000..08614c6
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/De/TestGermanLightStemFilter.cs
@@ -0,0 +1,123 @@
+namespace org.apache.lucene.analysis.de
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using KeywordTokenizer = org.apache.lucene.analysis.core.KeywordTokenizer;
+ using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+ using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+// import static org.apache.lucene.analysis.VocabularyAssert.*;
+
+ /// <summary>
+ /// Unit tests covering <seealso cref="GermanLightStemFilter"/>.
+ /// </summary>
+ public class TestGermanLightStemFilter : BaseTokenStreamTestCase
+ {
+ // Analyzer shared by testVocabulary and testRandomStrings.
+ private Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper();
+
+ /// <summary>
+ /// Whitespace <c>MockTokenizer</c> wrapped in a <c>GermanLightStemFilter</c>. </summary>
+ private class AnalyzerAnonymousInnerClassHelper : Analyzer
+ {
+ protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+ {
+ Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ TokenStream stemmed = new GermanLightStemFilter(tokenizer);
+ return new TokenStreamComponents(tokenizer, stemmed);
+ }
+ }
+
+ /// <summary>
+ /// Test against a vocabulary from the reference impl (entry <c>delight.txt</c> of the data file). </summary>
+ /// <remarks> Java original: <c>public void testVocabulary() throws java.io.IOException</c> </remarks>
+ public virtual void testVocabulary()
+ {
+ var vocabularyArchive = getDataFile("delighttestdata.zip");
+ assertVocabulary(analyzer, vocabularyArchive, "delight.txt");
+ }
+
+ /// <summary>
+ /// A term contained in the set handed to <c>SetKeywordMarkerFilter</c> must come out unchanged. </summary>
+ /// <remarks> Java original: <c>public void testKeyword() throws java.io.IOException</c> </remarks>
+ public virtual void testKeyword()
+ {
+ // Declared 'final' in the Java original.
+ CharArraySet keywords = new CharArraySet(TEST_VERSION_CURRENT, asSet("sängerinnen"), false);
+ Analyzer keywordAware = new AnalyzerAnonymousInnerClassHelper2(this, keywords);
+ checkOneTerm(keywordAware, "sängerinnen", "sängerinnen");
+ }
+
+ /// <summary> Whitespace <c>MockTokenizer</c>, then <c>SetKeywordMarkerFilter</c>, then the light stemmer. </summary>
+ private class AnalyzerAnonymousInnerClassHelper2 : Analyzer
+ {
+ private readonly TestGermanLightStemFilter outerInstance;
+ private readonly CharArraySet exclusionSet;
+
+ public AnalyzerAnonymousInnerClassHelper2(TestGermanLightStemFilter outerInstance, CharArraySet exclusionSet)
+ {
+ this.exclusionSet = exclusionSet;
+ this.outerInstance = outerInstance;
+ }
+
+ protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+ {
+ Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ TokenStream marked = new SetKeywordMarkerFilter(tokenizer, exclusionSet);
+ return new TokenStreamComponents(tokenizer, new GermanLightStemFilter(marked));
+ }
+ }
+
+ /// <summary>
+ /// Pushes randomly generated text through the shared analyzer. </summary>
+ /// <remarks> Java original: <c>public void testRandomStrings() throws Exception</c> </remarks>
+ public virtual void testRandomStrings()
+ {
+ var iterations = 1000 * RANDOM_MULTIPLIER;
+ checkRandomData(random(), analyzer, iterations);
+ }
+
+ /// <summary>
+ /// The empty string must analyze to the empty term. </summary>
+ public virtual void testEmptyTerm()
+ {
+ // Java original: public void testEmptyTerm() throws java.io.IOException
+ checkOneTerm(new AnalyzerAnonymousInnerClassHelper3(this), "", "");
+ }
+
+ /// <summary> <c>KeywordTokenizer</c> wrapped in a <c>GermanLightStemFilter</c>. </summary>
+ private class AnalyzerAnonymousInnerClassHelper3 : Analyzer
+ {
+ private readonly TestGermanLightStemFilter outerInstance;
+ public AnalyzerAnonymousInnerClassHelper3(TestGermanLightStemFilter outerInstance)
+ {
+ this.outerInstance = outerInstance;
+ }
+
+ protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+ {
+ Tokenizer keywordSource = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(keywordSource, new GermanLightStemFilter(keywordSource));
+ }
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/De/TestGermanLightStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/De/TestGermanLightStemFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/De/TestGermanLightStemFilterFactory.cs
new file mode 100644
index 0000000..d0c9dea
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/De/TestGermanLightStemFilterFactory.cs
@@ -0,0 +1,57 @@
+namespace org.apache.lucene.analysis.de
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using BaseTokenStreamFactoryTestCase = org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+
+ /// <summary>
+ /// Simple tests to ensure the German light stem factory is working.
+ /// </summary>
+ public class TestGermanLightStemFilterFactory : BaseTokenStreamFactoryTestCase
+ {
+ /// <summary>
+ /// Runs "häuser" through the filter created by the "GermanLightStem" factory and expects the token "haus". </summary>
+ public virtual void testStemming()
+ {
+ Reader reader = new StringReader("häuser");
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ stream = tokenFilterFactory("GermanLightStem").create(stream);
+ assertTokenStreamContents(stream, new string[] {"haus"});
+ }
+
+ /// <summary>
+ /// Test that bogus arguments result in exception </summary>
+ /// <remarks>
+ /// The message check uses <c>string.Contains</c>; the Java-style <c>contains</c> does not exist on <c>System.String</c>. </remarks>
+ public virtual void testBogusArguments()
+ {
+ try
+ {
+ tokenFilterFactory("GermanLightStem", "bogusArg", "bogusValue");
+ fail();
+ }
+ catch (System.ArgumentException expected)
+ {
+ assertTrue(expected.Message.Contains("Unknown parameters"));
+ }
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/De/TestGermanMinimalStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/De/TestGermanMinimalStemFilter.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/De/TestGermanMinimalStemFilter.cs
new file mode 100644
index 0000000..2bb224f
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/De/TestGermanMinimalStemFilter.cs
@@ -0,0 +1,139 @@
+namespace org.apache.lucene.analysis.de
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using KeywordTokenizer = org.apache.lucene.analysis.core.KeywordTokenizer;
+ using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+ using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+// import static org.apache.lucene.analysis.VocabularyAssert.*;
+
+ /// <summary>
+ /// Unit tests covering <seealso cref="GermanMinimalStemFilter"/>.
+ /// </summary>
+ public class TestGermanMinimalStemFilter : BaseTokenStreamTestCase
+ {
+ // Analyzer shared by testExamples, testVocabulary and testRandomStrings.
+ private Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper();
+
+ /// <summary>
+ /// Whitespace <c>MockTokenizer</c> wrapped in a <c>GermanMinimalStemFilter</c>. </summary>
+ private class AnalyzerAnonymousInnerClassHelper : Analyzer
+ {
+ protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+ {
+ Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ TokenStream stemmed = new GermanMinimalStemFilter(tokenizer);
+ return new TokenStreamComponents(tokenizer, stemmed);
+ }
+ }
+
+ /// <summary>
+ /// Test some examples from the paper; each row below is { input word, expected term }. </summary>
+ /// <remarks> Java original: <c>public void testExamples() throws java.io.IOException</c> </remarks>
+ public virtual void testExamples()
+ {
+ string[][] examples = new string[][] {
+ new string[] {"sängerinnen", "sangerin"}, new string[] {"frauen", "frau"},
+ new string[] {"kenntnisse", "kenntnis"}, new string[] {"staates", "staat"},
+ new string[] {"bilder", "bild"}, new string[] {"boote", "boot"},
+ new string[] {"götter", "gott"}, new string[] {"äpfel", "apfel"}};
+ foreach (string[] example in examples)
+ {
+ checkOneTerm(analyzer, example[0], example[1]);
+ }
+ }
+
+ /// <summary>
+ /// A term contained in the set handed to <c>SetKeywordMarkerFilter</c> must come out unchanged. </summary>
+ /// <remarks> Java original: <c>public void testKeyword() throws java.io.IOException</c> </remarks>
+ public virtual void testKeyword()
+ {
+ // Declared 'final' in the Java original.
+ CharArraySet keywords = new CharArraySet(TEST_VERSION_CURRENT, asSet("sängerinnen"), false);
+ Analyzer keywordAware = new AnalyzerAnonymousInnerClassHelper2(this, keywords);
+ checkOneTerm(keywordAware, "sängerinnen", "sängerinnen");
+ }
+
+ /// <summary> Whitespace <c>MockTokenizer</c>, then <c>SetKeywordMarkerFilter</c>, then the minimal stemmer. </summary>
+ private class AnalyzerAnonymousInnerClassHelper2 : Analyzer
+ {
+ private readonly TestGermanMinimalStemFilter outerInstance;
+ private readonly CharArraySet exclusionSet;
+
+ public AnalyzerAnonymousInnerClassHelper2(TestGermanMinimalStemFilter outerInstance, CharArraySet exclusionSet)
+ {
+ this.exclusionSet = exclusionSet;
+ this.outerInstance = outerInstance;
+ }
+
+ protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+ {
+ Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ TokenStream marked = new SetKeywordMarkerFilter(tokenizer, exclusionSet);
+ return new TokenStreamComponents(tokenizer, new GermanMinimalStemFilter(marked));
+ }
+ }
+
+ /// <summary>
+ /// Test against a vocabulary from the reference impl (entry <c>deminimal.txt</c> of the data file). </summary>
+ /// <remarks> Java original: <c>public void testVocabulary() throws java.io.IOException</c> </remarks>
+ public virtual void testVocabulary()
+ {
+ var vocabularyArchive = getDataFile("deminimaltestdata.zip");
+ assertVocabulary(analyzer, vocabularyArchive, "deminimal.txt");
+ }
+
+ /// <summary>
+ /// Pushes randomly generated text through the shared analyzer. </summary>
+ /// <remarks> Java original: <c>public void testRandomStrings() throws Exception</c> </remarks>
+ public virtual void testRandomStrings()
+ {
+ var iterations = 1000 * RANDOM_MULTIPLIER;
+ checkRandomData(random(), analyzer, iterations);
+ }
+
+ /// <summary>
+ /// The empty string must analyze to the empty term. </summary>
+ public virtual void testEmptyTerm()
+ {
+ // Java original: public void testEmptyTerm() throws java.io.IOException
+ checkOneTerm(new AnalyzerAnonymousInnerClassHelper3(this), "", "");
+ }
+
+ /// <summary> <c>KeywordTokenizer</c> wrapped in a <c>GermanMinimalStemFilter</c>. </summary>
+ private class AnalyzerAnonymousInnerClassHelper3 : Analyzer
+ {
+ private readonly TestGermanMinimalStemFilter outerInstance;
+ public AnalyzerAnonymousInnerClassHelper3(TestGermanMinimalStemFilter outerInstance)
+ {
+ this.outerInstance = outerInstance;
+ }
+
+ protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+ {
+ Tokenizer keywordSource = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(keywordSource, new GermanMinimalStemFilter(keywordSource));
+ }
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/De/TestGermanMinimalStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/De/TestGermanMinimalStemFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/De/TestGermanMinimalStemFilterFactory.cs
new file mode 100644
index 0000000..6abbb9e
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/De/TestGermanMinimalStemFilterFactory.cs
@@ -0,0 +1,57 @@
+namespace org.apache.lucene.analysis.de
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using BaseTokenStreamFactoryTestCase = org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+
+ /// <summary>
+ /// Simple tests to ensure the German minimal stem factory is working.
+ /// </summary>
+ public class TestGermanMinimalStemFilterFactory : BaseTokenStreamFactoryTestCase
+ {
+ /// <summary>
+ /// Runs "bilder" through the filter created by the "GermanMinimalStem" factory and expects the token "bild". </summary>
+ public virtual void testStemming()
+ {
+ Reader reader = new StringReader("bilder");
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ stream = tokenFilterFactory("GermanMinimalStem").create(stream);
+ assertTokenStreamContents(stream, new string[] {"bild"});
+ }
+
+ /// <summary>
+ /// Test that bogus arguments result in exception </summary>
+ /// <remarks>
+ /// The message check uses <c>string.Contains</c>; the Java-style <c>contains</c> does not exist on <c>System.String</c>. </remarks>
+ public virtual void testBogusArguments()
+ {
+ try
+ {
+ tokenFilterFactory("GermanMinimalStem", "bogusArg", "bogusValue");
+ fail();
+ }
+ catch (System.ArgumentException expected)
+ {
+ assertTrue(expected.Message.Contains("Unknown parameters"));
+ }
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/De/TestGermanNormalizationFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/De/TestGermanNormalizationFilter.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/De/TestGermanNormalizationFilter.cs
new file mode 100644
index 0000000..5bb92b5
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/De/TestGermanNormalizationFilter.cs
@@ -0,0 +1,114 @@
+namespace org.apache.lucene.analysis.de
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using KeywordTokenizer = org.apache.lucene.analysis.core.KeywordTokenizer;
+
+ /// <summary>
+ /// Unit tests covering <seealso cref="GermanNormalizationFilter"/>.
+ /// </summary>
+ public class TestGermanNormalizationFilter : BaseTokenStreamTestCase
+ {
+ // Analyzer shared by every test in this class except testEmptyTerm.
+ private Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper();
+
+ /// <summary>
+ /// Whitespace <c>MockTokenizer</c> wrapped in a <c>GermanNormalizationFilter</c>.
+ /// </summary>
+ private class AnalyzerAnonymousInnerClassHelper : Analyzer
+ {
+ public AnalyzerAnonymousInnerClassHelper()
+ {
+ }
+
+ protected internal override TokenStreamComponents createComponents(string field, Reader reader)
+ {
+ // The Java original held both the tokenizer and the filter in 'final' locals.
+ Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ return new TokenStreamComponents(source, new GermanNormalizationFilter(source));
+ }
+ }
+
+ /// <summary>
+ /// Tests that a/o/u + e is equivalent to the umlaut form: both spellings must give the same term.
+ /// </summary>
+ /// <remarks> Java original: <c>public void testBasicExamples() throws java.io.IOException</c> </remarks>
+ public virtual void testBasicExamples()
+ {
+ const string expected = "Schaltflachen";
+ checkOneTerm(analyzer, "Schaltflächen", expected);
+ checkOneTerm(analyzer, "Schaltflaechen", expected);
+ }
+
+ /// <summary>
+ /// Tests the specific heuristic that ue is not folded after a vowel or q.
+ /// </summary>
+ /// <remarks> Java original: <c>public void testUHeuristic() throws java.io.IOException</c> </remarks>
+ public virtual void testUHeuristic()
+ {
+ const string unchanged = "dauer"; // "ue" directly follows the vowel 'a' here
+ checkOneTerm(analyzer, unchanged, unchanged);
+ }
+
+ /// <summary>
+ /// Tests german specific folding of sharp-s: "ß" must become "ss".
+ /// </summary>
+ /// <remarks> Java original: <c>public void testSpecialFolding() throws java.io.IOException</c> </remarks>
+ public virtual void testSpecialFolding()
+ {
+ const string folded = "weissbier";
+ checkOneTerm(analyzer, "weißbier", folded);
+ }
+
+ /// <summary>
+ /// Pushes randomly generated text through the shared analyzer. </summary>
+ /// <remarks> Java original: <c>public void testRandomStrings() throws Exception</c> </remarks>
+ public virtual void testRandomStrings()
+ {
+ var iterations = 1000 * RANDOM_MULTIPLIER;
+ checkRandomData(random(), analyzer, iterations);
+ }
+
+ /// <summary>
+ /// The empty string must analyze to the empty term. </summary>
+ public virtual void testEmptyTerm()
+ {
+ // Java original: public void testEmptyTerm() throws java.io.IOException
+ checkOneTerm(new AnalyzerAnonymousInnerClassHelper2(this), "", "");
+ }
+
+ /// <summary> <c>KeywordTokenizer</c> wrapped in a <c>GermanNormalizationFilter</c>. </summary>
+ private class AnalyzerAnonymousInnerClassHelper2 : Analyzer
+ {
+ private readonly TestGermanNormalizationFilter outerInstance;
+ public AnalyzerAnonymousInnerClassHelper2(TestGermanNormalizationFilter outerInstance)
+ {
+ this.outerInstance = outerInstance;
+ }
+
+ protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+ {
+ Tokenizer keywordSource = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(keywordSource, new GermanNormalizationFilter(keywordSource));
+ }
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/De/TestGermanNormalizationFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/De/TestGermanNormalizationFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/De/TestGermanNormalizationFilterFactory.cs
new file mode 100644
index 0000000..e3da605
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/De/TestGermanNormalizationFilterFactory.cs
@@ -0,0 +1,57 @@
+namespace org.apache.lucene.analysis.de
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using BaseTokenStreamFactoryTestCase = org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+
+ /// <summary>
+ /// Simple tests to ensure the German normalization factory is working.
+ /// </summary>
+ public class TestGermanNormalizationFilterFactory : BaseTokenStreamFactoryTestCase
+ {
+ /// <summary>
+ /// Runs "weißbier" through the filter created by the "GermanNormalization" factory and expects "weissbier". </summary>
+ public virtual void testStemming()
+ {
+ Reader reader = new StringReader("weißbier");
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ stream = tokenFilterFactory("GermanNormalization").create(stream);
+ assertTokenStreamContents(stream, new string[] {"weissbier"});
+ }
+
+ /// <summary>
+ /// Test that bogus arguments result in exception </summary>
+ /// <remarks>
+ /// The message check uses <c>string.Contains</c>; the Java-style <c>contains</c> does not exist on <c>System.String</c>. </remarks>
+ public virtual void testBogusArguments()
+ {
+ try
+ {
+ tokenFilterFactory("GermanNormalization", "bogusArg", "bogusValue");
+ fail();
+ }
+ catch (System.ArgumentException expected)
+ {
+ assertTrue(expected.Message.Contains("Unknown parameters"));
+ }
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/De/TestGermanStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/De/TestGermanStemFilter.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/De/TestGermanStemFilter.cs
new file mode 100644
index 0000000..c31b02d
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/De/TestGermanStemFilter.cs
@@ -0,0 +1,136 @@
+namespace org.apache.lucene.analysis.de
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using KeywordTokenizer = org.apache.lucene.analysis.core.KeywordTokenizer;
+ using LowerCaseFilter = org.apache.lucene.analysis.core.LowerCaseFilter;
+ using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+ using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+// import static org.apache.lucene.analysis.VocabularyAssert.*;
+
+ /// <summary>
+ /// Test the German stemmer. The stemming algorithm is known to work less
+ /// than perfectly, as it doesn't use any word lists with exceptions. We
+ /// also check some of the cases where the algorithm is wrong.
+ ///
+ /// </summary>
+ public class TestGermanStemFilter : BaseTokenStreamTestCase
+ {
+ // Analyzer shared by testStemming, testEmpty and testRandomStrings.
+ internal Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper();
+
+ /// <summary>
+ /// <c>KeywordTokenizer</c>, then <c>LowerCaseFilter</c>, then <c>GermanStemFilter</c>. </summary>
+ private class AnalyzerAnonymousInnerClassHelper : Analyzer
+ {
+ protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+ {
+ Tokenizer t = new KeywordTokenizer(reader);
+ TokenStream lowercased = new LowerCaseFilter(TEST_VERSION_CURRENT, t);
+ return new TokenStreamComponents(t, new GermanStemFilter(lowercased));
+ }
+ }
+
+ /// <summary> Runs <c>assertVocabulary</c> over the "data.txt" resource; the stream is released even if the check throws. </summary>
+ public virtual void testStemming()
+ {
+ using (System.IO.Stream vocOut = this.GetType().getResourceAsStream("data.txt"))
+ {
+ assertVocabulary(analyzer, vocOut);
+ }
+ }
+
+ // LUCENE-3043: we use keywordtokenizer in this test,
+ // so ensure the stemmer does not crash on zero-length strings.
+ /// <summary>
+ /// The empty string must analyze to exactly one empty token. </summary>
+ public virtual void testEmpty()
+ {
+ assertAnalyzesTo(analyzer, "", new string[] {""});
+ }
+
+ /// <summary>
+ /// A term contained in the set handed to <c>SetKeywordMarkerFilter</c> must come out unchanged. </summary>
+ /// <remarks> Java original: <c>public void testKeyword() throws java.io.IOException</c> </remarks>
+ public virtual void testKeyword()
+ {
+ // Declared 'final' in the Java original.
+ CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("sängerinnen"), false);
+ Analyzer a = new AnalyzerAnonymousInnerClassHelper2(this, exclusionSet);
+ checkOneTerm(a, "sängerinnen", "sängerinnen");
+ }
+
+ /// <summary> Whitespace <c>MockTokenizer</c>, then <c>SetKeywordMarkerFilter</c>, then <c>GermanStemFilter</c>. </summary>
+ private class AnalyzerAnonymousInnerClassHelper2 : Analyzer
+ {
+ private readonly TestGermanStemFilter outerInstance;
+ private readonly CharArraySet exclusionSet;
+
+ public AnalyzerAnonymousInnerClassHelper2(TestGermanStemFilter outerInstance, CharArraySet exclusionSet)
+ {
+ this.outerInstance = outerInstance;
+ this.exclusionSet = exclusionSet;
+ }
+
+ protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+ {
+ Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
+ return new TokenStreamComponents(source, new GermanStemFilter(sink));
+ }
+ }
+
+ /// <summary>
+ /// blast some random strings through the analyzer </summary>
+ /// <remarks>
+ /// Java original: <c>public void testRandomStrings() throws Exception</c> </remarks>
+ public virtual void testRandomStrings()
+ {
+ checkRandomData(random(), analyzer, 1000 * RANDOM_MULTIPLIER);
+ }
+
+ /// <summary>
+ /// The empty string must analyze to the empty term when no lower-casing filter is in the chain. </summary>
+ public virtual void testEmptyTerm()
+ {
+ Analyzer a = new AnalyzerAnonymousInnerClassHelper3(this);
+ checkOneTerm(a, "", "");
+ }
+
+ /// <summary> <c>KeywordTokenizer</c> wrapped directly in a <c>GermanStemFilter</c>. </summary>
+ private class AnalyzerAnonymousInnerClassHelper3 : Analyzer
+ {
+ private readonly TestGermanStemFilter outerInstance;
+ public AnalyzerAnonymousInnerClassHelper3(TestGermanStemFilter outerInstance)
+ {
+ this.outerInstance = outerInstance;
+ }
+
+ protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+ {
+ Tokenizer tokenizer = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, new GermanStemFilter(tokenizer));
+ }
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/De/TestGermanStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/De/TestGermanStemFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/De/TestGermanStemFilterFactory.cs
new file mode 100644
index 0000000..f4b9e73
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/De/TestGermanStemFilterFactory.cs
@@ -0,0 +1,60 @@
+namespace org.apache.lucene.analysis.de
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using BaseTokenStreamFactoryTestCase = org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+
+ /// <summary>
+ /// Simple tests to ensure the German stem filter factory is working.
+ /// </summary>
+ public class TestGermanStemFilterFactory : BaseTokenStreamFactoryTestCase
+ {
+ /// <summary>
+ /// Ensure the filter actually stems text.
+ /// </summary>
+ /// <remarks>
+ /// Runs "Tischen" through the filter created by the "GermanStem" factory and expects the token "tisch". </remarks>
+ public virtual void testStemming()
+ {
+ Reader reader = new StringReader("Tischen");
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ stream = tokenFilterFactory("GermanStem").create(stream);
+ assertTokenStreamContents(stream, new string[] {"tisch"});
+ }
+
+ /// <summary>
+ /// Test that bogus arguments result in exception </summary>
+ /// <remarks>
+ /// The message check uses <c>string.Contains</c>; the Java-style <c>contains</c> does not exist on <c>System.String</c>. </remarks>
+ public virtual void testBogusArguments()
+ {
+ try
+ {
+ tokenFilterFactory("GermanStem", "bogusArg", "bogusValue");
+ fail();
+ }
+ catch (System.ArgumentException expected)
+ {
+ assertTrue(expected.Message.Contains("Unknown parameters"));
+ }
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/El/GreekAnalyzerTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/El/GreekAnalyzerTest.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/El/GreekAnalyzerTest.cs
new file mode 100644
index 0000000..f18f101
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/El/GreekAnalyzerTest.cs
@@ -0,0 +1,98 @@
+using System;
+
+namespace org.apache.lucene.analysis.el
+{
+
+ /*
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using Version = org.apache.lucene.util.Version;
+
+ /// <summary>
+ /// Unit tests verifying the tokens produced by the GreekAnalyzer, both with
+ /// TEST_VERSION_CURRENT and with the Version.LUCENE_30 compatibility version.
+ /// </summary>
+ public class GreekAnalyzerTest : BaseTokenStreamTestCase
+ {
+     /// <summary>
+     /// Test the analysis of various Greek strings with TEST_VERSION_CURRENT;
+     /// the expected tokens are the stemmed forms of the input words.
+     /// </summary>
+     public virtual void testAnalyzer()
+     {
+         Analyzer analyzer = new GreekAnalyzer(TEST_VERSION_CURRENT);
+         // Capitals and small accented letters are analyzed correctly,
+         // and the resulting tokens are stemmed.
+         string[] sentenceTokens = {"μια", "εξαιρετ", "καλ", "πλουσ", "σειρ", "χαρακτηρ", "ελληνικ", "γλωσσ"};
+         assertAnalyzesTo(analyzer, "Μία εξαιρετικά καλή και πλούσια σειρά χαρακτήρων της Ελληνικής γλώσσας", sentenceTokens);
+         // Small letters with diaeresis are analyzed correctly,
+         // and punctuation marks are eliminated.
+         string[] punctuatedTokens = {"προιοντ", "πολλαπλ", "αναγκ"};
+         assertAnalyzesTo(analyzer, "Προϊόντα (και) [πολλαπλές] - ΑΝΑΓΚΕΣ", punctuatedTokens);
+         // Capital accented letters and capital letters with diaeresis are
+         // analyzed correctly, and stop words are eliminated.
+         string[] stopWordTokens = {"προυποθεσ", "αψογ", "μεστ", "αλλ"};
+         assertAnalyzesTo(analyzer, "ΠΡΟΫΠΟΘΕΣΕΙΣ Άψογος, ο μεστός και οι άλλοι", stopWordTokens);
+     }
+
+     /// <summary>
+     /// Test the analysis of the same Greek strings with Version.LUCENE_30; the expected tokens keep their endings.
+     /// </summary>
+     [Obsolete("(3.1) Remove this test when support for 3.0 is no longer needed")]
+     public virtual void testAnalyzerBWCompat()
+     {
+         Analyzer analyzer = new GreekAnalyzer(Version.LUCENE_30);
+         // Capitals and small accented letters are analyzed correctly
+         // (the expected tokens here are not stemmed).
+         string[] sentenceTokens = {"μια", "εξαιρετικα", "καλη", "πλουσια", "σειρα", "χαρακτηρων", "ελληνικησ", "γλωσσασ"};
+         assertAnalyzesTo(analyzer, "Μία εξαιρετικά καλή και πλούσια σειρά χαρακτήρων της Ελληνικής γλώσσας", sentenceTokens);
+         // Small letters with diaeresis are analyzed correctly,
+         // and punctuation marks are eliminated.
+         string[] punctuatedTokens = {"προιοντα", "πολλαπλεσ", "αναγκεσ"};
+         assertAnalyzesTo(analyzer, "Προϊόντα (και) [πολλαπλές] - ΑΝΑΓΚΕΣ", punctuatedTokens);
+         // Capital accented letters and capital letters with diaeresis are
+         // analyzed correctly, and stop words are eliminated.
+         string[] stopWordTokens = {"προυποθεσεισ", "αψογοσ", "μεστοσ", "αλλοι"};
+         assertAnalyzesTo(analyzer, "ΠΡΟΫΠΟΘΕΣΕΙΣ Άψογος, ο μεστός και οι άλλοι", stopWordTokens);
+     }
+
+     /// <summary>
+     /// Analyze the three sample strings one after another with a single analyzer instance. </summary>
+     public virtual void testReusableTokenStream()
+     {
+         Analyzer analyzer = new GreekAnalyzer(TEST_VERSION_CURRENT);
+         // Capitals and small accented letters, plus stemming.
+         string[] sentenceTokens = {"μια", "εξαιρετ", "καλ", "πλουσ", "σειρ", "χαρακτηρ", "ελληνικ", "γλωσσ"};
+         assertAnalyzesTo(analyzer, "Μία εξαιρετικά καλή και πλούσια σειρά χαρακτήρων της Ελληνικής γλώσσας", sentenceTokens);
+         // Small letters with diaeresis, and elimination of punctuation marks.
+         string[] punctuatedTokens = {"προιοντ", "πολλαπλ", "αναγκ"};
+         assertAnalyzesTo(analyzer, "Προϊόντα (και) [πολλαπλές] - ΑΝΑΓΚΕΣ", punctuatedTokens);
+         // Capital accented letters, capitals with diaeresis, and elimination of stop words.
+         string[] stopWordTokens = {"προυποθεσ", "αψογ", "μεστ", "αλλ"};
+         assertAnalyzesTo(analyzer, "ΠΡΟΫΠΟΘΕΣΕΙΣ Άψογος, ο μεστός και οι άλλοι", stopWordTokens);
+     }
+
+     /// <summary> Blast some random strings through the analyzer. </summary>
+     public virtual void testRandomStrings()
+     {
+         checkRandomData(
+             random(),
+             new GreekAnalyzer(TEST_VERSION_CURRENT),
+             1000 * RANDOM_MULTIPLIER);
+     }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/El/TestGreekLowerCaseFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/El/TestGreekLowerCaseFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/El/TestGreekLowerCaseFilterFactory.cs
new file mode 100644
index 0000000..2922563
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/El/TestGreekLowerCaseFilterFactory.cs
@@ -0,0 +1,60 @@
+namespace org.apache.lucene.analysis.el
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using BaseTokenStreamFactoryTestCase = org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+
+ /// <summary>
+ /// Simple tests to ensure the Greek lowercase filter factory is working.
+ /// </summary>
+ public class TestGreekLowerCaseFilterFactory : BaseTokenStreamFactoryTestCase
+ {
+     /// <summary>
+     /// Ensure the filter actually lowercases (and a bit more) Greek text: both
+     /// "Μάϊος" and "ΜΆΪΟΣ" must leave the "GreekLowerCase" filter as "μαιοσ".
+     /// </summary>
+     public virtual void testNormalization()
+     {
+         // Java's Reader has no .NET type of that name; System.IO.TextReader is the base class of StringReader.
+         System.IO.TextReader reader = new System.IO.StringReader("Μάϊος ΜΆΪΟΣ");
+         TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+         stream = tokenFilterFactory("GreekLowerCase").create(stream);
+         assertTokenStreamContents(stream, new string[] {"μαιοσ", "μαιοσ"});
+     }
+
+     /// <summary>
+     /// Test that bogus arguments result in an <see cref="System.ArgumentException"/>
+     /// whose message mentions "Unknown parameters".
+     /// </summary>
+     public virtual void testBogusArguments()
+     {
+         try
+         {
+             tokenFilterFactory("GreekLowerCase", "bogusArg", "bogusValue");
+             fail();
+         }
+         catch (System.ArgumentException expected)
+         {
+             assertTrue(expected.Message.Contains("Unknown parameters"));
+         }
+     }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/El/TestGreekStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/El/TestGreekStemFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/El/TestGreekStemFilterFactory.cs
new file mode 100644
index 0000000..e26eca3
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/El/TestGreekStemFilterFactory.cs
@@ -0,0 +1,58 @@
+namespace org.apache.lucene.analysis.el
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using BaseTokenStreamFactoryTestCase = org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+
+ /// <summary>
+ /// Simple tests to ensure the Greek stem filter factory is working.
+ /// </summary>
+ public class TestGreekStemFilterFactory : BaseTokenStreamFactoryTestCase
+ {
+     /// <summary>
+     /// Ensure "GreekLowerCase" followed by "GreekStem" turns "άνθρωπος" into "ανθρωπ". </summary>
+     public virtual void testStemming()
+     {
+         System.IO.TextReader reader = new System.IO.StringReader("άνθρωπος");
+         TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+         stream = tokenFilterFactory("GreekLowerCase").create(stream);
+         stream = tokenFilterFactory("GreekStem").create(stream);
+         assertTokenStreamContents(stream, new string[] {"ανθρωπ"});
+     }
+
+     /// <summary>
+     /// Test that bogus arguments result in an <see cref="System.ArgumentException"/>
+     /// whose message mentions "Unknown parameters".
+     /// </summary>
+     public virtual void testBogusArguments()
+     {
+         try
+         {
+             tokenFilterFactory("GreekStem", "bogusArg", "bogusValue");
+             fail();
+         }
+         catch (System.ArgumentException expected)
+         {
+             assertTrue(expected.Message.Contains("Unknown parameters"));
+         }
+     }
+ }
+
+}
\ No newline at end of file