You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by sy...@apache.org on 2015/12/10 19:39:06 UTC
[17/27] lucenenet git commit: adding converted analysis common tests
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/El/TestGreekStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/El/TestGreekStemmer.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/El/TestGreekStemmer.cs
new file mode 100644
index 0000000..a022749
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/El/TestGreekStemmer.cs
@@ -0,0 +1,571 @@
+namespace org.apache.lucene.analysis.el
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using KeywordTokenizer = org.apache.lucene.analysis.core.KeywordTokenizer;
+
+ public class TestGreekStemmer : BaseTokenStreamTestCase
+ {
+ internal Analyzer a = new GreekAnalyzer(TEST_VERSION_CURRENT);
+
+// Feeds masculine noun inflections, grouped by declension ending, to checkOneTerm with the GreekAnalyzer field 'a' and the expected stem.
+// Java original: public void testMasculineNouns() throws Exception ('throws' clauses are not available in .NET).
+ public virtual void testMasculineNouns()
+ {
+ // -ος
+ checkOneTerm(a, "άνθρωπος", "ανθρωπ");
+ checkOneTerm(a, "ανθρώπου", "ανθρωπ");
+ checkOneTerm(a, "άνθρωπο", "ανθρωπ");
+ checkOneTerm(a, "άνθρωπε", "ανθρωπ");
+ checkOneTerm(a, "άνθρωποι", "ανθρωπ");
+ checkOneTerm(a, "ανθρώπων", "ανθρωπ");
+ checkOneTerm(a, "ανθρώπους", "ανθρωπ");
+ checkOneTerm(a, "άνθρωποι", "ανθρωπ"); // duplicate of an earlier check in this group
+
+ // -ης
+ checkOneTerm(a, "πελάτης", "πελατ");
+ checkOneTerm(a, "πελάτη", "πελατ");
+ checkOneTerm(a, "πελάτες", "πελατ");
+ checkOneTerm(a, "πελατών", "πελατ");
+
+ // -ας/-ες
+ checkOneTerm(a, "ελέφαντας", "ελεφαντ");
+ checkOneTerm(a, "ελέφαντα", "ελεφαντ");
+ checkOneTerm(a, "ελέφαντες", "ελεφαντ");
+ checkOneTerm(a, "ελεφάντων", "ελεφαντ");
+
+ // -ας/-αδες
+ checkOneTerm(a, "μπαμπάς", "μπαμπ");
+ checkOneTerm(a, "μπαμπά", "μπαμπ");
+ checkOneTerm(a, "μπαμπάδες", "μπαμπ");
+ checkOneTerm(a, "μπαμπάδων", "μπαμπ");
+
+ // -ης/-ηδες
+ checkOneTerm(a, "μπακάλης", "μπακαλ");
+ checkOneTerm(a, "μπακάλη", "μπακαλ");
+ checkOneTerm(a, "μπακάληδες", "μπακαλ");
+ checkOneTerm(a, "μπακάληδων", "μπακαλ");
+
+ // -ες
+ checkOneTerm(a, "καφές", "καφ");
+ checkOneTerm(a, "καφέ", "καφ");
+ checkOneTerm(a, "καφέδες", "καφ");
+ checkOneTerm(a, "καφέδων", "καφ");
+
+ // -έας/είς
+ checkOneTerm(a, "γραμματέας", "γραμματε");
+ checkOneTerm(a, "γραμματέα", "γραμματε");
+ // the plural forms conflate with each other, not with the singular forms
+ checkOneTerm(a, "γραμματείς", "γραμματ");
+ checkOneTerm(a, "γραμματέων", "γραμματ");
+
+ // -ους/οι
+ checkOneTerm(a, "απόπλους", "αποπλ");
+ checkOneTerm(a, "απόπλου", "αποπλ");
+ checkOneTerm(a, "απόπλοι", "αποπλ");
+ checkOneTerm(a, "απόπλων", "αποπλ");
+
+ // -ους/-ουδες
+ checkOneTerm(a, "παππούς", "παππ");
+ checkOneTerm(a, "παππού", "παππ");
+ checkOneTerm(a, "παππούδες", "παππ");
+ checkOneTerm(a, "παππούδων", "παππ");
+
+ // -ης/-εις
+ checkOneTerm(a, "λάτρης", "λατρ");
+ checkOneTerm(a, "λάτρη", "λατρ");
+ checkOneTerm(a, "λάτρεις", "λατρ");
+ checkOneTerm(a, "λάτρεων", "λατρ");
+
+ // -υς
+ checkOneTerm(a, "πέλεκυς", "πελεκ");
+ checkOneTerm(a, "πέλεκυ", "πελεκ");
+ checkOneTerm(a, "πελέκεις", "πελεκ");
+ checkOneTerm(a, "πελέκεων", "πελεκ");
+
+ // -ωρ
+ // note: the nominative/vocative form doesn't conflate with the rest
+ checkOneTerm(a, "μέντωρ", "μεντωρ");
+ checkOneTerm(a, "μέντορος", "μεντορ");
+ checkOneTerm(a, "μέντορα", "μεντορ");
+ checkOneTerm(a, "μέντορες", "μεντορ");
+ checkOneTerm(a, "μεντόρων", "μεντορ");
+
+ // -ων
+ checkOneTerm(a, "αγώνας", "αγων");
+ checkOneTerm(a, "αγώνος", "αγων");
+ checkOneTerm(a, "αγώνα", "αγων");
+ checkOneTerm(a, "αγώνα", "αγων"); // duplicate of the previous check
+ checkOneTerm(a, "αγώνες", "αγων");
+ checkOneTerm(a, "αγώνων", "αγων");
+
+ // -ας/-ηδες
+ checkOneTerm(a, "αέρας", "αερ");
+ checkOneTerm(a, "αέρα", "αερ");
+ checkOneTerm(a, "αέρηδες", "αερ");
+ checkOneTerm(a, "αέρηδων", "αερ");
+
+ // -ης/-ητες
+ checkOneTerm(a, "γόης", "γο");
+ checkOneTerm(a, "γόη", "γοη"); // too short
+ // the two plural forms conflate with each other
+ checkOneTerm(a, "γόητες", "γοητ");
+ checkOneTerm(a, "γοήτων", "γοητ");
+ }
+
+// Feeds feminine noun inflections, grouped by declension ending, to checkOneTerm with the GreekAnalyzer field 'a' and the expected stem.
+// Java original: public void testFeminineNouns() throws Exception ('throws' clauses are not available in .NET).
+ public virtual void testFeminineNouns()
+ {
+ // -α/-ες,-ών
+ checkOneTerm(a, "φορά", "φορ");
+ checkOneTerm(a, "φοράς", "φορ");
+ checkOneTerm(a, "φορές", "φορ");
+ checkOneTerm(a, "φορών", "φορ");
+
+ // -α/-ες,-ων
+ checkOneTerm(a, "αγελάδα", "αγελαδ");
+ checkOneTerm(a, "αγελάδας", "αγελαδ");
+ checkOneTerm(a, "αγελάδες", "αγελαδ");
+ checkOneTerm(a, "αγελάδων", "αγελαδ");
+
+ // -η/-ες
+ checkOneTerm(a, "ζάχαρη", "ζαχαρ");
+ checkOneTerm(a, "ζάχαρης", "ζαχαρ");
+ checkOneTerm(a, "ζάχαρες", "ζαχαρ");
+ checkOneTerm(a, "ζαχάρεων", "ζαχαρ");
+
+ // -η/-εις
+ checkOneTerm(a, "τηλεόραση", "τηλεορασ");
+ checkOneTerm(a, "τηλεόρασης", "τηλεορασ");
+ checkOneTerm(a, "τηλεοράσεις", "τηλεορασ");
+ checkOneTerm(a, "τηλεοράσεων", "τηλεορασ");
+
+ // -α/-αδες
+ checkOneTerm(a, "μαμά", "μαμ");
+ checkOneTerm(a, "μαμάς", "μαμ");
+ checkOneTerm(a, "μαμάδες", "μαμ");
+ checkOneTerm(a, "μαμάδων", "μαμ");
+
+ // -ος
+ checkOneTerm(a, "λεωφόρος", "λεωφορ");
+ checkOneTerm(a, "λεωφόρου", "λεωφορ");
+ checkOneTerm(a, "λεωφόρο", "λεωφορ");
+ checkOneTerm(a, "λεωφόρε", "λεωφορ");
+ checkOneTerm(a, "λεωφόροι", "λεωφορ");
+ checkOneTerm(a, "λεωφόρων", "λεωφορ");
+ checkOneTerm(a, "λεωφόρους", "λεωφορ");
+
+ // -ου
+ checkOneTerm(a, "αλεπού", "αλεπ");
+ checkOneTerm(a, "αλεπούς", "αλεπ");
+ checkOneTerm(a, "αλεπούδες", "αλεπ");
+ checkOneTerm(a, "αλεπούδων", "αλεπ");
+
+ // -έας/είς
+ // note: not all forms conflate (two different expected stems appear below)
+ checkOneTerm(a, "γραμματέας", "γραμματε");
+ checkOneTerm(a, "γραμματέως", "γραμματ");
+ checkOneTerm(a, "γραμματέα", "γραμματε");
+ checkOneTerm(a, "γραμματείς", "γραμματ");
+ checkOneTerm(a, "γραμματέων", "γραμματ");
+ }
+
+// Feeds neuter noun inflections, grouped by declension ending, to checkOneTerm with the GreekAnalyzer field 'a' and the expected stem.
+// Java original: public void testNeuterNouns() throws Exception ('throws' clauses are not available in .NET).
+ public virtual void testNeuterNouns()
+ {
+ // ending with -ο
+ // note: the nominative doesn't conflate
+ checkOneTerm(a, "βιβλίο", "βιβλι");
+ checkOneTerm(a, "βιβλίου", "βιβλ");
+ checkOneTerm(a, "βιβλία", "βιβλ");
+ checkOneTerm(a, "βιβλίων", "βιβλ");
+
+ // ending with -ι
+ checkOneTerm(a, "πουλί", "πουλ");
+ checkOneTerm(a, "πουλιού", "πουλ");
+ checkOneTerm(a, "πουλιά", "πουλ");
+ checkOneTerm(a, "πουλιών", "πουλ");
+
+ // ending with -α
+ // note: the nominative doesn't conflate
+ checkOneTerm(a, "πρόβλημα", "προβλημ");
+ checkOneTerm(a, "προβλήματος", "προβλημα");
+ checkOneTerm(a, "προβλήματα", "προβλημα");
+ checkOneTerm(a, "προβλημάτων", "προβλημα");
+
+ // ending with -ος/-ους
+ checkOneTerm(a, "πέλαγος", "πελαγ");
+ checkOneTerm(a, "πελάγους", "πελαγ");
+ checkOneTerm(a, "πελάγη", "πελαγ");
+ checkOneTerm(a, "πελάγων", "πελαγ");
+
+ // ending with -ός/-ότος
+ checkOneTerm(a, "γεγονός", "γεγον");
+ checkOneTerm(a, "γεγονότος", "γεγον");
+ checkOneTerm(a, "γεγονότα", "γεγον");
+ checkOneTerm(a, "γεγονότων", "γεγον");
+
+ // ending with -υ/-ιου
+ checkOneTerm(a, "βράδυ", "βραδ");
+ checkOneTerm(a, "βράδι", "βραδ");
+ checkOneTerm(a, "βραδιού", "βραδ");
+ checkOneTerm(a, "βράδια", "βραδ");
+ checkOneTerm(a, "βραδιών", "βραδ");
+
+ // ending with -υ/-ατος
+ // note: the nominative doesn't conflate
+ checkOneTerm(a, "δόρυ", "δορ");
+ checkOneTerm(a, "δόρατος", "δορατ");
+ checkOneTerm(a, "δόρατα", "δορατ");
+ checkOneTerm(a, "δοράτων", "δορατ");
+
+ // ending with -ας
+ checkOneTerm(a, "κρέας", "κρε");
+ checkOneTerm(a, "κρέατος", "κρε");
+ checkOneTerm(a, "κρέατα", "κρε");
+ checkOneTerm(a, "κρεάτων", "κρε");
+
+ // ending with -ως
+ checkOneTerm(a, "λυκόφως", "λυκοφω");
+ checkOneTerm(a, "λυκόφωτος", "λυκοφω");
+ checkOneTerm(a, "λυκόφωτα", "λυκοφω");
+ checkOneTerm(a, "λυκοφώτων", "λυκοφω");
+
+ // ending with -ον/-ου
+ // note: the nominative doesn't conflate
+ checkOneTerm(a, "μέσον", "μεσον");
+ checkOneTerm(a, "μέσου", "μεσ");
+ checkOneTerm(a, "μέσα", "μεσ");
+ checkOneTerm(a, "μέσων", "μεσ");
+
+ // ending with -ον/-οντος
+ // note: the nominative doesn't conflate
+ checkOneTerm(a, "ενδιαφέρον", "ενδιαφερον");
+ checkOneTerm(a, "ενδιαφέροντος", "ενδιαφεροντ");
+ checkOneTerm(a, "ενδιαφέροντα", "ενδιαφεροντ");
+ checkOneTerm(a, "ενδιαφερόντων", "ενδιαφεροντ");
+
+ // ending with -εν/-εντος
+ checkOneTerm(a, "ανακοινωθέν", "ανακοινωθεν");
+ checkOneTerm(a, "ανακοινωθέντος", "ανακοινωθεντ");
+ checkOneTerm(a, "ανακοινωθέντα", "ανακοινωθεντ");
+ checkOneTerm(a, "ανακοινωθέντων", "ανακοινωθεντ");
+
+ // ending with -αν/-αντος
+ checkOneTerm(a, "σύμπαν", "συμπ");
+ checkOneTerm(a, "σύμπαντος", "συμπαντ");
+ checkOneTerm(a, "σύμπαντα", "συμπαντ");
+ checkOneTerm(a, "συμπάντων", "συμπαντ");
+
+ // ending with -α/-ακτος
+ checkOneTerm(a, "γάλα", "γαλ");
+ checkOneTerm(a, "γάλακτος", "γαλακτ");
+ checkOneTerm(a, "γάλατα", "γαλατ");
+ checkOneTerm(a, "γαλάκτων", "γαλακτ");
+ }
+
+// Feeds adjective inflections and comparative/superlative forms to checkOneTerm with the GreekAnalyzer field 'a' and the expected stem.
+// Java original: public void testAdjectives() throws Exception ('throws' clauses are not available in .NET).
+ public virtual void testAdjectives()
+ {
+ // ending with -ής, -ές/-είς, -ή
+ checkOneTerm(a, "συνεχής", "συνεχ");
+ checkOneTerm(a, "συνεχούς", "συνεχ");
+ checkOneTerm(a, "συνεχή", "συνεχ");
+ checkOneTerm(a, "συνεχών", "συνεχ");
+ checkOneTerm(a, "συνεχείς", "συνεχ");
+ checkOneTerm(a, "συνεχές", "συνεχ");
+
+ // ending with -ης, -ες/-εις, -η
+ checkOneTerm(a, "συνήθης", "συνηθ");
+ checkOneTerm(a, "συνήθους", "συνηθ");
+ checkOneTerm(a, "συνήθη", "συνηθ");
+ // note: the next form doesn't conflate with the others
+ checkOneTerm(a, "συνήθεις", "συν");
+ checkOneTerm(a, "συνήθων", "συνηθ");
+ checkOneTerm(a, "σύνηθες", "συνηθ");
+
+ // ending with -υς, -υ/-εις, -ια
+ checkOneTerm(a, "βαθύς", "βαθ");
+ checkOneTerm(a, "βαθέος", "βαθε");
+ checkOneTerm(a, "βαθύ", "βαθ");
+ checkOneTerm(a, "βαθείς", "βαθ");
+ checkOneTerm(a, "βαθέων", "βαθ");
+
+ checkOneTerm(a, "βαθιά", "βαθ");
+ checkOneTerm(a, "βαθιάς", "βαθι");
+ checkOneTerm(a, "βαθιές", "βαθι");
+ checkOneTerm(a, "βαθιών", "βαθ");
+
+ checkOneTerm(a, "βαθέα", "βαθε");
+
+ // comparative/superlative forms share the stem of the base adjective
+ checkOneTerm(a, "ψηλός", "ψηλ");
+ checkOneTerm(a, "ψηλότερος", "ψηλ");
+ checkOneTerm(a, "ψηλότατος", "ψηλ");
+
+ checkOneTerm(a, "ωραίος", "ωραι");
+ checkOneTerm(a, "ωραιότερος", "ωραι");
+ checkOneTerm(a, "ωραιότατος", "ωραι");
+
+ checkOneTerm(a, "επιεικής", "επιεικ");
+ checkOneTerm(a, "επιεικέστερος", "επιεικ");
+ checkOneTerm(a, "επιεικέστατος", "επιεικ");
+ }
+
+
+// Feeds verb forms, grouped by conjugation pattern, to checkOneTerm with the GreekAnalyzer field 'a' and the expected stem.
+// Java original: public void testVerbs() throws Exception ('throws' clauses are not available in .NET).
+ public virtual void testVerbs()
+ {
+ // note: past/present verb stems will not conflate (from the paper)
+ //-ω,-α/-.ω,-.α
+ checkOneTerm(a, "ορίζω", "οριζ");
+ checkOneTerm(a, "όριζα", "οριζ");
+ checkOneTerm(a, "όριζε", "οριζ");
+ checkOneTerm(a, "ορίζοντας", "οριζ");
+ checkOneTerm(a, "ορίζομαι", "οριζ");
+ checkOneTerm(a, "οριζόμουν", "οριζ");
+ checkOneTerm(a, "ορίζεσαι", "οριζ");
+
+ checkOneTerm(a, "όρισα", "ορισ");
+ checkOneTerm(a, "ορίσω", "ορισ");
+ checkOneTerm(a, "όρισε", "ορισ");
+ checkOneTerm(a, "ορίσει", "ορισ");
+
+ checkOneTerm(a, "ορίστηκα", "οριστ");
+ checkOneTerm(a, "οριστώ", "οριστ");
+ checkOneTerm(a, "οριστείς", "οριστ");
+ checkOneTerm(a, "οριστεί", "οριστ");
+
+ checkOneTerm(a, "ορισμένο", "ορισμεν");
+ checkOneTerm(a, "ορισμένη", "ορισμεν");
+ checkOneTerm(a, "ορισμένος", "ορισμεν");
+
+ // -ω,-α/-ξω,-ξα
+ checkOneTerm(a, "ανοίγω", "ανοιγ");
+ checkOneTerm(a, "άνοιγα", "ανοιγ");
+ checkOneTerm(a, "άνοιγε", "ανοιγ");
+ checkOneTerm(a, "ανοίγοντας", "ανοιγ");
+ checkOneTerm(a, "ανοίγομαι", "ανοιγ");
+ checkOneTerm(a, "ανοιγόμουν", "ανοιγ");
+
+ checkOneTerm(a, "άνοιξα", "ανοιξ");
+ checkOneTerm(a, "ανοίξω", "ανοιξ");
+ checkOneTerm(a, "άνοιξε", "ανοιξ");
+ checkOneTerm(a, "ανοίξει", "ανοιξ");
+
+ checkOneTerm(a, "ανοίχτηκα", "ανοιχτ");
+ checkOneTerm(a, "ανοιχτώ", "ανοιχτ");
+ checkOneTerm(a, "ανοίχτηκα", "ανοιχτ"); // duplicate of an earlier check in this group
+ checkOneTerm(a, "ανοιχτείς", "ανοιχτ");
+ checkOneTerm(a, "ανοιχτεί", "ανοιχτ");
+
+ checkOneTerm(a, "ανοίξου", "ανοιξ");
+
+ //-ώ/-άω,-ούσα/-άσω,-ασα
+ checkOneTerm(a, "περνώ", "περν");
+ checkOneTerm(a, "περνάω", "περν");
+ checkOneTerm(a, "περνούσα", "περν");
+ checkOneTerm(a, "πέρναγα", "περν");
+ checkOneTerm(a, "πέρνα", "περν");
+ checkOneTerm(a, "περνώντας", "περν");
+
+ checkOneTerm(a, "πέρασα", "περασ");
+ checkOneTerm(a, "περάσω", "περασ");
+ checkOneTerm(a, "πέρασε", "περασ");
+ checkOneTerm(a, "περάσει", "περασ");
+
+ checkOneTerm(a, "περνιέμαι", "περν");
+ checkOneTerm(a, "περνιόμουν", "περν");
+
+ checkOneTerm(a, "περάστηκα", "περαστ");
+ checkOneTerm(a, "περαστώ", "περαστ");
+ checkOneTerm(a, "περαστείς", "περαστ");
+ checkOneTerm(a, "περαστεί", "περαστ");
+
+ checkOneTerm(a, "περασμένο", "περασμεν");
+ checkOneTerm(a, "περασμένη", "περασμεν");
+ checkOneTerm(a, "περασμένος", "περασμεν");
+
+ // -ώ/-άω,-ούσα/-άξω,-αξα
+ checkOneTerm(a, "πετώ", "πετ");
+ checkOneTerm(a, "πετάω", "πετ");
+ checkOneTerm(a, "πετούσα", "πετ");
+ checkOneTerm(a, "πέταγα", "πετ");
+ checkOneTerm(a, "πέτα", "πετ");
+ checkOneTerm(a, "πετώντας", "πετ");
+ checkOneTerm(a, "πετιέμαι", "πετ");
+ checkOneTerm(a, "πετιόμουν", "πετ");
+
+ checkOneTerm(a, "πέταξα", "πεταξ");
+ checkOneTerm(a, "πετάξω", "πεταξ");
+ checkOneTerm(a, "πέταξε", "πεταξ");
+ checkOneTerm(a, "πετάξει", "πεταξ");
+
+ checkOneTerm(a, "πετάχτηκα", "πεταχτ");
+ checkOneTerm(a, "πεταχτώ", "πεταχτ");
+ checkOneTerm(a, "πεταχτείς", "πεταχτ");
+ checkOneTerm(a, "πεταχτεί", "πεταχτ");
+
+ checkOneTerm(a, "πεταμένο", "πεταμεν");
+ checkOneTerm(a, "πεταμένη", "πεταμεν");
+ checkOneTerm(a, "πεταμένος", "πεταμεν");
+
+ // -ώ/-άω,-ούσα / -έσω,-εσα
+ checkOneTerm(a, "καλώ", "καλ");
+ checkOneTerm(a, "καλούσα", "καλ");
+ checkOneTerm(a, "καλείς", "καλ");
+ checkOneTerm(a, "καλώντας", "καλ");
+
+ checkOneTerm(a, "καλούμαι", "καλ");
+ // pass. imperfect / imp. progressive forms don't conflate
+ checkOneTerm(a, "καλούμουν", "καλουμ");
+ checkOneTerm(a, "καλείσαι", "καλεισα");
+
+ checkOneTerm(a, "καλέστηκα", "καλεστ");
+ checkOneTerm(a, "καλεστώ", "καλεστ");
+ checkOneTerm(a, "καλεστείς", "καλεστ");
+ checkOneTerm(a, "καλεστεί", "καλεστ");
+
+ checkOneTerm(a, "καλεσμένο", "καλεσμεν");
+ checkOneTerm(a, "καλεσμένη", "καλεσμεν");
+ checkOneTerm(a, "καλεσμένος", "καλεσμεν");
+
+ checkOneTerm(a, "φορώ", "φορ");
+ checkOneTerm(a, "φοράω", "φορ");
+ checkOneTerm(a, "φορούσα", "φορ");
+ checkOneTerm(a, "φόραγα", "φορ");
+ checkOneTerm(a, "φόρα", "φορ");
+ checkOneTerm(a, "φορώντας", "φορ");
+ checkOneTerm(a, "φοριέμαι", "φορ");
+ checkOneTerm(a, "φοριόμουν", "φορ");
+ checkOneTerm(a, "φοριέσαι", "φορ");
+
+ checkOneTerm(a, "φόρεσα", "φορεσ");
+ checkOneTerm(a, "φορέσω", "φορεσ");
+ checkOneTerm(a, "φόρεσε", "φορεσ");
+ checkOneTerm(a, "φορέσει", "φορεσ");
+
+ checkOneTerm(a, "φορέθηκα", "φορεθ");
+ checkOneTerm(a, "φορεθώ", "φορεθ");
+ checkOneTerm(a, "φορεθείς", "φορεθ");
+ checkOneTerm(a, "φορεθεί", "φορεθ");
+
+ checkOneTerm(a, "φορεμένο", "φορεμεν");
+ checkOneTerm(a, "φορεμένη", "φορεμεν");
+ checkOneTerm(a, "φορεμένος", "φορεμεν");
+
+ // -ώ/-άω,-ούσα / -ήσω,-ησα
+ checkOneTerm(a, "κρατώ", "κρατ");
+ checkOneTerm(a, "κρατάω", "κρατ");
+ checkOneTerm(a, "κρατούσα", "κρατ");
+ checkOneTerm(a, "κράταγα", "κρατ");
+ checkOneTerm(a, "κράτα", "κρατ");
+ checkOneTerm(a, "κρατώντας", "κρατ");
+
+ checkOneTerm(a, "κράτησα", "κρατ");
+ checkOneTerm(a, "κρατήσω", "κρατ");
+ checkOneTerm(a, "κράτησε", "κρατ");
+ checkOneTerm(a, "κρατήσει", "κρατ");
+
+ checkOneTerm(a, "κρατούμαι", "κρατ");
+ checkOneTerm(a, "κρατιέμαι", "κρατ");
+ // this imperfect form doesn't conflate
+ checkOneTerm(a, "κρατούμουν", "κρατουμ");
+ checkOneTerm(a, "κρατιόμουν", "κρατ");
+ // this imp. prog. form doesn't conflate
+ checkOneTerm(a, "κρατείσαι", "κρατεισα");
+
+ checkOneTerm(a, "κρατήθηκα", "κρατ");
+ checkOneTerm(a, "κρατηθώ", "κρατ");
+ checkOneTerm(a, "κρατηθείς", "κρατ");
+ checkOneTerm(a, "κρατηθεί", "κρατ");
+ checkOneTerm(a, "κρατήσου", "κρατ");
+
+ checkOneTerm(a, "κρατημένο", "κρατημεν");
+ checkOneTerm(a, "κρατημένη", "κρατημεν");
+ checkOneTerm(a, "κρατημένος", "κρατημεν");
+
+ // -.μαι,-.μουν / -.ώ,-.ηκα
+ checkOneTerm(a, "κοιμάμαι", "κοιμ");
+ checkOneTerm(a, "κοιμόμουν", "κοιμ");
+ checkOneTerm(a, "κοιμάσαι", "κοιμ");
+
+ checkOneTerm(a, "κοιμήθηκα", "κοιμ");
+ checkOneTerm(a, "κοιμηθώ", "κοιμ");
+ checkOneTerm(a, "κοιμήσου", "κοιμ");
+ checkOneTerm(a, "κοιμηθεί", "κοιμ");
+
+ checkOneTerm(a, "κοιμισμένο", "κοιμισμεν");
+ checkOneTerm(a, "κοιμισμένη", "κοιμισμεν");
+ checkOneTerm(a, "κοιμισμένος", "κοιμισμεν");
+ }
+
+// Feeds the input/expected-stem pairs below (the cases this test names "exceptions") to checkOneTerm with the GreekAnalyzer field 'a'.
+// Java original: public void testExceptions() throws Exception ('throws' clauses are not available in .NET).
+ public virtual void testExceptions()
+ {
+ checkOneTerm(a, "καθεστώτα", "καθεστ");
+ checkOneTerm(a, "καθεστώτος", "καθεστ");
+ checkOneTerm(a, "καθεστώς", "καθεστ");
+ checkOneTerm(a, "καθεστώτων", "καθεστ");
+
+ checkOneTerm(a, "χουμε", "χουμ");
+ checkOneTerm(a, "χουμ", "χουμ"); // input already equals the expected stem
+
+ checkOneTerm(a, "υποταγεσ", "υποταγ");
+ checkOneTerm(a, "υποταγ", "υποταγ"); // input already equals the expected stem
+
+ checkOneTerm(a, "εμετε", "εμετ");
+ checkOneTerm(a, "εμετ", "εμετ"); // input already equals the expected stem
+
+ checkOneTerm(a, "αρχοντασ", "αρχοντ");
+ checkOneTerm(a, "αρχοντων", "αρχοντ");
+ }
+
+// Calls checkOneTerm with an empty input and an empty expected term, using an AnalyzerAnonymousInnerClassHelper instead of the field analyzer.
+// Java original: public void testEmptyTerm() throws java.io.IOException ('throws' clauses are not available in .NET).
+ public virtual void testEmptyTerm()
+ {
+ Analyzer a = new AnalyzerAnonymousInnerClassHelper(this); // local 'a' hides the GreekAnalyzer field of the same name
+ checkOneTerm(a, "", "");
+ }
+
+ private class AnalyzerAnonymousInnerClassHelper : Analyzer // Analyzer whose components are a KeywordTokenizer followed by a GreekStemFilter
+ {
+ private readonly TestGreekStemmer outerInstance; // assigned in the constructor; not read anywhere in this class
+
+ public AnalyzerAnonymousInnerClassHelper(TestGreekStemmer outerInstance)
+ {
+ this.outerInstance = outerInstance;
+ }
+
+ protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader) // fieldName is not used here
+ {
+ Tokenizer tokenizer = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, new GreekStemFilter(tokenizer)); // the stem filter wraps the tokenizer directly
+ }
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/En/TestEnglishAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/En/TestEnglishAnalyzer.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/En/TestEnglishAnalyzer.cs
new file mode 100644
index 0000000..337d8ee
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/En/TestEnglishAnalyzer.cs
@@ -0,0 +1,74 @@
+namespace org.apache.lucene.analysis.en
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+
+ public class TestEnglishAnalyzer : BaseTokenStreamTestCase
+ {
+ /// <summary>
+ /// Constructs an EnglishAnalyzer from TEST_VERSION_CURRENT alone. Per the original Java
+ /// test, this fails with an NPE when the stopwords file is missing from the classpath.
+ /// </summary>
+ public virtual void testResourcesAvailable()
+ {
+ new EnglishAnalyzer(TEST_VERSION_CURRENT);
+ }
+
+ /// <summary>
+ /// Test stop word removal, stemming and possessive removal. </summary>
+// Java original: public void testBasics() throws java.io.IOException ('throws' clauses are not available in .NET).
+// The last two possessive inputs use the \u2019 and \uFF07 escapes in place of the ASCII apostrophe.
+ public virtual void testBasics()
+ {
+ Analyzer a = new EnglishAnalyzer(TEST_VERSION_CURRENT);
+ // stemming
+ checkOneTerm(a, "books", "book");
+ checkOneTerm(a, "book", "book");
+ // stopword: an empty expected-token array is passed for "the"
+ assertAnalyzesTo(a, "the", new string[] {});
+ // possessive removal
+ checkOneTerm(a, "steven's", "steven");
+ checkOneTerm(a, "steven\u2019s", "steven");
+ checkOneTerm(a, "steven\uFF07s", "steven");
+ }
+
+ /// <summary>
+ /// Test use of the exclusion set: "books" is in the set and is expected back unchanged. </summary>
+// Java original: public void testExclude() throws java.io.IOException ('throws' clauses are not available in .NET).
+// The analyzer is built from EnglishAnalyzer.DefaultStopSet plus a CharArraySet holding only "books".
+ public virtual void testExclude()
+ {
+ CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("books"), false);
+ Analyzer a = new EnglishAnalyzer(TEST_VERSION_CURRENT, EnglishAnalyzer.DefaultStopSet, exclusionSet);
+ checkOneTerm(a, "books", "books");
+ checkOneTerm(a, "book", "book");
+ }
+
+ /// <summary>
+ /// Blast some random strings through the analyzer. </summary>
+// Java original: public void testRandomStrings() throws Exception ('throws' clauses are not available in .NET).
+// Calls checkRandomData with random(), a fresh EnglishAnalyzer and 1000 * RANDOM_MULTIPLIER.
+ public virtual void testRandomStrings()
+ {
+ checkRandomData(random(), new EnglishAnalyzer(TEST_VERSION_CURRENT), 1000 * RANDOM_MULTIPLIER);
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/En/TestEnglishMinimalStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/En/TestEnglishMinimalStemFilter.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/En/TestEnglishMinimalStemFilter.cs
new file mode 100644
index 0000000..df1aa3e
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/En/TestEnglishMinimalStemFilter.cs
@@ -0,0 +1,95 @@
+namespace org.apache.lucene.analysis.en
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using KeywordTokenizer = org.apache.lucene.analysis.core.KeywordTokenizer;
+
+ /// <summary>
+ /// Simple tests for <see cref="EnglishMinimalStemFilter"/>.
+ /// </summary>
+ public class TestEnglishMinimalStemFilter : BaseTokenStreamTestCase
+ {
+ private Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(); // used by testExamples and testRandomStrings
+
+ private class AnalyzerAnonymousInnerClassHelper : Analyzer // MockTokenizer (WHITESPACE) followed by an EnglishMinimalStemFilter
+ {
+ public AnalyzerAnonymousInnerClassHelper()
+ {
+ }
+
+ protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+ {
+ Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ return new TokenStreamComponents(source, new EnglishMinimalStemFilter(source));
+ }
+ }
+
+ /// <summary>
+ /// Test some examples from various papers about this technique. </summary>
+// Java original: public void testExamples() throws java.io.IOException ('throws' clauses are not available in .NET).
+// Each call passes an input term and its expected output; several inputs ending in 's' are expected back unchanged.
+ public virtual void testExamples()
+ {
+ checkOneTerm(analyzer, "queries", "query");
+ checkOneTerm(analyzer, "phrases", "phrase");
+ checkOneTerm(analyzer, "corpus", "corpus");
+ checkOneTerm(analyzer, "stress", "stress");
+ checkOneTerm(analyzer, "kings", "king");
+ checkOneTerm(analyzer, "panels", "panel");
+ checkOneTerm(analyzer, "aerodynamics", "aerodynamic");
+ checkOneTerm(analyzer, "congress", "congress");
+ checkOneTerm(analyzer, "serious", "serious");
+ }
+
+ /// <summary>
+ /// Blast some random strings through the analyzer. </summary>
+// Java original: public void testRandomStrings() throws Exception ('throws' clauses are not available in .NET).
+// Calls checkRandomData with random(), the shared analyzer field and 1000 * RANDOM_MULTIPLIER.
+ public virtual void testRandomStrings()
+ {
+ checkRandomData(random(), analyzer, 1000 * RANDOM_MULTIPLIER);
+ }
+
+// Calls checkOneTerm with an empty input and an empty expected term, using the KeywordTokenizer-based helper analyzer below.
+// Java original: public void testEmptyTerm() throws java.io.IOException ('throws' clauses are not available in .NET).
+ public virtual void testEmptyTerm()
+ {
+ Analyzer a = new AnalyzerAnonymousInnerClassHelper2(this);
+ checkOneTerm(a, "", "");
+ }
+
+ private class AnalyzerAnonymousInnerClassHelper2 : Analyzer // KeywordTokenizer followed by an EnglishMinimalStemFilter
+ {
+ private readonly TestEnglishMinimalStemFilter outerInstance; // assigned in the constructor; not read anywhere in this class
+
+ public AnalyzerAnonymousInnerClassHelper2(TestEnglishMinimalStemFilter outerInstance)
+ {
+ this.outerInstance = outerInstance;
+ }
+
+ protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+ {
+ Tokenizer tokenizer = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, new EnglishMinimalStemFilter(tokenizer));
+ }
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/En/TestEnglishMinimalStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/En/TestEnglishMinimalStemFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/En/TestEnglishMinimalStemFilterFactory.cs
new file mode 100644
index 0000000..554c453
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/En/TestEnglishMinimalStemFilterFactory.cs
@@ -0,0 +1,57 @@
+namespace org.apache.lucene.analysis.en
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using BaseTokenStreamFactoryTestCase = org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+
+ /// <summary>
+ /// Simple tests to ensure the English minimal stem factory is working.
+ /// </summary>
+ public class TestEnglishMinimalStemFilterFactory : BaseTokenStreamFactoryTestCase
+ {
+// Wraps a WHITESPACE MockTokenizer reading "bricks" with the "EnglishMinimalStem" filter and expects the single token "brick".
+// Java original: public void testStemming() throws Exception ('throws' clauses are not available in .NET).
+ public virtual void testStemming()
+ {
+ Reader reader = new StringReader("bricks");
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ stream = tokenFilterFactory("EnglishMinimalStem").create(stream);
+ assertTokenStreamContents(stream, new string[] {"brick"});
+ }
+
+ /// <summary>
+ /// Test that bogus arguments result in an exception. </summary>
+// Java original: public void testBogusArguments() throws Exception ('throws' clauses are not available in .NET).
+// The message check uses String.Contains: .NET strings have no Java-style lowercase 'contains' method.
+ public virtual void testBogusArguments()
+ {
+ try
+ {
+ tokenFilterFactory("EnglishMinimalStem", "bogusArg", "bogusValue");
+ fail(); // reached only when no exception was thrown for the unknown argument
+ }
+ catch (System.ArgumentException expected)
+ {
+ assertTrue(expected.Message.Contains("Unknown parameters"));
+ }
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/En/TestKStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/En/TestKStemFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/En/TestKStemFilterFactory.cs
new file mode 100644
index 0000000..909cde2
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/En/TestKStemFilterFactory.cs
@@ -0,0 +1,57 @@
+namespace org.apache.lucene.analysis.en
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using BaseTokenStreamFactoryTestCase = org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+
+ /// <summary>
+ /// Simple tests to ensure the kstem filter factory is working.
+ /// </summary>
+ public class TestKStemFilterFactory : BaseTokenStreamFactoryTestCase
+ {
+// Wraps a WHITESPACE MockTokenizer reading "bricks" with the "KStem" filter and expects the single token "brick".
+// Java original: public void testStemming() throws Exception ('throws' clauses are not available in .NET).
+ public virtual void testStemming()
+ {
+ Reader reader = new StringReader("bricks");
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ stream = tokenFilterFactory("KStem").create(stream);
+ assertTokenStreamContents(stream, new string[] {"brick"});
+ }
+
+ /// <summary>
+ /// Test that bogus arguments result in an exception. </summary>
+// Java original: public void testBogusArguments() throws Exception ('throws' clauses are not available in .NET).
+// The message check uses String.Contains: .NET strings have no Java-style lowercase 'contains' method.
+ public virtual void testBogusArguments()
+ {
+ try
+ {
+ tokenFilterFactory("KStem", "bogusArg", "bogusValue");
+ fail(); // reached only when no exception was thrown for the unknown argument
+ }
+ catch (System.ArgumentException expected)
+ {
+ assertTrue(expected.Message.Contains("Unknown parameters"));
+ }
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/En/TestKStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/En/TestKStemmer.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/En/TestKStemmer.cs
new file mode 100644
index 0000000..8e7097c
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/En/TestKStemmer.cs
@@ -0,0 +1,124 @@
+namespace org.apache.lucene.analysis.en
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+// import static org.apache.lucene.analysis.VocabularyAssert.assertVocabulary;
+
+
+ using KeywordTokenizer = org.apache.lucene.analysis.core.KeywordTokenizer;
+
+ /// <summary>
+ /// Tests for <seealso cref="KStemmer"/>
+ /// </summary>
+ public class TestKStemmer : BaseTokenStreamTestCase
+ {
+     /// <summary>
+     /// Shared analyzer under test: a whitespace <c>MockTokenizer</c> whose
+     /// output is passed through a <c>KStemFilter</c>.
+     /// </summary>
+     internal Analyzer a = new WhitespaceKStemAnalyzer();
+
+     /// <summary>
+     /// Named replacement for the converter-generated anonymous-class helper;
+     /// it needs no state, so the implicit default constructor is enough.
+     /// </summary>
+     private class WhitespaceKStemAnalyzer : Analyzer
+     {
+         protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+         {
+             Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
+             return new TokenStreamComponents(tokenizer, new KStemFilter(tokenizer));
+         }
+     }
+
+     /// <summary>
+     /// blast some random strings through the analyzer </summary>
+     public virtual void testRandomStrings()
+     {
+         checkRandomData(random(), a, 1000 * RANDOM_MULTIPLIER);
+     }
+
+     /// <summary>
+     /// test the kstemmer optimizations against a bunch of words
+     /// that were stemmed with the original java kstemmer (generated from
+     /// testCreateMap, commented out below).
+     /// </summary>
+     public virtual void testVocabulary()
+     {
+         // The Java original reached assertVocabulary through a static import,
+         // which the converter could not translate; call it through its
+         // declaring class so the name has something to bind to.
+         VocabularyAssert.assertVocabulary(a, getDataFile("kstemTestData.zip"), "kstem_examples.txt");
+     }
+
+     /// <summary>
+     /// An empty input term must come out of the stem filter as an empty term.
+     /// </summary>
+     public virtual void testEmptyTerm()
+     {
+         Analyzer emptyTermAnalyzer = new KeywordKStemAnalyzer();
+         checkOneTerm(emptyTermAnalyzer, "", "");
+     }
+
+     /// <summary>
+     /// <c>KeywordTokenizer</c> followed by <c>KStemFilter</c>. The generated
+     /// helper captured the enclosing test instance but never read it, so the
+     /// capture has been dropped.
+     /// </summary>
+     private class KeywordKStemAnalyzer : Analyzer
+     {
+         protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+         {
+             Tokenizer tokenizer = new KeywordTokenizer(reader);
+             return new TokenStreamComponents(tokenizer, new KStemFilter(tokenizer));
+         }
+     }
+
+     // The Java snippet below is kept as an ordinary comment rather than an
+     // XML doc comment: it documents no member, and its generic type
+     // arguments are not well-formed XML.
+     //
+     // **** requires original java kstem source code to create map
+     // public void testCreateMap() throws Exception {
+     //   String input = getBigDoc();
+     //   Reader r = new StringReader(input);
+     //   TokenFilter tf = new LowerCaseFilter(new LetterTokenizer(r));
+     //   // tf = new KStemFilter(tf);
+     //
+     //   KStemmer kstem = new KStemmer();
+     //   Map<String,String> map = new TreeMap<>();
+     //   for(;;) {
+     //     Token t = tf.next();
+     //     if (t==null) break;
+     //     String s = t.termText();
+     //     if (map.containsKey(s)) continue;
+     //     map.put(s, kstem.stem(s));
+     //   }
+     //
+     //   Writer out = new BufferedWriter(new FileWriter("kstem_examples.txt"));
+     //   for (String key : map.keySet()) {
+     //     out.write(key);
+     //     out.write('\t');
+     //     out.write(map.get(key));
+     //     out.write('\n');
+     //   }
+     //   out.close();
+     // }
+     // *****
+
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/En/TestPorterStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/En/TestPorterStemFilter.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/En/TestPorterStemFilter.cs
new file mode 100644
index 0000000..6528e8a
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/En/TestPorterStemFilter.cs
@@ -0,0 +1,105 @@
+namespace org.apache.lucene.analysis.en
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using KeywordTokenizer = org.apache.lucene.analysis.core.KeywordTokenizer;
+ using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+ using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+// import static org.apache.lucene.analysis.VocabularyAssert.*;
+
+ /// <summary>
+ /// Test the PorterStemFilter with Martin Porter's test data.
+ /// </summary>
+ public class TestPorterStemFilter : BaseTokenStreamTestCase
+ {
+     /// <summary>
+     /// Shared analyzer under test: a <c>MockTokenizer</c> in KEYWORD mode
+     /// wrapped in a <c>PorterStemFilter</c>.
+     /// </summary>
+     internal Analyzer a = new KeywordMockPorterAnalyzer();
+
+     /// <summary>
+     /// Named replacement for the converter-generated anonymous-class helper;
+     /// it needs no state, so the implicit default constructor is enough.
+     /// </summary>
+     private class KeywordMockPorterAnalyzer : Analyzer
+     {
+         protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+         {
+             Tokenizer t = new MockTokenizer(reader, MockTokenizer.KEYWORD, false);
+             return new TokenStreamComponents(t, new PorterStemFilter(t));
+         }
+     }
+
+     /// <summary>
+     /// Run the stemmer against all strings in voc.txt
+     /// The output should be the same as the string in output.txt
+     /// </summary>
+     public virtual void testPorterStemFilter()
+     {
+         // The Java original reached assertVocabulary through a static import,
+         // which the converter could not translate; call it through its
+         // declaring class so the name has something to bind to.
+         VocabularyAssert.assertVocabulary(a, getDataFile("porterTestData.zip"), "voc.txt", "output.txt");
+     }
+
+     /// <summary>
+     /// A term placed in the keyword set must pass through unstemmed:
+     /// "yourselves" is marked and kept as-is, while "yours" becomes "your".
+     /// </summary>
+     public virtual void testWithKeywordAttribute()
+     {
+         CharArraySet keywords = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
+         keywords.add("yourselves");
+         // StringReader is named by its full System.IO path because this file
+         // has no using directive that would bring it into scope.
+         Tokenizer tokenizer = new MockTokenizer(new System.IO.StringReader("yourselves yours"), MockTokenizer.WHITESPACE, false);
+         TokenStream filter = new PorterStemFilter(new SetKeywordMarkerFilter(tokenizer, keywords));
+         assertTokenStreamContents(filter, new string[] {"yourselves", "your"});
+     }
+
+     /// <summary>
+     /// blast some random strings through the analyzer </summary>
+     public virtual void testRandomStrings()
+     {
+         checkRandomData(random(), a, 1000 * RANDOM_MULTIPLIER);
+     }
+
+     /// <summary>
+     /// An empty input term must come out of the stem filter as an empty term.
+     /// </summary>
+     public virtual void testEmptyTerm()
+     {
+         Analyzer emptyTermAnalyzer = new KeywordPorterAnalyzer();
+         checkOneTerm(emptyTermAnalyzer, "", "");
+     }
+
+     /// <summary>
+     /// <c>KeywordTokenizer</c> followed by <c>PorterStemFilter</c>. The
+     /// generated helper captured the enclosing test instance but never read
+     /// it, so the capture has been dropped.
+     /// </summary>
+     private class KeywordPorterAnalyzer : Analyzer
+     {
+         protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+         {
+             Tokenizer tokenizer = new KeywordTokenizer(reader);
+             return new TokenStreamComponents(tokenizer, new PorterStemFilter(tokenizer));
+         }
+     }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/En/TestPorterStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/En/TestPorterStemFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/En/TestPorterStemFilterFactory.cs
new file mode 100644
index 0000000..0627c02
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/En/TestPorterStemFilterFactory.cs
@@ -0,0 +1,60 @@
+namespace org.apache.lucene.analysis.en
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using BaseTokenStreamFactoryTestCase = org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+
+ /// <summary>
+ /// Simple tests to ensure the Porter stem filter factory is working.
+ /// </summary>
+ public class TestPorterStemFilterFactory : BaseTokenStreamFactoryTestCase
+ {
+     /// <summary>
+     /// Ensure the filter actually stems text: the single token "dogs" goes
+     /// through the filter built for the "PorterStem" factory name and must
+     /// come out as "dog".
+     /// </summary>
+     public virtual void testStemming()
+     {
+         // The converter left the Java type name "Reader" here; let the compiler
+         // infer the type and name StringReader by its full System.IO path so the
+         // method does not depend on a using directive this file does not have.
+         var reader = new System.IO.StringReader("dogs");
+         TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+         stream = tokenFilterFactory("PorterStem").create(stream);
+         assertTokenStreamContents(stream, new string[] {"dog"});
+     }
+
+     /// <summary>
+     /// Passing an unrecognised argument to the factory must raise an
+     /// <see cref="System.ArgumentException"/> whose message mentions
+     /// "Unknown parameters"; reaching <c>fail()</c> means nothing was thrown.
+     /// </summary>
+     public virtual void testBogusArguments()
+     {
+         try
+         {
+             tokenFilterFactory("PorterStem", "bogusArg", "bogusValue");
+             fail();
+         }
+         catch (System.ArgumentException expected)
+         {
+             // System.String exposes Contains (capital C); the Java-cased
+             // "contains" left by the converter is not a member of string.
+             assertTrue(expected.Message.Contains("Unknown parameters"));
+         }
+     }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Es/TestSpanishAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Es/TestSpanishAnalyzer.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Es/TestSpanishAnalyzer.cs
new file mode 100644
index 0000000..d3c166d
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Es/TestSpanishAnalyzer.cs
@@ -0,0 +1,70 @@
+namespace org.apache.lucene.analysis.es
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+
+ public class TestSpanishAnalyzer : BaseTokenStreamTestCase
+ {
+     /// <summary>
+     /// Smoke test: building the analyzer with its defaults is the whole
+     /// check. In the Java original this failed with a NullPointerException
+     /// when the stopwords file was missing from the classpath.
+     /// </summary>
+     public virtual void testResourcesAvailable()
+     {
+         new SpanishAnalyzer(TEST_VERSION_CURRENT);
+     }
+
+     /// <summary>
+     /// Checks stemming and stopword removal with the default configuration.
+     /// </summary>
+     public virtual void testBasics()
+     {
+         Analyzer analyzer = new SpanishAnalyzer(TEST_VERSION_CURRENT);
+
+         // Both forms are expected to reduce to the same stem.
+         checkOneTerm(analyzer, "chicana", "chican");
+         checkOneTerm(analyzer, "chicano", "chican");
+
+         // A stopword is expected to yield no tokens at all.
+         string[] noTokens = new string[0];
+         assertAnalyzesTo(analyzer, "los", noTokens);
+     }
+
+     /// <summary>
+     /// Checks that a term listed in the exclusion set is left unstemmed
+     /// while other terms are still stemmed.
+     /// </summary>
+     public virtual void testExclude()
+     {
+         CharArraySet protectedTerms = new CharArraySet(TEST_VERSION_CURRENT, asSet("chicano"), false);
+         Analyzer analyzer = new SpanishAnalyzer(TEST_VERSION_CURRENT, SpanishAnalyzer.DefaultStopSet, protectedTerms);
+
+         checkOneTerm(analyzer, "chicana", "chican");
+         // Excluded term: expected output equals the input.
+         checkOneTerm(analyzer, "chicano", "chicano");
+     }
+
+     /// <summary>
+     /// Pushes random strings through a default-configured analyzer.
+     /// </summary>
+     public virtual void testRandomStrings()
+     {
+         // Arguments are evaluated in the same order as a single-call form:
+         // random source first, then the analyzer, then the iteration count.
+         var randomSource = random();
+         Analyzer analyzer = new SpanishAnalyzer(TEST_VERSION_CURRENT);
+         checkRandomData(randomSource, analyzer, 1000 * RANDOM_MULTIPLIER);
+     }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Es/TestSpanishLightStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Es/TestSpanishLightStemFilter.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Es/TestSpanishLightStemFilter.cs
new file mode 100644
index 0000000..4d83803
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Es/TestSpanishLightStemFilter.cs
@@ -0,0 +1,90 @@
+namespace org.apache.lucene.analysis.es
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using KeywordTokenizer = org.apache.lucene.analysis.core.KeywordTokenizer;
+
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+// import static org.apache.lucene.analysis.VocabularyAssert.*;
+
+ /// <summary>
+ /// Simple tests for <seealso cref="SpanishLightStemFilter"/>
+ /// </summary>
+ public class TestSpanishLightStemFilter : BaseTokenStreamTestCase
+ {
+     /// <summary>
+     /// Shared analyzer under test: a whitespace <c>MockTokenizer</c> whose
+     /// output is passed through a <c>SpanishLightStemFilter</c>.
+     /// </summary>
+     private Analyzer analyzer = new WhitespaceLightStemAnalyzer();
+
+     /// <summary>
+     /// Named replacement for the converter-generated anonymous-class helper;
+     /// it needs no state, so the implicit default constructor is enough.
+     /// </summary>
+     private class WhitespaceLightStemAnalyzer : Analyzer
+     {
+         protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+         {
+             Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+             return new TokenStreamComponents(source, new SpanishLightStemFilter(source));
+         }
+     }
+
+     /// <summary>
+     /// Test against a vocabulary from the reference impl </summary>
+     public virtual void testVocabulary()
+     {
+         // The Java original reached assertVocabulary through a static import,
+         // which the converter could not translate; call it through its
+         // declaring class so the name has something to bind to.
+         VocabularyAssert.assertVocabulary(analyzer, getDataFile("eslighttestdata.zip"), "eslight.txt");
+     }
+
+     /// <summary>
+     /// blast some random strings through the analyzer </summary>
+     public virtual void testRandomStrings()
+     {
+         checkRandomData(random(), analyzer, 1000 * RANDOM_MULTIPLIER);
+     }
+
+     /// <summary>
+     /// An empty input term must come out of the stem filter as an empty term.
+     /// </summary>
+     public virtual void testEmptyTerm()
+     {
+         Analyzer emptyTermAnalyzer = new KeywordLightStemAnalyzer();
+         checkOneTerm(emptyTermAnalyzer, "", "");
+     }
+
+     /// <summary>
+     /// <c>KeywordTokenizer</c> followed by <c>SpanishLightStemFilter</c>. The
+     /// generated helper captured the enclosing test instance but never read
+     /// it, so the capture has been dropped.
+     /// </summary>
+     private class KeywordLightStemAnalyzer : Analyzer
+     {
+         protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+         {
+             Tokenizer tokenizer = new KeywordTokenizer(reader);
+             return new TokenStreamComponents(tokenizer, new SpanishLightStemFilter(tokenizer));
+         }
+     }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Es/TestSpanishLightStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Es/TestSpanishLightStemFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Es/TestSpanishLightStemFilterFactory.cs
new file mode 100644
index 0000000..9c9cf4e
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Es/TestSpanishLightStemFilterFactory.cs
@@ -0,0 +1,57 @@
+namespace org.apache.lucene.analysis.es
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using BaseTokenStreamFactoryTestCase = org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+
+ /// <summary>
+ /// Simple tests to ensure the Spanish Light stem factory is working.
+ /// </summary>
+ public class TestSpanishLightStemFilterFactory : BaseTokenStreamFactoryTestCase
+ {
+     /// <summary>
+     /// Feeds the single token "sociedades" through the filter built for the
+     /// "SpanishLightStem" factory name and expects the stem "sociedad".
+     /// </summary>
+     public virtual void testStemming()
+     {
+         // The converter left the Java type name "Reader" here; let the compiler
+         // infer the type and name StringReader by its full System.IO path so the
+         // method does not depend on a using directive this file does not have.
+         var reader = new System.IO.StringReader("sociedades");
+         TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+         stream = tokenFilterFactory("SpanishLightStem").create(stream);
+         assertTokenStreamContents(stream, new string[] {"sociedad"});
+     }
+
+     /// <summary>
+     /// Passing an unrecognised argument to the factory must raise an
+     /// <see cref="System.ArgumentException"/> whose message mentions
+     /// "Unknown parameters"; reaching <c>fail()</c> means nothing was thrown.
+     /// </summary>
+     public virtual void testBogusArguments()
+     {
+         try
+         {
+             tokenFilterFactory("SpanishLightStem", "bogusArg", "bogusValue");
+             fail();
+         }
+         catch (System.ArgumentException expected)
+         {
+             // System.String exposes Contains (capital C); the Java-cased
+             // "contains" left by the converter is not a member of string.
+             assertTrue(expected.Message.Contains("Unknown parameters"));
+         }
+     }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Eu/TestBasqueAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Eu/TestBasqueAnalyzer.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Eu/TestBasqueAnalyzer.cs
new file mode 100644
index 0000000..4ba6d5d
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Eu/TestBasqueAnalyzer.cs
@@ -0,0 +1,70 @@
+namespace org.apache.lucene.analysis.eu
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+
+ public class TestBasqueAnalyzer : BaseTokenStreamTestCase
+ {
+     /// <summary>
+     /// Smoke test: building the analyzer with its defaults is the whole
+     /// check. In the Java original this failed with a NullPointerException
+     /// when the stopwords file was missing from the classpath.
+     /// </summary>
+     public virtual void testResourcesAvailable()
+     {
+         new BasqueAnalyzer(TEST_VERSION_CURRENT);
+     }
+
+     /// <summary>
+     /// Checks stemming and stopword removal with the default configuration.
+     /// </summary>
+     public virtual void testBasics()
+     {
+         Analyzer analyzer = new BasqueAnalyzer(TEST_VERSION_CURRENT);
+
+         // The base form is expected unchanged; the inflected form is
+         // expected to reduce to it.
+         checkOneTerm(analyzer, "zaldi", "zaldi");
+         checkOneTerm(analyzer, "zaldiak", "zaldi");
+
+         // A stopword is expected to yield no tokens at all.
+         string[] noTokens = new string[0];
+         assertAnalyzesTo(analyzer, "izan", noTokens);
+     }
+
+     /// <summary>
+     /// Checks that a term listed in the exclusion set is left unstemmed
+     /// while other terms are still stemmed.
+     /// </summary>
+     public virtual void testExclude()
+     {
+         CharArraySet protectedTerms = new CharArraySet(TEST_VERSION_CURRENT, asSet("zaldiak"), false);
+         Analyzer analyzer = new BasqueAnalyzer(TEST_VERSION_CURRENT, BasqueAnalyzer.DefaultStopSet, protectedTerms);
+
+         // Excluded term: expected output equals the input.
+         checkOneTerm(analyzer, "zaldiak", "zaldiak");
+         checkOneTerm(analyzer, "mendiari", "mendi");
+     }
+
+     /// <summary>
+     /// Pushes random strings through a default-configured analyzer.
+     /// </summary>
+     public virtual void testRandomStrings()
+     {
+         // Arguments are evaluated in the same order as a single-call form:
+         // random source first, then the analyzer, then the iteration count.
+         var randomSource = random();
+         Analyzer analyzer = new BasqueAnalyzer(TEST_VERSION_CURRENT);
+         checkRandomData(randomSource, analyzer, 1000 * RANDOM_MULTIPLIER);
+     }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Fa/TestPersianAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Fa/TestPersianAnalyzer.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Fa/TestPersianAnalyzer.cs
new file mode 100644
index 0000000..031d2e4
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Fa/TestPersianAnalyzer.cs
@@ -0,0 +1,252 @@
+namespace org.apache.lucene.analysis.fa
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+
+ /// <summary>
+ /// Test the Persian Analyzer
+ ///
+ /// </summary>
+ public class TestPersianAnalyzer : BaseTokenStreamTestCase
+ {
+
+ /// <summary>
+ /// This test fails with NPE when the stopwords file is missing in classpath
+ /// </summary>
+ public virtual void testResourcesAvailable()
+ {
+     // Construction is the whole check; the instance is not used afterwards.
+     Analyzer constructed = new PersianAnalyzer(TEST_VERSION_CURRENT);
+ }
+
+ /// <summary>
+ /// This test shows how the combination of tokenization (breaking on zero-width
+ /// non-joiner), normalization (such as treating arabic YEH and farsi YEH the
+ /// same), and stopwords creates a light-stemming effect for verbs.
+ ///
+ /// These verb forms are from http://en.wikipedia.org/wiki/Persian_grammar
+ /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testBehaviorVerbs() throws Exception
+ public virtual void testBehaviorVerbs()
+ {
+ // One default-configured analyzer is reused for every assertion below; each
+ // assertion feeds one verb form in and expects exactly one token out.
+ // NOTE(review): the method summary says tokenization breaks on the zero-width
+ // non-joiner (U+200C). That character is invisible in most editors, so confirm
+ // the input literals still contain it after any copy/paste of this file.
+ Analyzer a = new PersianAnalyzer(TEST_VERSION_CURRENT);
+ // active present indicative
+ assertAnalyzesTo(a, "میخورد", new string[] {"خورد"});
+ // active preterite indicative
+ assertAnalyzesTo(a, "خورد", new string[] {"خورد"});
+ // active imperfective preterite indicative
+ assertAnalyzesTo(a, "میخورد", new string[] {"خورد"});
+ // active future indicative
+ assertAnalyzesTo(a, "خواهد خورد", new string[] {"خورد"});
+ // active present progressive indicative
+ assertAnalyzesTo(a, "دارد میخورد", new string[] {"خورد"});
+ // active preterite progressive indicative
+ assertAnalyzesTo(a, "داشت میخورد", new string[] {"خورد"});
+
+ // From here on the expected token is a different (longer) form than above.
+ // active perfect indicative
+ assertAnalyzesTo(a, "خوردهاست", new string[] {"خورده"});
+ // active imperfective perfect indicative
+ assertAnalyzesTo(a, "میخوردهاست", new string[] {"خورده"});
+ // active pluperfect indicative
+ assertAnalyzesTo(a, "خورده بود", new string[] {"خورده"});
+ // active imperfective pluperfect indicative
+ assertAnalyzesTo(a, "میخورده بود", new string[] {"خورده"});
+ // active preterite subjunctive
+ assertAnalyzesTo(a, "خورده باشد", new string[] {"خورده"});
+ // active imperfective preterite subjunctive
+ assertAnalyzesTo(a, "میخورده باشد", new string[] {"خورده"});
+ // active pluperfect subjunctive
+ assertAnalyzesTo(a, "خورده بوده باشد", new string[] {"خورده"});
+ // active imperfective pluperfect subjunctive
+ assertAnalyzesTo(a, "میخورده بوده باشد", new string[] {"خورده"});
+ // passive present indicative
+ assertAnalyzesTo(a, "خورده میشود", new string[] {"خورده"});
+ // passive preterite indicative
+ assertAnalyzesTo(a, "خورده شد", new string[] {"خورده"});
+ // passive imperfective preterite indicative
+ assertAnalyzesTo(a, "خورده میشد", new string[] {"خورده"});
+ // passive perfect indicative
+ assertAnalyzesTo(a, "خورده شدهاست", new string[] {"خورده"});
+ // passive imperfective perfect indicative
+ assertAnalyzesTo(a, "خورده میشدهاست", new string[] {"خورده"});
+ // passive pluperfect indicative
+ assertAnalyzesTo(a, "خورده شده بود", new string[] {"خورده"});
+ // passive imperfective pluperfect indicative
+ assertAnalyzesTo(a, "خورده میشده بود", new string[] {"خورده"});
+ // passive future indicative
+ assertAnalyzesTo(a, "خورده خواهد شد", new string[] {"خورده"});
+ // passive present progressive indicative
+ assertAnalyzesTo(a, "دارد خورده میشود", new string[] {"خورده"});
+ // passive preterite progressive indicative
+ assertAnalyzesTo(a, "داشت خورده میشد", new string[] {"خورده"});
+ // passive present subjunctive
+ assertAnalyzesTo(a, "خورده شود", new string[] {"خورده"});
+ // passive preterite subjunctive
+ assertAnalyzesTo(a, "خورده شده باشد", new string[] {"خورده"});
+ // passive imperfective preterite subjunctive
+ assertAnalyzesTo(a, "خورده میشده باشد", new string[] {"خورده"});
+ // passive pluperfect subjunctive
+ assertAnalyzesTo(a, "خورده شده بوده باشد", new string[] {"خورده"});
+ // passive imperfective pluperfect subjunctive
+ assertAnalyzesTo(a, "خورده میشده بوده باشد", new string[] {"خورده"});
+
+ // Last case: the expected token is identical to the input, i.e. no reduction.
+ // active present subjunctive
+ assertAnalyzesTo(a, "بخورد", new string[] {"بخورد"});
+ }
+
+ /// <summary>
+ /// This test shows how the combination of tokenization and stopwords creates a
+ /// light-stemming effect for verbs.
+ ///
+ /// In this case, these forms are presented with alternative orthography, using
+ /// arabic yeh and whitespace. This yeh phenomenon is common for legacy text
+ /// due to some previous bugs in Microsoft Windows.
+ ///
+ /// These verb forms are from http://en.wikipedia.org/wiki/Persian_grammar
+ /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testBehaviorVerbsDefective() throws Exception
+ public virtual void testBehaviorVerbsDefective()
+ {
+ // One default-configured analyzer is reused for every assertion below; each
+ // assertion feeds one verb form in and expects exactly one token out.
+ // Per the method summary, these inputs use the alternative orthography
+ // (arabic yeh and whitespace-separated parts).
+ Analyzer a = new PersianAnalyzer(TEST_VERSION_CURRENT);
+ // active present indicative
+ assertAnalyzesTo(a, "مي خورد", new string[] {"خورد"});
+ // active preterite indicative
+ assertAnalyzesTo(a, "خورد", new string[] {"خورد"});
+ // active imperfective preterite indicative
+ assertAnalyzesTo(a, "مي خورد", new string[] {"خورد"});
+ // active future indicative
+ assertAnalyzesTo(a, "خواهد خورد", new string[] {"خورد"});
+ // active present progressive indicative
+ assertAnalyzesTo(a, "دارد مي خورد", new string[] {"خورد"});
+ // active preterite progressive indicative
+ assertAnalyzesTo(a, "داشت مي خورد", new string[] {"خورد"});
+
+ // From here on the expected token is a different (longer) form than above.
+ // active perfect indicative
+ assertAnalyzesTo(a, "خورده است", new string[] {"خورده"});
+ // active imperfective perfect indicative
+ assertAnalyzesTo(a, "مي خورده است", new string[] {"خورده"});
+ // active pluperfect indicative
+ assertAnalyzesTo(a, "خورده بود", new string[] {"خورده"});
+ // active imperfective pluperfect indicative
+ assertAnalyzesTo(a, "مي خورده بود", new string[] {"خورده"});
+ // active preterite subjunctive
+ assertAnalyzesTo(a, "خورده باشد", new string[] {"خورده"});
+ // active imperfective preterite subjunctive
+ assertAnalyzesTo(a, "مي خورده باشد", new string[] {"خورده"});
+ // active pluperfect subjunctive
+ assertAnalyzesTo(a, "خورده بوده باشد", new string[] {"خورده"});
+ // active imperfective pluperfect subjunctive
+ assertAnalyzesTo(a, "مي خورده بوده باشد", new string[] {"خورده"});
+ // passive present indicative
+ assertAnalyzesTo(a, "خورده مي شود", new string[] {"خورده"});
+ // passive preterite indicative
+ assertAnalyzesTo(a, "خورده شد", new string[] {"خورده"});
+ // passive imperfective preterite indicative
+ assertAnalyzesTo(a, "خورده مي شد", new string[] {"خورده"});
+ // passive perfect indicative
+ assertAnalyzesTo(a, "خورده شده است", new string[] {"خورده"});
+ // passive imperfective perfect indicative
+ assertAnalyzesTo(a, "خورده مي شده است", new string[] {"خورده"});
+ // passive pluperfect indicative
+ assertAnalyzesTo(a, "خورده شده بود", new string[] {"خورده"});
+ // passive imperfective pluperfect indicative
+ assertAnalyzesTo(a, "خورده مي شده بود", new string[] {"خورده"});
+ // passive future indicative
+ assertAnalyzesTo(a, "خورده خواهد شد", new string[] {"خورده"});
+ // passive present progressive indicative
+ assertAnalyzesTo(a, "دارد خورده مي شود", new string[] {"خورده"});
+ // passive preterite progressive indicative
+ assertAnalyzesTo(a, "داشت خورده مي شد", new string[] {"خورده"});
+ // passive present subjunctive
+ assertAnalyzesTo(a, "خورده شود", new string[] {"خورده"});
+ // passive preterite subjunctive
+ assertAnalyzesTo(a, "خورده شده باشد", new string[] {"خورده"});
+ // passive imperfective preterite subjunctive
+ assertAnalyzesTo(a, "خورده مي شده باشد", new string[] {"خورده"});
+ // passive pluperfect subjunctive
+ assertAnalyzesTo(a, "خورده شده بوده باشد", new string[] {"خورده"});
+ // passive imperfective pluperfect subjunctive
+ assertAnalyzesTo(a, "خورده مي شده بوده باشد", new string[] {"خورده"});
+
+ // Last case: the expected token is identical to the input, i.e. no reduction.
+ // active present subjunctive
+ assertAnalyzesTo(a, "بخورد", new string[] {"بخورد"});
+ }
+
+ /// <summary>
+ /// This test shows how the combination of tokenization (breaking on zero-width
+ /// non-joiner or space) and stopwords creates a light-stemming effect for
+ /// nouns, removing the plural -ha.
+ /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testBehaviorNouns() throws Exception
+ public virtual void testBehaviorNouns()
+ {
+ // Both written forms of the plural are expected to reduce to the same single token.
+ Analyzer a = new PersianAnalyzer(TEST_VERSION_CURRENT);
+ // Plural suffix separated from the noun by an ordinary space.
+ assertAnalyzesTo(a, "برگ ها", new string[] {"برگ"});
+ // Plural suffix separated from the noun by a zero-width non-joiner. The separator
+ // is spelled as an explicit \u200C escape so the invisible character cannot be
+ // silently lost by an editor, converter or mail transport.
+ assertAnalyzesTo(a, "برگ\u200Cها", new string[] {"برگ"});
+ }
+
+ /// <summary>
+ /// Test showing that non-persian text is treated very much like SimpleAnalyzer
+ /// (lowercased, etc)
+ /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testBehaviorNonPersian() throws Exception
+ public virtual void testBehaviorNonPersian()
+ {
+ // Latin-script input is expected back lowercased and without the trailing period.
+ string[] expectedTokens = {"english", "test"};
+ Analyzer persianAnalyzer = new PersianAnalyzer(TEST_VERSION_CURRENT);
+ assertAnalyzesTo(persianAnalyzer, "English test.", expectedTokens);
+ }
+
+ /// <summary>
+ /// Basic test ensuring that tokenStream works correctly.
+ /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testReusableTokenStream() throws Exception
+ public virtual void testReusableTokenStream()
+ {
+ // The same analyzer instance is used for two consecutive inputs.
+ Analyzer a = new PersianAnalyzer(TEST_VERSION_CURRENT);
+ assertAnalyzesTo(a, "خورده مي شده بوده باشد", new string[] {"خورده"});
+ // Noun and plural suffix are separated by a zero-width non-joiner, written as an
+ // explicit \u200C escape so the invisible character cannot be silently lost.
+ assertAnalyzesTo(a, "برگ\u200Cها", new string[] {"برگ"});
+ }
+
+ /// <summary>
+ /// Test that custom stopwords work, and are not case-sensitive.
+ /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testCustomStopwords() throws Exception
+ public virtual void testCustomStopwords()
+ {
+ // Build the custom stop set first, then hand it to the analyzer.
+ CharArraySet customStopWords = new CharArraySet(TEST_VERSION_CURRENT, asSet("the", "and", "a"), false);
+ PersianAnalyzer stopwordAnalyzer = new PersianAnalyzer(TEST_VERSION_CURRENT, customStopWords);
+ // The capitalised "The" in the input is expected to be removed as well.
+ string[] expectedTokens = {"quick", "brown", "fox"};
+ assertAnalyzesTo(stopwordAnalyzer, "The quick brown fox.", expectedTokens);
+ }
+
+ /// <summary>
+ /// blast some random strings through the analyzer </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testRandomStrings() throws Exception
+ public virtual void testRandomStrings()
+ {
+ // Arguments are prepared in the same left-to-right order as the inline call.
+ var randomSource = random();
+ var persianAnalyzer = new PersianAnalyzer(TEST_VERSION_CURRENT);
+ var iterationCount = 1000 * RANDOM_MULTIPLIER;
+ checkRandomData(randomSource, persianAnalyzer, iterationCount);
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Fa/TestPersianCharFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Fa/TestPersianCharFilter.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Fa/TestPersianCharFilter.cs
new file mode 100644
index 0000000..71e6912
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Fa/TestPersianCharFilter.cs
@@ -0,0 +1,60 @@
+namespace org.apache.lucene.analysis.fa
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ /// <summary>
+ /// Exercises PersianCharFilter by placing it in front of a MockTokenizer.
+ /// </summary>
+ public class TestPersianCharFilter : BaseTokenStreamTestCase
+ {
+ // Analyzer shared by the tests in this class.
+ private Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper();
+
+ /// <summary>
+ /// Analyzer whose initReader wraps the incoming reader in a PersianCharFilter and
+ /// whose token stream consists of a MockTokenizer only. (Presumably the converter's
+ /// replacement for an anonymous Analyzer subclass in the Java original.)
+ /// </summary>
+ private class AnalyzerAnonymousInnerClassHelper : Analyzer
+ {
+ protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+ {
+ MockTokenizer tokenizer = new MockTokenizer(reader);
+ return new TokenStreamComponents(tokenizer);
+ }
+
+ protected internal override Reader initReader(string fieldName, Reader reader)
+ {
+ PersianCharFilter charFilter = new PersianCharFilter(reader);
+ return charFilter;
+ }
+ }
+
+ /// <summary>
+ /// The input has a zero-width non-joiner (\u200C) between "a" and "test"; the
+ /// two words are expected to come out as separate tokens.
+ /// </summary>
+ public virtual void testBasics()
+ {
+ string[] expectedTokens = {"this", "is", "a", "test"};
+ assertAnalyzesTo(analyzer, "this is a\u200Ctest", expectedTokens);
+ }
+
+ /// <summary>
+ /// Blasts some random strings through the analyzer.
+ /// </summary>
+ public virtual void testRandomStrings()
+ {
+ var randomSource = random();
+ var iterationCount = 1000 * RANDOM_MULTIPLIER;
+ checkRandomData(randomSource, analyzer, iterationCount);
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Fa/TestPersianNormalizationFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Fa/TestPersianNormalizationFilter.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Fa/TestPersianNormalizationFilter.cs
new file mode 100644
index 0000000..7d47ae4
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Fa/TestPersianNormalizationFilter.cs
@@ -0,0 +1,110 @@
+namespace org.apache.lucene.analysis.fa
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using ArabicLetterTokenizer = org.apache.lucene.analysis.ar.ArabicLetterTokenizer;
+ using KeywordTokenizer = org.apache.lucene.analysis.core.KeywordTokenizer;
+
+ /// <summary>
+ /// Test the Persian Normalization Filter
+ ///
+ /// </summary>
+ public class TestPersianNormalizationFilter : BaseTokenStreamTestCase
+ {
+
+ // Each single-pair test below hands (input, expected) to check(); the method name
+ // indicates which character variant the pair is meant to exercise.
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testFarsiYeh() throws java.io.IOException
+ public virtual void testFarsiYeh()
+ {
+ check("های", "هاي");
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testYehBarree() throws java.io.IOException
+ public virtual void testYehBarree()
+ {
+ check("هاے", "هاي");
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testKeheh() throws java.io.IOException
+ public virtual void testKeheh()
+ {
+ check("کشاندن", "كشاندن");
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testHehYeh() throws java.io.IOException
+ public virtual void testHehYeh()
+ {
+ check("كتابۀ", "كتابه");
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testHehHamzaAbove() throws java.io.IOException
+ public virtual void testHehHamzaAbove()
+ {
+ check("كتابهٔ", "كتابه");
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testHehGoal() throws java.io.IOException
+ public virtual void testHehGoal()
+ {
+ check("زادہ", "زاده");
+ }
+
+ /// <summary>
+ /// Tokenizes <paramref name="input"/> with an ArabicLetterTokenizer, runs the result
+ /// through a PersianNormalizationFilter, and asserts that the stream's contents are
+ /// the single token <paramref name="expected"/>.
+ /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private void check(final String input, final String expected) throws java.io.IOException
+//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
+ private void check(string input, string expected)
+ {
+ ArabicLetterTokenizer tokenStream = new ArabicLetterTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
+ PersianNormalizationFilter filter = new PersianNormalizationFilter(tokenStream);
+ assertTokenStreamContents(filter, new string[]{expected});
+ }
+
+ /// <summary>
+ /// An empty input analyzed as a single keyword token is expected to stay empty.
+ /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testEmptyTerm() throws java.io.IOException
+ public virtual void testEmptyTerm()
+ {
+ Analyzer a = new AnalyzerAnonymousInnerClassHelper();
+ checkOneTerm(a, "", "");
+ }
+
+ /// <summary>
+ /// Analyzer that feeds a KeywordTokenizer into a PersianNormalizationFilter.
+ /// It needs no state from the enclosing test, so it keeps no reference to it.
+ /// </summary>
+ private class AnalyzerAnonymousInnerClassHelper : Analyzer
+ {
+ protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+ {
+ Tokenizer tokenizer = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, new PersianNormalizationFilter(tokenizer));
+ }
+ }
+
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Fa/TestPersianNormalizationFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Fa/TestPersianNormalizationFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Fa/TestPersianNormalizationFilterFactory.cs
new file mode 100644
index 0000000..fd666ea
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Fa/TestPersianNormalizationFilterFactory.cs
@@ -0,0 +1,60 @@
+namespace org.apache.lucene.analysis.fa
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using BaseTokenStreamFactoryTestCase = org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+
+ /// <summary>
+ /// Simple tests to ensure the Persian normalization factory is working.
+ /// </summary>
+ public class TestPersianNormalizationFilterFactory : BaseTokenStreamFactoryTestCase
+ {
+ /// <summary>
+ /// Ensure the filter actually normalizes persian text.
+ /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testNormalization() throws Exception
+ public virtual void testNormalization()
+ {
+ Reader reader = new StringReader("های");
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ // Obtain the filter through its factory name and wrap the tokenizer with it.
+ stream = tokenFilterFactory("PersianNormalization").create(stream);
+ assertTokenStreamContents(stream, new string[] {"هاي"});
+ }
+
+ /// <summary>
+ /// Test that bogus arguments result in exception </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testBogusArguments() throws Exception
+ public virtual void testBogusArguments()
+ {
+ try
+ {
+ tokenFilterFactory("PersianNormalization", "bogusArg", "bogusValue");
+ // Reaching this line means the unknown argument was accepted without an exception.
+ fail();
+ }
+ catch (System.ArgumentException expected)
+ {
+ // System.String exposes Contains (capital C); the Java-style lowercase
+ // contains(...) left over from conversion is not a member of string.
+ assertTrue(expected.Message.Contains("Unknown parameters"));
+ }
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Fi/TestFinnishAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Fi/TestFinnishAnalyzer.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Fi/TestFinnishAnalyzer.cs
new file mode 100644
index 0000000..4cd9941
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Fi/TestFinnishAnalyzer.cs
@@ -0,0 +1,70 @@
+namespace org.apache.lucene.analysis.fi
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+
+ /// <summary>
+ /// Checks construction, stemming, stopword removal and stem exclusion for FinnishAnalyzer.
+ /// </summary>
+ public class TestFinnishAnalyzer : BaseTokenStreamTestCase
+ {
+ /// <summary>
+ /// Only constructs the analyzer. Per the original note, this fails with an NPE
+ /// when the stopwords file is missing in the classpath.
+ /// </summary>
+ public virtual void testResourcesAvailable()
+ {
+ new FinnishAnalyzer(TEST_VERSION_CURRENT);
+ }
+
+ /// <summary>
+ /// Test stopwords and stemming.
+ /// </summary>
+ public virtual void testBasics()
+ {
+ Analyzer finnish = new FinnishAnalyzer(TEST_VERSION_CURRENT);
+
+ // stemming: both inflected forms are expected to produce the same term
+ string sharedStem = "edeltäj";
+ checkOneTerm(finnish, "edeltäjiinsä", sharedStem);
+ checkOneTerm(finnish, "edeltäjistään", sharedStem);
+
+ // stopword: no token is expected to survive
+ string[] noTokens = new string[0];
+ assertAnalyzesTo(finnish, "olla", noTokens);
+ }
+
+ /// <summary>
+ /// Test use of the exclusion set.
+ /// </summary>
+ public virtual void testExclude()
+ {
+ CharArraySet excludedForms = new CharArraySet(TEST_VERSION_CURRENT, asSet("edeltäjistään"), false);
+ Analyzer excluding = new FinnishAnalyzer(TEST_VERSION_CURRENT, FinnishAnalyzer.DefaultStopSet, excludedForms);
+
+ // not in the exclusion set: still reduced to the stem
+ checkOneTerm(excluding, "edeltäjiinsä", "edeltäj");
+ // in the exclusion set: expected back unchanged
+ checkOneTerm(excluding, "edeltäjistään", "edeltäjistään");
+ }
+
+ /// <summary>
+ /// Blasts some random strings through the analyzer.
+ /// </summary>
+ public virtual void testRandomStrings()
+ {
+ var randomSource = random();
+ var finnish = new FinnishAnalyzer(TEST_VERSION_CURRENT);
+ var iterationCount = 1000 * RANDOM_MULTIPLIER;
+ checkRandomData(randomSource, finnish, iterationCount);
+ }
+ }
+
+}
\ No newline at end of file