You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2010/05/17 13:28:05 UTC
svn commit: r945090 [2/2] - in /lucene/dev/trunk: lucene/contrib/
modules/analysis/common/src/java/org/apache/lucene/analysis/el/
modules/analysis/common/src/resources/org/apache/lucene/analysis/el/
modules/analysis/common/src/test/org/apache/lucene/an...
Added: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/el/TestGreekStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/el/TestGreekStemmer.java?rev=945090&view=auto
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/el/TestGreekStemmer.java (added)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/el/TestGreekStemmer.java Mon May 17 11:28:04 2010
@@ -0,0 +1,508 @@
+package org.apache.lucene.analysis.el;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+
+public class TestGreekStemmer extends BaseTokenStreamTestCase {
+ Analyzer a = new GreekAnalyzer(TEST_VERSION_CURRENT);
+
+ public void testMasculineNouns() throws Exception {
+ // -ος
+ checkOneTerm(a, "άνθρωπος", "ανθρωπ");
+ checkOneTerm(a, "ανθρώπου", "ανθρωπ");
+ checkOneTerm(a, "άνθρωπο", "ανθρωπ");
+ checkOneTerm(a, "άνθρωπε", "ανθρωπ");
+ checkOneTerm(a, "άνθρωποι", "ανθρωπ");
+ checkOneTerm(a, "ανθρώπων", "ανθρωπ");
+ checkOneTerm(a, "ανθρώπους", "ανθρωπ");
+ checkOneTerm(a, "άνθρωποι", "ανθρωπ");
+
+ // -ης
+ checkOneTerm(a, "πελάτης", "πελατ");
+ checkOneTerm(a, "πελάτη", "πελατ");
+ checkOneTerm(a, "πελάτες", "πελατ");
+ checkOneTerm(a, "πελατών", "πελατ");
+
+ // -ας/-ες
+ checkOneTerm(a, "ελέφαντας", "ελεφαντ");
+ checkOneTerm(a, "ελέφαντα", "ελεφαντ");
+ checkOneTerm(a, "ελέφαντες", "ελεφαντ");
+ checkOneTerm(a, "ελεφάντων", "ελεφαντ");
+
+ // -ας/-αδες
+ checkOneTerm(a, "μπαμπάς", "μπαμπ");
+ checkOneTerm(a, "μπαμπά", "μπαμπ");
+ checkOneTerm(a, "μπαμπάδες", "μπαμπ");
+ checkOneTerm(a, "μπαμπάδων", "μπαμπ");
+
+ // -ης/-ηδες
+ checkOneTerm(a, "μπακάλης", "μπακαλ");
+ checkOneTerm(a, "μπακάλη", "μπακαλ");
+ checkOneTerm(a, "μπακάληδες", "μπακαλ");
+ checkOneTerm(a, "μπακάληδων", "μπακαλ");
+
+ // -ες
+ checkOneTerm(a, "καφές", "καφ");
+ checkOneTerm(a, "καφέ", "καφ");
+ checkOneTerm(a, "καφέδες", "καφ");
+ checkOneTerm(a, "καφέδων", "καφ");
+
+ // -έας/είς
+ checkOneTerm(a, "γραμματέας", "γραμματε");
+ checkOneTerm(a, "γραμματέα", "γραμματε");
+ // plural forms conflate w/ each other, not w/ the sing forms
+ checkOneTerm(a, "γραμματείς", "γραμματ");
+ checkOneTerm(a, "γραμματέων", "γραμματ");
+
+ // -ους/οι
+ checkOneTerm(a, "απόπλους", "αποπλ");
+ checkOneTerm(a, "απόπλου", "αποπλ");
+ checkOneTerm(a, "απόπλοι", "αποπλ");
+ checkOneTerm(a, "απόπλων", "αποπλ");
+
+ // -ους/-ουδες
+ checkOneTerm(a, "παππούς", "παππ");
+ checkOneTerm(a, "παππού", "παππ");
+ checkOneTerm(a, "παππούδες", "παππ");
+ checkOneTerm(a, "παππούδων", "παππ");
+
+ // -ης/-εις
+ checkOneTerm(a, "λάτρης", "λατρ");
+ checkOneTerm(a, "λάτρη", "λατρ");
+ checkOneTerm(a, "λάτρεις", "λατρ");
+ checkOneTerm(a, "λάτρεων", "λατρ");
+
+ // -υς
+ checkOneTerm(a, "πέλεκυς", "πελεκ");
+ checkOneTerm(a, "πέλεκυ", "πελεκ");
+ checkOneTerm(a, "πελέκεις", "πελεκ");
+ checkOneTerm(a, "πελέκεων", "πελεκ");
+
+ // -ωρ
+ // note: nom./voc. doesn't conflate w/ the rest
+ checkOneTerm(a, "μέντωρ", "μεντωρ");
+ checkOneTerm(a, "μέντορος", "μεντορ");
+ checkOneTerm(a, "μέντορα", "μεντορ");
+ checkOneTerm(a, "μέντορες", "μεντορ");
+ checkOneTerm(a, "μεντόρων", "μεντορ");
+
+ // -ων
+ checkOneTerm(a, "αγώνας", "αγων");
+ checkOneTerm(a, "αγώνος", "αγων");
+ checkOneTerm(a, "αγώνα", "αγων");
+ checkOneTerm(a, "αγώνα", "αγων");
+ checkOneTerm(a, "αγώνες", "αγων");
+ checkOneTerm(a, "αγώνων", "αγων");
+
+ // -ας/-ηδες
+ checkOneTerm(a, "αέρας", "αερ");
+ checkOneTerm(a, "αέρα", "αερ");
+ checkOneTerm(a, "αέρηδες", "αερ");
+ checkOneTerm(a, "αέρηδων", "αερ");
+
+ // -ης/-ητες
+ checkOneTerm(a, "γόης", "γο");
+ checkOneTerm(a, "γόη", "γοη"); // too short
+ // the two plural forms conflate
+ checkOneTerm(a, "γόητες", "γοητ");
+ checkOneTerm(a, "γοήτων", "γοητ");
+ }
+
+ public void testFeminineNouns() throws Exception {
+ // -α/-ες,-ών
+ checkOneTerm(a, "φορά", "φορ");
+ checkOneTerm(a, "φοράς", "φορ");
+ checkOneTerm(a, "φορές", "φορ");
+ checkOneTerm(a, "φορών", "φορ");
+
+ // -α/-ες,-ων
+ checkOneTerm(a, "αγελάδα", "αγελαδ");
+ checkOneTerm(a, "αγελάδας", "αγελαδ");
+ checkOneTerm(a, "αγελάδες", "αγελαδ");
+ checkOneTerm(a, "αγελάδων", "αγελαδ");
+
+ // -η/-ες
+ checkOneTerm(a, "ζάχαρη", "ζαχαρ");
+ checkOneTerm(a, "ζάχαρης", "ζαχαρ");
+ checkOneTerm(a, "ζάχαρες", "ζαχαρ");
+ checkOneTerm(a, "ζαχάρεων", "ζαχαρ");
+
+ // -η/-εις
+ checkOneTerm(a, "τηλεόραση", "τηλεορασ");
+ checkOneTerm(a, "τηλεόρασης", "τηλεορασ");
+ checkOneTerm(a, "τηλεοράσεις", "τηλεορασ");
+ checkOneTerm(a, "τηλεοράσεων", "τηλεορασ");
+
+ // -α/-αδες
+ checkOneTerm(a, "μαμά", "μαμ");
+ checkOneTerm(a, "μαμάς", "μαμ");
+ checkOneTerm(a, "μαμάδες", "μαμ");
+ checkOneTerm(a, "μαμάδων", "μαμ");
+
+ // -ος
+ checkOneTerm(a, "λεωφόρος", "λεωφορ");
+ checkOneTerm(a, "λεωφόρου", "λεωφορ");
+ checkOneTerm(a, "λεωφόρο", "λεωφορ");
+ checkOneTerm(a, "λεωφόρε", "λεωφορ");
+ checkOneTerm(a, "λεωφόροι", "λεωφορ");
+ checkOneTerm(a, "λεωφόρων", "λεωφορ");
+ checkOneTerm(a, "λεωφόρους", "λεωφορ");
+
+ // -ου
+ checkOneTerm(a, "αλεπού", "αλεπ");
+ checkOneTerm(a, "αλεπούς", "αλεπ");
+ checkOneTerm(a, "αλεπούδες", "αλεπ");
+ checkOneTerm(a, "αλεπούδων", "αλεπ");
+
+ // -έας/είς
+ // note: not all forms conflate
+ checkOneTerm(a, "γραμματέας", "γραμματε");
+ checkOneTerm(a, "γραμματέως", "γραμματ");
+ checkOneTerm(a, "γραμματέα", "γραμματε");
+ checkOneTerm(a, "γραμματείς", "γραμματ");
+ checkOneTerm(a, "γραμματέων", "γραμματ");
+ }
+
+ public void testNeuterNouns() throws Exception {
+ // ending with -ο
+ // note: nom doesnt conflate
+ checkOneTerm(a, "βιβλίο", "βιβλι");
+ checkOneTerm(a, "βιβλίοÏ
", "βιβλ");
+ checkOneTerm(a, "βιβλία", "βιβλ");
+ checkOneTerm(a, "βιβλίÏν", "βιβλ");
+
+ // ending with -ι
+ checkOneTerm(a, "ÏοÏ
λί", "ÏοÏ
λ");
+ checkOneTerm(a, "ÏοÏ
λιοÏ", "ÏοÏ
λ");
+ checkOneTerm(a, "ÏοÏ
λιά", "ÏοÏ
λ");
+ checkOneTerm(a, "ÏοÏ
λιÏν", "ÏοÏ
λ");
+
+ // ending with -α
+ // note: nom. doesnt conflate
+ checkOneTerm(a, "ÏÏÏβλημα", "ÏÏοβλημ");
+ checkOneTerm(a, "ÏÏοβλήμαÏοÏ", "ÏÏοβλημα");
+ checkOneTerm(a, "ÏÏοβλήμαÏα", "ÏÏοβλημα");
+ checkOneTerm(a, "ÏÏοβλημάÏÏν", "ÏÏοβλημα");
+
+ // ending with -οÏ/-οÏ
Ï
+ checkOneTerm(a, "ÏÎλαγοÏ", "Ïελαγ");
+ checkOneTerm(a, "ÏελάγοÏ
Ï", "Ïελαγ");
+ checkOneTerm(a, "Ïελάγη", "Ïελαγ");
+ checkOneTerm(a, "ÏελάγÏν", "Ïελαγ");
+
+ // ending with -ÏÏ/-ÏÏοÏ
+ checkOneTerm(a, "γεγονÏÏ", "γεγον");
+ checkOneTerm(a, "γεγονÏÏοÏ", "γεγον");
+ checkOneTerm(a, "γεγονÏÏα", "γεγον");
+ checkOneTerm(a, "γεγονÏÏÏν", "γεγον");
+
+ // ending with -Ï
/-ιοÏ
+ checkOneTerm(a, "βÏάδÏ
", "βÏαδ");
+ checkOneTerm(a, "βÏάδι", "βÏαδ");
+ checkOneTerm(a, "βÏαδιοÏ", "βÏαδ");
+ checkOneTerm(a, "βÏάδια", "βÏαδ");
+ checkOneTerm(a, "βÏαδιÏν", "βÏαδ");
+
+ // ending with -Ï
/-αÏοÏ
+ // note: nom. doesnt conflate
+ checkOneTerm(a, "δÏÏÏ
", "δοÏ");
+ checkOneTerm(a, "δÏÏαÏοÏ", "δοÏαÏ");
+ checkOneTerm(a, "δÏÏαÏα", "δοÏαÏ");
+ checkOneTerm(a, "δοÏάÏÏν", "δοÏαÏ");
+
+ // ending with -αÏ
+ checkOneTerm(a, "κÏÎαÏ", "κÏε");
+ checkOneTerm(a, "κÏÎαÏοÏ", "κÏε");
+ checkOneTerm(a, "κÏÎαÏα", "κÏε");
+ checkOneTerm(a, "κÏεάÏÏν", "κÏε");
+
+ // ending with -ÏÏ
+ checkOneTerm(a, "λÏ
κÏÏÏÏ", "λÏ
κοÏÏ");
+ checkOneTerm(a, "λÏ
κÏÏÏÏοÏ", "λÏ
κοÏÏ");
+ checkOneTerm(a, "λÏ
κÏÏÏÏα", "λÏ
κοÏÏ");
+ checkOneTerm(a, "λÏ
κοÏÏÏÏν", "λÏ
κοÏÏ");
+
+ // ending with -ον/-οÏ
+ // note: nom. doesnt conflate
+ checkOneTerm(a, "μÎÏον", "μεÏον");
+ checkOneTerm(a, "μÎÏοÏ
", "μεÏ");
+ checkOneTerm(a, "μÎÏα", "μεÏ");
+ checkOneTerm(a, "μÎÏÏν", "μεÏ");
+
+ // ending in -ον/-ονÏοÏ
+ // note: nom. doesnt conflate
+ checkOneTerm(a, "ενδιαÏÎÏον", "ενδιαÏεÏον");
+ checkOneTerm(a, "ενδιαÏÎÏονÏοÏ", "ενδιαÏεÏονÏ");
+ checkOneTerm(a, "ενδιαÏÎÏονÏα", "ενδιαÏεÏονÏ");
+ checkOneTerm(a, "ενδιαÏεÏÏνÏÏν", "ενδιαÏεÏονÏ");
+
+ // ending with -εν/-ενÏοÏ
+ checkOneTerm(a, "ανακοινÏθÎν", "ανακοινÏθεν");
+ checkOneTerm(a, "ανακοινÏθÎνÏοÏ", "ανακοινÏθενÏ");
+ checkOneTerm(a, "ανακοινÏθÎνÏα", "ανακοινÏθενÏ");
+ checkOneTerm(a, "ανακοινÏθÎνÏÏν", "ανακοινÏθενÏ");
+
+ // ending with -αν/-ανÏοÏ
+ checkOneTerm(a, "ÏÏμÏαν", "ÏÏ
μÏ");
+ checkOneTerm(a, "ÏÏμÏανÏοÏ", "ÏÏ
μÏανÏ");
+ checkOneTerm(a, "ÏÏμÏανÏα", "ÏÏ
μÏανÏ");
+ checkOneTerm(a, "ÏÏ
μÏάνÏÏν", "ÏÏ
μÏανÏ");
+
+ // ending with -α/-ακÏοÏ
+ checkOneTerm(a, "γάλα", "γαλ");
+ checkOneTerm(a, "γάλακÏοÏ", "γαλακÏ");
+ checkOneTerm(a, "γάλαÏα", "γαλαÏ");
+ checkOneTerm(a, "γαλάκÏÏν", "γαλακÏ");
+ }
+
+ public void testAdjectives() throws Exception {
+ // ending with -ής, -ές/-είς, -ή
+ checkOneTerm(a, "συνεχής", "συνεχ");
+ checkOneTerm(a, "συνεχούς", "συνεχ");
+ checkOneTerm(a, "συνεχή", "συνεχ");
+ checkOneTerm(a, "συνεχών", "συνεχ");
+ checkOneTerm(a, "συνεχείς", "συνεχ");
+ checkOneTerm(a, "συνεχές", "συνεχ");
+
+ // ending with -ης, -ες/-εις, -η
+ checkOneTerm(a, "συνήθης", "συνηθ");
+ checkOneTerm(a, "συνήθους", "συνηθ");
+ checkOneTerm(a, "συνήθη", "συνηθ");
+ // note: doesn't conflate
+ checkOneTerm(a, "συνήθεις", "συν");
+ checkOneTerm(a, "συνήθων", "συνηθ");
+ checkOneTerm(a, "σύνηθες", "συνηθ");
+
+ // ending with -υς, -υ/-εις, -ια
+ checkOneTerm(a, "βαθύς", "βαθ");
+ checkOneTerm(a, "βαθέος", "βαθε");
+ checkOneTerm(a, "βαθύ", "βαθ");
+ checkOneTerm(a, "βαθείς", "βαθ");
+ checkOneTerm(a, "βαθέων", "βαθ");
+
+ checkOneTerm(a, "βαθιά", "βαθ");
+ checkOneTerm(a, "βαθιάς", "βαθι");
+ checkOneTerm(a, "βαθιές", "βαθι");
+ checkOneTerm(a, "βαθιών", "βαθ");
+
+ checkOneTerm(a, "βαθέα", "βαθε");
+
+ // comparative/superlative
+ checkOneTerm(a, "ψηλός", "ψηλ");
+ checkOneTerm(a, "ψηλότερος", "ψηλ");
+ checkOneTerm(a, "ψηλότατος", "ψηλ");
+
+ checkOneTerm(a, "ωραίος", "ωραι");
+ checkOneTerm(a, "ωραιότερος", "ωραι");
+ checkOneTerm(a, "ωραιότατος", "ωραι");
+
+ checkOneTerm(a, "επιεικής", "επιεικ");
+ checkOneTerm(a, "επιεικέστερος", "επιεικ");
+ checkOneTerm(a, "επιεικέστατος", "επιεικ");
+ }
+
+
+ public void testVerbs() throws Exception {
+ // note, past/present verb stems will not conflate (from the paper)
+ //-ω,-α/-.ω,-.α
+ checkOneTerm(a, "ορίζω", "οριζ");
+ checkOneTerm(a, "όριζα", "οριζ");
+ checkOneTerm(a, "όριζε", "οριζ");
+ checkOneTerm(a, "ορίζοντας", "οριζ");
+ checkOneTerm(a, "ορίζομαι", "οριζ");
+ checkOneTerm(a, "οριζόμουν", "οριζ");
+ checkOneTerm(a, "ορίζεσαι", "οριζ");
+
+ checkOneTerm(a, "όρισα", "ορισ");
+ checkOneTerm(a, "ορίσω", "ορισ");
+ checkOneTerm(a, "όρισε", "ορισ");
+ checkOneTerm(a, "ορίσει", "ορισ");
+
+ checkOneTerm(a, "ορίστηκα", "οριστ");
+ checkOneTerm(a, "οριστώ", "οριστ");
+ checkOneTerm(a, "οριστείς", "οριστ");
+ checkOneTerm(a, "οριστεί", "οριστ");
+
+ checkOneTerm(a, "ορισμένο", "ορισμεν");
+ checkOneTerm(a, "ορισμένη", "ορισμεν");
+ checkOneTerm(a, "ορισμένος", "ορισμεν");
+
+ // -ω,-α/-ξω,-ξα
+ checkOneTerm(a, "ανοίγω", "ανοιγ");
+ checkOneTerm(a, "άνοιγα", "ανοιγ");
+ checkOneTerm(a, "άνοιγε", "ανοιγ");
+ checkOneTerm(a, "ανοίγοντας", "ανοιγ");
+ checkOneTerm(a, "ανοίγομαι", "ανοιγ");
+ checkOneTerm(a, "ανοιγόμουν", "ανοιγ");
+
+ checkOneTerm(a, "άνοιξα", "ανοιξ");
+ checkOneTerm(a, "ανοίξω", "ανοιξ");
+ checkOneTerm(a, "άνοιξε", "ανοιξ");
+ checkOneTerm(a, "ανοίξει", "ανοιξ");
+
+ checkOneTerm(a, "ανοίχτηκα", "ανοιχτ");
+ checkOneTerm(a, "ανοιχτώ", "ανοιχτ");
+ checkOneTerm(a, "ανοίχτηκα", "ανοιχτ");
+ checkOneTerm(a, "ανοιχτείς", "ανοιχτ");
+ checkOneTerm(a, "ανοιχτεί", "ανοιχτ");
+
+ checkOneTerm(a, "ανοίξου", "ανοιξ");
+
+ //-ώ/-άς,-ούσα/-άσω,-ασα
+ checkOneTerm(a, "περνώ", "περν");
+ checkOneTerm(a, "περνάς", "περν");
+ checkOneTerm(a, "περνούσα", "περν");
+ checkOneTerm(a, "πέρναγα", "περν");
+ checkOneTerm(a, "πέρνα", "περν");
+ checkOneTerm(a, "περνώντας", "περν");
+
+ checkOneTerm(a, "πέρασα", "περασ");
+ checkOneTerm(a, "περάσω", "περασ");
+ checkOneTerm(a, "πέρασε", "περασ");
+ checkOneTerm(a, "περάσει", "περασ");
+
+ checkOneTerm(a, "περνιέμαι", "περν");
+ checkOneTerm(a, "περνιόμουν", "περν");
+
+ checkOneTerm(a, "περάστηκα", "περαστ");
+ checkOneTerm(a, "περαστώ", "περαστ");
+ checkOneTerm(a, "περαστείς", "περαστ");
+ checkOneTerm(a, "περαστεί", "περαστ");
+
+ checkOneTerm(a, "περασμένο", "περασμεν");
+ checkOneTerm(a, "περασμένη", "περασμεν");
+ checkOneTerm(a, "περασμένος", "περασμεν");
+
+ // -ώ/-άς,-ούσα/-άξω,-αξα
+ checkOneTerm(a, "πετώ", "πετ");
+ checkOneTerm(a, "πετάς", "πετ");
+ checkOneTerm(a, "πετούσα", "πετ");
+ checkOneTerm(a, "πέταγα", "πετ");
+ checkOneTerm(a, "πέτα", "πετ");
+ checkOneTerm(a, "πετώντας", "πετ");
+ checkOneTerm(a, "πετιέμαι", "πετ");
+ checkOneTerm(a, "πετιόμουν", "πετ");
+
+ checkOneTerm(a, "πέταξα", "πεταξ");
+ checkOneTerm(a, "πετάξω", "πεταξ");
+ checkOneTerm(a, "πέταξε", "πεταξ");
+ checkOneTerm(a, "πετάξει", "πεταξ");
+
+ checkOneTerm(a, "πετάχτηκα", "πεταχτ");
+ checkOneTerm(a, "πεταχτώ", "πεταχτ");
+ checkOneTerm(a, "πεταχτείς", "πεταχτ");
+ checkOneTerm(a, "πεταχτεί", "πεταχτ");
+
+ checkOneTerm(a, "πεταμένο", "πεταμεν");
+ checkOneTerm(a, "πεταμένη", "πεταμεν");
+ checkOneTerm(a, "πεταμένος", "πεταμεν");
+
+ // -ώ/-άς,-ούσα / -έσω,-εσα
+ checkOneTerm(a, "καλώ", "καλ");
+ checkOneTerm(a, "καλούσα", "καλ");
+ checkOneTerm(a, "καλείς", "καλ");
+ checkOneTerm(a, "καλώντας", "καλ");
+
+ checkOneTerm(a, "καλούμαι", "καλ");
+ // pass. imperfect /imp. progressive doesnt conflate
+ checkOneTerm(a, "καλούμουν", "καλουμ");
+ checkOneTerm(a, "καλείσαι", "καλεισα");
+
+ checkOneTerm(a, "καλέστηκα", "καλεστ");
+ checkOneTerm(a, "καλεστώ", "καλεστ");
+ checkOneTerm(a, "καλεστείς", "καλεστ");
+ checkOneTerm(a, "καλεστεί", "καλεστ");
+
+ checkOneTerm(a, "καλεσμένο", "καλεσμεν");
+ checkOneTerm(a, "καλεσμένη", "καλεσμεν");
+ checkOneTerm(a, "καλεσμένος", "καλεσμεν");
+
+ checkOneTerm(a, "φορώ", "φορ");
+ checkOneTerm(a, "φοράς", "φορ");
+ checkOneTerm(a, "φορούσα", "φορ");
+ checkOneTerm(a, "φόραγα", "φορ");
+ checkOneTerm(a, "φόρα", "φορ");
+ checkOneTerm(a, "φορώντας", "φορ");
+ checkOneTerm(a, "φοριέμαι", "φορ");
+ checkOneTerm(a, "φοριόμουν", "φορ");
+ checkOneTerm(a, "φοριέσαι", "φορ");
+
+ checkOneTerm(a, "φόρεσα", "φορεσ");
+ checkOneTerm(a, "φορέσω", "φορεσ");
+ checkOneTerm(a, "φόρεσε", "φορεσ");
+ checkOneTerm(a, "φορέσει", "φορεσ");
+
+ checkOneTerm(a, "φορέθηκα", "φορεθ");
+ checkOneTerm(a, "φορεθώ", "φορεθ");
+ checkOneTerm(a, "φορεθείς", "φορεθ");
+ checkOneTerm(a, "φορεθεί", "φορεθ");
+
+ checkOneTerm(a, "φορεμένο", "φορεμεν");
+ checkOneTerm(a, "φορεμένη", "φορεμεν");
+ checkOneTerm(a, "φορεμένος", "φορεμεν");
+
+ // -ώ/-άς,-ούσα / -ήσω,-ησα
+ checkOneTerm(a, "κρατώ", "κρατ");
+ checkOneTerm(a, "κρατάς", "κρατ");
+ checkOneTerm(a, "κρατούσα", "κρατ");
+ checkOneTerm(a, "κράταγα", "κρατ");
+ checkOneTerm(a, "κράτα", "κρατ");
+ checkOneTerm(a, "κρατώντας", "κρατ");
+
+ checkOneTerm(a, "κράτησα", "κρατ");
+ checkOneTerm(a, "κρατήσω", "κρατ");
+ checkOneTerm(a, "κράτησε", "κρατ");
+ checkOneTerm(a, "κρατήσει", "κρατ");
+
+ checkOneTerm(a, "κρατούμαι", "κρατ");
+ checkOneTerm(a, "κρατιέμαι", "κρατ");
+ // this imperfect form doesnt conflate
+ checkOneTerm(a, "κρατούμουν", "κρατουμ");
+ checkOneTerm(a, "κρατιόμουν", "κρατ");
+ // this imp. prog form doesnt conflate
+ checkOneTerm(a, "κρατείσαι", "κρατεισα");
+
+ checkOneTerm(a, "κρατήθηκα", "κρατ");
+ checkOneTerm(a, "κρατηθώ", "κρατ");
+ checkOneTerm(a, "κρατηθείς", "κρατ");
+ checkOneTerm(a, "κρατηθεί", "κρατ");
+ checkOneTerm(a, "κρατήσου", "κρατ");
+
+ checkOneTerm(a, "κρατημένο", "κρατημεν");
+ checkOneTerm(a, "κρατημένη", "κρατημεν");
+ checkOneTerm(a, "κρατημένος", "κρατημεν");
+
+ // -.μαι,-.μουν / -.ώ,-.ηκα
+ checkOneTerm(a, "κοιμάμαι", "κοιμ");
+ checkOneTerm(a, "κοιμόμουν", "κοιμ");
+ checkOneTerm(a, "κοιμάσαι", "κοιμ");
+
+ checkOneTerm(a, "κοιμήθηκα", "κοιμ");
+ checkOneTerm(a, "κοιμηθώ", "κοιμ");
+ checkOneTerm(a, "κοιμήσου", "κοιμ");
+ checkOneTerm(a, "κοιμηθεί", "κοιμ");
+
+ checkOneTerm(a, "κοιμισμένο", "κοιμισμεν");
+ checkOneTerm(a, "κοιμισμένη", "κοιμισμεν");
+ checkOneTerm(a, "κοιμισμένος", "κοιμισμεν");
+ }
+
+ public void testExceptions() throws Exception {
+ checkOneTerm(a, "καθεστώτα", "καθεστ");
+ checkOneTerm(a, "καθεστώτος", "καθεστ");
+ checkOneTerm(a, "καθεστώς", "καθεστ");
+ checkOneTerm(a, "καθεστώτων", "καθεστ");
+
+ checkOneTerm(a, "χουμε", "χουμ");
+ checkOneTerm(a, "χουμ", "χουμ");
+
+ checkOneTerm(a, "υποταγεσ", "υποταγ");
+ checkOneTerm(a, "υποταγ", "υποταγ");
+
+ checkOneTerm(a, "εμετε", "εμετ");
+ checkOneTerm(a, "εμετ", "εμετ");
+
+ checkOneTerm(a, "αρχοντασ", "αρχοντ");
+ checkOneTerm(a, "αρχοντων", "αρχοντ");
+ }
+}
Propchange: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/el/TestGreekStemmer.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/GreekLowerCaseFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/GreekLowerCaseFilterFactory.java?rev=945090&r1=945089&r2=945090&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/GreekLowerCaseFilterFactory.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/GreekLowerCaseFilterFactory.java Mon May 17 11:28:04 2010
@@ -33,6 +33,7 @@ public class GreekLowerCaseFilterFactory
@Override
public void init(Map<String, String> args) {
super.init(args);
+ assureMatchVersion();
if (args.containsKey("charset"))
throw new SolrException(ErrorCode.SERVER_ERROR,
"The charset parameter is no longer supported. "
Added: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/GreekStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/GreekStemFilterFactory.java?rev=945090&view=auto
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/GreekStemFilterFactory.java (added)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/GreekStemFilterFactory.java Mon May 17 11:28:04 2010
@@ -0,0 +1,30 @@
+package org.apache.solr.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.el.GreekStemFilter;
+
+/** Factory for {@link GreekStemFilter}: {@link #create(TokenStream)} wraps the given stream in a new {@code GreekStemFilter}. */
+public class GreekStemFilterFactory extends BaseTokenFilterFactory {
+
+ public TokenStream create(TokenStream input) {
+ return new GreekStemFilter(input);
+ }
+
+}
Propchange: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/GreekStemFilterFactory.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestGreekLowerCaseFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestGreekLowerCaseFilterFactory.java?rev=945090&r1=945089&r2=945090&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestGreekLowerCaseFilterFactory.java (original)
+++ lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestGreekLowerCaseFilterFactory.java Mon May 17 11:28:04 2010
@@ -31,10 +31,11 @@ public class TestGreekLowerCaseFilterFac
/**
* Ensure the filter actually lowercases (and a bit more) greek text.
*/
- public void testStemming() throws Exception {
+ public void testNormalization() throws Exception {
Reader reader = new StringReader("Μάϊος ΜΆΪΟΣ");
Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, reader);
GreekLowerCaseFilterFactory factory = new GreekLowerCaseFilterFactory();
+ factory.init(DEFAULT_VERSION_PARAM);
TokenStream stream = factory.create(tokenizer);
assertTokenStreamContents(stream, new String[] { "μαιοσ", "μαιοσ" });
}
Added: lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestGreekStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestGreekStemFilterFactory.java?rev=945090&view=auto
==============================================================================
--- lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestGreekStemFilterFactory.java (added)
+++ lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestGreekStemFilterFactory.java Mon May 17 11:28:04 2010
@@ -0,0 +1,40 @@
+package org.apache.solr.analysis;
+
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.el.GreekLowerCaseFilter;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Simple tests to ensure the Greek stem filter factory is working.
+ */
+public class TestGreekStemFilterFactory extends BaseTokenTestCase {
+ public void testStemming() throws Exception {
+ Reader reader = new StringReader("άνθρωπος");
+ Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, reader);
+ TokenStream normalized = new GreekLowerCaseFilter(DEFAULT_VERSION, tokenizer);
+ GreekStemFilterFactory factory = new GreekStemFilterFactory();
+ TokenStream stream = factory.create(normalized);
+ assertTokenStreamContents(stream, new String[] { "ανθρωπ" });
+ }
+}
Propchange: lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestGreekStemFilterFactory.java
------------------------------------------------------------------------------
svn:eol-style = native