You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by rm...@apache.org on 2010/02/06 00:05:49 UTC
svn commit: r907125 [3/3] - in /lucene/java/trunk: ./ contrib/
contrib/analyzers/common/src/java/org/apache/lucene/analysis/da/
contrib/analyzers/common/src/java/org/apache/lucene/analysis/de/
contrib/analyzers/common/src/java/org/apache/lucene/analysi...
Modified: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java?rev=907125&r1=907124&r2=907125&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java Fri Feb 5 23:05:46 2010
@@ -17,6 +17,8 @@
* limitations under the License.
*/
+import java.io.IOException;
+
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.util.Version;
@@ -113,6 +115,94 @@
}
+ /**
+ * @deprecated remove this test for Lucene 4.0
+ */
+ @Deprecated
+ public void testAnalyzer30() throws Exception {
+ FrenchAnalyzer fa = new FrenchAnalyzer(Version.LUCENE_30);
+
+ assertAnalyzesTo(fa, "", new String[] {
+ });
+
+ assertAnalyzesTo(
+ fa,
+ "chien chat cheval",
+ new String[] { "chien", "chat", "cheval" });
+
+ assertAnalyzesTo(
+ fa,
+ "chien CHAT CHEVAL",
+ new String[] { "chien", "chat", "cheval" });
+
+ assertAnalyzesTo(
+ fa,
+ " chien ,? + = - CHAT /: > CHEVAL",
+ new String[] { "chien", "chat", "cheval" });
+
+ assertAnalyzesTo(fa, "chien++", new String[] { "chien" });
+
+ assertAnalyzesTo(
+ fa,
+ "mot \"entreguillemet\"",
+ new String[] { "mot", "entreguillemet" });
+
+ // let's do some french specific tests now
+
+ /* 1. couldn't resist
+ I would expect this to stay one term as in French the minus
+ sign is often used for composing words */
+ assertAnalyzesTo(
+ fa,
+ "Jean-François",
+ new String[] { "jean", "françois" });
+
+ // 2. stopwords
+ assertAnalyzesTo(
+ fa,
+ "le la chien les aux chat du des à cheval",
+ new String[] { "chien", "chat", "cheval" });
+
+ // some nouns and adjectives
+ assertAnalyzesTo(
+ fa,
+ "lances chismes habitable chiste éléments captifs",
+ new String[] {
+ "lanc",
+ "chism",
+ "habit",
+ "chist",
+ "élément",
+ "captif" });
+
+ // some verbs
+ assertAnalyzesTo(
+ fa,
+ "finissions souffrirent rugissante",
+ new String[] { "fin", "souffr", "rug" });
+
+ // some everything else
+ // aujourd'hui stays one term which is OK
+ assertAnalyzesTo(
+ fa,
+ "C3PO aujourd'hui oeuf ïâöûàä anticonstitutionnellement Java++ ",
+ new String[] {
+ "c3po",
+ "aujourd'hui",
+ "oeuf",
+ "ïâöûàä",
+ "anticonstitutionnel",
+ "jav" });
+
+ // some more everything else
+ // here 1940-1945 stays as one term, 1940:1945 not ?
+ assertAnalyzesTo(
+ fa,
+ "33Bis 1940-1945 1940:1945 (---i+++)*",
+ new String[] { "33bis", "1940-1945", "1940", "1945", "i" });
+
+ }
+
public void testReusableTokenStream() throws Exception {
FrenchAnalyzer fa = new FrenchAnalyzer(Version.LUCENE_CURRENT);
// stopwords
@@ -157,4 +247,28 @@
assertAnalyzesTo(fa, "habitable chiste", new String[] { "habitable",
"chist" });
}
+
+ public void testElision() throws Exception {
+ FrenchAnalyzer fa = new FrenchAnalyzer(Version.LUCENE_CURRENT);
+ assertAnalyzesTo(fa, "voir l'embrouille", new String[] { "voir", "embrouill" });
+ }
+
+ /**
+ * Prior to 3.1, this analyzer had no lowercase filter.
+ * stopwords were case sensitive. Preserve this for back compat.
+ * @deprecated Remove this test in Lucene 4.0
+ */
+ @Deprecated
+ public void testBuggyStopwordsCasing() throws IOException {
+ FrenchAnalyzer a = new FrenchAnalyzer(Version.LUCENE_30);
+ assertAnalyzesTo(a, "Votre", new String[] { "votr" });
+ }
+
+ /**
+ * Test that stopwords are not case sensitive
+ */
+ public void testStopwordsCasing() throws IOException {
+ FrenchAnalyzer a = new FrenchAnalyzer(Version.LUCENE_31);
+ assertAnalyzesTo(a, "Votre", new String[] { });
+ }
}
Added: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java?rev=907125&view=auto
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java (added)
+++ lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java Fri Feb 5 23:05:46 2010
@@ -0,0 +1,54 @@
+package org.apache.lucene.analysis.hu;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.util.Version;
+
+public class TestHungarianAnalyzer extends BaseTokenStreamTestCase {
+ /** This test fails with NPE when the
+ * stopwords file is missing in classpath */
+ public void testResourcesAvailable() {
+ new HungarianAnalyzer(Version.LUCENE_CURRENT);
+ }
+
+ /** test stopwords and stemming */
+ public void testBasics() throws IOException {
+ Analyzer a = new HungarianAnalyzer(Version.LUCENE_CURRENT);
+ // stemming
+ checkOneTermReuse(a, "babakocsi", "babakocs");
+ checkOneTermReuse(a, "babakocsijáért", "babakocs");
+ // stopword
+ assertAnalyzesTo(a, "által", new String[] {});
+ }
+
+ /** test use of exclusion set */
+ public void testExclude() throws IOException {
+ Set<String> exclusionSet = new HashSet<String>();
+ exclusionSet.add("babakocsi");
+ Analyzer a = new HungarianAnalyzer(Version.LUCENE_CURRENT,
+ HungarianAnalyzer.getDefaultStopSet(), exclusionSet);
+ checkOneTermReuse(a, "babakocsi", "babakocsi");
+ checkOneTermReuse(a, "babakocsijáért", "babakocs");
+ }
+}
Propchange: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java?rev=907125&view=auto
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java (added)
+++ lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java Fri Feb 5 23:05:46 2010
@@ -0,0 +1,54 @@
+package org.apache.lucene.analysis.it;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.util.Version;
+
+public class TestItalianAnalyzer extends BaseTokenStreamTestCase {
+ /** This test fails with NPE when the
+ * stopwords file is missing in classpath */
+ public void testResourcesAvailable() {
+ new ItalianAnalyzer(Version.LUCENE_CURRENT);
+ }
+
+ /** test stopwords and stemming */
+ public void testBasics() throws IOException {
+ Analyzer a = new ItalianAnalyzer(Version.LUCENE_CURRENT);
+ // stemming
+ checkOneTermReuse(a, "abbandonata", "abbandon");
+ checkOneTermReuse(a, "abbandonati", "abbandon");
+ // stopword
+ assertAnalyzesTo(a, "dallo", new String[] {});
+ }
+
+ /** test use of exclusion set */
+ public void testExclude() throws IOException {
+ Set<String> exclusionSet = new HashSet<String>();
+ exclusionSet.add("abbandonata");
+ Analyzer a = new ItalianAnalyzer(Version.LUCENE_CURRENT,
+ ItalianAnalyzer.getDefaultStopSet(), exclusionSet);
+ checkOneTermReuse(a, "abbandonata", "abbandonata");
+ checkOneTermReuse(a, "abbandonati", "abbandon");
+ }
+}
Propchange: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java?rev=907125&view=auto
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java (added)
+++ lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java Fri Feb 5 23:05:46 2010
@@ -0,0 +1,44 @@
+package org.apache.lucene.analysis.miscellaneous;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.KeywordTokenizer;
+import org.apache.lucene.analysis.PorterStemFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.util.Version;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class TestStemmerOverrideFilter extends BaseTokenStreamTestCase {
+ public void testOverride() throws IOException {
+ // lets make booked stem to books
+ // the override filter will convert "booked" to "books",
+ // but also mark it with KeywordAttribute so Porter will not change it.
+ Map<String,String> dictionary = new HashMap<String,String>();
+ dictionary.put("booked", "books");
+ Tokenizer tokenizer = new KeywordTokenizer(new StringReader("booked"));
+ TokenStream stream = new PorterStemFilter(
+ new StemmerOverrideFilter(Version.LUCENE_CURRENT, tokenizer, dictionary));
+ assertTokenStreamContents(stream, new String[] { "books" });
+ }
+}
Propchange: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java?rev=907125&r1=907124&r2=907125&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java Fri Feb 5 23:05:46 2010
@@ -22,7 +22,6 @@
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;
-import java.io.IOException;
import java.io.StringReader;
/**
Modified: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenizerTest.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenizerTest.java?rev=907125&r1=907124&r2=907125&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenizerTest.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenizerTest.java Fri Feb 5 23:05:46 2010
@@ -18,10 +18,8 @@
*/
-import java.io.IOException;
import java.io.StringReader;
-import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
/**
Modified: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java?rev=907125&r1=907124&r2=907125&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java Fri Feb 5 23:05:46 2010
@@ -100,9 +100,6 @@
check("ophalend", "ophal");
check("ophalers", "ophaler");
check("ophef", "ophef");
- check("opheffen", "ophef"); // versus snowball 'opheff'
- check("opheffende", "ophef"); // versus snowball 'opheff'
- check("opheffing", "ophef"); // versus snowball 'opheff'
check("opheldering", "ophelder");
check("ophemelde", "ophemeld");
check("ophemelen", "ophemel");
@@ -118,6 +115,24 @@
check("ophouden", "ophoud");
}
+ /**
+ * @deprecated remove this test in Lucene 4.0
+ */
+ @Deprecated
+ public void testOldBuggyStemmer() throws Exception {
+ Analyzer a = new DutchAnalyzer(Version.LUCENE_30);
+ checkOneTermReuse(a, "opheffen", "ophef"); // versus snowball 'opheff'
+ checkOneTermReuse(a, "opheffende", "ophef"); // versus snowball 'opheff'
+ checkOneTermReuse(a, "opheffing", "ophef"); // versus snowball 'opheff'
+ }
+
+ public void testSnowballCorrectness() throws Exception {
+ Analyzer a = new DutchAnalyzer(Version.LUCENE_CURRENT);
+ checkOneTermReuse(a, "opheffen", "opheff");
+ checkOneTermReuse(a, "opheffende", "opheff");
+ checkOneTermReuse(a, "opheffing", "opheff");
+ }
+
public void testReusableTokenStream() throws Exception {
Analyzer a = new DutchAnalyzer(Version.LUCENE_CURRENT);
checkOneTermReuse(a, "lichaamsziek", "lichaamsziek");
@@ -161,6 +176,25 @@
checkOneTermReuse(a, "lichamelijk", "somethingentirelydifferent");
}
+ /**
+ * Prior to 3.1, this analyzer had no lowercase filter.
+ * stopwords were case sensitive. Preserve this for back compat.
+ * @deprecated Remove this test in Lucene 4.0
+ */
+ @Deprecated
+ public void testBuggyStopwordsCasing() throws IOException {
+ DutchAnalyzer a = new DutchAnalyzer(Version.LUCENE_30);
+ assertAnalyzesTo(a, "Zelf", new String[] { "zelf" });
+ }
+
+ /**
+ * Test that stopwords are not case sensitive
+ */
+ public void testStopwordsCasing() throws IOException {
+ DutchAnalyzer a = new DutchAnalyzer(Version.LUCENE_31);
+ assertAnalyzesTo(a, "Zelf", new String[] { });
+ }
+
private void check(final String input, final String expected) throws Exception {
checkOneTerm(new DutchAnalyzer(Version.LUCENE_CURRENT), input, expected);
}
Added: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java?rev=907125&view=auto
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java (added)
+++ lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java Fri Feb 5 23:05:46 2010
@@ -0,0 +1,54 @@
+package org.apache.lucene.analysis.no;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.util.Version;
+
+public class TestNorwegianAnalyzer extends BaseTokenStreamTestCase {
+ /** This test fails with NPE when the
+ * stopwords file is missing in classpath */
+ public void testResourcesAvailable() {
+ new NorwegianAnalyzer(Version.LUCENE_CURRENT);
+ }
+
+ /** test stopwords and stemming */
+ public void testBasics() throws IOException {
+ Analyzer a = new NorwegianAnalyzer(Version.LUCENE_CURRENT);
+ // stemming
+ checkOneTermReuse(a, "havnedistriktene", "havnedistrikt");
+ checkOneTermReuse(a, "havnedistrikter", "havnedistrikt");
+ // stopword
+ assertAnalyzesTo(a, "det", new String[] {});
+ }
+
+ /** test use of exclusion set */
+ public void testExclude() throws IOException {
+ Set<String> exclusionSet = new HashSet<String>();
+ exclusionSet.add("havnedistriktene");
+ Analyzer a = new NorwegianAnalyzer(Version.LUCENE_CURRENT,
+ NorwegianAnalyzer.getDefaultStopSet(), exclusionSet);
+ checkOneTermReuse(a, "havnedistriktene", "havnedistriktene");
+ checkOneTermReuse(a, "havnedistrikter", "havnedistrikt");
+ }
+}
Propchange: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java?rev=907125&view=auto
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java (added)
+++ lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java Fri Feb 5 23:05:46 2010
@@ -0,0 +1,54 @@
+package org.apache.lucene.analysis.pt;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.util.Version;
+
+public class TestPortugueseAnalyzer extends BaseTokenStreamTestCase {
+ /** This test fails with NPE when the
+ * stopwords file is missing in classpath */
+ public void testResourcesAvailable() {
+ new PortugueseAnalyzer(Version.LUCENE_CURRENT);
+ }
+
+ /** test stopwords and stemming */
+ public void testBasics() throws IOException {
+ Analyzer a = new PortugueseAnalyzer(Version.LUCENE_CURRENT);
+ // stemming
+ checkOneTermReuse(a, "quilométricas", "quilométr");
+ checkOneTermReuse(a, "quilométricos", "quilométr");
+ // stopword
+ assertAnalyzesTo(a, "não", new String[] {});
+ }
+
+ /** test use of exclusion set */
+ public void testExclude() throws IOException {
+ Set<String> exclusionSet = new HashSet<String>();
+ exclusionSet.add("quilométricas");
+ Analyzer a = new PortugueseAnalyzer(Version.LUCENE_CURRENT,
+ PortugueseAnalyzer.getDefaultStopSet(), exclusionSet);
+ checkOneTermReuse(a, "quilométricas", "quilométricas");
+ checkOneTermReuse(a, "quilométricos", "quilométr");
+ }
+}
Propchange: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java?rev=907125&view=auto
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java (added)
+++ lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java Fri Feb 5 23:05:46 2010
@@ -0,0 +1,54 @@
+package org.apache.lucene.analysis.ro;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.util.Version;
+
+public class TestRomanianAnalyzer extends BaseTokenStreamTestCase {
+ /** This test fails with NPE when the
+ * stopwords file is missing in classpath */
+ public void testResourcesAvailable() {
+ new RomanianAnalyzer(Version.LUCENE_CURRENT);
+ }
+
+ /** test stopwords and stemming */
+ public void testBasics() throws IOException {
+ Analyzer a = new RomanianAnalyzer(Version.LUCENE_CURRENT);
+ // stemming
+ checkOneTermReuse(a, "absenţa", "absenţ");
+ checkOneTermReuse(a, "absenţi", "absenţ");
+ // stopword
+ assertAnalyzesTo(a, "îl", new String[] {});
+ }
+
+ /** test use of exclusion set */
+ public void testExclude() throws IOException {
+ Set<String> exclusionSet = new HashSet<String>();
+ exclusionSet.add("absenţa");
+ Analyzer a = new RomanianAnalyzer(Version.LUCENE_CURRENT,
+ RomanianAnalyzer.getDefaultStopSet(), exclusionSet);
+ checkOneTermReuse(a, "absenţa", "absenţa");
+ checkOneTermReuse(a, "absenţi", "absenţ");
+ }
+}
Propchange: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java?rev=907125&r1=907124&r2=907125&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java Fri Feb 5 23:05:46 2010
@@ -50,9 +50,14 @@
dataDir = new File(System.getProperty("dataDir", "./bin"));
}
- public void testUnicode() throws IOException
+ /**
+ * @deprecated remove this test and its datafiles in Lucene 4.0
+ * the Snowball version has its own data tests.
+ */
+ @Deprecated
+ public void testUnicode30() throws IOException
{
- RussianAnalyzer ra = new RussianAnalyzer(Version.LUCENE_CURRENT);
+ RussianAnalyzer ra = new RussianAnalyzer(Version.LUCENE_30);
inWords =
new InputStreamReader(
new FileInputStream(new File(dataDir, "/org/apache/lucene/analysis/ru/testUTF8.txt")),
@@ -110,12 +115,22 @@
}
}
+ /** @deprecated remove this test in Lucene 4.0: stopwords changed */
+ @Deprecated
+ public void testReusableTokenStream30() throws Exception {
+ Analyzer a = new RussianAnalyzer(Version.LUCENE_30);
+ assertAnalyzesToReuse(a, "Вместе с тем о силе электромагнитной энергии имели представление еще",
+ new String[] { "вмест", "сил", "электромагнитн", "энерг", "имел", "представлен" });
+ assertAnalyzesToReuse(a, "Но знание это хранилось в тайне",
+ new String[] { "знан", "хран", "тайн" });
+ }
+
public void testReusableTokenStream() throws Exception {
Analyzer a = new RussianAnalyzer(Version.LUCENE_CURRENT);
assertAnalyzesToReuse(a, "Вместе с тем о силе электромагнитной энергии имели представление еще",
new String[] { "вмест", "сил", "электромагнитн", "энерг", "имел", "представлен" });
assertAnalyzesToReuse(a, "Но знание это хранилось в тайне",
- new String[] { "знан", "хран", "тайн" });
+ new String[] { "знан", "эт", "хран", "тайн" });
}
Modified: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianLetterTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianLetterTokenizer.java?rev=907125&r1=907124&r2=907125&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianLetterTokenizer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianLetterTokenizer.java Fri Feb 5 23:05:46 2010
@@ -25,7 +25,9 @@
/**
* Testcase for {@link RussianLetterTokenizer}
+ * @deprecated Remove this test class in Lucene 4.0
*/
+@Deprecated
public class TestRussianLetterTokenizer extends BaseTokenStreamTestCase {
public void testRussianLetterTokenizer() throws IOException {
Modified: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianStem.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianStem.java?rev=907125&r1=907124&r2=907125&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianStem.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianStem.java Fri Feb 5 23:05:46 2010
@@ -24,6 +24,10 @@
import java.io.FileInputStream;
import java.util.ArrayList;
+/**
+ * @deprecated Remove this test class (and its datafiles!) in Lucene 4.0
+ */
+@Deprecated
public class TestRussianStem extends LuceneTestCase
{
private ArrayList words = new ArrayList();
Modified: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java?rev=907125&r1=907124&r2=907125&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java Fri Feb 5 23:05:46 2010
@@ -22,11 +22,8 @@
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedList;
-import java.util.HashSet;
-import java.util.Arrays;
import org.apache.lucene.analysis.*;
-import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.miscellaneous.EmptyTokenStream;
import org.apache.lucene.analysis.miscellaneous.PrefixAndSuffixAwareTokenFilter;
import org.apache.lucene.analysis.miscellaneous.SingleTokenTokenStream;
Added: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java?rev=907125&view=auto
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java (added)
+++ lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java Fri Feb 5 23:05:46 2010
@@ -0,0 +1,54 @@
+package org.apache.lucene.analysis.sv;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.util.Version;
+
+/** Exercises {@link SwedishAnalyzer}: resource loading, stemming, stopword removal and stem exclusions. */
+public class TestSwedishAnalyzer extends BaseTokenStreamTestCase {
+ /** Fails with NPE if the stopwords file cannot be found on the classpath. */
+ public void testResourcesAvailable() {
+ new SwedishAnalyzer(Version.LUCENE_CURRENT);
+ }
+
+ /** Checks stemming of two inflected forms and removal of a stopword. */
+ public void testBasics() throws IOException {
+ final Analyzer analyzer = new SwedishAnalyzer(Version.LUCENE_CURRENT);
+ // both inflected forms are expected to reduce to the same stem
+ checkOneTermReuse(analyzer, "jaktkarlarne", "jaktkarl");
+ checkOneTermReuse(analyzer, "jaktkarlens", "jaktkarl");
+ // a stopword is expected to produce no tokens at all
+ assertAnalyzesTo(analyzer, "och", new String[0]);
+ }
+
+ /** Checks that a term placed in the exclusion set is returned unstemmed. */
+ public void testExclude() throws IOException {
+ final Set<String> stemExclusions = new HashSet<String>();
+ stemExclusions.add("jaktkarlarne");
+ final Analyzer analyzer = new SwedishAnalyzer(
+ Version.LUCENE_CURRENT, SwedishAnalyzer.getDefaultStopSet(), stemExclusions);
+ checkOneTermReuse(analyzer, "jaktkarlarne", "jaktkarlarne");
+ checkOneTermReuse(analyzer, "jaktkarlens", "jaktkarl");
+ }
+}
Propchange: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java?rev=907125&view=auto
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java (added)
+++ lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java Fri Feb 5 23:05:46 2010
@@ -0,0 +1,54 @@
+package org.apache.lucene.analysis.tr;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.util.Version;
+
+/** Tests {@link TurkishAnalyzer}: resource loading, stemming, stopword removal and stem exclusions. */
+public class TestTurkishAnalyzer extends BaseTokenStreamTestCase {
+ /** This test fails with NPE when the stopwords file is missing in classpath */
+ public void testResourcesAvailable() {
+ new TurkishAnalyzer(Version.LUCENE_CURRENT);
+ }
+
+ /** test stopwords and stemming */
+ public void testBasics() throws IOException {
+ Analyzer a = new TurkishAnalyzer(Version.LUCENE_CURRENT);
+ // stemming: the suffixed form is reduced to its stem; the stem itself is unchanged
+ checkOneTermReuse(a, "ağacı", "ağaç");
+ checkOneTermReuse(a, "ağaç", "ağaç");
+ // stopword: expected to produce no tokens
+ assertAnalyzesTo(a, "dolayı", new String[] {});
+ }
+
+ /** test use of exclusion set: an excluded term must come back unstemmed */
+ public void testExclude() throws IOException {
+ Set<String> exclusionSet = new HashSet<String>();
+ exclusionSet.add("ağacı");
+ Analyzer a = new TurkishAnalyzer(Version.LUCENE_CURRENT,
+ TurkishAnalyzer.getDefaultStopSet(), exclusionSet);
+ checkOneTermReuse(a, "ağacı", "ağacı");
+ checkOneTermReuse(a, "ağaç", "ağaç");
+ }
+}
Propchange: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java
------------------------------------------------------------------------------
svn:eol-style = native