You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by rm...@apache.org on 2010/02/06 00:05:49 UTC
svn commit: r907125 [3/3] - in /lucene/java/trunk: ./ contrib/ contrib/analyzers/common/src/java/org/apache/lucene/analysis/da/ contrib/analyzers/common/src/java/org/apache/lucene/analysis/de/ contrib/analyzers/common/src/java/org/apache/lucene/analysi...

Modified: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java?rev=907125&r1=907124&r2=907125&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java Fri Feb  5 23:05:46 2010
@@ -17,6 +17,8 @@
  * limitations under the License.
  */
 
+import java.io.IOException;
+
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.util.Version;
@@ -113,6 +115,94 @@
 
 	}
 	
+	/**
+	 * @deprecated remove this test for Lucene 4.0
+	 */
+	@Deprecated
+	public void testAnalyzer30() throws Exception {
+	    FrenchAnalyzer fa = new FrenchAnalyzer(Version.LUCENE_30);
+	  
+	    assertAnalyzesTo(fa, "", new String[] {
+	    });
+
+	    assertAnalyzesTo(
+	      fa,
+	      "chien chat cheval",
+	      new String[] { "chien", "chat", "cheval" });
+
+	    assertAnalyzesTo(
+	      fa,
+	      "chien CHAT CHEVAL",
+	      new String[] { "chien", "chat", "cheval" });
+
+	    assertAnalyzesTo(
+	      fa,
+	      "  chien  ,? + = -  CHAT /: > CHEVAL",
+	      new String[] { "chien", "chat", "cheval" });
+
+	    assertAnalyzesTo(fa, "chien++", new String[] { "chien" });
+
+	    assertAnalyzesTo(
+	      fa,
+	      "mot \"entreguillemet\"",
+	      new String[] { "mot", "entreguillemet" });
+
+	    // let's do some french specific tests now  
+
+	    /* 1. couldn't resist
+	     I would expect this to stay one term as in French the minus 
+	    sign is often used for composing words */
+	    assertAnalyzesTo(
+	      fa,
+	      "Jean-FranÃ§ois",
+	      new String[] { "jean", "franÃ§ois" });
+
+	    // 2. stopwords
+	    assertAnalyzesTo(
+	      fa,
+	      "le la chien les aux chat du des Ã  cheval",
+	      new String[] { "chien", "chat", "cheval" });
+
+	    // some nouns and adjectives
+	    assertAnalyzesTo(
+	      fa,
+	      "lances chismes habitable chiste Ã©lÃ©ments captifs",
+	      new String[] {
+	        "lanc",
+	        "chism",
+	        "habit",
+	        "chist",
+	        "Ã©lÃ©ment",
+	        "captif" });
+
+	    // some verbs
+	    assertAnalyzesTo(
+	      fa,
+	      "finissions souffrirent rugissante",
+	      new String[] { "fin", "souffr", "rug" });
+
+	    // some everything else
+	    // aujourd'hui stays one term which is OK
+	    assertAnalyzesTo(
+	      fa,
+	      "C3PO aujourd'hui oeuf Ã¯Ã¢Ã¶Ã»Ã Ã¤ anticonstitutionnellement Java++ ",
+	      new String[] {
+	        "c3po",
+	        "aujourd'hui",
+	        "oeuf",
+	        "Ã¯Ã¢Ã¶Ã»Ã Ã¤",
+	        "anticonstitutionnel",
+	        "jav" });
+
+	    // some more everything else
+	    // here 1940-1945 stays as one term, 1940:1945 not ?
+	    assertAnalyzesTo(
+	      fa,
+	      "33Bis 1940-1945 1940:1945 (---i+++)*",
+	      new String[] { "33bis", "1940-1945", "1940", "1945", "i" });
+
+	  }
+	
 	public void testReusableTokenStream() throws Exception {
 	  FrenchAnalyzer fa = new FrenchAnalyzer(Version.LUCENE_CURRENT);
 	  // stopwords
@@ -157,4 +247,28 @@
     assertAnalyzesTo(fa, "habitable chiste", new String[] { "habitable",
         "chist" });
   }
+  
+  public void testElision() throws Exception {
+    FrenchAnalyzer fa = new FrenchAnalyzer(Version.LUCENE_CURRENT);
+    assertAnalyzesTo(fa, "voir l'embrouille", new String[] { "voir", "embrouill" });
+  }
+  
+  /**
+   * Prior to 3.1, this analyzer had no lowercase filter.
+   * stopwords were case sensitive. Preserve this for back compat.
+   * @deprecated Remove this test in Lucene 4.0
+   */
+  @Deprecated
+  public void testBuggyStopwordsCasing() throws IOException {
+    FrenchAnalyzer a = new FrenchAnalyzer(Version.LUCENE_30);
+    assertAnalyzesTo(a, "Votre", new String[] { "votr" });
+  }
+  
+  /**
+   * Test that stopwords are not case sensitive
+   */
+  public void testStopwordsCasing() throws IOException {
+    FrenchAnalyzer a = new FrenchAnalyzer(Version.LUCENE_31);
+    assertAnalyzesTo(a, "Votre", new String[] { });
+  }
 }

Added: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java?rev=907125&view=auto
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java (added)
+++ lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java Fri Feb  5 23:05:46 2010
@@ -0,0 +1,54 @@
+package org.apache.lucene.analysis.hu;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.util.Version;
+
+public class TestHungarianAnalyzer extends BaseTokenStreamTestCase {
+  /** This test fails with NPE when the 
+   * stopwords file is missing in classpath */
+  public void testResourcesAvailable() {
+    new HungarianAnalyzer(Version.LUCENE_CURRENT);
+  }
+  
+  /** test stopwords and stemming */
+  public void testBasics() throws IOException {
+    Analyzer a = new HungarianAnalyzer(Version.LUCENE_CURRENT);
+    // stemming
+    checkOneTermReuse(a, "babakocsi", "babakocs");
+    checkOneTermReuse(a, "babakocsijÃ¡Ã©rt", "babakocs");
+    // stopword
+    assertAnalyzesTo(a, "Ã¡ltal", new String[] {});
+  }
+  
+  /** test use of exclusion set */
+  public void testExclude() throws IOException {
+    Set<String> exclusionSet = new HashSet<String>();
+    exclusionSet.add("babakocsi");
+    Analyzer a = new HungarianAnalyzer(Version.LUCENE_CURRENT, 
+        HungarianAnalyzer.getDefaultStopSet(), exclusionSet);
+    checkOneTermReuse(a, "babakocsi", "babakocsi");
+    checkOneTermReuse(a, "babakocsijÃ¡Ã©rt", "babakocs");
+  }
+}

Propchange: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java?rev=907125&view=auto
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java (added)
+++ lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java Fri Feb  5 23:05:46 2010
@@ -0,0 +1,54 @@
+package org.apache.lucene.analysis.it;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.util.Version;
+
+public class TestItalianAnalyzer extends BaseTokenStreamTestCase {
+  /** This test fails with NPE when the 
+   * stopwords file is missing in classpath */
+  public void testResourcesAvailable() {
+    new ItalianAnalyzer(Version.LUCENE_CURRENT);
+  }
+  
+  /** test stopwords and stemming */
+  public void testBasics() throws IOException {
+    Analyzer a = new ItalianAnalyzer(Version.LUCENE_CURRENT);
+    // stemming
+    checkOneTermReuse(a, "abbandonata", "abbandon");
+    checkOneTermReuse(a, "abbandonati", "abbandon");
+    // stopword
+    assertAnalyzesTo(a, "dallo", new String[] {});
+  }
+  
+  /** test use of exclusion set */
+  public void testExclude() throws IOException {
+    Set<String> exclusionSet = new HashSet<String>();
+    exclusionSet.add("abbandonata");
+    Analyzer a = new ItalianAnalyzer(Version.LUCENE_CURRENT, 
+        ItalianAnalyzer.getDefaultStopSet(), exclusionSet);
+    checkOneTermReuse(a, "abbandonata", "abbandonata");
+    checkOneTermReuse(a, "abbandonati", "abbandon");
+  }
+}

Propchange: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java?rev=907125&view=auto
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java (added)
+++ lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java Fri Feb  5 23:05:46 2010
@@ -0,0 +1,44 @@
+package org.apache.lucene.analysis.miscellaneous;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.KeywordTokenizer;
+import org.apache.lucene.analysis.PorterStemFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.util.Version;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class TestStemmerOverrideFilter extends BaseTokenStreamTestCase {
+  public void testOverride() throws IOException {
+    // lets make booked stem to books
+    // the override filter will convert "booked" to "books",
+    // but also mark it with KeywordAttribute so Porter will not change it.
+    Map<String,String> dictionary = new HashMap<String,String>();
+    dictionary.put("booked", "books");
+    Tokenizer tokenizer = new KeywordTokenizer(new StringReader("booked"));
+    TokenStream stream = new PorterStemFilter(
+        new StemmerOverrideFilter(Version.LUCENE_CURRENT, tokenizer, dictionary));
+    assertTokenStreamContents(stream, new String[] { "books" });
+  }
+}

Propchange: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java?rev=907125&r1=907124&r2=907125&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java Fri Feb  5 23:05:46 2010
@@ -22,7 +22,6 @@
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.util.Version;
 
-import java.io.IOException;
 import java.io.StringReader;
 
 /**

Modified: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenizerTest.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenizerTest.java?rev=907125&r1=907124&r2=907125&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenizerTest.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenizerTest.java Fri Feb  5 23:05:46 2010
@@ -18,10 +18,8 @@
  */
 
 
-import java.io.IOException;
 import java.io.StringReader;
 
-import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 
 /**

Modified: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java?rev=907125&r1=907124&r2=907125&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java Fri Feb  5 23:05:46 2010
@@ -100,9 +100,6 @@
 	 check("ophalend", "ophal");
 	 check("ophalers", "ophaler");
 	 check("ophef", "ophef");
-	 check("opheffen", "ophef"); // versus snowball 'opheff'
-	 check("opheffende", "ophef"); // versus snowball 'opheff'
-	 check("opheffing", "ophef"); // versus snowball 'opheff'
 	 check("opheldering", "ophelder");
 	 check("ophemelde", "ophemeld");
 	 check("ophemelen", "ophemel");
@@ -118,6 +115,24 @@
 	 check("ophouden", "ophoud");
   }
   
+  /**
+   * @deprecated remove this test in Lucene 4.0
+   */
+  @Deprecated
+  public void testOldBuggyStemmer() throws Exception {
+    Analyzer a = new DutchAnalyzer(Version.LUCENE_30);
+    checkOneTermReuse(a, "opheffen", "ophef"); // versus snowball 'opheff'
+    checkOneTermReuse(a, "opheffende", "ophef"); // versus snowball 'opheff'
+    checkOneTermReuse(a, "opheffing", "ophef"); // versus snowball 'opheff'
+  }
+  
+  public void testSnowballCorrectness() throws Exception {
+    Analyzer a = new DutchAnalyzer(Version.LUCENE_CURRENT);
+    checkOneTermReuse(a, "opheffen", "opheff");
+    checkOneTermReuse(a, "opheffende", "opheff");
+    checkOneTermReuse(a, "opheffing", "opheff");
+  }
+  
   public void testReusableTokenStream() throws Exception {
     Analyzer a = new DutchAnalyzer(Version.LUCENE_CURRENT); 
     checkOneTermReuse(a, "lichaamsziek", "lichaamsziek");
@@ -161,6 +176,25 @@
     checkOneTermReuse(a, "lichamelijk", "somethingentirelydifferent");
   }
   
+  /**
+   * Prior to 3.1, this analyzer had no lowercase filter.
+   * stopwords were case sensitive. Preserve this for back compat.
+   * @deprecated Remove this test in Lucene 4.0
+   */
+  @Deprecated
+  public void testBuggyStopwordsCasing() throws IOException {
+    DutchAnalyzer a = new DutchAnalyzer(Version.LUCENE_30);
+    assertAnalyzesTo(a, "Zelf", new String[] { "zelf" });
+  }
+  
+  /**
+   * Test that stopwords are not case sensitive
+   */
+  public void testStopwordsCasing() throws IOException {
+    DutchAnalyzer a = new DutchAnalyzer(Version.LUCENE_31);
+    assertAnalyzesTo(a, "Zelf", new String[] { });
+  }
+  
   private void check(final String input, final String expected) throws Exception {
     checkOneTerm(new DutchAnalyzer(Version.LUCENE_CURRENT), input, expected); 
   }

Added: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java?rev=907125&view=auto
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java (added)
+++ lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java Fri Feb  5 23:05:46 2010
@@ -0,0 +1,54 @@
+package org.apache.lucene.analysis.no;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.util.Version;
+
+public class TestNorwegianAnalyzer extends BaseTokenStreamTestCase {
+  /** This test fails with NPE when the 
+   * stopwords file is missing in classpath */
+  public void testResourcesAvailable() {
+    new NorwegianAnalyzer(Version.LUCENE_CURRENT);
+  }
+  
+  /** test stopwords and stemming */
+  public void testBasics() throws IOException {
+    Analyzer a = new NorwegianAnalyzer(Version.LUCENE_CURRENT);
+    // stemming
+    checkOneTermReuse(a, "havnedistriktene", "havnedistrikt");
+    checkOneTermReuse(a, "havnedistrikter", "havnedistrikt");
+    // stopword
+    assertAnalyzesTo(a, "det", new String[] {});
+  }
+  
+  /** test use of exclusion set */
+  public void testExclude() throws IOException {
+    Set<String> exclusionSet = new HashSet<String>();
+    exclusionSet.add("havnedistriktene");
+    Analyzer a = new NorwegianAnalyzer(Version.LUCENE_CURRENT, 
+        NorwegianAnalyzer.getDefaultStopSet(), exclusionSet);
+    checkOneTermReuse(a, "havnedistriktene", "havnedistriktene");
+    checkOneTermReuse(a, "havnedistrikter", "havnedistrikt");
+  }
+}

Propchange: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java?rev=907125&view=auto
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java (added)
+++ lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java Fri Feb  5 23:05:46 2010
@@ -0,0 +1,54 @@
+package org.apache.lucene.analysis.pt;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.util.Version;
+
+public class TestPortugueseAnalyzer extends BaseTokenStreamTestCase {
+  /** This test fails with NPE when the 
+   * stopwords file is missing in classpath */
+  public void testResourcesAvailable() {
+    new PortugueseAnalyzer(Version.LUCENE_CURRENT);
+  }
+  
+  /** test stopwords and stemming */
+  public void testBasics() throws IOException {
+    Analyzer a = new PortugueseAnalyzer(Version.LUCENE_CURRENT);
+    // stemming
+    checkOneTermReuse(a, "quilomÃ©tricas", "quilomÃ©tr");
+    checkOneTermReuse(a, "quilomÃ©tricos", "quilomÃ©tr");
+    // stopword
+    assertAnalyzesTo(a, "nÃ£o", new String[] {});
+  }
+  
+  /** test use of exclusion set */
+  public void testExclude() throws IOException {
+    Set<String> exclusionSet = new HashSet<String>();
+    exclusionSet.add("quilomÃ©tricas");
+    Analyzer a = new PortugueseAnalyzer(Version.LUCENE_CURRENT, 
+        PortugueseAnalyzer.getDefaultStopSet(), exclusionSet);
+    checkOneTermReuse(a, "quilomÃ©tricas", "quilomÃ©tricas");
+    checkOneTermReuse(a, "quilomÃ©tricos", "quilomÃ©tr");
+  }
+}

Propchange: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java?rev=907125&view=auto
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java (added)
+++ lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java Fri Feb  5 23:05:46 2010
@@ -0,0 +1,54 @@
+package org.apache.lucene.analysis.ro;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.util.Version;
+
+public class TestRomanianAnalyzer extends BaseTokenStreamTestCase {
+  /** This test fails with NPE when the 
+   * stopwords file is missing in classpath */
+  public void testResourcesAvailable() {
+    new RomanianAnalyzer(Version.LUCENE_CURRENT);
+  }
+  
+  /** test stopwords and stemming */
+  public void testBasics() throws IOException {
+    Analyzer a = new RomanianAnalyzer(Version.LUCENE_CURRENT);
+    // stemming
+    checkOneTermReuse(a, "absenÅ£a", "absenÅ£");
+    checkOneTermReuse(a, "absenÅ£i", "absenÅ£");
+    // stopword
+    assertAnalyzesTo(a, "Ã®l", new String[] {});
+  }
+  
+  /** test use of exclusion set */
+  public void testExclude() throws IOException {
+    Set<String> exclusionSet = new HashSet<String>();
+    exclusionSet.add("absenÅ£a");
+    Analyzer a = new RomanianAnalyzer(Version.LUCENE_CURRENT, 
+        RomanianAnalyzer.getDefaultStopSet(), exclusionSet);
+    checkOneTermReuse(a, "absenÅ£a", "absenÅ£a");
+    checkOneTermReuse(a, "absenÅ£i", "absenÅ£");
+  }
+}

Propchange: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java?rev=907125&r1=907124&r2=907125&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java Fri Feb  5 23:05:46 2010
@@ -50,9 +50,14 @@
       dataDir = new File(System.getProperty("dataDir", "./bin"));
     }
 
-    public void testUnicode() throws IOException
+    /**
+     * @deprecated remove this test and its datafiles in Lucene 4.0
+     * the Snowball version has its own data tests.
+     */
+    @Deprecated
+    public void testUnicode30() throws IOException
     {
-        RussianAnalyzer ra = new RussianAnalyzer(Version.LUCENE_CURRENT);
+        RussianAnalyzer ra = new RussianAnalyzer(Version.LUCENE_30);
         inWords =
             new InputStreamReader(
                 new FileInputStream(new File(dataDir, "/org/apache/lucene/analysis/ru/testUTF8.txt")),
@@ -110,12 +115,22 @@
         }
     }
     
+    /** @deprecated remove this test in Lucene 4.0: stopwords changed */
+    @Deprecated
+    public void testReusableTokenStream30() throws Exception {
+      Analyzer a = new RussianAnalyzer(Version.LUCENE_30);
+      assertAnalyzesToReuse(a, "ÐÐ¼ÐµÑÑÐµ Ñ ÑÐµÐ¼ Ð¾ ÑÐ¸Ð»Ðµ ÑÐ»ÐµÐºÑÑÐ¾Ð¼Ð°Ð³Ð½Ð¸ÑÐ½Ð¾Ð¹ ÑÐ½ÐµÑÐ³Ð¸Ð¸ Ð¸Ð¼ÐµÐ»Ð¸ Ð¿ÑÐµÐ´ÑÑÐ°Ð²Ð»ÐµÐ½Ð¸Ðµ ÐµÑÐµ",
+          new String[] { "Ð²Ð¼ÐµÑÑ", "ÑÐ¸Ð»", "ÑÐ»ÐµÐºÑÑÐ¾Ð¼Ð°Ð³Ð½Ð¸ÑÐ½", "ÑÐ½ÐµÑÐ³", "Ð¸Ð¼ÐµÐ»", "Ð¿ÑÐµÐ´ÑÑÐ°Ð²Ð»ÐµÐ½" });
+      assertAnalyzesToReuse(a, "ÐÐ¾ Ð·Ð½Ð°Ð½Ð¸Ðµ ÑÑÐ¾ ÑÑÐ°Ð½Ð¸Ð»Ð¾ÑÑ Ð² ÑÐ°Ð¹Ð½Ðµ",
+          new String[] { "Ð·Ð½Ð°Ð½", "ÑÑÐ°Ð½", "ÑÐ°Ð¹Ð½" });
+    }
+    
     public void testReusableTokenStream() throws Exception {
       Analyzer a = new RussianAnalyzer(Version.LUCENE_CURRENT);
       assertAnalyzesToReuse(a, "ÐÐ¼ÐµÑÑÐµ Ñ ÑÐµÐ¼ Ð¾ ÑÐ¸Ð»Ðµ ÑÐ»ÐµÐºÑÑÐ¾Ð¼Ð°Ð³Ð½Ð¸ÑÐ½Ð¾Ð¹ ÑÐ½ÐµÑÐ³Ð¸Ð¸ Ð¸Ð¼ÐµÐ»Ð¸ Ð¿ÑÐµÐ´ÑÑÐ°Ð²Ð»ÐµÐ½Ð¸Ðµ ÐµÑÐµ",
           new String[] { "Ð²Ð¼ÐµÑÑ", "ÑÐ¸Ð»", "ÑÐ»ÐµÐºÑÑÐ¾Ð¼Ð°Ð³Ð½Ð¸ÑÐ½", "ÑÐ½ÐµÑÐ³", "Ð¸Ð¼ÐµÐ»", "Ð¿ÑÐµÐ´ÑÑÐ°Ð²Ð»ÐµÐ½" });
       assertAnalyzesToReuse(a, "ÐÐ¾ Ð·Ð½Ð°Ð½Ð¸Ðµ ÑÑÐ¾ ÑÑÐ°Ð½Ð¸Ð»Ð¾ÑÑ Ð² ÑÐ°Ð¹Ð½Ðµ",
-          new String[] { "Ð·Ð½Ð°Ð½", "ÑÑÐ°Ð½", "ÑÐ°Ð¹Ð½" });
+          new String[] { "Ð·Ð½Ð°Ð½", "ÑÑ", "ÑÑÐ°Ð½", "ÑÐ°Ð¹Ð½" });
     }
     
     

Modified: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianLetterTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianLetterTokenizer.java?rev=907125&r1=907124&r2=907125&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianLetterTokenizer.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianLetterTokenizer.java Fri Feb  5 23:05:46 2010
@@ -25,7 +25,9 @@
 
 /**
  * Testcase for {@link RussianLetterTokenizer}
+ * @deprecated Remove this test class in Lucene 4.0
  */
+@Deprecated
 public class TestRussianLetterTokenizer extends BaseTokenStreamTestCase {
   
   public void testRussianLetterTokenizer() throws IOException {

Modified: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianStem.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianStem.java?rev=907125&r1=907124&r2=907125&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianStem.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianStem.java Fri Feb  5 23:05:46 2010
@@ -24,6 +24,10 @@
 import java.io.FileInputStream;
 import java.util.ArrayList;
 
+/**
+ * @deprecated Remove this test class (and its datafiles!) in Lucene 4.0
+ */
+@Deprecated
 public class TestRussianStem extends LuceneTestCase
 {
     private ArrayList words = new ArrayList();

Modified: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java?rev=907125&r1=907124&r2=907125&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java Fri Feb  5 23:05:46 2010
@@ -22,11 +22,8 @@
 import java.util.Collection;
 import java.util.Iterator;
 import java.util.LinkedList;
-import java.util.HashSet;
-import java.util.Arrays;
 
 import org.apache.lucene.analysis.*;
-import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.miscellaneous.EmptyTokenStream;
 import org.apache.lucene.analysis.miscellaneous.PrefixAndSuffixAwareTokenFilter;
 import org.apache.lucene.analysis.miscellaneous.SingleTokenTokenStream;

Added: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java?rev=907125&view=auto
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java (added)
+++ lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java Fri Feb  5 23:05:46 2010
@@ -0,0 +1,54 @@
+package org.apache.lucene.analysis.sv;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.util.Version;
+
+public class TestSwedishAnalyzer extends BaseTokenStreamTestCase {
+  /** This test fails with NPE when the 
+   * stopwords file is missing in classpath */
+  public void testResourcesAvailable() {
+    new SwedishAnalyzer(Version.LUCENE_CURRENT);
+  }
+  
+  /** test stopwords and stemming */
+  public void testBasics() throws IOException {
+    Analyzer a = new SwedishAnalyzer(Version.LUCENE_CURRENT);
+    // stemming
+    checkOneTermReuse(a, "jaktkarlarne", "jaktkarl");
+    checkOneTermReuse(a, "jaktkarlens", "jaktkarl");
+    // stopword
+    assertAnalyzesTo(a, "och", new String[] {});
+  }
+  
+  /** test use of exclusion set */
+  public void testExclude() throws IOException {
+    Set<String> exclusionSet = new HashSet<String>();
+    exclusionSet.add("jaktkarlarne");
+    Analyzer a = new SwedishAnalyzer(Version.LUCENE_CURRENT, 
+        SwedishAnalyzer.getDefaultStopSet(), exclusionSet);
+    checkOneTermReuse(a, "jaktkarlarne", "jaktkarlarne");
+    checkOneTermReuse(a, "jaktkarlens", "jaktkarl");
+  }
+}

Propchange: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java?rev=907125&view=auto
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java (added)
+++ lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java Fri Feb  5 23:05:46 2010
@@ -0,0 +1,54 @@
+package org.apache.lucene.analysis.tr;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.util.Version;
+
+public class TestTurkishAnalyzer extends BaseTokenStreamTestCase {
+  /** This test fails with NPE when the 
+   * stopwords file is missing in classpath */
+  public void testResourcesAvailable() {
+    new TurkishAnalyzer(Version.LUCENE_CURRENT);
+  }
+  
+  /** test stopwords and stemming */
+  public void testBasics() throws IOException {
+    Analyzer a = new TurkishAnalyzer(Version.LUCENE_CURRENT);
+    // stemming
+    checkOneTermReuse(a, "aÄacÄ±", "aÄaÃ§");
+    checkOneTermReuse(a, "aÄaÃ§", "aÄaÃ§");
+    // stopword
+    assertAnalyzesTo(a, "dolayÄ±", new String[] {});
+  }
+  
+  /** test use of exclusion set */
+  public void testExclude() throws IOException {
+    Set<String> exclusionSet = new HashSet<String>();
+    exclusionSet.add("aÄacÄ±");
+    Analyzer a = new TurkishAnalyzer(Version.LUCENE_CURRENT, 
+        TurkishAnalyzer.getDefaultStopSet(), exclusionSet);
+    checkOneTermReuse(a, "aÄacÄ±", "aÄacÄ±");
+    checkOneTermReuse(a, "aÄaÃ§", "aÄaÃ§");
+  }
+}

Propchange: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java
------------------------------------------------------------------------------
    svn:eol-style = native