You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/02/09 21:29:26 UTC

svn commit: r1242509 - in /lucene/dev/branches/branch_3x: lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/ lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ar/ lucene/contrib/analyzers/common/src/test/org/apach...

Author: rmuir
Date: Thu Feb  9 20:29:25 2012
New Revision: 1242509

URL: http://svn.apache.org/viewvc?rev=1242509&view=rev
Log:
LUCENE-3765: Trappy behavior with StopFilter/ignoreCase

Modified:
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ca/TestCatalanAnalyzer.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/eu/TestBasqueAnalyzer.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestElision.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hi/TestHindiAnalyzer.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hy/TestArmenianAnalyzer.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/id/TestIndonesianAnalyzer.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/lv/TestLatvianAnalyzer.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java
    lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/analysis/StopFilter.java
    lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java
    lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
    lucene/dev/branches/branch_3x/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java
    lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/CommonGramsFilter.java

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java Thu Feb  9 20:29:25 2012
@@ -59,6 +59,9 @@ import java.util.Map;
  * <p>You must specify the required {@link Version}
  * compatibility when creating DutchAnalyzer:
  * <ul>
+ *   <li> As of 3.6, {@link #DutchAnalyzer(Version, Set)} and
+ *        {@link #DutchAnalyzer(Version, Set, Set)} also populate
+ *        the default entries for the stem override dictionary
  *   <li> As of 3.1, Snowball stemming is done with SnowballFilter, 
  *        LowerCaseFilter is used prior to StopFilter, and Snowball 
  *        stopwords are used by default.
@@ -99,6 +102,7 @@ public final class DutchAnalyzer extends
   
   private static class DefaultSetHolder {
     static final Set<?> DEFAULT_STOP_SET;
+    static final CharArrayMap<String> DEFAULT_STEM_DICT;
 
     static {
       try {
@@ -109,6 +113,12 @@ public final class DutchAnalyzer extends
         // distribution (JAR)
         throw new RuntimeException("Unable to load default stopword set");
       }
+      
+      DEFAULT_STEM_DICT = new CharArrayMap<String>(Version.LUCENE_CURRENT, 4, false);
+      DEFAULT_STEM_DICT.put("fiets", "fiets"); //otherwise fiet
+      DEFAULT_STEM_DICT.put("bromfiets", "bromfiets"); //otherwise bromfiet
+      DEFAULT_STEM_DICT.put("ei", "eier");
+      DEFAULT_STEM_DICT.put("kind", "kinder");
     }
     
   }
@@ -124,7 +134,7 @@ public final class DutchAnalyzer extends
    */
   private Set<?> excltable = Collections.emptySet();
 
-  private Map<Object,String>  stemdict = CharArrayMap.emptyMap();
+  private CharArrayMap<String> stemdict = CharArrayMap.emptyMap();
   private final Version matchVersion;
 
   /**
@@ -133,22 +143,33 @@ public final class DutchAnalyzer extends
    * 
    */
   public DutchAnalyzer(Version matchVersion) {
-    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
-    stemdict = new CharArrayMap<String>(matchVersion, 16, false);
-    stemdict.put("fiets", "fiets"); //otherwise fiet
-    stemdict.put("bromfiets", "bromfiets"); //otherwise bromfiet
-    stemdict.put("ei", "eier");
-    stemdict.put("kind", "kinder");
+    // historically, only this ctor populated the stem dict!!!!!
+    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET, CharArraySet.EMPTY_SET, DefaultSetHolder.DEFAULT_STEM_DICT);
   }
   
   public DutchAnalyzer(Version matchVersion, Set<?> stopwords){
-    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+    // historically, this ctor never the stem dict!!!!!
+    // so we populate it only for >= 3.6
+    this(matchVersion, stopwords, CharArraySet.EMPTY_SET, 
+        matchVersion.onOrAfter(Version.LUCENE_36) 
+        ? DefaultSetHolder.DEFAULT_STEM_DICT 
+        : CharArrayMap.<String>emptyMap());
   }
   
   public DutchAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionTable){
-    stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stopwords));
-    excltable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionTable));
+    // historically, this ctor never the stem dict!!!!!
+    // so we populate it only for >= 3.6
+    this(matchVersion, stopwords, stemExclusionTable,
+        matchVersion.onOrAfter(Version.LUCENE_36)
+        ? DefaultSetHolder.DEFAULT_STEM_DICT
+        : CharArrayMap.<String>emptyMap());
+  }
+  
+  public DutchAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionTable, CharArrayMap<String> stemOverrideDict) {
     this.matchVersion = matchVersion;
+    this.stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stopwords));
+    this.excltable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionTable));
+    this.stemdict = CharArrayMap.unmodifiableMap(CharArrayMap.copy(matchVersion, stemOverrideDict));
   }
 
   /**

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java Thu Feb  9 20:29:25 2012
@@ -18,9 +18,6 @@ package org.apache.lucene.analysis.ar;
  */
 
 import java.io.IOException;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.Set;
 
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.CharArraySet;
@@ -79,16 +76,14 @@ public class TestArabicAnalyzer extends 
    * Test that custom stopwords work, and are not case-sensitive.
    */
   public void testCustomStopwords() throws Exception {
-    Set<String> set = new HashSet<String>();
-    Collections.addAll(set, "the", "and", "a");
+    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, asSet("the", "and", "a"), false);
     ArabicAnalyzer a = new ArabicAnalyzer(TEST_VERSION_CURRENT, set);
     assertAnalyzesTo(a, "The quick brown fox.", new String[] { "quick",
         "brown", "fox" });
   }
   
   public void testWithStemExclusionSet() throws IOException {
-    Set<String> set = new HashSet<String>();
-    set.add("ساهدهات");
+    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, asSet("ساهدهات"), false);
     ArabicAnalyzer a = new ArabicAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set);
     assertAnalyzesTo(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick", "ساهدهات" });
     assertAnalyzesToReuse(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick", "ساهدهات" });

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java Thu Feb  9 20:29:25 2012
@@ -18,7 +18,6 @@ package org.apache.lucene.analysis.bg;
  */
 
 import java.io.IOException;
-import java.util.Collections;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@@ -43,8 +42,7 @@ public class TestBulgarianAnalyzer exten
   }
   
   public void testCustomStopwords() throws IOException {
-    Analyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT, Collections
-        .emptySet());
+    Analyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET);
     assertAnalyzesTo(a, "Как се казваш?", 
         new String[] {"как", "се", "казваш"});
   }

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java Thu Feb  9 20:29:25 2012
@@ -135,8 +135,8 @@ public class TestBrazilianStemmer extend
   }
  
   public void testStemExclusionTable() throws Exception {
-    BrazilianAnalyzer a = new BrazilianAnalyzer(TEST_VERSION_CURRENT);
-    a.setStemExclusionTable("quintessência");
+    BrazilianAnalyzer a = new BrazilianAnalyzer(TEST_VERSION_CURRENT, 
+        CharArraySet.EMPTY_SET, new CharArraySet(TEST_VERSION_CURRENT, asSet("quintessência"), false));
     checkReuse(a, "quintessência", "quintessência"); // excluded words will be completely unchanged.
   }
   

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ca/TestCatalanAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ca/TestCatalanAnalyzer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ca/TestCatalanAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ca/TestCatalanAnalyzer.java Thu Feb  9 20:29:25 2012
@@ -18,11 +18,10 @@ package org.apache.lucene.analysis.ca;
  */
 
 import java.io.IOException;
-import java.util.HashSet;
-import java.util.Set;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
 
 public class TestCatalanAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the 
@@ -50,8 +49,7 @@ public class TestCatalanAnalyzer extends
   
   /** test use of exclusion set */
   public void testExclude() throws IOException {
-    Set<String> exclusionSet = new HashSet<String>();
-    exclusionSet.add("llengües");
+    CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("llengües"), false);
     Analyzer a = new CatalanAnalyzer(TEST_VERSION_CURRENT, 
         CatalanAnalyzer.getDefaultStopSet(), exclusionSet);
     checkOneTermReuse(a, "llengües", "llengües");

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java Thu Feb  9 20:29:25 2012
@@ -18,11 +18,10 @@ package org.apache.lucene.analysis.da;
  */
 
 import java.io.IOException;
-import java.util.HashSet;
-import java.util.Set;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
 
 public class TestDanishAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the 
@@ -43,8 +42,7 @@ public class TestDanishAnalyzer extends 
   
   /** test use of exclusion set */
   public void testExclude() throws IOException {
-    Set<String> exclusionSet = new HashSet<String>();
-    exclusionSet.add("undersøgelse");
+    CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("undersøgelse"), false);
     Analyzer a = new DanishAnalyzer(TEST_VERSION_CURRENT, 
         DanishAnalyzer.getDefaultStopSet(), exclusionSet);
     checkOneTermReuse(a, "undersøgelse", "undersøgelse");

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java Thu Feb  9 20:29:25 2012
@@ -77,6 +77,12 @@ public class TestGermanAnalyzer extends 
     checkOneTermReuse(a, "tischen", "tischen");
   }
   
+  public void testStemExclusionTable() throws Exception {
+    GermanAnalyzer a = new GermanAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, 
+        new CharArraySet(TEST_VERSION_CURRENT, asSet("tischen"), false));
+    checkOneTermReuse(a, "tischen", "tischen");
+  }
+  
   /** test some features of the new snowball filter
    * these only pass with LUCENE_CURRENT, not if you use o.a.l.a.de.GermanStemmer
    */

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java Thu Feb  9 20:29:25 2012
@@ -18,11 +18,10 @@ package org.apache.lucene.analysis.en;
  */
 
 import java.io.IOException;
-import java.util.HashSet;
-import java.util.Set;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
 
 public class TestEnglishAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the 
@@ -45,8 +44,7 @@ public class TestEnglishAnalyzer extends
   
   /** test use of exclusion set */
   public void testExclude() throws IOException {
-    Set<String> exclusionSet = new HashSet<String>();
-    exclusionSet.add("books");
+    CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("books"), false);
     Analyzer a = new EnglishAnalyzer(TEST_VERSION_CURRENT, 
         EnglishAnalyzer.getDefaultStopSet(), exclusionSet);
     checkOneTermReuse(a, "books", "books");

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java Thu Feb  9 20:29:25 2012
@@ -18,11 +18,10 @@ package org.apache.lucene.analysis.es;
  */
 
 import java.io.IOException;
-import java.util.HashSet;
-import java.util.Set;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
 
 public class TestSpanishAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the 
@@ -43,8 +42,7 @@ public class TestSpanishAnalyzer extends
   
   /** test use of exclusion set */
   public void testExclude() throws IOException {
-    Set<String> exclusionSet = new HashSet<String>();
-    exclusionSet.add("chicano");
+    CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("chicano"), false);
     Analyzer a = new SpanishAnalyzer(TEST_VERSION_CURRENT, 
         SpanishAnalyzer.getDefaultStopSet(), exclusionSet);
     checkOneTermReuse(a, "chicana", "chican");

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/eu/TestBasqueAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/eu/TestBasqueAnalyzer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/eu/TestBasqueAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/eu/TestBasqueAnalyzer.java Thu Feb  9 20:29:25 2012
@@ -18,11 +18,10 @@ package org.apache.lucene.analysis.eu;
  */
 
 import java.io.IOException;
-import java.util.HashSet;
-import java.util.Set;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
 
 public class TestBasqueAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the 
@@ -43,8 +42,7 @@ public class TestBasqueAnalyzer extends 
   
   /** test use of exclusion set */
   public void testExclude() throws IOException {
-    Set<String> exclusionSet = new HashSet<String>();
-    exclusionSet.add("zaldiak");
+    CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("zaldiak"), false);
     Analyzer a = new BasqueAnalyzer(TEST_VERSION_CURRENT, 
         BasqueAnalyzer.getDefaultStopSet(), exclusionSet);
     checkOneTermReuse(a, "zaldiak", "zaldiak");

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java Thu Feb  9 20:29:25 2012
@@ -19,6 +19,7 @@ package org.apache.lucene.analysis.fa;
 
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CharArraySet;
 
 /**
  * Test the Persian Analyzer
@@ -215,7 +216,8 @@ public class TestPersianAnalyzer extends
    * Test that custom stopwords work, and are not case-sensitive.
    */
   public void testCustomStopwords() throws Exception {
-    PersianAnalyzer a = new PersianAnalyzer(TEST_VERSION_CURRENT, "the", "and", "a");
+    PersianAnalyzer a = new PersianAnalyzer(TEST_VERSION_CURRENT, 
+        new CharArraySet(TEST_VERSION_CURRENT, asSet("the", "and", "a"), false));
     assertAnalyzesTo(a, "The quick brown fox.", new String[] { "quick",
         "brown", "fox" });
   }

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java Thu Feb  9 20:29:25 2012
@@ -18,11 +18,10 @@ package org.apache.lucene.analysis.fi;
  */
 
 import java.io.IOException;
-import java.util.HashSet;
-import java.util.Set;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
 
 public class TestFinnishAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the 
@@ -43,8 +42,7 @@ public class TestFinnishAnalyzer extends
   
   /** test use of exclusion set */
   public void testExclude() throws IOException {
-    Set<String> exclusionSet = new HashSet<String>();
-    exclusionSet.add("edeltäjistään");
+    CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("edeltäjistään"), false);
     Analyzer a = new FinnishAnalyzer(TEST_VERSION_CURRENT, 
         FinnishAnalyzer.getDefaultStopSet(), exclusionSet);
     checkOneTermReuse(a, "edeltäjiinsä", "edeltäj");

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestElision.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestElision.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestElision.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestElision.java Thu Feb  9 20:29:25 2012
@@ -20,15 +20,14 @@ package org.apache.lucene.analysis.fr;
 import java.io.IOException;
 import java.io.StringReader;
 import java.util.ArrayList;
-import java.util.HashSet;
 import java.util.List;
-import java.util.Set;
 
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.CharArraySet;
 
 /**
  * 
@@ -38,9 +37,7 @@ public class TestElision extends BaseTok
   public void testElision() throws Exception {
     String test = "Plop, juste pour voir l'embrouille avec O'brian. M'enfin.";
     Tokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(test));
-    Set<String> articles = new HashSet<String>();
-    articles.add("l");
-    articles.add("M");
+    CharArraySet articles = new CharArraySet(TEST_VERSION_CURRENT, asSet("l", "M"), false);
     TokenFilter filter = new ElisionFilter(TEST_VERSION_CURRENT, tokenizer, articles);
     List<String> tas = filter(filter);
     assertEquals("embrouille", tas.get(4));

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java Thu Feb  9 20:29:25 2012
@@ -18,11 +18,10 @@ package org.apache.lucene.analysis.gl;
  */
 
 import java.io.IOException;
-import java.util.HashSet;
-import java.util.Set;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
 
 public class TestGalicianAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the 
@@ -43,8 +42,7 @@ public class TestGalicianAnalyzer extend
   
   /** test use of exclusion set */
   public void testExclude() throws IOException {
-    Set<String> exclusionSet = new HashSet<String>();
-    exclusionSet.add("correspondente");
+    CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("correspondente"), false);
     Analyzer a = new GalicianAnalyzer(TEST_VERSION_CURRENT, 
         GalicianAnalyzer.getDefaultStopSet(), exclusionSet);
     checkOneTermReuse(a, "correspondente", "correspondente");

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hi/TestHindiAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hi/TestHindiAnalyzer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hi/TestHindiAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hi/TestHindiAnalyzer.java Thu Feb  9 20:29:25 2012
@@ -1,10 +1,8 @@
 package org.apache.lucene.analysis.hi;
 
-import java.util.HashSet;
-import java.util.Set;
-
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
 
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -41,8 +39,7 @@ public class TestHindiAnalyzer extends B
   }
   
   public void testExclusionSet() throws Exception {
-    Set<String> exclusionSet = new HashSet<String>();
-    exclusionSet.add("हिंदी");
+    CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("हिंदी"), false);
     Analyzer a = new HindiAnalyzer(TEST_VERSION_CURRENT, 
         HindiAnalyzer.getDefaultStopSet(), exclusionSet);
     checkOneTermReuse(a, "हिंदी", "हिंदी");

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java Thu Feb  9 20:29:25 2012
@@ -18,11 +18,10 @@ package org.apache.lucene.analysis.hu;
  */
 
 import java.io.IOException;
-import java.util.HashSet;
-import java.util.Set;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
 
 public class TestHungarianAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the 
@@ -43,8 +42,7 @@ public class TestHungarianAnalyzer exten
   
   /** test use of exclusion set */
   public void testExclude() throws IOException {
-    Set<String> exclusionSet = new HashSet<String>();
-    exclusionSet.add("babakocsi");
+    CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("babakocsi"), false);
     Analyzer a = new HungarianAnalyzer(TEST_VERSION_CURRENT, 
         HungarianAnalyzer.getDefaultStopSet(), exclusionSet);
     checkOneTermReuse(a, "babakocsi", "babakocsi");

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hy/TestArmenianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hy/TestArmenianAnalyzer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hy/TestArmenianAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hy/TestArmenianAnalyzer.java Thu Feb  9 20:29:25 2012
@@ -18,11 +18,10 @@ package org.apache.lucene.analysis.hy;
  */
 
 import java.io.IOException;
-import java.util.HashSet;
-import java.util.Set;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
 
 public class TestArmenianAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the 
@@ -43,8 +42,7 @@ public class TestArmenianAnalyzer extend
   
   /** test use of exclusion set */
   public void testExclude() throws IOException {
-    Set<String> exclusionSet = new HashSet<String>();
-    exclusionSet.add("արծիվներ");
+    CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("արծիվներ"), false);
     Analyzer a = new ArmenianAnalyzer(TEST_VERSION_CURRENT, 
         ArmenianAnalyzer.getDefaultStopSet(), exclusionSet);
     checkOneTermReuse(a, "արծիվներ", "արծիվներ");

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/id/TestIndonesianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/id/TestIndonesianAnalyzer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/id/TestIndonesianAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/id/TestIndonesianAnalyzer.java Thu Feb  9 20:29:25 2012
@@ -18,11 +18,10 @@ package org.apache.lucene.analysis.id;
  */
 
 import java.io.IOException;
-import java.util.HashSet;
-import java.util.Set;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
 
 public class TestIndonesianAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the 
@@ -43,8 +42,7 @@ public class TestIndonesianAnalyzer exte
   
   /** test use of exclusion set */
   public void testExclude() throws IOException {
-    Set<String> exclusionSet = new HashSet<String>();
-    exclusionSet.add("peledakan");
+    CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("peledakan"), false);
     Analyzer a = new IndonesianAnalyzer(TEST_VERSION_CURRENT, 
         IndonesianAnalyzer.getDefaultStopSet(), exclusionSet);
     checkOneTermReuse(a, "peledakan", "peledakan");

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java Thu Feb  9 20:29:25 2012
@@ -18,11 +18,10 @@ package org.apache.lucene.analysis.it;
  */
 
 import java.io.IOException;
-import java.util.HashSet;
-import java.util.Set;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.util.Version;
 
 public class TestItalianAnalyzer extends BaseTokenStreamTestCase {
@@ -44,8 +43,7 @@ public class TestItalianAnalyzer extends
   
   /** test use of exclusion set */
   public void testExclude() throws IOException {
-    Set<String> exclusionSet = new HashSet<String>();
-    exclusionSet.add("abbandonata");
+    CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("abbandonata"), false);
     Analyzer a = new ItalianAnalyzer(TEST_VERSION_CURRENT, 
         ItalianAnalyzer.getDefaultStopSet(), exclusionSet);
     checkOneTermReuse(a, "abbandonata", "abbandonata");

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/lv/TestLatvianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/lv/TestLatvianAnalyzer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/lv/TestLatvianAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/lv/TestLatvianAnalyzer.java Thu Feb  9 20:29:25 2012
@@ -18,11 +18,10 @@ package org.apache.lucene.analysis.lv;
  */
 
 import java.io.IOException;
-import java.util.HashSet;
-import java.util.Set;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
 
 public class TestLatvianAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the 
@@ -43,8 +42,7 @@ public class TestLatvianAnalyzer extends
   
   /** test use of exclusion set */
   public void testExclude() throws IOException {
-    Set<String> exclusionSet = new HashSet<String>();
-    exclusionSet.add("tirgiem");
+    CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("tirgiem"), false);
     Analyzer a = new LatvianAnalyzer(TEST_VERSION_CURRENT, 
         LatvianAnalyzer.getDefaultStopSet(), exclusionSet);
     checkOneTermReuse(a, "tirgiem", "tirgiem");

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java Thu Feb  9 20:29:25 2012
@@ -2,12 +2,11 @@ package org.apache.lucene.analysis.misce
 
 import java.io.IOException;
 import java.io.StringReader;
-import java.util.HashMap;
-import java.util.Map;
 
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.KeywordTokenizer;
 import org.apache.lucene.analysis.PorterStemFilter;
+import org.apache.lucene.analysis.CharArrayMap;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 
@@ -33,7 +32,7 @@ public class TestStemmerOverrideFilter e
     // lets make booked stem to books
     // the override filter will convert "booked" to "books",
     // but also mark it with KeywordAttribute so Porter will not change it.
-    Map<String,String> dictionary = new HashMap<String,String>();
+    CharArrayMap<String> dictionary = new CharArrayMap<String>(TEST_VERSION_CURRENT, 1, false);
     dictionary.put("booked", "books");
     Tokenizer tokenizer = new KeywordTokenizer(new StringReader("booked"));
     TokenStream stream = new PorterStemFilter(

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java Thu Feb  9 20:29:25 2012
@@ -175,6 +175,26 @@ public class TestDutchStemmer extends Ba
     checkOneTermReuse(a, "lichamelijk", "somethingentirelydifferent");
   }
   
+  /** 
+   * check that the default stem overrides are used
+   * even if you use a non-default ctor.
+   */
+  public void testStemOverrides() throws IOException {
+    DutchAnalyzer a = new DutchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET);
+    checkOneTerm(a, "fiets", "fiets");
+  }
+  
+  /**
+   * prior to 3.6, this confusingly did not happen if 
+   * you specified your own stoplist!!!!
+   * @deprecated (3.6) Remove this test in Lucene 5.0
+   */
+  @Deprecated
+  public void testBuggyStemOverrides() throws IOException {
+    DutchAnalyzer a = new DutchAnalyzer(Version.LUCENE_35, CharArraySet.EMPTY_SET);
+    checkOneTerm(a, "fiets", "fiet");
+  }
+  
   /**
    * Prior to 3.1, this analyzer had no lowercase filter.
    * stopwords were case sensitive. Preserve this for back compat.

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java Thu Feb  9 20:29:25 2012
@@ -18,11 +18,10 @@ package org.apache.lucene.analysis.no;
  */
 
 import java.io.IOException;
-import java.util.HashSet;
-import java.util.Set;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
 
 public class TestNorwegianAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the 
@@ -43,8 +42,7 @@ public class TestNorwegianAnalyzer exten
   
   /** test use of exclusion set */
   public void testExclude() throws IOException {
-    Set<String> exclusionSet = new HashSet<String>();
-    exclusionSet.add("havnedistriktene");
+    CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("havnedistriktene"), false);
     Analyzer a = new NorwegianAnalyzer(TEST_VERSION_CURRENT, 
         NorwegianAnalyzer.getDefaultStopSet(), exclusionSet);
     checkOneTermReuse(a, "havnedistriktene", "havnedistriktene");

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java Thu Feb  9 20:29:25 2012
@@ -18,11 +18,10 @@ package org.apache.lucene.analysis.pt;
  */
 
 import java.io.IOException;
-import java.util.HashSet;
-import java.util.Set;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
 
 public class TestPortugueseAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the 
@@ -43,8 +42,7 @@ public class TestPortugueseAnalyzer exte
   
   /** test use of exclusion set */
   public void testExclude() throws IOException {
-    Set<String> exclusionSet = new HashSet<String>();
-    exclusionSet.add("quilométricas");
+    CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("quilométricas"), false);
     Analyzer a = new PortugueseAnalyzer(TEST_VERSION_CURRENT, 
         PortugueseAnalyzer.getDefaultStopSet(), exclusionSet);
     checkOneTermReuse(a, "quilométricas", "quilométricas");

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java Thu Feb  9 20:29:25 2012
@@ -18,11 +18,10 @@ package org.apache.lucene.analysis.ro;
  */
 
 import java.io.IOException;
-import java.util.HashSet;
-import java.util.Set;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
 
 public class TestRomanianAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the 
@@ -43,8 +42,7 @@ public class TestRomanianAnalyzer extend
   
   /** test use of exclusion set */
   public void testExclude() throws IOException {
-    Set<String> exclusionSet = new HashSet<String>();
-    exclusionSet.add("absenţa");
+    CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("absenţa"), false);
     Analyzer a = new RomanianAnalyzer(TEST_VERSION_CURRENT, 
         RomanianAnalyzer.getDefaultStopSet(), exclusionSet);
     checkOneTermReuse(a, "absenţa", "absenţa");

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java Thu Feb  9 20:29:25 2012
@@ -18,12 +18,10 @@ package org.apache.lucene.analysis.sv;
  */
 
 import java.io.IOException;
-import java.util.HashSet;
-import java.util.Set;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.hu.HungarianAnalyzer;
+import org.apache.lucene.analysis.CharArraySet;
 
 public class TestSwedishAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the 
@@ -44,8 +42,7 @@ public class TestSwedishAnalyzer extends
   
   /** test use of exclusion set */
   public void testExclude() throws IOException {
-    Set<String> exclusionSet = new HashSet<String>();
-    exclusionSet.add("jaktkarlarne");
+    CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("jaktkarlarne"), false);
     Analyzer a = new SwedishAnalyzer(TEST_VERSION_CURRENT, 
         SwedishAnalyzer.getDefaultStopSet(), exclusionSet);
     checkOneTermReuse(a, "jaktkarlarne", "jaktkarlarne");

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java Thu Feb  9 20:29:25 2012
@@ -18,11 +18,10 @@ package org.apache.lucene.analysis.tr;
  */
 
 import java.io.IOException;
-import java.util.HashSet;
-import java.util.Set;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
 
 public class TestTurkishAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the 
@@ -43,8 +42,7 @@ public class TestTurkishAnalyzer extends
   
   /** test use of exclusion set */
   public void testExclude() throws IOException {
-    Set<String> exclusionSet = new HashSet<String>();
-    exclusionSet.add("ağacı");
+    CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("ağacı"), false);
     Analyzer a = new TurkishAnalyzer(TEST_VERSION_CURRENT, 
         TurkishAnalyzer.getDefaultStopSet(), exclusionSet);
     checkOneTermReuse(a, "ağacı", "ağacı");

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java Thu Feb  9 20:29:25 2012
@@ -18,11 +18,10 @@ package org.apache.lucene.analysis.pl;
  */
 
 import java.io.IOException;
-import java.util.HashSet;
-import java.util.Set;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
 
 public class TestPolishAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the 
@@ -43,8 +42,7 @@ public class TestPolishAnalyzer extends 
   
   /** test use of exclusion set */
   public void testExclude() throws IOException {
-    Set<String> exclusionSet = new HashSet<String>();
-    exclusionSet.add("studenta");
+    CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("studenta"), false);;
     Analyzer a = new PolishAnalyzer(TEST_VERSION_CURRENT, 
         PolishAnalyzer.getDefaultStopSet(), exclusionSet);
     checkOneTermReuse(a, "studenta", "studenta");

Modified: lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/analysis/StopFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/analysis/StopFilter.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/analysis/StopFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/analysis/StopFilter.java Thu Feb  9 20:29:25 2012
@@ -58,7 +58,7 @@ public final class StopFilter extends Fi
    * @param input Input TokenStream
    * @param stopWords A Set of Strings or char[] or any other toString()-able set representing the stopwords
    * @param ignoreCase if true, all words are lower cased first
-   * @deprecated use {@link #StopFilter(Version, TokenStream, Set, boolean)} instead
+   * @deprecated Use {@link #StopFilter(Version, TokenStream, Set)} instead
    */
   @Deprecated
   public StopFilter(boolean enablePositionIncrements, TokenStream input, Set<?> stopWords, boolean ignoreCase)
@@ -87,7 +87,9 @@ public final class StopFilter extends Fi
    *          representing the stopwords
    * @param ignoreCase
    *          if true, all words are lower cased first
+   * @deprecated Use {@link #StopFilter(Version, TokenStream, Set)} instead
    */
+  @Deprecated
   public StopFilter(Version matchVersion, TokenStream input, Set<?> stopWords, boolean ignoreCase)
   {
    this(matchVersion, matchVersion.onOrAfter(Version.LUCENE_29), input, stopWords, ignoreCase);

Modified: lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java Thu Feb  9 20:29:25 2012
@@ -91,7 +91,10 @@ public final class ClassicAnalyzer exten
    * @see WordlistLoader#getWordSet(Reader, Version)
    * @param matchVersion Lucene version to match See {@link
    * <a href="#version">above</a>}
-   * @param stopwords File to read stop words from */
+   * @param stopwords File to read stop words from 
+   * @deprecated Use {@link #ClassicAnalyzer(Version, Reader)} instead. 
+   */
+  @Deprecated
   public ClassicAnalyzer(Version matchVersion, File stopwords) throws IOException {
     this(matchVersion, WordlistLoader.getWordSet(IOUtils.getDecodingReader(stopwords,
         IOUtils.CHARSET_UTF_8), matchVersion));

Modified: lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java Thu Feb  9 20:29:25 2012
@@ -87,7 +87,10 @@ public final class StandardAnalyzer exte
    * @see WordlistLoader#getWordSet(Reader, Version)
    * @param matchVersion Lucene version to match See {@link
    * <a href="#version">above</a>}
-   * @param stopwords File to read stop words from */
+   * @param stopwords File to read stop words from 
+   * @deprecated Use {@link #StandardAnalyzer(Version, Reader)} instead. 
+   */
+  @Deprecated
   public StandardAnalyzer(Version matchVersion, File stopwords) throws IOException {
     this(matchVersion, WordlistLoader.getWordSet(IOUtils.getDecodingReader(stopwords,
         IOUtils.CHARSET_UTF_8), matchVersion));

Modified: lucene/dev/branches/branch_3x/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java (original)
+++ lucene/dev/branches/branch_3x/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java Thu Feb  9 20:29:25 2012
@@ -805,7 +805,11 @@ public abstract class LuceneTestCase ext
   public static void assumeNoException(String msg, Exception e) {
     Assume.assumeNoException(e == null ? null : new _TestIgnoredException(msg, e));
   }
- 
+
+  public static <T> Set<T> asSet(T... args) {
+    return new HashSet<T>(Arrays.asList(args));
+  }
+
   /**
    * Convenience method for logging an iterator.
    *

Modified: lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/CommonGramsFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/CommonGramsFilter.java?rev=1242509&r1=1242508&r2=1242509&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/CommonGramsFilter.java (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/CommonGramsFilter.java Thu Feb  9 20:29:25 2012
@@ -67,7 +67,7 @@ public final class CommonGramsFilter ext
     this(Version.LUCENE_29, input, commonWords);
   }
   
-  /** @deprecated Use {@link #CommonGramsFilter(Version, TokenStream, Set, boolean)} instead */
+  /** @deprecated Use {@link #CommonGramsFilter(Version, TokenStream, Set)} instead */
   @Deprecated
   public CommonGramsFilter(TokenStream input, Set<?> commonWords, boolean ignoreCase) {
     this(Version.LUCENE_29, input, commonWords, ignoreCase);
@@ -102,7 +102,9 @@ public final class CommonGramsFilter ext
    * @param input TokenStream input in filter chain.
    * @param commonWords The set of common words.
    * @param ignoreCase -Ignore case when constructing bigrams for common words.
+   * @deprecated Use {@link #CommonGramsFilter(Version, TokenStream, Set)} instead
    */
+  @Deprecated
   public CommonGramsFilter(Version matchVersion, TokenStream input, Set<?> commonWords, boolean ignoreCase) {
     super(input);
     if (commonWords instanceof CharArraySet) {