You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by yo...@apache.org on 2011/11/14 23:36:32 UTC
svn commit: r1201946 [11/14] - in /lucene/dev/branches/solrcloud: ./
dev-tools/idea/lucene/contrib/ lucene/ lucene/contrib/
lucene/contrib/demo/src/java/org/apache/lucene/demo/
lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/
luc...
Modified: lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java?rev=1201946&r1=1201945&r2=1201946&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java (original)
+++ lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java Mon Nov 14 22:36:20 2011
@@ -33,6 +33,7 @@ import org.apache.lucene.analysis.standa
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
+import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.DanishStemmer;
@@ -62,8 +63,8 @@ public final class DanishAnalyzer extend
static {
try {
- DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(SnowballFilter.class,
- DEFAULT_STOPWORD_FILE);
+ DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class,
+ DEFAULT_STOPWORD_FILE, IOUtils.CHARSET_UTF_8), Version.LUCENE_CURRENT);
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
Modified: lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java?rev=1201946&r1=1201945&r2=1201946&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java (original)
+++ lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java Mon Nov 14 22:36:20 2011
@@ -36,6 +36,7 @@ import org.apache.lucene.analysis.standa
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
+import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.German2Stemmer;
@@ -100,8 +101,8 @@ public final class GermanAnalyzer extend
private static final Set<?> DEFAULT_SET;
static {
try {
- DEFAULT_SET =
- WordlistLoader.getSnowballWordSet(SnowballFilter.class, DEFAULT_STOPWORD_FILE);
+ DEFAULT_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class,
+ DEFAULT_STOPWORD_FILE, IOUtils.CHARSET_UTF_8), Version.LUCENE_CURRENT);
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
Modified: lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java?rev=1201946&r1=1201945&r2=1201946&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java (original)
+++ lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java Mon Nov 14 22:36:20 2011
@@ -33,6 +33,7 @@ import org.apache.lucene.analysis.standa
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
+import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.SpanishStemmer;
@@ -62,8 +63,8 @@ public final class SpanishAnalyzer exten
static {
try {
- DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(SnowballFilter.class,
- DEFAULT_STOPWORD_FILE);
+ DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class,
+ DEFAULT_STOPWORD_FILE, IOUtils.CHARSET_UTF_8), Version.LUCENE_CURRENT);
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
Modified: lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java?rev=1201946&r1=1201945&r2=1201946&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java (original)
+++ lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java Mon Nov 14 22:36:20 2011
@@ -33,6 +33,7 @@ import org.apache.lucene.analysis.standa
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
+import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.FinnishStemmer;
@@ -62,8 +63,8 @@ public final class FinnishAnalyzer exten
static {
try {
- DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(SnowballFilter.class,
- DEFAULT_STOPWORD_FILE);
+ DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class,
+ DEFAULT_STOPWORD_FILE, IOUtils.CHARSET_UTF_8), Version.LUCENE_CURRENT);
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
Modified: lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java?rev=1201946&r1=1201945&r2=1201946&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java (original)
+++ lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java Mon Nov 14 22:36:20 2011
@@ -30,6 +30,7 @@ import org.apache.lucene.analysis.standa
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
+import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.Version;
import java.io.IOException;
@@ -118,8 +119,8 @@ public final class FrenchAnalyzer extend
static final Set<?> DEFAULT_STOP_SET;
static {
try {
- DEFAULT_STOP_SET =
- WordlistLoader.getSnowballWordSet(SnowballFilter.class, DEFAULT_STOPWORD_FILE);
+ DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class,
+ DEFAULT_STOPWORD_FILE, IOUtils.CHARSET_UTF_8), Version.LUCENE_CURRENT);
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
Modified: lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java?rev=1201946&r1=1201945&r2=1201946&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java (original)
+++ lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java Mon Nov 14 22:36:20 2011
@@ -32,6 +32,7 @@ import org.apache.lucene.analysis.standa
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
+import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.Version;
/**
@@ -60,12 +61,12 @@ public final class GalicianAnalyzer exte
static {
try {
- DEFAULT_STOP_SET = WordlistLoader.getWordSet(GalicianAnalyzer.class,
- DEFAULT_STOPWORD_FILE);
+ DEFAULT_STOP_SET = WordlistLoader.getWordSet(IOUtils.getDecodingReader(GalicianAnalyzer.class,
+ DEFAULT_STOPWORD_FILE, IOUtils.CHARSET_UTF_8), Version.LUCENE_CURRENT);
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
- throw new RuntimeException("Unable to load default stopword set");
+ throw new RuntimeException("Unable to load default stopword set", ex);
}
}
}
Modified: lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java?rev=1201946&r1=1201945&r2=1201946&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java (original)
+++ lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java Mon Nov 14 22:36:20 2011
@@ -33,6 +33,7 @@ import org.apache.lucene.analysis.standa
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
+import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.HungarianStemmer;
@@ -62,8 +63,8 @@ public final class HungarianAnalyzer ext
static {
try {
- DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(SnowballFilter.class,
- DEFAULT_STOPWORD_FILE);
+ DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class,
+ DEFAULT_STOPWORD_FILE, IOUtils.CHARSET_UTF_8), Version.LUCENE_CURRENT);
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
Modified: lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java?rev=1201946&r1=1201945&r2=1201946&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java (original)
+++ lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java Mon Nov 14 22:36:20 2011
@@ -35,6 +35,7 @@ import org.apache.lucene.analysis.standa
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
+import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.ItalianStemmer;
@@ -79,8 +80,8 @@ public final class ItalianAnalyzer exten
static {
try {
- DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(SnowballFilter.class,
- DEFAULT_STOPWORD_FILE);
+ DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class,
+ DEFAULT_STOPWORD_FILE, IOUtils.CHARSET_UTF_8), Version.LUCENE_CURRENT);
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
Modified: lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java?rev=1201946&r1=1201945&r2=1201946&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java (original)
+++ lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java Mon Nov 14 22:36:20 2011
@@ -27,11 +27,13 @@ import org.apache.lucene.analysis.core.S
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
+import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.Version;
/**
@@ -60,8 +62,8 @@ public final class LatvianAnalyzer exten
static {
try {
- DEFAULT_STOP_SET = WordlistLoader.getWordSet(LatvianAnalyzer.class,
- DEFAULT_STOPWORD_FILE);
+ DEFAULT_STOP_SET = WordlistLoader.getWordSet(IOUtils.getDecodingReader(LatvianAnalyzer.class,
+ DEFAULT_STOPWORD_FILE, IOUtils.CHARSET_UTF_8), Version.LUCENE_CURRENT);
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
Modified: lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java?rev=1201946&r1=1201945&r2=1201946&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java (original)
+++ lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java Mon Nov 14 22:36:20 2011
@@ -30,6 +30,7 @@ import org.apache.lucene.analysis.standa
import org.apache.lucene.analysis.standard.StandardAnalyzer; // for javadoc
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.WordlistLoader;
+import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.Version;
import java.io.File;
@@ -83,8 +84,8 @@ public final class DutchAnalyzer extends
static {
try {
- DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(SnowballFilter.class,
- DEFAULT_STOPWORD_FILE);
+ DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class,
+ DEFAULT_STOPWORD_FILE, IOUtils.CHARSET_UTF_8), Version.LUCENE_CURRENT);
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
Modified: lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java?rev=1201946&r1=1201945&r2=1201946&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java (original)
+++ lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java Mon Nov 14 22:36:20 2011
@@ -33,6 +33,7 @@ import org.apache.lucene.analysis.standa
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
+import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.NorwegianStemmer;
@@ -62,8 +63,8 @@ public final class NorwegianAnalyzer ext
static {
try {
- DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(SnowballFilter.class,
- DEFAULT_STOPWORD_FILE);
+ DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class,
+ DEFAULT_STOPWORD_FILE, IOUtils.CHARSET_UTF_8), Version.LUCENE_CURRENT);
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
Modified: lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java?rev=1201946&r1=1201945&r2=1201946&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java (original)
+++ lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java Mon Nov 14 22:36:20 2011
@@ -33,6 +33,7 @@ import org.apache.lucene.analysis.standa
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
+import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.PortugueseStemmer;
@@ -62,8 +63,8 @@ public final class PortugueseAnalyzer ex
static {
try {
- DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(SnowballFilter.class,
- DEFAULT_STOPWORD_FILE);
+ DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class,
+ DEFAULT_STOPWORD_FILE, IOUtils.CHARSET_UTF_8), Version.LUCENE_CURRENT);
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
Modified: lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java?rev=1201946&r1=1201945&r2=1201946&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java (original)
+++ lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java Mon Nov 14 22:36:20 2011
@@ -34,6 +34,7 @@ import org.apache.lucene.analysis.core.S
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.Version;
/**
@@ -84,12 +85,12 @@ public final class RussianAnalyzer exten
static {
try {
- DEFAULT_STOP_SET =
- WordlistLoader.getSnowballWordSet(SnowballFilter.class, DEFAULT_STOPWORD_FILE);
+ DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class,
+ DEFAULT_STOPWORD_FILE, IOUtils.CHARSET_UTF_8), Version.LUCENE_CURRENT);
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
- throw new RuntimeException("Unable to load default stopword set");
+ throw new RuntimeException("Unable to load default stopword set", ex);
}
}
}
Modified: lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java?rev=1201946&r1=1201945&r2=1201946&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java (original)
+++ lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java Mon Nov 14 22:36:20 2011
@@ -80,21 +80,21 @@ public final class ClassicAnalyzer exten
}
/** Builds an analyzer with the stop words from the given file.
- * @see WordlistLoader#getWordSet(File)
+ * @see WordlistLoader#getWordSet(Reader, Version)
* @param matchVersion Lucene version to match See {@link
* <a href="#version">above</a>}
* @param stopwords File to read stop words from */
public ClassicAnalyzer(Version matchVersion, File stopwords) throws IOException {
- this(matchVersion, WordlistLoader.getWordSet(stopwords));
+ this(matchVersion, loadStopwordSet(stopwords, matchVersion));
}
/** Builds an analyzer with the stop words from the given reader.
- * @see WordlistLoader#getWordSet(Reader)
+ * @see WordlistLoader#getWordSet(Reader, Version)
* @param matchVersion Lucene version to match See {@link
* <a href="#version">above</a>}
* @param stopwords Reader to read stop words from */
public ClassicAnalyzer(Version matchVersion, Reader stopwords) throws IOException {
- this(matchVersion, WordlistLoader.getWordSet(stopwords));
+ this(matchVersion, loadStopwordSet(stopwords, matchVersion));
}
/**
Modified: lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java?rev=1201946&r1=1201945&r2=1201946&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java (original)
+++ lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java Mon Nov 14 22:36:20 2011
@@ -81,21 +81,21 @@ public final class StandardAnalyzer exte
}
/** Builds an analyzer with the stop words from the given file.
- * @see WordlistLoader#getWordSet(File)
+ * @see WordlistLoader#getWordSet(Reader, Version)
* @param matchVersion Lucene version to match See {@link
* <a href="#version">above</a>}
* @param stopwords File to read stop words from */
public StandardAnalyzer(Version matchVersion, File stopwords) throws IOException {
- this(matchVersion, WordlistLoader.getWordSet(stopwords));
+ this(matchVersion, loadStopwordSet(stopwords, matchVersion));
}
/** Builds an analyzer with the stop words from the given reader.
- * @see WordlistLoader#getWordSet(Reader)
+ * @see WordlistLoader#getWordSet(Reader, Version)
* @param matchVersion Lucene version to match See {@link
* <a href="#version">above</a>}
* @param stopwords Reader to read stop words from */
public StandardAnalyzer(Version matchVersion, Reader stopwords) throws IOException {
- this(matchVersion, WordlistLoader.getWordSet(stopwords));
+ this(matchVersion, loadStopwordSet(stopwords, matchVersion));
}
/**
Modified: lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java?rev=1201946&r1=1201945&r2=1201946&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java (original)
+++ lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java Mon Nov 14 22:36:20 2011
@@ -33,6 +33,7 @@ import org.apache.lucene.analysis.standa
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
+import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.SwedishStemmer;
@@ -62,8 +63,8 @@ public final class SwedishAnalyzer exten
static {
try {
- DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(SnowballFilter.class,
- DEFAULT_STOPWORD_FILE);
+ DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class,
+ DEFAULT_STOPWORD_FILE, IOUtils.CHARSET_UTF_8), Version.LUCENE_CURRENT);
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
Modified: lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/util/StopwordAnalyzerBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/util/StopwordAnalyzerBase.java?rev=1201946&r1=1201945&r2=1201946&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/util/StopwordAnalyzerBase.java (original)
+++ lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/util/StopwordAnalyzerBase.java Mon Nov 14 22:36:20 2011
@@ -17,10 +17,13 @@
package org.apache.lucene.analysis.util;
+import java.io.File;
import java.io.IOException;
+import java.io.Reader;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.Version;
/**
@@ -93,11 +96,59 @@ public abstract class StopwordAnalyzerBa
protected static CharArraySet loadStopwordSet(final boolean ignoreCase,
final Class<? extends Analyzer> aClass, final String resource,
final String comment) throws IOException {
- final Set<String> wordSet = WordlistLoader.getWordSet(aClass, resource,
- comment);
- final CharArraySet set = new CharArraySet(Version.LUCENE_31, wordSet.size(), ignoreCase);
- set.addAll(wordSet);
- return set;
+ Reader reader = null;
+ try {
+ reader = IOUtils.getDecodingReader(aClass.getResourceAsStream(resource), IOUtils.CHARSET_UTF_8);
+ return WordlistLoader.getWordSet(reader, comment, new CharArraySet(Version.LUCENE_31, 16, ignoreCase));
+ } finally {
+ IOUtils.close(reader);
+ }
+
+ }
+
+ /**
+ * Creates a CharArraySet from a file.
+ *
+ * @param stopwords
+ * the stopwords file to load
+ *
+ * @param matchVersion
+ * the Lucene version for cross version compatibility
+ * @return a CharArraySet containing the distinct stopwords from the given
+ * file
+ * @throws IOException
+ * if loading the stopwords throws an {@link IOException}
+ */
+ protected static CharArraySet loadStopwordSet(File stopwords,
+ Version matchVersion) throws IOException {
+ Reader reader = null;
+ try {
+ reader = IOUtils.getDecodingReader(stopwords, IOUtils.CHARSET_UTF_8);
+ return WordlistLoader.getWordSet(reader, matchVersion);
+ } finally {
+ IOUtils.close(reader);
+ }
+ }
+
+ /**
+ * Creates a CharArraySet from a file.
+ *
+ * @param stopwords
+ * the stopwords reader to load
+ *
+ * @param matchVersion
+ * the Lucene version for cross version compatibility
+ * @return a CharArraySet containing the distinct stopwords from the given
+ * reader
+ * @throws IOException
+ * if loading the stopwords throws an {@link IOException}
+ */
+ protected static CharArraySet loadStopwordSet(Reader stopwords,
+ Version matchVersion) throws IOException {
+ try {
+ return WordlistLoader.getWordSet(stopwords, matchVersion);
+ } finally {
+ IOUtils.close(stopwords);
+ }
}
-
}
Modified: lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java?rev=1201946&r1=1201945&r2=1201946&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java (original)
+++ lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java Mon Nov 14 22:36:20 2011
@@ -18,165 +18,91 @@ package org.apache.lucene.analysis.util;
*/
import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileReader;
import java.io.IOException;
-import java.io.InputStreamReader;
import java.io.Reader;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Set;
+
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.Version;
/**
* Loader for text files that represent a list of stopwords.
+ *
+ * @see IOUtils to obtain {@link Reader} instances
+ * @lucene.internal
*/
public class WordlistLoader {
-
- /**
- * Loads a text file associated with a given class (See
- * {@link Class#getResourceAsStream(String)}) and adds every line as an entry
- * to a {@link Set} (omitting leading and trailing whitespace). Every line of
- * the file should contain only one word. The words need to be in lower-case if
- * you make use of an Analyzer which uses LowerCaseFilter (like
- * StandardAnalyzer).
- *
- * @param aClass
- * a class that is associated with the given stopwordResource
- * @param stopwordResource
- * name of the resource file associated with the given class
- * @return a {@link Set} with the file's words
- */
- public static Set<String> getWordSet(Class<?> aClass, String stopwordResource)
- throws IOException {
- final Reader reader = new BufferedReader(new InputStreamReader(aClass
- .getResourceAsStream(stopwordResource), "UTF-8"));
- try {
- return getWordSet(reader);
- } finally {
- reader.close();
- }
- }
- /**
- * Loads a text file associated with a given class (See
- * {@link Class#getResourceAsStream(String)}) and adds every line as an entry
- * to a {@link Set} (omitting leading and trailing whitespace). Every line of
- * the file should contain only one word. The words need to be in lower-case if
- * you make use of an Analyzer which uses LowerCaseFilter (like
- * StandardAnalyzer).
- *
- * @param aClass
- * a class that is associated with the given stopwordResource
- * @param stopwordResource
- * name of the resource file associated with the given class
- * @param comment
- * the comment string to ignore
- * @return a {@link Set} with the file's words
- */
- public static Set<String> getWordSet(Class<?> aClass,
- String stopwordResource, String comment) throws IOException {
- final Reader reader = new BufferedReader(new InputStreamReader(aClass
- .getResourceAsStream(stopwordResource), "UTF-8"));
- try {
- return getWordSet(reader, comment);
- } finally {
- reader.close();
- }
- }
+ private static final int INITITAL_CAPACITY = 16;
/**
- * Loads a text file and adds every line as an entry to a HashSet (omitting
- * leading and trailing whitespace). Every line of the file should contain only
+ * Reads lines from a Reader and adds every line as an entry to a CharArraySet (omitting
+ * leading and trailing whitespace). Every line of the Reader should contain only
* one word. The words need to be in lowercase if you make use of an
* Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
*
- * @param wordfile File containing the wordlist
- * @return A HashSet with the file's words
+ * @param reader Reader containing the wordlist
+ * @param result the {@link CharArraySet} to fill with the reader's words
+ * @return the given {@link CharArraySet} with the reader's words
*/
- public static HashSet<String> getWordSet(File wordfile) throws IOException {
- FileReader reader = null;
+ public static CharArraySet getWordSet(Reader reader, CharArraySet result) throws IOException {
+ BufferedReader br = null;
try {
- reader = new FileReader(wordfile);
- return getWordSet(reader);
+ br = getBufferedReader(reader);
+ String word = null;
+ while ((word = br.readLine()) != null) {
+ result.add(word.trim());
+ }
}
finally {
- if (reader != null)
- reader.close();
+ IOUtils.close(br);
}
+ return result;
}
-
+
/**
- * Loads a text file and adds every non-comment line as an entry to a HashSet (omitting
- * leading and trailing whitespace). Every line of the file should contain only
+ * Reads lines from a Reader and adds every line as an entry to a CharArraySet (omitting
+ * leading and trailing whitespace). Every line of the Reader should contain only
* one word. The words need to be in lowercase if you make use of an
* Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
*
- * @param wordfile File containing the wordlist
- * @param comment The comment string to ignore
- * @return A HashSet with the file's words
+ * @param reader Reader containing the wordlist
+ * @param matchVersion the Lucene {@link Version}
+ * @return A {@link CharArraySet} with the reader's words
*/
- public static HashSet<String> getWordSet(File wordfile, String comment) throws IOException {
- FileReader reader = null;
- try {
- reader = new FileReader(wordfile);
- return getWordSet(reader, comment);
- }
- finally {
- if (reader != null)
- reader.close();
- }
+ public static CharArraySet getWordSet(Reader reader, Version matchVersion) throws IOException {
+ return getWordSet(reader, new CharArraySet(matchVersion, INITITAL_CAPACITY, false));
}
-
/**
- * Reads lines from a Reader and adds every line as an entry to a HashSet (omitting
+ * Reads lines from a Reader and adds every non-comment line as an entry to a CharArraySet (omitting
* leading and trailing whitespace). Every line of the Reader should contain only
* one word. The words need to be in lowercase if you make use of an
* Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
*
* @param reader Reader containing the wordlist
- * @return A HashSet with the reader's words
+ * @param comment The string representing a comment.
+ * @param matchVersion the Lucene {@link Version}
+ * @return A CharArraySet with the reader's words
*/
- public static HashSet<String> getWordSet(Reader reader) throws IOException {
- final HashSet<String> result = new HashSet<String>();
- BufferedReader br = null;
- try {
- if (reader instanceof BufferedReader) {
- br = (BufferedReader) reader;
- } else {
- br = new BufferedReader(reader);
- }
- String word = null;
- while ((word = br.readLine()) != null) {
- result.add(word.trim());
- }
- }
- finally {
- if (br != null)
- br.close();
- }
- return result;
+ public static CharArraySet getWordSet(Reader reader, String comment, Version matchVersion) throws IOException {
+ return getWordSet(reader, comment, new CharArraySet(matchVersion, INITITAL_CAPACITY, false));
}
/**
- * Reads lines from a Reader and adds every non-comment line as an entry to a HashSet (omitting
+ * Reads lines from a Reader and adds every non-comment line as an entry to a CharArraySet (omitting
* leading and trailing whitespace). Every line of the Reader should contain only
* one word. The words need to be in lowercase if you make use of an
* Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
*
* @param reader Reader containing the wordlist
* @param comment The string representing a comment.
- * @return A HashSet with the reader's words
+ * @param result the {@link CharArraySet} to fill with the reader's words
+ * @return the given {@link CharArraySet} with the reader's words
*/
- public static HashSet<String> getWordSet(Reader reader, String comment) throws IOException {
- final HashSet<String> result = new HashSet<String>();
+ public static CharArraySet getWordSet(Reader reader, String comment, CharArraySet result) throws IOException {
BufferedReader br = null;
try {
- if (reader instanceof BufferedReader) {
- br = (BufferedReader) reader;
- } else {
- br = new BufferedReader(reader);
- }
+ br = getBufferedReader(reader);
String word = null;
while ((word = br.readLine()) != null) {
if (word.startsWith(comment) == false){
@@ -185,34 +111,11 @@ public class WordlistLoader {
}
}
finally {
- if (br != null)
- br.close();
+ IOUtils.close(br);
}
return result;
}
- /**
- * Loads a text file in Snowball format associated with a given class (See
- * {@link Class#getResourceAsStream(String)}) and adds all words as entries to
- * a {@link Set}. The words need to be in lower-case if you make use of an
- * Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
- *
- * @param aClass a class that is associated with the given stopwordResource
- * @param stopwordResource name of the resource file associated with the given
- * class
- * @return a {@link Set} with the file's words
- * @see #getSnowballWordSet(Reader)
- */
- public static Set<String> getSnowballWordSet(Class<?> aClass,
- String stopwordResource) throws IOException {
- final Reader reader = new BufferedReader(new InputStreamReader(aClass
- .getResourceAsStream(stopwordResource), "UTF-8"));
- try {
- return getSnowballWordSet(reader);
- } finally {
- reader.close();
- }
- }
/**
* Reads stopwords from a stopword list in Snowball format.
@@ -226,18 +129,14 @@ public class WordlistLoader {
* </p>
*
* @param reader Reader containing a Snowball stopword list
- * @return A Set with the reader's words
+ * @param result the {@link CharArraySet} to fill with the reader's words
+ * @return the given {@link CharArraySet} with the reader's words
*/
- public static Set<String> getSnowballWordSet(Reader reader)
+ public static CharArraySet getSnowballWordSet(Reader reader, CharArraySet result)
throws IOException {
- final Set<String> result = new HashSet<String>();
BufferedReader br = null;
try {
- if (reader instanceof BufferedReader) {
- br = (BufferedReader) reader;
- } else {
- br = new BufferedReader(reader);
- }
+ br = getBufferedReader(reader);
String line = null;
while ((line = br.readLine()) != null) {
int comment = line.indexOf('|');
@@ -247,10 +146,29 @@ public class WordlistLoader {
if (words[i].length() > 0) result.add(words[i]);
}
} finally {
- if (br != null) br.close();
+ IOUtils.close(br);
}
return result;
}
+
+ /**
+ * Reads stopwords from a stopword list in Snowball format.
+ * <p>
+ * The snowball format is the following:
+ * <ul>
+ * <li>Lines may contain multiple words separated by whitespace.
+ * <li>The comment character is the vertical line (|).
+ * <li>Lines may contain trailing comments.
+ * </ul>
+ * </p>
+ *
+ * @param reader Reader containing a Snowball stopword list
+ * @param matchVersion the Lucene {@link Version}
+ * @return A {@link CharArraySet} with the reader's words
+ */
+ public static CharArraySet getSnowballWordSet(Reader reader, Version matchVersion) throws IOException {
+ return getSnowballWordSet(reader, new CharArraySet(matchVersion, INITITAL_CAPACITY, false));
+ }
/**
@@ -261,24 +179,24 @@ public class WordlistLoader {
* @return stem dictionary that overrules the stemming algorithm
* @throws IOException
*/
- public static HashMap<String, String> getStemDict(File wordstemfile) throws IOException {
- if (wordstemfile == null)
- throw new NullPointerException("wordstemfile may not be null");
- final HashMap<String, String> result = new HashMap<String,String>();
+ public static CharArrayMap<String> getStemDict(Reader reader, CharArrayMap<String> result) throws IOException {
BufferedReader br = null;
-
try {
- br = new BufferedReader(new FileReader(wordstemfile));
+ br = getBufferedReader(reader);
String line;
while ((line = br.readLine()) != null) {
String[] wordstem = line.split("\t", 2);
result.put(wordstem[0], wordstem[1]);
}
} finally {
- if(br != null)
- br.close();
+ IOUtils.close(br);
}
return result;
}
-
+
+ private static BufferedReader getBufferedReader(Reader reader) {
+ return (reader instanceof BufferedReader) ? (BufferedReader) reader
+ : new BufferedReader(reader);
+ }
+
}
Modified: lucene/dev/branches/solrcloud/modules/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArraySet.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArraySet.java?rev=1201946&r1=1201945&r2=1201946&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArraySet.java (original)
+++ lucene/dev/branches/solrcloud/modules/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArraySet.java Mon Nov 14 22:36:20 2011
@@ -46,7 +46,7 @@ public class TestCharArraySet extends Lu
public void testNonZeroOffset() {
String[] words={"Hello","World","this","is","a","test"};
char[] findme="xthisy".toCharArray();
- CharArraySet set=new CharArraySet(TEST_VERSION_CURRENT, 10,true);
+ CharArraySet set= new CharArraySet(TEST_VERSION_CURRENT, 10, true);
set.addAll(Arrays.asList(words));
assertTrue(set.contains(findme, 1, 4));
assertTrue(set.contains(new String(findme,1,4)));
Modified: lucene/dev/branches/solrcloud/modules/analysis/common/src/test/org/apache/lucene/analysis/util/TestWordlistLoader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/analysis/common/src/test/org/apache/lucene/analysis/util/TestWordlistLoader.java?rev=1201946&r1=1201945&r2=1201946&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/analysis/common/src/test/org/apache/lucene/analysis/util/TestWordlistLoader.java (original)
+++ lucene/dev/branches/solrcloud/modules/analysis/common/src/test/org/apache/lucene/analysis/util/TestWordlistLoader.java Mon Nov 14 22:36:20 2011
@@ -20,8 +20,6 @@ package org.apache.lucene.analysis.util;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.StringReader;
-import java.util.HashSet;
-import java.util.Set;
import org.apache.lucene.util.LuceneTestCase;
@@ -31,22 +29,22 @@ public class TestWordlistLoader extends
public void testWordlistLoading() throws IOException {
String s = "ONE\n two \nthree";
- HashSet<String> wordSet1 = WordlistLoader.getWordSet(new StringReader(s));
+ CharArraySet wordSet1 = WordlistLoader.getWordSet(new StringReader(s), TEST_VERSION_CURRENT);
checkSet(wordSet1);
- HashSet<String> wordSet2 = WordlistLoader.getWordSet(new BufferedReader(new StringReader(s)));
+ CharArraySet wordSet2 = WordlistLoader.getWordSet(new BufferedReader(new StringReader(s)), TEST_VERSION_CURRENT);
checkSet(wordSet2);
}
public void testComments() throws Exception {
String s = "ONE\n two \nthree\n#comment";
- HashSet<String> wordSet1 = WordlistLoader.getWordSet(new StringReader(s), "#");
+ CharArraySet wordSet1 = WordlistLoader.getWordSet(new StringReader(s), "#", TEST_VERSION_CURRENT);
checkSet(wordSet1);
assertFalse(wordSet1.contains("#comment"));
assertFalse(wordSet1.contains("comment"));
}
- private void checkSet(HashSet<String> wordset) {
+ private void checkSet(CharArraySet wordset) {
assertEquals(3, wordset.size());
assertTrue(wordset.contains("ONE")); // case is not modified
assertTrue(wordset.contains("two")); // surrounding whitespace is removed
@@ -68,7 +66,7 @@ public class TestWordlistLoader extends
" two \n" + // stopword with leading/trailing space
" three four five \n" + // multiple stopwords
"six seven | comment\n"; //multiple stopwords + comment
- Set<String> wordset = WordlistLoader.getSnowballWordSet(new StringReader(s));
+ CharArraySet wordset = WordlistLoader.getSnowballWordSet(new StringReader(s), TEST_VERSION_CURRENT);
assertEquals(7, wordset.size());
assertTrue(wordset.contains("ONE"));
assertTrue(wordset.contains("two"));
Modified: lucene/dev/branches/solrcloud/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java?rev=1201946&r1=1201945&r2=1201946&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java (original)
+++ lucene/dev/branches/solrcloud/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java Mon Nov 14 22:36:20 2011
@@ -26,12 +26,14 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.en.PorterStemFilter;
+import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.cn.smart.SentenceTokenizer;
import org.apache.lucene.analysis.cn.smart.WordTokenFilter;
import org.apache.lucene.analysis.core.StopFilter;
+import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.Version;
/**
@@ -66,7 +68,7 @@ public final class SmartChineseAnalyzer
* Returns an unmodifiable instance of the default stop-words set.
* @return an unmodifiable instance of the default stop-words set.
*/
- public static Set<String> getDefaultStopSet(){
+ public static CharArraySet getDefaultStopSet(){
return DefaultSetHolder.DEFAULT_STOP_SET;
}
@@ -75,7 +77,7 @@ public final class SmartChineseAnalyzer
* accesses the static final set the first time.;
*/
private static class DefaultSetHolder {
- static final Set<String> DEFAULT_STOP_SET;
+ static final CharArraySet DEFAULT_STOP_SET;
static {
try {
@@ -87,16 +89,12 @@ public final class SmartChineseAnalyzer
}
}
- static Set<String> loadDefaultStopWordSet() throws IOException {
- InputStream stream = SmartChineseAnalyzer.class
- .getResourceAsStream(DEFAULT_STOPWORD_FILE);
- try {
- InputStreamReader reader = new InputStreamReader(stream, "UTF-8");
- // make sure it is unmodifiable as we expose it in the outer class
- return Collections.unmodifiableSet(WordlistLoader.getWordSet(reader, STOPWORD_FILE_COMMENT));
- } finally {
- stream.close();
- }
+ static CharArraySet loadDefaultStopWordSet() throws IOException {
+ // make sure it is unmodifiable as we expose it in the outer class
+ return CharArraySet.unmodifiableSet(WordlistLoader.getWordSet(IOUtils
+ .getDecodingReader(SmartChineseAnalyzer.class, DEFAULT_STOPWORD_FILE,
+ IOUtils.CHARSET_UTF_8), STOPWORD_FILE_COMMENT,
+ Version.LUCENE_CURRENT));
}
}
Modified: lucene/dev/branches/solrcloud/modules/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java?rev=1201946&r1=1201945&r2=1201946&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java (original)
+++ lucene/dev/branches/solrcloud/modules/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java Mon Nov 14 22:36:20 2011
@@ -34,6 +34,7 @@ import org.apache.lucene.analysis.stempe
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
+import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.Version;
import org.egothor.stemmer.Trie;
@@ -68,8 +69,8 @@ public final class PolishAnalyzer extend
static {
try {
- DEFAULT_STOP_SET = WordlistLoader.getWordSet(PolishAnalyzer.class,
- DEFAULT_STOPWORD_FILE);
+ DEFAULT_STOP_SET = WordlistLoader.getWordSet(IOUtils.getDecodingReader(PolishAnalyzer.class,
+ DEFAULT_STOPWORD_FILE, IOUtils.CHARSET_UTF_8), "#", Version.LUCENE_CURRENT);
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
Modified: lucene/dev/branches/solrcloud/modules/benchmark/conf/analyzer.alg
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/benchmark/conf/analyzer.alg?rev=1201946&r1=1201945&r2=1201946&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/benchmark/conf/analyzer.alg (original)
+++ lucene/dev/branches/solrcloud/modules/benchmark/conf/analyzer.alg Mon Nov 14 22:36:20 2011
@@ -55,7 +55,7 @@ log.queries=true
{ "Populate"
CreateIndex
{ "MAddDocs" AddDoc > : 2000
- Optimize
+ ForceMerge(1)
CloseIndex
}
Modified: lucene/dev/branches/solrcloud/modules/benchmark/conf/collector-small.alg
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/benchmark/conf/collector-small.alg?rev=1201946&r1=1201945&r2=1201946&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/benchmark/conf/collector-small.alg (original)
+++ lucene/dev/branches/solrcloud/modules/benchmark/conf/collector-small.alg Mon Nov 14 22:36:20 2011
@@ -52,7 +52,7 @@ log.queries=true
{ "Populate"
CreateIndex
{ "MAddDocs" AddDoc } : 200000
- Optimize
+ ForceMerge(1)
CloseIndex
}
Modified: lucene/dev/branches/solrcloud/modules/benchmark/conf/collector.alg
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/benchmark/conf/collector.alg?rev=1201946&r1=1201945&r2=1201946&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/benchmark/conf/collector.alg (original)
+++ lucene/dev/branches/solrcloud/modules/benchmark/conf/collector.alg Mon Nov 14 22:36:20 2011
@@ -52,7 +52,7 @@ log.queries=true
{ "Populate"
CreateIndex
{ "MAddDocs" AddDoc } : 2000000
- Optimize
+ ForceMerge(1)
CloseIndex
}
Modified: lucene/dev/branches/solrcloud/modules/benchmark/conf/deletes.alg
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/benchmark/conf/deletes.alg?rev=1201946&r1=1201945&r2=1201946&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/benchmark/conf/deletes.alg (original)
+++ lucene/dev/branches/solrcloud/modules/benchmark/conf/deletes.alg Mon Nov 14 22:36:20 2011
@@ -58,7 +58,7 @@ CloseIndex
{ "Populate"
OpenIndex
{ AddDoc(10) > : 200000
- Optimize
+ ForceMerge(1)
CloseIndex
>
Modified: lucene/dev/branches/solrcloud/modules/benchmark/conf/facets.alg
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/benchmark/conf/facets.alg?rev=1201946&r1=1201945&r2=1201946&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/benchmark/conf/facets.alg (original)
+++ lucene/dev/branches/solrcloud/modules/benchmark/conf/facets.alg Mon Nov 14 22:36:20 2011
@@ -52,7 +52,7 @@ task.max.depth.log=2
-CreateIndex
-CreateTaxonomyIndex
{ "MAddDocs" AddFacetedDoc > : *
- -Optimize
+ -ForceMerge(1)
-CloseIndex
-CloseTaxonomyIndex
}
Modified: lucene/dev/branches/solrcloud/modules/benchmark/conf/highlight-profile.alg
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/benchmark/conf/highlight-profile.alg?rev=1201946&r1=1201945&r2=1201946&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/benchmark/conf/highlight-profile.alg (original)
+++ lucene/dev/branches/solrcloud/modules/benchmark/conf/highlight-profile.alg Mon Nov 14 22:36:20 2011
@@ -44,7 +44,7 @@ log.queries=true
{ "Populate"
CreateIndex
{ "MAddDocs" AddDoc } : 20000
- Optimize
+ ForceMerge(1)
CloseIndex
}
{ "Rounds"
Modified: lucene/dev/branches/solrcloud/modules/benchmark/conf/highlight-vs-vector-highlight.alg
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/benchmark/conf/highlight-vs-vector-highlight.alg?rev=1201946&r1=1201945&r2=1201946&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/benchmark/conf/highlight-vs-vector-highlight.alg (original)
+++ lucene/dev/branches/solrcloud/modules/benchmark/conf/highlight-vs-vector-highlight.alg Mon Nov 14 22:36:20 2011
@@ -48,7 +48,7 @@ log.queries=true
{ "Populate"
CreateIndex
{ "MAddDocs" AddDoc } : 20000
- Optimize
+ ForceMerge(1)
CloseIndex
}
{
Modified: lucene/dev/branches/solrcloud/modules/benchmark/conf/indexing-flush-by-RAM-multithreaded.alg
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/benchmark/conf/indexing-flush-by-RAM-multithreaded.alg?rev=1201946&r1=1201945&r2=1201946&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/benchmark/conf/indexing-flush-by-RAM-multithreaded.alg (original)
+++ lucene/dev/branches/solrcloud/modules/benchmark/conf/indexing-flush-by-RAM-multithreaded.alg Mon Nov 14 22:36:20 2011
@@ -54,7 +54,7 @@ log.queries=true
{ "Populate"
CreateIndex
[{ "MAddDocs" AddDoc } : 5000] : 4
- Optimize
+ ForceMerge(1)
CloseIndex
}
Modified: lucene/dev/branches/solrcloud/modules/benchmark/conf/indexing-flush-by-RAM.alg
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/benchmark/conf/indexing-flush-by-RAM.alg?rev=1201946&r1=1201945&r2=1201946&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/benchmark/conf/indexing-flush-by-RAM.alg (original)
+++ lucene/dev/branches/solrcloud/modules/benchmark/conf/indexing-flush-by-RAM.alg Mon Nov 14 22:36:20 2011
@@ -54,7 +54,7 @@ log.queries=true
{ "Populate"
CreateIndex
{ "MAddDocs" AddDoc } : 20000
- Optimize
+ ForceMerge(1)
CloseIndex
}
Modified: lucene/dev/branches/solrcloud/modules/benchmark/conf/indexing-multithreaded.alg
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/benchmark/conf/indexing-multithreaded.alg?rev=1201946&r1=1201945&r2=1201946&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/benchmark/conf/indexing-multithreaded.alg (original)
+++ lucene/dev/branches/solrcloud/modules/benchmark/conf/indexing-multithreaded.alg Mon Nov 14 22:36:20 2011
@@ -54,7 +54,7 @@ log.queries=true
{ "Populate"
CreateIndex
[{ "MAddDocs" AddDoc } : 5000] : 4
- Optimize
+ ForceMerge(1)
CommitIndex(commit1)
CloseIndex
}
Modified: lucene/dev/branches/solrcloud/modules/benchmark/conf/indexing.alg
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/benchmark/conf/indexing.alg?rev=1201946&r1=1201945&r2=1201946&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/benchmark/conf/indexing.alg (original)
+++ lucene/dev/branches/solrcloud/modules/benchmark/conf/indexing.alg Mon Nov 14 22:36:20 2011
@@ -54,7 +54,7 @@ log.queries=true
{ "Populate"
CreateIndex
{ "MAddDocs" AddDoc } : 20000
- Optimize
+ ForceMerge(1)
CloseIndex
}
Modified: lucene/dev/branches/solrcloud/modules/benchmark/conf/micro-standard-flush-by-ram.alg
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/benchmark/conf/micro-standard-flush-by-ram.alg?rev=1201946&r1=1201945&r2=1201946&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/benchmark/conf/micro-standard-flush-by-ram.alg (original)
+++ lucene/dev/branches/solrcloud/modules/benchmark/conf/micro-standard-flush-by-ram.alg Mon Nov 14 22:36:20 2011
@@ -53,7 +53,7 @@ log.queries=true
{ "Populate"
CreateIndex
{ "MAddDocs" AddDoc > : 2000
- Optimize
+ ForceMerge(1)
CloseIndex
}
Modified: lucene/dev/branches/solrcloud/modules/benchmark/conf/micro-standard.alg
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/benchmark/conf/micro-standard.alg?rev=1201946&r1=1201945&r2=1201946&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/benchmark/conf/micro-standard.alg (original)
+++ lucene/dev/branches/solrcloud/modules/benchmark/conf/micro-standard.alg Mon Nov 14 22:36:20 2011
@@ -52,7 +52,7 @@ log.queries=true
{ "Populate"
-CreateIndex
{ "MAddDocs" AddDoc > : 2000
- -Optimize
+ -ForceMerge(1)
-CloseIndex
}
Modified: lucene/dev/branches/solrcloud/modules/benchmark/conf/sample.alg
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/benchmark/conf/sample.alg?rev=1201946&r1=1201945&r2=1201946&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/benchmark/conf/sample.alg (original)
+++ lucene/dev/branches/solrcloud/modules/benchmark/conf/sample.alg Mon Nov 14 22:36:20 2011
@@ -62,7 +62,7 @@ log.queries=false
{ "PopulateShort"
CreateIndex
{ AddDoc(4000) > : 20000
- Optimize
+ ForceMerge(1)
CloseIndex
>
@@ -71,7 +71,7 @@ log.queries=false
{ "PopulateLong"
CreateIndex
{ AddDoc(8000) > : 10000
- Optimize
+ ForceMerge(1)
CloseIndex
>
Modified: lucene/dev/branches/solrcloud/modules/benchmark/conf/sloppy-phrase.alg
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/benchmark/conf/sloppy-phrase.alg?rev=1201946&r1=1201945&r2=1201946&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/benchmark/conf/sloppy-phrase.alg (original)
+++ lucene/dev/branches/solrcloud/modules/benchmark/conf/sloppy-phrase.alg Mon Nov 14 22:36:20 2011
@@ -52,7 +52,7 @@ ResetSystemErase
{ "Populate"
CreateIndex
{ "MAddDocs" AddDoc(2000) > : 20000
- Optimize
+ ForceMerge(1)
CloseIndex
}
Modified: lucene/dev/branches/solrcloud/modules/benchmark/conf/sort-standard.alg
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/benchmark/conf/sort-standard.alg?rev=1201946&r1=1201945&r2=1201946&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/benchmark/conf/sort-standard.alg (original)
+++ lucene/dev/branches/solrcloud/modules/benchmark/conf/sort-standard.alg Mon Nov 14 22:36:20 2011
@@ -50,7 +50,7 @@ log.queries=true
{ "Populate"
-CreateIndex
{ "MAddDocs" AddDoc(100) > : 500000
- -Optimize
+ -ForceMerge(1)
-CloseIndex
}
Modified: lucene/dev/branches/solrcloud/modules/benchmark/conf/standard-flush-by-RAM.alg
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/benchmark/conf/standard-flush-by-RAM.alg?rev=1201946&r1=1201945&r2=1201946&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/benchmark/conf/standard-flush-by-RAM.alg (original)
+++ lucene/dev/branches/solrcloud/modules/benchmark/conf/standard-flush-by-RAM.alg Mon Nov 14 22:36:20 2011
@@ -53,7 +53,7 @@ log.queries=true
{ "Populate"
CreateIndex
{ "MAddDocs" AddDoc } : 20000
- Optimize
+ ForceMerge(1)
CloseIndex
}
Modified: lucene/dev/branches/solrcloud/modules/benchmark/conf/standard-highlights-notv.alg
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/benchmark/conf/standard-highlights-notv.alg?rev=1201946&r1=1201945&r2=1201946&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/benchmark/conf/standard-highlights-notv.alg (original)
+++ lucene/dev/branches/solrcloud/modules/benchmark/conf/standard-highlights-notv.alg Mon Nov 14 22:36:20 2011
@@ -44,7 +44,7 @@ log.queries=true
{ "Populate"
CreateIndex
{ "MAddDocs" AddDoc } : 20000
- Optimize
+ ForceMerge(1)
CloseIndex
}
{ "Rounds"
Modified: lucene/dev/branches/solrcloud/modules/benchmark/conf/standard-highlights-tv.alg
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/benchmark/conf/standard-highlights-tv.alg?rev=1201946&r1=1201945&r2=1201946&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/benchmark/conf/standard-highlights-tv.alg (original)
+++ lucene/dev/branches/solrcloud/modules/benchmark/conf/standard-highlights-tv.alg Mon Nov 14 22:36:20 2011
@@ -44,7 +44,7 @@ log.queries=true
{ "Populate"
CreateIndex
{ "MAddDocs" AddDoc } : 20000
- Optimize
+ ForceMerge(1)
CloseIndex
}
{ "Rounds"
Modified: lucene/dev/branches/solrcloud/modules/benchmark/conf/standard.alg
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/benchmark/conf/standard.alg?rev=1201946&r1=1201945&r2=1201946&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/benchmark/conf/standard.alg (original)
+++ lucene/dev/branches/solrcloud/modules/benchmark/conf/standard.alg Mon Nov 14 22:36:20 2011
@@ -53,7 +53,7 @@ log.queries=true
{ "Populate"
CreateIndex
{ "MAddDocs" AddDoc } : 20000
- Optimize
+ ForceMerge(1)
CloseIndex
}
Modified: lucene/dev/branches/solrcloud/modules/benchmark/conf/vector-highlight-profile.alg
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/benchmark/conf/vector-highlight-profile.alg?rev=1201946&r1=1201945&r2=1201946&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/benchmark/conf/vector-highlight-profile.alg (original)
+++ lucene/dev/branches/solrcloud/modules/benchmark/conf/vector-highlight-profile.alg Mon Nov 14 22:36:20 2011
@@ -44,7 +44,7 @@ log.queries=true
{ "Populate"
CreateIndex
{ "MAddDocs" AddDoc } : 20000
- Optimize
+ ForceMerge(1)
CloseIndex
}
{ "Rounds"
Modified: lucene/dev/branches/solrcloud/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentItemsSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentItemsSource.java?rev=1201946&r1=1201945&r2=1201946&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentItemsSource.java (original)
+++ lucene/dev/branches/solrcloud/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentItemsSource.java Mon Nov 14 22:36:20 2011
@@ -149,6 +149,9 @@ public abstract class ContentItemsSource
}
public void printStatistics(String itemsName) {
+ if (!verbose) {
+ return;
+ }
boolean print = false;
String col = " ";
StringBuilder sb = new StringBuilder();
Modified: lucene/dev/branches/solrcloud/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CloseIndexTask.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CloseIndexTask.java?rev=1201946&r1=1201945&r2=1201946&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CloseIndexTask.java (original)
+++ lucene/dev/branches/solrcloud/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CloseIndexTask.java Mon Nov 14 22:36:20 2011
@@ -18,10 +18,10 @@ package org.apache.lucene.benchmark.byTa
*/
import java.io.IOException;
-import java.io.PrintStream;
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.util.InfoStream;
/**
* Close index writer.
@@ -41,9 +41,8 @@ public class CloseIndexTask extends Perf
IndexWriter iw = getRunData().getIndexWriter();
if (iw != null) {
// If infoStream was set to output to a file, close it.
- PrintStream infoStream = iw.getInfoStream();
- if (infoStream != null && infoStream != System.out
- && infoStream != System.err) {
+ InfoStream infoStream = iw.getConfig().getInfoStream();
+ if (infoStream != null) {
infoStream.close();
}
iw.close(doWait);
Modified: lucene/dev/branches/solrcloud/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java?rev=1201946&r1=1201945&r2=1201946&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java (original)
+++ lucene/dev/branches/solrcloud/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java Mon Nov 14 22:36:20 2011
@@ -58,12 +58,12 @@ import java.io.PrintStream;
* This task also supports a "writer.info.stream" property with the following
* values:
* <ul>
- * <li>SystemOut - sets {@link IndexWriter#setInfoStream(java.io.PrintStream)}
+ * <li>SystemOut - sets {@link IndexWriterConfig#setInfoStream(java.io.PrintStream)}
* to {@link System#out}.
- * <li>SystemErr - sets {@link IndexWriter#setInfoStream(java.io.PrintStream)}
+ * <li>SystemErr - sets {@link IndexWriterConfig#setInfoStream(java.io.PrintStream)}
* to {@link System#err}.
* <li><file_name> - attempts to create a file given that name and sets
- * {@link IndexWriter#setInfoStream(java.io.PrintStream)} to that file. If this
+ * {@link IndexWriterConfig#setInfoStream(java.io.PrintStream)} to that file. If this
* denotes an invalid file name, or some error occurs, an exception will be
* thrown.
* </ul>
@@ -175,18 +175,19 @@ public class CreateIndexTask extends Per
}
public static IndexWriter configureWriter(Config config, PerfRunData runData, OpenMode mode, IndexCommit commit) throws CorruptIndexException, LockObtainFailedException, IOException {
- IndexWriter writer = new IndexWriter(runData.getDirectory(), createWriterConfig(config, runData, mode, commit));
+ IndexWriterConfig iwc = createWriterConfig(config, runData, mode, commit);
String infoStreamVal = config.get("writer.info.stream", null);
if (infoStreamVal != null) {
if (infoStreamVal.equals("SystemOut")) {
- writer.setInfoStream(System.out);
+ iwc.setInfoStream(System.out);
} else if (infoStreamVal.equals("SystemErr")) {
- writer.setInfoStream(System.err);
+ iwc.setInfoStream(System.err);
} else {
File f = new File(infoStreamVal).getAbsoluteFile();
- writer.setInfoStream(new PrintStream(new BufferedOutputStream(new FileOutputStream(f))));
+ iwc.setInfoStream(new PrintStream(new BufferedOutputStream(new FileOutputStream(f))));
}
}
+ IndexWriter writer = new IndexWriter(runData.getDirectory(), iwc);
return writer;
}
}
Modified: lucene/dev/branches/solrcloud/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java?rev=1201946&r1=1201945&r2=1201946&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java (original)
+++ lucene/dev/branches/solrcloud/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java Mon Nov 14 22:36:20 2011
@@ -136,9 +136,6 @@ public abstract class ReadTask extends P
final String printHitsField = getRunData().getConfig().get("print.hits.field", null);
if (hits != null && printHitsField != null && printHitsField.length() > 0) {
- if (q instanceof MultiTermQuery) {
- System.out.println("MultiTermQuery term count = " + ((MultiTermQuery) q).getTotalNumberOfTerms());
- }
System.out.println("totalHits = " + hits.totalHits);
System.out.println("maxDoc() = " + reader.maxDoc());
System.out.println("numDocs() = " + reader.numDocs());
Modified: lucene/dev/branches/solrcloud/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/RollbackIndexTask.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/RollbackIndexTask.java?rev=1201946&r1=1201945&r2=1201946&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/RollbackIndexTask.java (original)
+++ lucene/dev/branches/solrcloud/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/RollbackIndexTask.java Mon Nov 14 22:36:20 2011
@@ -22,6 +22,7 @@ import java.io.PrintStream;
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.util.InfoStream;
/**
* Rollback the index writer.
@@ -39,9 +40,8 @@ public class RollbackIndexTask extends P
IndexWriter iw = getRunData().getIndexWriter();
if (iw != null) {
// If infoStream was set to output to a file, close it.
- PrintStream infoStream = iw.getInfoStream();
- if (infoStream != null && infoStream != System.out
- && infoStream != System.err) {
+ InfoStream infoStream = iw.getConfig().getInfoStream();
+ if (infoStream != null) {
infoStream.close();
}
iw.rollback();
Modified: lucene/dev/branches/solrcloud/modules/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/DocNameExtractor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/DocNameExtractor.java?rev=1201946&r1=1201945&r2=1201946&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/DocNameExtractor.java (original)
+++ lucene/dev/branches/solrcloud/modules/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/DocNameExtractor.java Mon Nov 14 22:36:20 2011
@@ -22,6 +22,7 @@ import java.util.List;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.StoredFieldVisitor;
+import org.apache.lucene.index.StoredFieldVisitor.Status;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.IndexInput;
@@ -51,15 +52,19 @@ public class DocNameExtractor {
final List<String> name = new ArrayList<String>();
searcher.getIndexReader().document(docid, new StoredFieldVisitor() {
@Override
- public boolean stringField(FieldInfo fieldInfo, IndexInput in, int numUTF8Bytes) throws IOException {
- if (fieldInfo.name.equals(docNameField) && name.size() == 0) {
- final byte[] b = new byte[numUTF8Bytes];
- in.readBytes(b, 0, b.length);
- name.add(new String(b, "UTF-8"));
+ public void stringField(FieldInfo fieldInfo, String value) throws IOException {
+ name.add(value);
+ }
+
+ @Override
+ public Status needsField(FieldInfo fieldInfo) throws IOException {
+ if (!name.isEmpty()) {
+ return Status.STOP;
+ } else if (fieldInfo.name.equals(docNameField)) {
+ return Status.YES;
} else {
- in.seek(in.getFilePointer() + numUTF8Bytes);
+ return Status.NO;
}
- return false;
}
});
if (name.size() != 0) {
Modified: lucene/dev/branches/solrcloud/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java?rev=1201946&r1=1201945&r2=1201946&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java (original)
+++ lucene/dev/branches/solrcloud/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java Mon Nov 14 22:36:20 2011
@@ -77,7 +77,7 @@ public class TestPerfTasksLogic extends
"ResetSystemErase",
"CreateIndex",
"{ AddDoc } : 1000",
- "Optimize",
+ "ForceMerge(1)",
"CloseIndex",
"OpenReader",
"{ CountingSearchTest } : 200",
@@ -114,7 +114,7 @@ public class TestPerfTasksLogic extends
"ResetSystemErase",
"CreateIndex",
"{ AddDoc } : 100",
- "Optimize",
+ "ForceMerge(1)",
"CloseIndex",
"OpenReader",
"{ CountingSearchTest } : .5s",
@@ -137,7 +137,7 @@ public class TestPerfTasksLogic extends
"ResetSystemErase",
"CreateIndex",
"{ AddDoc } : 1000",
- "Optimize",
+ "ForceMerge(1)",
"CloseIndex",
"OpenReader",
"{",
@@ -163,7 +163,7 @@ public class TestPerfTasksLogic extends
"ResetSystemErase",
"CreateIndex",
"{ AddDoc } : 100",
- "Optimize",
+ "ForceMerge(1)",
"CloseIndex",
"OpenReader(true)",
"{ CountingHighlighterTest(size[1],highlight[1],mergeContiguous[true],maxFrags[1],fields[body]) } : 200",
@@ -202,7 +202,7 @@ public class TestPerfTasksLogic extends
"ResetSystemErase",
"CreateIndex",
"{ AddDoc } : 1000",
- "Optimize",
+ "ForceMerge(1)",
"CloseIndex",
"OpenReader(false)",
"{ CountingHighlighterTest(size[1],highlight[1],mergeContiguous[true],maxFrags[1],fields[body]) } : 200",
@@ -240,7 +240,7 @@ public class TestPerfTasksLogic extends
"ResetSystemErase",
"CreateIndex",
"{ AddDoc } : 1000",
- "Optimize",
+ "ForceMerge(1)",
"CloseIndex",
"OpenReader",
"{ CountingHighlighterTest(size[1],highlight[1],mergeContiguous[true],maxFrags[1],fields[body]) } : 200",
@@ -277,7 +277,7 @@ public class TestPerfTasksLogic extends
"# ----- alg ",
"CreateIndex",
"{ AddDoc } : * ",
- "Optimize",
+ "ForceMerge(1)",
"CloseIndex",
"OpenReader",
"{ CountingSearchTest } : 100",
@@ -818,9 +818,9 @@ public class TestPerfTasksLogic extends
}
/**
- * Test that we can call optimize(maxNumSegments).
+ * Test that we can call forceMerge(maxNumSegments).
*/
- public void testOptimizeMaxNumSegments() throws Exception {
+ public void testForceMerge() throws Exception {
// 1. alg definition (required in every "logic" test)
String algLines[] = {
"# ----- properties ",
@@ -841,7 +841,7 @@ public class TestPerfTasksLogic extends
" ResetSystemErase",
" CreateIndex",
" { \"AddDocs\" AddDoc > : * ",
- " Optimize(3)",
+ " ForceMerge(3)",
" CloseIndex()",
"} : 2",
};
Modified: lucene/dev/branches/solrcloud/modules/facet/src/java/org/apache/lucene/facet/index/FacetsPayloadProcessorProvider.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/facet/src/java/org/apache/lucene/facet/index/FacetsPayloadProcessorProvider.java?rev=1201946&r1=1201945&r2=1201946&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/facet/src/java/org/apache/lucene/facet/index/FacetsPayloadProcessorProvider.java (original)
+++ lucene/dev/branches/solrcloud/modules/facet/src/java/org/apache/lucene/facet/index/FacetsPayloadProcessorProvider.java Mon Nov 14 22:36:20 2011
@@ -58,7 +58,7 @@ import org.apache.lucene.util.encoding.I
* conf.setMergePolicy(new ForceOptimizeMergePolicy());
* IndexWriter writer = new IndexWriter(oldDir, conf);
* writer.setPayloadProcessorProvider(fppp);
- * writer.optimize();
+ * writer.forceMerge(1);
* writer.close();
*
* // merge that directory with the new index.
Modified: lucene/dev/branches/solrcloud/modules/facet/src/java/org/apache/lucene/facet/taxonomy/directory/Consts.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/facet/src/java/org/apache/lucene/facet/taxonomy/directory/Consts.java?rev=1201946&r1=1201945&r2=1201946&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/facet/src/java/org/apache/lucene/facet/taxonomy/directory/Consts.java (original)
+++ lucene/dev/branches/solrcloud/modules/facet/src/java/org/apache/lucene/facet/taxonomy/directory/Consts.java Mon Nov 14 22:36:20 2011
@@ -4,6 +4,7 @@ import java.io.IOException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.StoredFieldVisitor;
+import org.apache.lucene.index.StoredFieldVisitor.Status;
import org.apache.lucene.store.IndexInput;
/**
@@ -41,13 +42,13 @@ abstract class Consts {
public static final class LoadFullPathOnly extends StoredFieldVisitor {
private String fullPath;
- public boolean stringField(FieldInfo fieldInfo, IndexInput in, int numUTF8Bytes) throws IOException {
- final byte[] bytes = new byte[numUTF8Bytes];
- in.readBytes(bytes, 0, bytes.length);
- fullPath = new String(bytes, "UTF-8");
+ public void stringField(FieldInfo fieldInfo, String value) throws IOException {
+ fullPath = value;
+ }
- // Stop loading:
- return true;
+ @Override
+ public Status needsField(FieldInfo fieldInfo) throws IOException {
+ return fullPath == null ? Status.YES : Status.STOP;
}
public String getFullPath() {
Modified: lucene/dev/branches/solrcloud/modules/facet/src/test/org/apache/lucene/facet/FacetTestBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/facet/src/test/org/apache/lucene/facet/FacetTestBase.java?rev=1201946&r1=1201945&r2=1201946&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/facet/src/test/org/apache/lucene/facet/FacetTestBase.java (original)
+++ lucene/dev/branches/solrcloud/modules/facet/src/test/org/apache/lucene/facet/FacetTestBase.java Mon Nov 14 22:36:20 2011
@@ -30,6 +30,7 @@ import org.apache.lucene.search.IndexSea
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
import org.apache.lucene.facet.index.CategoryDocumentBuilder;
@@ -45,6 +46,8 @@ import org.apache.lucene.facet.taxonomy.
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -66,14 +69,17 @@ import org.apache.lucene.facet.taxonomy.
/** Base faceted search test. */
public abstract class FacetTestBase extends LuceneTestCase {
- /** Documents text field. */
- protected static final String CONTENT_FIELD = "content";
+ /** Holds a search and taxonomy Directories pair. */
+ private static final class SearchTaxoDirPair {
+ Directory searchDir, taxoDir;
+ SearchTaxoDirPair() {}
+ }
- /** Directory for the index */
- protected Directory indexDir;
+ private static HashMap<Integer, SearchTaxoDirPair> dirsPerPartitionSize;
+ private static File TEST_DIR;
- /** Directory for the taxonomy */
- protected Directory taxoDir;
+ /** Documents text field. */
+ protected static final String CONTENT_FIELD = "content";
/** taxonomy Reader for the test. */
protected TaxonomyReader taxoReader;
@@ -84,6 +90,19 @@ public abstract class FacetTestBase exte
/** Searcher for the test. */
protected IndexSearcher searcher;
+ @BeforeClass
+ public static void beforeClassFacetTestBase() throws Exception {
+ TEST_DIR = _TestUtil.getTempDir("facets");
+ dirsPerPartitionSize = new HashMap<Integer, FacetTestBase.SearchTaxoDirPair>();
+ }
+
+ @AfterClass
+ public static void afterClassFacetTestBase() throws Exception {
+ for (SearchTaxoDirPair pair : dirsPerPartitionSize.values()) {
+ IOUtils.close(pair.searchDir, pair.taxoDir);
+ }
+ }
+
/** documents text (for the text field). */
private static final String[] DEFAULT_CONTENT = {
"the white car is the one I want.",
@@ -122,34 +141,39 @@ public abstract class FacetTestBase exte
}
/** Prepare index (in RAM/Disk) with some documents and some facets */
- protected final void initIndex(int partitionSize, boolean onDisk) throws Exception {
+ protected final void initIndex(int partitionSize, boolean forceDisk) throws Exception {
if (VERBOSE) {
- System.out.println("Partition Size: " + partitionSize+" onDisk: "+onDisk);
+ System.out.println("Partition Size: " + partitionSize+" forceDisk: "+forceDisk);
}
- if (onDisk) {
- File indexFile = _TestUtil.getTempDir("index");
- indexDir = newFSDirectory(indexFile);
- taxoDir = newFSDirectory(new File(indexFile,"facets"));
- } else {
- indexDir = newDirectory();
- taxoDir = newDirectory();
+ SearchTaxoDirPair pair = dirsPerPartitionSize.get(Integer.valueOf(partitionSize));
+ if (pair == null) {
+ pair = new SearchTaxoDirPair();
+ if (forceDisk) {
+ pair.searchDir = newFSDirectory(new File(TEST_DIR, "index"));
+ pair.taxoDir = newFSDirectory(new File(TEST_DIR, "taxo"));
+ } else {
+ pair.searchDir = newDirectory();
+ pair.taxoDir = newDirectory();
+ }
+
+ RandomIndexWriter iw = new RandomIndexWriter(random, pair.searchDir, getIndexWriterConfig(getAnalyzer()));
+ TaxonomyWriter taxo = new DirectoryTaxonomyWriter(pair.taxoDir, OpenMode.CREATE);
+
+ populateIndex(iw, taxo, getFacetIndexingParams(partitionSize));
+
+ // commit changes (taxonomy prior to search index for consistency)
+ taxo.commit();
+ iw.commit();
+ taxo.close();
+ iw.close();
+
+ dirsPerPartitionSize.put(Integer.valueOf(partitionSize), pair);
}
- RandomIndexWriter iw = new RandomIndexWriter(random, indexDir, getIndexWriterConfig(getAnalyzer()));
- TaxonomyWriter taxo = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);
-
- populateIndex(iw, taxo, getFacetIndexingParams(partitionSize));
-
- // commit changes (taxonomy prior to search index for consistency)
- taxo.commit();
- iw.commit();
- taxo.close();
- iw.close();
-
// prepare for searching
- taxoReader = new DirectoryTaxonomyReader(taxoDir);
- indexReader = IndexReader.open(indexDir);
+ taxoReader = new DirectoryTaxonomyReader(pair.taxoDir);
+ indexReader = IndexReader.open(pair.searchDir);
searcher = newSearcher(indexReader);
}
@@ -207,16 +231,10 @@ public abstract class FacetTestBase exte
/** Close all indexes */
protected void closeAll() throws Exception {
// close and nullify everything
- taxoReader.close();
+ IOUtils.close(taxoReader, indexReader, searcher);
taxoReader = null;
- indexReader.close();
indexReader = null;
- searcher.close();
searcher = null;
- indexDir.close();
- indexDir = null;
- taxoDir.close();
- taxoDir = null;
}
/**