You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by ma...@apache.org on 2009/07/14 23:39:23 UTC
svn commit: r794078 - in /lucene/java/trunk: ./ contrib/analyzers/src/java/org/apache/lucene/analysis/th/ contrib/highlighter/src/test/org/apache/lucene/search/highlight/ contrib/memory/src/java/org/apache/lucene/index/memory/ contrib/memory/src/test/o...

Author: markrmiller
Date: Tue Jul 14 21:39:22 2009
New Revision: 794078

URL: http://svn.apache.org/viewvc?rev=794078&view=rev
Log:
LUCENE-1688: Deprecate static final String stop word array in StandardAnalyzer and StopAnalyzer and replace it with an immutable implementation of CharArraySet.

Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java
    lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
    lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/PatternAnalyzer.java
    lucene/java/trunk/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java
    lucene/java/trunk/contrib/memory/src/test/org/apache/lucene/index/memory/PatternAnalyzerTest.java
    lucene/java/trunk/src/java/org/apache/lucene/analysis/CharArraySet.java
    lucene/java/trunk/src/java/org/apache/lucene/analysis/StopAnalyzer.java
    lucene/java/trunk/src/java/org/apache/lucene/analysis/StopFilter.java
    lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
    lucene/java/trunk/src/test/org/apache/lucene/analysis/TestCharArraySet.java
    lucene/java/trunk/src/test/org/apache/lucene/analysis/TestStopAnalyzer.java
    lucene/java/trunk/src/test/org/apache/lucene/queryParser/TestQueryParser.java
    lucene/java/trunk/src/test/org/apache/lucene/search/TestMultiPhraseQuery.java
    lucene/java/trunk/src/test/org/apache/lucene/search/spans/TestSpans.java

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=794078&r1=794077&r2=794078&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Tue Jul 14 21:39:22 2009
@@ -309,6 +309,11 @@
     all synchronization in TermInfosReader, which previously could
     cause threads to pile up in certain cases. (Dan Rosher via Mike
     McCandless)
+    
+30. LUCENE-1688: Deprecate static final String stop word array in 
+    StandardAnalyzer and StopAnalyzer and replace it with an immutable 
+    implementation of CharArraySet.  (Simon Willnauer via Mark Miller)
+
 
 Bug fixes
 
@@ -604,6 +609,11 @@
  9. LUCENE-1653: Avoid creating a Calendar in every call to 
     DateTools#dateToString, DateTools#timeToString and
     DateTools#round.  (Shai Erera via Mark Miller)
+    
+10. LUCENE-1688: Deprecate static final String stop word array and 
+    replace it with an immutable implementation of CharArraySet.
+    Removes conversions between Set and array.
+    (Simon Willnauer via Mark Miller)
 
 Documentation
 

Modified: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java?rev=794078&r1=794077&r2=794078&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java Tue Jul 14 21:39:22 2009
@@ -33,7 +33,7 @@
 	  TokenStream ts = new StandardTokenizer(reader);
     ts = new StandardFilter(ts);
     ts = new ThaiWordFilter(ts);
-    ts = new StopFilter(ts, StopAnalyzer.ENGLISH_STOP_WORDS);
+    ts = new StopFilter(ts, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
     return ts;
   }
 }

Modified: lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java?rev=794078&r1=794077&r2=794078&view=diff
==============================================================================
--- lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (original)
+++ lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java Tue Jul 14 21:39:22 2009
@@ -23,9 +23,11 @@
 import java.io.StringReader;
 import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import java.util.StringTokenizer;
 
 import javax.xml.parsers.DocumentBuilder;
@@ -982,7 +984,8 @@
 
       public void run() throws Exception {
         String goodWord = "goodtoken";
-        String stopWords[] = { "stoppedtoken" };
+        Set stopWords = new HashSet(1);
+        stopWords.add("stoppedtoken");
 
         TermQuery query = new TermQuery(new Term("data", goodWord));
 
@@ -991,7 +994,8 @@
         sb.append(goodWord);
         for (int i = 0; i < 10000; i++) {
           sb.append(" ");
-          sb.append(stopWords[0]);
+          // only one stopword
+          sb.append(stopWords.iterator().next());
         }
         SimpleHTMLFormatter fm = new SimpleHTMLFormatter();
         Highlighter hg = getHighlighter(query, "data", new StandardAnalyzer(stopWords).tokenStream(
@@ -1024,7 +1028,9 @@
   public void testMaxSizeEndHighlight() throws Exception {
     TestHighlightRunner helper = new TestHighlightRunner() {
       public void run() throws Exception {
-        String stopWords[] = { "in", "it" };
+        Set stopWords = new HashSet();
+        stopWords.add("in");
+        stopWords.add("it");
         TermQuery query = new TermQuery(new Term("text", "searchterm"));
 
         String text = "this is a text with searchterm in it";

Modified: lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/PatternAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/PatternAnalyzer.java?rev=794078&r1=794077&r2=794078&view=diff
==============================================================================
--- lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/PatternAnalyzer.java (original)
+++ lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/PatternAnalyzer.java Tue Jul 14 21:39:22 2009
@@ -70,55 +70,60 @@
   /** <code>"\\s+"</code>; Divides text at whitespaces (Character.isWhitespace(c)) */
   public static final Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+");
   
-  private static final Set EXTENDED_ENGLISH_STOP_WORDS = makeStopSet(new String[] {
-    "a", "about", "above", "across", "adj", "after", "afterwards",
-    "again", "against", "albeit", "all", "almost", "alone", "along",
-    "already", "also", "although", "always", "among", "amongst", "an",
-    "and", "another", "any", "anyhow", "anyone", "anything",
-    "anywhere", "are", "around", "as", "at", "be", "became", "because",
-    "become", "becomes", "becoming", "been", "before", "beforehand",
-    "behind", "being", "below", "beside", "besides", "between",
-    "beyond", "both", "but", "by", "can", "cannot", "co", "could",
-    "down", "during", "each", "eg", "either", "else", "elsewhere",
-    "enough", "etc", "even", "ever", "every", "everyone", "everything",
-    "everywhere", "except", "few", "first", "for", "former",
-    "formerly", "from", "further", "had", "has", "have", "he", "hence",
-    "her", "here", "hereafter", "hereby", "herein", "hereupon", "hers",
-    "herself", "him", "himself", "his", "how", "however", "i", "ie", "if",
-    "in", "inc", "indeed", "into", "is", "it", "its", "itself", "last",
-    "latter", "latterly", "least", "less", "ltd", "many", "may", "me",
-    "meanwhile", "might", "more", "moreover", "most", "mostly", "much",
-    "must", "my", "myself", "namely", "neither", "never",
-    "nevertheless", "next", "no", "nobody", "none", "noone", "nor",
-    "not", "nothing", "now", "nowhere", "of", "off", "often", "on",
-    "once one", "only", "onto", "or", "other", "others", "otherwise",
-    "our", "ours", "ourselves", "out", "over", "own", "per", "perhaps",
-    "rather", "s", "same", "seem", "seemed", "seeming", "seems",
-    "several", "she", "should", "since", "so", "some", "somehow",
-    "someone", "something", "sometime", "sometimes", "somewhere",
-    "still", "such", "t", "than", "that", "the", "their", "them",
-    "themselves", "then", "thence", "there", "thereafter", "thereby",
-    "therefor", "therein", "thereupon", "these", "they", "this",
-    "those", "though", "through", "throughout", "thru", "thus", "to",
-    "together", "too", "toward", "towards", "under", "until", "up",
-    "upon", "us", "very", "via", "was", "we", "well", "were", "what",
-    "whatever", "whatsoever", "when", "whence", "whenever",
-    "whensoever", "where", "whereafter", "whereas", "whereat",
-    "whereby", "wherefrom", "wherein", "whereinto", "whereof",
-    "whereon", "whereto", "whereunto", "whereupon", "wherever",
-    "wherewith", "whether", "which", "whichever", "whichsoever",
-    "while", "whilst", "whither", "who", "whoever", "whole", "whom",
-    "whomever", "whomsoever", "whose", "whosoever", "why", "will",
-    "with", "within", "without", "would", "xsubj", "xcal", "xauthor",
-    "xother ", "xnote", "yet", "you", "your", "yours", "yourself",
-    "yourselves"});
+  private static final Set EXTENDED_ENGLISH_STOP_WORDS;
+  static {
+    EXTENDED_ENGLISH_STOP_WORDS = new HashSet();
+  
+    EXTENDED_ENGLISH_STOP_WORDS.addAll(Arrays.asList(new String[] {
+      "a", "about", "above", "across", "adj", "after", "afterwards",
+      "again", "against", "albeit", "all", "almost", "alone", "along",
+      "already", "also", "although", "always", "among", "amongst", "an",
+      "and", "another", "any", "anyhow", "anyone", "anything",
+      "anywhere", "are", "around", "as", "at", "be", "became", "because",
+      "become", "becomes", "becoming", "been", "before", "beforehand",
+      "behind", "being", "below", "beside", "besides", "between",
+      "beyond", "both", "but", "by", "can", "cannot", "co", "could",
+      "down", "during", "each", "eg", "either", "else", "elsewhere",
+      "enough", "etc", "even", "ever", "every", "everyone", "everything",
+      "everywhere", "except", "few", "first", "for", "former",
+      "formerly", "from", "further", "had", "has", "have", "he", "hence",
+      "her", "here", "hereafter", "hereby", "herein", "hereupon", "hers",
+      "herself", "him", "himself", "his", "how", "however", "i", "ie", "if",
+      "in", "inc", "indeed", "into", "is", "it", "its", "itself", "last",
+      "latter", "latterly", "least", "less", "ltd", "many", "may", "me",
+      "meanwhile", "might", "more", "moreover", "most", "mostly", "much",
+      "must", "my", "myself", "namely", "neither", "never",
+      "nevertheless", "next", "no", "nobody", "none", "noone", "nor",
+      "not", "nothing", "now", "nowhere", "of", "off", "often", "on",
+      "once one", "only", "onto", "or", "other", "others", "otherwise",
+      "our", "ours", "ourselves", "out", "over", "own", "per", "perhaps",
+      "rather", "s", "same", "seem", "seemed", "seeming", "seems",
+      "several", "she", "should", "since", "so", "some", "somehow",
+      "someone", "something", "sometime", "sometimes", "somewhere",
+      "still", "such", "t", "than", "that", "the", "their", "them",
+      "themselves", "then", "thence", "there", "thereafter", "thereby",
+      "therefor", "therein", "thereupon", "these", "they", "this",
+      "those", "though", "through", "throughout", "thru", "thus", "to",
+      "together", "too", "toward", "towards", "under", "until", "up",
+      "upon", "us", "very", "via", "was", "we", "well", "were", "what",
+      "whatever", "whatsoever", "when", "whence", "whenever",
+      "whensoever", "where", "whereafter", "whereas", "whereat",
+      "whereby", "wherefrom", "wherein", "whereinto", "whereof",
+      "whereon", "whereto", "whereunto", "whereupon", "wherever",
+      "wherewith", "whether", "which", "whichever", "whichsoever",
+      "while", "whilst", "whither", "who", "whoever", "whole", "whom",
+      "whomever", "whomsoever", "whose", "whosoever", "why", "will",
+      "with", "within", "without", "would", "xsubj", "xcal", "xauthor",
+      "xother ", "xnote", "yet", "you", "your", "yours", "yourself",
+      "yourselves"}));
+  }
     
   /**
    * A lower-casing word analyzer with English stop words (can be shared
    * freely across threads without harm); global per class loader.
    */
   public static final PatternAnalyzer DEFAULT_ANALYZER = new PatternAnalyzer(
-    NON_WORD_PATTERN, true, makeStopSet(StopAnalyzer.ENGLISH_STOP_WORDS));
+    NON_WORD_PATTERN, true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
     
   /**
    * A lower-casing word analyzer with <b>extended </b> English stop words
@@ -191,7 +196,7 @@
     }
     else {
       stream = new PatternTokenizer(text, pattern, toLowerCase);
-      if (stopWords != null) stream = new StopFilter(stream, stopWords);
+      if (stopWords != null) stream = new StopFilter(false, stream, stopWords);
     }
     
     return stream;
@@ -304,9 +309,9 @@
   }
     
   /** somewhat oversized to minimize hash collisions */
-  private static Set makeStopSet(String[] stopWords) {
-    Set stops = new HashSet(stopWords.length * 2, 0.3f); 
-    stops.addAll(Arrays.asList(stopWords));
+  private static Set makeStopSet(Set stopWords) {
+    Set stops = new HashSet(stopWords.size() * 2, 0.3f); 
+    stops.addAll(stopWords);
     return stops;
 //    return Collections.unmodifiableSet(stops);
   }

Modified: lucene/java/trunk/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java?rev=794078&r1=794077&r2=794078&view=diff
==============================================================================
--- lucene/java/trunk/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java (original)
+++ lucene/java/trunk/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java Tue Jul 14 21:39:22 2009
@@ -271,7 +271,7 @@
     boolean toLowerCase = true;
 //    boolean toLowerCase = false;
 //    Set stopWords = null;
-    Set stopWords = StopFilter.makeStopSet(StopAnalyzer.ENGLISH_STOP_WORDS);
+    Set stopWords = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
     
     Analyzer[] analyzers = new Analyzer[] { 
         new SimpleAnalyzer(),

Modified: lucene/java/trunk/contrib/memory/src/test/org/apache/lucene/index/memory/PatternAnalyzerTest.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/memory/src/test/org/apache/lucene/index/memory/PatternAnalyzerTest.java?rev=794078&r1=794077&r2=794078&view=diff
==============================================================================
--- lucene/java/trunk/contrib/memory/src/test/org/apache/lucene/index/memory/PatternAnalyzerTest.java (original)
+++ lucene/java/trunk/contrib/memory/src/test/org/apache/lucene/index/memory/PatternAnalyzerTest.java Tue Jul 14 21:39:22 2009
@@ -135,7 +135,7 @@
           
           for (int stops=0; stops < maxStops; stops++) {
             Set stopWords = null;
-            if (stops != 0) stopWords = StopFilter.makeStopSet(StopAnalyzer.ENGLISH_STOP_WORDS);
+            if (stops != 0) stopWords = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
                 
             for (int toLower=0; toLower < maxToLower; toLower++) {
               boolean toLowerCase = toLower != 0;

Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/CharArraySet.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/CharArraySet.java?rev=794078&r1=794077&r2=794078&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/CharArraySet.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/CharArraySet.java Tue Jul 14 21:39:22 2009
@@ -2,6 +2,7 @@
 
 import java.util.AbstractSet;
 import java.util.Collection;
+import java.util.Collections;
 import java.util.Iterator;
 
 /**
@@ -53,6 +54,12 @@
     this(c.size(), ignoreCase);
     addAll(c);
   }
+  /** Create set from entries */
+  private CharArraySet(char[][] entries, boolean ignoreCase, int count){
+    this.entries = entries;
+    this.ignoreCase = ignoreCase;
+    this.count = count;
+  }
 
   /** true if the <code>len</code> chars of <code>text</code> starting at <code>off</code>
    * are in the set */
@@ -100,7 +107,7 @@
   public boolean add(CharSequence text) {
     return add(text.toString()); // could be more efficient
   }
-
+  
   /** Add this String into the set */
   public boolean add(String text) {
     return add(text.toCharArray());
@@ -228,6 +235,26 @@
     }
     return add(o.toString());
   }
+  
+  /**
+   * Returns an unmodifiable {@link CharArraySet}. This makes it possible to
+   * provide unmodifiable views of internal sets for "read-only" use.
+   * 
+   * @param set
+   *          a set for which the unmodifiable set is returned.
+   * @return a new unmodifiable {@link CharArraySet}.
+   * @throws NullPointerException
+   *           if the given set is <code>null</code>.
+   */
+  public static CharArraySet unmodifiableSet(CharArraySet set) {
+    if (set == null)
+      throw new NullPointerException("Given set is null");
+    /*
+     * Instead of delegating calls to the given set, copy the low-level values to
+     * the unmodifiable subclass.
+     */
+    return new UnmodifiableCharArraySet(set.entries, set.ignoreCase, set.count);
+  }
 
   /** The Iterator<String> for this set.  Strings are constructed on the fly, so
    * use <code>nextCharArray</code> for more efficient access. */
@@ -270,5 +297,40 @@
   public Iterator iterator() {
     return new CharArraySetIterator();
   }
+  
+  /**
+   * Efficient unmodifiable {@link CharArraySet}. This implementation does not
+   * delegate calls to a given {@link CharArraySet} like
+   * {@link Collections#unmodifiableSet(java.util.Set)} does. Instead it passes
+   * the internal representation of a {@link CharArraySet} to a super
+   * constructor and overrides all mutators. 
+   */
+  private static final class UnmodifiableCharArraySet extends CharArraySet {
+
+    private UnmodifiableCharArraySet(char[][] entries, boolean ignoreCase,
+        int count) {
+      super(entries, ignoreCase, count);
+    }
+
+    public boolean add(Object o){
+      throw new UnsupportedOperationException();
+    }
+    
+    public boolean addAll(Collection coll) {
+      throw new UnsupportedOperationException();
+    }
+    
+    public boolean add(char[] text) {
+      throw new UnsupportedOperationException();
+    }
+
+    public boolean add(CharSequence text) {
+      throw new UnsupportedOperationException();
+    }
+
+    public boolean add(String text) {
+      throw new UnsupportedOperationException();
+    }
+  }
 
 }

Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/StopAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/StopAnalyzer.java?rev=794078&r1=794077&r2=794078&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/StopAnalyzer.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/StopAnalyzer.java Tue Jul 14 21:39:22 2009
@@ -20,18 +20,20 @@
 import java.io.File;
 import java.io.IOException;
 import java.io.Reader;
+import java.util.Arrays;
 import java.util.Set;
 
 /** Filters LetterTokenizer with LowerCaseFilter and StopFilter. */
 
 public final class StopAnalyzer extends Analyzer {
-  private Set stopWords;
+  private final Set/*<String>*/ stopWords;
   // @deprecated
-  private boolean useDefaultStopPositionIncrement;
-  private boolean enablePositionIncrements;
+  private final boolean useDefaultStopPositionIncrement;
+  private final boolean enablePositionIncrements;
 
   /** An array containing some common English words that are not usually useful
-    for searching. */
+    for searching. 
+    @deprecated Use {@link #ENGLISH_STOP_WORDS_SET} instead */
   public static final String[] ENGLISH_STOP_WORDS = {
     "a", "an", "and", "are", "as", "at", "be", "but", "by",
     "for", "if", "in", "into", "is", "it",
@@ -39,13 +41,31 @@
     "that", "the", "their", "then", "there", "these",
     "they", "this", "to", "was", "will", "with"
   };
-
+  
+  /** An unmodifiable set containing some common English words that are not usually useful
+  for searching.*/
+  public static final Set/*<String>*/ ENGLISH_STOP_WORDS_SET;
+  
+  static {
+	  final String[] stopWords = new String[]{
+  	    "a", "an", "and", "are", "as", "at", "be", "but", "by",
+	    "for", "if", "in", "into", "is", "it",
+	    "no", "not", "of", "on", "or", "such",
+	    "that", "the", "their", "then", "there", "these",
+	    "they", "this", "to", "was", "will", "with"
+	  };
+	  final CharArraySet stopSet = new CharArraySet(stopWords.length, false);
+    stopSet.addAll(Arrays.asList(stopWords));  
+	  ENGLISH_STOP_WORDS_SET = CharArraySet.unmodifiableSet(stopSet); 
+  }
+  
   /** Builds an analyzer which removes words in
    * ENGLISH_STOP_WORDS.
    * @deprecated Use {@link #StopAnalyzer(boolean)} instead */
   public StopAnalyzer() {
-    stopWords = StopFilter.makeStopSet(ENGLISH_STOP_WORDS);
+    stopWords = ENGLISH_STOP_WORDS_SET;
     useDefaultStopPositionIncrement = true;
+    enablePositionIncrements = false;
   }
 
   /** Builds an analyzer which removes words in
@@ -53,8 +73,9 @@
    * @param enablePositionIncrements See {@link
    * StopFilter#setEnablePositionIncrements} */
   public StopAnalyzer(boolean enablePositionIncrements) {
-    stopWords = StopFilter.makeStopSet(ENGLISH_STOP_WORDS);
+    stopWords = ENGLISH_STOP_WORDS_SET;
     this.enablePositionIncrements = enablePositionIncrements;
+    useDefaultStopPositionIncrement = false;
   }
 
   /** Builds an analyzer with the stop words from the given set.
@@ -62,6 +83,7 @@
   public StopAnalyzer(Set stopWords) {
     this.stopWords = stopWords;
     useDefaultStopPositionIncrement = true;
+    enablePositionIncrements = false;
   }
 
   /** Builds an analyzer with the stop words from the given set.
@@ -71,22 +93,26 @@
   public StopAnalyzer(Set stopWords, boolean enablePositionIncrements) {
     this.stopWords = stopWords;
     this.enablePositionIncrements = enablePositionIncrements;
+    useDefaultStopPositionIncrement = false;
   }
 
   /** Builds an analyzer which removes words in the provided array.
-   * @deprecated Use {@link #StopAnalyzer(String[], boolean)} instead */
+   * @deprecated Use {@link #StopAnalyzer(Set, boolean)} instead */
   public StopAnalyzer(String[] stopWords) {
     this.stopWords = StopFilter.makeStopSet(stopWords);
     useDefaultStopPositionIncrement = true;
+    enablePositionIncrements = false;
   }
   
   /** Builds an analyzer which removes words in the provided array.
    * @param stopWords Array of stop words
    * @param enablePositionIncrements See {@link
-   * StopFilter#setEnablePositionIncrements} */
+   * StopFilter#setEnablePositionIncrements} 
+   * @deprecated Use {@link #StopAnalyzer(Set, boolean)} instead */
   public StopAnalyzer(String[] stopWords, boolean enablePositionIncrements) {
     this.stopWords = StopFilter.makeStopSet(stopWords);
     this.enablePositionIncrements = enablePositionIncrements;
+    useDefaultStopPositionIncrement = false;
   }
   
   /** Builds an analyzer with the stop words from the given file.
@@ -95,6 +121,7 @@
   public StopAnalyzer(File stopwordsFile) throws IOException {
     stopWords = WordlistLoader.getWordSet(stopwordsFile);
     useDefaultStopPositionIncrement = true;
+    enablePositionIncrements = false;
   }
 
   /** Builds an analyzer with the stop words from the given file.
@@ -105,6 +132,7 @@
   public StopAnalyzer(File stopwordsFile, boolean enablePositionIncrements) throws IOException {
     stopWords = WordlistLoader.getWordSet(stopwordsFile);
     this.enablePositionIncrements = enablePositionIncrements;
+    useDefaultStopPositionIncrement = false;
   }
 
   /** Builds an analyzer with the stop words from the given reader.
@@ -114,6 +142,7 @@
   public StopAnalyzer(Reader stopwords) throws IOException {
     stopWords = WordlistLoader.getWordSet(stopwords);
     useDefaultStopPositionIncrement = true;
+    enablePositionIncrements = false;
   }
 
   /** Builds an analyzer with the stop words from the given reader.
@@ -124,6 +153,7 @@
   public StopAnalyzer(Reader stopwords, boolean enablePositionIncrements) throws IOException {
     stopWords = WordlistLoader.getWordSet(stopwords);
     this.enablePositionIncrements = enablePositionIncrements;
+    useDefaultStopPositionIncrement = false;
   }
 
   /** Filters LowerCaseTokenizer with StopFilter. */

Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/StopFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/StopFilter.java?rev=794078&r1=794077&r2=794078&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/StopFilter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/StopFilter.java Tue Jul 14 21:39:22 2009
@@ -55,6 +55,7 @@
    * @param enablePositionIncrements true if token positions should record the removed stop words
    * @param input input TokenStream
    * @param stopWords array of stop words
+   * @deprecated Use {@link #StopFilter(boolean, TokenStream, Set)} instead.
    */
   public StopFilter(boolean enablePositionIncrements, TokenStream input, String [] stopWords)
   {
@@ -77,6 +78,7 @@
    * @param in input TokenStream
    * @param stopWords array of stop words
    * @param ignoreCase true if case is ignored
+   * @deprecated Use {@link #StopFilter(boolean, TokenStream, Set, boolean)} instead.
    */
   public StopFilter(boolean enablePositionIncrements, TokenStream in, String[] stopWords, boolean ignoreCase) {
     super(in);

Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java?rev=794078&r1=794077&r2=794078&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java Tue Jul 14 21:39:22 2009
@@ -101,15 +101,19 @@
 
 
   /** An array containing some common English words that are usually not
-  useful for searching. */
+  useful for searching. 
+  @deprecated Use {@link #STOP_WORDS_SET} instead */
   public static final String[] STOP_WORDS = StopAnalyzer.ENGLISH_STOP_WORDS;
+  
+  /** An unmodifiable set containing some common English words that are usually not
+  useful for searching. */
+  public static final Set/*<String>*/ STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET; 
 
   /** Builds an analyzer with the default stop words ({@link
-   * #STOP_WORDS}).
-   * @deprecated Use {@link #StandardAnalyzer(Version)},
-   * instead. */
+   * #STOP_WORDS_SET}).
+   * @deprecated Use {@link #StandardAnalyzer(Version)} instead. */
   public StandardAnalyzer() {
-    this(Version.LUCENE_24, STOP_WORDS);
+    this(Version.LUCENE_24, STOP_WORDS_SET);
   }
 
   /** Builds an analyzer with the default stop words ({@link
@@ -118,7 +122,7 @@
    * <a href="#version">above</a>}
    */
   public StandardAnalyzer(Version matchVersion) {
-    this(matchVersion, STOP_WORDS);
+    this(matchVersion, STOP_WORDS_SET);
   }
 
   /** Builds an analyzer with the given stop words.
@@ -138,22 +142,9 @@
   }
 
   /** Builds an analyzer with the given stop words.
-   * @deprecated Use {@link #StandardAnalyzer(Version,
-   * String[])} instead */
+   * @deprecated Use {@link #StandardAnalyzer(Version, Set)} instead */
   public StandardAnalyzer(String[] stopWords) {
-    this(Version.LUCENE_24, stopWords);
-  }
-
-  /** Builds an analyzer with the given stop words.
-   * @param matchVersion Lucene version to match See {@link
-   * <a href="#version">above</a>}
-   * @param stopWords Array of stop words */
-  public StandardAnalyzer(Version matchVersion, String[] stopWords) {
-    if (stopWords == null) {
-      stopWords = STOP_WORDS;
-    }
-    stopSet = StopFilter.makeStopSet(stopWords);
-    init(matchVersion);
+    this(Version.LUCENE_24, StopFilter.makeStopSet(stopWords));
   }
 
   /** Builds an analyzer with the stop words from the given file.
@@ -203,8 +194,9 @@
    * @deprecated Remove in 3.X and make true the only valid value
    */
   public StandardAnalyzer(boolean replaceInvalidAcronym) {
-    this(Version.LUCENE_24, STOP_WORDS);
+    this(Version.LUCENE_24, STOP_WORDS_SET);
     this.replaceInvalidAcronym = replaceInvalidAcronym;
+    useDefaultStopPositionIncrements = true;
   }
 
   /**
@@ -243,7 +235,7 @@
    * @deprecated Remove in 3.X and make true the only valid value
    */
   public StandardAnalyzer(String [] stopwords, boolean replaceInvalidAcronym) throws IOException{
-    this(Version.LUCENE_24, stopwords);
+    this(Version.LUCENE_24, StopFilter.makeStopSet(stopwords));
     this.replaceInvalidAcronym = replaceInvalidAcronym;
   }
 

Modified: lucene/java/trunk/src/test/org/apache/lucene/analysis/TestCharArraySet.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/analysis/TestCharArraySet.java?rev=794078&r1=794077&r2=794078&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/analysis/TestCharArraySet.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/analysis/TestCharArraySet.java Tue Jul 14 21:39:22 2009
@@ -23,13 +23,22 @@
 
 public class TestCharArraySet extends LuceneTestCase {
   
+  static final String[] TEST_STOP_WORDS = {
+    "a", "an", "and", "are", "as", "at", "be", "but", "by",
+    "for", "if", "in", "into", "is", "it",
+    "no", "not", "of", "on", "or", "such",
+    "that", "the", "their", "then", "there", "these",
+    "they", "this", "to", "was", "will", "with"
+  };
+  
+  
   public void testRehash() throws Exception {
     CharArraySet cas = new CharArraySet(0, true);
-    for(int i=0;i<StopAnalyzer.ENGLISH_STOP_WORDS.length;i++)
-      cas.add(StopAnalyzer.ENGLISH_STOP_WORDS[i]);
-    assertEquals(StopAnalyzer.ENGLISH_STOP_WORDS.length, cas.size());
-    for(int i=0;i<StopAnalyzer.ENGLISH_STOP_WORDS.length;i++)
-      assertTrue(cas.contains(StopAnalyzer.ENGLISH_STOP_WORDS[i]));
+    for(int i=0;i<TEST_STOP_WORDS.length;i++)
+      cas.add(TEST_STOP_WORDS[i]);
+    assertEquals(TEST_STOP_WORDS.length, cas.size());
+    for(int i=0;i<TEST_STOP_WORDS.length;i++)
+      assertTrue(cas.contains(TEST_STOP_WORDS[i]));
   }
 
   public void testNonZeroOffset() {
@@ -39,6 +48,11 @@
     set.addAll(Arrays.asList(words));
     assertTrue(set.contains(findme, 1, 4));
     assertTrue(set.contains(new String(findme,1,4)));
+    
+    // test unmodifiable
+    set = CharArraySet.unmodifiableSet(set);
+    assertTrue(set.contains(findme, 1, 4));
+    assertTrue(set.contains(new String(findme,1,4)));
   }
   
   public void testObjectContains() {
@@ -47,5 +61,118 @@
     set.add(val);
     assertTrue(set.contains(val));
     assertTrue(set.contains(new Integer(1)));
+    // test unmodifiable
+    set = CharArraySet.unmodifiableSet(set);
+    assertTrue(set.contains(val));
+    assertTrue(set.contains(new Integer(1)));
+  }
+  
+  public void testClear(){
+    CharArraySet set=new CharArraySet(10,true);
+    set.addAll(Arrays.asList(TEST_STOP_WORDS));
+    assertEquals("Not all words added", TEST_STOP_WORDS.length, set.size());
+    try{
+      set.clear();
+      fail("remove is not supported");
+    }catch (UnsupportedOperationException e) {
+      // expected
+      assertEquals("Not all words added", TEST_STOP_WORDS.length, set.size());
+    }
+  }
+  
+  public void testModifyOnUnmodifiable(){
+    CharArraySet set=new CharArraySet(10,true);
+    set.addAll(Arrays.asList(TEST_STOP_WORDS));
+    final int size = set.size();
+    set = CharArraySet.unmodifiableSet(set);
+    assertEquals("Set size changed due to unmodifiableSet call" , size, set.size());
+    String NOT_IN_SET = "SirGallahad";
+    assertFalse("Test String already exists in set", set.contains(NOT_IN_SET));
+    
+    try{
+      set.add(NOT_IN_SET.toCharArray());  
+      fail("Modified unmodifiable set");
+    }catch (UnsupportedOperationException e) {
+      // expected
+      assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
+      assertEquals("Size of unmodifiable set has changed", size, set.size());
+    }
+    
+    try{
+      set.add(NOT_IN_SET);  
+      fail("Modified unmodifiable set");
+    }catch (UnsupportedOperationException e) {
+      // expected
+      assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
+      assertEquals("Size of unmodifiable set has changed", size, set.size());
+    }
+    
+    try{
+      set.add(new StringBuffer(NOT_IN_SET));  
+      fail("Modified unmodifiable set");
+    }catch (UnsupportedOperationException e) {
+      // expected
+      assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
+      assertEquals("Size of unmodifiable set has changed", size, set.size());
+    }
+    
+    try{
+      set.clear();  
+      fail("Modified unmodifiable set");
+    }catch (UnsupportedOperationException e) {
+      // expected
+      assertFalse("Changed unmodifiable set", set.contains(NOT_IN_SET));
+      assertEquals("Size of unmodifiable set has changed", size, set.size());
+    }
+    try{
+      set.add((Object) NOT_IN_SET);  
+      fail("Modified unmodifiable set");
+    }catch (UnsupportedOperationException e) {
+      // expected
+      assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
+      assertEquals("Size of unmodifiable set has changed", size, set.size());
+    }
+    try{
+      set.removeAll(Arrays.asList(TEST_STOP_WORDS));  
+      fail("Modified unmodifiable set");
+    }catch (UnsupportedOperationException e) {
+      // expected
+      assertEquals("Size of unmodifiable set has changed", size, set.size());
+    }
+    
+    try{
+      set.retainAll(Arrays.asList(new String[]{NOT_IN_SET}));  
+      fail("Modified unmodifiable set");
+    }catch (UnsupportedOperationException e) {
+      // expected
+      assertEquals("Size of unmodifiable set has changed", size, set.size());
+    }
+    
+    try{
+      set.addAll(Arrays.asList(new String[]{NOT_IN_SET}));  
+      fail("Modified unmodifiable set");
+    }catch (UnsupportedOperationException e) {
+      // expected
+      assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
+    }
+    
+    for (int i = 0; i < TEST_STOP_WORDS.length; i++) {
+      assertTrue(set.contains(TEST_STOP_WORDS[i]));  
+    }
+  }
+  
+  public void testUnmodifiableSet(){
+    CharArraySet set=new CharArraySet(10,true);
+    set.addAll(Arrays.asList(TEST_STOP_WORDS));
+    final int size = set.size();
+    set = CharArraySet.unmodifiableSet(set);
+    assertEquals("Set size changed due to unmodifiableSet call" , size, set.size());
+    
+    try{
+      CharArraySet.unmodifiableSet(null);
+      fail("can not make null unmodifiable");
+    }catch (NullPointerException e) {
+      // expected
+    }
   }
 }

Modified: lucene/java/trunk/src/test/org/apache/lucene/analysis/TestStopAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/analysis/TestStopAnalyzer.java?rev=794078&r1=794077&r2=794078&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/analysis/TestStopAnalyzer.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/analysis/TestStopAnalyzer.java Tue Jul 14 21:39:22 2009
@@ -23,12 +23,13 @@
 
 import java.io.StringReader;
 import java.io.IOException;
+import java.util.Iterator;
 import java.util.Set;
 import java.util.HashSet;
 
 public class TestStopAnalyzer extends LuceneTestCase {
   
-  private StopAnalyzer stop = new StopAnalyzer();
+  private StopAnalyzer stop = new StopAnalyzer(false);
   private Set inValidTokens = new HashSet();
   
   public TestStopAnalyzer(String s) {
@@ -37,8 +38,10 @@
 
   protected void setUp() throws Exception {
     super.setUp();
-    for (int i = 0; i < StopAnalyzer.ENGLISH_STOP_WORDS.length; i++) {
-      inValidTokens.add(StopAnalyzer.ENGLISH_STOP_WORDS[i]);
+    
+    Iterator it = StopAnalyzer.ENGLISH_STOP_WORDS_SET.iterator();
+    while(it.hasNext()) {
+      inValidTokens.add(it.next());
     }
   }
 

Modified: lucene/java/trunk/src/test/org/apache/lucene/queryParser/TestQueryParser.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/queryParser/TestQueryParser.java?rev=794078&r1=794077&r2=794078&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/queryParser/TestQueryParser.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/queryParser/TestQueryParser.java Tue Jul 14 21:39:22 2009
@@ -23,7 +23,9 @@
 import java.text.DateFormat;
 import java.util.Calendar;
 import java.util.Date;
+import java.util.HashSet;
 import java.util.Locale;
+import java.util.Set;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.KeywordAnalyzer;
@@ -768,7 +770,9 @@
 
   public void testBoost()
     throws Exception {
-    StandardAnalyzer oneStopAnalyzer = new StandardAnalyzer(new String[]{"on"});
+    Set stopWords = new HashSet(1);
+    stopWords.add("on");
+    StandardAnalyzer oneStopAnalyzer = new StandardAnalyzer(stopWords);
     QueryParser qp = new QueryParser("field", oneStopAnalyzer);
     Query q = qp.parse("on^1.0");
     assertNotNull(q);

Modified: lucene/java/trunk/src/test/org/apache/lucene/search/TestMultiPhraseQuery.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/TestMultiPhraseQuery.java?rev=794078&r1=794077&r2=794078&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/search/TestMultiPhraseQuery.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/search/TestMultiPhraseQuery.java Tue Jul 14 21:39:22 2009
@@ -31,6 +31,7 @@
 import org.apache.lucene.util.LuceneTestCase;
 
 import java.io.IOException;
+import java.util.HashSet;
 import java.util.LinkedList;
 
 /**
@@ -169,7 +170,7 @@
     
   public void testPhrasePrefixWithBooleanQuery() throws IOException {
     RAMDirectory indexStore = new RAMDirectory();
-    IndexWriter writer = new IndexWriter(indexStore, new StandardAnalyzer(new String[]{}), true, IndexWriter.MaxFieldLength.LIMITED);
+    IndexWriter writer = new IndexWriter(indexStore, new StandardAnalyzer(new HashSet(0)), true, IndexWriter.MaxFieldLength.LIMITED);
     add("This is a test", "object", writer);
     add("a note", "note", writer);
     writer.close();

Modified: lucene/java/trunk/src/test/org/apache/lucene/search/spans/TestSpans.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/spans/TestSpans.java?rev=794078&r1=794077&r2=794078&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/search/spans/TestSpans.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/search/spans/TestSpans.java Tue Jul 14 21:39:22 2009
@@ -39,6 +39,7 @@
 import org.apache.lucene.util.LuceneTestCase;
 
 import java.io.IOException;
+import java.util.HashSet;
 
 public class TestSpans extends LuceneTestCase {
   private IndexSearcher searcher;
@@ -448,7 +449,7 @@
   // LUCENE-1404
   public void testNPESpanQuery() throws Throwable {
     final Directory dir = new MockRAMDirectory();
-    final IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(new String[0]), IndexWriter.MaxFieldLength.LIMITED);
+    final IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(new HashSet(0)), IndexWriter.MaxFieldLength.LIMITED);
 
     // Add documents
     addDoc(writer, "1", "the big dogs went running to the market");