You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2010/11/30 12:22:46 UTC

svn commit: r1040463 [7/8] - in /lucene/dev/trunk: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ lucene/contrib/db/bdb-je/src/java/org/apache/lucene/store/je/ lucene/contrib/db/bdb/src/java/org/apache/lucene/store/db/ luce...

Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballAnalyzer.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballAnalyzer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballAnalyzer.java Tue Nov 30 11:22:39 2010
@@ -24,9 +24,9 @@ import org.apache.lucene.analysis.en.Eng
 import org.apache.lucene.analysis.standard.*;
 import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter;
 import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
 import org.apache.lucene.util.Version;
 
-import java.io.IOException;
 import java.io.Reader;
 import java.util.Set;
 
@@ -43,11 +43,11 @@ import java.util.Set;
  *   <li> As of 3.1, uses {@link TurkishLowerCaseFilter} for Turkish language.
  * </ul>
  * </p>
- * @deprecated Use the language-specific analyzer in modules/analysis instead. 
- * This analyzer will be removed in Lucene 4.0
+ * @deprecated (3.1) Use the language-specific analyzer in modules/analysis instead. 
+ * This analyzer will be removed in Lucene 5.0
  */
 @Deprecated
-public final class SnowballAnalyzer extends Analyzer {
+public final class SnowballAnalyzer extends ReusableAnalyzerBase {
   private String name;
   private Set<?> stopSet;
   private final Version matchVersion;
@@ -58,16 +58,6 @@ public final class SnowballAnalyzer exte
     this.matchVersion = matchVersion;
   }
 
-  /** 
-   * Builds the named analyzer with the given stop words.
-   * @deprecated Use {@link #SnowballAnalyzer(Version, String, Set)} instead.  
-   */
-  @Deprecated
-  public SnowballAnalyzer(Version matchVersion, String name, String[] stopWords) {
-    this(matchVersion, name);
-    stopSet = StopFilter.makeStopSet(matchVersion, stopWords);
-  }
-  
   /** Builds the named analyzer with the given stop words. */
   public SnowballAnalyzer(Version matchVersion, String name, Set<?> stopWords) {
     this(matchVersion, name);
@@ -79,9 +69,9 @@ public final class SnowballAnalyzer exte
       StandardFilter}, a {@link LowerCaseFilter}, a {@link StopFilter},
       and a {@link SnowballFilter} */
   @Override
-  public TokenStream tokenStream(String fieldName, Reader reader) {
-    TokenStream result = new StandardTokenizer(matchVersion, reader);
-    result = new StandardFilter(matchVersion, result);
+  public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+    Tokenizer tokenizer = new StandardTokenizer(matchVersion, reader);
+    TokenStream result = new StandardFilter(matchVersion, tokenizer);
     // remove the possessive 's for english stemmers
     if (matchVersion.onOrAfter(Version.LUCENE_31) && 
         (name.equals("English") || name.equals("Porter") || name.equals("Lovins")))
@@ -95,38 +85,6 @@ public final class SnowballAnalyzer exte
       result = new StopFilter(matchVersion,
                               result, stopSet);
     result = new SnowballFilter(result, name);
-    return result;
-  }
-  
-  private class SavedStreams {
-    Tokenizer source;
-    TokenStream result;
-  }
-  
-  /** Returns a (possibly reused) {@link StandardTokenizer} filtered by a 
-   * {@link StandardFilter}, a {@link LowerCaseFilter}, 
-   * a {@link StopFilter}, and a {@link SnowballFilter} */
-  @Override
-  public TokenStream reusableTokenStream(String fieldName, Reader reader)
-      throws IOException {
-    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
-    if (streams == null) {
-      streams = new SavedStreams();
-      streams.source = new StandardTokenizer(matchVersion, reader);
-      streams.result = new StandardFilter(matchVersion, streams.source);
-      // Use a special lowercase filter for turkish, the stemmer expects it.
-      if (matchVersion.onOrAfter(Version.LUCENE_31) && name.equals("Turkish"))
-        streams.result = new TurkishLowerCaseFilter(streams.result);
-      else
-        streams.result = new LowerCaseFilter(matchVersion, streams.result);
-      if (stopSet != null)
-        streams.result = new StopFilter(matchVersion,
-                                        streams.result, stopSet);
-      streams.result = new SnowballFilter(streams.result, name);
-      setPreviousTokenStream(streams);
-    } else {
-      streams.source.reset(reader);
-    }
-    return streams.result;
+    return new TokenStreamComponents(tokenizer, result);
   }
 }

Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java Tue Nov 30 11:22:39 2010
@@ -58,12 +58,6 @@ public final class ClassicAnalyzer exten
 
   private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;
 
-  /**
-   * Specifies whether deprecated acronyms should be replaced with HOST type.
-   * See {@linkplain "https://issues.apache.org/jira/browse/LUCENE-1068"}
-   */
-  private final boolean replaceInvalidAcronym;
-
   /** An unmodifiable set containing some common English words that are usually not
   useful for searching. */
   public static final Set<?> STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET; 
@@ -74,7 +68,6 @@ public final class ClassicAnalyzer exten
    * @param stopWords stop words */
   public ClassicAnalyzer(Version matchVersion, Set<?> stopWords) {
     super(matchVersion, stopWords);
-    replaceInvalidAcronym = matchVersion.onOrAfter(Version.LUCENE_24);
   }
 
   /** Builds an analyzer with the default stop words ({@link
@@ -125,7 +118,6 @@ public final class ClassicAnalyzer exten
   protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
     final ClassicTokenizer src = new ClassicTokenizer(matchVersion, reader);
     src.setMaxTokenLength(maxTokenLength);
-    src.setReplaceInvalidAcronym(replaceInvalidAcronym);
     TokenStream tok = new ClassicFilter(src);
     tok = new LowerCaseFilter(matchVersion, tok);
     tok = new StopFilter(matchVersion, tok, stopwords);

Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java Tue Nov 30 11:22:39 2010
@@ -44,14 +44,6 @@ import org.apache.lucene.util.Version;
  * not suit your application, please consider copying this source code
  * directory to your project and maintaining your own grammar-based tokenizer.
  *
- * <a name="version"/>
- * <p>You must specify the required {@link Version}
- * compatibility when creating ClassicAnalyzer:
- * <ul>
- *   <li> As of 2.4, Tokens incorrectly identified as acronyms
- *        are corrected (see <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1068</a>
- * </ul>
- * 
  * ClassicTokenizer was named StandardTokenizer in Lucene versions prior to 3.1.
  * As of 3.1, {@link StandardTokenizer} implements Unicode text segmentation,
  * as specified by UAX#29.
@@ -70,13 +62,8 @@ public final class ClassicTokenizer exte
   public static final int NUM               = 6;
   public static final int CJ                = 7;
 
-  /**
-   * @deprecated this solves a bug where HOSTs that end with '.' are identified
-   *             as ACRONYMs.
-   */
-  @Deprecated
   public static final int ACRONYM_DEP       = 8;
-  
+
   /** String token types that correspond to token type int constants */
   public static final String [] TOKEN_TYPES = new String [] {
     "<ALPHANUM>",
@@ -90,8 +77,6 @@ public final class ClassicTokenizer exte
     "<ACRONYM_DEP>"
   };
 
-  private boolean replaceInvalidAcronym;
-    
   private int maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;
 
   /** Set the max allowed token length.  Any token longer
@@ -134,15 +119,9 @@ public final class ClassicTokenizer exte
     init(input, matchVersion);
   }
 
-  private final void init(Reader input, Version matchVersion) {
+  private void init(Reader input, Version matchVersion) {
     this.scanner = new ClassicTokenizerImpl(input);
-
-    if (matchVersion.onOrAfter(Version.LUCENE_24)) {
-      replaceInvalidAcronym = true;
-    } else {
-      replaceInvalidAcronym = false;
-    }
-    this.input = input;    
+    this.input = input;
   }
 
   // this tokenizer generates three attributes:
@@ -174,16 +153,10 @@ public final class ClassicTokenizer exte
         scanner.getText(termAtt);
         final int start = scanner.yychar();
         offsetAtt.setOffset(correctOffset(start), correctOffset(start+termAtt.length()));
-        // This 'if' should be removed in the next release. For now, it converts
-        // invalid acronyms to HOST. When removed, only the 'else' part should
-        // remain.
+
         if (tokenType == ClassicTokenizer.ACRONYM_DEP) {
-          if (replaceInvalidAcronym) {
-            typeAtt.setType(ClassicTokenizer.TOKEN_TYPES[ClassicTokenizer.HOST]);
-            termAtt.setLength(termAtt.length() - 1); // remove extra '.'
-          } else {
-            typeAtt.setType(ClassicTokenizer.TOKEN_TYPES[ClassicTokenizer.ACRONYM]);
-          }
+          typeAtt.setType(ClassicTokenizer.TOKEN_TYPES[ClassicTokenizer.HOST]);
+          termAtt.setLength(termAtt.length() - 1); // remove extra '.'
         } else {
           typeAtt.setType(ClassicTokenizer.TOKEN_TYPES[tokenType]);
         }
@@ -207,28 +180,4 @@ public final class ClassicTokenizer exte
     super.reset(reader);
     scanner.yyreset(reader);
   }
-
-  /**
-   * Prior to https://issues.apache.org/jira/browse/LUCENE-1068, ClassicTokenizer mischaracterized as acronyms tokens like www.abc.com
-   * when they should have been labeled as hosts instead.
-   * @return true if ClassicTokenizer now returns these tokens as Hosts, otherwise false
-   *
-   * @deprecated Remove in 3.X and make true the only valid value
-   */
-  @Deprecated
-  public boolean isReplaceInvalidAcronym() {
-    return replaceInvalidAcronym;
-  }
-
-  /**
-   *
-   * @param replaceInvalidAcronym Set to true to replace mischaracterized acronyms as HOST.
-   * @deprecated Remove in 3.X and make true the only valid value
-   *
-   * See https://issues.apache.org/jira/browse/LUCENE-1068
-   */
-  @Deprecated
-  public void setReplaceInvalidAcronym(boolean replaceInvalidAcronym) {
-    this.replaceInvalidAcronym = replaceInvalidAcronym;
-  }
 }

Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java Tue Nov 30 11:22:39 2010
@@ -355,11 +355,6 @@ public static final int EMAIL           
 public static final int HOST              = StandardTokenizer.HOST;
 public static final int NUM               = StandardTokenizer.NUM;
 public static final int CJ                = StandardTokenizer.CJ;
-/**
- * @deprecated this solves a bug where HOSTs that end with '.' are identified
- *             as ACRONYMs.
- */
-@Deprecated
 public static final int ACRONYM_DEP       = StandardTokenizer.ACRONYM_DEP;
 
 public static final String [] TOKEN_TYPES = StandardTokenizer.TOKEN_TYPES;

Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex Tue Nov 30 11:22:39 2010
@@ -47,11 +47,6 @@ public static final int EMAIL           
 public static final int HOST              = StandardTokenizer.HOST;
 public static final int NUM               = StandardTokenizer.NUM;
 public static final int CJ                = StandardTokenizer.CJ;
-/**
- * @deprecated this solves a bug where HOSTs that end with '.' are identified
- *             as ACRONYMs.
- */
-@Deprecated
 public static final int ACRONYM_DEP       = StandardTokenizer.ACRONYM_DEP;
 
 public static final String [] TOKEN_TYPES = StandardTokenizer.TOKEN_TYPES;

Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java Tue Nov 30 11:22:39 2010
@@ -56,12 +56,6 @@ public final class StandardAnalyzer exte
 
   private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;
 
-  /**
-   * Specifies whether deprecated acronyms should be replaced with HOST type.
-   * See {@linkplain "https://issues.apache.org/jira/browse/LUCENE-1068"}
-   */
-  private final boolean replaceInvalidAcronym;
-
   /** An unmodifiable set containing some common English words that are usually not
   useful for searching. */
   public static final Set<?> STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET; 
@@ -72,7 +66,6 @@ public final class StandardAnalyzer exte
    * @param stopWords stop words */
   public StandardAnalyzer(Version matchVersion, Set<?> stopWords) {
     super(matchVersion, stopWords);
-    replaceInvalidAcronym = matchVersion.onOrAfter(Version.LUCENE_24);
   }
 
   /** Builds an analyzer with the default stop words ({@link
@@ -123,7 +116,6 @@ public final class StandardAnalyzer exte
   protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
     final StandardTokenizer src = new StandardTokenizer(matchVersion, reader);
     src.setMaxTokenLength(maxTokenLength);
-    src.setReplaceInvalidAcronym(replaceInvalidAcronym);
     TokenStream tok = new StandardFilter(matchVersion, src);
     tok = new LowerCaseFilter(matchVersion, tok);
     tok = new StopFilter(matchVersion, tok, stopwords);

Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilter.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilter.java Tue Nov 30 11:22:39 2010
@@ -31,12 +31,6 @@ import org.apache.lucene.util.Version;
 public class StandardFilter extends TokenFilter {
   private final Version matchVersion;
   
-  /** @deprecated Use {@link #StandardFilter(Version, TokenStream)} instead. */
-  @Deprecated
-  public StandardFilter(TokenStream in) {
-    this(Version.LUCENE_30, in);
-  }
-  
   public StandardFilter(Version matchVersion, TokenStream in) {
     super(in);
     this.matchVersion = matchVersion;

Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java Tue Nov 30 11:22:39 2010
@@ -17,6 +17,9 @@
 
 package org.apache.lucene.analysis.standard;
 
+import java.io.IOException;
+import java.io.Reader;
+
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
@@ -25,9 +28,6 @@ import org.apache.lucene.analysis.tokena
 import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.Version;
 
-import java.io.IOException;
-import java.io.Reader;
-
 /** A grammar-based tokenizer constructed with JFlex.
  * <p>
  * As of Lucene version 3.1, this class implements the Word Break rules from the
@@ -61,28 +61,25 @@ public final class StandardTokenizer ext
   private StandardTokenizerInterface scanner;
 
   public static final int ALPHANUM          = 0;
-  /** @deprecated */
+  /** @deprecated (3.1) */
   @Deprecated
   public static final int APOSTROPHE        = 1;
-  /** @deprecated */
+  /** @deprecated (3.1) */
   @Deprecated
   public static final int ACRONYM           = 2;
-  /** @deprecated */
+  /** @deprecated (3.1) */
   @Deprecated
   public static final int COMPANY           = 3;
   public static final int EMAIL             = 4;
-  /** @deprecated */
+  /** @deprecated (3.1) */
   @Deprecated
   public static final int HOST              = 5;
   public static final int NUM               = 6;
-  /** @deprecated */
+  /** @deprecated (3.1) */
   @Deprecated
   public static final int CJ                = 7;
 
-  /**
-   * @deprecated this solves a bug where HOSTs that end with '.' are identified
-   *             as ACRONYMs.
-   */
+  /** @deprecated (3.1) */
   @Deprecated
   public static final int ACRONYM_DEP       = 8;
 
@@ -108,8 +105,6 @@ public final class StandardTokenizer ext
     "<HIRAGANA>"
   };
 
-  private boolean replaceInvalidAcronym;
-    
   private int maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;
 
   /** Set the max allowed token length.  Any token longer
@@ -155,12 +150,7 @@ public final class StandardTokenizer ext
   private final void init(Reader input, Version matchVersion) {
     this.scanner = matchVersion.onOrAfter(Version.LUCENE_31) ?
       new StandardTokenizerImpl(input) : new ClassicTokenizerImpl(input);
-    if (matchVersion.onOrAfter(Version.LUCENE_24)) {
-      replaceInvalidAcronym = true;
-    } else {
-      replaceInvalidAcronym = false;
-    }
-    this.input = input;    
+    this.input = input;
   }
 
   // this tokenizer generates three attributes:
@@ -196,12 +186,8 @@ public final class StandardTokenizer ext
         // invalid acronyms to HOST. When removed, only the 'else' part should
         // remain.
         if (tokenType == StandardTokenizer.ACRONYM_DEP) {
-          if (replaceInvalidAcronym) {
-            typeAtt.setType(StandardTokenizer.TOKEN_TYPES[StandardTokenizer.HOST]);
-            termAtt.setLength(termAtt.length() - 1); // remove extra '.'
-          } else {
-            typeAtt.setType(StandardTokenizer.TOKEN_TYPES[StandardTokenizer.ACRONYM]);
-          }
+          typeAtt.setType(StandardTokenizer.TOKEN_TYPES[StandardTokenizer.HOST]);
+          termAtt.setLength(termAtt.length() - 1); // remove extra '.'
         } else {
           typeAtt.setType(StandardTokenizer.TOKEN_TYPES[tokenType]);
         }
@@ -225,28 +211,4 @@ public final class StandardTokenizer ext
     super.reset(reader);
     scanner.yyreset(reader);
   }
-
-  /**
-   * Prior to https://issues.apache.org/jira/browse/LUCENE-1068, StandardTokenizer mischaracterized as acronyms tokens like www.abc.com
-   * when they should have been labeled as hosts instead.
-   * @return true if StandardTokenizer now returns these tokens as Hosts, otherwise false
-   *
-   * @deprecated Remove in 3.X and make true the only valid value
-   */
-  @Deprecated
-  public boolean isReplaceInvalidAcronym() {
-    return replaceInvalidAcronym;
-  }
-
-  /**
-   *
-   * @param replaceInvalidAcronym Set to true to replace mischaracterized acronyms as HOST.
-   * @deprecated Remove in 3.X and make true the only valid value
-   *
-   * See https://issues.apache.org/jira/browse/LUCENE-1068
-   */
-  @Deprecated
-  public void setReplaceInvalidAcronym(boolean replaceInvalidAcronym) {
-    this.replaceInvalidAcronym = replaceInvalidAcronym;
-  }
 }

Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java Tue Nov 30 11:22:39 2010
@@ -53,9 +53,9 @@ public class SynonymMap {
     SynonymMap currMap = this;
     for (String str : singleMatch) {
       if (currMap.submap==null) {
-        // for now hardcode at 2.9, as its what the old code did.
+        // for now hardcode at 4.0, as it's what the old code did.
         // would be nice to fix, but shouldn't store a version in each submap!!!
-        currMap.submap = new CharArrayMap<SynonymMap>(Version.LUCENE_29, 1, ignoreCase());
+        currMap.submap = new CharArrayMap<SynonymMap>(Version.LUCENE_40, 1, ignoreCase());
       }
 
       SynonymMap map = currMap.submap.get(str);

Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java Tue Nov 30 11:22:39 2010
@@ -17,17 +17,17 @@ package org.apache.lucene.analysis.th;
  */
 
 import java.io.IOException;
-import java.util.Locale;
 import java.lang.Character.UnicodeBlock;
-import javax.swing.text.Segment;
 import java.text.BreakIterator;
+import java.util.Locale;
+import javax.swing.text.Segment;
 
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.Version;
 
@@ -69,14 +69,6 @@ public final class ThaiWordFilter extend
   private OffsetAttribute clonedOffsetAtt = null;
   private boolean hasMoreTokensInClone = false;
 
-  /** Creates a new ThaiWordFilter that also lowercases non-thai text.
-   * @deprecated Use the ctor with {@code matchVersion} instead!
-   */
-  @Deprecated
-  public ThaiWordFilter(TokenStream input) {
-    this(Version.LUCENE_30, input);
-  }
-  
   /** Creates a new ThaiWordFilter with the specified match version. */
   public ThaiWordFilter(Version matchVersion, TokenStream input) {
     super(matchVersion.onOrAfter(Version.LUCENE_31) ?

Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharArraySet.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharArraySet.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharArraySet.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharArraySet.java Tue Nov 30 11:22:39 2010
@@ -51,8 +51,7 @@ import org.apache.lucene.util.Version;
  * that has a string representation. The add methods will use
  * {@link Object#toString} and store the result using a {@code char[]}
 * buffer. The {@code contains()} methods behave the same way.
- * The {@link #iterator()} returns an {@code Iterator<String>}.
- * For type safety also {@link #stringIterator()} is provided.
+ * The {@link #iterator()} returns an {@code Iterator<char[]>}.
  */
 public class CharArraySet extends AbstractSet<Object> {
   public static final CharArraySet EMPTY_SET = new CharArraySet(CharArrayMap.<Object>emptyMap());
@@ -93,37 +92,6 @@ public class CharArraySet extends Abstra
     addAll(c);
   }
 
-  /**
-   * Creates a set with enough capacity to hold startSize terms
-   * 
-   * @param startSize
-   *          the initial capacity
-   * @param ignoreCase
-   *          <code>false</code> if and only if the set should be case sensitive
-   *          otherwise <code>true</code>.
-   * @deprecated use {@link #CharArraySet(Version, int, boolean)} instead
-   */
-  @Deprecated
-  public CharArraySet(int startSize, boolean ignoreCase) {
-    this(Version.LUCENE_30, startSize, ignoreCase);
-  }
-  
-  /**
-   * Creates a set from a Collection of objects. 
-   * 
-   * @param c
-   *          a collection whose elements to be placed into the set
-   * @param ignoreCase
-   *          <code>false</code> if and only if the set should be case sensitive
-   *          otherwise <code>true</code>.
-   * @deprecated use {@link #CharArraySet(Version, Collection, boolean)} instead         
-   */  
-  @Deprecated
-  public CharArraySet(Collection<?> c, boolean ignoreCase) {
-    this(Version.LUCENE_30, c.size(), ignoreCase);
-    addAll(c);
-  }
-  
   /** Create set from the specified map (internal only), used also by {@link CharArrayMap#keySet()} */
   CharArraySet(final CharArrayMap<Object> map){
     this.map = map;
@@ -202,24 +170,6 @@ public class CharArraySet extends Abstra
   /**
    * Returns a copy of the given set as a {@link CharArraySet}. If the given set
    * is a {@link CharArraySet} the ignoreCase property will be preserved.
-   * 
-   * @param set
-   *          a set to copy
-   * @return a copy of the given set as a {@link CharArraySet}. If the given set
-   *         is a {@link CharArraySet} the ignoreCase and matchVersion property will be
-   *         preserved.
-   * @deprecated use {@link #copy(Version, Set)} instead.
-   */
-  @Deprecated
-  public static CharArraySet copy(final Set<?> set) {
-    if(set == EMPTY_SET)
-      return EMPTY_SET;
-    return copy(Version.LUCENE_30, set);
-  }
-  
-  /**
-   * Returns a copy of the given set as a {@link CharArraySet}. If the given set
-   * is a {@link CharArraySet} the ignoreCase property will be preserved.
    * <p>
    * <b>Note:</b> If you intend to create a copy of another {@link CharArraySet} where
    * the {@link Version} of the source set differs from its copy
@@ -248,68 +198,13 @@ public class CharArraySet extends Abstra
     return new CharArraySet(matchVersion, set, false);
   }
   
-  /** The Iterator<String> for this set.  Strings are constructed on the fly, so
-   * use <code>nextCharArray</code> for more efficient access.
-   * @deprecated Use the standard iterator, which returns {@code char[]} instances.
-   */
-  @Deprecated
-  public class CharArraySetIterator implements Iterator<String> {
-    int pos=-1;
-    char[] next;
-    private CharArraySetIterator() {
-      goNext();
-    }
-
-    private void goNext() {
-      next = null;
-      pos++;
-      while (pos < map.keys.length && (next=map.keys[pos]) == null) pos++;
-    }
-
-    public boolean hasNext() {
-      return next != null;
-    }
-
-    /** do not modify the returned char[] */
-    public char[] nextCharArray() {
-      char[] ret = next;
-      goNext();
-      return ret;
-    }
-
-    /** Returns the next String, as a Set<String> would...
-     * use nextCharArray() for better efficiency. */
-    public String next() {
-      return new String(nextCharArray());
-    }
-
-    public void remove() {
-      throw new UnsupportedOperationException();
-    }
-  }
-
-  /** returns an iterator of new allocated Strings (an instance of {@link CharArraySetIterator}).
-   * @deprecated Use {@link #iterator}, which returns {@code char[]} instances.
-   */
-  @Deprecated
-  public Iterator<String> stringIterator() {
-    return new CharArraySetIterator();
-  }
-
-  /** Returns an {@link Iterator} depending on the version used:
-   * <ul>
-   * <li>if {@code matchVersion} &ge; 3.1, it returns {@code char[]} instances in this set.</li>
-   * <li>if {@code matchVersion} is 3.0 or older, it returns new
-   * allocated Strings, so this method violates the Set interface.
-   * It is kept this way for backwards compatibility, normally it should
-   * return {@code char[]} on {@code next()}</li>
-   * </ul>
+  /**
+   * Returns an {@link Iterator} for {@code char[]} instances in this set.
    */
   @Override @SuppressWarnings("unchecked")
   public Iterator<Object> iterator() {
     // use the AbstractSet#keySet()'s iterator (to not produce endless recursion)
-    return map.matchVersion.onOrAfter(Version.LUCENE_31) ?
-      map.originalKeySet().iterator() : (Iterator) stringIterator();
+    return map.originalKeySet().iterator();
   }
   
   @Override

Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/tartarus/snowball/SnowballProgram.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/tartarus/snowball/SnowballProgram.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/tartarus/snowball/SnowballProgram.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/tartarus/snowball/SnowballProgram.java Tue Nov 30 11:22:39 2010
@@ -239,13 +239,6 @@ public abstract class SnowballProgram {
 	return true;
     }
 
-    /** @deprecated for binary back compat. Will be removed in Lucene 4.0 */
-    @Deprecated
-    protected boolean eq_s(int s_size, String s)
-    {
-	return eq_s(s_size, (CharSequence)s);
-    }
-
     protected boolean eq_s_b(int s_size, CharSequence s)
     {
 	if (cursor - limit_backward < s_size) return false;
@@ -257,35 +250,15 @@ public abstract class SnowballProgram {
 	return true;
     }
 
-    /** @deprecated for binary back compat. Will be removed in Lucene 4.0 */
-    @Deprecated
-    protected boolean eq_s_b(int s_size, String s)
-    {
-	return eq_s_b(s_size, (CharSequence)s);
-    }
-
     protected boolean eq_v(CharSequence s)
     {
 	return eq_s(s.length(), s);
     }
 
-    /** @deprecated for binary back compat. Will be removed in Lucene 4.0 */
-    @Deprecated
-    protected boolean eq_v(StringBuilder s)
-    {
-	return eq_s(s.length(), (CharSequence)s);
-    }
-
     protected boolean eq_v_b(CharSequence s)
     {   return eq_s_b(s.length(), s);
     }
 
-    /** @deprecated for binary back compat. Will be removed in Lucene 4.0 */
-    @Deprecated
-    protected boolean eq_v_b(StringBuilder s)
-    {   return eq_s_b(s.length(), (CharSequence)s);
-    }
-
     protected int find_among(Among v[], int v_size)
     {
 	int i = 0;
@@ -456,12 +429,6 @@ public abstract class SnowballProgram {
 	return adjustment;
     }
 
-    /** @deprecated for binary back compat. Will be removed in Lucene 4.0 */
-    @Deprecated
-    protected int replace_s(int c_bra, int c_ket, String s) {
-	return replace_s(c_bra, c_ket, (CharSequence)s);
-    }
-
     protected void slice_check()
     {
 	if (bra < 0 ||
@@ -484,20 +451,6 @@ public abstract class SnowballProgram {
 	replace_s(bra, ket, s);
     }
  
-    /** @deprecated for binary back compat. Will be removed in Lucene 4.0 */
-    @Deprecated
-    protected void slice_from(String s)
-    {
-	slice_from((CharSequence)s);
-    }
-
-    /** @deprecated for binary back compat. Will be removed in Lucene 4.0 */
-    @Deprecated
-    protected void slice_from(StringBuilder s)
-    {
-	slice_from((CharSequence)s);
-    }
-
     protected void slice_del()
     {
 	slice_from((CharSequence)"");
@@ -510,20 +463,6 @@ public abstract class SnowballProgram {
 	if (c_bra <= ket) ket += adjustment;
     }
 
-    /** @deprecated for binary back compat. Will be removed in Lucene 4.0 */
-    @Deprecated
-    protected void insert(int c_bra, int c_ket, String s)
-    {
-	insert(c_bra, c_ket, (CharSequence)s);
-    }
-
-    /** @deprecated for binary back compat. Will be removed in Lucene 4.0 */
-    @Deprecated
-    protected void insert(int c_bra, int c_ket, StringBuilder s)
-    {
-	insert(c_bra, c_ket, (CharSequence)s);
-    }
-
     /* Copy the slice into the supplied StringBuffer */
     protected StringBuilder slice_to(StringBuilder s)
     {

Added: lucene/dev/trunk/modules/analysis/common/src/resources/org/apache/lucene/analysis/cjk/stopwords.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/resources/org/apache/lucene/analysis/cjk/stopwords.txt?rev=1040463&view=auto
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/resources/org/apache/lucene/analysis/cjk/stopwords.txt (added)
+++ lucene/dev/trunk/modules/analysis/common/src/resources/org/apache/lucene/analysis/cjk/stopwords.txt Tue Nov 30 11:22:39 2010
@@ -0,0 +1,35 @@
+a
+and
+are
+as
+at
+be
+but
+by
+for
+if
+in
+into
+is
+it
+no
+not
+of
+on
+or
+s
+such
+t
+that
+the
+their
+then
+there
+these
+they
+this
+to
+was
+will
+with
+www

Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java Tue Nov 30 11:22:39 2010
@@ -19,9 +19,10 @@ package org.apache.lucene.analysis.br;
 
 import java.io.IOException;
 import java.io.StringReader;
+import java.util.Collections;
 
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.core.LowerCaseTokenizer;
 import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
 import org.apache.lucene.analysis.util.CharArraySet;
@@ -135,19 +136,10 @@ public class TestBrazilianStemmer extend
   }
  
   public void testStemExclusionTable() throws Exception {
-    BrazilianAnalyzer a = new BrazilianAnalyzer(TEST_VERSION_CURRENT);
-    a.setStemExclusionTable(new String[] { "quintessência" });
+    BrazilianAnalyzer a = new BrazilianAnalyzer(TEST_VERSION_CURRENT, Collections.emptySet(), asSet("quintessência"));
     checkReuse(a, "quintessência", "quintessência"); // excluded words will be completely unchanged.
   }
   
-  public void testStemExclusionTableBWCompat() throws IOException {
-    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
-    set.add("Brasília");
-    BrazilianStemFilter filter = new BrazilianStemFilter(
-        new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader("Brasília Brasilia")), set);
-    assertTokenStreamContents(filter, new String[] { "brasília", "brasil" });
-  }
-
   public void testWithKeywordAttribute() throws IOException {
     CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
     set.add("Brasília");
@@ -157,28 +149,6 @@ public class TestBrazilianStemmer extend
     assertTokenStreamContents(filter, new String[] { "brasília", "brasil" });
   }
 
-  public void testWithKeywordAttributeAndExclusionTable() throws IOException {
-    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
-    set.add("Brasília");
-    CharArraySet set1 = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
-    set1.add("Brasilia");
-    BrazilianStemFilter filter = new BrazilianStemFilter(
-        new KeywordMarkerFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(
-            "Brasília Brasilia")), set), set1);
-    assertTokenStreamContents(filter, new String[] { "brasília", "brasilia" });
-  }
-  
-  /* 
-   * Test that changes to the exclusion table are applied immediately
-   * when using reusable token streams.
-   */
-  public void testExclusionTableReuse() throws Exception {
-    BrazilianAnalyzer a = new BrazilianAnalyzer(TEST_VERSION_CURRENT);
-    checkReuse(a, "quintessência", "quintessente");
-    a.setStemExclusionTable(new String[] { "quintessência" });
-    checkReuse(a, "quintessência", "quintessência");
-  }
-  
   private void check(final String input, final String expected) throws Exception {
     checkOneTerm(new BrazilianAnalyzer(TEST_VERSION_CURRENT), input, expected);
   }

Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java Tue Nov 30 11:22:39 2010
@@ -18,6 +18,7 @@ package org.apache.lucene.analysis.commo
 
 import java.io.Reader;
 import java.io.StringReader;
+import java.util.Arrays;
 import java.util.Set;
 
 import org.apache.lucene.analysis.Analyzer;
@@ -26,18 +27,20 @@ import org.apache.lucene.analysis.TokenF
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.util.CharArraySet;
 
 /**
  * Tests CommonGrams(Query)Filter
  */
 public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
-  private static final String[] commonWords = { "s", "a", "b", "c", "d", "the",
-      "of" };
+  private static final CharArraySet commonWords = new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(
+      "s", "a", "b", "c", "d", "the", "of"
+  ), false);
   
   public void testReset() throws Exception {
     final String input = "How the s a brown s cow d like A B thing?";
     WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
-    CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);
+    CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
     
     CharTermAttribute term = cgf.addAttribute(CharTermAttribute.class);
     assertTrue(cgf.incrementToken());
@@ -58,7 +61,7 @@ public class CommonGramsFilterTest exten
   public void testQueryReset() throws Exception {
     final String input = "How the s a brown s cow d like A B thing?";
     WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
-    CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);
+    CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
     CommonGramsQueryFilter nsf = new CommonGramsQueryFilter(cgf);
     
     CharTermAttribute term = wt.addAttribute(CharTermAttribute.class);
@@ -88,7 +91,7 @@ public class CommonGramsFilterTest exten
     Analyzer a = new Analyzer() {    
       @Override
       public TokenStream tokenStream(String field, Reader in) {
-        return new CommonGramsQueryFilter(new CommonGramsFilter(
+        return new CommonGramsQueryFilter(new CommonGramsFilter(TEST_VERSION_CURRENT,
             new WhitespaceTokenizer(TEST_VERSION_CURRENT, in), commonWords));
       } 
     };
@@ -157,7 +160,7 @@ public class CommonGramsFilterTest exten
     Analyzer a = new Analyzer() {    
       @Override
       public TokenStream tokenStream(String field, Reader in) {
-        return new CommonGramsFilter(
+        return new CommonGramsFilter(TEST_VERSION_CURRENT,
             new WhitespaceTokenizer(TEST_VERSION_CURRENT, in), commonWords);
       } 
     };
@@ -245,8 +248,7 @@ public class CommonGramsFilterTest exten
   public void testCaseSensitive() throws Exception {
     final String input = "How The s a brown s cow d like A B thing?";
     WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
-    Set common = CommonGramsFilter.makeCommonSet(commonWords);
-    TokenFilter cgf = new CommonGramsFilter(wt, common, false);
+    TokenFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
     assertTokenStreamContents(cgf, new String[] {"How", "The", "The_s", "s",
         "s_a", "a", "a_brown", "brown", "brown_s", "s", "s_cow", "cow",
         "cow_d", "d", "d_like", "like", "A", "B", "thing?"});
@@ -258,7 +260,7 @@ public class CommonGramsFilterTest exten
   public void testLastWordisStopWord() throws Exception {
     final String input = "dog the";
     WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
-    CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);
+    CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
     TokenFilter nsf = new CommonGramsQueryFilter(cgf);
     assertTokenStreamContents(nsf, new String[] { "dog_the" });
   }
@@ -269,7 +271,7 @@ public class CommonGramsFilterTest exten
   public void testFirstWordisStopWord() throws Exception {
     final String input = "the dog";
     WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
-    CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);
+    CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
     TokenFilter nsf = new CommonGramsQueryFilter(cgf);
     assertTokenStreamContents(nsf, new String[] { "the_dog" });
   }
@@ -280,7 +282,7 @@ public class CommonGramsFilterTest exten
   public void testOneWordQueryStopWord() throws Exception {
     final String input = "the";
     WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
-    CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);
+    CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
     TokenFilter nsf = new CommonGramsQueryFilter(cgf);
     assertTokenStreamContents(nsf, new String[] { "the" });
   }
@@ -291,7 +293,7 @@ public class CommonGramsFilterTest exten
   public void testOneWordQuery() throws Exception {
     final String input = "monster";
     WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
-    CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);
+    CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
     TokenFilter nsf = new CommonGramsQueryFilter(cgf);
     assertTokenStreamContents(nsf, new String[] { "monster" });
   }
@@ -302,7 +304,7 @@ public class CommonGramsFilterTest exten
   public void TestFirstAndLastStopWord() throws Exception {
     final String input = "the of";
     WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
-    CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);
+    CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
     TokenFilter nsf = new CommonGramsQueryFilter(cgf);
     assertTokenStreamContents(nsf, new String[] { "the_of" });
   }

Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java Tue Nov 30 11:22:39 2010
@@ -18,22 +18,16 @@ package org.apache.lucene.analysis.core;
  */
 
 import java.io.IOException;
-import java.io.StringReader;
 import java.io.Reader;
+import java.io.StringReader;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.core.LowerCaseTokenizer;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.core.SimpleAnalyzer;
-import org.apache.lucene.analysis.core.StopAnalyzer;
-import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.index.Payload;
 import org.apache.lucene.util.Version;
 
@@ -137,20 +131,6 @@ public class TestAnalyzers extends BaseT
   }
   
   /**
-   * @deprecated remove this when lucene 3.0 "broken unicode 4" support
-   * is no longer needed.
-   */
-  @Deprecated
-  private static class LowerCaseWhitespaceAnalyzerBWComp extends Analyzer {
-
-    @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
-      return new LowerCaseFilter(new WhitespaceTokenizer(reader));
-    }
-    
-  }
-  
-  /**
    * Test that LowercaseFilter handles entire unicode range correctly
    */
   public void testLowerCaseFilter() throws IOException {
@@ -196,30 +176,6 @@ public class TestAnalyzers extends BaseT
     
   }
   
-  /**
-   * Test that LowercaseFilter only works on BMP for back compat,
-   * depending upon version
-   * @deprecated remove this test when lucene 3.0 "broken unicode 4" support
-   * is no longer needed.
-   */
-  @Deprecated
-  public void testLowerCaseFilterBWComp() throws IOException {
-    Analyzer a = new LowerCaseWhitespaceAnalyzerBWComp();
-    // BMP
-    assertAnalyzesTo(a, "AbaCaDabA", new String[] { "abacadaba" });
-    // supplementary, no-op
-    assertAnalyzesTo(a, "\ud801\udc16\ud801\udc16\ud801\udc16\ud801\udc16",
-        new String[] {"\ud801\udc16\ud801\udc16\ud801\udc16\ud801\udc16"});
-    assertAnalyzesTo(a, "AbaCa\ud801\udc16DabA",
-        new String[] { "abaca\ud801\udc16daba" });
-    // unpaired lead surrogate
-    assertAnalyzesTo(a, "AbaC\uD801AdaBa", 
-        new String [] { "abac\uD801adaba" });
-    // unpaired trail surrogate
-    assertAnalyzesTo(a, "AbaC\uDC16AdaBa", 
-        new String [] { "abac\uDC16adaba" });
-  }
-  
   public void testLowerCaseTokenizer() throws IOException {
     StringReader reader = new StringReader("Tokenizer \ud801\udc1ctest");
     LowerCaseTokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT,
@@ -228,6 +184,7 @@ public class TestAnalyzers extends BaseT
         "\ud801\udc44test" });
   }
 
+  /** @deprecated (3.1) */
   @Deprecated
   public void testLowerCaseTokenizerBWCompat() throws IOException {
     StringReader reader = new StringReader("Tokenizer \ud801\udc1ctest");
@@ -235,7 +192,7 @@ public class TestAnalyzers extends BaseT
         reader);
     assertTokenStreamContents(tokenizer, new String[] { "tokenizer", "test" });
   }
-  
+
   public void testWhitespaceTokenizer() throws IOException {
     StringReader reader = new StringReader("Tokenizer \ud801\udc1ctest");
     WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
@@ -244,6 +201,7 @@ public class TestAnalyzers extends BaseT
         "\ud801\udc1ctest" });
   }
 
+  /** @deprecated (3.1) */
   @Deprecated
   public void testWhitespaceTokenizerBWCompat() throws IOException {
     StringReader reader = new StringReader("Tokenizer \ud801\udc1ctest");

Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java Tue Nov 30 11:22:39 2010
@@ -129,12 +129,13 @@ public class TestClassicAnalyzer extends
     // the following should be recognized as HOST:
     assertAnalyzesTo(a2, "www.nutch.org.", new String[]{ "www.nutch.org" }, new String[] { "<HOST>" });
 
-    // 2.3 should show the bug
-    a2 = new ClassicAnalyzer(org.apache.lucene.util.Version.LUCENE_23);
-    assertAnalyzesTo(a2, "www.nutch.org.", new String[]{ "wwwnutchorg" }, new String[] { "<ACRONYM>" });
-
-    // 2.4 should not show the bug
-    a2 = new ClassicAnalyzer(Version.LUCENE_24);
+    // 2.3 should show the bug. But, alas, it's obsolete, we don't support it.
+    // a2 = new ClassicAnalyzer(org.apache.lucene.util.Version.LUCENE_23);
+    // assertAnalyzesTo(a2, "www.nutch.org.", new String[]{ "wwwnutchorg" }, new String[] { "<ACRONYM>" });
+
+    // 2.4 should not show the bug. But, alas, it's also obsolete,
+    // so we check latest released (Robert's gonna break this on 4.0 soon :) )
+    a2 = new ClassicAnalyzer(Version.LUCENE_31);
     assertAnalyzesTo(a2, "www.nutch.org.", new String[]{ "www.nutch.org" }, new String[] { "<HOST>" });
   }
 

Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java Tue Nov 30 11:22:39 2010
@@ -62,17 +62,15 @@ public class TestStopAnalyzer extends Ba
     stopWordsSet.add("good");
     stopWordsSet.add("test");
     stopWordsSet.add("analyzer");
-    StopAnalyzer newStop = new StopAnalyzer(Version.LUCENE_24, stopWordsSet);
+    StopAnalyzer newStop = new StopAnalyzer(Version.LUCENE_40, stopWordsSet);
     StringReader reader = new StringReader("This is a good test of the english stop analyzer");
     TokenStream stream = newStop.tokenStream("test", reader);
     assertNotNull(stream);
     CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
-    PositionIncrementAttribute posIncrAtt = stream.addAttribute(PositionIncrementAttribute.class);
     
     while (stream.incrementToken()) {
       String text = termAtt.toString();
       assertFalse(stopWordsSet.contains(text));
-      assertEquals(1,posIncrAtt.getPositionIncrement()); // in 2.4 stop tokenizer does not apply increments.
     }
   }
 

Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilter.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilter.java Tue Nov 30 11:22:39 2010
@@ -16,22 +16,18 @@ package org.apache.lucene.analysis.core;
  * limitations under the License.
  */
 
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.Set;
+
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.util.English;
 import org.apache.lucene.util.Version;
 
-import java.io.IOException;
-import java.io.StringReader;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Set;
-import java.util.HashSet;
-
 
 public class TestStopFilter extends BaseTokenStreamTestCase {
   
@@ -39,7 +35,7 @@ public class TestStopFilter extends Base
 
   public void testExactCase() throws IOException {
     StringReader reader = new StringReader("Now is The Time");
-    Set<String> stopWords = new HashSet<String>(Arrays.asList("is", "the", "Time"));
+    Set<String> stopWords = asSet("is", "the", "Time");
     TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), stopWords, false);
     final CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
     assertTrue(stream.incrementToken());
@@ -51,7 +47,7 @@ public class TestStopFilter extends Base
 
   public void testIgnoreCase() throws IOException {
     StringReader reader = new StringReader("Now is The Time");
-    Set<Object> stopWords = new HashSet<Object>(Arrays.asList( "is", "the", "Time" ));
+    Set<String> stopWords = asSet( "is", "the", "Time" );
     TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), stopWords, true);
     final CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
     assertTrue(stream.incrementToken());
@@ -89,7 +85,7 @@ public class TestStopFilter extends Base
     Set<Object> stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords);
     // with increments
     StringReader reader = new StringReader(sb.toString());
-    StopFilter stpf = new StopFilter(Version.LUCENE_24, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), stopSet);
+    StopFilter stpf = new StopFilter(Version.LUCENE_40, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), stopSet);
     doTestStopPositons(stpf,true);
     // without increments
     reader = new StringReader(sb.toString());

Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java Tue Nov 30 11:22:39 2010
@@ -35,9 +35,8 @@ import org.apache.lucene.util.Version;
  *
  */
 public class TestCzechAnalyzer extends BaseTokenStreamTestCase {
-  
   /**
-   * @deprecated Remove this test when support for 3.0 indexes is no longer needed.
+   * @deprecated (3.1) Remove this test when support for 3.0 indexes is no longer needed.
    */
   @Deprecated
   public void testStopWordLegacy() throws Exception {
@@ -51,7 +50,7 @@ public class TestCzechAnalyzer extends B
   }
   
   /**
-   * @deprecated Remove this test when support for 3.0 indexes is no longer needed.
+   * @deprecated (3.1) Remove this test when support for 3.0 indexes is no longer needed.
    */
   @Deprecated
   public void testReusableTokenStreamLegacy() throws Exception {
@@ -66,49 +65,6 @@ public class TestCzechAnalyzer extends B
     assertAnalyzesToReuse(analyzer, "Česká Republika", new String[] { "česk", "republik" });
   }
 
-  /**
-   * An input stream that always throws IOException for testing.
-   * @deprecated Remove this class when the loadStopWords method is removed.
-   */
-  @Deprecated
-  private class UnreliableInputStream extends InputStream {
-    @Override
-    public int read() throws IOException {
-      throw new IOException();
-    }
-  }
-  
-  /**
-   * The loadStopWords method does not throw IOException on error,
-   * instead previously it set the stoptable to null (versus empty)
-   * this would cause a NPE when it is time to create the StopFilter.
-   * @deprecated Remove this test when the loadStopWords method is removed.
-   */
-  @Deprecated
-  public void testInvalidStopWordFile() throws Exception {
-    CzechAnalyzer cz = new CzechAnalyzer(Version.LUCENE_30);
-    cz.loadStopWords(new UnreliableInputStream(), "UTF-8");
-    assertAnalyzesTo(cz, "Pokud mluvime o volnem",
-        new String[] { "pokud", "mluvime", "o", "volnem" });
-  }
-  
-  /** 
-   * Test that changes to the stop table via loadStopWords are applied immediately
-   * when using reusable token streams.
-   * @deprecated Remove this test when the loadStopWords method is removed.
-   */
-  @Deprecated
-  public void testStopWordFileReuse() throws Exception {
-    CzechAnalyzer cz = new CzechAnalyzer(Version.LUCENE_30);
-    assertAnalyzesToReuse(cz, "Česká Republika", 
-      new String[] { "česká", "republika" });
-    
-    InputStream stopwords = getClass().getResourceAsStream("customStopWordFile.txt");
-    cz.loadStopWords(stopwords, "UTF-8");
-    
-    assertAnalyzesToReuse(cz, "Česká Republika", new String[] { "česká" });
-  }
-  
   public void testWithStemExclusionSet() throws IOException{
     CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
     set.add("hole");

Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java Tue Nov 30 11:22:39 2010
@@ -19,6 +19,7 @@ package org.apache.lucene.analysis.de;
 
 import java.io.IOException;
 import java.io.StringReader;
+import java.util.Collections;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@@ -35,15 +36,6 @@ public class TestGermanAnalyzer extends 
     checkOneTermReuse(a, "Tischen", "tisch");
   }
   
-  public void testExclusionTableBWCompat() throws IOException {
-    GermanStemFilter filter = new GermanStemFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT, 
-        new StringReader("Fischen Trinken")));
-    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
-    set.add("fischen");
-    filter.setExclusionSet(set);
-    assertTokenStreamContents(filter, new String[] { "fischen", "trink" });
-  }
-
   public void testWithKeywordAttribute() throws IOException {
     CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
     set.add("fischen");
@@ -53,27 +45,8 @@ public class TestGermanAnalyzer extends 
     assertTokenStreamContents(filter, new String[] { "fischen", "trink" });
   }
 
-  public void testWithKeywordAttributeAndExclusionTable() throws IOException {
-    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
-    set.add("fischen");
-    CharArraySet set1 = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
-    set1.add("trinken");
-    set1.add("fischen");
-    GermanStemFilter filter = new GermanStemFilter(
-        new KeywordMarkerFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(
-            "Fischen Trinken")), set));
-    filter.setExclusionSet(set1);
-    assertTokenStreamContents(filter, new String[] { "fischen", "trinken" });
-  }
-  
-  /* 
-   * Test that changes to the exclusion table are applied immediately
-   * when using reusable token streams.
-   */
-  public void testExclusionTableReuse() throws Exception {
-    GermanAnalyzer a = new GermanAnalyzer(TEST_VERSION_CURRENT);
-    checkOneTermReuse(a, "tischen", "tisch");
-    a.setStemExclusionTable(new String[] { "tischen" });
+  public void testStemExclusionTable() throws Exception {
+    GermanAnalyzer a = new GermanAnalyzer(TEST_VERSION_CURRENT, Collections.emptySet(), asSet("tischen"));
     checkOneTermReuse(a, "tischen", "tischen");
   }
   

Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java Tue Nov 30 11:22:39 2010
@@ -16,8 +16,8 @@ package org.apache.lucene.analysis.el;
  * limitations under the License.
  */
 
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.util.Version;
 
 /**
@@ -52,7 +52,7 @@ public class GreekAnalyzerTest extends B
 	 * Test the analysis of various greek strings.
 	 *
 	 * @throws Exception in case an error occurs
-	 * @deprecated Remove this test when support for 3.0 is no longer needed
+	 * @deprecated (3.1) Remove this test when support for 3.0 is no longer needed
 	 */
   @Deprecated
 	public void testAnalyzerBWCompat() throws Exception {
@@ -87,15 +87,4 @@ public class GreekAnalyzerTest extends B
     assertAnalyzesToReuse(a, "ΠΡΟΫΠΟΘΕΣΕΙΣ  Άψογος, ο μεστός και οι άλλοι",
         new String[] { "προυποθεσ", "αψογ", "μεστ", "αλλ" });
   }
-	
-	/**
-	 * Greek Analyzer didn't call standardFilter, so no normalization of acronyms.
-	 * check that this is preserved.
-	 * @deprecated remove this test in Lucene 4.0
-	 */
-	@Deprecated
-	public void testAcronymBWCompat() throws Exception {
-	  Analyzer a = new GreekAnalyzer(Version.LUCENE_30);
-	  assertAnalyzesTo(a, "Α.Π.Τ.", new String[] { "α.π.τ." });
 	}
-}

Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java Tue Nov 30 11:22:39 2010
@@ -17,8 +17,8 @@ package org.apache.lucene.analysis.fa;
  * limitations under the License.
  */
 
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 
 /**
  * Test the Persian Analyzer
@@ -215,9 +215,8 @@ public class TestPersianAnalyzer extends
    * Test that custom stopwords work, and are not case-sensitive.
    */
   public void testCustomStopwords() throws Exception {
-    PersianAnalyzer a = new PersianAnalyzer(TEST_VERSION_CURRENT, new String[] { "the", "and", "a" });
+    PersianAnalyzer a = new PersianAnalyzer(TEST_VERSION_CURRENT, asSet("the", "and", "a"));
     assertAnalyzesTo(a, "The quick brown fox.", new String[] { "quick",
         "brown", "fox" });
   }
-
 }

Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java Tue Nov 30 11:22:39 2010
@@ -116,7 +116,7 @@ public class TestFrenchAnalyzer extends 
 	}
 	
 	/**
-	 * @deprecated remove this test for Lucene 4.0
+	 * @deprecated (3.1) remove this test for Lucene 5.0
 	 */
 	@Deprecated
 	public void testAnalyzer30() throws Exception {
@@ -224,17 +224,6 @@ public class TestFrenchAnalyzer extends 
               "captif" });
 	}
 
-	/* 
-	 * Test that changes to the exclusion table are applied immediately
-	 * when using reusable token streams.
-	 */
-	public void testExclusionTableReuse() throws Exception {
-	  FrenchAnalyzer fa = new FrenchAnalyzer(TEST_VERSION_CURRENT);
-	  assertAnalyzesToReuse(fa, "habitable", new String[] { "habit" });
-	  fa.setStemExclusionTable(new String[] { "habitable" });
-	  assertAnalyzesToReuse(fa, "habitable", new String[] { "habitable" });
-	}
-	
   public void testExclusionTableViaCtor() throws Exception {
     CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
     set.add("habitable");
@@ -256,7 +245,7 @@ public class TestFrenchAnalyzer extends 
   /**
    * Prior to 3.1, this analyzer had no lowercase filter.
    * stopwords were case sensitive. Preserve this for back compat.
-   * @deprecated Remove this test in Lucene 4.0
+   * @deprecated (3.1) Remove this test in Lucene 5.0
    */
   @Deprecated
   public void testBuggyStopwordsCasing() throws IOException {

Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java Tue Nov 30 11:22:39 2010
@@ -24,6 +24,7 @@ import java.util.Set;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+import org.apache.lucene.analysis.util.CharArraySet;
 
 /** Test {@link KeepWordFilter} */
 public class TestKeepWordFilter extends BaseTokenStreamTestCase {
@@ -38,12 +39,12 @@ public class TestKeepWordFilter extends 
     
     // Test Stopwords
     TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
-    stream = new KeepWordFilter(stream, words, true);
+    stream = new KeepWordFilter(stream, new CharArraySet(TEST_VERSION_CURRENT, words, true));
     assertTokenStreamContents(stream, new String[] { "aaa", "BBB" });
        
     // Now force case
     stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
-    stream = new KeepWordFilter(stream, words, false);
+    stream = new KeepWordFilter(stream, new CharArraySet(TEST_VERSION_CURRENT,words, false));
     assertTokenStreamContents(stream, new String[] { "aaa" });
   }
 }

Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java Tue Nov 30 11:22:39 2010
@@ -23,12 +23,7 @@ import java.util.Collection;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+import org.apache.lucene.analysis.tokenattributes.*;
 
 /**
  * @version $Id:$
@@ -70,7 +65,7 @@ public class TestTrimFilter extends Base
   }
   
   /**
-   * @deprecated does not support custom attributes
+   * @deprecated (3.0) does not support custom attributes
    */
   @Deprecated
   private static class IterTokenStream extends TokenStream {

Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java Tue Nov 30 11:22:39 2010
@@ -25,7 +25,6 @@ import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
 import org.apache.lucene.analysis.core.StopFilter;
 import org.apache.lucene.analysis.core.WhitespaceTokenizer;
-import org.apache.lucene.analysis.miscellaneous.SingleTokenTokenStream;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -68,18 +67,14 @@ public class TestWordDelimiterFilter ext
 
     // test that subwords and catenated subwords have
     // the correct offsets.
-    WordDelimiterFilter wdf = new WordDelimiterFilter(
-            new SingleTokenTokenStream(new Token("foo-bar", 5, 12)),
-    1,1,0,0,1,1,0);
+    WordDelimiterFilter wdf = new WordDelimiterFilter(new SingleTokenTokenStream(new Token("foo-bar", 5, 12)), WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, 1, 1, 0, 0, 1, 1, 0, 1, 1, null);
 
     assertTokenStreamContents(wdf, 
         new String[] { "foo", "bar", "foobar" },
         new int[] { 5, 9, 5 }, 
         new int[] { 8, 12, 12 });
 
-    wdf = new WordDelimiterFilter(
-            new SingleTokenTokenStream(new Token("foo-bar", 5, 6)),
-    1,1,0,0,1,1,0);
+    wdf = new WordDelimiterFilter(new SingleTokenTokenStream(new Token("foo-bar", 5, 6)), WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, 1, 1, 0, 0, 1, 1, 0, 1, 1, null);
     
     assertTokenStreamContents(wdf,
         new String[] { "foo", "bar", "foobar" },
@@ -90,10 +85,7 @@ public class TestWordDelimiterFilter ext
   @Test
   public void testOffsetChange() throws Exception
   {
-    WordDelimiterFilter wdf = new WordDelimiterFilter(
-      new SingleTokenTokenStream(new Token("übelkeit)", 7, 16)),
-      1,1,0,0,1,1,0
-    );
+    WordDelimiterFilter wdf = new WordDelimiterFilter(new SingleTokenTokenStream(new Token("übelkeit)", 7, 16)), WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, 1, 1, 0, 0, 1, 1, 0, 1, 1, null);
     
     assertTokenStreamContents(wdf,
         new String[] { "übelkeit" },
@@ -104,10 +96,7 @@ public class TestWordDelimiterFilter ext
   @Test
   public void testOffsetChange2() throws Exception
   {
-    WordDelimiterFilter wdf = new WordDelimiterFilter(
-      new SingleTokenTokenStream(new Token("(übelkeit", 7, 17)),
-      1,1,0,0,1,1,0
-    );
+    WordDelimiterFilter wdf = new WordDelimiterFilter(new SingleTokenTokenStream(new Token("(übelkeit", 7, 17)), WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, 1, 1, 0, 0, 1, 1, 0, 1, 1, null);
     
     assertTokenStreamContents(wdf,
         new String[] { "übelkeit" },
@@ -118,10 +107,7 @@ public class TestWordDelimiterFilter ext
   @Test
   public void testOffsetChange3() throws Exception
   {
-    WordDelimiterFilter wdf = new WordDelimiterFilter(
-      new SingleTokenTokenStream(new Token("(übelkeit", 7, 16)),
-      1,1,0,0,1,1,0
-    );
+    WordDelimiterFilter wdf = new WordDelimiterFilter(new SingleTokenTokenStream(new Token("(übelkeit", 7, 16)), WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, 1, 1, 0, 0, 1, 1, 0, 1, 1, null);
     
     assertTokenStreamContents(wdf,
         new String[] { "übelkeit" },
@@ -132,10 +118,7 @@ public class TestWordDelimiterFilter ext
   @Test
   public void testOffsetChange4() throws Exception
   {
-    WordDelimiterFilter wdf = new WordDelimiterFilter(
-      new SingleTokenTokenStream(new Token("(foo,bar)", 7, 16)),
-      1,1,0,0,1,1,0
-    );
+    WordDelimiterFilter wdf = new WordDelimiterFilter(new SingleTokenTokenStream(new Token("(foo,bar)", 7, 16)), WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, 1, 1, 0, 0, 1, 1, 0, 1, 1, null);
     
     assertTokenStreamContents(wdf,
         new String[] { "foo", "bar", "foobar"},
@@ -145,7 +128,7 @@ public class TestWordDelimiterFilter ext
 
   public void doSplit(final String input, String... output) throws Exception {
     WordDelimiterFilter wdf = new WordDelimiterFilter(new KeywordTokenizer(
-        new StringReader(input)), 1, 1, 0, 0, 0);
+                new StringReader(input)), WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, 1, 1, 0, 0, 0, 1, 0, 1, 1, null);
     
     assertTokenStreamContents(wdf, output);
   }

Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java Tue Nov 30 11:22:39 2010
@@ -114,7 +114,7 @@ public class TestDutchStemmer extends Ba
   }
   
   /**
-   * @deprecated remove this test in Lucene 4.0
+   * @deprecated (3.1) remove this test in Lucene 5.0
    */
   @Deprecated
   public void testOldBuggyStemmer() throws Exception {
@@ -139,19 +139,6 @@ public class TestDutchStemmer extends Ba
     checkOneTermReuse(a, "lichamelijkheden", "licham");
   }
   
-  /* 
-   * Test that changes to the exclusion table are applied immediately
-   * when using reusable token streams.
-   */
-  public void testExclusionTableReuse() throws Exception {
-    DutchAnalyzer a = new DutchAnalyzer(TEST_VERSION_CURRENT);
-    checkOneTermReuse(a, "lichamelijk", "licham");
-    a.setStemExclusionTable(new String[] { "lichamelijk" });
-    checkOneTermReuse(a, "lichamelijk", "lichamelijk");
-
-    
-  }
-  
   public void testExclusionTableViaCtor() throws IOException {
     CharArraySet set = new CharArraySet(Version.LUCENE_30, 1, true);
     set.add("lichamelijk");
@@ -178,7 +165,7 @@ public class TestDutchStemmer extends Ba
   /**
    * Prior to 3.1, this analyzer had no lowercase filter.
    * stopwords were case sensitive. Preserve this for back compat.
-   * @deprecated Remove this test in Lucene 4.0
+   * @deprecated (3.1) Remove this test in Lucene 5.0
    */
   @Deprecated
   public void testBuggyStopwordsCasing() throws IOException {

Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizer.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizer.java Tue Nov 30 11:22:39 2010
@@ -26,9 +26,9 @@ import java.util.regex.Pattern;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.CharReader;
 import org.apache.lucene.analysis.CharStream;
+import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.charfilter.MappingCharFilter;
 import org.apache.lucene.analysis.charfilter.NormalizeCharMap;
-import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 
 public class TestPatternTokenizer extends BaseTokenStreamTestCase 
@@ -96,9 +96,7 @@ public class TestPatternTokenizer extend
   
   /** 
    * TODO: rewrite tests not to use string comparison.
-   * @deprecated only tests TermAttribute!
    */
-  @Deprecated
   private static String tsToString(TokenStream in) throws IOException {
     StringBuilder out = new StringBuilder();
     CharTermAttribute termAtt = in.addAttribute(CharTermAttribute.class);

Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java Tue Nov 30 11:22:39 2010
@@ -22,6 +22,7 @@ import java.io.StringReader;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.util.Version;
 
 public class TestReverseStringFilter extends BaseTokenStreamTestCase {
   public void testFilter() throws Exception {
@@ -53,9 +54,11 @@ public class TestReverseStringFilter ext
   
   /**
    * Test the broken 3.0 behavior, for back compat
+   * @deprecated (3.1) Remove in Lucene 5.0
    */
+  @Deprecated
   public void testBackCompat() throws Exception {
-    assertEquals("\uDF05\uD866\uDF05\uD866", ReverseStringFilter.reverse("𩬅𩬅"));
+    assertEquals("\uDF05\uD866\uDF05\uD866", ReverseStringFilter.reverse(Version.LUCENE_30, "𩬅𩬅"));
   }
   
   public void testReverseSupplementary() throws Exception {

Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java Tue Nov 30 11:22:39 2010
@@ -18,12 +18,9 @@ package org.apache.lucene.analysis.ru;
  */
 
 import java.io.IOException;
-import java.io.InputStreamReader;
 
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.util.Version;
 
@@ -31,65 +28,16 @@ import org.apache.lucene.util.Version;
  * Test case for RussianAnalyzer.
  */
 
-public class TestRussianAnalyzer extends BaseTokenStreamTestCase
-{
-    private InputStreamReader inWords;
+public class TestRussianAnalyzer extends BaseTokenStreamTestCase {
 
-    private InputStreamReader sampleUnicode;
-
-    /**
-     * @deprecated remove this test and its datafiles in Lucene 4.0
-     * the Snowball version has its own data tests.
-     */
-    @Deprecated
-    public void testUnicode30() throws IOException
-    {
-        RussianAnalyzer ra = new RussianAnalyzer(Version.LUCENE_30);
-        inWords =
-            new InputStreamReader(
-                getClass().getResourceAsStream("testUTF8.txt"),
-                "UTF-8");
-
-        sampleUnicode =
-            new InputStreamReader(
-                getClass().getResourceAsStream("resUTF8.htm"),
-                "UTF-8");
-
-        TokenStream in = ra.tokenStream("all", inWords);
-
-        RussianLetterTokenizer sample =
-            new RussianLetterTokenizer(TEST_VERSION_CURRENT,
-                sampleUnicode);
-
-        CharTermAttribute text = in.getAttribute(CharTermAttribute.class);
-        CharTermAttribute sampleText = sample.getAttribute(CharTermAttribute.class);
-
-        for (;;)
-        {
-          if (in.incrementToken() == false)
-            break;
-
-            boolean nextSampleToken = sample.incrementToken();
-            assertEquals(
-                "Unicode",
-                text.toString(),
-                nextSampleToken == false
-                ? null
-                : sampleText.toString());
-        }
-
-        inWords.close();
-        sampleUnicode.close();
-    }
-    
-    /** Check that RussianAnalyzer doesnt discard any numbers */
+     /** Check that RussianAnalyzer doesnt discard any numbers */
     public void testDigitsInRussianCharset() throws IOException
     {
       RussianAnalyzer ra = new RussianAnalyzer(TEST_VERSION_CURRENT);
       assertAnalyzesTo(ra, "text 1000", new String[] { "text", "1000" });
     }
     
-    /** @deprecated remove this test in Lucene 4.0: stopwords changed */
+    /** @deprecated (3.1) remove this test in Lucene 5.0: stopwords changed */
     @Deprecated
     public void testReusableTokenStream30() throws Exception {
       Analyzer a = new RussianAnalyzer(Version.LUCENE_30);

Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLetterTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLetterTokenizer.java?rev=1040463&r1=1040462&r2=1040463&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLetterTokenizer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLetterTokenizer.java Tue Nov 30 11:22:39 2010
@@ -25,7 +25,7 @@ import org.apache.lucene.util.Version;
 
 /**
  * Testcase for {@link RussianLetterTokenizer}
- * @deprecated Remove this test class in Lucene 4.0
+ * @deprecated (3.1) Remove this test class in Lucene 5.0
  */
 @Deprecated
 public class TestRussianLetterTokenizer extends BaseTokenStreamTestCase {