You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by mi...@apache.org on 2009/07/14 18:32:18 UTC

svn commit: r793966 - in /lucene/java/trunk: ./ src/java/org/apache/lucene/analysis/ src/java/org/apache/lucene/analysis/standard/ src/test/org/apache/lucene/analysis/

Author: mikemccand
Date: Tue Jul 14 16:32:18 2009
New Revision: 793966

URL: http://svn.apache.org/viewvc?rev=793966&view=rev
Log:
LUCENE-1678: if a subclass of a core Analyzer overrides tokenStream but not reusableTokenStream, then the core analyzer's reusableTokenStream will now fall back to tokenStream, fixing the previous back-compat break

Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/src/java/org/apache/lucene/analysis/Analyzer.java
    lucene/java/trunk/src/java/org/apache/lucene/analysis/KeywordAnalyzer.java
    lucene/java/trunk/src/java/org/apache/lucene/analysis/PerFieldAnalyzerWrapper.java
    lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
    lucene/java/trunk/src/test/org/apache/lucene/analysis/TestAnalyzers.java

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=793966&r1=793965&r2=793966&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Tue Jul 14 16:32:18 2009
@@ -135,6 +135,13 @@
     true in all Lucene releases before 2.3, but was broken in 2.3 and
     2.4, and is now fixed in 2.9.  (Mike McCandless)
 
+11. LUCENE-1678: The addition of Analyzer.reusableTokenStream
+    accidentally broke back compatibility of external analyzers that
+    subclassed core analyzers that implemented tokenStream but not
+    reusableTokenStream.  This is now fixed, such that if
+    reusableTokenStream is invoked on such a subclass, that method
+    will forcibly fall back to tokenStream.  (Mike McCandless)
+
 API Changes
 
 1. LUCENE-1419: Add expert API to set custom indexing chain. This API is 

Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/Analyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/Analyzer.java?rev=793966&r1=793965&r2=793966&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/Analyzer.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/Analyzer.java Tue Jul 14 16:32:18 2009
@@ -19,6 +19,7 @@
 
 import java.io.Reader;
 import java.io.IOException;
+import java.lang.reflect.Method;
 
 import org.apache.lucene.util.CloseableThreadLocal;
 import org.apache.lucene.store.AlreadyClosedException;
@@ -32,7 +33,8 @@
  */
 public abstract class Analyzer {
   /** Creates a TokenStream which tokenizes all the text in the provided
-   * Reader.  Must be able to handle null field name for backward compatibility.
+   * Reader.  Must be able to handle null field name for
+   * backward compatibility.
    */
   public abstract TokenStream tokenStream(String fieldName, Reader reader);
 
@@ -79,6 +81,29 @@
     }
   }
 
+  protected boolean overridesTokenStreamMethod;
+
+  /** @deprecated This is only present to preserve
+   *  back-compat of classes that subclass a core analyzer
+   *  and override tokenStream but not reusableTokenStream */
+  protected void setOverridesTokenStreamMethod(Class baseClass) {
+
+    final Class[] params = new Class[2];
+    params[0] = String.class;
+    params[1] = Reader.class;
+    
+    try {
+      Method m = this.getClass().getMethod("tokenStream", params);
+      if (m != null) {
+        overridesTokenStreamMethod = m.getDeclaringClass() != baseClass;
+      } else {
+        overridesTokenStreamMethod = false;
+      }
+    } catch (NoSuchMethodException nsme) {
+      overridesTokenStreamMethod = false;
+    }
+  }
+
 
   /**
    * Invoked before indexing a Fieldable instance if

Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/KeywordAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/KeywordAnalyzer.java?rev=793966&r1=793965&r2=793966&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/KeywordAnalyzer.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/KeywordAnalyzer.java Tue Jul 14 16:32:18 2009
@@ -25,18 +25,27 @@
  * for data like zip codes, ids, and some product names.
  */
 public class KeywordAnalyzer extends Analyzer {
+  public KeywordAnalyzer() {
+    setOverridesTokenStreamMethod(KeywordAnalyzer.class);
+  }
   public TokenStream tokenStream(String fieldName,
                                  final Reader reader) {
     return new KeywordTokenizer(reader);
   }
   public TokenStream reusableTokenStream(String fieldName,
                                          final Reader reader) throws IOException {
+    if (overridesTokenStreamMethod) {
+      // LUCENE-1678: force fallback to tokenStream() if we
+      // have been subclassed and that subclass overrides
+      // tokenStream but not reusableTokenStream
+      return tokenStream(fieldName, reader);
+    }
     Tokenizer tokenizer = (Tokenizer) getPreviousTokenStream();
     if (tokenizer == null) {
       tokenizer = new KeywordTokenizer(reader);
       setPreviousTokenStream(tokenizer);
     } else
-      	tokenizer.reset(reader);
+      tokenizer.reset(reader);
     return tokenizer;
   }
 }

Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/PerFieldAnalyzerWrapper.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/PerFieldAnalyzerWrapper.java?rev=793966&r1=793965&r2=793966&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/PerFieldAnalyzerWrapper.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/PerFieldAnalyzerWrapper.java Tue Jul 14 16:32:18 2009
@@ -55,6 +55,7 @@
    */
   public PerFieldAnalyzerWrapper(Analyzer defaultAnalyzer) {
     this.defaultAnalyzer = defaultAnalyzer;
+    setOverridesTokenStreamMethod(PerFieldAnalyzerWrapper.class);
   }
 
   /**
@@ -77,6 +78,12 @@
   }
   
   public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
+    if (overridesTokenStreamMethod) {
+      // LUCENE-1678: force fallback to tokenStream() if we
+      // have been subclassed and that subclass overrides
+      // tokenStream but not reusableTokenStream
+      return tokenStream(fieldName, reader);
+    }
     Analyzer analyzer = (Analyzer) analyzerMap.get(fieldName);
     if (analyzer == null)
       analyzer = defaultAnalyzer;

Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java?rev=793966&r1=793965&r2=793966&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java Tue Jul 14 16:32:18 2009
@@ -44,7 +44,6 @@
  */
 public class StandardAnalyzer extends Analyzer {
   private Set stopSet;
-  private Version matchVersion;
 
   /**
    * Specifies whether deprecated acronyms should be replaced with HOST type.
@@ -262,7 +261,7 @@
   }
 
   private final void init(Version matchVersion) {
-    this.matchVersion = matchVersion;
+    setOverridesTokenStreamMethod(StandardAnalyzer.class);
     if (matchVersion.onOrAfter(Version.LUCENE_29)) {
       enableStopPositionIncrements = true;
     } else {
@@ -314,6 +313,12 @@
 
   /** @deprecated Use {@link #tokenStream} instead */
   public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
+    if (overridesTokenStreamMethod) {
+      // LUCENE-1678: force fallback to tokenStream() if we
+      // have been subclassed and that subclass overrides
+      // tokenStream but not reusableTokenStream
+      return tokenStream(fieldName, reader);
+    }
     SavedStreams streams = (SavedStreams) getPreviousTokenStream();
     if (streams == null) {
       streams = new SavedStreams();

Modified: lucene/java/trunk/src/test/org/apache/lucene/analysis/TestAnalyzers.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/analysis/TestAnalyzers.java?rev=793966&r1=793965&r2=793966&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/analysis/TestAnalyzers.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/analysis/TestAnalyzers.java Tue Jul 14 16:32:18 2009
@@ -19,8 +19,10 @@
 
 import java.io.IOException;
 import java.io.StringReader;
+import java.io.Reader;
 
 import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.index.Payload;
@@ -130,6 +132,21 @@
     x = StandardTokenizer.CJ;
     String[] y = StandardTokenizer.TOKEN_TYPES;
   }
+
+  private static class MyStandardAnalyzer extends StandardAnalyzer {
+    public TokenStream tokenStream(String field, Reader reader) {
+      return new WhitespaceAnalyzer().tokenStream(field, reader);
+    }
+  }
+
+  public void testSubclassOverridingOnlyTokenStream() throws Throwable {
+    Analyzer a = new MyStandardAnalyzer();
+    TokenStream ts = a.reusableTokenStream("field", new StringReader("the"));
+    // StandardAnalyzer will discard "the" (it's a
+    // stopword), but my subclass will not:
+    assertTrue(ts.incrementToken());
+    assertFalse(ts.incrementToken());
+  }
 }
 
 class PayloadSetter extends TokenFilter {