You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/08/30 21:11:15 UTC

svn commit: r1379071 - in /lucene/dev/branches/branch_4x: ./ lucene/ lucene/analysis/ lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/ lucene/analysis/common/src/java/org/apache/lucene/analysis/cn/ lucene/analysis/common/src/java/org/apa...

Author: rmuir
Date: Thu Aug 30 19:11:14 2012
New Revision: 1379071

URL: http://svn.apache.org/viewvc?rev=1379071&view=rev
Log:
LUCENE-4343: clear up more Tokenizer.setReader/TokenStream.reset issues

Modified:
    lucene/dev/branches/branch_4x/   (props changed)
    lucene/dev/branches/branch_4x/lucene/   (props changed)
    lucene/dev/branches/branch_4x/lucene/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_4x/lucene/analysis/   (props changed)
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternAnalyzer.java
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizer.java
    lucene/dev/branches/branch_4x/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java
    lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java
    lucene/dev/branches/branch_4x/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java
    lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java
    lucene/dev/branches/branch_4x/lucene/core/   (props changed)
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/analysis/TokenStream.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/analysis/Tokenizer.java
    lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
    lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestTermRangeQuery.java
    lucene/dev/branches/branch_4x/lucene/highlighter/   (props changed)
    lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java
    lucene/dev/branches/branch_4x/lucene/queryparser/   (props changed)
    lucene/dev/branches/branch_4x/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiPhraseQueryParsing.java
    lucene/dev/branches/branch_4x/lucene/spatial/   (props changed)
    lucene/dev/branches/branch_4x/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/PrefixCellsTokenizer.java
    lucene/dev/branches/branch_4x/lucene/test-framework/   (props changed)
    lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenizer.java
    lucene/dev/branches/branch_4x/solr/   (props changed)
    lucene/dev/branches/branch_4x/solr/core/   (props changed)
    lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/analysis/TrieTokenizerFactory.java
    lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/schema/BoolField.java
    lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java

Modified: lucene/dev/branches/branch_4x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/CHANGES.txt?rev=1379071&r1=1379070&r2=1379071&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/lucene/CHANGES.txt Thu Aug 30 19:11:14 2012
@@ -77,6 +77,10 @@ API Changes
   fields in a stored document, has been replaced with the simpler
   StoredFieldVisitor API.  (Mike McCandless)
 
+* LUCENE-4343: Made Tokenizer.setReader final. This is a setter that should
+  not be overriden by subclasses: per-stream initialization should happen
+  in reset().  (Robert Muir)
+
 Bug Fixes
 
 * LUCENE-4297: BooleanScorer2 would multiply the coord() factor

Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java?rev=1379071&r1=1379070&r2=1379071&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java Thu Aug 30 19:11:14 2012
@@ -308,10 +308,4 @@ public final class CJKTokenizer extends 
       preIsTokened = false;
       tokenType = WORD_TYPE;
     }
-    
-    @Override
-    public void setReader(Reader reader) throws IOException {
-      super.setReader(reader);
-      reset();
-    }
 }

Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java?rev=1379071&r1=1379070&r2=1379071&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java Thu Aug 30 19:11:14 2012
@@ -166,10 +166,4 @@ public final class ChineseTokenizer exte
       super.reset();
       offset = bufferIndex = dataLen = 0;
     }
-    
-    @Override
-    public void setReader(Reader input) throws IOException {
-      super.setReader(input);
-      reset();
-    }
 }

Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java?rev=1379071&r1=1379070&r2=1379071&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java Thu Aug 30 19:11:14 2012
@@ -94,8 +94,7 @@ public final class KeywordTokenizer exte
   }
 
   @Override
-  public void setReader(Reader input) throws IOException {
-    super.setReader(input);
+  public void reset() throws IOException {
     this.done = false;
   }
 }

Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternAnalyzer.java?rev=1379071&r1=1379070&r2=1379071&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternAnalyzer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternAnalyzer.java Thu Aug 30 19:11:14 2012
@@ -191,16 +191,16 @@ public final class PatternAnalyzer exten
   public TokenStreamComponents createComponents(String fieldName, Reader reader, String text) {
     // Ideally the Analyzer superclass should have a method with the same signature, 
     // with a default impl that simply delegates to the StringReader flavour. 
-    if (text == null) 
-      throw new IllegalArgumentException("text must not be null");
+    if (reader == null) 
+      reader = new FastStringReader(text);
     
     if (pattern == NON_WORD_PATTERN) { // fast path
-      return new TokenStreamComponents(new FastStringTokenizer(reader, text, true, toLowerCase, stopWords));
+      return new TokenStreamComponents(new FastStringTokenizer(reader, true, toLowerCase, stopWords));
     } else if (pattern == WHITESPACE_PATTERN) { // fast path
-      return new TokenStreamComponents(new FastStringTokenizer(reader, text, false, toLowerCase, stopWords));
+      return new TokenStreamComponents(new FastStringTokenizer(reader, false, toLowerCase, stopWords));
     }
 
-    Tokenizer tokenizer = new PatternTokenizer(reader, text, pattern, toLowerCase);
+    Tokenizer tokenizer = new PatternTokenizer(reader, pattern, toLowerCase);
     TokenStream result = (stopWords != null) ? new StopFilter(matchVersion, tokenizer, stopWords) : tokenizer;
     return new TokenStreamComponents(tokenizer, result);
   }
@@ -218,12 +218,7 @@ public final class PatternAnalyzer exten
    */
   @Override
   public TokenStreamComponents createComponents(String fieldName, Reader reader) {
-    try {
-      String text = toString(reader);
-      return createComponents(fieldName, reader, text);
-    } catch (IOException e) {
-      throw new RuntimeException(e);
-    }
+    return createComponents(fieldName, reader, null);
   }
   
   /**
@@ -333,11 +328,10 @@ public final class PatternAnalyzer exten
     private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
     private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
     
-    public PatternTokenizer(Reader input, String str, Pattern pattern, boolean toLowerCase) {
+    public PatternTokenizer(Reader input, Pattern pattern, boolean toLowerCase) {
       super(input);
       this.pattern = pattern;
-      this.str = str;
-      this.matcher = pattern.matcher(str);
+      this.matcher = pattern.matcher("");
       this.toLowerCase = toLowerCase;
     }
 
@@ -376,15 +370,10 @@ public final class PatternAnalyzer exten
     }
 
     @Override
-    public void setReader(Reader input) throws IOException {
-      super.setReader(input);
-      this.str = PatternAnalyzer.toString(input);
-      this.matcher = pattern.matcher(this.str);
-    }
-
-    @Override
     public void reset() throws IOException {
       super.reset();
+      this.str = PatternAnalyzer.toString(input);
+      this.matcher = pattern.matcher(this.str);
       this.pos = 0;
     }
   }
@@ -408,9 +397,8 @@ public final class PatternAnalyzer exten
     private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
     private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
     
-    public FastStringTokenizer(Reader input, String str, boolean isLetter, boolean toLowerCase, CharArraySet stopWords) {
+    public FastStringTokenizer(Reader input, boolean isLetter, boolean toLowerCase, CharArraySet stopWords) {
       super(input);
-      this.str = str;
       this.isLetter = isLetter;
       this.toLowerCase = toLowerCase;
       this.stopWords = stopWords;
@@ -481,14 +469,9 @@ public final class PatternAnalyzer exten
     }
 
     @Override
-    public void setReader(Reader input) throws IOException {
-      super.setReader(input);
-      this.str = PatternAnalyzer.toString(input);
-    }
-
-    @Override
     public void reset() throws IOException {
       super.reset();
+      this.str = PatternAnalyzer.toString(input);
       this.pos = 0;
     }
   }

Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java?rev=1379071&r1=1379070&r2=1379071&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java Thu Aug 30 19:11:14 2012
@@ -78,9 +78,6 @@ public final class PatternTokenizer exte
     if (group >= 0 && group > matcher.groupCount()) {
       throw new IllegalArgumentException("invalid group specified: pattern only has: " + matcher.groupCount() + " capturing groups");
     }
-    fillBuffer(str, input);
-    matcher.reset(str);
-    index = 0;
   }
 
   @Override
@@ -136,8 +133,7 @@ public final class PatternTokenizer exte
   }
 
   @Override
-  public void setReader(Reader input) throws IOException {
-    super.setReader(input);
+  public void reset() throws IOException {
     fillBuffer(str, input);
     matcher.reset(str);
     index = 0;

Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java?rev=1379071&r1=1379070&r2=1379071&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java Thu Aug 30 19:11:14 2012
@@ -175,8 +175,7 @@ public final class ClassicTokenizer exte
   }
 
   @Override
-  public void setReader(Reader reader) throws IOException {
-    super.setReader(reader);
-    scanner.yyreset(reader);
+  public void reset() throws IOException {
+    scanner.yyreset(input);
   }
 }

Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java?rev=1379071&r1=1379070&r2=1379071&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java Thu Aug 30 19:11:14 2012
@@ -212,8 +212,7 @@ public final class StandardTokenizer ext
   }
 
   @Override
-  public void setReader(Reader reader) throws IOException {
-    super.setReader(reader);
-    scanner.yyreset(reader);
+  public void reset() throws IOException {
+    scanner.yyreset(input);
   }
 }

Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java?rev=1379071&r1=1379070&r2=1379071&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java Thu Aug 30 19:11:14 2012
@@ -178,8 +178,7 @@ public final class UAX29URLEmailTokenize
   }
 
   @Override
-  public void setReader(Reader reader) throws IOException {
-    super.setReader(reader);
-    scanner.yyreset(reader);
+  public void reset() throws IOException {
+    scanner.yyreset(input);
   }
 }

Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java?rev=1379071&r1=1379070&r2=1379071&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java Thu Aug 30 19:11:14 2012
@@ -112,7 +112,8 @@ public abstract class CharTokenizer exte
     charUtils = CharacterUtils.getInstance(matchVersion);
   }
   
-  private int offset = 0, bufferIndex = 0, dataLen = 0, finalOffset = 0;
+  // note: bufferIndex is -1 here to best-effort AIOOBE consumers that don't call reset()
+  private int offset = 0, bufferIndex = -1, dataLen = 0, finalOffset = 0;
   private static final int MAX_WORD_LEN = 255;
   private static final int IO_BUFFER_SIZE = 4096;
   
@@ -196,8 +197,7 @@ public abstract class CharTokenizer exte
   }
 
   @Override
-  public void setReader(Reader input) throws IOException {
-    super.setReader(input);
+  public void reset() throws IOException {
     bufferIndex = 0;
     offset = 0;
     dataLen = 0;

Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java?rev=1379071&r1=1379070&r2=1379071&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java Thu Aug 30 19:11:14 2012
@@ -318,19 +318,13 @@ public final class WikipediaTokenizer ex
   */
   @Override
   public void reset() throws IOException {
-    super.reset();
+    scanner.yyreset(input);
     tokens = null;
     scanner.reset();
     first = true;
   }
 
   @Override
-  public void setReader(Reader reader) throws IOException {
-    super.setReader(reader);
-    scanner.yyreset(input);
-  }
-
-  @Override
   public void end() {
     // set final offset
     final int finalOffset = correctOffset(scanner.yychar() + scanner.yylength());

Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java?rev=1379071&r1=1379070&r2=1379071&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java Thu Aug 30 19:11:14 2012
@@ -39,6 +39,7 @@ public class CommonGramsFilterTest exten
     CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
     
     CharTermAttribute term = cgf.addAttribute(CharTermAttribute.class);
+    cgf.reset();
     assertTrue(cgf.incrementToken());
     assertEquals("How", term.toString());
     assertTrue(cgf.incrementToken());
@@ -61,6 +62,7 @@ public class CommonGramsFilterTest exten
     CommonGramsQueryFilter nsf = new CommonGramsQueryFilter(cgf);
     
     CharTermAttribute term = wt.addAttribute(CharTermAttribute.class);
+    nsf.reset();
     assertTrue(nsf.incrementToken());
     assertEquals("How_the", term.toString());
     assertTrue(nsf.incrementToken());

Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java?rev=1379071&r1=1379070&r2=1379071&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java Thu Aug 30 19:11:14 2012
@@ -235,6 +235,7 @@ public class TestCompoundWordTokenFilter
         CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);
     
     CharTermAttribute termAtt = tf.getAttribute(CharTermAttribute.class);
+    tf.reset();
     assertTrue(tf.incrementToken());
     assertEquals("Rindfleischüberwachungsgesetz", termAtt.toString());
     assertTrue(tf.incrementToken());
@@ -256,6 +257,7 @@ public class TestCompoundWordTokenFilter
         CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
         CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);
     MockRetainAttribute retAtt = stream.addAttribute(MockRetainAttribute.class);
+    stream.reset();
     while (stream.incrementToken()) {
       assertTrue("Custom attribute value was lost", retAtt.getRetain());
     }

Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java?rev=1379071&r1=1379070&r2=1379071&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java Thu Aug 30 19:11:14 2012
@@ -81,6 +81,7 @@ public class TestAnalyzers extends BaseT
 
   void verifyPayload(TokenStream ts) throws IOException {
     PayloadAttribute payloadAtt = ts.getAttribute(PayloadAttribute.class);
+    ts.reset();
     for(byte b=1;;b++) {
       boolean hasNext = ts.incrementToken();
       if (!hasNext) break;

Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java?rev=1379071&r1=1379070&r2=1379071&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java Thu Aug 30 19:11:14 2012
@@ -66,6 +66,7 @@ public class TestStopAnalyzer extends Ba
     assertNotNull(stream);
     CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
     
+    stream.reset();
     while (stream.incrementToken()) {
       String text = termAtt.toString();
       assertFalse(stopWordsSet.contains(text));
@@ -83,6 +84,7 @@ public class TestStopAnalyzer extends Ba
     CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
     PositionIncrementAttribute posIncrAtt = stream.addAttribute(PositionIncrementAttribute.class);
 
+    stream.reset();
     while (stream.incrementToken()) {
       String text = termAtt.toString();
       assertFalse(stopWordsSet.contains(text));

Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizer.java?rev=1379071&r1=1379070&r2=1379071&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizer.java Thu Aug 30 19:11:14 2012
@@ -111,6 +111,7 @@ public class TestPatternTokenizer extend
     // assign bogus values
     in.clearAttributes();
     termAtt.setEmpty().append("bogusTerm");
+    in.reset();
     while (in.incrementToken()) {
       if (out.length() > 0)
         out.append(' ');

Modified: lucene/dev/branches/branch_4x/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java?rev=1379071&r1=1379070&r2=1379071&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java Thu Aug 30 19:11:14 2012
@@ -45,7 +45,8 @@ public final class ICUTokenizer extends 
   /** true length of text in the buffer */
   private int length = 0; 
   /** length in buffer that can be evaluated safely, up to a safe end point */
-  private int usableLength = 0; 
+  // note: usableLength is -1 here to best-effort AIOOBE consumers that don't call reset()
+  private int usableLength = -1; 
   /** accumulated offset of previous buffers for this reader, for offsetAtt */
   private int offset = 0; 
 
@@ -101,12 +102,6 @@ public final class ICUTokenizer extends 
     breaker.setText(buffer, 0, 0);
     length = usableLength = offset = 0;
   }
-
-  @Override
-  public void setReader(Reader input) throws IOException {
-    super.setReader(input);
-    reset();
-  }
   
   @Override
   public void end() {

Modified: lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java?rev=1379071&r1=1379070&r2=1379071&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java Thu Aug 30 19:11:14 2012
@@ -245,14 +245,8 @@ public final class JapaneseTokenizer ext
   }
 
   @Override
-  public void setReader(Reader input) throws IOException {
-    super.setReader(input);
-    buffer.reset(input);
-  }
-
-  @Override
   public void reset() throws IOException {
-    super.reset();
+    buffer.reset(input);
     resetState();
   }
 

Modified: lucene/dev/branches/branch_4x/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java?rev=1379071&r1=1379070&r2=1379071&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java Thu Aug 30 19:11:14 2012
@@ -112,17 +112,10 @@ public final class SentenceTokenizer ext
 
   @Override
   public void reset() throws IOException {
-    super.reset();
     tokenStart = tokenEnd = 0;
   }
 
   @Override
-  public void setReader(Reader input) throws IOException {
-    super.setReader(input);
-    reset();
-  }
-
-  @Override
   public void end() {
     // set final offset
     final int finalOffset = correctOffset(tokenEnd);

Modified: lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java?rev=1379071&r1=1379070&r2=1379071&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java Thu Aug 30 19:11:14 2012
@@ -80,8 +80,7 @@ public abstract class BaseUIMATokenizer 
   }
 
   @Override
-  public void setReader(Reader input) throws IOException {
-    super.setReader(input);
+  public void reset() throws IOException {
     iterator = null;
   }
 

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/analysis/TokenStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/analysis/TokenStream.java?rev=1379071&r1=1379070&r2=1379071&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/analysis/TokenStream.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/analysis/TokenStream.java Thu Aug 30 19:11:14 2012
@@ -170,12 +170,8 @@ public abstract class TokenStream extend
    * This method is called by a consumer before it begins consumption using
    * {@link #incrementToken()}.
    * <p/>
-   * Resets this stream to the beginning.  As all TokenStreams must be reusable,
-   * any implementations which have state that needs to be reset between usages
-   * of the TokenStream, must implement this method. Note that if your TokenStream
-   * caches tokens and feeds them back again after a reset, it is imperative
-   * that you clone the tokens when you store them away (on the first pass) as
-   * well as when you return them (on future passes after {@link #reset()}).
+   * Resets this stream to a clean state. Stateful implementations must implement
+   * this method so that they can be reused, just as if they had been created fresh.
    */
   public void reset() throws IOException {}
   

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/analysis/Tokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/analysis/Tokenizer.java?rev=1379071&r1=1379070&r2=1379071&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/analysis/Tokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/analysis/Tokenizer.java Thu Aug 30 19:11:14 2012
@@ -82,12 +82,18 @@ public abstract class Tokenizer extends 
     return (input instanceof CharFilter) ? ((CharFilter) input).correctOffset(currentOff) : currentOff;
   }
 
-  /** Expert: Reset the tokenizer to a new reader.  Typically, an
+  /** Expert: Set a new reader on the Tokenizer.  Typically, an
    *  analyzer (in its tokenStream method) will use
    *  this to re-use a previously created tokenizer. */
-  public void setReader(Reader input) throws IOException {
+  public final void setReader(Reader input) throws IOException {
     assert input != null: "input must not be null";
     this.input = input;
+    assert setReaderTestPoint();
+  }
+  
+  // only used by assert, for testing
+  boolean setReaderTestPoint() {
+    return true;
   }
 }
 

Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java?rev=1379071&r1=1379070&r2=1379071&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java Thu Aug 30 19:11:14 2012
@@ -1545,7 +1545,7 @@ public class TestIndexWriter extends Luc
     }
 
     @Override
-    public void setReader(Reader input) throws IOException {
+    public void reset() throws IOException {
        this.upto = 0;
        final StringBuilder b = new StringBuilder();
        final char[] buffer = new char[1024];

Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestTermRangeQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestTermRangeQuery.java?rev=1379071&r1=1379070&r2=1379071&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestTermRangeQuery.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestTermRangeQuery.java Thu Aug 30 19:11:14 2012
@@ -227,8 +227,7 @@ public class TestTermRangeQuery extends 
       }
 
       @Override
-      public final void setReader(Reader reader) throws IOException {
-        super.setReader(reader);
+      public void reset() throws IOException {;
         done = false;
       }
     }

Modified: lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java?rev=1379071&r1=1379070&r2=1379071&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java (original)
+++ lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java Thu Aug 30 19:11:14 2012
@@ -176,6 +176,8 @@ public abstract class AbstractTestCase e
 
     BytesRef bytesRef = termAttribute.getBytesRef();
 
+    tokenStream.reset();
+    
     while (tokenStream.incrementToken()) {
       termAttribute.fillBytesRef();
       bytesRefs.add(BytesRef.deepCopyOf(bytesRef));
@@ -317,12 +319,6 @@ public abstract class AbstractTestCase e
     }
     
     @Override
-    public void setReader( Reader input ) throws IOException {
-      super.setReader( input );
-      reset();
-    }
-    
-    @Override
     public void reset() {
       startTerm = 0;
       nextStartOffset = 0;

Modified: lucene/dev/branches/branch_4x/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiPhraseQueryParsing.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiPhraseQueryParsing.java?rev=1379071&r1=1379070&r2=1379071&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiPhraseQueryParsing.java (original)
+++ lucene/dev/branches/branch_4x/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiPhraseQueryParsing.java Thu Aug 30 19:11:14 2012
@@ -81,8 +81,7 @@ public class TestMultiPhraseQueryParsing
     }
 
     @Override
-    public void setReader(Reader reader) throws IOException {
-      super.setReader(reader);
+    public void reset() throws IOException {
       this.upto = 0;
       this.lastPos = 0;
     }

Modified: lucene/dev/branches/branch_4x/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/PrefixCellsTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/PrefixCellsTokenizer.java?rev=1379071&r1=1379070&r2=1379071&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/PrefixCellsTokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/PrefixCellsTokenizer.java Thu Aug 30 19:11:14 2012
@@ -76,14 +76,4 @@ class PrefixCellsTokenizer extends Token
     termAtt.setLength(length);
     return length > 0; // should only happen at the end
   }
-
-  @Override
-  public final void end() {
-
-  }
-
-  @Override
-  public void setReader(Reader input) throws IOException {
-    super.setReader(input);
-  }
 }
\ No newline at end of file

Modified: lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenizer.java?rev=1379071&r1=1379070&r2=1379071&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenizer.java Thu Aug 30 19:11:14 2012
@@ -227,10 +227,10 @@ public class MockTokenizer extends Token
   }
 
   @Override
-  public void setReader(Reader input) throws IOException {
-    super.setReader(input);
+  boolean setReaderTestPoint() {
     assert !enableChecks || streamState == State.CLOSE : "setReader() called in wrong state: " + streamState;
     streamState = State.SETREADER;
+    return true;
   }
 
   @Override

Modified: lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/analysis/TrieTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/analysis/TrieTokenizerFactory.java?rev=1379071&r1=1379070&r2=1379071&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/analysis/TrieTokenizerFactory.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/analysis/TrieTokenizerFactory.java Thu Aug 30 19:11:14 2012
@@ -72,15 +72,11 @@ final class TrieTokenizer extends Tokeni
     this.type = type;
     this.precisionStep = precisionStep;
     this.ts = ts;
-
-    setReader(input);
   }
 
   @Override
-  public void setReader(Reader input) {
+  public void reset() {
    try {
-      super.setReader(input);
-      input = super.input;
       char[] buf = new char[32];
       int len = input.read(buf);
       this.startOfs = correctOffset(0);
@@ -113,6 +109,7 @@ final class TrieTokenizer extends Tokeni
     } catch (IOException e) {
       throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unable to create TrieIndexTokenizer", e);
     }
+    ts.reset();
   }
 
   @Override
@@ -120,12 +117,6 @@ final class TrieTokenizer extends Tokeni
     super.close();
     ts.close();
   }
-  
-  @Override
-  public void reset() throws IOException {
-    super.reset();
-    ts.reset();
-  }
 
   @Override
   public boolean incrementToken() {

Modified: lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/schema/BoolField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/schema/BoolField.java?rev=1379071&r1=1379070&r2=1379071&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/schema/BoolField.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/schema/BoolField.java Thu Aug 30 19:11:14 2012
@@ -71,9 +71,8 @@ public class BoolField extends Primitive
         boolean done = false;
 
         @Override
-        public void setReader(Reader input) throws IOException {
+        public void reset() throws IOException {
           done = false;
-          super.setReader(input);
         }
 
         @Override

Modified: lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java?rev=1379071&r1=1379070&r2=1379071&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java Thu Aug 30 19:11:14 2012
@@ -81,13 +81,8 @@ public class PreAnalyzedField extends Fi
     return new SolrAnalyzer() {
       
       @Override
-      protected TokenStreamComponents createComponents(String fieldName,
-          Reader reader) {
-        try {
-          return new TokenStreamComponents(new PreAnalyzedTokenizer(reader, parser));
-        } catch (IOException e) {
-          return null;
-        }
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        return new TokenStreamComponents(new PreAnalyzedTokenizer(reader, parser));
       }
       
     };
@@ -169,6 +164,7 @@ public class PreAnalyzedField extends Fi
       return null;
     }
     PreAnalyzedTokenizer parse = new PreAnalyzedTokenizer(new StringReader(val), parser);
+    parse.reset(); // consume
     Field f = (Field)super.createField(field, val, boost);
     if (parse.getStringValue() != null) {
       f.setStringValue(parse.getStringValue());
@@ -195,11 +191,11 @@ public class PreAnalyzedField extends Fi
     private String stringValue = null;
     private byte[] binaryValue = null;
     private PreAnalyzedParser parser;
+    private Reader lastReader;
     
-    public PreAnalyzedTokenizer(Reader reader, PreAnalyzedParser parser) throws IOException {
+    public PreAnalyzedTokenizer(Reader reader, PreAnalyzedParser parser) {
       super(reader);
       this.parser = parser;
-      setReader(reader);
     }
     
     public boolean hasTokenStream() {
@@ -229,24 +225,30 @@ public class PreAnalyzedField extends Fi
       return true;
     }
   
-    public final void reset() {
+    @Override
+    public final void reset() throws IOException {
+      // NOTE: this acts like rewind if you call it again
+      if (input != lastReader) {
+        lastReader = input;
+        cachedStates.clear();
+        stringValue = null;
+        binaryValue = null;
+        ParseResult res = parser.parse(input, this);
+        if (res != null) {
+          stringValue = res.str;
+          binaryValue = res.bin;
+          if (res.states != null) {
+            cachedStates.addAll(res.states);
+          }
+        }
+      }
       it = cachedStates.iterator();
     }
 
     @Override
-    public void setReader(Reader input) throws IOException {
-      super.setReader(input);
-      cachedStates.clear();
-      stringValue = null;
-      binaryValue = null;
-      ParseResult res = parser.parse(input, this);
-      if (res != null) {
-        stringValue = res.str;
-        binaryValue = res.bin;
-        if (res.states != null) {
-          cachedStates.addAll(res.states);
-        }
-      }
+    public void close() throws IOException {
+      super.close();
+      lastReader = null; // just a ref, null for gc
     }
   }