You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by us...@apache.org on 2010/01/15 14:42:20 UTC

svn commit: r899627 - in /lucene/java/trunk: ./ contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/ contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/ contrib/analyzers/common/src/java/org/apache/lucene/analy...

Author: uschindler
Date: Fri Jan 15 13:42:18 2010
New Revision: 899627

URL: http://svn.apache.org/viewvc?rev=899627&view=rev
Log:
LUCENE-2211: Fix various missing clearAttributes() and improve BaseTokenStreamTestCase to check for this trap

Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java
    lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAwareTokenFilter.java
    lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
    lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
    lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/shingle/ShingleMatrixFilter.java
    lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAndSuffixAwareTokenFilter.java
    lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAwareTokenFilter.java
    lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java
    lucene/java/trunk/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java
    lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java
    lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java
    lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterPhraseTest.java
    lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
    lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
    lucene/java/trunk/contrib/misc/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java
    lucene/java/trunk/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java
    lucene/java/trunk/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java
    lucene/java/trunk/contrib/snowball/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java
    lucene/java/trunk/contrib/wikipedia/src/test/org/apache/lucene/wikipedia/analysis/WikipediaTokenizerTest.java
    lucene/java/trunk/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
    lucene/java/trunk/src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java
    lucene/java/trunk/src/test/org/apache/lucene/analysis/TestTeeSinkTokenFilter.java
    lucene/java/trunk/src/test/org/apache/lucene/index/TestDocumentWriter.java
    lucene/java/trunk/src/test/org/apache/lucene/index/TestPayloads.java
    lucene/java/trunk/src/test/org/apache/lucene/index/TestTermVectorsReader.java
    lucene/java/trunk/src/test/org/apache/lucene/index/TestTermdocPerf.java
    lucene/java/trunk/src/test/org/apache/lucene/queryParser/TestQueryParser.java
    lucene/java/trunk/src/test/org/apache/lucene/search/TestPositionIncrement.java
    lucene/java/trunk/src/test/org/apache/lucene/search/TestTermRangeQuery.java

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=899627&r1=899626&r2=899627&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Fri Jan 15 13:42:18 2010
@@ -96,6 +96,11 @@
   implementation class when interface was loaded by a different
   class loader.  (Uwe Schindler, reported on java-user by Ahmed El-dawy)
   
+* LUCENE-2211: Fix missing clearAttributes() calls in contrib:
+  ShingleMatrix, PrefixAware, compounds, NGramTokenFilter,
+  EdgeNGramTokenFilter, Highlighter, and MemoryIndex.
+  (Uwe Schindler, Robert Muir)
+  
 New features
 
 * LUCENE-2128: Parallelized fetching document frequencies during weight
@@ -197,6 +202,10 @@
 
 * LUCENE-2170: Fix thread starvation problems.  (Uwe Schindler)
 
+* LUCENE-2211: Improves BaseTokenStreamTestCase to use a fake attribute
+  that checks if clearAttributes() was called correctly.
+  (Uwe Schindler, Robert Muir)
+  
 ======================= Release 3.0.0 2009-11-25 =======================
 
 Changes in backwards compatibility policy

Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java?rev=899627&r1=899626&r2=899627&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java Fri Jan 15 13:42:18 2010
@@ -188,6 +188,7 @@
   }
   
   private final void setToken(final Token token) throws IOException {
+    clearAttributes();
     termAtt.setTermBuffer(token.termBuffer(), 0, token.termLength());
     flagsAtt.setFlags(token.getFlags());
     typeAtt.setType(token.type());

Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAwareTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAwareTokenFilter.java?rev=899627&r1=899626&r2=899627&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAwareTokenFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAwareTokenFilter.java Fri Jan 15 13:42:18 2010
@@ -114,6 +114,7 @@
   
   private void setCurrentToken(Token token) {
     if (token == null) return;
+    clearAttributes();
     termAtt.setTermBuffer(token.termBuffer(), 0, token.termLength());
     posIncrAtt.setPositionIncrement(token.getPositionIncrement());
     flagsAtt.setFlags(token.getFlags());

Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java?rev=899627&r1=899626&r2=899627&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java Fri Jan 15 13:42:18 2010
@@ -134,6 +134,7 @@
           // grab gramSize chars from front or back
           int start = side == Side.FRONT ? 0 : curTermLength - curGramSize;
           int end = start + curGramSize;
+          clearAttributes();
           offsetAtt.setOffset(start, end);
           termAtt.setTermBuffer(curTermBuffer, start, curGramSize);
           curGramSize++;

Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java?rev=899627&r1=899626&r2=899627&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java Fri Jan 15 13:42:18 2010
@@ -87,6 +87,7 @@
       }
       while (curGramSize <= maxGram) {
         while (curPos+curGramSize <= curTermLength) {     // while there is input
+          clearAttributes();
           termAtt.setTermBuffer(curTermBuffer, curPos, curGramSize);
           offsetAtt.setOffset(curPos, curPos+curGramSize);
           curPos++;

Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/shingle/ShingleMatrixFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/shingle/ShingleMatrixFilter.java?rev=899627&r1=899626&r2=899627&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/shingle/ShingleMatrixFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/shingle/ShingleMatrixFilter.java Fri Jan 15 13:42:18 2010
@@ -377,6 +377,7 @@
     } while (token == request_next_token);
     if (token == null) return false;
 
+    clearAttributes();
     termAtt.setTermBuffer(token.termBuffer(), 0, token.termLength());
     posIncrAtt.setPositionIncrement(token.getPositionIncrement());
     flagsAtt.setFlags(token.getFlags());

Modified: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAndSuffixAwareTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAndSuffixAwareTokenFilter.java?rev=899627&r1=899626&r2=899627&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAndSuffixAwareTokenFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAndSuffixAwareTokenFilter.java Fri Jan 15 13:42:18 2010
@@ -19,10 +19,7 @@
 
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.Token;
-import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.WhitespaceTokenizer;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 
 import java.io.IOException;
 import java.io.StringReader;
@@ -36,22 +33,10 @@
         new WhitespaceTokenizer(new StringReader("hello world")),
         new SingleTokenTokenStream(createToken("$", 0, 0)));
 
-    assertNext(ts, "^", 0, 0);
-    assertNext(ts, "hello", 0, 5);
-    assertNext(ts, "world", 6, 11);
-    assertNext(ts, "$", 11, 11);
-    assertFalse(ts.incrementToken());
-  }
-
-
-  private void assertNext(TokenStream ts, String text, int startOffset, int endOffset) throws IOException {
-    TermAttribute termAtt = ts.addAttribute(TermAttribute.class);
-    OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
-
-    assertTrue(ts.incrementToken());
-    assertEquals(text, termAtt.term());
-    assertEquals(startOffset, offsetAtt.startOffset());
-    assertEquals(endOffset, offsetAtt.endOffset());
+    assertTokenStreamContents(ts,
+        new String[] { "^", "hello", "world", "$" },
+        new int[] { 0, 0, 6, 11 },
+        new int[] { 0, 5, 11, 11 });
   }
 
   private static Token createToken(String term, int start, int offset)

Modified: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAwareTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAwareTokenFilter.java?rev=899627&r1=899626&r2=899627&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAwareTokenFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAwareTokenFilter.java Fri Jan 15 13:42:18 2010
@@ -19,10 +19,7 @@
 
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.Token;
-import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.WhitespaceTokenizer;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 
 import java.io.IOException;
 import java.io.StringReader;
@@ -36,31 +33,20 @@
     ts = new PrefixAwareTokenFilter(
         new SingleTokenTokenStream(createToken("a", 0, 1)),
         new SingleTokenTokenStream(createToken("b", 0, 1)));
-    assertNext(ts, "a", 0, 1);
-    assertNext(ts, "b", 1, 2);
-    assertFalse(ts.incrementToken());
+    assertTokenStreamContents(ts, 
+        new String[] { "a", "b" },
+        new int[] { 0, 1 },
+        new int[] { 1, 2 });
 
     // prefix and suffix using 2x prefix
 
     ts = new PrefixAwareTokenFilter(new SingleTokenTokenStream(createToken("^", 0, 0)), new WhitespaceTokenizer(new StringReader("hello world")));
     ts = new PrefixAwareTokenFilter(ts, new SingleTokenTokenStream(createToken("$", 0, 0)));
 
-    assertNext(ts, "^", 0, 0);
-    assertNext(ts, "hello", 0, 5);
-    assertNext(ts, "world", 6, 11);
-    assertNext(ts, "$", 11, 11);
-    assertFalse(ts.incrementToken());
-  }
-
-
-  private void assertNext(TokenStream ts, String text, int startOffset, int endOffset) throws IOException {
-    TermAttribute termAtt = ts.addAttribute(TermAttribute.class);
-    OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
-
-    assertTrue(ts.incrementToken());
-    assertEquals(text, termAtt.term());
-    assertEquals(startOffset, offsetAtt.startOffset());
-    assertEquals(endOffset, offsetAtt.endOffset());
+    assertTokenStreamContents(ts,
+        new String[] { "^", "hello", "world", "$" },
+        new int[] { 0, 0, 6, 11 },
+        new int[] { 0, 5, 11, 11 });
   }
 
   private static Token createToken(String term, int start, int offset)

Modified: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java?rev=899627&r1=899626&r2=899627&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java Fri Jan 15 13:42:18 2010
@@ -85,22 +85,12 @@
 
     ts = new ShingleMatrixFilter(tls, 1, 2, new Character(' '), false, new ShingleMatrixFilter.OneDimensionalNonWeightedTokenSettingsCodec());
 
-
-    assertNext(ts, "please", 0, 6);
-    assertNext(ts, "please divide", 0, 13);
-    assertNext(ts, "divide", 7, 13);
-    assertNext(ts, "divide this", 7, 18);
-    assertNext(ts, "this", 14, 18);
-    assertNext(ts, "this sentence", 14, 27);
-    assertNext(ts, "sentence", 19, 27);
-    assertNext(ts, "sentence into", 19, 32);
-    assertNext(ts, "into", 28, 32);
-    assertNext(ts, "into shingles", 28, 39);
-    assertNext(ts, "shingles", 33, 39);
-
-
-    assertFalse(ts.incrementToken());
-
+    assertTokenStreamContents(ts,
+      new String[] { "please", "please divide", "divide", "divide this",
+        "this", "this sentence", "sentence", "sentence into", "into",
+        "into shingles", "shingles" },
+      new int[] { 0, 0, 7, 7, 14, 14, 19, 19, 28, 28, 33 },
+      new int[] { 6, 13, 13, 18, 18, 27, 27, 32, 32, 39, 39 });
   }
 
   /**
@@ -546,6 +536,7 @@
         return false;
       }
       Token prototype = (Token) iterator.next();
+      clearAttributes();
       termAtt.setTermBuffer(prototype.termBuffer(), 0, prototype.termLength());
       posIncrAtt.setPositionIncrement(prototype.getPositionIncrement());
       flagsAtt.setFlags(prototype.getFlags());

Modified: lucene/java/trunk/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java?rev=899627&r1=899626&r2=899627&view=diff
==============================================================================
--- lucene/java/trunk/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java (original)
+++ lucene/java/trunk/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java Fri Jan 15 13:42:18 2010
@@ -205,7 +205,7 @@
     public boolean incrementToken() throws IOException {
       if( !getNextPartialSnippet() )
         return false;
-      
+      clearAttributes();
       termAtt.setTermBuffer(snippet, startTerm, lenTerm);
       offsetAtt.setOffset(correctOffset(startOffset), correctOffset(startOffset + lenTerm));
       return true;

Modified: lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java?rev=899627&r1=899626&r2=899627&view=diff
==============================================================================
--- lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java (original)
+++ lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java Fri Jan 15 13:42:18 2010
@@ -169,6 +169,7 @@
           return false;
         }
         Token token = tokens[currentToken++];
+        clearAttributes();
         termAtt.setTermBuffer(token.term());
         offsetAtt.setOffset(token.startOffset(), token.endOffset());
         return true;

Modified: lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java?rev=899627&r1=899626&r2=899627&view=diff
==============================================================================
--- lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java (original)
+++ lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java Fri Jan 15 13:42:18 2010
@@ -102,6 +102,7 @@
   public boolean incrementToken() throws IOException {
     if (this.tokensAtCurrentPosition.hasNext()) {
       final Token next = this.tokensAtCurrentPosition.next();
+      clearAttributes();
       termAttribute.setTermBuffer(next.term());
       positionIncrementAttribute.setPositionIncrement(next
           .getPositionIncrement());

Modified: lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterPhraseTest.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterPhraseTest.java?rev=899627&r1=899626&r2=899627&view=diff
==============================================================================
--- lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterPhraseTest.java (original)
+++ lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterPhraseTest.java Fri Jan 15 13:42:18 2010
@@ -311,6 +311,7 @@
       if (this.i >= this.tokens.length) {
         return false;
       }
+      clearAttributes();
       termAttribute.setTermBuffer(this.tokens[i].term(), 0, this.tokens[i]
           .term().length());
       offsetAttribute.setOffset(this.tokens[i].startOffset(), this.tokens[i]
@@ -355,6 +356,7 @@
       if (this.i >= this.tokens.length) {
         return false;
       }
+      clearAttributes();
       termAttribute.setTermBuffer(this.tokens[i].term(), 0, this.tokens[i]
           .term().length());
       offsetAttribute.setOffset(this.tokens[i].startOffset(), this.tokens[i]

Modified: lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java?rev=899627&r1=899626&r2=899627&view=diff
==============================================================================
--- lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (original)
+++ lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java Fri Jan 15 13:42:18 2010
@@ -1458,6 +1458,7 @@
       public boolean incrementToken() throws IOException {
         if(iter.hasNext()) {
           Token token =  iter.next();
+          clearAttributes();
           termAtt.setTermBuffer(token.term());
           posIncrAtt.setPositionIncrement(token.getPositionIncrement());
           offsetAtt.setOffset(token.startOffset(), token.endOffset());
@@ -1506,6 +1507,7 @@
       public boolean incrementToken() throws IOException {
         if(iter.hasNext()) {
           Token token = iter.next();
+          clearAttributes();
           termAtt.setTermBuffer(token.term());
           posIncrAtt.setPositionIncrement(token.getPositionIncrement());
           offsetAtt.setOffset(token.startOffset(), token.endOffset());
@@ -1845,6 +1847,7 @@
         return false;
       }
       //Token nextRealToken = new Token(, offsetAtt.startOffset(), offsetAtt.endOffset());
+      clearAttributes();
       termAtt.setTermBuffer(realTermAtt.term());
       offsetAtt.setOffset(realOffsetAtt.startOffset(), realOffsetAtt.endOffset());
       posIncrAtt.setPositionIncrement(realPosIncrAtt.getPositionIncrement());
@@ -1862,6 +1865,7 @@
       return true;
     } else {
       String tok = st.nextToken();
+      clearAttributes();
       termAtt.setTermBuffer(tok);
       offsetAtt.setOffset(currentRealToken.startOffset(), currentRealToken.endOffset());
       posIncrAtt.setPositionIncrement(0);

Modified: lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java?rev=899627&r1=899626&r2=899627&view=diff
==============================================================================
--- lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (original)
+++ lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java Fri Jan 15 13:42:18 2010
@@ -287,6 +287,7 @@
           throw new IllegalArgumentException("keyword must not be null");
         
         String term = obj.toString();
+        clearAttributes();
         termAtt.setTermBuffer(term);
         offsetAtt.setOffset(start, start+termAtt.termLength());
         start += term.length() + 1; // separate words by 1 (blank) character

Modified: lucene/java/trunk/contrib/misc/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/misc/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java?rev=899627&r1=899626&r2=899627&view=diff
==============================================================================
--- lucene/java/trunk/contrib/misc/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java (original)
+++ lucene/java/trunk/contrib/misc/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java Fri Jan 15 13:42:18 2010
@@ -73,6 +73,7 @@
     
     @Override
     public boolean incrementToken() throws IOException {
+      clearAttributes();
       if (inPhrase) {
         inPhrase = false;
         termAtt.setTermBuffer("phrase2");

Modified: lucene/java/trunk/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java?rev=899627&r1=899626&r2=899627&view=diff
==============================================================================
--- lucene/java/trunk/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java (original)
+++ lucene/java/trunk/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java Fri Jan 15 13:42:18 2010
@@ -119,6 +119,7 @@
     public boolean incrementToken() throws IOException {
       if (inPhrase) {
         inPhrase = false;
+        clearAttributes();
         termAtt.setTermBuffer("phrase2");
         offsetAtt.setOffset(savedStart, savedEnd);
         return true;
@@ -1163,6 +1164,7 @@
     final TermAttribute term = addAttribute(TermAttribute.class);
     @Override
     public boolean incrementToken() {
+      clearAttributes();
       if (upto == 4) {
         return false;
       }

Modified: lucene/java/trunk/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java?rev=899627&r1=899626&r2=899627&view=diff
==============================================================================
--- lucene/java/trunk/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java (original)
+++ lucene/java/trunk/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java Fri Jan 15 13:42:18 2010
@@ -114,6 +114,7 @@
     public boolean incrementToken() throws IOException {
       if (inPhrase) {
         inPhrase = false;
+        clearAttributes();
         termAtt.setTermBuffer("phrase2");
         offsetAtt.setOffset(savedStart, savedEnd);
         return true;

Modified: lucene/java/trunk/contrib/snowball/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/snowball/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java?rev=899627&r1=899626&r2=899627&view=diff
==============================================================================
--- lucene/java/trunk/contrib/snowball/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java (original)
+++ lucene/java/trunk/contrib/snowball/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java Fri Jan 15 13:42:18 2010
@@ -131,6 +131,7 @@
     
     @Override
     public boolean incrementToken() {
+      clearAttributes();
       termAtt.setTermBuffer("accents");
       offsetAtt.setOffset(2, 7);
       typeAtt.setType("wrd");

Modified: lucene/java/trunk/contrib/wikipedia/src/test/org/apache/lucene/wikipedia/analysis/WikipediaTokenizerTest.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/wikipedia/src/test/org/apache/lucene/wikipedia/analysis/WikipediaTokenizerTest.java?rev=899627&r1=899626&r2=899627&view=diff
==============================================================================
--- lucene/java/trunk/contrib/wikipedia/src/test/org/apache/lucene/wikipedia/analysis/WikipediaTokenizerTest.java (original)
+++ lucene/java/trunk/contrib/wikipedia/src/test/org/apache/lucene/wikipedia/analysis/WikipediaTokenizerTest.java Fri Jan 15 13:42:18 2010
@@ -18,8 +18,6 @@
 
 package org.apache.lucene.wikipedia.analysis;
 
-import junit.framework.TestCase;
-
 import java.io.StringReader;
 import java.io.IOException;
 import java.util.HashMap;
@@ -46,6 +44,12 @@
     super(s);
   }
 
+  public void testSimple() throws Exception {
+    WikipediaTokenizer tf = new WikipediaTokenizer(new StringReader("This is a [[Category:foo]]"));
+    assertTokenStreamContents(tf,
+        new String[] { "This", "is", "a", "foo" });
+  }
+  
   public void testHandwritten() throws Exception {
     //make sure all tokens are in only one type
     String test = "[[link]] This is a [[Category:foo]] Category  This is a linked [[:Category:bar none withstanding]] " +

Modified: lucene/java/trunk/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java?rev=899627&r1=899626&r2=899627&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java Fri Jan 15 13:42:18 2010
@@ -21,6 +21,8 @@
 import java.io.IOException;
  
 import org.apache.lucene.analysis.tokenattributes.*;
+import org.apache.lucene.util.Attribute;
+import org.apache.lucene.util.AttributeImpl;
 import org.apache.lucene.util.LuceneTestCase;
 
 /** 
@@ -38,26 +40,67 @@
   
   // some helpers to test Analyzers and TokenStreams:
   
+  public static interface CheckClearAttributesAttribute extends Attribute {
+    boolean getAndResetClearCalled();
+  }
+
+  public static final class CheckClearAttributesAttributeImpl extends AttributeImpl implements CheckClearAttributesAttribute {
+    private boolean clearCalled = false;
+    
+    public boolean getAndResetClearCalled() {
+      try {
+        return clearCalled;
+      } finally {
+        clearCalled = false;
+      }
+    }
+
+    @Override
+    public void clear() {
+      clearCalled = true;
+    }
+
+    @Override
+    public boolean equals(Object other) {
+      return (
+        other instanceof CheckClearAttributesAttributeImpl &&
+        ((CheckClearAttributesAttributeImpl) other).clearCalled == this.clearCalled
+      );
+    }
+
+    @Override
+    public int hashCode() {
+      return 76137213 ^ Boolean.valueOf(clearCalled).hashCode();
+    }
+    
+    @Override
+    public void copyTo(AttributeImpl target) {
+      ((CheckClearAttributesAttributeImpl) target).clear();
+    }
+  }
+
   public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[]) throws IOException {
     assertNotNull(output);
-    assertTrue("has TermAttribute", ts.hasAttribute(TermAttribute.class));
+    CheckClearAttributesAttribute checkClearAtt = ts.addAttribute(CheckClearAttributesAttribute.class);
+    
+    assertTrue("has no TermAttribute", ts.hasAttribute(TermAttribute.class));
     TermAttribute termAtt = ts.getAttribute(TermAttribute.class);
     
     OffsetAttribute offsetAtt = null;
     if (startOffsets != null || endOffsets != null) {
-      assertTrue("has OffsetAttribute", ts.hasAttribute(OffsetAttribute.class));
+      assertTrue("has no OffsetAttribute", ts.hasAttribute(OffsetAttribute.class));
       offsetAtt = ts.getAttribute(OffsetAttribute.class);
     }
     
     TypeAttribute typeAtt = null;
     if (types != null) {
-      assertTrue("has TypeAttribute", ts.hasAttribute(TypeAttribute.class));
+      assertTrue("has no TypeAttribute", ts.hasAttribute(TypeAttribute.class));
       typeAtt = ts.getAttribute(TypeAttribute.class);
     }
     
     PositionIncrementAttribute posIncrAtt = null;
     if (posIncrements != null) {
-      assertTrue("has PositionIncrementAttribute", ts.hasAttribute(PositionIncrementAttribute.class));
+      assertTrue("has no PositionIncrementAttribute", ts.hasAttribute(PositionIncrementAttribute.class));
       posIncrAtt = ts.getAttribute(PositionIncrementAttribute.class);
     }
     
@@ -70,7 +113,10 @@
       if (typeAtt != null) typeAtt.setType("bogusType");
       if (posIncrAtt != null) posIncrAtt.setPositionIncrement(45987657);
       
-      assertTrue("token "+i+" exists", ts.incrementToken());
+      checkClearAtt.getAndResetClearCalled(); // reset it, because we called clearAttribute() before
+      assertTrue("token "+i+" does not exist", ts.incrementToken());
+      assertTrue("clearAttributes() was not called correctly in TokenStream chain", checkClearAtt.getAndResetClearCalled());
+      
       assertEquals("term "+i, output[i], termAtt.term());
       if (startOffsets != null)
         assertEquals("startOffset "+i, startOffsets[i], offsetAtt.startOffset());

Modified: lucene/java/trunk/src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java?rev=899627&r1=899626&r2=899627&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java Fri Jan 15 13:42:18 2010
@@ -49,6 +49,7 @@
         if (index == tokens.length) {
           return false;
         } else {
+          clearAttributes();
           termAtt.setTermBuffer(tokens[index++]);
           offsetAtt.setOffset(0,0);
           return true;

Modified: lucene/java/trunk/src/test/org/apache/lucene/analysis/TestTeeSinkTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/analysis/TestTeeSinkTokenFilter.java?rev=899627&r1=899626&r2=899627&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/analysis/TestTeeSinkTokenFilter.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/analysis/TestTeeSinkTokenFilter.java Fri Jan 15 13:42:18 2010
@@ -79,29 +79,14 @@
     final TeeSinkTokenFilter source = new TeeSinkTokenFilter(new WhitespaceTokenizer(new StringReader(buffer1.toString())));
     final TokenStream sink1 = source.newSinkTokenStream();
     final TokenStream sink2 = source.newSinkTokenStream(theFilter);
-    int i = 0;
-    TermAttribute termAtt = source.getAttribute(TermAttribute.class);
-    while (source.incrementToken()) {
-      assertEquals(tokens1[i], termAtt.term());
-      i++;
-    }
-    assertEquals(tokens1.length, i);
     
-    i = 0;
-    termAtt = sink1.getAttribute(TermAttribute.class);
-    while (sink1.incrementToken()) {
-      assertEquals(tokens1[i], termAtt.term());
-      i++;
-    }
-    assertEquals(tokens1.length, i);
+    source.addAttribute(CheckClearAttributesAttribute.class);
+    sink1.addAttribute(CheckClearAttributesAttribute.class);
+    sink2.addAttribute(CheckClearAttributesAttribute.class);
     
-    i = 0;
-    termAtt = sink2.getAttribute(TermAttribute.class);
-    while (sink2.incrementToken()) {
-      assertTrue(termAtt.term().equalsIgnoreCase("The"));
-      i++;
-    }
-    assertEquals("there should be two times 'the' in the stream", 2, i);
+    assertTokenStreamContents(source, tokens1);
+    assertTokenStreamContents(sink1, tokens1);
+    assertTokenStreamContents(sink2, new String[]{"The", "the"});
   }
 
   public void testMultipleSources() throws Exception {
@@ -109,50 +94,28 @@
     final TeeSinkTokenFilter.SinkTokenStream dogDetector = tee1.newSinkTokenStream(dogFilter);
     final TeeSinkTokenFilter.SinkTokenStream theDetector = tee1.newSinkTokenStream(theFilter);
     final TokenStream source1 = new CachingTokenFilter(tee1);
+    
+    tee1.addAttribute(CheckClearAttributesAttribute.class);
+    dogDetector.addAttribute(CheckClearAttributesAttribute.class);
+    theDetector.addAttribute(CheckClearAttributesAttribute.class);
 
     final TeeSinkTokenFilter tee2 = new TeeSinkTokenFilter(new WhitespaceTokenizer(new StringReader(buffer2.toString())));
     tee2.addSinkTokenStream(dogDetector);
     tee2.addSinkTokenStream(theDetector);
     final TokenStream source2 = tee2;
 
-    int i = 0;
-    TermAttribute termAtt = source1.getAttribute(TermAttribute.class);
-    while (source1.incrementToken()) {
-      assertEquals(tokens1[i], termAtt.term());
-      i++;
-    }
-    assertEquals(tokens1.length, i);
-    i = 0;
-    termAtt = source2.getAttribute(TermAttribute.class);
-    while (source2.incrementToken()) {
-      assertEquals(tokens2[i], termAtt.term());
-      i++;
-    }
-    assertEquals(tokens2.length, i);
-    i = 0;
-    termAtt = theDetector.getAttribute(TermAttribute.class);
-    while (theDetector.incrementToken()) {
-      assertTrue("'" + termAtt.term() + "' is not equal to 'The'", termAtt.term().equalsIgnoreCase("The"));
-      i++;
-    }
-    assertEquals("there must be 4 times 'The' in the stream", 4, i);
-    i = 0;
-    termAtt = dogDetector.getAttribute(TermAttribute.class);
-    while (dogDetector.incrementToken()) {
-      assertTrue("'" + termAtt.term() + "' is not equal to 'Dogs'", termAtt.term().equalsIgnoreCase("Dogs"));
-      i++;
-    }
-    assertEquals("there must be 2 times 'Dog' in the stream", 2, i);
+    assertTokenStreamContents(source1, tokens1);
+    assertTokenStreamContents(source2, tokens2);
+
+    assertTokenStreamContents(theDetector, new String[]{"The", "the", "The", "the"});
+    assertTokenStreamContents(dogDetector, new String[]{"Dogs", "Dogs"});
     
     source1.reset();
     TokenStream lowerCasing = new LowerCaseFilter(Version.LUCENE_CURRENT, source1);
-    i = 0;
-    termAtt = lowerCasing.getAttribute(TermAttribute.class);
-    while (lowerCasing.incrementToken()) {
-      assertEquals(tokens1[i].toLowerCase(), termAtt.term());
-      i++;
-    }
-    assertEquals(i, tokens1.length);
+    String[] lowerCaseTokens = new String[tokens1.length];
+    for (int i = 0; i < tokens1.length; i++)
+      lowerCaseTokens[i] = tokens1[i].toLowerCase();
+    assertTokenStreamContents(lowerCasing, lowerCaseTokens);
   }
 
   /**

Modified: lucene/java/trunk/src/test/org/apache/lucene/index/TestDocumentWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestDocumentWriter.java?rev=899627&r1=899626&r2=899627&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestDocumentWriter.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestDocumentWriter.java Fri Jan 15 13:42:18 2010
@@ -221,6 +221,7 @@
         if (index == tokens.length) {
           return false;
         } else {
+          clearAttributes();
           termAtt.setTermBuffer(tokens[index++]);
           return true;
         }        

Modified: lucene/java/trunk/src/test/org/apache/lucene/index/TestPayloads.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestPayloads.java?rev=899627&r1=899626&r2=899627&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestPayloads.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestPayloads.java Fri Jan 15 13:42:18 2010
@@ -537,6 +537,7 @@
         public boolean incrementToken() throws IOException {
             if (!first) return false;
             first = false;
+            clearAttributes();
             termAtt.setTermBuffer(term);
             payloadAtt.setPayload(new Payload(payload));
             return true;

Modified: lucene/java/trunk/src/test/org/apache/lucene/index/TestTermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestTermVectorsReader.java?rev=899627&r1=899626&r2=899627&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestTermVectorsReader.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestTermVectorsReader.java Fri Jan 15 13:42:18 2010
@@ -138,6 +138,7 @@
         return false;
       else {
         final TestToken testToken = tokens[tokenUpto++];
+        clearAttributes();
         termAtt.setTermBuffer(testToken.text);
         offsetAtt.setOffset(testToken.startOffset, testToken.endOffset);
         if (tokenUpto > 1) {

Modified: lucene/java/trunk/src/test/org/apache/lucene/index/TestTermdocPerf.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestTermdocPerf.java?rev=899627&r1=899626&r2=899627&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestTermdocPerf.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestTermdocPerf.java Fri Jan 15 13:42:18 2010
@@ -48,6 +48,7 @@
    public boolean incrementToken() throws IOException {
      num--;
      if (num >= 0) {
+       clearAttributes();
        termAtt.setTermBuffer(value);
        return true;
      }

Modified: lucene/java/trunk/src/test/org/apache/lucene/queryParser/TestQueryParser.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/queryParser/TestQueryParser.java?rev=899627&r1=899626&r2=899627&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/queryParser/TestQueryParser.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/queryParser/TestQueryParser.java Fri Jan 15 13:42:18 2010
@@ -102,6 +102,7 @@
     public boolean incrementToken() throws IOException {
       if (inPhrase) {
         inPhrase = false;
+        clearAttributes();
         termAtt.setTermBuffer("phrase2");
         offsetAtt.setOffset(savedStart, savedEnd);
         return true;

Modified: lucene/java/trunk/src/test/org/apache/lucene/search/TestPositionIncrement.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/TestPositionIncrement.java?rev=899627&r1=899626&r2=899627&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/search/TestPositionIncrement.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/search/TestPositionIncrement.java Fri Jan 15 13:42:18 2010
@@ -77,6 +77,7 @@
           public boolean incrementToken() {
             if (i == TOKENS.length)
               return false;
+            clearAttributes();
             termAtt.setTermBuffer(TOKENS[i]);
             offsetAtt.setOffset(i,i);
             posIncrAtt.setPositionIncrement(INCREMENTS[i]);

Modified: lucene/java/trunk/src/test/org/apache/lucene/search/TestTermRangeQuery.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/TestTermRangeQuery.java?rev=899627&r1=899626&r2=899627&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/search/TestTermRangeQuery.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/search/TestTermRangeQuery.java Fri Jan 15 13:42:18 2010
@@ -273,6 +273,7 @@
         if (done)
           return false;
         else {
+          clearAttributes();
           done = true;
           if (count == 1) {
             termAtt.termBuffer()[0] = buffer[0];