You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2013/08/20 20:13:45 UTC

svn commit: r1515909 - in /lucene/dev/branches/branch_4x: ./ lucene/ lucene/analysis/ lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/ lucene/analysis/common/src/java/org/apache/lucene/analysis/cn/ lucene/analysis/common/src/java/org/apa...

Author: mikemccand
Date: Tue Aug 20 18:13:44 2013
New Revision: 1515909

URL: http://svn.apache.org/r1515909
Log:
LUCENE-3849: end() now sets position increment, so any trailing holes are counted

Modified:
    lucene/dev/branches/branch_4x/   (props changed)
    lucene/dev/branches/branch_4x/lucene/   (props changed)
    lucene/dev/branches/branch_4x/lucene/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_4x/lucene/analysis/   (props changed)
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternAnalyzer.java
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43EdgeNGramTokenizer.java
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43NGramTokenizer.java
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizer.java
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/path/ReversePathHierarchyTokenizer.java
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/FilteringTokenFilter.java
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilter.java
    lucene/dev/branches/branch_4x/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java
    lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java
    lucene/dev/branches/branch_4x/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java
    lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java
    lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java
    lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java
    lucene/dev/branches/branch_4x/lucene/core/   (props changed)
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/analysis/TokenStream.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java
    lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/BinaryTokenStream.java
    lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
    lucene/dev/branches/branch_4x/lucene/facet/   (props changed)
    lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
    lucene/dev/branches/branch_4x/lucene/highlighter/   (props changed)
    lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java
    lucene/dev/branches/branch_4x/lucene/misc/   (props changed)
    lucene/dev/branches/branch_4x/lucene/misc/src/test/org/apache/lucene/index/sorter/SorterTestBase.java
    lucene/dev/branches/branch_4x/lucene/suggest/   (props changed)
    lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/SuggestStopFilter.java
    lucene/dev/branches/branch_4x/lucene/test-framework/   (props changed)
    lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
    lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenFilter.java
    lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenizer.java

Modified: lucene/dev/branches/branch_4x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/CHANGES.txt?rev=1515909&r1=1515908&r2=1515909&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/lucene/CHANGES.txt Tue Aug 20 18:13:44 2013
@@ -84,6 +84,11 @@ Bug Fixes
   the default one) have their own limits (David Smiley, Robert Muir,
   Mike McCandless)
 
+* LUCENE-3849: TokenStreams now set the position increment in end(),
+  so we can handle trailing holes.  If you have a custom TokenStream
+  implementing end() then be sure it calls super.end().  (Robert Muir,
+  Mike McCandless)
+
 API Changes
 
 * LUCENE-5094: Add ramBytesUsed() to MultiDocValues.OrdinalMap.

Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java?rev=1515909&r1=1515908&r2=1515909&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java Tue Aug 20 18:13:44 2013
@@ -290,7 +290,8 @@ public final class CJKTokenizer extends 
     }
     
     @Override
-    public final void end() {
+    public final void end() throws IOException {
+      super.end();
       // set final offset
       final int finalOffset = correctOffset(offset);
       this.offsetAtt.setOffset(finalOffset, finalOffset);

Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java?rev=1515909&r1=1515908&r2=1515909&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java Tue Aug 20 18:13:44 2013
@@ -150,7 +150,8 @@ public final class ChineseTokenizer exte
     }
     
     @Override
-    public final void end() {
+    public final void end() throws IOException {
+      super.end();
       // set final offset
       final int finalOffset = correctOffset(offset);
       this.offsetAtt.setOffset(finalOffset, finalOffset);

Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java?rev=1515909&r1=1515908&r2=1515909&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java Tue Aug 20 18:13:44 2013
@@ -79,7 +79,8 @@ public final class KeywordTokenizer exte
   }
   
   @Override
-  public final void end() {
+  public final void end() throws IOException {
+    super.end();
     // set final offset 
     offsetAtt.setOffset(finalOffset, finalOffset);
   }

Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternAnalyzer.java?rev=1515909&r1=1515908&r2=1515909&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternAnalyzer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternAnalyzer.java Tue Aug 20 18:13:44 2013
@@ -363,7 +363,8 @@ public final class PatternAnalyzer exten
     }
     
     @Override
-    public final void end() {
+    public final void end() throws IOException {
+      super.end();
       // set final offset
       final int finalOffset = correctOffset(str.length());
       this.offsetAtt.setOffset(finalOffset, finalOffset);
@@ -454,7 +455,8 @@ public final class PatternAnalyzer exten
     }
     
     @Override
-    public final void end() {
+    public final void end() throws IOException {
+      super.end();
       // set final offset
       final int finalOffset = str.length();
       this.offsetAtt.setOffset(correctOffset(finalOffset), correctOffset(finalOffset));

Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43EdgeNGramTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43EdgeNGramTokenizer.java?rev=1515909&r1=1515908&r2=1515909&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43EdgeNGramTokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43EdgeNGramTokenizer.java Tue Aug 20 18:13:44 2013
@@ -265,7 +265,8 @@ public final class Lucene43EdgeNGramToke
   }
   
   @Override
-  public void end() {
+  public void end() throws IOException {
+    super.end();
     // set final offset
     final int finalOffset = correctOffset(charsRead);
     this.offsetAtt.setOffset(finalOffset, finalOffset);

Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43NGramTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43NGramTokenizer.java?rev=1515909&r1=1515908&r2=1515909&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43NGramTokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43NGramTokenizer.java Tue Aug 20 18:13:44 2013
@@ -140,7 +140,8 @@ public final class Lucene43NGramTokenize
   }
   
   @Override
-  public void end() {
+  public void end() throws IOException {
+    super.end();
     // set final offset
     final int finalOffset = correctOffset(charsRead);
     this.offsetAtt.setOffset(finalOffset, finalOffset);

Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java?rev=1515909&r1=1515908&r2=1515909&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java Tue Aug 20 18:13:44 2013
@@ -217,13 +217,15 @@ public class NGramTokenizer extends Toke
   }
 
   @Override
-  public final void end() {
+  public final void end() throws IOException {
+    super.end();
     assert bufferStart <= bufferEnd;
     int endOffset = offset;
     for (int i = bufferStart; i < bufferEnd; ++i) {
       endOffset += Character.charCount(buffer[i]);
     }
     endOffset = correctOffset(endOffset);
+    // set final offset
     offsetAtt.setOffset(endOffset, endOffset);
   }
 

Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizer.java?rev=1515909&r1=1515908&r2=1515909&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizer.java Tue Aug 20 18:13:44 2013
@@ -191,7 +191,8 @@ public class PathHierarchyTokenizer exte
   }
 
   @Override
-  public final void end() {
+  public final void end() throws IOException {
+    super.end();
     // set final offset
     int finalOffset = correctOffset(charsRead);
     offsetAtt.setOffset(finalOffset, finalOffset);

Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/path/ReversePathHierarchyTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/path/ReversePathHierarchyTokenizer.java?rev=1515909&r1=1515908&r2=1515909&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/path/ReversePathHierarchyTokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/path/ReversePathHierarchyTokenizer.java Tue Aug 20 18:13:44 2013
@@ -176,7 +176,8 @@ public class ReversePathHierarchyTokeniz
   }
 
   @Override
-  public final void end() {
+  public final void end() throws IOException {
+    super.end();
     // set final offset
     offsetAtt.setOffset(finalOffset, finalOffset);
   }

Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java?rev=1515909&r1=1515908&r2=1515909&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java Tue Aug 20 18:13:44 2013
@@ -130,7 +130,8 @@ public final class PatternTokenizer exte
   }
 
   @Override
-  public void end() {
+  public void end() throws IOException {
+    super.end();
     final int ofs = correctOffset(str.length());
     offsetAtt.setOffset(ofs, ofs);
   }

Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java?rev=1515909&r1=1515908&r2=1515909&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java Tue Aug 20 18:13:44 2013
@@ -75,6 +75,8 @@ public final class ClassicTokenizer exte
     "<CJ>",
     "<ACRONYM_DEP>"
   };
+  
+  private int skippedPositions;
 
   private int maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;
 
@@ -129,7 +131,7 @@ public final class ClassicTokenizer exte
   @Override
   public final boolean incrementToken() throws IOException {
     clearAttributes();
-    int posIncr = 1;
+    skippedPositions = 0;
 
     while(true) {
       int tokenType = scanner.getNextToken();
@@ -139,7 +141,7 @@ public final class ClassicTokenizer exte
       }
 
       if (scanner.yylength() <= maxTokenLength) {
-        posIncrAtt.setPositionIncrement(posIncr);
+        posIncrAtt.setPositionIncrement(skippedPositions+1);
         scanner.getText(termAtt);
         final int start = scanner.yychar();
         offsetAtt.setOffset(correctOffset(start), correctOffset(start+termAtt.length()));
@@ -154,19 +156,23 @@ public final class ClassicTokenizer exte
       } else
         // When we skip a too-long term, we still increment the
         // position increment
-        posIncr++;
+        skippedPositions++;
     }
   }
   
   @Override
-  public final void end() {
+  public final void end() throws IOException {
+    super.end();
     // set final offset
     int finalOffset = correctOffset(scanner.yychar() + scanner.yylength());
     offsetAtt.setOffset(finalOffset, finalOffset);
+    // adjust any skipped tokens
+    posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement()+skippedPositions);
   }
 
   @Override
   public void reset() throws IOException {
     scanner.yyreset(input);
+    skippedPositions = 0;
   }
 }

Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java?rev=1515909&r1=1515908&r2=1515909&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java Tue Aug 20 18:13:44 2013
@@ -102,6 +102,8 @@ public final class StandardTokenizer ext
     "<KATAKANA>",
     "<HANGUL>"
   };
+  
+  private int skippedPositions;
 
   private int maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;
 
@@ -165,7 +167,7 @@ public final class StandardTokenizer ext
   @Override
   public final boolean incrementToken() throws IOException {
     clearAttributes();
-    int posIncr = 1;
+    skippedPositions = 0;
 
     while(true) {
       int tokenType = scanner.getNextToken();
@@ -175,7 +177,7 @@ public final class StandardTokenizer ext
       }
 
       if (scanner.yylength() <= maxTokenLength) {
-        posIncrAtt.setPositionIncrement(posIncr);
+        posIncrAtt.setPositionIncrement(skippedPositions+1);
         scanner.getText(termAtt);
         final int start = scanner.yychar();
         offsetAtt.setOffset(correctOffset(start), correctOffset(start+termAtt.length()));
@@ -192,19 +194,23 @@ public final class StandardTokenizer ext
       } else
         // When we skip a too-long term, we still increment the
         // position increment
-        posIncr++;
+        skippedPositions++;
     }
   }
   
   @Override
-  public final void end() {
+  public final void end() throws IOException {
+    super.end();
     // set final offset
     int finalOffset = correctOffset(scanner.yychar() + scanner.yylength());
     offsetAtt.setOffset(finalOffset, finalOffset);
+    // adjust any skipped tokens
+    posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement()+skippedPositions);
   }
 
   @Override
   public void reset() throws IOException {
     scanner.yyreset(input);
+    skippedPositions = 0;
   }
 }

Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java?rev=1515909&r1=1515908&r2=1515909&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java Tue Aug 20 18:13:44 2013
@@ -83,6 +83,8 @@ public final class UAX29URLEmailTokenize
     "<URL>",
     "<EMAIL>",
   };
+  
+  private int skippedPositions;
 
   private int maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;
 
@@ -139,7 +141,7 @@ public final class UAX29URLEmailTokenize
   @Override
   public final boolean incrementToken() throws IOException {
     clearAttributes();
-    int posIncr = 1;
+    skippedPositions = 0;
 
     while(true) {
       int tokenType = scanner.getNextToken();
@@ -149,7 +151,7 @@ public final class UAX29URLEmailTokenize
       }
 
       if (scanner.yylength() <= maxTokenLength) {
-        posIncrAtt.setPositionIncrement(posIncr);
+        posIncrAtt.setPositionIncrement(skippedPositions+1);
         scanner.getText(termAtt);
         final int start = scanner.yychar();
         offsetAtt.setOffset(correctOffset(start), correctOffset(start+termAtt.length()));
@@ -158,19 +160,23 @@ public final class UAX29URLEmailTokenize
       } else
         // When we skip a too-long term, we still increment the
         // position increment
-        posIncr++;
+        skippedPositions++;
     }
   }
   
   @Override
-  public final void end() {
+  public final void end() throws IOException {
+    super.end();
     // set final offset
     int finalOffset = correctOffset(scanner.yychar() + scanner.yylength());
     offsetAtt.setOffset(finalOffset, finalOffset);
+    // adjust any skipped tokens
+    posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement()+skippedPositions);
   }
 
   @Override
   public void reset() throws IOException {
     scanner.yyreset(input);
+    skippedPositions = 0;
   }
 }

Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java?rev=1515909&r1=1515908&r2=1515909&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java Tue Aug 20 18:13:44 2013
@@ -174,7 +174,8 @@ public abstract class CharTokenizer exte
   }
   
   @Override
-  public final void end() {
+  public final void end() throws IOException {
+    super.end();
     // set final offset
     offsetAtt.setOffset(finalOffset, finalOffset);
   }

Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/FilteringTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/FilteringTokenFilter.java?rev=1515909&r1=1515908&r2=1515909&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/FilteringTokenFilter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/FilteringTokenFilter.java Tue Aug 20 18:13:44 2013
@@ -45,6 +45,7 @@ public abstract class FilteringTokenFilt
   private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
   private boolean enablePositionIncrements; // no init needed, as ctor enforces setting value!
   private boolean first = true;
+  private int skippedPositions;
 
   /**
    * Create a new {@link FilteringTokenFilter}.
@@ -77,7 +78,7 @@ public abstract class FilteringTokenFilt
   @Override
   public final boolean incrementToken() throws IOException {
     if (enablePositionIncrements) {
-      int skippedPositions = 0;
+      skippedPositions = 0;
       while (input.incrementToken()) {
         if (accept()) {
           if (skippedPositions != 0) {
@@ -109,6 +110,7 @@ public abstract class FilteringTokenFilt
   public void reset() throws IOException {
     super.reset();
     first = true;
+    skippedPositions = 0;
   }
 
   /**
@@ -140,4 +142,12 @@ public abstract class FilteringTokenFilt
     checkPositionIncrement(version, enable);
     this.enablePositionIncrements = enable;
   }
+
+  @Override
+  public void end() throws IOException {
+    super.end();
+    if (enablePositionIncrements) {
+      posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions);
+    }
+  }
 }

Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java?rev=1515909&r1=1515908&r2=1515909&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java Tue Aug 20 18:13:44 2013
@@ -309,7 +309,8 @@ public final class WikipediaTokenizer ex
   }
 
   @Override
-  public void end() {
+  public void end() throws IOException {
+    super.end();
     // set final offset
     final int finalOffset = correctOffset(scanner.yychar() + scanner.yylength());
     this.offsetAtt.setOffset(finalOffset, finalOffset);

Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilter.java?rev=1515909&r1=1515908&r2=1515909&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilter.java Tue Aug 20 18:13:44 2013
@@ -99,6 +99,22 @@ public class TestStopFilter extends Base
     StopFilter stpf01 = new StopFilter(TEST_VERSION_CURRENT, stpf0, stopSet1); // two stop filters concatenated!
     doTestStopPositons(stpf01,true);
   }
+
+  // LUCENE-3849: make sure after .end() we see the "ending" posInc
+  public void testEndStopword() throws Exception {
+    CharArraySet stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, "of");
+    StopFilter stpf = new StopFilter(Version.LUCENE_40, new MockTokenizer(new StringReader("test of"), MockTokenizer.WHITESPACE, false), stopSet);
+    assertTokenStreamContents(stpf, new String[] { "test" },
+                              new int[] {0},
+                              new int[] {4},
+                              null,
+                              new int[] {1},
+                              null,
+                              7,
+                              1,
+                              null,
+                              true);    
+  }
   
   private void doTestStopPositons(StopFilter stpf, boolean enableIcrements) throws IOException {
     log("---> test with enable-increments-"+(enableIcrements?"enabled":"disabled"));

Modified: lucene/dev/branches/branch_4x/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java?rev=1515909&r1=1515908&r2=1515909&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java Tue Aug 20 18:13:44 2013
@@ -120,7 +120,8 @@ public final class ICUTokenizer extends 
   }
   
   @Override
-  public void end() {
+  public void end() throws IOException {
+    super.end();
     final int finalOffset = (length < 0) ? offset : offset + length;
     offsetAtt.setOffset(correctOffset(finalOffset), correctOffset(finalOffset));
   }  

Modified: lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java?rev=1515909&r1=1515908&r2=1515909&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java Tue Aug 20 18:13:44 2013
@@ -280,7 +280,8 @@ public final class JapaneseTokenizer ext
   }
 
   @Override
-  public void end() {
+  public void end() throws IOException {
+    super.end();
     // Set final offset
     int finalOffset = correctOffset(pos);
     offsetAtt.setOffset(finalOffset, finalOffset);

Modified: lucene/dev/branches/branch_4x/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java?rev=1515909&r1=1515908&r2=1515909&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java Tue Aug 20 18:13:44 2013
@@ -111,7 +111,8 @@ public final class SentenceTokenizer ext
   }
 
   @Override
-  public void end() {
+  public void end() throws IOException {
+    super.end();
     // set final offset
     final int finalOffset = correctOffset(tokenEnd);
     offsetAtt.setOffset(finalOffset, finalOffset);

Modified: lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java?rev=1515909&r1=1515908&r2=1515909&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java Tue Aug 20 18:13:44 2013
@@ -91,9 +91,4 @@ public abstract class BaseUIMATokenizer 
   public void reset() throws IOException {
     iterator = null;
   }
-
-  @Override
-  public void end() throws IOException {
-    iterator = null;
-  }
 }

Modified: lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java?rev=1515909&r1=1515908&r2=1515909&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java Tue Aug 20 18:13:44 2013
@@ -86,7 +86,7 @@ public final class UIMAAnnotationsTokeni
 
   @Override
   public void end() throws IOException {
-    offsetAttr.setOffset(finalOffset, finalOffset);
     super.end();
+    offsetAttr.setOffset(finalOffset, finalOffset);
   }
 }

Modified: lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java?rev=1515909&r1=1515908&r2=1515909&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java Tue Aug 20 18:13:44 2013
@@ -107,8 +107,8 @@ public final class UIMATypeAwareAnnotati
 
   @Override
   public void end() throws IOException {
-    offsetAttr.setOffset(finalOffset, finalOffset);
     super.end();
+    offsetAttr.setOffset(finalOffset, finalOffset);
   }
 
 

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/analysis/TokenStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/analysis/TokenStream.java?rev=1515909&r1=1515908&r2=1515909&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/analysis/TokenStream.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/analysis/TokenStream.java Tue Aug 20 18:13:44 2013
@@ -21,6 +21,7 @@ import java.io.IOException;
 import java.io.Closeable;
 import java.lang.reflect.Modifier;
 
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexWriter;
@@ -159,11 +160,18 @@ public abstract class TokenStream extend
    * setting the final offset of a stream. The final offset of a stream might
    * differ from the offset of the last token eg in case one or more whitespaces
    * followed after the last token, but a WhitespaceTokenizer was used.
+   * <p>
+   * Additionally any skipped positions (such as those removed by a stopfilter)
+   * can be applied to the position increment, or any adjustment of other
+   * attributes where the end-of-stream value may be important.
    * 
    * @throws IOException If an I/O error occurs
    */
   public void end() throws IOException {
-    // do nothing by default
+    clearAttributes(); // LUCENE-3849: don't consume dirty atts
+    if (hasAttribute(PositionIncrementAttribute.class)) {
+      getAttribute(PositionIncrementAttribute.class).setPositionIncrement(0);
+    }
   }
 
   /**

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java?rev=1515909&r1=1515908&r2=1515909&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java Tue Aug 20 18:13:44 2013
@@ -175,7 +175,9 @@ final class DocInverterPerField extends 
           }
           // trigger streams to perform end-of-stream operations
           stream.end();
-
+          // TODO: maybe add some safety? then again, its already checked 
+          // when we come back around to the field...
+          fieldState.position += posIncrAttribute.getPositionIncrement();
           fieldState.offset += offsetAttribute.endOffset();
           success2 = true;
         } finally {

Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/BinaryTokenStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/BinaryTokenStream.java?rev=1515909&r1=1515908&r2=1515909&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/BinaryTokenStream.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/BinaryTokenStream.java Tue Aug 20 18:13:44 2013
@@ -31,16 +31,19 @@ import org.apache.lucene.analysis.Canned
  */
 public final class BinaryTokenStream extends TokenStream {
   private final ByteTermAttribute bytesAtt = addAttribute(ByteTermAttribute.class);
+  private final BytesRef bytes;
   private boolean available = true;
   
   public BinaryTokenStream(BytesRef bytes) {
-    bytesAtt.setBytesRef(bytes);
+    this.bytes = bytes;
   }
   
   @Override
   public boolean incrementToken() {
     if (available) {
+      clearAttributes();
       available = false;
+      bytesAtt.setBytesRef(bytes);
       return true;
     }
     return false;

Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java?rev=1515909&r1=1515908&r2=1515909&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java Tue Aug 20 18:13:44 2013
@@ -50,6 +50,7 @@ import org.apache.lucene.search.DocIdSet
 import org.apache.lucene.search.FieldCache;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.store.AlreadyClosedException;
@@ -72,6 +73,9 @@ import org.apache.lucene.util.LuceneTest
 import org.apache.lucene.util.SetOnce;
 import org.apache.lucene.util.ThreadInterruptedException;
 import org.apache.lucene.util._TestUtil;
+import org.apache.lucene.util.automaton.Automaton;
+import org.apache.lucene.util.automaton.BasicAutomata;
+import org.apache.lucene.util.automaton.CharacterRunAutomaton;
 import org.apache.lucene.util.packed.PackedInts;
 import org.junit.Test;
 
@@ -1954,6 +1958,65 @@ public class TestIndexWriter extends Luc
     }
   }
   
+  // LUCENE-3849
+  public void testStopwordsPosIncHole() throws Exception {
+    Directory dir = newDirectory();
+    Analyzer a = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new MockTokenizer(reader);
+        TokenStream stream = new MockTokenFilter(tokenizer, MockTokenFilter.ENGLISH_STOPSET);
+        return new TokenStreamComponents(tokenizer, stream);
+      }
+    };
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, a);
+    Document doc = new Document();
+    doc.add(new TextField("body", "just a", Field.Store.NO));
+    doc.add(new TextField("body", "test of gaps", Field.Store.NO));
+    iw.addDocument(doc);
+    IndexReader ir = iw.getReader();
+    iw.close();
+    IndexSearcher is = newSearcher(ir);
+    PhraseQuery pq = new PhraseQuery();
+    pq.add(new Term("body", "just"), 0);
+    pq.add(new Term("body", "test"), 2);
+    // body:"just ? test"
+    assertEquals(1, is.search(pq, 5).totalHits);
+    ir.close();
+    dir.close();
+  }
+  
+  // LUCENE-3849
+  public void testStopwordsPosIncHole2() throws Exception {
+    // use two stopfilters for testing here
+    Directory dir = newDirectory();
+    final Automaton secondSet = BasicAutomata.makeString("foobar");
+    Analyzer a = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new MockTokenizer(reader);
+        TokenStream stream = new MockTokenFilter(tokenizer, MockTokenFilter.ENGLISH_STOPSET);
+        stream = new MockTokenFilter(stream, new CharacterRunAutomaton(secondSet));
+        return new TokenStreamComponents(tokenizer, stream);
+      }
+    };
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, a);
+    Document doc = new Document();
+    doc.add(new TextField("body", "just a foobar", Field.Store.NO));
+    doc.add(new TextField("body", "test of gaps", Field.Store.NO));
+    iw.addDocument(doc);
+    IndexReader ir = iw.getReader();
+    iw.close();
+    IndexSearcher is = newSearcher(ir);
+    PhraseQuery pq = new PhraseQuery();
+    pq.add(new Term("body", "just"), 0);
+    pq.add(new Term("body", "test"), 3);
+    // body:"just ? ? test"
+    assertEquals(1, is.search(pq, 5).totalHits);
+    ir.close();
+    dir.close();
+  }
+  
   // here we do better, there is no current segments file, so we don't delete anything.
   // however, if you actually go and make a commit, the next time you run indexwriter
   // this file will be gone.

Modified: lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java?rev=1515909&r1=1515908&r2=1515909&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java Tue Aug 20 18:13:44 2013
@@ -556,12 +556,16 @@ public class DirectoryTaxonomyWriter imp
     private CharTermAttribute termAtt;
     private PositionIncrementAttribute posIncrAtt;
     private boolean returned;
+    private int val;
+    private final String word;
+
     public SinglePositionTokenStream(String word) {
       termAtt = addAttribute(CharTermAttribute.class);
       posIncrAtt = addAttribute(PositionIncrementAttribute.class);
-      termAtt.setEmpty().append(word);
+      this.word = word;
       returned = true;
     }
+
     /**
      * Set the value we want to keep, as the position increment.
      * Note that when TermPositions.nextPosition() is later used to
@@ -575,15 +579,21 @@ public class DirectoryTaxonomyWriter imp
      * This change is described in Lucene's JIRA: LUCENE-1542. 
      */
     public void set(int val) {
-      posIncrAtt.setPositionIncrement(val);
+      this.val = val;
       returned = false;
     }
+
     @Override
     public boolean incrementToken() throws IOException {
       if (returned) {
         return false;
       }
-      return returned = true;
+      clearAttributes();
+      posIncrAtt.setPositionIncrement(val);
+      termAtt.setEmpty();
+      termAtt.append(word);
+      returned = true;
+      return true;
     }
   }
 

Modified: lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java?rev=1515909&r1=1515908&r2=1515909&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java (original)
+++ lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java Tue Aug 20 18:13:44 2013
@@ -264,7 +264,8 @@ public abstract class AbstractTestCase e
     }
     
     @Override
-    public final void end(){
+    public final void end() throws IOException {
+      super.end();
       offsetAtt.setOffset(getFinalOffset(),getFinalOffset());
     }
     

Modified: lucene/dev/branches/branch_4x/lucene/misc/src/test/org/apache/lucene/index/sorter/SorterTestBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/misc/src/test/org/apache/lucene/index/sorter/SorterTestBase.java?rev=1515909&r1=1515908&r2=1515909&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/misc/src/test/org/apache/lucene/index/sorter/SorterTestBase.java (original)
+++ lucene/dev/branches/branch_4x/lucene/misc/src/test/org/apache/lucene/index/sorter/SorterTestBase.java Tue Aug 20 18:13:44 2013
@@ -116,7 +116,6 @@ public abstract class SorterTestBase ext
     
     public PositionsTokenStream() {
       term = addAttribute(CharTermAttribute.class);
-      term.append(DOC_POSITIONS_TERM);
       payload = addAttribute(PayloadAttribute.class);
       offset = addAttribute(OffsetAttribute.class);
     }
@@ -127,6 +126,8 @@ public abstract class SorterTestBase ext
         return false;
       }
       
+      clearAttributes();
+      term.append(DOC_POSITIONS_TERM);
       payload.setPayload(new BytesRef(Integer.toString(pos)));
       offset.setOffset(off, off);
       --pos;

Modified: lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/SuggestStopFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/SuggestStopFilter.java?rev=1515909&r1=1515908&r2=1515909&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/SuggestStopFilter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/SuggestStopFilter.java Tue Aug 20 18:13:44 2013
@@ -50,7 +50,6 @@ public final class SuggestStopFilter ext
   private final CharArraySet stopWords;
 
   private State endState;
-  private boolean ended;
 
   /** Sole constructor. */
   public SuggestStopFilter(TokenStream input, CharArraySet stopWords) {
@@ -61,28 +60,24 @@ public final class SuggestStopFilter ext
   @Override
   public void reset() throws IOException {
     super.reset();
-    ended = false;
     endState = null;
   }
 
   @Override
   public void end() throws IOException {
-    if (!ended) {
+    if (endState == null) {
       super.end();
     } else {
       // NOTE: we already called .end() from our .next() when
       // the stream was complete, so we do not call
       // super.end() here
-
-      if (endState != null) {
-        restoreState(endState);
-      }
+      restoreState(endState);
     }
   }
 
   @Override
   public boolean incrementToken() throws IOException {
-    if (ended) {
+    if (endState != null) {
       return false;
     }
 
@@ -101,8 +96,9 @@ public final class SuggestStopFilter ext
           // It was a stopword; skip it
           skippedPositions += posInc;
         } else {
+          clearAttributes();
           input.end();
-          ended = true;
+          endState = captureState();
           int finalEndOffset = offsetAtt.endOffset();
           assert finalEndOffset >= endOffset;
           if (finalEndOffset > endOffset) {
@@ -112,7 +108,6 @@ public final class SuggestStopFilter ext
           } else {
             // No token separator after final token that
             // looked like a stop-word; don't filter it:
-            endState = captureState();
             restoreState(sav);
             posIncAtt.setPositionIncrement(skippedPositions + posIncAtt.getPositionIncrement());
             keywordAtt.setKeyword(true);

Modified: lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java?rev=1515909&r1=1515908&r2=1515909&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java (original)
+++ lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java Tue Aug 20 18:13:44 2013
@@ -112,7 +112,7 @@ public abstract class BaseTokenStreamTes
   //   - offsets only move forwards (startOffset >=
   //     lastStartOffset)
   public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[],
-                                               int posLengths[], Integer finalOffset, boolean[] keywordAtts,
+                                               int posLengths[], Integer finalOffset, Integer finalPosInc, boolean[] keywordAtts,
                                                boolean offsetsAreCorrect) throws IOException {
     assertNotNull(output);
     CheckClearAttributesAttribute checkClearAtt = ts.addAttribute(CheckClearAttributesAttribute.class);
@@ -136,7 +136,7 @@ public abstract class BaseTokenStreamTes
     }
     
     PositionIncrementAttribute posIncrAtt = null;
-    if (posIncrements != null) {
+    if (posIncrements != null || finalPosInc != null) {
       assertTrue("has no PositionIncrementAttribute", ts.hasAttribute(PositionIncrementAttribute.class));
       posIncrAtt = ts.getAttribute(PositionIncrementAttribute.class);
     }
@@ -255,19 +255,43 @@ public abstract class BaseTokenStreamTes
         assertTrue("posLength must be >= 1", posLengthAtt.getPositionLength() >= 1);
       }
     }
+
     if (ts.incrementToken()) {
       fail("TokenStream has more tokens than expected (expected count=" + output.length + "); extra token=" + termAtt.toString());
     }
+
+    // repeat our extra safety checks for end()
+    ts.clearAttributes();
+    if (termAtt != null) termAtt.setEmpty().append("bogusTerm");
+    if (offsetAtt != null) offsetAtt.setOffset(14584724,24683243);
+    if (typeAtt != null) typeAtt.setType("bogusType");
+    if (posIncrAtt != null) posIncrAtt.setPositionIncrement(45987657);
+    if (posLengthAtt != null) posLengthAtt.setPositionLength(45987653);
+    
+    checkClearAtt.getAndResetClearCalled(); // reset it, because we called clearAttribute() before
+
     ts.end();
+    assertTrue("super.end()/clearAttributes() was not called correctly in end()", checkClearAtt.getAndResetClearCalled());
+    
     if (finalOffset != null) {
-      assertEquals("finalOffset ", finalOffset.intValue(), offsetAtt.endOffset());
+      assertEquals("finalOffset", finalOffset.intValue(), offsetAtt.endOffset());
     }
     if (offsetAtt != null) {
       assertTrue("finalOffset must be >= 0", offsetAtt.endOffset() >= 0);
     }
+    if (finalPosInc != null) {
+      assertEquals("finalPosInc", finalPosInc.intValue(), posIncrAtt.getPositionIncrement());
+    }
+
     ts.close();
   }
   
+  public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[],
+                                               int posLengths[], Integer finalOffset, boolean[] keywordAtts,
+                                               boolean offsetsAreCorrect) throws IOException {
+    assertTokenStreamContents(ts, output, startOffsets, endOffsets, types, posIncrements, posLengths, finalOffset, null, null, offsetsAreCorrect);
+  }
+
   public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[], int posLengths[], Integer finalOffset, boolean offsetsAreCorrect) throws IOException {
     assertTokenStreamContents(ts, output, startOffsets, endOffsets, types, posIncrements, posLengths, finalOffset, null, offsetsAreCorrect);
   }

Modified: lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenFilter.java?rev=1515909&r1=1515908&r2=1515909&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenFilter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenFilter.java Tue Aug 20 18:13:44 2013
@@ -58,7 +58,8 @@ public final class MockTokenFilter exten
 
   private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
   private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
-  
+  private int skippedPositions;
+
   /**
    * Create a new MockTokenFilter.
    * 
@@ -76,7 +77,7 @@ public final class MockTokenFilter exten
     // initial token with posInc=0 ever
     
     // return the first non-stop word found
-    int skippedPositions = 0;
+    skippedPositions = 0;
     while (input.incrementToken()) {
       if (!filter.run(termAtt.buffer(), 0, termAtt.length())) {
         posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions);
@@ -87,4 +88,16 @@ public final class MockTokenFilter exten
     // reached EOS -- return false
     return false;
   }
+
+  @Override
+  public void end() throws IOException {
+    super.end();
+    posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions);
+  }
+
+  @Override
+  public void reset() throws IOException {
+    super.reset();
+    skippedPositions = 0;
+  }
 }

Modified: lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenizer.java?rev=1515909&r1=1515908&r2=1515909&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenizer.java Tue Aug 20 18:13:44 2013
@@ -244,6 +244,7 @@ public class MockTokenizer extends Token
 
   @Override
   public void end() throws IOException {
+    super.end();
     int finalOffset = correctOffset(off);
     offsetAtt.setOffset(finalOffset, finalOffset);
     // some tokenizers, such as limiting tokenizers, call end() before incrementToken() returns false.