You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2013/08/20 19:13:07 UTC
svn commit: r1515887 - in /lucene/dev/trunk/lucene: ./
analysis/common/src/java/org/apache/lucene/analysis/core/
analysis/common/src/java/org/apache/lucene/analysis/ngram/
analysis/common/src/java/org/apache/lucene/analysis/path/
analysis/common/src/ja...
Author: mikemccand
Date: Tue Aug 20 17:13:06 2013
New Revision: 1515887
URL: http://svn.apache.org/r1515887
Log:
LUCENE-3849: end() now sets position increment, so any trailing holes are counted
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/path/ReversePathHierarchyTokenizer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/FilteringTokenFilter.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilter.java
lucene/dev/trunk/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java
lucene/dev/trunk/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java
lucene/dev/trunk/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java
lucene/dev/trunk/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java
lucene/dev/trunk/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java
lucene/dev/trunk/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/analysis/TokenStream.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java
lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/BinaryTokenStream.java
lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java
lucene/dev/trunk/lucene/misc/src/test/org/apache/lucene/index/sorter/SorterTestBase.java
lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/SuggestStopFilter.java
lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenFilter.java
lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenizer.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1515887&r1=1515886&r2=1515887&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Tue Aug 20 17:13:06 2013
@@ -126,6 +126,11 @@ Bug Fixes
the default one) have their own limits (David Smiley, Robert Muir,
Mike McCandless)
+* LUCENE-3849: TokenStreams now set the position increment in end(),
+ so we can handle trailing holes. If you have a custom TokenStream
+ implementing end() then be sure it calls super.end(). (Robert Muir,
+ Mike McCandless)
+
API Changes
* LUCENE-5094: Add ramBytesUsed() to MultiDocValues.OrdinalMap.
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java?rev=1515887&r1=1515886&r2=1515887&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java Tue Aug 20 17:13:06 2013
@@ -80,7 +80,8 @@ public final class KeywordTokenizer exte
}
@Override
- public final void end() {
+ public final void end() throws IOException {
+ super.end();
// set final offset
offsetAtt.setOffset(finalOffset, finalOffset);
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java?rev=1515887&r1=1515886&r2=1515887&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java Tue Aug 20 17:13:06 2013
@@ -217,13 +217,15 @@ public class NGramTokenizer extends Toke
}
@Override
- public final void end() {
+ public final void end() throws IOException {
+ super.end();
assert bufferStart <= bufferEnd;
int endOffset = offset;
for (int i = bufferStart; i < bufferEnd; ++i) {
endOffset += Character.charCount(buffer[i]);
}
endOffset = correctOffset(endOffset);
+ // set final offset
offsetAtt.setOffset(endOffset, endOffset);
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizer.java?rev=1515887&r1=1515886&r2=1515887&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizer.java Tue Aug 20 17:13:06 2013
@@ -191,7 +191,8 @@ public class PathHierarchyTokenizer exte
}
@Override
- public final void end() {
+ public final void end() throws IOException {
+ super.end();
// set final offset
int finalOffset = correctOffset(charsRead);
offsetAtt.setOffset(finalOffset, finalOffset);
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/path/ReversePathHierarchyTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/path/ReversePathHierarchyTokenizer.java?rev=1515887&r1=1515886&r2=1515887&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/path/ReversePathHierarchyTokenizer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/path/ReversePathHierarchyTokenizer.java Tue Aug 20 17:13:06 2013
@@ -176,7 +176,8 @@ public class ReversePathHierarchyTokeniz
}
@Override
- public final void end() {
+ public final void end() throws IOException {
+ super.end();
// set final offset
offsetAtt.setOffset(finalOffset, finalOffset);
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java?rev=1515887&r1=1515886&r2=1515887&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java Tue Aug 20 17:13:06 2013
@@ -130,7 +130,8 @@ public final class PatternTokenizer exte
}
@Override
- public void end() {
+ public void end() throws IOException {
+ super.end();
final int ofs = correctOffset(str.length());
offsetAtt.setOffset(ofs, ofs);
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java?rev=1515887&r1=1515886&r2=1515887&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java Tue Aug 20 17:13:06 2013
@@ -76,6 +76,8 @@ public final class ClassicTokenizer exte
"<CJ>",
"<ACRONYM_DEP>"
};
+
+ private int skippedPositions;
private int maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;
@@ -130,7 +132,7 @@ public final class ClassicTokenizer exte
@Override
public final boolean incrementToken() throws IOException {
clearAttributes();
- int posIncr = 1;
+ skippedPositions = 0;
while(true) {
int tokenType = scanner.getNextToken();
@@ -140,7 +142,7 @@ public final class ClassicTokenizer exte
}
if (scanner.yylength() <= maxTokenLength) {
- posIncrAtt.setPositionIncrement(posIncr);
+ posIncrAtt.setPositionIncrement(skippedPositions+1);
scanner.getText(termAtt);
final int start = scanner.yychar();
offsetAtt.setOffset(correctOffset(start), correctOffset(start+termAtt.length()));
@@ -155,19 +157,23 @@ public final class ClassicTokenizer exte
} else
// When we skip a too-long term, we still increment the
// position increment
- posIncr++;
+ skippedPositions++;
}
}
@Override
- public final void end() {
+ public final void end() throws IOException {
+ super.end();
// set final offset
int finalOffset = correctOffset(scanner.yychar() + scanner.yylength());
offsetAtt.setOffset(finalOffset, finalOffset);
+ // adjust any skipped tokens
+ posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement()+skippedPositions);
}
@Override
public void reset() throws IOException {
scanner.yyreset(input);
+ skippedPositions = 0;
}
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java?rev=1515887&r1=1515886&r2=1515887&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java Tue Aug 20 17:13:06 2013
@@ -90,6 +90,8 @@ public final class StandardTokenizer ext
"<KATAKANA>",
"<HANGUL>"
};
+
+ private int skippedPositions;
private int maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;
@@ -144,7 +146,7 @@ public final class StandardTokenizer ext
@Override
public final boolean incrementToken() throws IOException {
clearAttributes();
- int posIncr = 1;
+ skippedPositions = 0;
while(true) {
int tokenType = scanner.getNextToken();
@@ -154,7 +156,7 @@ public final class StandardTokenizer ext
}
if (scanner.yylength() <= maxTokenLength) {
- posIncrAtt.setPositionIncrement(posIncr);
+ posIncrAtt.setPositionIncrement(skippedPositions+1);
scanner.getText(termAtt);
final int start = scanner.yychar();
offsetAtt.setOffset(correctOffset(start), correctOffset(start+termAtt.length()));
@@ -163,19 +165,23 @@ public final class StandardTokenizer ext
} else
// When we skip a too-long term, we still increment the
// position increment
- posIncr++;
+ skippedPositions++;
}
}
@Override
- public final void end() {
+ public final void end() throws IOException {
+ super.end();
// set final offset
int finalOffset = correctOffset(scanner.yychar() + scanner.yylength());
offsetAtt.setOffset(finalOffset, finalOffset);
+ // adjust any skipped tokens
+ posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement()+skippedPositions);
}
@Override
public void reset() throws IOException {
scanner.yyreset(input);
+ skippedPositions = 0;
}
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java?rev=1515887&r1=1515886&r2=1515887&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java Tue Aug 20 17:13:06 2013
@@ -76,6 +76,8 @@ public final class UAX29URLEmailTokenize
"<URL>",
"<EMAIL>",
};
+
+ private int skippedPositions;
private int maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;
@@ -123,7 +125,7 @@ public final class UAX29URLEmailTokenize
@Override
public final boolean incrementToken() throws IOException {
clearAttributes();
- int posIncr = 1;
+ skippedPositions = 0;
while(true) {
int tokenType = scanner.getNextToken();
@@ -133,7 +135,7 @@ public final class UAX29URLEmailTokenize
}
if (scanner.yylength() <= maxTokenLength) {
- posIncrAtt.setPositionIncrement(posIncr);
+ posIncrAtt.setPositionIncrement(skippedPositions+1);
scanner.getText(termAtt);
final int start = scanner.yychar();
offsetAtt.setOffset(correctOffset(start), correctOffset(start+termAtt.length()));
@@ -142,19 +144,23 @@ public final class UAX29URLEmailTokenize
} else
// When we skip a too-long term, we still increment the
// position increment
- posIncr++;
+ skippedPositions++;
}
}
@Override
- public final void end() {
+ public final void end() throws IOException {
+ super.end();
// set final offset
int finalOffset = correctOffset(scanner.yychar() + scanner.yylength());
offsetAtt.setOffset(finalOffset, finalOffset);
+ // adjust any skipped tokens
+ posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement()+skippedPositions);
}
@Override
public void reset() throws IOException {
scanner.yyreset(input);
+ skippedPositions = 0;
}
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java?rev=1515887&r1=1515886&r2=1515887&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java Tue Aug 20 17:13:06 2013
@@ -142,7 +142,8 @@ public abstract class CharTokenizer exte
}
@Override
- public final void end() {
+ public final void end() throws IOException {
+ super.end();
// set final offset
offsetAtt.setOffset(finalOffset, finalOffset);
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/FilteringTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/FilteringTokenFilter.java?rev=1515887&r1=1515886&r2=1515887&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/FilteringTokenFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/FilteringTokenFilter.java Tue Aug 20 17:13:06 2013
@@ -34,6 +34,7 @@ public abstract class FilteringTokenFilt
protected final Version version;
private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
+ private int skippedPositions;
/**
* Create a new {@link FilteringTokenFilter}.
@@ -50,7 +51,7 @@ public abstract class FilteringTokenFilt
@Override
public final boolean incrementToken() throws IOException {
- int skippedPositions = 0;
+ skippedPositions = 0;
while (input.incrementToken()) {
if (accept()) {
if (skippedPositions != 0) {
@@ -68,6 +69,12 @@ public abstract class FilteringTokenFilt
@Override
public void reset() throws IOException {
super.reset();
+ skippedPositions = 0;
}
+ @Override
+ public void end() throws IOException {
+ super.end();
+ posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions);
+ }
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java?rev=1515887&r1=1515886&r2=1515887&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java Tue Aug 20 17:13:06 2013
@@ -309,7 +309,8 @@ public final class WikipediaTokenizer ex
}
@Override
- public void end() {
+ public void end() throws IOException {
+ super.end();
// set final offset
final int finalOffset = correctOffset(scanner.yychar() + scanner.yylength());
this.offsetAtt.setOffset(finalOffset, finalOffset);
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilter.java?rev=1515887&r1=1515886&r2=1515887&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilter.java Tue Aug 20 17:13:06 2013
@@ -90,6 +90,22 @@ public class TestStopFilter extends Base
StopFilter stpf01 = new StopFilter(TEST_VERSION_CURRENT, stpf0, stopSet1); // two stop filters concatenated!
doTestStopPositons(stpf01);
}
+
+ // LUCENE-3849: make sure after .end() we see the "ending" posInc
+ public void testEndStopword() throws Exception {
+ CharArraySet stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, "of");
+ StopFilter stpf = new StopFilter(Version.LUCENE_40, new MockTokenizer(new StringReader("test of"), MockTokenizer.WHITESPACE, false), stopSet);
+ assertTokenStreamContents(stpf, new String[] { "test" },
+ new int[] {0},
+ new int[] {4},
+ null,
+ new int[] {1},
+ null,
+ 7,
+ 1,
+ null,
+ true);
+ }
private void doTestStopPositons(StopFilter stpf) throws IOException {
CharTermAttribute termAtt = stpf.getAttribute(CharTermAttribute.class);
Modified: lucene/dev/trunk/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java?rev=1515887&r1=1515886&r2=1515887&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java (original)
+++ lucene/dev/trunk/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java Tue Aug 20 17:13:06 2013
@@ -120,7 +120,8 @@ public final class ICUTokenizer extends
}
@Override
- public void end() {
+ public void end() throws IOException {
+ super.end();
final int finalOffset = (length < 0) ? offset : offset + length;
offsetAtt.setOffset(correctOffset(finalOffset), correctOffset(finalOffset));
}
Modified: lucene/dev/trunk/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java?rev=1515887&r1=1515886&r2=1515887&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java (original)
+++ lucene/dev/trunk/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java Tue Aug 20 17:13:06 2013
@@ -280,7 +280,8 @@ public final class JapaneseTokenizer ext
}
@Override
- public void end() {
+ public void end() throws IOException {
+ super.end();
// Set final offset
int finalOffset = correctOffset(pos);
offsetAtt.setOffset(finalOffset, finalOffset);
Modified: lucene/dev/trunk/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java?rev=1515887&r1=1515886&r2=1515887&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java (original)
+++ lucene/dev/trunk/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java Tue Aug 20 17:13:06 2013
@@ -112,7 +112,8 @@ public final class SentenceTokenizer ext
}
@Override
- public void end() {
+ public void end() throws IOException {
+ super.end();
// set final offset
final int finalOffset = correctOffset(tokenEnd);
offsetAtt.setOffset(finalOffset, finalOffset);
Modified: lucene/dev/trunk/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java?rev=1515887&r1=1515886&r2=1515887&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java (original)
+++ lucene/dev/trunk/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java Tue Aug 20 17:13:06 2013
@@ -91,9 +91,4 @@ public abstract class BaseUIMATokenizer
public void reset() throws IOException {
iterator = null;
}
-
- @Override
- public void end() throws IOException {
- iterator = null;
- }
}
Modified: lucene/dev/trunk/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java?rev=1515887&r1=1515886&r2=1515887&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java (original)
+++ lucene/dev/trunk/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java Tue Aug 20 17:13:06 2013
@@ -86,7 +86,7 @@ public final class UIMAAnnotationsTokeni
@Override
public void end() throws IOException {
- offsetAttr.setOffset(finalOffset, finalOffset);
super.end();
+ offsetAttr.setOffset(finalOffset, finalOffset);
}
}
Modified: lucene/dev/trunk/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java?rev=1515887&r1=1515886&r2=1515887&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java (original)
+++ lucene/dev/trunk/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java Tue Aug 20 17:13:06 2013
@@ -107,8 +107,8 @@ public final class UIMATypeAwareAnnotati
@Override
public void end() throws IOException {
- offsetAttr.setOffset(finalOffset, finalOffset);
super.end();
+ offsetAttr.setOffset(finalOffset, finalOffset);
}
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/analysis/TokenStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/analysis/TokenStream.java?rev=1515887&r1=1515886&r2=1515887&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/analysis/TokenStream.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/analysis/TokenStream.java Tue Aug 20 17:13:06 2013
@@ -21,6 +21,7 @@ import java.io.IOException;
import java.io.Closeable;
import java.lang.reflect.Modifier;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
@@ -159,11 +160,18 @@ public abstract class TokenStream extend
* setting the final offset of a stream. The final offset of a stream might
* differ from the offset of the last token eg in case one or more whitespaces
* followed after the last token, but a WhitespaceTokenizer was used.
+ * <p>
+ * Additionally any skipped positions (such as those removed by a stopfilter)
+ * can be applied to the position increment, or any adjustment of other
+ * attributes where the end-of-stream value may be important.
*
* @throws IOException If an I/O error occurs
*/
public void end() throws IOException {
- // do nothing by default
+ clearAttributes(); // LUCENE-3849: don't consume dirty atts
+ if (hasAttribute(PositionIncrementAttribute.class)) {
+ getAttribute(PositionIncrementAttribute.class).setPositionIncrement(0);
+ }
}
/**
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java?rev=1515887&r1=1515886&r2=1515887&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java Tue Aug 20 17:13:06 2013
@@ -175,7 +175,9 @@ final class DocInverterPerField extends
}
// trigger streams to perform end-of-stream operations
stream.end();
-
+ // TODO: maybe add some safety? then again, its already checked
+ // when we come back around to the field...
+ fieldState.position += posIncrAttribute.getPositionIncrement();
fieldState.offset += offsetAttribute.endOffset();
success2 = true;
} finally {
Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/BinaryTokenStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/BinaryTokenStream.java?rev=1515887&r1=1515886&r2=1515887&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/BinaryTokenStream.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/BinaryTokenStream.java Tue Aug 20 17:13:06 2013
@@ -31,16 +31,19 @@ import org.apache.lucene.analysis.Canned
*/
public final class BinaryTokenStream extends TokenStream {
private final ByteTermAttribute bytesAtt = addAttribute(ByteTermAttribute.class);
+ private final BytesRef bytes;
private boolean available = true;
public BinaryTokenStream(BytesRef bytes) {
- bytesAtt.setBytesRef(bytes);
+ this.bytes = bytes;
}
@Override
public boolean incrementToken() {
if (available) {
+ clearAttributes();
available = false;
+ bytesAtt.setBytesRef(bytes);
return true;
}
return false;
Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java?rev=1515887&r1=1515886&r2=1515887&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java Tue Aug 20 17:13:06 2013
@@ -50,6 +50,7 @@ import org.apache.lucene.search.DocIdSet
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.AlreadyClosedException;
@@ -72,6 +73,9 @@ import org.apache.lucene.util.LuceneTest
import org.apache.lucene.util.SetOnce;
import org.apache.lucene.util.ThreadInterruptedException;
import org.apache.lucene.util._TestUtil;
+import org.apache.lucene.util.automaton.Automaton;
+import org.apache.lucene.util.automaton.BasicAutomata;
+import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.apache.lucene.util.packed.PackedInts;
import org.junit.Test;
@@ -1899,6 +1903,65 @@ public class TestIndexWriter extends Luc
}
}
+ // LUCENE-3849
+ public void testStopwordsPosIncHole() throws Exception {
+ Directory dir = newDirectory();
+ Analyzer a = new Analyzer() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new MockTokenizer(reader);
+ TokenStream stream = new MockTokenFilter(tokenizer, MockTokenFilter.ENGLISH_STOPSET);
+ return new TokenStreamComponents(tokenizer, stream);
+ }
+ };
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, a);
+ Document doc = new Document();
+ doc.add(new TextField("body", "just a", Field.Store.NO));
+ doc.add(new TextField("body", "test of gaps", Field.Store.NO));
+ iw.addDocument(doc);
+ IndexReader ir = iw.getReader();
+ iw.close();
+ IndexSearcher is = newSearcher(ir);
+ PhraseQuery pq = new PhraseQuery();
+ pq.add(new Term("body", "just"), 0);
+ pq.add(new Term("body", "test"), 2);
+ // body:"just ? test"
+ assertEquals(1, is.search(pq, 5).totalHits);
+ ir.close();
+ dir.close();
+ }
+
+ // LUCENE-3849
+ public void testStopwordsPosIncHole2() throws Exception {
+ // use two stopfilters for testing here
+ Directory dir = newDirectory();
+ final Automaton secondSet = BasicAutomata.makeString("foobar");
+ Analyzer a = new Analyzer() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new MockTokenizer(reader);
+ TokenStream stream = new MockTokenFilter(tokenizer, MockTokenFilter.ENGLISH_STOPSET);
+ stream = new MockTokenFilter(stream, new CharacterRunAutomaton(secondSet));
+ return new TokenStreamComponents(tokenizer, stream);
+ }
+ };
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, a);
+ Document doc = new Document();
+ doc.add(new TextField("body", "just a foobar", Field.Store.NO));
+ doc.add(new TextField("body", "test of gaps", Field.Store.NO));
+ iw.addDocument(doc);
+ IndexReader ir = iw.getReader();
+ iw.close();
+ IndexSearcher is = newSearcher(ir);
+ PhraseQuery pq = new PhraseQuery();
+ pq.add(new Term("body", "just"), 0);
+ pq.add(new Term("body", "test"), 3);
+ // body:"just ? ? test"
+ assertEquals(1, is.search(pq, 5).totalHits);
+ ir.close();
+ dir.close();
+ }
+
// here we do better, there is no current segments file, so we don't delete anything.
// however, if you actually go and make a commit, the next time you run indexwriter
// this file will be gone.
Modified: lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java?rev=1515887&r1=1515886&r2=1515887&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java (original)
+++ lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java Tue Aug 20 17:13:06 2013
@@ -555,12 +555,16 @@ public class DirectoryTaxonomyWriter imp
private CharTermAttribute termAtt;
private PositionIncrementAttribute posIncrAtt;
private boolean returned;
+ private int val;
+ private final String word;
+
public SinglePositionTokenStream(String word) {
termAtt = addAttribute(CharTermAttribute.class);
posIncrAtt = addAttribute(PositionIncrementAttribute.class);
- termAtt.setEmpty().append(word);
+ this.word = word;
returned = true;
}
+
/**
* Set the value we want to keep, as the position increment.
* Note that when TermPositions.nextPosition() is later used to
@@ -574,15 +578,21 @@ public class DirectoryTaxonomyWriter imp
* This change is described in Lucene's JIRA: LUCENE-1542.
*/
public void set(int val) {
- posIncrAtt.setPositionIncrement(val);
+ this.val = val;
returned = false;
}
+
@Override
public boolean incrementToken() throws IOException {
if (returned) {
return false;
}
- return returned = true;
+ clearAttributes();
+ posIncrAtt.setPositionIncrement(val);
+ termAtt.setEmpty();
+ termAtt.append(word);
+ returned = true;
+ return true;
}
}
Modified: lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java?rev=1515887&r1=1515886&r2=1515887&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java Tue Aug 20 17:13:06 2013
@@ -264,7 +264,8 @@ public abstract class AbstractTestCase e
}
@Override
- public final void end(){
+ public final void end() throws IOException {
+ super.end();
offsetAtt.setOffset(getFinalOffset(),getFinalOffset());
}
Modified: lucene/dev/trunk/lucene/misc/src/test/org/apache/lucene/index/sorter/SorterTestBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/misc/src/test/org/apache/lucene/index/sorter/SorterTestBase.java?rev=1515887&r1=1515886&r2=1515887&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/misc/src/test/org/apache/lucene/index/sorter/SorterTestBase.java (original)
+++ lucene/dev/trunk/lucene/misc/src/test/org/apache/lucene/index/sorter/SorterTestBase.java Tue Aug 20 17:13:06 2013
@@ -114,7 +114,6 @@ public abstract class SorterTestBase ext
public PositionsTokenStream() {
term = addAttribute(CharTermAttribute.class);
- term.append(DOC_POSITIONS_TERM);
payload = addAttribute(PayloadAttribute.class);
offset = addAttribute(OffsetAttribute.class);
}
@@ -125,6 +124,8 @@ public abstract class SorterTestBase ext
return false;
}
+ clearAttributes();
+ term.append(DOC_POSITIONS_TERM);
payload.setPayload(new BytesRef(Integer.toString(pos)));
offset.setOffset(off, off);
--pos;
Modified: lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/SuggestStopFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/SuggestStopFilter.java?rev=1515887&r1=1515886&r2=1515887&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/SuggestStopFilter.java (original)
+++ lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/SuggestStopFilter.java Tue Aug 20 17:13:06 2013
@@ -50,7 +50,6 @@ public final class SuggestStopFilter ext
private final CharArraySet stopWords;
private State endState;
- private boolean ended;
/** Sole constructor. */
public SuggestStopFilter(TokenStream input, CharArraySet stopWords) {
@@ -61,28 +60,24 @@ public final class SuggestStopFilter ext
@Override
public void reset() throws IOException {
super.reset();
- ended = false;
endState = null;
}
@Override
public void end() throws IOException {
- if (!ended) {
+ if (endState == null) {
super.end();
} else {
// NOTE: we already called .end() from our .next() when
// the stream was complete, so we do not call
// super.end() here
-
- if (endState != null) {
- restoreState(endState);
- }
+ restoreState(endState);
}
}
@Override
public boolean incrementToken() throws IOException {
- if (ended) {
+ if (endState != null) {
return false;
}
@@ -101,8 +96,9 @@ public final class SuggestStopFilter ext
// It was a stopword; skip it
skippedPositions += posInc;
} else {
+ clearAttributes();
input.end();
- ended = true;
+ endState = captureState();
int finalEndOffset = offsetAtt.endOffset();
assert finalEndOffset >= endOffset;
if (finalEndOffset > endOffset) {
@@ -112,7 +108,6 @@ public final class SuggestStopFilter ext
} else {
// No token separator after final token that
// looked like a stop-word; don't filter it:
- endState = captureState();
restoreState(sav);
posIncAtt.setPositionIncrement(skippedPositions + posIncAtt.getPositionIncrement());
keywordAtt.setKeyword(true);
Modified: lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java?rev=1515887&r1=1515886&r2=1515887&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java (original)
+++ lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java Tue Aug 20 17:13:06 2013
@@ -112,7 +112,7 @@ public abstract class BaseTokenStreamTes
// - offsets only move forwards (startOffset >=
// lastStartOffset)
public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[],
- int posLengths[], Integer finalOffset, boolean[] keywordAtts,
+ int posLengths[], Integer finalOffset, Integer finalPosInc, boolean[] keywordAtts,
boolean offsetsAreCorrect) throws IOException {
assertNotNull(output);
CheckClearAttributesAttribute checkClearAtt = ts.addAttribute(CheckClearAttributesAttribute.class);
@@ -136,7 +136,7 @@ public abstract class BaseTokenStreamTes
}
PositionIncrementAttribute posIncrAtt = null;
- if (posIncrements != null) {
+ if (posIncrements != null || finalPosInc != null) {
assertTrue("has no PositionIncrementAttribute", ts.hasAttribute(PositionIncrementAttribute.class));
posIncrAtt = ts.getAttribute(PositionIncrementAttribute.class);
}
@@ -255,19 +255,43 @@ public abstract class BaseTokenStreamTes
assertTrue("posLength must be >= 1", posLengthAtt.getPositionLength() >= 1);
}
}
+
if (ts.incrementToken()) {
fail("TokenStream has more tokens than expected (expected count=" + output.length + "); extra token=" + termAtt.toString());
}
+
+ // repeat our extra safety checks for end()
+ ts.clearAttributes();
+ if (termAtt != null) termAtt.setEmpty().append("bogusTerm");
+ if (offsetAtt != null) offsetAtt.setOffset(14584724,24683243);
+ if (typeAtt != null) typeAtt.setType("bogusType");
+ if (posIncrAtt != null) posIncrAtt.setPositionIncrement(45987657);
+ if (posLengthAtt != null) posLengthAtt.setPositionLength(45987653);
+
+ checkClearAtt.getAndResetClearCalled(); // reset it, because we called clearAttributes() before
+
ts.end();
+ assertTrue("super.end()/clearAttributes() was not called correctly in end()", checkClearAtt.getAndResetClearCalled());
+
if (finalOffset != null) {
- assertEquals("finalOffset ", finalOffset.intValue(), offsetAtt.endOffset());
+ assertEquals("finalOffset", finalOffset.intValue(), offsetAtt.endOffset());
}
if (offsetAtt != null) {
assertTrue("finalOffset must be >= 0", offsetAtt.endOffset() >= 0);
}
+ if (finalPosInc != null) {
+ assertEquals("finalPosInc", finalPosInc.intValue(), posIncrAtt.getPositionIncrement());
+ }
+
ts.close();
}
+ // Overload without finalPosInc: null is passed for it, and the caller's keywordAtts is forwarded (it used to be replaced by null).
+ public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[],
+ int posLengths[], Integer finalOffset, boolean[] keywordAtts, boolean offsetsAreCorrect) throws IOException {
+ assertTokenStreamContents(ts, output, startOffsets, endOffsets, types, posIncrements, posLengths, finalOffset, null, keywordAtts, offsetsAreCorrect);
+ }
+
public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[], int posLengths[], Integer finalOffset, boolean offsetsAreCorrect) throws IOException {
assertTokenStreamContents(ts, output, startOffsets, endOffsets, types, posIncrements, posLengths, finalOffset, null, offsetsAreCorrect); // the null fills the keywordAtts slot of the 10-argument overload
}
Modified: lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenFilter.java?rev=1515887&r1=1515886&r2=1515887&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenFilter.java (original)
+++ lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenFilter.java Tue Aug 20 17:13:06 2013
@@ -58,7 +58,8 @@ public final class MockTokenFilter exten
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
-
+ private int skippedPositions;
+
/**
* Create a new MockTokenFilter.
*
@@ -76,7 +77,7 @@ public final class MockTokenFilter exten
// initial token with posInc=0 ever
// return the first non-stop word found
- int skippedPositions = 0;
+ skippedPositions = 0;
while (input.incrementToken()) {
if (!filter.run(termAtt.buffer(), 0, termAtt.length())) {
posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions);
@@ -87,4 +88,16 @@ public final class MockTokenFilter exten
// reached EOS -- return false
return false;
}
+
+ @Override
+ public void end() throws IOException {
+ // Let the rest of the chain set its end-of-stream state first, then add our own trailing holes on top of it.
+ super.end();
+ final int trailingHoles = skippedPositions;
+ final int incrementFromChain = posIncrAtt.getPositionIncrement();
+ posIncrAtt.setPositionIncrement(incrementFromChain + trailingHoles);
+ }
+
+ @Override
+ public void reset() throws IOException {
+ super.reset();
+ // skippedPositions is a field read by end(), so zero it here to keep a count from an earlier use of this instance out of the next stream
+ skippedPositions = 0;
+ }
}
Modified: lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenizer.java?rev=1515887&r1=1515886&r2=1515887&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenizer.java (original)
+++ lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenizer.java Tue Aug 20 17:13:06 2013
@@ -244,6 +244,7 @@ public class MockTokenizer extends Token
@Override
public void end() throws IOException {
+ super.end();
int finalOffset = correctOffset(off);
offsetAtt.setOffset(finalOffset, finalOffset);
// some tokenizers, such as limiting tokenizers, call end() before incrementToken() returns false.