You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/08/30 19:50:19 UTC
svn commit: r1379036 - in /lucene/dev/trunk: lucene/
lucene/analysis/common/src/java/org/apache/lucene/analysis/core/
lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/
lucene/analysis/common/src/java/org/apache/lucene/analysis/standar...
Author: rmuir
Date: Thu Aug 30 17:50:18 2012
New Revision: 1379036
URL: http://svn.apache.org/viewvc?rev=1379036&view=rev
Log:
LUCENE-4343: clear up more Tokenizer.setReader/TokenStream.reset issues
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizer.java
lucene/dev/trunk/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java
lucene/dev/trunk/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java
lucene/dev/trunk/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java
lucene/dev/trunk/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/analysis/TokenStream.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/analysis/Tokenizer.java
lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestTermRangeQuery.java
lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java
lucene/dev/trunk/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiPhraseQueryParsing.java
lucene/dev/trunk/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/PrefixCellsTokenizer.java
lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenizer.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/TrieTokenizerFactory.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/BoolField.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1379036&r1=1379035&r2=1379036&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Thu Aug 30 17:50:18 2012
@@ -77,6 +77,10 @@ API Changes
fields in a stored document, has been replaced with the simpler
StoredFieldVisitor API. (Mike McCandless)
+* LUCENE-4343: Made Tokenizer.setReader final. This is a setter that should
+ not be overridden by subclasses: per-stream initialization should happen
+ in reset(). (Robert Muir)
+
Bug Fixes
* LUCENE-4297: BooleanScorer2 would multiply the coord() factor
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java?rev=1379036&r1=1379035&r2=1379036&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java Thu Aug 30 17:50:18 2012
@@ -94,8 +94,7 @@ public final class KeywordTokenizer exte
}
@Override
- public void setReader(Reader input) throws IOException {
- super.setReader(input);
+ public void reset() throws IOException {
this.done = false;
}
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java?rev=1379036&r1=1379035&r2=1379036&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java Thu Aug 30 17:50:18 2012
@@ -78,9 +78,6 @@ public final class PatternTokenizer exte
if (group >= 0 && group > matcher.groupCount()) {
throw new IllegalArgumentException("invalid group specified: pattern only has: " + matcher.groupCount() + " capturing groups");
}
- fillBuffer(str, input);
- matcher.reset(str);
- index = 0;
}
@Override
@@ -136,8 +133,7 @@ public final class PatternTokenizer exte
}
@Override
- public void setReader(Reader input) throws IOException {
- super.setReader(input);
+ public void reset() throws IOException {
fillBuffer(str, input);
matcher.reset(str);
index = 0;
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java?rev=1379036&r1=1379035&r2=1379036&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java Thu Aug 30 17:50:18 2012
@@ -175,8 +175,7 @@ public final class ClassicTokenizer exte
}
@Override
- public void setReader(Reader reader) throws IOException {
- super.setReader(reader);
- scanner.yyreset(reader);
+ public void reset() throws IOException {
+ scanner.yyreset(input);
}
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java?rev=1379036&r1=1379035&r2=1379036&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java Thu Aug 30 17:50:18 2012
@@ -183,8 +183,7 @@ public final class StandardTokenizer ext
}
@Override
- public void setReader(Reader reader) throws IOException {
- super.setReader(reader);
- scanner.yyreset(reader);
+ public void reset() throws IOException {
+ scanner.yyreset(input);
}
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java?rev=1379036&r1=1379035&r2=1379036&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java Thu Aug 30 17:50:18 2012
@@ -162,8 +162,7 @@ public final class UAX29URLEmailTokenize
}
@Override
- public void setReader(Reader reader) throws IOException {
- super.setReader(reader);
- scanner.yyreset(reader);
+ public void reset() throws IOException {
+ scanner.yyreset(input);
}
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java?rev=1379036&r1=1379035&r2=1379036&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java Thu Aug 30 17:50:18 2012
@@ -78,7 +78,8 @@ public abstract class CharTokenizer exte
charUtils = CharacterUtils.getInstance(matchVersion);
}
- private int offset = 0, bufferIndex = 0, dataLen = 0, finalOffset = 0;
+ // note: bufferIndex is -1 here so that, on a best-effort basis, consumers that don't call reset() hit an ArrayIndexOutOfBoundsException
+ private int offset = 0, bufferIndex = -1, dataLen = 0, finalOffset = 0;
private static final int MAX_WORD_LEN = 255;
private static final int IO_BUFFER_SIZE = 4096;
@@ -162,8 +163,7 @@ public abstract class CharTokenizer exte
}
@Override
- public void setReader(Reader input) throws IOException {
- super.setReader(input);
+ public void reset() throws IOException {
bufferIndex = 0;
offset = 0;
dataLen = 0;
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java?rev=1379036&r1=1379035&r2=1379036&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java Thu Aug 30 17:50:18 2012
@@ -318,19 +318,13 @@ public final class WikipediaTokenizer ex
*/
@Override
public void reset() throws IOException {
- super.reset();
+ scanner.yyreset(input);
tokens = null;
scanner.reset();
first = true;
}
@Override
- public void setReader(Reader reader) throws IOException {
- super.setReader(reader);
- scanner.yyreset(input);
- }
-
- @Override
public void end() {
// set final offset
final int finalOffset = correctOffset(scanner.yychar() + scanner.yylength());
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java?rev=1379036&r1=1379035&r2=1379036&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java Thu Aug 30 17:50:18 2012
@@ -39,6 +39,7 @@ public class CommonGramsFilterTest exten
CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
CharTermAttribute term = cgf.addAttribute(CharTermAttribute.class);
+ cgf.reset();
assertTrue(cgf.incrementToken());
assertEquals("How", term.toString());
assertTrue(cgf.incrementToken());
@@ -61,6 +62,7 @@ public class CommonGramsFilterTest exten
CommonGramsQueryFilter nsf = new CommonGramsQueryFilter(cgf);
CharTermAttribute term = wt.addAttribute(CharTermAttribute.class);
+ nsf.reset();
assertTrue(nsf.incrementToken());
assertEquals("How_the", term.toString());
assertTrue(nsf.incrementToken());
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java?rev=1379036&r1=1379035&r2=1379036&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java Thu Aug 30 17:50:18 2012
@@ -235,6 +235,7 @@ public class TestCompoundWordTokenFilter
CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);
CharTermAttribute termAtt = tf.getAttribute(CharTermAttribute.class);
+ tf.reset();
assertTrue(tf.incrementToken());
assertEquals("Rindfleischüberwachungsgesetz", termAtt.toString());
assertTrue(tf.incrementToken());
@@ -256,6 +257,7 @@ public class TestCompoundWordTokenFilter
CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);
MockRetainAttribute retAtt = stream.addAttribute(MockRetainAttribute.class);
+ stream.reset();
while (stream.incrementToken()) {
assertTrue("Custom attribute value was lost", retAtt.getRetain());
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java?rev=1379036&r1=1379035&r2=1379036&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java Thu Aug 30 17:50:18 2012
@@ -80,6 +80,7 @@ public class TestAnalyzers extends BaseT
void verifyPayload(TokenStream ts) throws IOException {
PayloadAttribute payloadAtt = ts.getAttribute(PayloadAttribute.class);
+ ts.reset();
for(byte b=1;;b++) {
boolean hasNext = ts.incrementToken();
if (!hasNext) break;
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java?rev=1379036&r1=1379035&r2=1379036&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java Thu Aug 30 17:50:18 2012
@@ -66,6 +66,7 @@ public class TestStopAnalyzer extends Ba
assertNotNull(stream);
CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
+ stream.reset();
while (stream.incrementToken()) {
String text = termAtt.toString();
assertFalse(stopWordsSet.contains(text));
@@ -83,6 +84,7 @@ public class TestStopAnalyzer extends Ba
CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
PositionIncrementAttribute posIncrAtt = stream.addAttribute(PositionIncrementAttribute.class);
+ stream.reset();
while (stream.incrementToken()) {
String text = termAtt.toString();
assertFalse(stopWordsSet.contains(text));
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizer.java?rev=1379036&r1=1379035&r2=1379036&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizer.java Thu Aug 30 17:50:18 2012
@@ -111,6 +111,7 @@ public class TestPatternTokenizer extend
// assign bogus values
in.clearAttributes();
termAtt.setEmpty().append("bogusTerm");
+ in.reset();
while (in.incrementToken()) {
if (out.length() > 0)
out.append(' ');
Modified: lucene/dev/trunk/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java?rev=1379036&r1=1379035&r2=1379036&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java (original)
+++ lucene/dev/trunk/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java Thu Aug 30 17:50:18 2012
@@ -45,7 +45,8 @@ public final class ICUTokenizer extends
/** true length of text in the buffer */
private int length = 0;
/** length in buffer that can be evaluated safely, up to a safe end point */
- private int usableLength = 0;
+ // note: usableLength is -1 here so that, on a best-effort basis, consumers that don't call reset() hit an ArrayIndexOutOfBoundsException
+ private int usableLength = -1;
/** accumulated offset of previous buffers for this reader, for offsetAtt */
private int offset = 0;
@@ -101,12 +102,6 @@ public final class ICUTokenizer extends
breaker.setText(buffer, 0, 0);
length = usableLength = offset = 0;
}
-
- @Override
- public void setReader(Reader input) throws IOException {
- super.setReader(input);
- reset();
- }
@Override
public void end() {
Modified: lucene/dev/trunk/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java?rev=1379036&r1=1379035&r2=1379036&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java (original)
+++ lucene/dev/trunk/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java Thu Aug 30 17:50:18 2012
@@ -245,14 +245,8 @@ public final class JapaneseTokenizer ext
}
@Override
- public void setReader(Reader input) throws IOException {
- super.setReader(input);
- buffer.reset(input);
- }
-
- @Override
public void reset() throws IOException {
- super.reset();
+ buffer.reset(input);
resetState();
}
Modified: lucene/dev/trunk/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java?rev=1379036&r1=1379035&r2=1379036&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java (original)
+++ lucene/dev/trunk/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java Thu Aug 30 17:50:18 2012
@@ -112,17 +112,10 @@ public final class SentenceTokenizer ext
@Override
public void reset() throws IOException {
- super.reset();
tokenStart = tokenEnd = 0;
}
@Override
- public void setReader(Reader input) throws IOException {
- super.setReader(input);
- reset();
- }
-
- @Override
public void end() {
// set final offset
final int finalOffset = correctOffset(tokenEnd);
Modified: lucene/dev/trunk/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java?rev=1379036&r1=1379035&r2=1379036&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java (original)
+++ lucene/dev/trunk/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java Thu Aug 30 17:50:18 2012
@@ -80,8 +80,7 @@ public abstract class BaseUIMATokenizer
}
@Override
- public void setReader(Reader input) throws IOException {
- super.setReader(input);
+ public void reset() throws IOException {
iterator = null;
}
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/analysis/TokenStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/analysis/TokenStream.java?rev=1379036&r1=1379035&r2=1379036&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/analysis/TokenStream.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/analysis/TokenStream.java Thu Aug 30 17:50:18 2012
@@ -170,12 +170,8 @@ public abstract class TokenStream extend
* This method is called by a consumer before it begins consumption using
* {@link #incrementToken()}.
* <p/>
- * Resets this stream to the beginning. As all TokenStreams must be reusable,
- * any implementations which have state that needs to be reset between usages
- * of the TokenStream, must implement this method. Note that if your TokenStream
- * caches tokens and feeds them back again after a reset, it is imperative
- * that you clone the tokens when you store them away (on the first pass) as
- * well as when you return them (on future passes after {@link #reset()}).
+ * Resets this stream to a clean state. Stateful implementations must implement
+ * this method so that they can be reused, just as if they had been created fresh.
*/
public void reset() throws IOException {}
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/analysis/Tokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/analysis/Tokenizer.java?rev=1379036&r1=1379035&r2=1379036&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/analysis/Tokenizer.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/analysis/Tokenizer.java Thu Aug 30 17:50:18 2012
@@ -82,12 +82,18 @@ public abstract class Tokenizer extends
return (input instanceof CharFilter) ? ((CharFilter) input).correctOffset(currentOff) : currentOff;
}
- /** Expert: Reset the tokenizer to a new reader. Typically, an
+ /** Expert: Set a new reader on the Tokenizer. Typically, an
* analyzer (in its tokenStream method) will use
* this to re-use a previously created tokenizer. */
- public void setReader(Reader input) throws IOException {
+ public final void setReader(Reader input) throws IOException {
assert input != null: "input must not be null";
this.input = input;
+ assert setReaderTestPoint();
+ }
+
+ // only used by assert, for testing
+ boolean setReaderTestPoint() {
+ return true;
}
}
Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java?rev=1379036&r1=1379035&r2=1379036&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java Thu Aug 30 17:50:18 2012
@@ -1545,7 +1545,7 @@ public class TestIndexWriter extends Luc
}
@Override
- public void setReader(Reader input) throws IOException {
+ public void reset() throws IOException {
this.upto = 0;
final StringBuilder b = new StringBuilder();
final char[] buffer = new char[1024];
Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestTermRangeQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestTermRangeQuery.java?rev=1379036&r1=1379035&r2=1379036&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestTermRangeQuery.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestTermRangeQuery.java Thu Aug 30 17:50:18 2012
@@ -227,8 +227,7 @@ public class TestTermRangeQuery extends
}
@Override
- public final void setReader(Reader reader) throws IOException {
- super.setReader(reader);
+ public void reset() throws IOException {;
done = false;
}
}
Modified: lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java?rev=1379036&r1=1379035&r2=1379036&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java Thu Aug 30 17:50:18 2012
@@ -176,6 +176,8 @@ public abstract class AbstractTestCase e
BytesRef bytesRef = termAttribute.getBytesRef();
+ tokenStream.reset();
+
while (tokenStream.incrementToken()) {
termAttribute.fillBytesRef();
bytesRefs.add(BytesRef.deepCopyOf(bytesRef));
@@ -317,12 +319,6 @@ public abstract class AbstractTestCase e
}
@Override
- public void setReader( Reader input ) throws IOException {
- super.setReader( input );
- reset();
- }
-
- @Override
public void reset() {
startTerm = 0;
nextStartOffset = 0;
Modified: lucene/dev/trunk/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiPhraseQueryParsing.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiPhraseQueryParsing.java?rev=1379036&r1=1379035&r2=1379036&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiPhraseQueryParsing.java (original)
+++ lucene/dev/trunk/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiPhraseQueryParsing.java Thu Aug 30 17:50:18 2012
@@ -81,8 +81,7 @@ public class TestMultiPhraseQueryParsing
}
@Override
- public void setReader(Reader reader) throws IOException {
- super.setReader(reader);
+ public void reset() throws IOException {
this.upto = 0;
this.lastPos = 0;
}
Modified: lucene/dev/trunk/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/PrefixCellsTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/PrefixCellsTokenizer.java?rev=1379036&r1=1379035&r2=1379036&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/PrefixCellsTokenizer.java (original)
+++ lucene/dev/trunk/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/PrefixCellsTokenizer.java Thu Aug 30 17:50:18 2012
@@ -76,14 +76,4 @@ class PrefixCellsTokenizer extends Token
termAtt.setLength(length);
return length > 0; // should only happen at the end
}
-
- @Override
- public final void end() {
-
- }
-
- @Override
- public void setReader(Reader input) throws IOException {
- super.setReader(input);
- }
}
\ No newline at end of file
Modified: lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenizer.java?rev=1379036&r1=1379035&r2=1379036&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenizer.java (original)
+++ lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenizer.java Thu Aug 30 17:50:18 2012
@@ -227,10 +227,10 @@ public class MockTokenizer extends Token
}
@Override
- public void setReader(Reader input) throws IOException {
- super.setReader(input);
+ boolean setReaderTestPoint() {
assert !enableChecks || streamState == State.CLOSE : "setReader() called in wrong state: " + streamState;
streamState = State.SETREADER;
+ return true;
}
@Override
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/TrieTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/TrieTokenizerFactory.java?rev=1379036&r1=1379035&r2=1379036&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/TrieTokenizerFactory.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/TrieTokenizerFactory.java Thu Aug 30 17:50:18 2012
@@ -72,15 +72,11 @@ final class TrieTokenizer extends Tokeni
this.type = type;
this.precisionStep = precisionStep;
this.ts = ts;
-
- setReader(input);
}
@Override
- public void setReader(Reader input) {
+ public void reset() {
try {
- super.setReader(input);
- input = super.input;
char[] buf = new char[32];
int len = input.read(buf);
this.startOfs = correctOffset(0);
@@ -113,6 +109,7 @@ final class TrieTokenizer extends Tokeni
} catch (IOException e) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unable to create TrieIndexTokenizer", e);
}
+ ts.reset();
}
@Override
@@ -120,12 +117,6 @@ final class TrieTokenizer extends Tokeni
super.close();
ts.close();
}
-
- @Override
- public void reset() throws IOException {
- super.reset();
- ts.reset();
- }
@Override
public boolean incrementToken() {
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/BoolField.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/BoolField.java?rev=1379036&r1=1379035&r2=1379036&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/BoolField.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/BoolField.java Thu Aug 30 17:50:18 2012
@@ -71,9 +71,8 @@ public class BoolField extends Primitive
boolean done = false;
@Override
- public void setReader(Reader input) throws IOException {
+ public void reset() throws IOException {
done = false;
- super.setReader(input);
}
@Override
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java?rev=1379036&r1=1379035&r2=1379036&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java Thu Aug 30 17:50:18 2012
@@ -81,13 +81,8 @@ public class PreAnalyzedField extends Fi
return new SolrAnalyzer() {
@Override
- protected TokenStreamComponents createComponents(String fieldName,
- Reader reader) {
- try {
- return new TokenStreamComponents(new PreAnalyzedTokenizer(reader, parser));
- } catch (IOException e) {
- return null;
- }
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ return new TokenStreamComponents(new PreAnalyzedTokenizer(reader, parser));
}
};
@@ -169,6 +164,7 @@ public class PreAnalyzedField extends Fi
return null;
}
PreAnalyzedTokenizer parse = new PreAnalyzedTokenizer(new StringReader(val), parser);
+ parse.reset(); // consume
Field f = (Field)super.createField(field, val, boost);
if (parse.getStringValue() != null) {
f.setStringValue(parse.getStringValue());
@@ -195,11 +191,11 @@ public class PreAnalyzedField extends Fi
private String stringValue = null;
private byte[] binaryValue = null;
private PreAnalyzedParser parser;
+ private Reader lastReader;
- public PreAnalyzedTokenizer(Reader reader, PreAnalyzedParser parser) throws IOException {
+ public PreAnalyzedTokenizer(Reader reader, PreAnalyzedParser parser) {
super(reader);
this.parser = parser;
- setReader(reader);
}
public boolean hasTokenStream() {
@@ -229,24 +225,30 @@ public class PreAnalyzedField extends Fi
return true;
}
- public final void reset() {
+ @Override
+ public final void reset() throws IOException {
+ // NOTE: this acts like rewind if you call it again
+ if (input != lastReader) {
+ lastReader = input;
+ cachedStates.clear();
+ stringValue = null;
+ binaryValue = null;
+ ParseResult res = parser.parse(input, this);
+ if (res != null) {
+ stringValue = res.str;
+ binaryValue = res.bin;
+ if (res.states != null) {
+ cachedStates.addAll(res.states);
+ }
+ }
+ }
it = cachedStates.iterator();
}
@Override
- public void setReader(Reader input) throws IOException {
- super.setReader(input);
- cachedStates.clear();
- stringValue = null;
- binaryValue = null;
- ParseResult res = parser.parse(input, this);
- if (res != null) {
- stringValue = res.str;
- binaryValue = res.bin;
- if (res.states != null) {
- cachedStates.addAll(res.states);
- }
- }
+ public void close() throws IOException {
+ super.close();
+ lastReader = null; // just a ref, null for gc
}
}