You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2013/09/22 15:57:45 UTC
svn commit: r1525362 [3/3] - in /lucene/dev/trunk: lucene/
lucene/analysis/common/src/java/org/apache/lucene/analysis/core/
lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/
lucene/analysis/common/src/java/org/apache/lucene/analysis/pat...
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/analysis/Tokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/analysis/Tokenizer.java?rev=1525362&r1=1525361&r2=1525362&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/analysis/Tokenizer.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/analysis/Tokenizer.java Sun Sep 22 13:57:43 2013
@@ -30,21 +30,28 @@ import java.io.IOException;
call {@link AttributeSource#clearAttributes()} before
setting attributes.
*/
-public abstract class Tokenizer extends TokenStream {
+public abstract class Tokenizer extends TokenStream {
/** The text source for this Tokenizer. */
- protected Reader input;
+ protected Reader input = ILLEGAL_STATE_READER;
+
+ /** Pending reader: not actually assigned to input until reset() */
+ private Reader inputPending = ILLEGAL_STATE_READER;
/** Construct a token stream processing the given input. */
protected Tokenizer(Reader input) {
- assert input != null: "input must not be null";
- this.input = input;
+ if (input == null) {
+ throw new NullPointerException("input must not be null");
+ }
+ this.inputPending = input;
}
/** Construct a token stream processing the given input using the given AttributeFactory. */
protected Tokenizer(AttributeFactory factory, Reader input) {
super(factory);
- assert input != null: "input must not be null";
- this.input = input;
+ if (input == null) {
+ throw new NullPointerException("input must not be null");
+ }
+ this.inputPending = input;
}
/**
@@ -56,12 +63,10 @@ public abstract class Tokenizer extends
*/
@Override
public void close() throws IOException {
- if (input != null) {
- input.close();
- // LUCENE-2387: don't hold onto Reader after close, so
- // GC can reclaim
- input = null;
- }
+ input.close();
+ // LUCENE-2387: don't hold onto Reader after close, so
+ // GC can reclaim
+ inputPending = input = ILLEGAL_STATE_READER;
}
/** Return the corrected offset. If {@link #input} is a {@link CharFilter} subclass
@@ -71,7 +76,6 @@ public abstract class Tokenizer extends
* @see CharFilter#correctOffset
*/
protected final int correctOffset(int currentOff) {
- assert input != null: "this tokenizer is closed";
return (input instanceof CharFilter) ? ((CharFilter) input).correctOffset(currentOff) : currentOff;
}
@@ -79,14 +83,36 @@ public abstract class Tokenizer extends
* analyzer (in its tokenStream method) will use
* this to re-use a previously created tokenizer. */
public final void setReader(Reader input) throws IOException {
- assert input != null: "input must not be null";
- this.input = input;
+ if (input == null) {
+ throw new NullPointerException("input must not be null");
+ }
+ this.input = ILLEGAL_STATE_READER;
+ this.inputPending = input;
assert setReaderTestPoint();
}
+ @Override
+ public void reset() throws IOException {
+ super.reset();
+ input = inputPending;
+ inputPending = ILLEGAL_STATE_READER;
+ }
+
// only used by assert, for testing
boolean setReaderTestPoint() {
return true;
}
+
+ private static final Reader ILLEGAL_STATE_READER = new Reader() {
+ @Override
+ public int read(char[] cbuf, int off, int len) {
+ throw new IllegalStateException("TokenStream contract violation: reset()/close() call missing, " +
+ "reset() called multiple times, or subclass does not call super.reset(). " +
+ "Please see Javadocs of TokenStream class for more information about the correct consuming workflow.");
+ }
+
+ @Override
+ public void close() {}
+ };
}
Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/analysis/TestGraphTokenizers.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/analysis/TestGraphTokenizers.java?rev=1525362&r1=1525361&r2=1525362&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/analysis/TestGraphTokenizers.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/analysis/TestGraphTokenizers.java Sun Sep 22 13:57:43 2013
@@ -68,7 +68,8 @@ public class TestGraphTokenizers extends
}
@Override
- public void reset() {
+ public void reset() throws IOException {
+ super.reset();
tokens = null;
upto = 0;
}
Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java?rev=1525362&r1=1525361&r2=1525362&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java Sun Sep 22 13:57:43 2013
@@ -36,9 +36,9 @@ public class TestMockAnalyzer extends Ba
Analyzer a = new MockAnalyzer(random());
assertAnalyzesTo(a, "A bc defg hiJklmn opqrstuv wxy z ",
new String[] { "a", "bc", "defg", "hijklmn", "opqrstuv", "wxy", "z" });
- assertAnalyzesToReuse(a, "aba cadaba shazam",
+ assertAnalyzesTo(a, "aba cadaba shazam",
new String[] { "aba", "cadaba", "shazam" });
- assertAnalyzesToReuse(a, "break on whitespace",
+ assertAnalyzesTo(a, "break on whitespace",
new String[] { "break", "on", "whitespace" });
}
@@ -47,9 +47,9 @@ public class TestMockAnalyzer extends Ba
Analyzer a = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
assertAnalyzesTo(a, "a-bc123 defg+hijklmn567opqrstuv78wxy_z ",
new String[] { "a", "bc", "defg", "hijklmn", "opqrstuv", "wxy", "z" });
- assertAnalyzesToReuse(a, "aba4cadaba-Shazam",
+ assertAnalyzesTo(a, "aba4cadaba-Shazam",
new String[] { "aba", "cadaba", "shazam" });
- assertAnalyzesToReuse(a, "break+on/Letters",
+ assertAnalyzesTo(a, "break+on/Letters",
new String[] { "break", "on", "letters" });
}
@@ -58,9 +58,9 @@ public class TestMockAnalyzer extends Ba
Analyzer a = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
assertAnalyzesTo(a, "a-bc123 defg+hijklmn567opqrstuv78wxy_z ",
new String[] { "a-bc123 defg+hijklmn567opqrstuv78wxy_z " });
- assertAnalyzesToReuse(a, "aba4cadaba-Shazam",
+ assertAnalyzesTo(a, "aba4cadaba-Shazam",
new String[] { "aba4cadaba-Shazam" });
- assertAnalyzesToReuse(a, "break+on/Nothing",
+ assertAnalyzesTo(a, "break+on/Nothing",
new String[] { "break+on/Nothing" });
}
@@ -106,7 +106,7 @@ public class TestMockAnalyzer extends Ba
stream.end();
stream.close();
- assertAnalyzesToReuse(analyzer, testString, new String[] { "t" });
+ assertAnalyzesTo(analyzer, testString, new String[] { "t" });
}
/** blast some random strings through the analyzer */
Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java?rev=1525362&r1=1525361&r2=1525362&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java Sun Sep 22 13:57:43 2013
@@ -1599,14 +1599,15 @@ public class TestIndexWriter extends Luc
@Override
public void reset() throws IOException {
- this.upto = 0;
- final StringBuilder b = new StringBuilder();
- final char[] buffer = new char[1024];
- int n;
- while ((n = input.read(buffer)) != -1) {
- b.append(buffer, 0, n);
- }
- this.tokens = b.toString().split(" ");
+ super.reset();
+ this.upto = 0;
+ final StringBuilder b = new StringBuilder();
+ final char[] buffer = new char[1024];
+ int n;
+ while ((n = input.read(buffer)) != -1) {
+ b.append(buffer, 0, n);
+ }
+ this.tokens = b.toString().split(" ");
}
}
Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestTermRangeQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestTermRangeQuery.java?rev=1525362&r1=1525361&r2=1525362&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestTermRangeQuery.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestTermRangeQuery.java Sun Sep 22 13:57:43 2013
@@ -227,7 +227,8 @@ public class TestTermRangeQuery extends
}
@Override
- public void reset() throws IOException {;
+ public void reset() throws IOException {
+ super.reset();
done = false;
}
}
Modified: lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/highlight/OffsetLimitTokenFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/highlight/OffsetLimitTokenFilterTest.java?rev=1525362&r1=1525361&r2=1525362&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/highlight/OffsetLimitTokenFilterTest.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/highlight/OffsetLimitTokenFilterTest.java Sun Sep 22 13:57:43 2013
@@ -49,7 +49,7 @@ public class OffsetLimitTokenFilterTest
assertTokenStreamContents(filter, new String[] {"short", "toolong",
"evenmuchlongertext"});
- checkOneTermReuse(new Analyzer() {
+ checkOneTerm(new Analyzer() {
@Override
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
Modified: lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java?rev=1525362&r1=1525361&r2=1525362&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java Sun Sep 22 13:57:43 2013
@@ -319,7 +319,8 @@ public abstract class AbstractTestCase e
}
@Override
- public void reset() {
+ public void reset() throws IOException {
+ super.reset();
startTerm = 0;
nextStartOffset = 0;
snippet = null;
Modified: lucene/dev/trunk/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiPhraseQueryParsing.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiPhraseQueryParsing.java?rev=1525362&r1=1525361&r2=1525362&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiPhraseQueryParsing.java (original)
+++ lucene/dev/trunk/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiPhraseQueryParsing.java Sun Sep 22 13:57:43 2013
@@ -82,6 +82,7 @@ public class TestMultiPhraseQueryParsing
@Override
public void reset() throws IOException {
+ super.reset();
this.upto = 0;
this.lastPos = 0;
}
Modified: lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java?rev=1525362&r1=1525361&r2=1525362&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java (original)
+++ lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java Sun Sep 22 13:57:43 2013
@@ -341,14 +341,17 @@ public abstract class BaseTokenStreamTes
}
public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[]) throws IOException {
+ checkResetException(a, input);
assertTokenStreamContents(a.tokenStream("dummy", input), output, startOffsets, endOffsets, types, posIncrements, null, input.length());
}
public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[], int posLengths[]) throws IOException {
+ checkResetException(a, input);
assertTokenStreamContents(a.tokenStream("dummy", input), output, startOffsets, endOffsets, types, posIncrements, posLengths, input.length());
}
public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[], int posLengths[], boolean offsetsAreCorrect) throws IOException {
+ checkResetException(a, input);
assertTokenStreamContents(a.tokenStream("dummy", input), output, startOffsets, endOffsets, types, posIncrements, posLengths, input.length(), offsetsAreCorrect);
}
@@ -375,30 +378,28 @@ public abstract class BaseTokenStreamTes
public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], int[] posIncrements) throws IOException {
assertAnalyzesTo(a, input, output, startOffsets, endOffsets, null, posIncrements, null);
}
-
- public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[]) throws IOException {
- assertTokenStreamContents(a.tokenStream("dummy", input), output, startOffsets, endOffsets, types, posIncrements, null, input.length());
- }
-
- public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output) throws IOException {
- assertAnalyzesToReuse(a, input, output, null, null, null, null);
- }
-
- public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output, String[] types) throws IOException {
- assertAnalyzesToReuse(a, input, output, null, null, types, null);
- }
-
- public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output, int[] posIncrements) throws IOException {
- assertAnalyzesToReuse(a, input, output, null, null, null, posIncrements);
- }
-
- public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[]) throws IOException {
- assertAnalyzesToReuse(a, input, output, startOffsets, endOffsets, null, null);
- }
-
- public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], int[] posIncrements) throws IOException {
- assertAnalyzesToReuse(a, input, output, startOffsets, endOffsets, null, posIncrements);
+ static void checkResetException(Analyzer a, String input) throws IOException {
+ TokenStream ts = a.tokenStream("bogus", input);
+ try {
+ if (ts.incrementToken()) {
+ //System.out.println(ts.reflectAsString(false));
+ fail("didn't get expected exception when reset() not called");
+ }
+ } catch (IllegalStateException expected) {
+ // ok
+ } catch (AssertionError expected) {
+ // ok: MockTokenizer
+ assertTrue(expected.getMessage(), expected.getMessage() != null && expected.getMessage().contains("wrong state"));
+ } catch (Exception unexpected) {
+ fail("got wrong exception when reset() not called: " + unexpected);
+ } finally {
+ // consume correctly
+ ts.reset();
+ while (ts.incrementToken()) {}
+ ts.end();
+ ts.close();
+ }
}
// simple utility method for testing stemmers
@@ -407,10 +408,6 @@ public abstract class BaseTokenStreamTes
assertAnalyzesTo(a, input, new String[]{expected});
}
- public static void checkOneTermReuse(Analyzer a, final String input, final String expected) throws IOException {
- assertAnalyzesToReuse(a, input, new String[]{expected});
- }
-
/** utility method for blasting tokenstreams with data to make sure they don't do anything crazy */
public static void checkRandomData(Random random, Analyzer a, int iterations) throws IOException {
checkRandomData(random, a, iterations, 20, false, true);
@@ -476,6 +473,7 @@ public abstract class BaseTokenStreamTes
}
public static void checkRandomData(Random random, Analyzer a, int iterations, int maxWordLength, boolean simple, boolean offsetsAreCorrect) throws IOException {
+ checkResetException(a, "best effort");
long seed = random.nextLong();
boolean useCharFilter = random.nextBoolean();
Directory dir = null;
Modified: lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/VocabularyAssert.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/VocabularyAssert.java?rev=1525362&r1=1525361&r2=1525362&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/VocabularyAssert.java (original)
+++ lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/VocabularyAssert.java Sun Sep 22 13:57:43 2013
@@ -41,7 +41,7 @@ public class VocabularyAssert {
while ((inputWord = vocReader.readLine()) != null) {
String expectedWord = outputReader.readLine();
Assert.assertNotNull(expectedWord);
- BaseTokenStreamTestCase.checkOneTermReuse(a, inputWord, expectedWord);
+ BaseTokenStreamTestCase.checkOneTerm(a, inputWord, expectedWord);
}
}
@@ -55,7 +55,7 @@ public class VocabularyAssert {
if (inputLine.startsWith("#") || inputLine.trim().length() == 0)
continue; /* comment */
String words[] = inputLine.split("\t");
- BaseTokenStreamTestCase.checkOneTermReuse(a, words[0], words[1]);
+ BaseTokenStreamTestCase.checkOneTerm(a, words[0], words[1]);
}
}
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/TrieTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/TrieTokenizerFactory.java?rev=1525362&r1=1525361&r2=1525362&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/TrieTokenizerFactory.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/TrieTokenizerFactory.java Sun Sep 22 13:57:43 2013
@@ -96,8 +96,9 @@ final class TrieTokenizer extends Tokeni
}
@Override
- public void reset() {
- try {
+ public void reset() throws IOException {
+ super.reset();
+ try {
int upto = 0;
char[] buf = termAtt.buffer();
while (true) {
@@ -167,6 +168,7 @@ final class TrieTokenizer extends Tokeni
@Override
public void end() throws IOException {
+ super.end();
if (hasValue) {
ts.end();
}
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java?rev=1525362&r1=1525361&r2=1525362&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java Sun Sep 22 13:57:43 2013
@@ -691,6 +691,11 @@ final class TokenOrderingFilter extends
return true;
}
}
+
+ @Override
+ public void reset() throws IOException {
+ // this looks wrong: but it's correct.
+ }
}
// for TokenOrderingFilter, so it can easily sort by startOffset
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/BoolField.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/BoolField.java?rev=1525362&r1=1525361&r2=1525362&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/BoolField.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/BoolField.java Sun Sep 22 13:57:43 2013
@@ -74,6 +74,7 @@ public class BoolField extends Primitive
@Override
public void reset() throws IOException {
+ super.reset();
done = false;
}
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java?rev=1525362&r1=1525361&r2=1525362&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java Sun Sep 22 13:57:43 2013
@@ -252,9 +252,11 @@ public class PreAnalyzedField extends Fi
private byte[] binaryValue = null;
private PreAnalyzedParser parser;
private Reader lastReader;
+ private Reader input; // hides original input since we replay saved states (and don't reuse)
public PreAnalyzedTokenizer(Reader reader, PreAnalyzedParser parser) {
super(reader);
+ this.input = reader;
this.parser = parser;
}