You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/10/23 22:45:21 UTC
svn commit: r1401457 - in /lucene/dev/branches/branch_4x: ./ lucene/
lucene/analysis/
lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/
lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/
lucene/analysis/common/src/...
Author: rmuir
Date: Tue Oct 23 20:45:20 2012
New Revision: 1401457
URL: http://svn.apache.org/viewvc?rev=1401457&view=rev
Log:
throw a best-effort NPE from the jflex-based tokenizers if you don't consume the TS correctly
Modified:
lucene/dev/branches/branch_4x/ (props changed)
lucene/dev/branches/branch_4x/lucene/ (props changed)
lucene/dev/branches/branch_4x/lucene/analysis/ (props changed)
lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java
lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java
lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestElision.java
lucene/dev/branches/branch_4x/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java?rev=1401457&r1=1401456&r2=1401457&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java Tue Oct 23 20:45:20 2012
@@ -120,7 +120,7 @@ public final class ClassicTokenizer exte
}
private void init(Version matchVersion) {
- this.scanner = new ClassicTokenizerImpl(input);
+ this.scanner = new ClassicTokenizerImpl(null); // best effort NPE if you dont call reset
}
// this tokenizer generates three attributes:
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java?rev=1401457&r1=1401456&r2=1401457&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java Tue Oct 23 20:45:20 2012
@@ -147,14 +147,15 @@ public final class StandardTokenizer ext
}
private final void init(Version matchVersion) {
+ // best effort NPE if you dont call reset
if (matchVersion.onOrAfter(Version.LUCENE_40)) {
- this.scanner = new StandardTokenizerImpl(input);
+ this.scanner = new StandardTokenizerImpl(null);
} else if (matchVersion.onOrAfter(Version.LUCENE_34)) {
- this.scanner = new StandardTokenizerImpl34(input);
+ this.scanner = new StandardTokenizerImpl34(null);
} else if (matchVersion.onOrAfter(Version.LUCENE_31)) {
- this.scanner = new StandardTokenizerImpl31(input);
+ this.scanner = new StandardTokenizerImpl31(null);
} else {
- this.scanner = new ClassicTokenizerImpl(input);
+ this.scanner = new ClassicTokenizerImpl(null);
}
}
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java?rev=1401457&r1=1401456&r2=1401457&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java Tue Oct 23 20:45:20 2012
@@ -106,7 +106,7 @@ public final class UAX29URLEmailTokenize
*/
public UAX29URLEmailTokenizer(Version matchVersion, Reader input) {
super(input);
- this.scanner = getScannerFor(matchVersion, input);
+ this.scanner = getScannerFor(matchVersion);
}
/**
@@ -114,7 +114,7 @@ public final class UAX29URLEmailTokenize
*/
public UAX29URLEmailTokenizer(Version matchVersion, AttributeSource source, Reader input) {
super(source, input);
- this.scanner = getScannerFor(matchVersion, input);
+ this.scanner = getScannerFor(matchVersion);
}
/**
@@ -122,18 +122,19 @@ public final class UAX29URLEmailTokenize
*/
public UAX29URLEmailTokenizer(Version matchVersion, AttributeFactory factory, Reader input) {
super(factory, input);
- this.scanner = getScannerFor(matchVersion, input);
+ this.scanner = getScannerFor(matchVersion);
}
- private static StandardTokenizerInterface getScannerFor(Version matchVersion, Reader input) {
+ private static StandardTokenizerInterface getScannerFor(Version matchVersion) {
+ // best effort NPE if you dont call reset
if (matchVersion.onOrAfter(Version.LUCENE_40)) {
- return new UAX29URLEmailTokenizerImpl(input);
+ return new UAX29URLEmailTokenizerImpl(null);
} else if (matchVersion.onOrAfter(Version.LUCENE_36)) {
- return new UAX29URLEmailTokenizerImpl36(input);
+ return new UAX29URLEmailTokenizerImpl36(null);
} else if (matchVersion.onOrAfter(Version.LUCENE_34)) {
- return new UAX29URLEmailTokenizerImpl34(input);
+ return new UAX29URLEmailTokenizerImpl34(null);
} else {
- return new UAX29URLEmailTokenizerImpl31(input);
+ return new UAX29URLEmailTokenizerImpl31(null);
}
}
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java?rev=1401457&r1=1401456&r2=1401457&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java Tue Oct 23 20:45:20 2012
@@ -143,7 +143,7 @@ public final class WikipediaTokenizer ex
*/
public WikipediaTokenizer(Reader input, int tokenOutput, Set<String> untokenizedTypes) {
super(input);
- this.scanner = new WikipediaTokenizerImpl(input);
+ this.scanner = new WikipediaTokenizerImpl(null); // best effort NPE if you dont call reset
init(tokenOutput, untokenizedTypes);
}
@@ -156,7 +156,7 @@ public final class WikipediaTokenizer ex
*/
public WikipediaTokenizer(AttributeFactory factory, Reader input, int tokenOutput, Set<String> untokenizedTypes) {
super(factory, input);
- this.scanner = new WikipediaTokenizerImpl(input);
+ this.scanner = new WikipediaTokenizerImpl(null); // best effort NPE if you dont call reset
init(tokenOutput, untokenizedTypes);
}
@@ -169,7 +169,7 @@ public final class WikipediaTokenizer ex
*/
public WikipediaTokenizer(AttributeSource source, Reader input, int tokenOutput, Set<String> untokenizedTypes) {
super(source, input);
- this.scanner = new WikipediaTokenizerImpl(input);
+ this.scanner = new WikipediaTokenizerImpl(null); // best effort NPE if you dont call reset
init(tokenOutput, untokenizedTypes);
}
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestElision.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestElision.java?rev=1401457&r1=1401456&r2=1401457&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestElision.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestElision.java Tue Oct 23 20:45:20 2012
@@ -52,9 +52,12 @@ public class TestElision extends BaseTok
private List<String> filter(TokenFilter filter) throws IOException {
List<String> tas = new ArrayList<String>();
CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
+ filter.reset();
while (filter.incrementToken()) {
tas.add(termAtt.toString());
}
+ filter.end();
+ filter.close();
return tas;
}
Modified: lucene/dev/branches/branch_4x/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java?rev=1401457&r1=1401456&r2=1401457&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java Tue Oct 23 20:45:20 2012
@@ -62,12 +62,16 @@ public class TestMorfologikAnalyzer exte
ts_1.reset();
ts_1.incrementToken();
assertEquals("first stream", "liście", termAtt_1.toString());
+ ts_1.end();
+ ts_1.close();
TokenStream ts_2 = a.tokenStream("dummy", new StringReader("danych"));
CharTermAttribute termAtt_2 = ts_2.getAttribute(CharTermAttribute.class);
ts_2.reset();
ts_2.incrementToken();
assertEquals("second stream", "dany", termAtt_2.toString());
+ ts_2.end();
+ ts_2.close();
}
/** Test stemming of mixed-case tokens. */
@@ -110,6 +114,7 @@ public class TestMorfologikAnalyzer exte
public final void testPOSAttribute() throws IOException {
TokenStream ts = getTestAnalyzer().tokenStream("dummy", new StringReader("liście"));
+ ts.reset();
assertPOSToken(ts, "liście",
"subst:sg:acc:n2",
"subst:sg:nom:n2",
@@ -127,6 +132,8 @@ public class TestMorfologikAnalyzer exte
assertPOSToken(ts, "lista",
"subst:sg:dat:f",
"subst:sg:loc:f");
+ ts.end();
+ ts.close();
}
/** blast some random strings through the analyzer */