You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/10/23 22:45:21 UTC

svn commit: r1401457 - in /lucene/dev/branches/branch_4x: ./ lucene/ lucene/analysis/ lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/ lucene/analysis/common/src/...

Author: rmuir
Date: Tue Oct 23 20:45:20 2012
New Revision: 1401457

URL: http://svn.apache.org/viewvc?rev=1401457&view=rev
Log:
Throw a best-effort NullPointerException from the JFlex-based tokenizers if you don't consume the TokenStream correctly (i.e. you don't call reset() before incrementToken()).

Modified:
    lucene/dev/branches/branch_4x/   (props changed)
    lucene/dev/branches/branch_4x/lucene/   (props changed)
    lucene/dev/branches/branch_4x/lucene/analysis/   (props changed)
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestElision.java
    lucene/dev/branches/branch_4x/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java

Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java?rev=1401457&r1=1401456&r2=1401457&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java Tue Oct 23 20:45:20 2012
@@ -120,7 +120,7 @@ public final class ClassicTokenizer exte
   }
 
   private void init(Version matchVersion) {
-    this.scanner = new ClassicTokenizerImpl(input);
+    this.scanner = new ClassicTokenizerImpl(null); // best-effort NPE if you don't call reset()
   }
 
   // this tokenizer generates three attributes:

Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java?rev=1401457&r1=1401456&r2=1401457&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java Tue Oct 23 20:45:20 2012
@@ -147,14 +147,15 @@ public final class StandardTokenizer ext
   }
 
   private final void init(Version matchVersion) {
+    // best-effort NPE if you don't call reset()
     if (matchVersion.onOrAfter(Version.LUCENE_40)) {
-      this.scanner = new StandardTokenizerImpl(input);
+      this.scanner = new StandardTokenizerImpl(null);
     } else if (matchVersion.onOrAfter(Version.LUCENE_34)) {
-      this.scanner = new StandardTokenizerImpl34(input);
+      this.scanner = new StandardTokenizerImpl34(null);
     } else if (matchVersion.onOrAfter(Version.LUCENE_31)) {
-      this.scanner = new StandardTokenizerImpl31(input);
+      this.scanner = new StandardTokenizerImpl31(null);
     } else {
-      this.scanner = new ClassicTokenizerImpl(input);
+      this.scanner = new ClassicTokenizerImpl(null);
     }
   }
 

Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java?rev=1401457&r1=1401456&r2=1401457&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java Tue Oct 23 20:45:20 2012
@@ -106,7 +106,7 @@ public final class UAX29URLEmailTokenize
    */
   public UAX29URLEmailTokenizer(Version matchVersion, Reader input) {
     super(input);
-    this.scanner = getScannerFor(matchVersion, input);
+    this.scanner = getScannerFor(matchVersion);
   }
 
   /**
@@ -114,7 +114,7 @@ public final class UAX29URLEmailTokenize
    */
   public UAX29URLEmailTokenizer(Version matchVersion, AttributeSource source, Reader input) {
     super(source, input);
-    this.scanner = getScannerFor(matchVersion, input);
+    this.scanner = getScannerFor(matchVersion);
   }
 
   /**
@@ -122,18 +122,19 @@ public final class UAX29URLEmailTokenize
    */
   public UAX29URLEmailTokenizer(Version matchVersion, AttributeFactory factory, Reader input) {
     super(factory, input);
-    this.scanner = getScannerFor(matchVersion, input);
+    this.scanner = getScannerFor(matchVersion);
   }
 
-  private static StandardTokenizerInterface getScannerFor(Version matchVersion, Reader input) {
+  private static StandardTokenizerInterface getScannerFor(Version matchVersion) {
+    // best-effort NPE if you don't call reset()
     if (matchVersion.onOrAfter(Version.LUCENE_40)) {
-      return new UAX29URLEmailTokenizerImpl(input);
+      return new UAX29URLEmailTokenizerImpl(null);
     } else if (matchVersion.onOrAfter(Version.LUCENE_36)) {
-      return new UAX29URLEmailTokenizerImpl36(input);
+      return new UAX29URLEmailTokenizerImpl36(null);
     } else if (matchVersion.onOrAfter(Version.LUCENE_34)) {
-      return new UAX29URLEmailTokenizerImpl34(input);
+      return new UAX29URLEmailTokenizerImpl34(null);
     } else {
-      return new UAX29URLEmailTokenizerImpl31(input);
+      return new UAX29URLEmailTokenizerImpl31(null);
     }
   }
 

Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java?rev=1401457&r1=1401456&r2=1401457&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java Tue Oct 23 20:45:20 2012
@@ -143,7 +143,7 @@ public final class WikipediaTokenizer ex
    */
   public WikipediaTokenizer(Reader input, int tokenOutput, Set<String> untokenizedTypes) {
     super(input);
-    this.scanner = new WikipediaTokenizerImpl(input);
+    this.scanner = new WikipediaTokenizerImpl(null); // best-effort NPE if you don't call reset()
     init(tokenOutput, untokenizedTypes);
   }
 
@@ -156,7 +156,7 @@ public final class WikipediaTokenizer ex
    */
   public WikipediaTokenizer(AttributeFactory factory, Reader input, int tokenOutput, Set<String> untokenizedTypes) {
     super(factory, input);
-    this.scanner = new WikipediaTokenizerImpl(input);
+    this.scanner = new WikipediaTokenizerImpl(null); // best-effort NPE if you don't call reset()
     init(tokenOutput, untokenizedTypes);
   }
 
@@ -169,7 +169,7 @@ public final class WikipediaTokenizer ex
    */
   public WikipediaTokenizer(AttributeSource source, Reader input, int tokenOutput, Set<String> untokenizedTypes) {
     super(source, input);
-    this.scanner = new WikipediaTokenizerImpl(input);
+    this.scanner = new WikipediaTokenizerImpl(null); // best-effort NPE if you don't call reset()
     init(tokenOutput, untokenizedTypes);
   }
   

Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestElision.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestElision.java?rev=1401457&r1=1401456&r2=1401457&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestElision.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestElision.java Tue Oct 23 20:45:20 2012
@@ -52,9 +52,12 @@ public class TestElision extends BaseTok
   private List<String> filter(TokenFilter filter) throws IOException {
     List<String> tas = new ArrayList<String>();
     CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
+    filter.reset();
     while (filter.incrementToken()) {
       tas.add(termAtt.toString());
     }
+    filter.end();
+    filter.close();
     return tas;
   }
   

Modified: lucene/dev/branches/branch_4x/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java?rev=1401457&r1=1401456&r2=1401457&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java Tue Oct 23 20:45:20 2012
@@ -62,12 +62,16 @@ public class TestMorfologikAnalyzer exte
     ts_1.reset();
     ts_1.incrementToken();
     assertEquals("first stream", "liście", termAtt_1.toString());
+    ts_1.end();
+    ts_1.close();
 
     TokenStream ts_2 = a.tokenStream("dummy", new StringReader("danych"));
     CharTermAttribute termAtt_2 = ts_2.getAttribute(CharTermAttribute.class);
     ts_2.reset();
     ts_2.incrementToken();
     assertEquals("second stream", "dany", termAtt_2.toString());
+    ts_2.end();
+    ts_2.close();
   }
 
   /** Test stemming of mixed-case tokens. */
@@ -110,6 +114,7 @@ public class TestMorfologikAnalyzer exte
   public final void testPOSAttribute() throws IOException {
     TokenStream ts = getTestAnalyzer().tokenStream("dummy", new StringReader("liście"));
 
+    ts.reset();
     assertPOSToken(ts, "liście",  
         "subst:sg:acc:n2",
         "subst:sg:nom:n2",
@@ -127,6 +132,8 @@ public class TestMorfologikAnalyzer exte
     assertPOSToken(ts, "lista", 
         "subst:sg:dat:f",
         "subst:sg:loc:f");
+    ts.end();
+    ts.close();
   }
 
   /** blast some random strings through the analyzer */