You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2013/09/22 15:57:45 UTC

svn commit: r1525362 [3/3] - in /lucene/dev/trunk: lucene/ lucene/analysis/common/src/java/org/apache/lucene/analysis/core/ lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/ lucene/analysis/common/src/java/org/apache/lucene/analysis/pat...

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/analysis/Tokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/analysis/Tokenizer.java?rev=1525362&r1=1525361&r2=1525362&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/analysis/Tokenizer.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/analysis/Tokenizer.java Sun Sep 22 13:57:43 2013
@@ -30,21 +30,28 @@ import java.io.IOException;
   call {@link AttributeSource#clearAttributes()} before
   setting attributes.
  */
-public abstract class Tokenizer extends TokenStream {
+public abstract class Tokenizer extends TokenStream {  
   /** The text source for this Tokenizer. */
-  protected Reader input;
+  protected Reader input = ILLEGAL_STATE_READER;
+  
+  /** Pending reader: not actually assigned to input until reset() */
+  private Reader inputPending = ILLEGAL_STATE_READER;
 
   /** Construct a token stream processing the given input. */
   protected Tokenizer(Reader input) {
-    assert input != null: "input must not be null";
-    this.input = input;
+    if (input == null) {
+      throw new NullPointerException("input must not be null");
+    }
+    this.inputPending = input;
   }
   
   /** Construct a token stream processing the given input using the given AttributeFactory. */
   protected Tokenizer(AttributeFactory factory, Reader input) {
     super(factory);
-    assert input != null: "input must not be null";
-    this.input = input;
+    if (input == null) {
+      throw new NullPointerException("input must not be null");
+    }
+    this.inputPending = input;
   }
 
   /**
@@ -56,12 +63,10 @@ public abstract class Tokenizer extends 
    */
   @Override
   public void close() throws IOException {
-    if (input != null) {
-      input.close();
-      // LUCENE-2387: don't hold onto Reader after close, so
-      // GC can reclaim
-      input = null;
-    }
+    input.close();
+    // LUCENE-2387: don't hold onto Reader after close, so
+    // GC can reclaim
+    inputPending = input = ILLEGAL_STATE_READER;
   }
   
   /** Return the corrected offset. If {@link #input} is a {@link CharFilter} subclass
@@ -71,7 +76,6 @@ public abstract class Tokenizer extends 
    * @see CharFilter#correctOffset
    */
   protected final int correctOffset(int currentOff) {
-    assert input != null: "this tokenizer is closed";
     return (input instanceof CharFilter) ? ((CharFilter) input).correctOffset(currentOff) : currentOff;
   }
 
@@ -79,14 +83,36 @@ public abstract class Tokenizer extends 
    *  analyzer (in its tokenStream method) will use
    *  this to re-use a previously created tokenizer. */
   public final void setReader(Reader input) throws IOException {
-    assert input != null: "input must not be null";
-    this.input = input;
+    if (input == null) {
+      throw new NullPointerException("input must not be null");
+    }
+    this.input = ILLEGAL_STATE_READER;
+    this.inputPending = input;
     assert setReaderTestPoint();
   }
   
+  @Override
+  public void reset() throws IOException {
+    super.reset();
+    input = inputPending;
+    inputPending = ILLEGAL_STATE_READER;
+  }
+
   // only used by assert, for testing
   boolean setReaderTestPoint() {
     return true;
   }
+  
+  private static final Reader ILLEGAL_STATE_READER = new Reader() {
+    @Override
+    public int read(char[] cbuf, int off, int len) {
+      throw new IllegalStateException("TokenStream contract violation: reset()/close() call missing, " +
+          "reset() called multiple times, or subclass does not call super.reset(). " +
+          "Please see Javadocs of TokenStream class for more information about the correct consuming workflow.");
+    }
+
+    @Override
+    public void close() {} 
+  };
 }
 

Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/analysis/TestGraphTokenizers.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/analysis/TestGraphTokenizers.java?rev=1525362&r1=1525361&r2=1525362&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/analysis/TestGraphTokenizers.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/analysis/TestGraphTokenizers.java Sun Sep 22 13:57:43 2013
@@ -68,7 +68,8 @@ public class TestGraphTokenizers extends
     }
 
     @Override
-    public void reset() {
+    public void reset() throws IOException {
+      super.reset();
       tokens = null;
       upto = 0;
     }

Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java?rev=1525362&r1=1525361&r2=1525362&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java Sun Sep 22 13:57:43 2013
@@ -36,9 +36,9 @@ public class TestMockAnalyzer extends Ba
     Analyzer a = new MockAnalyzer(random());
     assertAnalyzesTo(a, "A bc defg hiJklmn opqrstuv wxy z ",
         new String[] { "a", "bc", "defg", "hijklmn", "opqrstuv", "wxy", "z" });
-    assertAnalyzesToReuse(a, "aba cadaba shazam",
+    assertAnalyzesTo(a, "aba cadaba shazam",
         new String[] { "aba", "cadaba", "shazam" });
-    assertAnalyzesToReuse(a, "break on whitespace",
+    assertAnalyzesTo(a, "break on whitespace",
         new String[] { "break", "on", "whitespace" });
   }
   
@@ -47,9 +47,9 @@ public class TestMockAnalyzer extends Ba
     Analyzer a = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
     assertAnalyzesTo(a, "a-bc123 defg+hijklmn567opqrstuv78wxy_z ",
         new String[] { "a", "bc", "defg", "hijklmn", "opqrstuv", "wxy", "z" });
-    assertAnalyzesToReuse(a, "aba4cadaba-Shazam",
+    assertAnalyzesTo(a, "aba4cadaba-Shazam",
         new String[] { "aba", "cadaba", "shazam" });
-    assertAnalyzesToReuse(a, "break+on/Letters",
+    assertAnalyzesTo(a, "break+on/Letters",
         new String[] { "break", "on", "letters" });
   }
   
@@ -58,9 +58,9 @@ public class TestMockAnalyzer extends Ba
     Analyzer a = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
     assertAnalyzesTo(a, "a-bc123 defg+hijklmn567opqrstuv78wxy_z ",
         new String[] { "a-bc123 defg+hijklmn567opqrstuv78wxy_z " });
-    assertAnalyzesToReuse(a, "aba4cadaba-Shazam",
+    assertAnalyzesTo(a, "aba4cadaba-Shazam",
         new String[] { "aba4cadaba-Shazam" });
-    assertAnalyzesToReuse(a, "break+on/Nothing",
+    assertAnalyzesTo(a, "break+on/Nothing",
         new String[] { "break+on/Nothing" });
   }
   
@@ -106,7 +106,7 @@ public class TestMockAnalyzer extends Ba
     stream.end();
     stream.close();
     
-    assertAnalyzesToReuse(analyzer, testString, new String[] { "t" });
+    assertAnalyzesTo(analyzer, testString, new String[] { "t" });
   }
 
   /** blast some random strings through the analyzer */

Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java?rev=1525362&r1=1525361&r2=1525362&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java Sun Sep 22 13:57:43 2013
@@ -1599,14 +1599,15 @@ public class TestIndexWriter extends Luc
 
     @Override
     public void reset() throws IOException {
-       this.upto = 0;
-       final StringBuilder b = new StringBuilder();
-       final char[] buffer = new char[1024];
-       int n;
-       while ((n = input.read(buffer)) != -1) {
-         b.append(buffer, 0, n);
-       }
-       this.tokens = b.toString().split(" ");
+      super.reset();
+      this.upto = 0;
+      final StringBuilder b = new StringBuilder();
+      final char[] buffer = new char[1024];
+      int n;
+      while ((n = input.read(buffer)) != -1) {
+        b.append(buffer, 0, n);
+      }
+      this.tokens = b.toString().split(" ");
     }
   }
 

Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestTermRangeQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestTermRangeQuery.java?rev=1525362&r1=1525361&r2=1525362&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestTermRangeQuery.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestTermRangeQuery.java Sun Sep 22 13:57:43 2013
@@ -227,7 +227,8 @@ public class TestTermRangeQuery extends 
       }
 
       @Override
-      public void reset() throws IOException {;
+      public void reset() throws IOException {
+        super.reset();
         done = false;
       }
     }

Modified: lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/highlight/OffsetLimitTokenFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/highlight/OffsetLimitTokenFilterTest.java?rev=1525362&r1=1525361&r2=1525362&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/highlight/OffsetLimitTokenFilterTest.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/highlight/OffsetLimitTokenFilterTest.java Sun Sep 22 13:57:43 2013
@@ -49,7 +49,7 @@ public class OffsetLimitTokenFilterTest 
     assertTokenStreamContents(filter, new String[] {"short", "toolong",
         "evenmuchlongertext"});
     
-    checkOneTermReuse(new Analyzer() {
+    checkOneTerm(new Analyzer() {
       
       @Override
       public TokenStreamComponents createComponents(String fieldName, Reader reader) {

Modified: lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java?rev=1525362&r1=1525361&r2=1525362&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java Sun Sep 22 13:57:43 2013
@@ -319,7 +319,8 @@ public abstract class AbstractTestCase e
     }
     
     @Override
-    public void reset() {
+    public void reset() throws IOException {
+      super.reset();
       startTerm = 0;
       nextStartOffset = 0;
       snippet = null;

Modified: lucene/dev/trunk/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiPhraseQueryParsing.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiPhraseQueryParsing.java?rev=1525362&r1=1525361&r2=1525362&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiPhraseQueryParsing.java (original)
+++ lucene/dev/trunk/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiPhraseQueryParsing.java Sun Sep 22 13:57:43 2013
@@ -82,6 +82,7 @@ public class TestMultiPhraseQueryParsing
 
     @Override
     public void reset() throws IOException {
+      super.reset();
       this.upto = 0;
       this.lastPos = 0;
     }

Modified: lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java?rev=1525362&r1=1525361&r2=1525362&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java (original)
+++ lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java Sun Sep 22 13:57:43 2013
@@ -341,14 +341,17 @@ public abstract class BaseTokenStreamTes
   }
   
   public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[]) throws IOException {
+    checkResetException(a, input);
     assertTokenStreamContents(a.tokenStream("dummy", input), output, startOffsets, endOffsets, types, posIncrements, null, input.length());
   }
   
   public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[], int posLengths[]) throws IOException {
+    checkResetException(a, input);
     assertTokenStreamContents(a.tokenStream("dummy", input), output, startOffsets, endOffsets, types, posIncrements, posLengths, input.length());
   }
 
   public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[], int posLengths[], boolean offsetsAreCorrect) throws IOException {
+    checkResetException(a, input);
     assertTokenStreamContents(a.tokenStream("dummy", input), output, startOffsets, endOffsets, types, posIncrements, posLengths, input.length(), offsetsAreCorrect);
   }
   
@@ -375,30 +378,28 @@ public abstract class BaseTokenStreamTes
   public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], int[] posIncrements) throws IOException {
     assertAnalyzesTo(a, input, output, startOffsets, endOffsets, null, posIncrements, null);
   }
-  
 
-  public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[]) throws IOException {
-    assertTokenStreamContents(a.tokenStream("dummy", input), output, startOffsets, endOffsets, types, posIncrements, null, input.length());
-  }
-  
-  public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output) throws IOException {
-    assertAnalyzesToReuse(a, input, output, null, null, null, null);
-  }
-  
-  public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output, String[] types) throws IOException {
-    assertAnalyzesToReuse(a, input, output, null, null, types, null);
-  }
-  
-  public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output, int[] posIncrements) throws IOException {
-    assertAnalyzesToReuse(a, input, output, null, null, null, posIncrements);
-  }
-  
-  public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[]) throws IOException {
-    assertAnalyzesToReuse(a, input, output, startOffsets, endOffsets, null, null);
-  }
-  
-  public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], int[] posIncrements) throws IOException {
-    assertAnalyzesToReuse(a, input, output, startOffsets, endOffsets, null, posIncrements);
+  static void checkResetException(Analyzer a, String input) throws IOException {
+    TokenStream ts = a.tokenStream("bogus", input);
+    try {
+      if (ts.incrementToken()) {
+        //System.out.println(ts.reflectAsString(false));
+        fail("didn't get expected exception when reset() not called");
+      }
+    } catch (IllegalStateException expected) {
+      // ok
+    } catch (AssertionError expected) {
+      // ok: MockTokenizer
+      assertTrue(expected.getMessage(), expected.getMessage() != null && expected.getMessage().contains("wrong state"));
+    } catch (Exception unexpected) {
+      fail("got wrong exception when reset() not called: " + unexpected);
+    } finally {
+      // consume correctly
+      ts.reset();
+      while (ts.incrementToken()) {}
+      ts.end();
+      ts.close();
+    }
   }
 
   // simple utility method for testing stemmers
@@ -407,10 +408,6 @@ public abstract class BaseTokenStreamTes
     assertAnalyzesTo(a, input, new String[]{expected});
   }
   
-  public static void checkOneTermReuse(Analyzer a, final String input, final String expected) throws IOException {
-    assertAnalyzesToReuse(a, input, new String[]{expected});
-  }
-  
   /** utility method for blasting tokenstreams with data to make sure they don't do anything crazy */
   public static void checkRandomData(Random random, Analyzer a, int iterations) throws IOException {
     checkRandomData(random, a, iterations, 20, false, true);
@@ -476,6 +473,7 @@ public abstract class BaseTokenStreamTes
   }
 
   public static void checkRandomData(Random random, Analyzer a, int iterations, int maxWordLength, boolean simple, boolean offsetsAreCorrect) throws IOException {
+    checkResetException(a, "best effort");
     long seed = random.nextLong();
     boolean useCharFilter = random.nextBoolean();
     Directory dir = null;

Modified: lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/VocabularyAssert.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/VocabularyAssert.java?rev=1525362&r1=1525361&r2=1525362&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/VocabularyAssert.java (original)
+++ lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/VocabularyAssert.java Sun Sep 22 13:57:43 2013
@@ -41,7 +41,7 @@ public class VocabularyAssert {
     while ((inputWord = vocReader.readLine()) != null) {
       String expectedWord = outputReader.readLine();
       Assert.assertNotNull(expectedWord);
-      BaseTokenStreamTestCase.checkOneTermReuse(a, inputWord, expectedWord);
+      BaseTokenStreamTestCase.checkOneTerm(a, inputWord, expectedWord);
     }
   }
   
@@ -55,7 +55,7 @@ public class VocabularyAssert {
       if (inputLine.startsWith("#") || inputLine.trim().length() == 0)
         continue; /* comment */
       String words[] = inputLine.split("\t");
-      BaseTokenStreamTestCase.checkOneTermReuse(a, words[0], words[1]);
+      BaseTokenStreamTestCase.checkOneTerm(a, words[0], words[1]);
     }
   }
   

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/TrieTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/TrieTokenizerFactory.java?rev=1525362&r1=1525361&r2=1525362&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/TrieTokenizerFactory.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/TrieTokenizerFactory.java Sun Sep 22 13:57:43 2013
@@ -96,8 +96,9 @@ final class TrieTokenizer extends Tokeni
   }
 
   @Override
-  public void reset() {
-   try {
+  public void reset() throws IOException {
+    super.reset();
+    try {
       int upto = 0;
       char[] buf = termAtt.buffer();
       while (true) {
@@ -167,6 +168,7 @@ final class TrieTokenizer extends Tokeni
 
   @Override
   public void end() throws IOException {
+    super.end();
     if (hasValue) {
       ts.end();
     }

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java?rev=1525362&r1=1525361&r2=1525362&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java Sun Sep 22 13:57:43 2013
@@ -691,6 +691,11 @@ final class TokenOrderingFilter extends 
       return true;
     }
   }
+
+  @Override
+  public void reset() throws IOException {
+    // this looks wrong: but its correct.
+  }
 }
 
 // for TokenOrderingFilter, so it can easily sort by startOffset

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/BoolField.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/BoolField.java?rev=1525362&r1=1525361&r2=1525362&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/BoolField.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/BoolField.java Sun Sep 22 13:57:43 2013
@@ -74,6 +74,7 @@ public class BoolField extends Primitive
 
         @Override
         public void reset() throws IOException {
+          super.reset();
           done = false;
         }
 

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java?rev=1525362&r1=1525361&r2=1525362&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java Sun Sep 22 13:57:43 2013
@@ -252,9 +252,11 @@ public class PreAnalyzedField extends Fi
     private byte[] binaryValue = null;
     private PreAnalyzedParser parser;
     private Reader lastReader;
+    private Reader input; // hides original input since we replay saved states (and dont reuse)
     
     public PreAnalyzedTokenizer(Reader reader, PreAnalyzedParser parser) {
       super(reader);
+      this.input = reader;
       this.parser = parser;
     }