Posted to commits@lucene.apache.org by us...@apache.org on 2013/01/04 00:35:21 UTC

svn commit: r1428671 - in /lucene/dev/trunk/lucene: ./ analysis/common/src/test/org/apache/lucene/analysis/core/ analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/ core/src/java/org/apache/lucene/index/ core/src/test/org/apache/lucene/d...

Author: uschindler
Date: Thu Jan  3 23:35:20 2013
New Revision: 1428671

URL: http://svn.apache.org/viewvc?rev=1428671&view=rev
Log:
LUCENE-4656: Fix a regression in IndexWriter so that it works with empty TokenStreams that have no TermToBytesRefAttribute (commonly provided by CharTermAttribute), e.g., oal.analysis.miscellaneous.EmptyTokenStream. Remove EmptyTokenizer from test-framework.
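
For context, a minimal standalone sketch of the scenario this commit fixes (not part of the commit itself; the class name and Version constant are illustrative, assuming the 4.x-era APIs used in the diff below):

    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.miscellaneous.EmptyTokenStream;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.TextField;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.RAMDirectory;
    import org.apache.lucene.util.Version;

    public class Lucene4656Repro {
      public static void main(String[] args) throws Exception {
        Directory dir = new RAMDirectory();
        // null analyzer: the field below supplies its own TokenStream
        IndexWriter writer = new IndexWriter(dir,
            new IndexWriterConfig(Version.LUCENE_41, null));

        // EmptyTokenStream declares no attributes at all, so it lacks the
        // TermToBytesRefAttribute the indexing chain used to look up
        // unconditionally; before this fix, addDocument failed here
        TokenStream ts = new EmptyTokenStream();

        Document doc = new Document();
        doc.add(new TextField("description", ts));
        writer.addDocument(doc); // succeeds with this commit applied

        writer.close();
        dir.close();
      }
    }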

Removed:
    lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/EmptyTokenizer.java
Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAllAnalyzersHaveFactories.java
    lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
    lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestEmptyTokenStream.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/document/TestDocument.java
    lucene/dev/trunk/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiFieldQueryParser.java
    lucene/dev/trunk/lucene/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestMultiFieldQPHelper.java
    lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1428671&r1=1428670&r2=1428671&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Thu Jan  3 23:35:20 2013
@@ -305,6 +305,11 @@ Bug Fixes
 
 * LUCENE-4461: If you added the same FacetRequest more than once, you would get
   inconsistent results. (Gilad Barkai via Shai Erera)
+
+* LUCENE-4656: Fix a regression in IndexWriter so that it works with empty
+  TokenStreams that have no TermToBytesRefAttribute (commonly provided by
+  CharTermAttribute), e.g., oal.analysis.miscellaneous.EmptyTokenStream.
+  (Uwe Schindler, Adrien Grand, Robert Muir)
     
 Changes in Runtime Behavior
   

Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAllAnalyzersHaveFactories.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAllAnalyzersHaveFactories.java?rev=1428671&r1=1428670&r2=1428671&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAllAnalyzersHaveFactories.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAllAnalyzersHaveFactories.java Thu Jan  3 23:35:20 2013
@@ -17,19 +17,16 @@ package org.apache.lucene.analysis.core;
  * limitations under the License.
  */
 
-import java.lang.reflect.Modifier;
 import java.io.Reader;
 import java.io.StringReader;
-import java.util.ArrayList;
+import java.lang.reflect.Modifier;
 import java.util.Collections;
 import java.util.IdentityHashMap;
 import java.util.List;
-import java.util.Map;
 import java.util.Set;
 
 import org.apache.lucene.analysis.CachingTokenFilter;
 import org.apache.lucene.analysis.CharFilter;
-import org.apache.lucene.analysis.EmptyTokenizer;
 import org.apache.lucene.analysis.MockCharFilter;
 import org.apache.lucene.analysis.MockFixedLengthPayloadFilter;
 import org.apache.lucene.analysis.MockGraphTokenFilter;
@@ -39,10 +36,9 @@ import org.apache.lucene.analysis.MockTo
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.MockVariableLengthPayloadFilter;
 import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.ValidatingTokenFilter;
-import org.apache.lucene.analysis.core.TestRandomChains;
 import org.apache.lucene.analysis.path.ReversePathHierarchyTokenizer;
 import org.apache.lucene.analysis.sinks.TeeSinkTokenFilter;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
@@ -72,7 +68,6 @@ public class TestAllAnalyzersHaveFactori
       MockRandomLookaheadTokenFilter.class,
       MockTokenFilter.class,
       MockVariableLengthPayloadFilter.class,
-      EmptyTokenizer.class,
       ValidatingTokenFilter.class
     );
   }

Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java?rev=1428671&r1=1428670&r2=1428671&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java Thu Jan  3 23:35:20 2013
@@ -46,7 +46,6 @@ import org.apache.lucene.analysis.Analyz
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.CachingTokenFilter;
 import org.apache.lucene.analysis.CharFilter;
-import org.apache.lucene.analysis.EmptyTokenizer;
 import org.apache.lucene.analysis.MockGraphTokenFilter;
 import org.apache.lucene.analysis.MockRandomLookaheadTokenFilter;
 import org.apache.lucene.analysis.MockTokenFilter;
@@ -110,8 +109,6 @@ public class TestRandomChains extends Ba
     // TODO: can we promote some of these to be only
     // offsets offenders?
     Collections.<Class<?>>addAll(brokenComponents,
-      // TODO: fix basetokenstreamtestcase not to trip because this one has no CharTermAtt
-      EmptyTokenizer.class,
       // doesn't actually reset itself!
       CachingTokenFilter.class,
       // doesn't consume whole stream!

Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestEmptyTokenStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestEmptyTokenStream.java?rev=1428671&r1=1428670&r2=1428671&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestEmptyTokenStream.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestEmptyTokenStream.java Thu Jan  3 23:35:20 2013
@@ -20,15 +20,55 @@ package org.apache.lucene.analysis.misce
 import java.io.IOException;
 
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.store.Directory;
 
-public class TestEmptyTokenStream extends LuceneTestCase {
+public class TestEmptyTokenStream extends BaseTokenStreamTestCase {
 
-  public void test() throws IOException {
+  public void testConsume() throws IOException {
     TokenStream ts = new EmptyTokenStream();
+    ts.reset();
     assertFalse(ts.incrementToken());
+    ts.end();
+    ts.close();
+    // try again with reuse:
     ts.reset();
     assertFalse(ts.incrementToken());
+    ts.end();
+    ts.close();
+  }
+  
+  public void testConsume2() throws IOException {
+    BaseTokenStreamTestCase.assertTokenStreamContents(new EmptyTokenStream(), new String[0]);
+  }
+
+  public void testIndexWriter_LUCENE4656() throws IOException {
+    Directory directory = newDirectory();
+    IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig(
+        TEST_VERSION_CURRENT, null));
+
+    TokenStream ts = new EmptyTokenStream();
+    assertFalse(ts.hasAttribute(TermToBytesRefAttribute.class));
+
+    Document doc = new Document();
+    doc.add(new StringField("id", "0", Field.Store.YES));
+    doc.add(new TextField("description", ts));
+    
+    // this should not fail even though we have no TermToBytesRefAttribute
+    writer.addDocument(doc);
+    
+    assertEquals(1, writer.numDocs());
+
+    writer.close();
+    directory.close();
   }
 
 }

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java?rev=1428671&r1=1428670&r2=1428671&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java Thu Jan  3 23:35:20 2013
@@ -106,75 +106,72 @@ final class DocInverterPerField extends 
           OffsetAttribute offsetAttribute = fieldState.attributeSource.addAttribute(OffsetAttribute.class);
           PositionIncrementAttribute posIncrAttribute = fieldState.attributeSource.addAttribute(PositionIncrementAttribute.class);
 
-          consumer.start(field);
+          if (hasMoreTokens) {
+            consumer.start(field);
 
-          for (;;) {
-
-            // If we hit an exception in stream.next below
-            // (which is fairly common, eg if analyzer
-            // chokes on a given document), then it's
-            // non-aborting and (above) this one document
-            // will be marked as deleted, but still
-            // consume a docID
-
-            if (!hasMoreTokens) break;
-
-            final int posIncr = posIncrAttribute.getPositionIncrement();
-            if (posIncr < 0) {
-              throw new IllegalArgumentException("position increment must be >=0 (got " + posIncr + ")");
-            }
-            if (fieldState.position == 0 && posIncr == 0) {
-              throw new IllegalArgumentException("first position increment must be > 0 (got 0)");
-            }
-            int position = fieldState.position + posIncr;
-            if (position > 0) {
-              // NOTE: confusing: this "mirrors" the
-              // position++ we do below
-              position--;
-            } else if (position < 0) {
-              throw new IllegalArgumentException("position overflow for field '" + field.name() + "'");
-            }
-            
-            // position is legal, we can safely place it in fieldState now.
-            // not sure if anything will use fieldState after non-aborting exc...
-            fieldState.position = position;
-
-            if (posIncr == 0)
-              fieldState.numOverlap++;
-            
-            if (checkOffsets) {
-              int startOffset = fieldState.offset + offsetAttribute.startOffset();
-              int endOffset = fieldState.offset + offsetAttribute.endOffset();
-              if (startOffset < 0 || endOffset < startOffset) {
-                throw new IllegalArgumentException("startOffset must be non-negative, and endOffset must be >= startOffset, "
-                    + "startOffset=" + startOffset + ",endOffset=" + endOffset);
+            do {
+              // If we hit an exception in stream.incrementToken() below
+              // (which is fairly common, eg if analyzer
+              // chokes on a given document), then it's
+              // non-aborting and (above) this one document
+              // will be marked as deleted, but still
+              // consume a docID
+
+              final int posIncr = posIncrAttribute.getPositionIncrement();
+              if (posIncr < 0) {
+                throw new IllegalArgumentException("position increment must be >=0 (got " + posIncr + ")");
               }
-              if (startOffset < lastStartOffset) {
-                throw new IllegalArgumentException("offsets must not go backwards startOffset=" 
-                     + startOffset + " is < lastStartOffset=" + lastStartOffset);
+              if (fieldState.position == 0 && posIncr == 0) {
+                throw new IllegalArgumentException("first position increment must be > 0 (got 0)");
               }
-              lastStartOffset = startOffset;
-            }
-
-            boolean success = false;
-            try {
-              // If we hit an exception in here, we abort
-              // all buffered documents since the last
-              // flush, on the likelihood that the
-              // internal state of the consumer is now
-              // corrupt and should not be flushed to a
-              // new segment:
-              consumer.add();
-              success = true;
-            } finally {
-              if (!success) {
-                docState.docWriter.setAborting();
+              int position = fieldState.position + posIncr;
+              if (position > 0) {
+                // NOTE: confusing: this "mirrors" the
+                // position++ we do below
+                position--;
+              } else if (position < 0) {
+                throw new IllegalArgumentException("position overflow for field '" + field.name() + "'");
+              }
+              
+              // position is legal, we can safely place it in fieldState now.
+              // not sure if anything will use fieldState after non-aborting exc...
+              fieldState.position = position;
+
+              if (posIncr == 0)
+                fieldState.numOverlap++;
+              
+              if (checkOffsets) {
+                int startOffset = fieldState.offset + offsetAttribute.startOffset();
+                int endOffset = fieldState.offset + offsetAttribute.endOffset();
+                if (startOffset < 0 || endOffset < startOffset) {
+                  throw new IllegalArgumentException("startOffset must be non-negative, and endOffset must be >= startOffset, "
+                      + "startOffset=" + startOffset + ",endOffset=" + endOffset);
+                }
+                if (startOffset < lastStartOffset) {
+                  throw new IllegalArgumentException("offsets must not go backwards startOffset=" 
+                       + startOffset + " is < lastStartOffset=" + lastStartOffset);
+                }
+                lastStartOffset = startOffset;
               }
-            }
-            fieldState.length++;
-            fieldState.position++;
 
-            hasMoreTokens = stream.incrementToken();
+              boolean success = false;
+              try {
+                // If we hit an exception in here, we abort
+                // all buffered documents since the last
+                // flush, on the likelihood that the
+                // internal state of the consumer is now
+                // corrupt and should not be flushed to a
+                // new segment:
+                consumer.add();
+                success = true;
+              } finally {
+                if (!success) {
+                  docState.docWriter.setAborting();
+                }
+              }
+              fieldState.length++;
+              fieldState.position++;
+            } while (stream.incrementToken());
           }
           // trigger streams to perform end-of-stream operations
           stream.end();
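
The heart of the change above: consumer.start(field), the call that binds the downstream consumer to the stream's attributes (including TermToBytesRefAttribute), now only runs once the first incrementToken() has succeeded. Stripped to its skeleton (a sketch; bodies elided):

    // before: start() ran even for an empty stream, tripping on the
    // missing TermToBytesRefAttribute
    consumer.start(field);
    for (;;) {
      if (!hasMoreTokens) break;
      // ... validate positions/offsets, then consumer.add() ...
      hasMoreTokens = stream.incrementToken();
    }

    // after: an empty stream skips the whole block, including start()
    if (hasMoreTokens) {
      consumer.start(field);
      do {
        // ... validate positions/offsets, then consumer.add() ...
      } while (stream.incrementToken());
    }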

Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/document/TestDocument.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/document/TestDocument.java?rev=1428671&r1=1428670&r2=1428671&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/document/TestDocument.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/document/TestDocument.java Thu Jan  3 23:35:20 2013
@@ -20,13 +20,9 @@ package org.apache.lucene.document;
 import java.io.StringReader;
 import java.util.List;
 
-import org.apache.lucene.analysis.EmptyTokenizer;
-import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.StorableField;
 import org.apache.lucene.index.StoredDocument;
@@ -318,7 +314,7 @@ public class TestDocument extends Lucene
   // LUCENE-3616
   public void testInvalidFields() {
     try {
-      new Field("foo", new EmptyTokenizer(new StringReader("")), StringField.TYPE_STORED);
+      new Field("foo", new MockTokenizer(new StringReader("")), StringField.TYPE_STORED);
       fail("did not hit expected exc");
     } catch (IllegalArgumentException iae) {
       // expected

Modified: lucene/dev/trunk/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiFieldQueryParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiFieldQueryParser.java?rev=1428671&r1=1428670&r2=1428671&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiFieldQueryParser.java (original)
+++ lucene/dev/trunk/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiFieldQueryParser.java Thu Jan  3 23:35:20 2013
@@ -18,6 +18,7 @@ package org.apache.lucene.queryparser.cl
  */
 
 import java.io.Reader;
+import java.io.StringReader;
 import java.util.HashMap;
 import java.util.Map;
 
@@ -33,6 +34,7 @@ import org.apache.lucene.search.IndexSea
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.LuceneTestCase;
 
 /**
@@ -300,7 +302,7 @@ public class TestMultiFieldQueryParser e
   }
   
   /**
-   * Return empty tokens for field "f1".
+   * Return no tokens for field "f1".
    */
   private static class AnalyzerReturningNull extends Analyzer {
     MockAnalyzer stdAnalyzer = new MockAnalyzer(random());
@@ -310,13 +312,21 @@ public class TestMultiFieldQueryParser e
     }
 
     @Override
-    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+    protected Reader initReader(String fieldName, Reader reader) {
       if ("f1".equals(fieldName)) {
-        return new TokenStreamComponents(new EmptyTokenizer(reader));
+        // we don't use the reader, so close it:
+        IOUtils.closeWhileHandlingException(reader);
+        // return empty reader, so MockTokenizer returns no tokens:
+        return new StringReader("");
       } else {
-        return stdAnalyzer.createComponents(fieldName, reader);
+        return super.initReader(fieldName, reader);
       }
     }
+
+    @Override
+    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      return stdAnalyzer.createComponents(fieldName, reader);
+    }
   }
 
 }
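
With EmptyTokenizer gone, the test gets its "no tokens for f1" behavior by swapping in an empty Reader before tokenization, so MockAnalyzer's normal component chain still builds but sees no input. A hypothetical extra test method for this class sketches the resulting behavior (assuming the Lucene 4.x Analyzer.tokenStream(String, Reader) API):

    public void testNoTokensForF1() throws IOException {
      Analyzer a = new AnalyzerReturningNull();
      // "f1": initReader substituted an empty input, so no tokens appear
      TokenStream ts = a.tokenStream("f1", new StringReader("some text"));
      ts.reset();
      assertFalse(ts.incrementToken());
      ts.end();
      ts.close();
      // any other field analyzes normally through the MockAnalyzer chain
    }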

Modified: lucene/dev/trunk/lucene/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestMultiFieldQPHelper.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestMultiFieldQPHelper.java?rev=1428671&r1=1428670&r2=1428671&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestMultiFieldQPHelper.java (original)
+++ lucene/dev/trunk/lucene/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestMultiFieldQPHelper.java Thu Jan  3 23:35:20 2013
@@ -18,6 +18,7 @@ package org.apache.lucene.queryparser.fl
  */
 
 import java.io.Reader;
+import java.io.StringReader;
 import java.util.HashMap;
 import java.util.Map;
 
@@ -35,6 +36,7 @@ import org.apache.lucene.search.IndexSea
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.LuceneTestCase;
 
 /**
@@ -339,9 +341,9 @@ public class TestMultiFieldQPHelper exte
   }
 
   /**
-   * Return empty tokens for field "f1".
+   * Return no tokens for field "f1".
    */
-  private static final class AnalyzerReturningNull extends Analyzer {
+  private static class AnalyzerReturningNull extends Analyzer {
     MockAnalyzer stdAnalyzer = new MockAnalyzer(random());
 
     public AnalyzerReturningNull() {
@@ -349,13 +351,21 @@ public class TestMultiFieldQPHelper exte
     }
 
     @Override
-    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+    protected Reader initReader(String fieldName, Reader reader) {
       if ("f1".equals(fieldName)) {
-        return new TokenStreamComponents(new EmptyTokenizer(reader));
+        // we don't use the reader, so close it:
+        IOUtils.closeWhileHandlingException(reader);
+        // return empty reader, so MockTokenizer returns no tokens:
+        return new StringReader("");
       } else {
-        return stdAnalyzer.createComponents(fieldName, reader);
+        return super.initReader(fieldName, reader);
       }
     }
+
+    @Override
+    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      return stdAnalyzer.createComponents(fieldName, reader);
+    }
   }
 
 }

Modified: lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java?rev=1428671&r1=1428670&r2=1428671&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java (original)
+++ lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java Thu Jan  3 23:35:20 2013
@@ -116,8 +116,11 @@ public abstract class BaseTokenStreamTes
     assertNotNull(output);
     CheckClearAttributesAttribute checkClearAtt = ts.addAttribute(CheckClearAttributesAttribute.class);
     
-    assertTrue("has no CharTermAttribute", ts.hasAttribute(CharTermAttribute.class));
-    CharTermAttribute termAtt = ts.getAttribute(CharTermAttribute.class);
+    CharTermAttribute termAtt = null;
+    if (output.length > 0) {
+      assertTrue("has no CharTermAttribute", ts.hasAttribute(CharTermAttribute.class));
+      termAtt = ts.getAttribute(CharTermAttribute.class);
+    }
     
     OffsetAttribute offsetAtt = null;
     if (startOffsets != null || endOffsets != null || finalOffset != null) {
@@ -615,8 +618,7 @@ public abstract class BaseTokenStreamTes
     int remainder = random.nextInt(10);
     Reader reader = new StringReader(text);
     TokenStream ts = a.tokenStream("dummy", useCharFilter ? new MockCharFilter(reader, remainder) : reader);
-    assertTrue("has no CharTermAttribute", ts.hasAttribute(CharTermAttribute.class));
-    CharTermAttribute termAtt = ts.getAttribute(CharTermAttribute.class);
+    CharTermAttribute termAtt = ts.hasAttribute(CharTermAttribute.class) ? ts.getAttribute(CharTermAttribute.class) : null;
     OffsetAttribute offsetAtt = ts.hasAttribute(OffsetAttribute.class) ? ts.getAttribute(OffsetAttribute.class) : null;
     PositionIncrementAttribute posIncAtt = ts.hasAttribute(PositionIncrementAttribute.class) ? ts.getAttribute(PositionIncrementAttribute.class) : null;
     PositionLengthAttribute posLengthAtt = ts.hasAttribute(PositionLengthAttribute.class) ? ts.getAttribute(PositionLengthAttribute.class) : null;
@@ -631,6 +633,7 @@ public abstract class BaseTokenStreamTes
 
     // First pass: save away "correct" tokens
     while (ts.incrementToken()) {
+      assertNotNull("has no CharTermAttribute", termAtt);
       tokens.add(termAtt.toString());
       if (typeAtt != null) types.add(typeAtt.type());
       if (posIncAtt != null) positions.add(posIncAtt.getPositionIncrement());
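
Net effect of the relaxed checks: the helper now accepts a stream that lacks CharTermAttribute, as long as no output tokens are expected; if a token does appear anyway, the new assertNotNull still fails fast. E.g., mirroring the new testConsume2 above:

    // passes now: zero tokens expected, so no CharTermAttribute is required
    assertTokenStreamContents(new EmptyTokenStream(), new String[0]);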