You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2014/10/07 11:20:05 UTC
svn commit: r1629836 - in /lucene/dev/branches/branch_5x: ./ lucene/
lucene/CHANGES.txt lucene/core/
lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java
lucene/core/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java
Author: rmuir
Date: Tue Oct 7 09:20:04 2014
New Revision: 1629836
URL: http://svn.apache.org/r1629836
Log:
LUCENE-5980: don't let document length overflow
Modified:
lucene/dev/branches/branch_5x/ (props changed)
lucene/dev/branches/branch_5x/lucene/ (props changed)
lucene/dev/branches/branch_5x/lucene/CHANGES.txt (contents, props changed)
lucene/dev/branches/branch_5x/lucene/core/ (props changed)
lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java
lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java
Modified: lucene/dev/branches/branch_5x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/CHANGES.txt?rev=1629836&r1=1629835&r2=1629836&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_5x/lucene/CHANGES.txt Tue Oct 7 09:20:04 2014
@@ -154,6 +154,8 @@ Bug Fixes
not have the regular "spinlock" of DirectoryReader.open. It now implements
Closeable and you must close it to release the lock. (Mike McCandless, Robert Muir)
+* LUCENE-5980: Don't let document length overflow. (Robert Muir)
+
Documentation
* LUCENE-5392: Add/improve analysis package documentation to reflect
Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java?rev=1629836&r1=1629835&r2=1629836&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java Tue Oct 7 09:20:04 2014
@@ -633,6 +633,10 @@ final class DefaultIndexingChain extends
invertState.lastStartOffset = startOffset;
}
+ invertState.length++;
+ if (invertState.length < 0) {
+ throw new IllegalArgumentException("too many tokens in field '" + field.name() + "'");
+ }
//System.out.println(" term=" + invertState.termAttribute);
// If we hit an exception in here, we abort
@@ -644,8 +648,6 @@ final class DefaultIndexingChain extends
aborting = true;
termsHashPerField.add();
aborting = false;
-
- invertState.length++;
}
// trigger streams to perform end-of-stream operations
Modified: lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java?rev=1629836&r1=1629835&r2=1629836&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java Tue Oct 7 09:20:04 2014
@@ -36,6 +36,8 @@ import org.apache.lucene.analysis.MockTo
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@@ -47,6 +49,7 @@ import org.apache.lucene.document.Sorted
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
@@ -2123,6 +2126,48 @@ public class TestIndexWriterExceptions e
dir.close();
}
+ // Nightly-only: feeds Integer.MAX_VALUE + 1 tokens into a single field and expects addDocument to fail with "too many tokens". Kind of slow, but omits positions, so just CPU.
+ @Nightly
+ public void testTooManyTokens() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
+ Document doc = new Document();
+ FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
+ ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
+ doc.add(new Field("foo", new TokenStream() {
+ CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
+ long num = 0; // tokens emitted so far; a long so it can count past Integer.MAX_VALUE
+
+ @Override
+ public boolean incrementToken() throws IOException {
+ if (num == Integer.MAX_VALUE + 1L) { // 1L forces long arithmetic; as an int sum this wraps to Integer.MIN_VALUE and the stream would never end
+ return false;
+ }
+ clearAttributes();
+ if (num == 0) {
+ posIncAtt.setPositionIncrement(1); // only the first token advances the position
+ } else {
+ posIncAtt.setPositionIncrement(0); // later tokens stack on the same position, so only the token count grows
+ }
+ termAtt.append("a");
+ num++;
+ if (VERBOSE && num % 1000000 == 0) {
+ System.out.println("indexed: " + num);
+ }
+ return true;
+ }
+ }, ft));
+ try {
+ iw.addDocument(doc);
+ fail("didn't hit exception");
+ } catch (IllegalArgumentException expected) {
+ assertTrue(expected.getMessage().contains("too many tokens")); // message thrown by the length check in DefaultIndexingChain
+ }
+ iw.close();
+ dir.close();
+ }
+
public void testExceptionDuringRollback() throws Exception {
// currently: fail in two different places
final String messageToFailOn = random().nextBoolean() ?