Posted to commits@lucene.apache.org by mi...@apache.org on 2010/06/24 13:57:20 UTC

svn commit: r957516 - /lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java

Author: mikemccand
Date: Thu Jun 24 11:57:20 2010
New Revision: 957516

URL: http://svn.apache.org/viewvc?rev=957516&view=rev
Log:
LUCENE-2142: if FieldCache.getStrings/Index is mis-used (more than 1 term per doc), stop loading once number of terms > number of docs
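
For context, the misuse described in the log arises when an application asks the
field cache for a single per-document value on a field that actually carries
several terms per document, such as a multi-valued or tokenized field. A minimal
sketch of that situation, assuming an already-open IndexWriter/IndexReader and a
hypothetical "tags" field (none of these names come from the patch):

    // Each document gets more than one indexed term in "tags".
    Document doc = new Document();
    doc.add(new Field("tags", "red",  Field.Store.NO, Field.Index.NOT_ANALYZED));
    doc.add(new Field("tags", "blue", Field.Store.NO, Field.Index.NOT_ANALYZED));
    writer.addDocument(doc);

    // FieldCache entries assume at most one term per document, so loading
    // this field is the misuse LUCENE-2142 describes.  With this commit the
    // loader stops once it has seen more terms than there are documents,
    // instead of trying to consume every unique term.
    String[] tags = FieldCache.DEFAULT.getStrings(reader, "tags");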

Modified:
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java?rev=957516&r1=957515&r2=957516&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java Thu Jun 24 11:57:20 2010
@@ -802,6 +802,14 @@ class FieldCacheImpl implements FieldCac
       int startTermsBPV;
       int startNumUniqueTerms;
 
+      int maxDoc = reader.maxDoc();
+      final int termCountHardLimit;
+      if (maxDoc == Integer.MAX_VALUE) {
+        termCountHardLimit = Integer.MAX_VALUE;
+      } else {
+        termCountHardLimit = maxDoc+1;
+      }
+
       if (terms != null) {
         // Try for coarse estimate for number of bits; this
         // should be an underestimate most of the time, which
@@ -813,11 +821,17 @@ class FieldCacheImpl implements FieldCac
           numUniqueTerms = -1;
         }
         if (numUniqueTerms != -1) {
+
+          if (numUniqueTerms > termCountHardLimit) {
+            // app is misusing the API (there is more than
+            // one term per doc); in this case we make best
+            // effort to load what we can (see LUCENE-2142)
+            numUniqueTerms = termCountHardLimit;
+          }
+
           startBytesBPV = PackedInts.bitsRequired(numUniqueTerms*4);
           startTermsBPV = PackedInts.bitsRequired(numUniqueTerms);
-          if (numUniqueTerms > Integer.MAX_VALUE-1) {
-            throw new IllegalStateException("this field has too many (" + numUniqueTerms + ") unique terms");
-          }
+
           startNumUniqueTerms = (int) numUniqueTerms;
         } else {
           startBytesBPV = 1;
@@ -847,6 +861,10 @@ class FieldCacheImpl implements FieldCac
           if (term == null) {
             break;
           }
+          if (termOrd >= termCountHardLimit) {
+            break;
+          }
+
           if (termOrd == termOrdToBytesOffset.size()) {
             // NOTE: this code only runs if the incoming
             // reader impl doesn't implement
@@ -925,6 +943,8 @@ class FieldCacheImpl implements FieldCac
 
       final boolean fasterButMoreRAM = ((Boolean) entryKey.custom).booleanValue();
 
+      final int termCountHardLimit = reader.maxDoc();
+
       // Holds the actual term data, expanded.
       final PagedBytes bytes = new PagedBytes(15);
 
@@ -941,6 +961,9 @@ class FieldCacheImpl implements FieldCac
           numUniqueTerms = -1;
         }
         if (numUniqueTerms != -1) {
+          if (numUniqueTerms > termCountHardLimit) {
+            numUniqueTerms = termCountHardLimit;
+          }
           startBPV = PackedInts.bitsRequired(numUniqueTerms*4);
         } else {
           startBPV = 1;
@@ -955,10 +978,18 @@ class FieldCacheImpl implements FieldCac
       bytes.copyUsingLengthPrefix(new BytesRef());
 
       if (terms != null) {
+        int termCount = 0;
         final TermsEnum termsEnum = terms.iterator();
         final Bits delDocs = MultiFields.getDeletedDocs(reader);
         DocsEnum docs = null;
         while(true) {
+          if (termCount++ == termCountHardLimit) {
+            // app is misusing the API (there is more than
+            // one term per doc); in this case we make best
+            // effort to load what we can (see LUCENE-2142)
+            break;
+          }
+
           final BytesRef term = termsEnum.next();
           if (term == null) {
             break;
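
Two details in the hunks above are worth calling out. In the first hunk the hard
limit is maxDoc()+1, but only when maxDoc() is below Integer.MAX_VALUE; without
that guard the addition would wrap to a negative int and the
"termOrd >= termCountHardLimit" check would break out of the loop immediately,
loading nothing. And once numUniqueTerms is clamped to the hard limit it is
guaranteed to fit in an int, which is why the old IllegalStateException
("this field has too many unique terms") before the (int) cast is removed.
A standalone sketch of the same overflow-safe limit (the helper name is
illustrative, not part of the patch):

    static int termCountHardLimit(int maxDoc) {
      // maxDoc + 1 would wrap to Integer.MIN_VALUE when maxDoc is already
      // Integer.MAX_VALUE, so clamp instead of adding.
      return maxDoc == Integer.MAX_VALUE ? Integer.MAX_VALUE : maxDoc + 1;
    }

    // Inside the term-loading loop the limit is applied as in the last hunk:
    //   if (termCount++ == termCountHardLimit) {
    //     break;   // best effort: stop loading, see LUCENE-2142
    //   }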