You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by mi...@apache.org on 2010/04/01 12:18:43 UTC

svn commit: r929908 - in /lucene/java/branches/flex_1458/src/java/org/apache/lucene/index: CheckIndex.java codecs/standard/SimpleStandardTermsIndexReader.java codecs/standard/StandardTermsDictReader.java codecs/standard/StandardTermsDictWriter.java

Author: mikemccand
Date: Thu Apr  1 10:18:43 2010
New Revision: 929908

URL: http://svn.apache.org/viewvc?rev=929908&view=rev
Log:
LUCENE-2111: fix standard codec's terms dict to handle > 2B terms

Modified:
    lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/CheckIndex.java
    lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexReader.java
    lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java
    lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictWriter.java

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/CheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/CheckIndex.java?rev=929908&r1=929907&r2=929908&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/CheckIndex.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/CheckIndex.java Thu Apr  1 10:18:43 2010
@@ -593,31 +593,59 @@ public class CheckIndex {
       if (infoStream != null) {
         infoStream.print("    test: terms, freq, prox...");
       }
+
+      final Fields fields = reader.fields();
+      if (fields == null) {
+        msg("OK [no fields/terms]");
+        return status;
+      }
       
-      final FieldsEnum fields = reader.fields().iterator();
+      final FieldsEnum fieldsEnum = fields.iterator();
       while(true) {
-        final String field = fields.next();
+        final String field = fieldsEnum.next();
         if (field == null) {
           break;
         }
         
-        final TermsEnum terms = fields.terms();
+        final TermsEnum terms = fieldsEnum.terms();
 
         DocsEnum docs = null;
         DocsAndPositionsEnum postings = null;
 
+        boolean hasOrd = true;
+        final long termCountStart = status.termCount;
+
         while(true) {
 
           final BytesRef term = terms.next();
           if (term == null) {
             break;
           }
+
           final int docFreq = terms.docFreq();
           status.totFreq += docFreq;
 
           docs = terms.docs(delDocs, docs);
           postings = terms.docsAndPositions(delDocs, postings);
 
+          if (hasOrd) {
+            long ord = -1;
+            try {
+              ord = terms.ord();
+            } catch (UnsupportedOperationException uoe) {
+              hasOrd = false;
+            }
+
+            if (hasOrd) {
+              final long ordExpected = status.termCount - termCountStart;
+              if (ord != ordExpected) {
+                throw new RuntimeException("ord mismatch: TermsEnum has ord=" + ord + " vs actual=" + ordExpected);
+              }
+            }
+          }
+
+          status.termCount++;
+
           final DocsEnum docs2;
           if (postings != null) {
             docs2 = postings;
@@ -625,8 +653,6 @@ public class CheckIndex {
             docs2 = docs;
           }
 
-          status.termCount++;
-
           int lastDoc = -1;
           while(true) {
             final int doc = docs2.nextDoc();

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexReader.java?rev=929908&r1=929907&r2=929908&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexReader.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexReader.java Thu Apr  1 10:18:43 2010
@@ -60,7 +60,15 @@ import org.apache.lucene.index.IndexFile
 /** @lucene.experimental */
 public class SimpleStandardTermsIndexReader extends StandardTermsIndexReader {
 
-  private int totalIndexInterval;
+  // NOTE: long is overkill here, since this number is 128
+  // by default and only indexDivisor * 128 if you change
+  // the indexDivisor at search time.  But, we use this in a
+  // number of places to multiply out the actual ord, and we
+  // will overflow int during those multiplies.  So to avoid
+  // having to upgrade each multiply to long in multiple
+  // places (error prone), we use long here:
+  private long totalIndexInterval;
+
   private int indexDivisor;
   final private int indexInterval;
 

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java?rev=929908&r1=929907&r2=929908&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java Thu Apr  1 10:18:43 2010
@@ -125,6 +125,7 @@ public class StandardTermsDictReader ext
       for(int i=0;i<numFields;i++) {
         final int field = in.readInt();
         final long numTerms = in.readLong();
+        assert numTerms >= 0;
         final long termsStartPointer = in.readLong();
         final StandardTermsIndexReader.FieldReader fieldIndexReader;
         final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictWriter.java?rev=929908&r1=929907&r2=929908&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictWriter.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictWriter.java Thu Apr  1 10:18:43 2010
@@ -129,7 +129,7 @@ public class StandardTermsDictWriter ext
     private final FieldInfo fieldInfo;
     private final StandardPostingsWriter postingsWriter;
     private final long termsStartPointer;
-    private int numTerms;
+    private long numTerms;
     private final StandardTermsIndexWriter.FieldWriter fieldIndexWriter;
 
     TermsWriter(StandardTermsIndexWriter.FieldWriter fieldIndexWriter, FieldInfo fieldInfo, StandardPostingsWriter postingsWriter) {