You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by mi...@apache.org on 2010/04/01 12:18:43 UTC
svn commit: r929908 - in
/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index:
CheckIndex.java codecs/standard/SimpleStandardTermsIndexReader.java
codecs/standard/StandardTermsDictReader.java
codecs/standard/StandardTermsDictWriter.java
Author: mikemccand
Date: Thu Apr 1 10:18:43 2010
New Revision: 929908
URL: http://svn.apache.org/viewvc?rev=929908&view=rev
Log:
LUCENE-2111: fix standard codec's terms dict to handle > 2B terms
Modified:
lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/CheckIndex.java
lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexReader.java
lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java
lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictWriter.java
Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/CheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/CheckIndex.java?rev=929908&r1=929907&r2=929908&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/CheckIndex.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/CheckIndex.java Thu Apr 1 10:18:43 2010
@@ -593,31 +593,59 @@ public class CheckIndex {
if (infoStream != null) {
infoStream.print(" test: terms, freq, prox...");
}
+
+ final Fields fields = reader.fields();
+ if (fields == null) {
+ msg("OK [no fields/terms]");
+ return status;
+ }
- final FieldsEnum fields = reader.fields().iterator();
+ final FieldsEnum fieldsEnum = fields.iterator();
while(true) {
- final String field = fields.next();
+ final String field = fieldsEnum.next();
if (field == null) {
break;
}
- final TermsEnum terms = fields.terms();
+ final TermsEnum terms = fieldsEnum.terms();
DocsEnum docs = null;
DocsAndPositionsEnum postings = null;
+ boolean hasOrd = true;
+ final long termCountStart = status.termCount;
+
while(true) {
final BytesRef term = terms.next();
if (term == null) {
break;
}
+
final int docFreq = terms.docFreq();
status.totFreq += docFreq;
docs = terms.docs(delDocs, docs);
postings = terms.docsAndPositions(delDocs, postings);
+ if (hasOrd) {
+ long ord = -1;
+ try {
+ ord = terms.ord();
+ } catch (UnsupportedOperationException uoe) {
+ hasOrd = false;
+ }
+
+ if (hasOrd) {
+ final long ordExpected = status.termCount - termCountStart;
+ if (ord != ordExpected) {
+ throw new RuntimeException("ord mismatch: TermsEnum has ord=" + ord + " vs actual=" + ordExpected);
+ }
+ }
+ }
+
+ status.termCount++;
+
final DocsEnum docs2;
if (postings != null) {
docs2 = postings;
@@ -625,8 +653,6 @@ public class CheckIndex {
docs2 = docs;
}
- status.termCount++;
-
int lastDoc = -1;
while(true) {
final int doc = docs2.nextDoc();
Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexReader.java?rev=929908&r1=929907&r2=929908&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexReader.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexReader.java Thu Apr 1 10:18:43 2010
@@ -60,7 +60,15 @@ import org.apache.lucene.index.IndexFile
/** @lucene.experimental */
public class SimpleStandardTermsIndexReader extends StandardTermsIndexReader {
- private int totalIndexInterval;
+ // NOTE: long is overkill here, since this number is 128
+ // by default and only indexDivisor * 128 if you change
+ // the indexDivisor at search time. But, we use this in a
+ // number of places to multiply out the actual ord, and we
+ // will overflow int during those multiplies. So to avoid
+ // having to upgrade each multiply to long in multiple
+ // places (error-prone), we use long here:
+ private long totalIndexInterval;
+
private int indexDivisor;
final private int indexInterval;
Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java?rev=929908&r1=929907&r2=929908&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java Thu Apr 1 10:18:43 2010
@@ -125,6 +125,7 @@ public class StandardTermsDictReader ext
for(int i=0;i<numFields;i++) {
final int field = in.readInt();
final long numTerms = in.readLong();
+ assert numTerms >= 0;
final long termsStartPointer = in.readLong();
final StandardTermsIndexReader.FieldReader fieldIndexReader;
final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictWriter.java?rev=929908&r1=929907&r2=929908&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictWriter.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictWriter.java Thu Apr 1 10:18:43 2010
@@ -129,7 +129,7 @@ public class StandardTermsDictWriter ext
private final FieldInfo fieldInfo;
private final StandardPostingsWriter postingsWriter;
private final long termsStartPointer;
- private int numTerms;
+ private long numTerms;
private final StandardTermsIndexWriter.FieldWriter fieldIndexWriter;
TermsWriter(StandardTermsIndexWriter.FieldWriter fieldIndexWriter, FieldInfo fieldInfo, StandardPostingsWriter postingsWriter) {