You are viewing a plain text version of this content. The canonical link to the original message was not preserved in this plain text version.
Posted to commits@lucene.apache.org by mi...@apache.org on 2012/10/18 14:27:31 UTC
svn commit: r1399615 - in /lucene/dev/branches/branch_4x: ./ lucene/
lucene/codecs/
lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java
lucene/core/
lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java
Author: mikemccand
Date: Thu Oct 18 12:27:31 2012
New Revision: 1399615
URL: http://svn.apache.org/viewvc?rev=1399615&view=rev
Log:
reduce RAM cost per unique field while writing postings
Modified:
lucene/dev/branches/branch_4x/ (props changed)
lucene/dev/branches/branch_4x/lucene/ (props changed)
lucene/dev/branches/branch_4x/lucene/codecs/ (props changed)
lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java
lucene/dev/branches/branch_4x/lucene/core/ (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java
Modified: lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java?rev=1399615&r1=1399614&r2=1399615&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java Thu Oct 18 12:27:31 2012
@@ -69,7 +69,27 @@ public class BlockTermsWriter extends Fi
final FieldInfos fieldInfos;
FieldInfo currentField;
private final TermsIndexWriterBase termsIndexWriter;
- private final List<TermsWriter> fields = new ArrayList<TermsWriter>();
+
+ private static class FieldMetaData {
+ public final FieldInfo fieldInfo;
+ public final long numTerms;
+ public final long termsStartPointer;
+ public final long sumTotalTermFreq;
+ public final long sumDocFreq;
+ public final int docCount;
+
+ public FieldMetaData(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq, long sumDocFreq, int docCount) {
+ assert numTerms > 0;
+ this.fieldInfo = fieldInfo;
+ this.termsStartPointer = termsStartPointer;
+ this.numTerms = numTerms;
+ this.sumTotalTermFreq = sumTotalTermFreq;
+ this.sumDocFreq = sumDocFreq;
+ this.docCount = docCount;
+ }
+ }
+
+ private final List<FieldMetaData> fields = new ArrayList<FieldMetaData>();
// private final String segment;
@@ -108,9 +128,7 @@ public class BlockTermsWriter extends Fi
assert currentField == null || currentField.name.compareTo(field.name) < 0;
currentField = field;
TermsIndexWriterBase.FieldWriter fieldIndexWriter = termsIndexWriter.addField(field, out.getFilePointer());
- final TermsWriter terms = new TermsWriter(fieldIndexWriter, field, postingsWriter);
- fields.add(terms);
- return terms;
+ return new TermsWriter(fieldIndexWriter, field, postingsWriter);
}
@Override
@@ -118,27 +136,18 @@ public class BlockTermsWriter extends Fi
try {
- int nonZeroCount = 0;
- for(TermsWriter field : fields) {
- if (field.numTerms > 0) {
- nonZeroCount++;
- }
- }
-
final long dirStart = out.getFilePointer();
- out.writeVInt(nonZeroCount);
- for(TermsWriter field : fields) {
- if (field.numTerms > 0) {
- out.writeVInt(field.fieldInfo.number);
- out.writeVLong(field.numTerms);
- out.writeVLong(field.termsStartPointer);
- if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
- out.writeVLong(field.sumTotalTermFreq);
- }
- out.writeVLong(field.sumDocFreq);
- out.writeVInt(field.docCount);
+ out.writeVInt(fields.size());
+ for(FieldMetaData field : fields) {
+ out.writeVInt(field.fieldInfo.number);
+ out.writeVLong(field.numTerms);
+ out.writeVLong(field.termsStartPointer);
+ if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
+ out.writeVLong(field.sumTotalTermFreq);
}
+ out.writeVLong(field.sumDocFreq);
+ out.writeVInt(field.docCount);
}
writeTrailer(dirStart);
} finally {
@@ -249,6 +258,14 @@ public class BlockTermsWriter extends Fi
this.sumDocFreq = sumDocFreq;
this.docCount = docCount;
fieldIndexWriter.finish(out.getFilePointer());
+ if (numTerms > 0) {
+ fields.add(new FieldMetaData(fieldInfo,
+ numTerms,
+ termsStartPointer,
+ sumTotalTermFreq,
+ sumDocFreq,
+ docCount));
+ }
}
private int sharedPrefix(BytesRef term1, BytesRef term2) {
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java?rev=1399615&r1=1399614&r2=1399615&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java Thu Oct 18 12:27:31 2012
@@ -228,7 +228,30 @@ public class BlockTreeTermsWriter extend
final PostingsWriterBase postingsWriter;
final FieldInfos fieldInfos;
FieldInfo currentField;
- private final List<TermsWriter> fields = new ArrayList<TermsWriter>();
+
+ private static class FieldMetaData {
+ public final FieldInfo fieldInfo;
+ public final BytesRef rootCode;
+ public final long numTerms;
+ public final long indexStartFP;
+ public final long sumTotalTermFreq;
+ public final long sumDocFreq;
+ public final int docCount;
+
+ public FieldMetaData(FieldInfo fieldInfo, BytesRef rootCode, long numTerms, long indexStartFP, long sumTotalTermFreq, long sumDocFreq, int docCount) {
+ assert numTerms > 0;
+ this.fieldInfo = fieldInfo;
+ assert rootCode != null: "field=" + fieldInfo.name + " numTerms=" + numTerms;
+ this.rootCode = rootCode;
+ this.indexStartFP = indexStartFP;
+ this.numTerms = numTerms;
+ this.sumTotalTermFreq = sumTotalTermFreq;
+ this.sumDocFreq = sumDocFreq;
+ this.docCount = docCount;
+ }
+ }
+
+ private final List<FieldMetaData> fields = new ArrayList<FieldMetaData>();
// private final String segment;
/** Create a new writer. The number of items (terms or
@@ -313,9 +336,7 @@ public class BlockTreeTermsWriter extend
//if (DEBUG) System.out.println("\nBTTW.addField seg=" + segment + " field=" + field.name);
assert currentField == null || currentField.name.compareTo(field.name) < 0;
currentField = field;
- final TermsWriter terms = new TermsWriter(field);
- fields.add(terms);
- return terms;
+ return new TermsWriter(field);
}
static long encodeOutput(long fp, boolean hasTerms, boolean isFloor) {
@@ -1007,6 +1028,14 @@ public class BlockTreeTermsWriter extend
// System.out.println("SAVED to " + dotFileName);
// w.close();
// }
+
+ fields.add(new FieldMetaData(fieldInfo,
+ ((PendingBlock) pending.get(0)).index.getEmptyOutput(),
+ numTerms,
+ indexStartFP,
+ sumTotalTermFreq,
+ sumDocFreq,
+ docCount));
} else {
assert sumTotalTermFreq == 0 || fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY && sumTotalTermFreq == -1;
assert sumDocFreq == 0;
@@ -1024,34 +1053,23 @@ public class BlockTreeTermsWriter extend
IOException ioe = null;
try {
- int nonZeroCount = 0;
- for(TermsWriter field : fields) {
- if (field.numTerms > 0) {
- nonZeroCount++;
- }
- }
-
final long dirStart = out.getFilePointer();
final long indexDirStart = indexOut.getFilePointer();
- out.writeVInt(nonZeroCount);
+ out.writeVInt(fields.size());
- for(TermsWriter field : fields) {
- if (field.numTerms > 0) {
- //System.out.println(" field " + field.fieldInfo.name + " " + field.numTerms + " terms");
- out.writeVInt(field.fieldInfo.number);
- out.writeVLong(field.numTerms);
- final BytesRef rootCode = ((PendingBlock) field.pending.get(0)).index.getEmptyOutput();
- assert rootCode != null: "field=" + field.fieldInfo.name + " numTerms=" + field.numTerms;
- out.writeVInt(rootCode.length);
- out.writeBytes(rootCode.bytes, rootCode.offset, rootCode.length);
- if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
- out.writeVLong(field.sumTotalTermFreq);
- }
- out.writeVLong(field.sumDocFreq);
- out.writeVInt(field.docCount);
- indexOut.writeVLong(field.indexStartFP);
- }
+ for(FieldMetaData field : fields) {
+ //System.out.println(" field " + field.fieldInfo.name + " " + field.numTerms + " terms");
+ out.writeVInt(field.fieldInfo.number);
+ out.writeVLong(field.numTerms);
+ out.writeVInt(field.rootCode.length);
+ out.writeBytes(field.rootCode.bytes, field.rootCode.offset, field.rootCode.length);
+ if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
+ out.writeVLong(field.sumTotalTermFreq);
+ }
+ out.writeVLong(field.sumDocFreq);
+ out.writeVInt(field.docCount);
+ indexOut.writeVLong(field.indexStartFP);
}
writeTrailer(out, dirStart);
writeIndexTrailer(indexOut, indexDirStart);