You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2012/10/18 14:27:31 UTC

svn commit: r1399615 - in /lucene/dev/branches/branch_4x: ./ lucene/ lucene/codecs/ lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java lucene/core/ lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java

Author: mikemccand
Date: Thu Oct 18 12:27:31 2012
New Revision: 1399615

URL: http://svn.apache.org/viewvc?rev=1399615&view=rev
Log:
reduce RAM cost per unique field while writing postings

Modified:
    lucene/dev/branches/branch_4x/   (props changed)
    lucene/dev/branches/branch_4x/lucene/   (props changed)
    lucene/dev/branches/branch_4x/lucene/codecs/   (props changed)
    lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java
    lucene/dev/branches/branch_4x/lucene/core/   (props changed)
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java

Modified: lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java?rev=1399615&r1=1399614&r2=1399615&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java Thu Oct 18 12:27:31 2012
@@ -69,7 +69,27 @@ public class BlockTermsWriter extends Fi
   final FieldInfos fieldInfos;
   FieldInfo currentField;
   private final TermsIndexWriterBase termsIndexWriter;
-  private final List<TermsWriter> fields = new ArrayList<TermsWriter>();
+
+  private static class FieldMetaData {
+    public final FieldInfo fieldInfo;
+    public final long numTerms;
+    public final long termsStartPointer;
+    public final long sumTotalTermFreq;
+    public final long sumDocFreq;
+    public final int docCount;
+
+    public FieldMetaData(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq, long sumDocFreq, int docCount) {
+      assert numTerms > 0;
+      this.fieldInfo = fieldInfo;
+      this.termsStartPointer = termsStartPointer;
+      this.numTerms = numTerms;
+      this.sumTotalTermFreq = sumTotalTermFreq;
+      this.sumDocFreq = sumDocFreq;
+      this.docCount = docCount;
+    }
+  }
+
+  private final List<FieldMetaData> fields = new ArrayList<FieldMetaData>();
 
   // private final String segment;
 
@@ -108,9 +128,7 @@ public class BlockTermsWriter extends Fi
     assert currentField == null || currentField.name.compareTo(field.name) < 0;
     currentField = field;
     TermsIndexWriterBase.FieldWriter fieldIndexWriter = termsIndexWriter.addField(field, out.getFilePointer());
-    final TermsWriter terms = new TermsWriter(fieldIndexWriter, field, postingsWriter);
-    fields.add(terms);
-    return terms;
+    return new TermsWriter(fieldIndexWriter, field, postingsWriter);
   }
 
   @Override
@@ -118,27 +136,18 @@ public class BlockTermsWriter extends Fi
 
     try {
       
-      int nonZeroCount = 0;
-      for(TermsWriter field : fields) {
-        if (field.numTerms > 0) {
-          nonZeroCount++;
-        }
-      }
-
       final long dirStart = out.getFilePointer();
 
-      out.writeVInt(nonZeroCount);
-      for(TermsWriter field : fields) {
-        if (field.numTerms > 0) {
-          out.writeVInt(field.fieldInfo.number);
-          out.writeVLong(field.numTerms);
-          out.writeVLong(field.termsStartPointer);
-          if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
-            out.writeVLong(field.sumTotalTermFreq);
-          }
-          out.writeVLong(field.sumDocFreq);
-          out.writeVInt(field.docCount);
+      out.writeVInt(fields.size());
+      for(FieldMetaData field : fields) {
+        out.writeVInt(field.fieldInfo.number);
+        out.writeVLong(field.numTerms);
+        out.writeVLong(field.termsStartPointer);
+        if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
+          out.writeVLong(field.sumTotalTermFreq);
         }
+        out.writeVLong(field.sumDocFreq);
+        out.writeVInt(field.docCount);
       }
       writeTrailer(dirStart);
     } finally {
@@ -249,6 +258,14 @@ public class BlockTermsWriter extends Fi
       this.sumDocFreq = sumDocFreq;
       this.docCount = docCount;
       fieldIndexWriter.finish(out.getFilePointer());
+      if (numTerms > 0) {
+        fields.add(new FieldMetaData(fieldInfo,
+                                     numTerms,
+                                     termsStartPointer,
+                                     sumTotalTermFreq,
+                                     sumDocFreq,
+                                     docCount));
+      }
     }
 
     private int sharedPrefix(BytesRef term1, BytesRef term2) {

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java?rev=1399615&r1=1399614&r2=1399615&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java Thu Oct 18 12:27:31 2012
@@ -228,7 +228,30 @@ public class BlockTreeTermsWriter extend
   final PostingsWriterBase postingsWriter;
   final FieldInfos fieldInfos;
   FieldInfo currentField;
-  private final List<TermsWriter> fields = new ArrayList<TermsWriter>();
+
+  private static class FieldMetaData {
+    public final FieldInfo fieldInfo;
+    public final BytesRef rootCode;
+    public final long numTerms;
+    public final long indexStartFP;
+    public final long sumTotalTermFreq;
+    public final long sumDocFreq;
+    public final int docCount;
+
+    public FieldMetaData(FieldInfo fieldInfo, BytesRef rootCode, long numTerms, long indexStartFP, long sumTotalTermFreq, long sumDocFreq, int docCount) {
+      assert numTerms > 0;
+      this.fieldInfo = fieldInfo;
+      assert rootCode != null: "field=" + fieldInfo.name + " numTerms=" + numTerms;
+      this.rootCode = rootCode;
+      this.indexStartFP = indexStartFP;
+      this.numTerms = numTerms;
+      this.sumTotalTermFreq = sumTotalTermFreq;
+      this.sumDocFreq = sumDocFreq;
+      this.docCount = docCount;
+    }
+  }
+
+  private final List<FieldMetaData> fields = new ArrayList<FieldMetaData>();
   // private final String segment;
 
   /** Create a new writer.  The number of items (terms or
@@ -313,9 +336,7 @@ public class BlockTreeTermsWriter extend
     //if (DEBUG) System.out.println("\nBTTW.addField seg=" + segment + " field=" + field.name);
     assert currentField == null || currentField.name.compareTo(field.name) < 0;
     currentField = field;
-    final TermsWriter terms = new TermsWriter(field);
-    fields.add(terms);
-    return terms;
+    return new TermsWriter(field);
   }
 
   static long encodeOutput(long fp, boolean hasTerms, boolean isFloor) {
@@ -1007,6 +1028,14 @@ public class BlockTreeTermsWriter extend
         //   System.out.println("SAVED to " + dotFileName);
         //   w.close();
         // }
+
+        fields.add(new FieldMetaData(fieldInfo,
+                                     ((PendingBlock) pending.get(0)).index.getEmptyOutput(),
+                                     numTerms,
+                                     indexStartFP,
+                                     sumTotalTermFreq,
+                                     sumDocFreq,
+                                     docCount));
       } else {
         assert sumTotalTermFreq == 0 || fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY && sumTotalTermFreq == -1;
         assert sumDocFreq == 0;
@@ -1024,34 +1053,23 @@ public class BlockTreeTermsWriter extend
     IOException ioe = null;
     try {
       
-      int nonZeroCount = 0;
-      for(TermsWriter field : fields) {
-        if (field.numTerms > 0) {
-          nonZeroCount++;
-        }
-      }
-
       final long dirStart = out.getFilePointer();
       final long indexDirStart = indexOut.getFilePointer();
 
-      out.writeVInt(nonZeroCount);
+      out.writeVInt(fields.size());
       
-      for(TermsWriter field : fields) {
-        if (field.numTerms > 0) {
-          //System.out.println("  field " + field.fieldInfo.name + " " + field.numTerms + " terms");
-          out.writeVInt(field.fieldInfo.number);
-          out.writeVLong(field.numTerms);
-          final BytesRef rootCode = ((PendingBlock) field.pending.get(0)).index.getEmptyOutput();
-          assert rootCode != null: "field=" + field.fieldInfo.name + " numTerms=" + field.numTerms;
-          out.writeVInt(rootCode.length);
-          out.writeBytes(rootCode.bytes, rootCode.offset, rootCode.length);
-          if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
-            out.writeVLong(field.sumTotalTermFreq);
-          }
-          out.writeVLong(field.sumDocFreq);
-          out.writeVInt(field.docCount);
-          indexOut.writeVLong(field.indexStartFP);
-        }
+      for(FieldMetaData field : fields) {
+        //System.out.println("  field " + field.fieldInfo.name + " " + field.numTerms + " terms");
+        out.writeVInt(field.fieldInfo.number);
+        out.writeVLong(field.numTerms);
+        out.writeVInt(field.rootCode.length);
+        out.writeBytes(field.rootCode.bytes, field.rootCode.offset, field.rootCode.length);
+        if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
+          out.writeVLong(field.sumTotalTermFreq);
+        }
+        out.writeVLong(field.sumDocFreq);
+        out.writeVInt(field.docCount);
+        indexOut.writeVLong(field.indexStartFP);
       }
       writeTrailer(out, dirStart);
       writeIndexTrailer(indexOut, indexDirStart);