You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/07/26 04:21:35 UTC

svn commit: r1507179 - in /lucene/dev/branches/lucene5127/lucene: codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexWriter.java test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java

Author: rmuir
Date: Fri Jul 26 02:21:35 2013
New Revision: 1507179

URL: http://svn.apache.org/r1507179
Log:
LUCENE-5127: use less RAM when writing the terms index

Modified:
    lucene/dev/branches/lucene5127/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexWriter.java
    lucene/dev/branches/lucene5127/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java

Modified: lucene/dev/branches/lucene5127/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5127/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexWriter.java?rev=1507179&r1=1507178&r2=1507179&view=diff
==============================================================================
--- lucene/dev/branches/lucene5127/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexWriter.java (original)
+++ lucene/dev/branches/lucene5127/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexWriter.java Fri Jul 26 02:21:35 2013
@@ -24,8 +24,8 @@ import org.apache.lucene.index.FieldInfo
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
 import org.apache.lucene.util.packed.MonotonicBlockPackedWriter;
 import org.apache.lucene.util.packed.PackedInts;
 
@@ -123,21 +123,19 @@ public class FixedGapTermsIndexWriter ex
     long packedOffsetsStart;
     private long numTerms;
 
-    // TODO: we could conceivably make a PackedInts wrapper
-    // that auto-grows... then we wouldn't force 6 bytes RAM
-    // per index term:
-    private short[] termLengths;
-    private int[] termsPointerDeltas;
-    private long lastTermsPointer;
+    // TODO: probably better not to recompress, but for now at least save RAM
+    private MonotonicAppendingLongBuffer termOffsets = new MonotonicAppendingLongBuffer();
+    private long currentOffset;
+    private MonotonicAppendingLongBuffer termAddresses = new MonotonicAppendingLongBuffer();
 
     private final BytesRef lastTerm = new BytesRef();
 
     SimpleFieldWriter(FieldInfo fieldInfo, long termsFilePointer) {
       this.fieldInfo = fieldInfo;
       indexStart = out.getFilePointer();
-      termsStart = lastTermsPointer = termsFilePointer;
-      termLengths = new short[0];
-      termsPointerDeltas = new int[0];
+      termsStart = termsFilePointer;
+      // we write terms+1 offsets; term n's length is offset[n+1] - offset[n]
+      termOffsets.add(0L);
     }
 
     @Override
@@ -165,20 +163,13 @@ public class FixedGapTermsIndexWriter ex
       // against prior term
       out.writeBytes(text.bytes, text.offset, indexedTermLength);
 
-      if (termLengths.length == numIndexTerms) {
-        termLengths = ArrayUtil.grow(termLengths);
-      }
-      if (termsPointerDeltas.length == numIndexTerms) {
-        termsPointerDeltas = ArrayUtil.grow(termsPointerDeltas);
-      }
-
       // save delta terms pointer
-      termsPointerDeltas[numIndexTerms] = (int) (termsFilePointer - lastTermsPointer);
-      lastTermsPointer = termsFilePointer;
+      termAddresses.add(termsFilePointer - termsStart);
 
       // save term length (in bytes)
       assert indexedTermLength <= Short.MAX_VALUE;
-      termLengths[numIndexTerms] = (short) indexedTermLength;
+      currentOffset += indexedTermLength;
+      termOffsets.add(currentOffset);
 
       lastTerm.copyBytes(text);
       numIndexTerms++;
@@ -190,13 +181,10 @@ public class FixedGapTermsIndexWriter ex
       // write primary terms dict offsets
       packedIndexStart = out.getFilePointer();
 
-      MonotonicBlockPackedWriter w = new MonotonicBlockPackedWriter(out, BLOCKSIZE);
-
       // relative to our indexStart
-      long upto = 0;
-      for(int i=0;i<numIndexTerms;i++) {
-        upto += termsPointerDeltas[i];
-        w.add(upto);
+      MonotonicBlockPackedWriter w = new MonotonicBlockPackedWriter(out, BLOCKSIZE);
+      for (MonotonicAppendingLongBuffer.Iterator iterator = termAddresses.iterator(); iterator.hasNext(); ) {
+        w.add(iterator.next());
       }
       w.finish();
 
@@ -204,18 +192,15 @@ public class FixedGapTermsIndexWriter ex
 
       // write offsets into the byte[] terms
       w = new MonotonicBlockPackedWriter(out, BLOCKSIZE);
-      upto = 0;
-      for(int i=0;i<numIndexTerms;i++) {
-        w.add(upto);
-        upto += termLengths[i];
+      for (MonotonicAppendingLongBuffer.Iterator iterator = termOffsets.iterator(); iterator.hasNext(); ) {
+        w.add(iterator.next());
       }
-      w.add(upto);
       w.finish();
 
       // our referrer holds onto us, while other fields are
       // being written, so don't tie up this RAM:
-      termLengths = null;
-      termsPointerDeltas = null;
+      termOffsets = null;
+      termAddresses = null;
     }
   }
 

Modified: lucene/dev/branches/lucene5127/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5127/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java?rev=1507179&r1=1507178&r2=1507179&view=diff
==============================================================================
--- lucene/dev/branches/lucene5127/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java (original)
+++ lucene/dev/branches/lucene5127/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java Fri Jul 26 02:21:35 2013
@@ -1200,6 +1200,7 @@ public abstract class BasePostingsFormat
     // Ghost busting terms dict impls will have
     // fields.size() == 0; all others must be == 1:
     assertTrue(fields.size() <= 1);
+    Terms terms = fields.terms("ghostField");
     if (terms != null) {
       TermsEnum termsEnum = terms.iterator(null);
       BytesRef term = termsEnum.next();