You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2012/10/08 11:25:50 UTC

svn commit: r1395491 - in /lucene/dev/branches/branch_4x/lucene: ./ codecs/src/java/org/apache/lucene/codecs/compressing/ codecs/src/test/org/apache/lucene/codecs/compressing/ core/src/java/org/apache/lucene/util/packed/ test-framework/src/java/org/apache/lucene/codecs/compressing/ test-framework/src/java/org/apache/lucene/util/ test-framework/src/resources/META-INF/services/

Author: jpountz
Date: Mon Oct  8 09:25:49 2012
New Revision: 1395491

URL: http://svn.apache.org/viewvc?rev=1395491&view=rev
Log:
LUCENE-4226: Efficient stored fields compression (merged from r1394578).

Added:
    lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/compressing/
      - copied from r1394578, lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/compressing/
    lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsFormat.java
      - copied unchanged from r1394578, lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsFormat.java
    lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsIndex.java
      - copied unchanged from r1394578, lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsIndex.java
    lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java
      - copied unchanged from r1394578, lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java
    lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java
      - copied, changed from r1394578, lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java
    lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/compressing/CompressionMode.java
      - copied unchanged from r1394578, lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/compressing/CompressionMode.java
    lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/compressing/Compressor.java
      - copied unchanged from r1394578, lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/compressing/Compressor.java
    lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/compressing/GrowableByteArrayDataOutput.java
      - copied unchanged from r1394578, lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/compressing/GrowableByteArrayDataOutput.java
    lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/compressing/LZ4.java
      - copied unchanged from r1394578, lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/compressing/LZ4.java
    lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/compressing/Uncompressor.java
      - copied unchanged from r1394578, lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/compressing/Uncompressor.java
    lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/compressing/package.html
      - copied unchanged from r1394578, lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/compressing/package.html
    lucene/dev/branches/branch_4x/lucene/codecs/src/test/org/apache/lucene/codecs/compressing/
      - copied from r1394578, lucene/dev/trunk/lucene/codecs/src/test/org/apache/lucene/codecs/compressing/
    lucene/dev/branches/branch_4x/lucene/codecs/src/test/org/apache/lucene/codecs/compressing/AbstractTestCompressionMode.java
      - copied unchanged from r1394578, lucene/dev/trunk/lucene/codecs/src/test/org/apache/lucene/codecs/compressing/AbstractTestCompressionMode.java
    lucene/dev/branches/branch_4x/lucene/codecs/src/test/org/apache/lucene/codecs/compressing/TestCompressingStoredFieldsFormat.java
      - copied, changed from r1394578, lucene/dev/trunk/lucene/codecs/src/test/org/apache/lucene/codecs/compressing/TestCompressingStoredFieldsFormat.java
    lucene/dev/branches/branch_4x/lucene/codecs/src/test/org/apache/lucene/codecs/compressing/TestFastCompressionMode.java
      - copied unchanged from r1394578, lucene/dev/trunk/lucene/codecs/src/test/org/apache/lucene/codecs/compressing/TestFastCompressionMode.java
    lucene/dev/branches/branch_4x/lucene/codecs/src/test/org/apache/lucene/codecs/compressing/TestFastUncompressionMode.java
      - copied unchanged from r1394578, lucene/dev/trunk/lucene/codecs/src/test/org/apache/lucene/codecs/compressing/TestFastUncompressionMode.java
    lucene/dev/branches/branch_4x/lucene/codecs/src/test/org/apache/lucene/codecs/compressing/TestHighCompressionMode.java
      - copied unchanged from r1394578, lucene/dev/trunk/lucene/codecs/src/test/org/apache/lucene/codecs/compressing/TestHighCompressionMode.java
    lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/
      - copied from r1394578, lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/
    lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/CompressingCodec.java
      - copied unchanged from r1394578, lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/CompressingCodec.java
    lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/package.html
      - copied unchanged from r1394578, lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/package.html
Modified:
    lucene/dev/branches/branch_4x/lucene/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/PackedReaderIterator.java
    lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java
    lucene/dev/branches/branch_4x/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.Codec

Modified: lucene/dev/branches/branch_4x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/CHANGES.txt?rev=1395491&r1=1395490&r2=1395491&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/lucene/CHANGES.txt Mon Oct  8 09:25:49 2012
@@ -10,6 +10,10 @@ http://svn.apache.org/repos/asf/lucene/d
 
 New Features
 
+* LUCENE-4226: New experimental StoredFieldsFormat (in lucene/codecs) that
+  compresses chunks of documents together in order to improve the compression
+  ratio. (Adrien Grand)
+
 * LUCENE-4426: New ValueSource implementations (in lucene/queries) for
   DocValues fields. (Adrien Grand)
 

Copied: lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java (from r1394578, lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java?p2=lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java&p1=lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java&r1=1394578&r2=1395491&rev=1395491&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java Mon Oct  8 09:25:49 2012
@@ -26,16 +26,16 @@ import org.apache.lucene.codecs.CodecUti
 import org.apache.lucene.codecs.StoredFieldsReader;
 import org.apache.lucene.codecs.StoredFieldsWriter;
 import org.apache.lucene.codecs.compressing.CompressingStoredFieldsReader.ChunkIterator;
+import org.apache.lucene.document.Document;
 import org.apache.lucene.index.AtomicReader;
 import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.MergeState;
 import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.index.SegmentReader;
-import org.apache.lucene.index.StorableField;
-import org.apache.lucene.index.StoredDocument;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexOutput;
@@ -204,7 +204,7 @@ final class CompressingStoredFieldsWrite
   }
 
   @Override
-  public void writeField(FieldInfo info, StorableField field)
+  public void writeField(FieldInfo info, IndexableField field)
       throws IOException {
     int bits = 0;
     final BytesRef bytes;
@@ -305,7 +305,7 @@ final class CompressingStoredFieldsWrite
       if (matchingFieldsReader == null) {
         // naive merge...
         for (int i = nextLiveDoc(0, liveDocs, maxDoc); i < maxDoc; i = nextLiveDoc(i + 1, liveDocs, maxDoc)) {
-          StoredDocument doc = reader.document(i);
+          Document doc = reader.document(i);
           addDocument(doc, mergeState.fieldInfos);
           ++docCount;
           mergeState.checkAbort.work(300);

Copied: lucene/dev/branches/branch_4x/lucene/codecs/src/test/org/apache/lucene/codecs/compressing/TestCompressingStoredFieldsFormat.java (from r1394578, lucene/dev/trunk/lucene/codecs/src/test/org/apache/lucene/codecs/compressing/TestCompressingStoredFieldsFormat.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/codecs/src/test/org/apache/lucene/codecs/compressing/TestCompressingStoredFieldsFormat.java?p2=lucene/dev/branches/branch_4x/lucene/codecs/src/test/org/apache/lucene/codecs/compressing/TestCompressingStoredFieldsFormat.java&p1=lucene/dev/trunk/lucene/codecs/src/test/org/apache/lucene/codecs/compressing/TestCompressingStoredFieldsFormat.java&r1=1394578&r2=1395491&rev=1395491&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/codecs/src/test/org/apache/lucene/codecs/compressing/TestCompressingStoredFieldsFormat.java (original)
+++ lucene/dev/branches/branch_4x/lucene/codecs/src/test/org/apache/lucene/codecs/compressing/TestCompressingStoredFieldsFormat.java Mon Oct  8 09:25:49 2012
@@ -35,9 +35,8 @@ import org.apache.lucene.document.LongFi
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.RandomIndexWriter;
-import org.apache.lucene.index.StorableField;
-import org.apache.lucene.index.StoredDocument;
 import org.apache.lucene.search.NumericRangeQuery;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
@@ -113,7 +112,7 @@ public class TestCompressingStoredFields
     assertTrue(ir.numDocs() > 0);
     int numDocs = 0;
     for (int i = 0; i < ir.maxDoc(); ++i) {
-      final StoredDocument doc = ir.document(i);
+      final Document doc = ir.document(i);
       if (doc == null) {
         continue;
       }
@@ -177,8 +176,8 @@ public class TestCompressingStoredFields
     final int docID = random().nextInt(100);
     for (Field fld : fields) {
       String fldName = fld.name();
-      final StoredDocument sDoc = reader.document(docID, Collections.singleton(fldName));
-      final StorableField sField = sDoc.getField(fldName);
+      final Document sDoc = reader.document(docID, Collections.singleton(fldName));
+      final IndexableField sField = sDoc.getField(fldName);
       if (Field.class.equals(fld.getClass())) {
         assertEquals(fld.binaryValue(), sField.binaryValue());
         assertEquals(fld.stringValue(), sField.stringValue());

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/PackedReaderIterator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/PackedReaderIterator.java?rev=1395491&r1=1395490&r2=1395491&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/PackedReaderIterator.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/PackedReaderIterator.java Mon Oct  8 09:25:49 2012
@@ -37,7 +37,7 @@ final class PackedReaderIterator extends
     this.format = format;
     bulkOperation = BulkOperation.of(format, bitsPerValue);
     iterations = bulkOperation.computeIterations(valueCount, mem);
-    assert iterations > 0;
+    assert valueCount == 0 || iterations > 0;
     nextBlocks = new long[iterations * bulkOperation.blockCount()];
     nextValues = new LongsRef(new long[iterations * bulkOperation.valueCount()], 0, 0);
     assert iterations * bulkOperation.valueCount() == nextValues.longs.length;

Modified: lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java?rev=1395491&r1=1395490&r2=1395491&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java (original)
+++ lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java Mon Oct  8 09:25:49 2012
@@ -32,6 +32,7 @@ import org.apache.lucene.codecs.Postings
 import org.apache.lucene.codecs.appending.AppendingCodec;
 import org.apache.lucene.codecs.asserting.AssertingCodec;
 import org.apache.lucene.codecs.lucene3x.PreFlexRWCodec;
+import org.apache.lucene.codecs.compressing.CompressingCodec;
 import org.apache.lucene.codecs.lucene40.Lucene40Codec;
 import org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat;
 import org.apache.lucene.codecs.simpletext.SimpleTextCodec;
@@ -164,6 +165,8 @@ final class TestRuleSetupAndRestoreClass
       codec = new AppendingCodec();
     } else if ("Asserting".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && randomVal == 7 && !shouldAvoidCodec("Asserting"))) {
       codec = new AssertingCodec();
+    } else if ("Compressing".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && randomVal == 6 && !shouldAvoidCodec("Compressing"))) {
+      codec = CompressingCodec.randomInstance(random);
     } else if (!"random".equals(TEST_CODEC)) {
       codec = Codec.forName(TEST_CODEC);
     } else if ("random".equals(TEST_POSTINGSFORMAT)) {

Modified: lucene/dev/branches/branch_4x/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.Codec
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.Codec?rev=1395491&r1=1395490&r2=1395491&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.Codec (original)
+++ lucene/dev/branches/branch_4x/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.Codec Mon Oct  8 09:25:49 2012
@@ -15,3 +15,4 @@
 
 org.apache.lucene.codecs.lucene3x.PreFlexRWCodec
 org.apache.lucene.codecs.asserting.AssertingCodec
+org.apache.lucene.codecs.compressing.CompressingCodec