You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/02/16 17:21:01 UTC

svn commit: r1446911 - in /lucene/dev/branches/lucene4765/lucene: codecs/src/java/org/apache/lucene/codecs/diskdv/ core/src/test/org/apache/lucene/ test-framework/src/java/org/apache/lucene/codecs/cheapbastard/

Author: rmuir
Date: Sat Feb 16 16:21:00 2013
New Revision: 1446911

URL: http://svn.apache.org/r1446911
Log:
add all-on-disk codec

Modified:
    lucene/dev/branches/lucene4765/lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/DiskDocValuesProducer.java
    lucene/dev/branches/lucene4765/lucene/core/src/test/org/apache/lucene/TestDemoDocValue.java
    lucene/dev/branches/lucene4765/lucene/test-framework/src/java/org/apache/lucene/codecs/cheapbastard/CheapBastardDocValuesProducer.java

Modified: lucene/dev/branches/lucene4765/lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/DiskDocValuesProducer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4765/lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/DiskDocValuesProducer.java?rev=1446911&r1=1446910&r2=1446911&view=diff
==============================================================================
--- lucene/dev/branches/lucene4765/lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/DiskDocValuesProducer.java (original)
+++ lucene/dev/branches/lucene4765/lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/DiskDocValuesProducer.java Sat Feb 16 16:21:00 2013
@@ -304,6 +304,7 @@ class DiskDocValuesProducer extends DocV
         IndexInput data = this.data.clone();
         data.seek(entry.offset);
         ordIndexInstance = new MonotonicBlockPackedReader(data, entry.packedIntsVersion, entry.blockSize, entry.count, false);
+        ordIndexInstances.put(field.number, ordIndexInstance);
       }
       ordIndex = ordIndexInstance;
     }

Modified: lucene/dev/branches/lucene4765/lucene/core/src/test/org/apache/lucene/TestDemoDocValue.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4765/lucene/core/src/test/org/apache/lucene/TestDemoDocValue.java?rev=1446911&r1=1446910&r2=1446911&view=diff
==============================================================================
--- lucene/dev/branches/lucene4765/lucene/core/src/test/org/apache/lucene/TestDemoDocValue.java (original)
+++ lucene/dev/branches/lucene4765/lucene/core/src/test/org/apache/lucene/TestDemoDocValue.java Sat Feb 16 16:21:00 2013
@@ -52,10 +52,11 @@ import static org.apache.lucene.index.So
  * to this class.
  */
 // nocommit: should only be Lucene40 and Lucene41
-@SuppressCodecs({ "Lucene40", "Lucene41", "SimpleText", "CheapBastard" })
+// nocommit: move to BaseDocValuesTestCase, but allow these to be assume()d (for 4.0 and 4.1)
+@SuppressCodecs({ "Lucene40", "Lucene41", "SimpleText" })
 public class TestDemoDocValue extends LuceneTestCase {
   
-  public void testOneValue() throws IOException {
+  public void testSortedSetOneValue() throws IOException {
     Directory directory = newDirectory();
     RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory);
     
@@ -80,7 +81,7 @@ public class TestDemoDocValue extends Lu
     directory.close();
   }
   
-  public void testTwoDocumentsMerged() throws IOException {
+  public void testSortedSetTwoDocumentsMerged() throws IOException {
     Directory directory = newDirectory();
     Analyzer analyzer = new MockAnalyzer(random());
     IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
@@ -122,7 +123,7 @@ public class TestDemoDocValue extends Lu
     directory.close();
   }
   
-  public void testTwoValues() throws IOException {
+  public void testSortedSetTwoValues() throws IOException {
     Directory directory = newDirectory();
     RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory);
     
@@ -152,7 +153,7 @@ public class TestDemoDocValue extends Lu
     directory.close();
   }
   
-  public void testTwoValuesUnordered() throws IOException {
+  public void testSortedSetTwoValuesUnordered() throws IOException {
     Directory directory = newDirectory();
     RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory);
     
@@ -182,7 +183,7 @@ public class TestDemoDocValue extends Lu
     directory.close();
   }
   
-  public void testThreeValuesTwoDocs() throws IOException {
+  public void testSortedSetThreeValuesTwoDocs() throws IOException {
     Directory directory = newDirectory();
     Analyzer analyzer = new MockAnalyzer(random());
     IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
@@ -231,7 +232,7 @@ public class TestDemoDocValue extends Lu
     directory.close();
   }
   
-  public void testTwoDocumentsLastMissing() throws IOException {
+  public void testSortedSetTwoDocumentsLastMissing() throws IOException {
     Directory directory = newDirectory();
     Analyzer analyzer = new MockAnalyzer(random());
     IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
@@ -263,7 +264,7 @@ public class TestDemoDocValue extends Lu
     directory.close();
   }
   
-  public void testTwoDocumentsLastMissingMerge() throws IOException {
+  public void testSortedSetTwoDocumentsLastMissingMerge() throws IOException {
     Directory directory = newDirectory();
     Analyzer analyzer = new MockAnalyzer(random());
     IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
@@ -297,7 +298,7 @@ public class TestDemoDocValue extends Lu
     directory.close();
   }
   
-  public void testTwoDocumentsFirstMissing() throws IOException {
+  public void testSortedSetTwoDocumentsFirstMissing() throws IOException {
     Directory directory = newDirectory();
     Analyzer analyzer = new MockAnalyzer(random());
     IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
@@ -330,7 +331,7 @@ public class TestDemoDocValue extends Lu
     directory.close();
   }
   
-  public void testTwoDocumentsFirstMissingMerge() throws IOException {
+  public void testSortedSetTwoDocumentsFirstMissingMerge() throws IOException {
     Directory directory = newDirectory();
     Analyzer analyzer = new MockAnalyzer(random());
     IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
@@ -364,7 +365,7 @@ public class TestDemoDocValue extends Lu
     directory.close();
   }
   
-  public void testMergeAwayAllValues() throws IOException {
+  public void testSortedSetMergeAwayAllValues() throws IOException {
     Directory directory = newDirectory();
     Analyzer analyzer = new MockAnalyzer(random());
     IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);

Modified: lucene/dev/branches/lucene4765/lucene/test-framework/src/java/org/apache/lucene/codecs/cheapbastard/CheapBastardDocValuesProducer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4765/lucene/test-framework/src/java/org/apache/lucene/codecs/cheapbastard/CheapBastardDocValuesProducer.java?rev=1446911&r1=1446910&r2=1446911&view=diff
==============================================================================
--- lucene/dev/branches/lucene4765/lucene/test-framework/src/java/org/apache/lucene/codecs/cheapbastard/CheapBastardDocValuesProducer.java (original)
+++ lucene/dev/branches/lucene4765/lucene/test-framework/src/java/org/apache/lucene/codecs/cheapbastard/CheapBastardDocValuesProducer.java Sat Feb 16 16:21:00 2013
@@ -41,6 +41,7 @@ import org.apache.lucene.util.packed.Mon
 class CheapBastardDocValuesProducer extends DocValuesProducer {
   private final Map<Integer,NumericEntry> numerics;
   private final Map<Integer,NumericEntry> ords;
+  private final Map<Integer,NumericEntry> ordIndexes;
   private final Map<Integer,BinaryEntry> binaries;
   private final IndexInput data;
   
@@ -55,6 +56,7 @@ class CheapBastardDocValuesProducer exte
                                 DiskDocValuesFormat.VERSION_START);
       numerics = new HashMap<Integer,NumericEntry>();
       ords = new HashMap<Integer,NumericEntry>();
+      ordIndexes = new HashMap<Integer,NumericEntry>();
       binaries = new HashMap<Integer,BinaryEntry>();
       readFields(in);
       success = true;
@@ -101,6 +103,36 @@ class CheapBastardDocValuesProducer exte
         }
         NumericEntry n = readNumericEntry(meta);
         ords.put(fieldNumber, n);
+      } else if (type == DiskDocValuesFormat.SORTED_SET) {
+        // sortedset = binary + numeric + ordIndex
+        if (meta.readVInt() != fieldNumber) {
+          throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt");
+        }
+        if (meta.readByte() != DiskDocValuesFormat.BINARY) {
+          throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt");
+        }
+        BinaryEntry b = readBinaryEntry(meta);
+        binaries.put(fieldNumber, b);
+        
+        if (meta.readVInt() != fieldNumber) {
+          throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt");
+        }
+        if (meta.readByte() != DiskDocValuesFormat.NUMERIC) {
+          throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt");
+        }
+        NumericEntry n1 = readNumericEntry(meta);
+        ords.put(fieldNumber, n1);
+        
+        if (meta.readVInt() != fieldNumber) {
+          throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt");
+        }
+        if (meta.readByte() != DiskDocValuesFormat.NUMERIC) {
+          throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt");
+        }
+        NumericEntry n2 = readNumericEntry(meta);
+        ordIndexes.put(fieldNumber, n2);
+      } else {
+        throw new CorruptIndexException("invalid type: " + type + ", resource=" + meta);
       }
       fieldNumber = meta.readVInt();
     }
@@ -135,15 +167,15 @@ class CheapBastardDocValuesProducer exte
     return getNumeric(field, entry);
   }
   
-  private NumericDocValues getNumeric(FieldInfo field, final NumericEntry entry) throws IOException {
+  private LongNumericDocValues getNumeric(FieldInfo field, final NumericEntry entry) throws IOException {
     final IndexInput data = this.data.clone();
     data.seek(entry.offset);
 
     final BlockPackedReader reader = new BlockPackedReader(data, entry.packedIntsVersion, entry.blockSize, entry.count, true);
-    return new NumericDocValues() {
+    return new LongNumericDocValues() {
       @Override
-      public long get(int docID) {
-        return reader.get(docID);
+      public long get(long id) {
+        return reader.get(id);
       }
     };
   }
@@ -161,10 +193,10 @@ class CheapBastardDocValuesProducer exte
   private BinaryDocValues getFixedBinary(FieldInfo field, final BinaryEntry bytes) {
     final IndexInput data = this.data.clone();
 
-    return new BinaryDocValues() {
+    return new LongBinaryDocValues() {
       @Override
-      public void get(int docID, BytesRef result) {
-        long address = bytes.offset + docID * (long)bytes.maxLength;
+      public void get(long id, BytesRef result) {
+        long address = bytes.offset + id * bytes.maxLength;
         try {
           data.seek(address);
           // NOTE: we could have one buffer, but various consumers (e.g. FieldComparatorSource) 
@@ -186,11 +218,11 @@ class CheapBastardDocValuesProducer exte
     data.seek(bytes.addressesOffset);
 
     final MonotonicBlockPackedReader addresses = new MonotonicBlockPackedReader(data, bytes.packedIntsVersion, bytes.blockSize, bytes.count, true);
-    return new BinaryDocValues() {
+    return new LongBinaryDocValues() {
       @Override
-      public void get(int docID, BytesRef result) {
-        long startAddress = bytes.offset + (docID == 0 ? 0 : + addresses.get(docID-1));
-        long endAddress = bytes.offset + addresses.get(docID);
+      public void get(long id, BytesRef result) {
+        long startAddress = bytes.offset + (id == 0 ? 0 : + addresses.get(id-1));
+        long endAddress = bytes.offset + addresses.get(id);
         int length = (int) (endAddress - startAddress);
         try {
           data.seek(startAddress);
@@ -234,7 +266,45 @@ class CheapBastardDocValuesProducer exte
 
   @Override
   public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
-    throw new UnsupportedOperationException(); // nocommit
+    final long valueCount = binaries.get(field.number).count;
+    final LongBinaryDocValues binary = (LongBinaryDocValues) getBinary(field);
+    final LongNumericDocValues ordinals = getNumeric(field, ords.get(field.number));
+    NumericEntry entry = ordIndexes.get(field.number);
+    IndexInput data = this.data.clone();
+    data.seek(entry.offset);
+    final MonotonicBlockPackedReader ordIndex = new MonotonicBlockPackedReader(data, entry.packedIntsVersion, entry.blockSize, entry.count, true);
+    
+    return new SortedSetDocValues() {
+      long offset;
+      long endOffset;
+      
+      @Override
+      public long nextOrd() {
+        if (offset == endOffset) {
+          return NO_MORE_ORDS;
+        } else {
+          long ord = ordinals.get(offset);
+          offset++;
+          return ord;
+        }
+      }
+
+      @Override
+      public void setDocument(int docID) {
+        offset = (docID == 0 ? 0 : ordIndex.get(docID-1));
+        endOffset = ordIndex.get(docID);
+      }
+
+      @Override
+      public void lookupOrd(long ord, BytesRef result) {
+        binary.get(ord, result);
+      }
+
+      @Override
+      public long getValueCount() {
+        return valueCount;
+      }
+    };
   }
 
   @Override
@@ -260,4 +330,23 @@ class CheapBastardDocValuesProducer exte
     int packedIntsVersion;
     int blockSize;
   }
+  
+  // internally we compose complex dv (sorted/sortedset) from other ones
+  static abstract class LongNumericDocValues extends NumericDocValues {
+    @Override
+    public final long get(int docID) {
+      return get((long) docID);
+    }
+    
+    abstract long get(long id);
+  }
+  
+  static abstract class LongBinaryDocValues extends BinaryDocValues {
+    @Override
+    public final void get(int docID, BytesRef result) {
+      get((long)docID, result);
+    }
+    
+    abstract void get(long id, BytesRef Result);
+  }
 }