You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/02/16 17:21:01 UTC
svn commit: r1446911 - in /lucene/dev/branches/lucene4765/lucene:
codecs/src/java/org/apache/lucene/codecs/diskdv/
core/src/test/org/apache/lucene/
test-framework/src/java/org/apache/lucene/codecs/cheapbastard/
Author: rmuir
Date: Sat Feb 16 16:21:00 2013
New Revision: 1446911
URL: http://svn.apache.org/r1446911
Log:
add all-on-disk codec
Modified:
lucene/dev/branches/lucene4765/lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/DiskDocValuesProducer.java
lucene/dev/branches/lucene4765/lucene/core/src/test/org/apache/lucene/TestDemoDocValue.java
lucene/dev/branches/lucene4765/lucene/test-framework/src/java/org/apache/lucene/codecs/cheapbastard/CheapBastardDocValuesProducer.java
Modified: lucene/dev/branches/lucene4765/lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/DiskDocValuesProducer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4765/lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/DiskDocValuesProducer.java?rev=1446911&r1=1446910&r2=1446911&view=diff
==============================================================================
--- lucene/dev/branches/lucene4765/lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/DiskDocValuesProducer.java (original)
+++ lucene/dev/branches/lucene4765/lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/DiskDocValuesProducer.java Sat Feb 16 16:21:00 2013
@@ -304,6 +304,7 @@ class DiskDocValuesProducer extends DocV
IndexInput data = this.data.clone();
data.seek(entry.offset);
ordIndexInstance = new MonotonicBlockPackedReader(data, entry.packedIntsVersion, entry.blockSize, entry.count, false);
+ ordIndexInstances.put(field.number, ordIndexInstance);
}
ordIndex = ordIndexInstance;
}
Modified: lucene/dev/branches/lucene4765/lucene/core/src/test/org/apache/lucene/TestDemoDocValue.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4765/lucene/core/src/test/org/apache/lucene/TestDemoDocValue.java?rev=1446911&r1=1446910&r2=1446911&view=diff
==============================================================================
--- lucene/dev/branches/lucene4765/lucene/core/src/test/org/apache/lucene/TestDemoDocValue.java (original)
+++ lucene/dev/branches/lucene4765/lucene/core/src/test/org/apache/lucene/TestDemoDocValue.java Sat Feb 16 16:21:00 2013
@@ -52,10 +52,11 @@ import static org.apache.lucene.index.So
* to this class.
*/
// nocommit: should only be Lucene40 and Lucene41
-@SuppressCodecs({ "Lucene40", "Lucene41", "SimpleText", "CheapBastard" })
+// nocommit: move to BaseDocValuesTestCase, but allow these to be assume()d (for 4.0 and 4.1)
+@SuppressCodecs({ "Lucene40", "Lucene41", "SimpleText" })
public class TestDemoDocValue extends LuceneTestCase {
- public void testOneValue() throws IOException {
+ public void testSortedSetOneValue() throws IOException {
Directory directory = newDirectory();
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory);
@@ -80,7 +81,7 @@ public class TestDemoDocValue extends Lu
directory.close();
}
- public void testTwoDocumentsMerged() throws IOException {
+ public void testSortedSetTwoDocumentsMerged() throws IOException {
Directory directory = newDirectory();
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
@@ -122,7 +123,7 @@ public class TestDemoDocValue extends Lu
directory.close();
}
- public void testTwoValues() throws IOException {
+ public void testSortedSetTwoValues() throws IOException {
Directory directory = newDirectory();
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory);
@@ -152,7 +153,7 @@ public class TestDemoDocValue extends Lu
directory.close();
}
- public void testTwoValuesUnordered() throws IOException {
+ public void testSortedSetTwoValuesUnordered() throws IOException {
Directory directory = newDirectory();
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory);
@@ -182,7 +183,7 @@ public class TestDemoDocValue extends Lu
directory.close();
}
- public void testThreeValuesTwoDocs() throws IOException {
+ public void testSortedSetThreeValuesTwoDocs() throws IOException {
Directory directory = newDirectory();
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
@@ -231,7 +232,7 @@ public class TestDemoDocValue extends Lu
directory.close();
}
- public void testTwoDocumentsLastMissing() throws IOException {
+ public void testSortedSetTwoDocumentsLastMissing() throws IOException {
Directory directory = newDirectory();
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
@@ -263,7 +264,7 @@ public class TestDemoDocValue extends Lu
directory.close();
}
- public void testTwoDocumentsLastMissingMerge() throws IOException {
+ public void testSortedSetTwoDocumentsLastMissingMerge() throws IOException {
Directory directory = newDirectory();
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
@@ -297,7 +298,7 @@ public class TestDemoDocValue extends Lu
directory.close();
}
- public void testTwoDocumentsFirstMissing() throws IOException {
+ public void testSortedSetTwoDocumentsFirstMissing() throws IOException {
Directory directory = newDirectory();
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
@@ -330,7 +331,7 @@ public class TestDemoDocValue extends Lu
directory.close();
}
- public void testTwoDocumentsFirstMissingMerge() throws IOException {
+ public void testSortedSetTwoDocumentsFirstMissingMerge() throws IOException {
Directory directory = newDirectory();
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
@@ -364,7 +365,7 @@ public class TestDemoDocValue extends Lu
directory.close();
}
- public void testMergeAwayAllValues() throws IOException {
+ public void testSortedSetMergeAwayAllValues() throws IOException {
Directory directory = newDirectory();
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
Modified: lucene/dev/branches/lucene4765/lucene/test-framework/src/java/org/apache/lucene/codecs/cheapbastard/CheapBastardDocValuesProducer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4765/lucene/test-framework/src/java/org/apache/lucene/codecs/cheapbastard/CheapBastardDocValuesProducer.java?rev=1446911&r1=1446910&r2=1446911&view=diff
==============================================================================
--- lucene/dev/branches/lucene4765/lucene/test-framework/src/java/org/apache/lucene/codecs/cheapbastard/CheapBastardDocValuesProducer.java (original)
+++ lucene/dev/branches/lucene4765/lucene/test-framework/src/java/org/apache/lucene/codecs/cheapbastard/CheapBastardDocValuesProducer.java Sat Feb 16 16:21:00 2013
@@ -41,6 +41,7 @@ import org.apache.lucene.util.packed.Mon
class CheapBastardDocValuesProducer extends DocValuesProducer {
private final Map<Integer,NumericEntry> numerics;
private final Map<Integer,NumericEntry> ords;
+ private final Map<Integer,NumericEntry> ordIndexes;
private final Map<Integer,BinaryEntry> binaries;
private final IndexInput data;
@@ -55,6 +56,7 @@ class CheapBastardDocValuesProducer exte
DiskDocValuesFormat.VERSION_START);
numerics = new HashMap<Integer,NumericEntry>();
ords = new HashMap<Integer,NumericEntry>();
+ ordIndexes = new HashMap<Integer,NumericEntry>();
binaries = new HashMap<Integer,BinaryEntry>();
readFields(in);
success = true;
@@ -101,6 +103,36 @@ class CheapBastardDocValuesProducer exte
}
NumericEntry n = readNumericEntry(meta);
ords.put(fieldNumber, n);
+ } else if (type == DiskDocValuesFormat.SORTED_SET) {
+ // sortedset = binary + numeric + ordIndex
+ if (meta.readVInt() != fieldNumber) {
+ throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt");
+ }
+ if (meta.readByte() != DiskDocValuesFormat.BINARY) {
+ throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt");
+ }
+ BinaryEntry b = readBinaryEntry(meta);
+ binaries.put(fieldNumber, b);
+
+ if (meta.readVInt() != fieldNumber) {
+ throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt");
+ }
+ if (meta.readByte() != DiskDocValuesFormat.NUMERIC) {
+ throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt");
+ }
+ NumericEntry n1 = readNumericEntry(meta);
+ ords.put(fieldNumber, n1);
+
+ if (meta.readVInt() != fieldNumber) {
+ throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt");
+ }
+ if (meta.readByte() != DiskDocValuesFormat.NUMERIC) {
+ throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt");
+ }
+ NumericEntry n2 = readNumericEntry(meta);
+ ordIndexes.put(fieldNumber, n2);
+ } else {
+ throw new CorruptIndexException("invalid type: " + type + ", resource=" + meta);
}
fieldNumber = meta.readVInt();
}
@@ -135,15 +167,15 @@ class CheapBastardDocValuesProducer exte
return getNumeric(field, entry);
}
- private NumericDocValues getNumeric(FieldInfo field, final NumericEntry entry) throws IOException {
+ private LongNumericDocValues getNumeric(FieldInfo field, final NumericEntry entry) throws IOException {
final IndexInput data = this.data.clone();
data.seek(entry.offset);
final BlockPackedReader reader = new BlockPackedReader(data, entry.packedIntsVersion, entry.blockSize, entry.count, true);
- return new NumericDocValues() {
+ return new LongNumericDocValues() {
@Override
- public long get(int docID) {
- return reader.get(docID);
+ public long get(long id) {
+ return reader.get(id);
}
};
}
@@ -161,10 +193,10 @@ class CheapBastardDocValuesProducer exte
private BinaryDocValues getFixedBinary(FieldInfo field, final BinaryEntry bytes) {
final IndexInput data = this.data.clone();
- return new BinaryDocValues() {
+ return new LongBinaryDocValues() {
@Override
- public void get(int docID, BytesRef result) {
- long address = bytes.offset + docID * (long)bytes.maxLength;
+ public void get(long id, BytesRef result) {
+ long address = bytes.offset + id * bytes.maxLength;
try {
data.seek(address);
// NOTE: we could have one buffer, but various consumers (e.g. FieldComparatorSource)
@@ -186,11 +218,11 @@ class CheapBastardDocValuesProducer exte
data.seek(bytes.addressesOffset);
final MonotonicBlockPackedReader addresses = new MonotonicBlockPackedReader(data, bytes.packedIntsVersion, bytes.blockSize, bytes.count, true);
- return new BinaryDocValues() {
+ return new LongBinaryDocValues() {
@Override
- public void get(int docID, BytesRef result) {
- long startAddress = bytes.offset + (docID == 0 ? 0 : + addresses.get(docID-1));
- long endAddress = bytes.offset + addresses.get(docID);
+ public void get(long id, BytesRef result) {
+ long startAddress = bytes.offset + (id == 0 ? 0 : + addresses.get(id-1));
+ long endAddress = bytes.offset + addresses.get(id);
int length = (int) (endAddress - startAddress);
try {
data.seek(startAddress);
@@ -234,7 +266,45 @@ class CheapBastardDocValuesProducer exte
@Override
public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
- throw new UnsupportedOperationException(); // nocommit
+ final long valueCount = binaries.get(field.number).count;
+ final LongBinaryDocValues binary = (LongBinaryDocValues) getBinary(field);
+ final LongNumericDocValues ordinals = getNumeric(field, ords.get(field.number));
+ NumericEntry entry = ordIndexes.get(field.number);
+ IndexInput data = this.data.clone();
+ data.seek(entry.offset);
+ final MonotonicBlockPackedReader ordIndex = new MonotonicBlockPackedReader(data, entry.packedIntsVersion, entry.blockSize, entry.count, true);
+
+ return new SortedSetDocValues() {
+ long offset;
+ long endOffset;
+
+ @Override
+ public long nextOrd() {
+ if (offset == endOffset) {
+ return NO_MORE_ORDS;
+ } else {
+ long ord = ordinals.get(offset);
+ offset++;
+ return ord;
+ }
+ }
+
+ @Override
+ public void setDocument(int docID) {
+ offset = (docID == 0 ? 0 : ordIndex.get(docID-1));
+ endOffset = ordIndex.get(docID);
+ }
+
+ @Override
+ public void lookupOrd(long ord, BytesRef result) {
+ binary.get(ord, result);
+ }
+
+ @Override
+ public long getValueCount() {
+ return valueCount;
+ }
+ };
}
@Override
@@ -260,4 +330,23 @@ class CheapBastardDocValuesProducer exte
int packedIntsVersion;
int blockSize;
}
+
+ // internally we compose complex dv (sorted/sortedset) from other ones
+ static abstract class LongNumericDocValues extends NumericDocValues {
+ @Override
+ public final long get(int docID) {
+ return get((long) docID);
+ }
+
+ abstract long get(long id);
+ }
+
+ static abstract class LongBinaryDocValues extends BinaryDocValues {
+ @Override
+ public final void get(int docID, BytesRef result) {
+ get((long)docID, result);
+ }
+
+ abstract void get(long id, BytesRef Result);
+ }
}