You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by si...@apache.org on 2011/10/07 11:05:11 UTC
svn commit: r1179970 [1/3] - in /lucene/dev/trunk/lucene: ./
src/java/org/apache/lucene/document/ src/java/org/apache/lucene/index/
src/java/org/apache/lucene/index/codecs/
src/java/org/apache/lucene/index/codecs/sep/
src/java/org/apache/lucene/index/v...
Author: simonw
Date: Fri Oct 7 09:05:10 2011
New Revision: 1179970
URL: http://svn.apache.org/viewvc?rev=1179970&view=rev
Log:
LUCENE-3433: Random access non RAM resident IndexDocValues (CSF)
Added:
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/DirectSource.java (with props)
Removed:
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/ValuesEnum.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/FloatsRef.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/LongsRef.java
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/src/java/org/apache/lucene/document/IndexDocValuesField.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/CheckIndex.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfos.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/DocValuesReaderBase.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/DocValuesWriterBase.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/SepDocValuesConsumer.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/Bytes.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/Floats.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/IndexDocValues.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/IndexDocValuesArray.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/Ints.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/MultiIndexDocValues.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/PackedIntValues.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/SourceCache.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/ValueType.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/Writer.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/packed/PackedInts.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/packed/PackedReaderIterator.java
lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/values/TestDocValues.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/packed/TestPackedInts.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1179970&r1=1179969&r2=1179970&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Fri Oct 7 09:05:10 2011
@@ -548,6 +548,10 @@ New features
* LUCENE-2309: Added IndexableField.tokenStream(Analyzer) which is now
responsible for creating the TokenStreams for Fields when they are to
be indexed. (Chris Male)
+
+* LUCENE-3433: Added random access for non RAM resident IndexDocValues. RAM
+ resident and disk resident IndexDocValues are now exposed via the Source
+ interface. ValuesEnum has been removed in favour of Source. (Simon Willnauer)
Optimizations
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/document/IndexDocValuesField.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/document/IndexDocValuesField.java?rev=1179970&r1=1179969&r2=1179970&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/document/IndexDocValuesField.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/document/IndexDocValuesField.java Fri Oct 7 09:05:10 2011
@@ -19,7 +19,6 @@ package org.apache.lucene.document;
import java.io.Reader;
import java.util.Comparator;
-import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.IndexableFieldType;
import org.apache.lucene.index.values.PerDocFieldValues;
import org.apache.lucene.index.values.ValueType;
@@ -317,21 +316,34 @@ public class IndexDocValuesField extends
final String value;
switch (type) {
case BYTES_FIXED_DEREF:
- case BYTES_FIXED_SORTED:
case BYTES_FIXED_STRAIGHT:
case BYTES_VAR_DEREF:
- case BYTES_VAR_SORTED:
case BYTES_VAR_STRAIGHT:
- value = "bytes:bytes.utf8ToString();";
+ case BYTES_FIXED_SORTED:
+ case BYTES_VAR_SORTED:
+ // don't convert via utf8ToString() here - the bytes are not necessarily valid unicode
+ value = "bytes: " + bytes.toString();
+ break;
+ case FIXED_INTS_16:
+ value = "int16: " + longValue;
+ break;
+ case FIXED_INTS_32:
+ value = "int32: " + longValue;
+ break;
+ case FIXED_INTS_64:
+ value = "int64: " + longValue;
+ break;
+ case FIXED_INTS_8:
+ value = "int8: " + longValue;
break;
case VAR_INTS:
- value = "int:" + longValue;
+ value = "vint: " + longValue;
break;
case FLOAT_32:
- value = "float32:" + doubleValue;
+ value = "float32: " + doubleValue;
break;
case FLOAT_64:
- value = "float64:" + doubleValue;
+ value = "float64: " + doubleValue;
break;
default:
throw new IllegalArgumentException("unknown type: " + type);
@@ -353,14 +365,18 @@ public class IndexDocValuesField extends
final IndexDocValuesField valField = new IndexDocValuesField(field.name(), field.fieldType(), field.stringValue());
switch (type) {
case BYTES_FIXED_DEREF:
- case BYTES_FIXED_SORTED:
case BYTES_FIXED_STRAIGHT:
case BYTES_VAR_DEREF:
- case BYTES_VAR_SORTED:
case BYTES_VAR_STRAIGHT:
+ case BYTES_FIXED_SORTED:
+ case BYTES_VAR_SORTED:
BytesRef ref = field.isBinary() ? field.binaryValue() : new BytesRef(field.stringValue());
valField.setBytes(ref, type);
break;
+ case FIXED_INTS_16:
+ case FIXED_INTS_32:
+ case FIXED_INTS_64:
+ case FIXED_INTS_8:
case VAR_INTS:
valField.setInt(Long.parseLong(field.stringValue()));
break;
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/CheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/CheckIndex.java?rev=1179970&r1=1179969&r2=1179970&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/CheckIndex.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/CheckIndex.java Fri Oct 7 09:05:10 2011
@@ -41,7 +41,7 @@ import java.util.Map;
import org.apache.lucene.index.codecs.BlockTreeTermsReader;
import org.apache.lucene.index.codecs.PerDocValues;
import org.apache.lucene.index.values.IndexDocValues;
-import org.apache.lucene.index.values.ValuesEnum;
+import org.apache.lucene.index.values.IndexDocValues.Source;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@@ -1070,27 +1070,28 @@ public class CheckIndex {
if (docValues == null) {
continue;
}
- final ValuesEnum values = docValues.getEnum();
- while (values.nextDoc() != ValuesEnum.NO_MORE_DOCS) {
+ final Source values = docValues.getDirectSource();
+ final int maxDoc = reader.maxDoc();
+ for (int i = 0; i < maxDoc; i++) {
switch (fieldInfo.docValues) {
- case BYTES_FIXED_DEREF:
case BYTES_FIXED_SORTED:
+ case BYTES_VAR_SORTED:
+ case BYTES_FIXED_DEREF:
case BYTES_FIXED_STRAIGHT:
case BYTES_VAR_DEREF:
- case BYTES_VAR_SORTED:
case BYTES_VAR_STRAIGHT:
- values.bytes();
+ values.getBytes(i, new BytesRef());
break;
case FLOAT_32:
case FLOAT_64:
- values.getFloat();
+ values.getFloat(i);
break;
case VAR_INTS:
case FIXED_INTS_16:
case FIXED_INTS_32:
case FIXED_INTS_64:
case FIXED_INTS_8:
- values.getInt();
+ values.getInt(i);
break;
default:
throw new IllegalArgumentException("Field: " + fieldInfo.name
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfos.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfos.java?rev=1179970&r1=1179969&r2=1179970&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfos.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfos.java Fri Oct 7 09:05:10 2011
@@ -652,31 +652,30 @@ public final class FieldInfos implements
case BYTES_FIXED_DEREF:
b = 5;
break;
- case BYTES_FIXED_SORTED:
+ case BYTES_VAR_STRAIGHT:
b = 6;
break;
- case BYTES_VAR_STRAIGHT:
+ case BYTES_VAR_DEREF:
b = 7;
break;
- case BYTES_VAR_DEREF:
+ case FIXED_INTS_16:
b = 8;
break;
- case BYTES_VAR_SORTED:
+ case FIXED_INTS_32:
b = 9;
break;
- case FIXED_INTS_16:
+ case FIXED_INTS_64:
b = 10;
break;
- case FIXED_INTS_32:
+ case FIXED_INTS_8:
b = 11;
break;
- case FIXED_INTS_64:
+ case BYTES_FIXED_SORTED:
b = 12;
break;
- case FIXED_INTS_8:
+ case BYTES_VAR_SORTED:
b = 13;
break;
-
default:
throw new IllegalStateException("unhandled indexValues type " + fi.docValues);
}
@@ -754,29 +753,29 @@ public final class FieldInfos implements
docValuesType = ValueType.BYTES_FIXED_DEREF;
break;
case 6:
- docValuesType = ValueType.BYTES_FIXED_SORTED;
+ docValuesType = ValueType.BYTES_VAR_STRAIGHT;
break;
case 7:
- docValuesType = ValueType.BYTES_VAR_STRAIGHT;
+ docValuesType = ValueType.BYTES_VAR_DEREF;
break;
case 8:
- docValuesType = ValueType.BYTES_VAR_DEREF;
+ docValuesType = ValueType.FIXED_INTS_16;
break;
case 9:
- docValuesType = ValueType.BYTES_VAR_SORTED;
+ docValuesType = ValueType.FIXED_INTS_32;
break;
case 10:
- docValuesType = ValueType.FIXED_INTS_16;
+ docValuesType = ValueType.FIXED_INTS_64;
break;
case 11:
- docValuesType = ValueType.FIXED_INTS_32;
+ docValuesType = ValueType.FIXED_INTS_8;
break;
case 12:
- docValuesType = ValueType.FIXED_INTS_64;
+ docValuesType = ValueType.BYTES_FIXED_SORTED;
break;
case 13:
- docValuesType = ValueType.FIXED_INTS_8;
- break;
+ docValuesType = ValueType.BYTES_VAR_SORTED;
+ break;
default:
throw new IllegalStateException("unhandled indexValues type " + b);
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/DocValuesReaderBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/DocValuesReaderBase.java?rev=1179970&r1=1179969&r2=1179970&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/DocValuesReaderBase.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/DocValuesReaderBase.java Fri Oct 7 09:05:10 2011
@@ -58,11 +58,11 @@ public abstract class DocValuesReaderBas
public Collection<String> fields() {
return docValues().keySet();
}
-
+
public Comparator<BytesRef> getComparator() throws IOException {
return BytesRef.getUTF8SortedAsUnicodeComparator();
}
-
+
// Only opens files... doesn't actually load any values
protected TreeMap<String, IndexDocValues> load(FieldInfos fieldInfos,
String segment, int docCount, Directory dir, int codecId, IOContext context)
@@ -121,9 +121,9 @@ public abstract class DocValuesReaderBas
case VAR_INTS:
return Ints.getValues(dir, id, docCount, type, context);
case FLOAT_32:
- return Floats.getValues(dir, id, docCount, context);
+ return Floats.getValues(dir, id, docCount, context, type);
case FLOAT_64:
- return Floats.getValues(dir, id, docCount, context);
+ return Floats.getValues(dir, id, docCount, context, type);
case BYTES_FIXED_STRAIGHT:
return Bytes.getValues(dir, id, Bytes.Mode.STRAIGHT, true, docCount, getComparator(), context);
case BYTES_FIXED_DEREF:
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/DocValuesWriterBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/DocValuesWriterBase.java?rev=1179970&r1=1179969&r2=1179970&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/DocValuesWriterBase.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/DocValuesWriterBase.java Fri Oct 7 09:05:10 2011
@@ -54,7 +54,7 @@ public abstract class DocValuesWriterBas
@Override
public DocValuesConsumer addValuesField(FieldInfo field) throws IOException {
return Writer.create(field.getDocValues(),
- docValuesId(segmentName, codecId, field.number),
+ docValuesId(segmentName, codecId, field.number),
getDirectory(), getComparator(), bytesUsed, context);
}
@@ -62,6 +62,7 @@ public abstract class DocValuesWriterBas
return segmentsName + "_" + codecID + "-" + fieldId;
}
+
public Comparator<BytesRef> getComparator() throws IOException {
return BytesRef.getUTF8SortedAsUnicodeComparator();
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/SepDocValuesConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/SepDocValuesConsumer.java?rev=1179970&r1=1179969&r2=1179970&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/SepDocValuesConsumer.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/SepDocValuesConsumer.java Fri Oct 7 09:05:10 2011
@@ -56,9 +56,9 @@ public class SepDocValuesConsumer extend
switch (fieldInfo.getDocValues()) {
case BYTES_FIXED_DEREF:
case BYTES_VAR_DEREF:
- case BYTES_VAR_SORTED:
- case BYTES_FIXED_SORTED:
case BYTES_VAR_STRAIGHT:
+ case BYTES_FIXED_SORTED:
+ case BYTES_VAR_SORTED:
files.add(IndexFileNames.segmentFileName(filename, "",
Writer.INDEX_EXTENSION));
assert dir.fileExists(IndexFileNames.segmentFileName(filename, "",
@@ -77,7 +77,6 @@ public class SepDocValuesConsumer extend
assert dir.fileExists(IndexFileNames.segmentFileName(filename, "",
Writer.DATA_EXTENSION));
break;
-
default:
assert false;
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/Bytes.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/Bytes.java?rev=1179970&r1=1179969&r2=1179970&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/Bytes.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/Bytes.java Fri Oct 7 09:05:10 2011
@@ -26,14 +26,12 @@ import java.util.concurrent.atomic.Atomi
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.values.IndexDocValues.SortedSource;
import org.apache.lucene.index.values.IndexDocValues.Source;
-import org.apache.lucene.index.values.IndexDocValues.SourceEnum;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
@@ -50,8 +48,7 @@ import org.apache.lucene.util.packed.Pac
/**
* Provides concrete Writer/Reader implementations for <tt>byte[]</tt> value per
* document. There are 6 package-private default implementations of this, for
- * all combinations of {@link Mode#DEREF}/{@link Mode#STRAIGHT}/
- * {@link Mode#SORTED} x fixed-length/variable-length.
+ * all combinations of {@link Mode#DEREF}/{@link Mode#STRAIGHT} x fixed-length/variable-length.
*
* <p>
* NOTE: Currently the total amount of byte[] data stored (across a single
@@ -101,11 +98,12 @@ public final class Bytes {
* the segment name and a unique id per segment.
* @param mode
* the writers store mode
- * @param comp
- * a {@link BytesRef} comparator - only used with {@link Mode#SORTED}
* @param fixedSize
* <code>true</code> if all bytes subsequently passed to the
* {@link Writer} will have the same length
+ * @param sortComparator {@link BytesRef} comparator used by sorted variants.
+ * If <code>null</code> {@link BytesRef#getUTF8SortedAsUnicodeComparator()}
+ * is used instead
* @param bytesUsed
* an {@link AtomicLong} instance to track the used bytes within the
* {@link Writer}. A call to {@link Writer#finish(int)} will release
@@ -117,12 +115,12 @@ public final class Bytes {
* if the files for the writer can not be created.
*/
public static Writer getWriter(Directory dir, String id, Mode mode,
- Comparator<BytesRef> comp, boolean fixedSize, Counter bytesUsed, IOContext context)
+ boolean fixedSize, Comparator<BytesRef> sortComparator, Counter bytesUsed, IOContext context)
throws IOException {
// TODO -- i shouldn't have to specify fixed? can
// track itself & do the write thing at write time?
- if (comp == null) {
- comp = BytesRef.getUTF8SortedAsUnicodeComparator();
+ if (sortComparator == null) {
+ sortComparator = BytesRef.getUTF8SortedAsUnicodeComparator();
}
if (fixedSize) {
@@ -131,7 +129,7 @@ public final class Bytes {
} else if (mode == Mode.DEREF) {
return new FixedDerefBytesImpl.Writer(dir, id, bytesUsed, context);
} else if (mode == Mode.SORTED) {
- return new FixedSortedBytesImpl.Writer(dir, id, comp, bytesUsed, context);
+ return new FixedSortedBytesImpl.Writer(dir, id, sortComparator, bytesUsed, context);
}
} else {
if (mode == Mode.STRAIGHT) {
@@ -139,7 +137,7 @@ public final class Bytes {
} else if (mode == Mode.DEREF) {
return new VarDerefBytesImpl.Writer(dir, id, bytesUsed, context);
} else if (mode == Mode.SORTED) {
- return new VarSortedBytesImpl.Writer(dir, id, comp, bytesUsed, context);
+ return new VarSortedBytesImpl.Writer(dir, id, sortComparator, bytesUsed, context);
}
}
@@ -163,30 +161,34 @@ public final class Bytes {
* otherwise <code>false</code>
* @param maxDoc
* the number of document values stored for the given ID
- * @param sortComparator byte comparator used by sorted variants
+ * @param sortComparator {@link BytesRef} comparator used by sorted variants.
+ * If <code>null</code> {@link BytesRef#getUTF8SortedAsUnicodeComparator()}
+ * is used instead
* @return an initialized {@link IndexDocValues} instance.
* @throws IOException
* if an {@link IOException} occurs
*/
public static IndexDocValues getValues(Directory dir, String id, Mode mode,
boolean fixedSize, int maxDoc, Comparator<BytesRef> sortComparator, IOContext context) throws IOException {
-
+ if (sortComparator == null) {
+ sortComparator = BytesRef.getUTF8SortedAsUnicodeComparator();
+ }
// TODO -- I can peek @ header to determine fixed/mode?
if (fixedSize) {
if (mode == Mode.STRAIGHT) {
- return new FixedStraightBytesImpl.Reader(dir, id, maxDoc, context);
+ return new FixedStraightBytesImpl.FixedStraightReader(dir, id, maxDoc, context);
} else if (mode == Mode.DEREF) {
- return new FixedDerefBytesImpl.Reader(dir, id, maxDoc, context);
+ return new FixedDerefBytesImpl.FixedDerefReader(dir, id, maxDoc, context);
} else if (mode == Mode.SORTED) {
- return new FixedSortedBytesImpl.Reader(dir, id, maxDoc, context);
+ return new FixedSortedBytesImpl.Reader(dir, id, maxDoc, context, ValueType.BYTES_FIXED_SORTED, sortComparator);
}
} else {
if (mode == Mode.STRAIGHT) {
- return new VarStraightBytesImpl.Reader(dir, id, maxDoc, context);
+ return new VarStraightBytesImpl.VarStraightReader(dir, id, maxDoc, context);
} else if (mode == Mode.DEREF) {
- return new VarDerefBytesImpl.Reader(dir, id, maxDoc, context);
+ return new VarDerefBytesImpl.VarDerefReader(dir, id, maxDoc, context);
} else if (mode == Mode.SORTED) {
- return new VarSortedBytesImpl.Reader(dir, id, maxDoc, sortComparator, context);
+ return new VarSortedBytesImpl.Reader(dir, id, maxDoc,context, ValueType.BYTES_VAR_SORTED, sortComparator);
}
}
@@ -196,7 +198,6 @@ public final class Bytes {
// TODO open up this API?
static abstract class BytesSourceBase extends Source {
private final PagedBytes pagedBytes;
- private final ValueType type;
protected final IndexInput datIn;
protected final IndexInput idxIn;
protected final static int PAGED_BYTES_BITS = 15;
@@ -206,6 +207,7 @@ public final class Bytes {
protected BytesSourceBase(IndexInput datIn, IndexInput idxIn,
PagedBytes pagedBytes, long bytesToRead, ValueType type) throws IOException {
+ super(type);
assert bytesToRead <= datIn.length() : " file size is less than the expected size diff: "
+ (bytesToRead - datIn.length()) + " pos: " + datIn.getFilePointer();
this.datIn = datIn;
@@ -214,192 +216,15 @@ public final class Bytes {
this.pagedBytes.copy(datIn, bytesToRead);
data = pagedBytes.freeze(true);
this.idxIn = idxIn;
- this.type = type;
- }
-
- public void close() throws IOException {
- try {
- data.close(); // close data
- } finally {
- try {
- if (datIn != null) {
- datIn.close();
- }
- } finally {
- if (idxIn != null) {// if straight - no index needed
- idxIn.close();
- }
- }
- }
- }
-
- @Override
- public ValueType type() {
- return type;
- }
-
-
- @Override
- public int getValueCount() {
- throw new UnsupportedOperationException();
}
-
- /**
- * Returns one greater than the largest possible document number.
- */
- protected abstract int maxDoc();
-
- @Override
- public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
- return new SourceEnum(attrSource, type(), this, maxDoc()) {
- @Override
- public int advance(int target) throws IOException {
- if (target >= numDocs) {
- return pos = NO_MORE_DOCS;
- }
- while (source.getBytes(target, bytesRef).length == 0) {
- if (++target >= numDocs) {
- return pos = NO_MORE_DOCS;
- }
- }
- return pos = target;
- }
- };
- }
-
}
- static abstract class DerefBytesSourceBase extends BytesSourceBase {
- protected final PackedInts.Reader addresses;
- public DerefBytesSourceBase(IndexInput datIn, IndexInput idxIn, long bytesToRead, ValueType type) throws IOException {
- super(datIn, idxIn, new PagedBytes(PAGED_BYTES_BITS), bytesToRead, type);
- addresses = PackedInts.getReader(idxIn);
- }
-
- @Override
- public int getValueCount() {
- return addresses.size();
- }
-
- @Override
- protected int maxDoc() {
- return addresses.size();
- }
-
- }
-
- static abstract class BytesSortedSourceBase extends SortedSource {
- private final PagedBytes pagedBytes;
- private final Comparator<BytesRef> comp;
- protected final PackedInts.Reader docToOrdIndex;
- private final ValueType type;
-
- protected final IndexInput datIn;
- protected final IndexInput idxIn;
- protected final BytesRef defaultValue = new BytesRef();
- protected final static int PAGED_BYTES_BITS = 15;
- protected final PagedBytes.Reader data;
-
-
- protected BytesSortedSourceBase(IndexInput datIn, IndexInput idxIn,
- Comparator<BytesRef> comp, long bytesToRead, ValueType type) throws IOException {
- this(datIn, idxIn, comp, new PagedBytes(PAGED_BYTES_BITS), bytesToRead, type);
- }
-
- protected BytesSortedSourceBase(IndexInput datIn, IndexInput idxIn,
- Comparator<BytesRef> comp, PagedBytes pagedBytes, long bytesToRead,ValueType type)
- throws IOException {
- assert bytesToRead <= datIn.length() : " file size is less than the expected size diff: "
- + (bytesToRead - datIn.length()) + " pos: " + datIn.getFilePointer();
- this.datIn = datIn;
- this.pagedBytes = pagedBytes;
- this.pagedBytes.copy(datIn, bytesToRead);
- data = pagedBytes.freeze(true);
- this.idxIn = idxIn;
- this.comp = comp == null ? BytesRef.getUTF8SortedAsUnicodeComparator()
- : comp;
- docToOrdIndex = PackedInts.getReader(idxIn);
- this.type = type;
-
- }
-
- @Override
- public int ord(int docID) {
- return (int) docToOrdIndex.get(docID) -1;
- }
-
- @Override
- public BytesRef getByOrd(int ord, BytesRef bytesRef) {
- assert ord >= 0;
- return deref(ord, bytesRef);
- }
-
- protected void closeIndexInput() throws IOException {
- IOUtils.close(datIn, idxIn);
- }
-
- /**
- * Returns the largest doc id + 1 in this doc values source
- */
- public int maxDoc() {
- return docToOrdIndex.size();
- }
- /**
- * Copies the value for the given ord to the given {@link BytesRef} and
- * returns it.
- */
- protected abstract BytesRef deref(int ord, BytesRef bytesRef);
-
- protected int binarySearch(BytesRef b, BytesRef bytesRef, int low,
- int high) {
- int mid = 0;
- while (low <= high) {
- mid = (low + high) >>> 1;
- deref(mid, bytesRef);
- final int cmp = comp.compare(bytesRef, b);
- if (cmp < 0) {
- low = mid + 1;
- } else if (cmp > 0) {
- high = mid - 1;
- } else {
- return mid;
- }
- }
- assert comp.compare(bytesRef, b) != 0;
- return -(low + 1);
- }
-
- @Override
- public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
- return new SourceEnum(attrSource, type(), this, maxDoc()) {
-
- @Override
- public int advance(int target) throws IOException {
- if (target >= numDocs) {
- return pos = NO_MORE_DOCS;
- }
- while (source.getBytes(target, bytesRef).length == 0) {
- if (++target >= numDocs) {
- return pos = NO_MORE_DOCS;
- }
- }
- return pos = target;
- }
- };
- }
-
- @Override
- public ValueType type() {
- return type;
- }
- }
-
// TODO: open up this API?!
static abstract class BytesWriterBase extends Writer {
private final String id;
private IndexOutput idxOut;
private IndexOutput datOut;
- protected BytesRef bytesRef;
+ protected BytesRef bytesRef = new BytesRef();
private final Directory dir;
private final String codecName;
private final int version;
@@ -467,8 +292,8 @@ public final class Bytes {
public abstract void finish(int docCount) throws IOException;
@Override
- protected void mergeDoc(int docID) throws IOException {
- add(docID, bytesRef);
+ protected void mergeDoc(int docID, int sourceDoc) throws IOException {
+ add(docID, currentMergeSource.getBytes(sourceDoc, bytesRef));
}
@Override
@@ -480,11 +305,6 @@ public final class Bytes {
}
@Override
- protected void setNextEnum(ValuesEnum valuesEnum) {
- bytesRef = valuesEnum.bytes();
- }
-
- @Override
public void files(Collection<String> files) throws IOException {
assert datOut != null;
files.add(IndexFileNames.segmentFileName(id, "", DATA_EXTENSION));
@@ -506,30 +326,34 @@ public final class Bytes {
protected final IndexInput datIn;
protected final int version;
protected final String id;
+ protected final ValueType type;
protected BytesReaderBase(Directory dir, String id, String codecName,
- int maxVersion, boolean doIndex, IOContext context) throws IOException {
- this.id = id;
- datIn = dir.openInput(IndexFileNames.segmentFileName(id, "",
- Writer.DATA_EXTENSION), context);
+ int maxVersion, boolean doIndex, IOContext context, ValueType type) throws IOException {
+ IndexInput dataIn = null;
+ IndexInput indexIn = null;
boolean success = false;
try {
- version = CodecUtil.checkHeader(datIn, codecName, maxVersion, maxVersion);
- if (doIndex) {
- idxIn = dir.openInput(IndexFileNames.segmentFileName(id, "",
- Writer.INDEX_EXTENSION), context);
- final int version2 = CodecUtil.checkHeader(idxIn, codecName,
- maxVersion, maxVersion);
- assert version == version2;
- } else {
- idxIn = null;
- }
- success = true;
+ dataIn = dir.openInput(IndexFileNames.segmentFileName(id, "",
+ Writer.DATA_EXTENSION), context);
+ version = CodecUtil.checkHeader(dataIn, codecName, maxVersion, maxVersion);
+ if (doIndex) {
+ indexIn = dir.openInput(IndexFileNames.segmentFileName(id, "",
+ Writer.INDEX_EXTENSION), context);
+ final int version2 = CodecUtil.checkHeader(indexIn, codecName,
+ maxVersion, maxVersion);
+ assert version == version2;
+ }
+ success = true;
} finally {
if (!success) {
- closeInternal();
+ IOUtils.closeWhileHandlingException(dataIn, indexIn);
}
}
+ datIn = dataIn;
+ idxIn = indexIn;
+ this.type = type;
+ this.id = id;
}
/**
@@ -553,23 +377,20 @@ public final class Bytes {
try {
super.close();
} finally {
- closeInternal();
+ IOUtils.close(datIn, idxIn);
}
}
-
- private void closeInternal() throws IOException {
- try {
- datIn.close();
- } finally {
- if (idxIn != null) {
- idxIn.close();
- }
- }
+
+ @Override
+ public ValueType type() {
+ return type;
}
+
}
static abstract class DerefBytesWriterBase extends BytesWriterBase {
protected int size = -1;
+ protected int lastDocId = -1;
protected int[] docToEntry;
protected final BytesRefHash hash;
@@ -608,17 +429,33 @@ public final class Bytes {
return;
}
checkSize(bytes);
+ fillDefault(docID);
int ord = hash.add(bytes);
if (ord < 0) {
ord = (-ord) - 1;
}
+
+ docToEntry[docID] = ord;
+ lastDocId = docID;
+ }
+
+ protected void fillDefault(int docID) {
if (docID >= docToEntry.length) {
final int size = docToEntry.length;
docToEntry = ArrayUtil.grow(docToEntry, 1 + docID);
bytesUsed.addAndGet((docToEntry.length - size)
* RamUsageEstimator.NUM_BYTES_INT);
}
- docToEntry[docID] = 1 + ord;
+ assert size >= 0;
+ BytesRef ref = new BytesRef(size);
+ ref.length = size;
+ int ord = hash.add(ref);
+ if (ord < 0) {
+ ord = (-ord) - 1;
+ }
+ for (int i = lastDocId+1; i < docID; i++) {
+ docToEntry[i] = ord;
+ }
}
protected void checkSize(BytesRef bytes) {
@@ -713,77 +550,50 @@ public final class Bytes {
}
- abstract static class DerefBytesEnumBase extends ValuesEnum {
- private final PackedInts.ReaderIterator idx;
- private final int valueCount;
- private int pos = -1;
+ static abstract class BytesSortedSourceBase extends SortedSource {
+ private final PagedBytes pagedBytes;
+
+ protected final PackedInts.Reader docToOrdIndex;
protected final IndexInput datIn;
- protected final long fp;
- protected final int size;
+ protected final IndexInput idxIn;
+ protected final BytesRef defaultValue = new BytesRef();
+ protected final static int PAGED_BYTES_BITS = 15;
+ protected final PagedBytes.Reader data;
- protected DerefBytesEnumBase(AttributeSource source, IndexInput datIn,
- IndexInput idxIn, int size, ValueType enumType) throws IOException {
- super(source, enumType);
- this.datIn = datIn;
- this.size = size;
- idx = PackedInts.getReaderIterator(idxIn);
- fp = datIn.getFilePointer();
- if (size > 0) {
- bytesRef.grow(this.size);
- bytesRef.length = this.size;
- }
- bytesRef.offset = 0;
- valueCount = idx.size();
- }
-
- protected void copyFrom(ValuesEnum valuesEnum) {
- bytesRef = valuesEnum.bytesRef;
- if (bytesRef.bytes.length < size) {
- bytesRef.grow(size);
- }
- bytesRef.length = size;
- bytesRef.offset = 0;
+ protected BytesSortedSourceBase(IndexInput datIn, IndexInput idxIn,
+ Comparator<BytesRef> comp, long bytesToRead, ValueType type) throws IOException {
+ this(datIn, idxIn, comp, new PagedBytes(PAGED_BYTES_BITS), bytesToRead, type);
}
+
+ protected BytesSortedSourceBase(IndexInput datIn, IndexInput idxIn,
+ Comparator<BytesRef> comp, PagedBytes pagedBytes, long bytesToRead,ValueType type)
+ throws IOException {
+ super(type, comp);
+ assert bytesToRead <= datIn.length() : " file size is less than the expected size diff: "
+ + (bytesToRead - datIn.length()) + " pos: " + datIn.getFilePointer();
+ this.datIn = datIn;
+ this.pagedBytes = pagedBytes;
+ this.pagedBytes.copy(datIn, bytesToRead);
+ data = pagedBytes.freeze(true);
+ this.idxIn = idxIn;
+ docToOrdIndex = PackedInts.getReader(idxIn);
- @Override
- public int advance(int target) throws IOException {
- if (target < valueCount) {
- long address;
- while ((address = idx.advance(target)) == 0) {
- if (++target >= valueCount) {
- return pos = NO_MORE_DOCS;
- }
- }
- pos = idx.ord();
- fill(address, bytesRef);
- return pos;
- }
- return pos = NO_MORE_DOCS;
}
-
+
@Override
- public int nextDoc() throws IOException {
- if (pos >= valueCount) {
- return pos = NO_MORE_DOCS;
- }
- return advance(pos + 1);
+ public int ord(int docID) {
+ return (int) docToOrdIndex.get(docID);
}
- public void close() throws IOException {
- try {
- datIn.close();
- } finally {
- idx.close();
- }
+ protected void closeIndexInput() throws IOException {
+ IOUtils.close(datIn, idxIn);
}
-
- protected abstract void fill(long address, BytesRef ref) throws IOException;
-
- @Override
- public int docID() {
- return pos;
+
+ /**
+ * Returns the largest doc id + 1 in this doc values source
+ */
+ public int maxDoc() {
+ return docToOrdIndex.size();
}
-
}
-
-}
\ No newline at end of file
+}
Added: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/DirectSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/DirectSource.java?rev=1179970&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/DirectSource.java (added)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/DirectSource.java Fri Oct 7 09:05:10 2011
@@ -0,0 +1,137 @@
+package org.apache.lucene.index.values;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.values.IndexDocValues.Source;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * Base class for disk resident source implementations
+ * @lucene.internal
+ */
+abstract class DirectSource extends Source {
+
+ protected final IndexInput data;
+ private final ToNumeric toNumeric;
+ protected final long baseOffset;
+
+ DirectSource(IndexInput input, ValueType type) {
+ super(type);
+ this.data = input;
+ baseOffset = input.getFilePointer();
+ switch (type) {
+ case FIXED_INTS_16:
+ toNumeric = new ShortToLong();
+ break;
+ case FLOAT_32:
+ case FIXED_INTS_32:
+ toNumeric = new IntToLong();
+ break;
+ case FIXED_INTS_8:
+ toNumeric = new ByteToLong();
+ break;
+ default:
+ toNumeric = new LongToLong();
+ }
+ }
+
+ @Override
+ public BytesRef getBytes(int docID, BytesRef ref) {
+ try {
+ final int sizeToRead = position(docID);
+ ref.grow(sizeToRead);
+ data.readBytes(ref.bytes, 0, sizeToRead);
+ ref.length = sizeToRead;
+ ref.offset = 0;
+ return ref;
+ } catch (IOException ex) {
+ throw new IllegalStateException("failed to get value for docID: " + docID, ex);
+ }
+ }
+
+ @Override
+ public long getInt(int docID) {
+ try {
+ position(docID);
+ return toNumeric.toLong(data);
+ } catch (IOException ex) {
+ throw new IllegalStateException("failed to get value for docID: " + docID, ex);
+ }
+ }
+
+ @Override
+ public double getFloat(int docID) {
+ try {
+ position(docID);
+ return toNumeric.toDouble(data);
+ } catch (IOException ex) {
+ throw new IllegalStateException("failed to get value for docID: " + docID, ex);
+ }
+ }
+
+ protected abstract int position(int docID) throws IOException;
+
+ private abstract static class ToNumeric {
+ abstract long toLong(IndexInput input) throws IOException;
+
+ double toDouble(IndexInput input) throws IOException {
+ return toLong(input);
+ }
+ }
+
+ private static final class ByteToLong extends ToNumeric {
+ @Override
+ long toLong(IndexInput input) throws IOException {
+ return input.readByte();
+ }
+
+ }
+
+ private static final class ShortToLong extends ToNumeric {
+ @Override
+ long toLong(IndexInput input) throws IOException {
+ return input.readShort();
+ }
+ }
+
+ private static final class IntToLong extends ToNumeric {
+ @Override
+ long toLong(IndexInput input) throws IOException {
+ return input.readInt();
+ }
+
+ double toDouble(IndexInput input) throws IOException {
+ return Float.intBitsToFloat(input.readInt());
+ }
+ }
+
+ private static final class LongToLong extends ToNumeric {
+ @Override
+ long toLong(IndexInput input) throws IOException {
+ return input.readLong();
+ }
+
+ double toDouble(IndexInput input) throws IOException {
+ return Double.longBitsToDouble(input.readLong());
+ }
+ }
+
+}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java?rev=1179970&r1=1179969&r2=1179970&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java Fri Oct 7 09:05:10 2011
@@ -20,16 +20,17 @@ package org.apache.lucene.index.values;
import java.io.IOException;
import org.apache.lucene.index.values.Bytes.BytesReaderBase;
-import org.apache.lucene.index.values.Bytes.DerefBytesSourceBase;
-import org.apache.lucene.index.values.Bytes.DerefBytesEnumBase;
+import org.apache.lucene.index.values.Bytes.BytesSourceBase;
import org.apache.lucene.index.values.Bytes.DerefBytesWriterBase;
+import org.apache.lucene.index.values.DirectSource;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Counter;
+import org.apache.lucene.util.PagedBytes;
+import org.apache.lucene.util.packed.PackedInts;
// Stores fixed-length byte[] by deref, ie when two docs
// have the same value, they store only 1 byte[]
@@ -66,63 +67,61 @@ class FixedDerefBytesImpl {
}
}
- public static class Reader extends BytesReaderBase {
+ public static class FixedDerefReader extends BytesReaderBase {
private final int size;
private final int numValuesStored;
- Reader(Directory dir, String id, int maxDoc, IOContext context) throws IOException {
- super(dir, id, CODEC_NAME, VERSION_START, true, context);
+ FixedDerefReader(Directory dir, String id, int maxDoc, IOContext context) throws IOException {
+ super(dir, id, CODEC_NAME, VERSION_START, true, context, ValueType.BYTES_FIXED_DEREF);
size = datIn.readInt();
numValuesStored = idxIn.readInt();
}
@Override
public Source load() throws IOException {
- return new Source(cloneData(), cloneIndex(), size, numValuesStored);
+ return new FixedDerefSource(cloneData(), cloneIndex(), size, numValuesStored);
}
- private static final class Source extends DerefBytesSourceBase {
- private final int size;
-
- protected Source(IndexInput datIn, IndexInput idxIn, int size, long numValues) throws IOException {
- super(datIn, idxIn, size * numValues, ValueType.BYTES_FIXED_DEREF);
- this.size = size;
- }
-
- @Override
- public BytesRef getBytes(int docID, BytesRef bytesRef) {
- final int id = (int) addresses.get(docID);
- if (id == 0) {
- bytesRef.length = 0;
- return bytesRef;
- }
- return data.fillSlice(bytesRef, ((id - 1) * size), size);
- }
+ @Override
+ public Source getDirectSource()
+ throws IOException {
+ return new DirectFixedDerefSource(cloneData(), cloneIndex(), size, type());
+ }
+ }
+
+ static final class FixedDerefSource extends BytesSourceBase {
+ private final int size;
+ private final PackedInts.Reader addresses;
+ protected FixedDerefSource(IndexInput datIn, IndexInput idxIn, int size, long numValues) throws IOException {
+ super(datIn, idxIn, new PagedBytes(PAGED_BYTES_BITS), size * numValues,
+ ValueType.BYTES_FIXED_DEREF);
+ this.size = size;
+ addresses = PackedInts.getReader(idxIn);
}
@Override
- public ValuesEnum getEnum(AttributeSource source) throws IOException {
- return new DerefBytesEnum(source, cloneData(), cloneIndex(), size);
+ public BytesRef getBytes(int docID, BytesRef bytesRef) {
+ final int id = (int) addresses.get(docID);
+ return data.fillSlice(bytesRef, (id * size), size);
}
- final static class DerefBytesEnum extends DerefBytesEnumBase {
-
- public DerefBytesEnum(AttributeSource source, IndexInput datIn,
- IndexInput idxIn, int size) throws IOException {
- super(source, datIn, idxIn, size, ValueType.BYTES_FIXED_DEREF);
- }
+ }
+
+ final static class DirectFixedDerefSource extends DirectSource {
+ private final PackedInts.RandomAccessReaderIterator index;
+ private final int size;
- protected void fill(long address, BytesRef ref) throws IOException {
- datIn.seek(fp + ((address - 1) * size));
- datIn.readBytes(ref.bytes, 0, size);
- ref.length = size;
- ref.offset = 0;
- }
+ DirectFixedDerefSource(IndexInput data, IndexInput index, int size, ValueType type)
+ throws IOException {
+ super(data, type);
+ this.size = size;
+ this.index = PackedInts.getRandomAccessReaderIterator(index);
}
@Override
- public ValueType type() {
- return ValueType.BYTES_FIXED_DEREF;
+ protected int position(int docID) throws IOException {
+ data.seek(baseOffset + index.get(docID) * size);
+ return size;
}
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java?rev=1179970&r1=1179969&r2=1179970&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java Fri Oct 7 09:05:10 2011
@@ -23,14 +23,14 @@ import java.util.Comparator;
import org.apache.lucene.index.values.Bytes.BytesSortedSourceBase;
import org.apache.lucene.index.values.Bytes.BytesReaderBase;
import org.apache.lucene.index.values.Bytes.DerefBytesWriterBase;
-import org.apache.lucene.index.values.FixedDerefBytesImpl.Reader.DerefBytesEnum;
+import org.apache.lucene.index.values.IndexDocValues.SortedSource;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Counter;
+import org.apache.lucene.util.packed.PackedInts;
// Stores fixed-length byte[] by deref, ie when two docs
// have the same value, they store only 1 byte[]
@@ -44,7 +44,7 @@ class FixedSortedBytesImpl {
static final int VERSION_START = 0;
static final int VERSION_CURRENT = VERSION_START;
- static class Writer extends DerefBytesWriterBase {
+ static final class Writer extends DerefBytesWriterBase {
private final Comparator<BytesRef> comp;
public Writer(Directory dir, String id, Comparator<BytesRef> comp,
@@ -57,9 +57,10 @@ class FixedSortedBytesImpl {
// some last docs that we didn't see
@Override
public void finishInternal(int docCount) throws IOException {
+ fillDefault(docCount);
final IndexOutput datOut = getOrCreateDataOut();
final int count = hash.size();
- final int[] address = new int[count+1]; // addr 0 is default values
+ final int[] address = new int[count]; // addr 0 is default values
datOut.writeInt(size);
if (size != -1) {
final int[] sortedEntries = hash.sort(comp);
@@ -70,7 +71,7 @@ class FixedSortedBytesImpl {
final BytesRef bytes = hash.get(e, bytesRef);
assert bytes.length == size;
datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length);
- address[e + 1] = 1 + i;
+ address[e] = i;
}
}
final IndexOutput idxOut = getOrCreateIndexOut();
@@ -79,65 +80,101 @@ class FixedSortedBytesImpl {
}
}
- public static class Reader extends BytesReaderBase {
+ static final class Reader extends BytesReaderBase {
private final int size;
- private final int numValuesStored;
+ private final int valueCount;
+ private final Comparator<BytesRef> comparator;
- public Reader(Directory dir, String id, int maxDoc, IOContext context) throws IOException {
- super(dir, id, CODEC_NAME, VERSION_START, true, context);
+ public Reader(Directory dir, String id, int maxDoc, IOContext context,
+ ValueType type, Comparator<BytesRef> comparator) throws IOException {
+ super(dir, id, CODEC_NAME, VERSION_START, true, context, type);
size = datIn.readInt();
- numValuesStored = idxIn.readInt();
+ valueCount = idxIn.readInt();
+ this.comparator = comparator;
}
@Override
- public org.apache.lucene.index.values.IndexDocValues.Source load()
- throws IOException {
- return loadSorted(null);
+ public Source load() throws IOException {
+ return new FixedSortedSource(cloneData(), cloneIndex(), size,
+ valueCount, comparator);
}
@Override
- public SortedSource loadSorted(Comparator<BytesRef> comp)
- throws IOException {
- return new Source(cloneData(), cloneIndex(), size, numValuesStored, comp);
+ public Source getDirectSource() throws IOException {
+ return new DirectFixedSortedSource(cloneData(), cloneIndex(), size,
+ valueCount, comparator, type);
}
+ }
- private static class Source extends BytesSortedSourceBase {
- private final int valueCount;
- private final int size;
-
- public Source(IndexInput datIn, IndexInput idxIn, int size,
- int numValues, Comparator<BytesRef> comp) throws IOException {
- super(datIn, idxIn, comp, size * numValues, ValueType.BYTES_FIXED_SORTED);
- this.size = size;
- this.valueCount = numValues;
- closeIndexInput();
- }
+ static final class FixedSortedSource extends BytesSortedSourceBase {
+ private final int valueCount;
+ private final int size;
- @Override
- public int getByValue(BytesRef bytes, BytesRef tmpRef) {
- return binarySearch(bytes, tmpRef, 0, valueCount - 1);
- }
+ FixedSortedSource(IndexInput datIn, IndexInput idxIn, int size,
+ int numValues, Comparator<BytesRef> comp) throws IOException {
+ super(datIn, idxIn, comp, size * numValues, ValueType.BYTES_FIXED_SORTED);
+ this.size = size;
+ this.valueCount = numValues;
+ closeIndexInput();
+ }
- @Override
- public int getValueCount() {
- return valueCount;
- }
+ @Override
+ public int getValueCount() {
+ return valueCount;
+ }
+
+ @Override
+ public BytesRef getByOrd(int ord, BytesRef bytesRef) {
+ return data.fillSlice(bytesRef, (ord * size), size);
+ }
+ }
- @Override
- protected BytesRef deref(int ord, BytesRef bytesRef) {
- return data.fillSlice(bytesRef, (ord * size), size);
+ static final class DirectFixedSortedSource extends SortedSource {
+ final PackedInts.RandomAccessReaderIterator docToOrdIndex;
+ private final IndexInput datIn;
+ private final long basePointer;
+ private final int size;
+ private final int valueCount;
+
+ DirectFixedSortedSource(IndexInput datIn, IndexInput idxIn, int size,
+ int valueCount, Comparator<BytesRef> comp, ValueType type)
+ throws IOException {
+ super(type, comp);
+ docToOrdIndex = PackedInts.getRandomAccessReaderIterator(idxIn);
+ basePointer = datIn.getFilePointer();
+ this.datIn = datIn;
+ this.size = size;
+ this.valueCount = valueCount;
+ }
+
+ @Override
+ public int ord(int docID) {
+ try {
+ return (int) docToOrdIndex.get(docID);
+ } catch (IOException e) {
+ throw new IllegalStateException("failed to get ord", e);
}
}
@Override
- public ValuesEnum getEnum(AttributeSource source) throws IOException {
- // do unsorted
- return new DerefBytesEnum(source, cloneData(), cloneIndex(), size);
+ public BytesRef getByOrd(int ord, BytesRef bytesRef) {
+ try {
+ datIn.seek(basePointer + size * ord);
+ if (bytesRef.bytes.length < size) {
+ bytesRef.grow(size);
+ }
+ datIn.readBytes(bytesRef.bytes, 0, size);
+ bytesRef.length = size;
+ bytesRef.offset = 0;
+ return bytesRef;
+ } catch (IOException ex) {
+ throw new IllegalStateException("failed to getByOrd", ex);
+ }
}
@Override
- public ValueType type() {
- return ValueType.BYTES_FIXED_SORTED;
+ public int getValueCount() {
+ return valueCount;
}
}
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java?rev=1179970&r1=1179969&r2=1179970&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java Fri Oct 7 09:05:10 2011
@@ -24,11 +24,12 @@ import java.io.IOException;
import org.apache.lucene.index.values.Bytes.BytesSourceBase;
import org.apache.lucene.index.values.Bytes.BytesReaderBase;
import org.apache.lucene.index.values.Bytes.BytesWriterBase;
+import org.apache.lucene.index.values.DirectSource;
+import org.apache.lucene.index.values.IndexDocValues.Source;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.ByteBlockPool.DirectTrackingAllocator;
import org.apache.lucene.util.BytesRef;
@@ -137,8 +138,8 @@ class FixedStraightBytesImpl {
datOut = getOrCreateDataOut();
boolean success = false;
try {
- if (state.liveDocs == null && state.reader instanceof Reader ) {
- Reader reader = (Reader) state.reader;
+ if (state.liveDocs == null && state.reader instanceof FixedStraightReader ) {
+ FixedStraightReader reader = (FixedStraightReader) state.reader;
final int maxDocs = reader.maxDoc;
if (maxDocs == 0) {
return;
@@ -175,8 +176,9 @@ class FixedStraightBytesImpl {
}
@Override
- protected void mergeDoc(int docID) throws IOException {
+ protected void mergeDoc(int docID, int sourceDoc) throws IOException {
assert lastDocID < docID;
+ currentMergeSource.getBytes(sourceDoc, bytesRef);
if (size == -1) {
size = bytesRef.length;
datOut.writeInt(size);
@@ -236,16 +238,16 @@ class FixedStraightBytesImpl {
}
- public static class Reader extends BytesReaderBase {
+ public static class FixedStraightReader extends BytesReaderBase {
protected final int size;
protected final int maxDoc;
- Reader(Directory dir, String id, int maxDoc, IOContext context) throws IOException {
- this(dir, id, CODEC_NAME, VERSION_CURRENT, maxDoc, context);
+ FixedStraightReader(Directory dir, String id, int maxDoc, IOContext context) throws IOException {
+ this(dir, id, CODEC_NAME, VERSION_CURRENT, maxDoc, context, ValueType.BYTES_FIXED_STRAIGHT);
}
- protected Reader(Directory dir, String id, String codec, int version, int maxDoc, IOContext context) throws IOException {
- super(dir, id, codec, version, false, context);
+ protected FixedStraightReader(Directory dir, String id, String codec, int version, int maxDoc, IOContext context, ValueType type) throws IOException {
+ super(dir, id, codec, version, false, context, type);
size = datIn.readInt();
this.maxDoc = maxDoc;
}
@@ -253,155 +255,83 @@ class FixedStraightBytesImpl {
@Override
public Source load() throws IOException {
return size == 1 ? new SingleByteSource(cloneData(), maxDoc) :
- new StraightBytesSource(cloneData(), size, maxDoc);
+ new FixedStraightSource(cloneData(), size, maxDoc, type);
}
@Override
public void close() throws IOException {
datIn.close();
}
-
- // specialized version for single bytes
- private static class SingleByteSource extends Source {
- private final int maxDoc;
- private final byte[] data;
-
- public SingleByteSource(IndexInput datIn, int maxDoc) throws IOException {
- this.maxDoc = maxDoc;
- try {
- data = new byte[maxDoc];
- datIn.readBytes(data, 0, data.length, false);
- } finally {
- IOUtils.close(datIn);
- }
-
- }
-
- @Override
- public BytesRef getBytes(int docID, BytesRef bytesRef) {
- bytesRef.length = 1;
- bytesRef.bytes = data;
- bytesRef.offset = docID;
- return bytesRef;
- }
-
- @Override
- public ValueType type() {
- return ValueType.BYTES_FIXED_STRAIGHT;
- }
-
- @Override
- public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
- return new SourceEnum(attrSource, type(), this, maxDoc) {
- @Override
- public int advance(int target) throws IOException {
- if (target >= numDocs) {
- return pos = NO_MORE_DOCS;
- }
- bytesRef.length = 1;
- bytesRef.bytes = data;
- bytesRef.offset = target;
- return pos = target;
- }
- };
- }
-
+
+ @Override
+ public Source getDirectSource() throws IOException {
+ return new DirectFixedStraightSource(cloneData(), size, type());
}
+ }
+
+ // specialized version for single bytes
+ private static final class SingleByteSource extends Source {
+ private final byte[] data;
- private final static class StraightBytesSource extends BytesSourceBase {
- private final int size;
- private final int maxDoc;
-
- public StraightBytesSource(IndexInput datIn, int size, int maxDoc)
- throws IOException {
- super(datIn, null, new PagedBytes(PAGED_BYTES_BITS), size * maxDoc, ValueType.BYTES_FIXED_STRAIGHT);
- this.size = size;
- this.maxDoc = maxDoc;
- }
-
- @Override
- public BytesRef getBytes(int docID, BytesRef bytesRef) {
- return data.fillSlice(bytesRef, docID * size, size);
- }
-
- @Override
- public int getValueCount() {
- return maxDoc;
- }
-
- @Override
- protected int maxDoc() {
- return maxDoc;
+ public SingleByteSource(IndexInput datIn, int maxDoc) throws IOException {
+ super(ValueType.BYTES_FIXED_STRAIGHT);
+ try {
+ data = new byte[maxDoc];
+ datIn.readBytes(data, 0, data.length, false);
+ } finally {
+ IOUtils.close(datIn);
}
}
-
+
@Override
- public ValuesEnum getEnum(AttributeSource source) throws IOException {
- return new FixedStraightBytesEnum(source, cloneData(), size, maxDoc);
+ public boolean hasArray() {
+ return true;
}
-
+ @Override
+ public Object getArray() {
+ return data;
+ }
@Override
- public ValueType type() {
- return ValueType.BYTES_FIXED_STRAIGHT;
+ public BytesRef getBytes(int docID, BytesRef bytesRef) {
+ bytesRef.length = 1;
+ bytesRef.bytes = data;
+ bytesRef.offset = docID;
+ return bytesRef;
}
}
+
- static class FixedStraightBytesEnum extends ValuesEnum {
- private final IndexInput datIn;
+ private final static class FixedStraightSource extends BytesSourceBase {
private final int size;
- private final int maxDoc;
- private int pos = -1;
- private final long fp;
-
- public FixedStraightBytesEnum(AttributeSource source, IndexInput datIn,
- int size, int maxDoc) throws IOException {
- super(source, ValueType.BYTES_FIXED_STRAIGHT);
- this.datIn = datIn;
- this.size = size;
- this.maxDoc = maxDoc;
- bytesRef.grow(size);
- bytesRef.length = size;
- bytesRef.offset = 0;
- fp = datIn.getFilePointer();
- }
-
- protected void copyFrom(ValuesEnum valuesEnum) {
- super.copyFrom(valuesEnum);
- if (bytesRef.bytes.length < size) {
- bytesRef.grow(size);
- }
- bytesRef.length = size;
- bytesRef.offset = 0;
- }
- public void close() throws IOException {
- datIn.close();
+ public FixedStraightSource(IndexInput datIn, int size, int maxDoc, ValueType type)
+ throws IOException {
+ super(datIn, null, new PagedBytes(PAGED_BYTES_BITS), size * maxDoc,
+ type);
+ this.size = size;
}
@Override
- public int advance(int target) throws IOException {
- if (target >= maxDoc || size == 0) {
- return pos = NO_MORE_DOCS;
- }
- if ((target - 1) != pos) // pos inc == 1
- datIn.seek(fp + target * size);
- datIn.readBytes(bytesRef.bytes, 0, size);
- return pos = target;
+ public BytesRef getBytes(int docID, BytesRef bytesRef) {
+ return data.fillSlice(bytesRef, docID * size, size);
}
+ }
+
+ public final static class DirectFixedStraightSource extends DirectSource {
+ private final int size;
- @Override
- public int docID() {
- return pos;
+ DirectFixedStraightSource(IndexInput input, int size, ValueType type) {
+ super(input, type);
+ this.size = size;
}
@Override
- public int nextDoc() throws IOException {
- if (pos >= maxDoc) {
- return pos = NO_MORE_DOCS;
- }
- return advance(pos + 1);
+ protected int position(int docID) throws IOException {
+ data.seek(baseOffset + size * docID);
+ return size;
}
+
}
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/Floats.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/Floats.java?rev=1179970&r1=1179969&r2=1179970&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/Floats.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/Floats.java Fri Oct 7 09:05:10 2011
@@ -22,9 +22,9 @@ import org.apache.lucene.index.values.In
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Counter;
+import org.apache.lucene.util.IOUtils;
/**
* Exposes {@link Writer} and reader ({@link Source}) for 32 bit and 64 bit
@@ -37,37 +37,47 @@ import org.apache.lucene.util.Counter;
*/
public class Floats {
- public static Writer getWriter(Directory dir, String id, int precisionBytes,
- Counter bytesUsed, IOContext context) throws IOException {
- if (precisionBytes != 4 && precisionBytes != 8) {
- throw new IllegalArgumentException("precisionBytes must be 4 or 8; got "
- + precisionBytes);
- }
- return new FloatsWriter(dir, id, bytesUsed, context, precisionBytes);
-
+ protected static final String CODEC_NAME = "Floats";
+ protected static final int VERSION_START = 0;
+ protected static final int VERSION_CURRENT = VERSION_START;
+
+ public static Writer getWriter(Directory dir, String id, Counter bytesUsed,
+ IOContext context, ValueType type) throws IOException {
+ return new FloatsWriter(dir, id, bytesUsed, context, type);
}
- public static IndexDocValues getValues(Directory dir, String id, int maxDoc, IOContext context)
+ public static IndexDocValues getValues(Directory dir, String id, int maxDoc, IOContext context, ValueType type)
throws IOException {
- return new FloatsReader(dir, id, maxDoc, context);
+ return new FloatsReader(dir, id, maxDoc, context, type);
+ }
+
+ private static int typeToSize(ValueType type) {
+ switch (type) {
+ case FLOAT_32:
+ return 4;
+ case FLOAT_64:
+ return 8;
+ default:
+ throw new IllegalStateException("illegal type " + type);
+ }
}
final static class FloatsWriter extends FixedStraightBytesImpl.Writer {
+
private final int size;
+ private final IndexDocValuesArray template;
public FloatsWriter(Directory dir, String id, Counter bytesUsed,
- IOContext context, int size) throws IOException {
- super(dir, id, bytesUsed, context);
+ IOContext context, ValueType type) throws IOException {
+ super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context);
+ size = typeToSize(type);
this.bytesRef = new BytesRef(size);
- this.size = size;
bytesRef.length = size;
+ template = IndexDocValuesArray.TEMPLATES.get(type);
+ assert template != null;
}
public void add(int docID, double v) throws IOException {
- if (size == 8) {
- bytesRef.copy(Double.doubleToRawLongBits(v));
- } else {
- bytesRef.copy(Float.floatToRawIntBits((float)v));
- }
+ template.toBytes(v, bytesRef);
add(docID, bytesRef);
}
@@ -76,19 +86,14 @@ public class Floats {
add(docID, docValues.getFloat());
}
}
-
- final static class FloatsReader extends FixedStraightBytesImpl.Reader {
+ final static class FloatsReader extends FixedStraightBytesImpl.FixedStraightReader {
final IndexDocValuesArray arrayTemplate;
- FloatsReader(Directory dir, String id, int maxDoc, IOContext context)
+ FloatsReader(Directory dir, String id, int maxDoc, IOContext context, ValueType type)
throws IOException {
- super(dir, id, maxDoc, context);
+ super(dir, id, CODEC_NAME, VERSION_CURRENT, maxDoc, context, type);
+ arrayTemplate = IndexDocValuesArray.TEMPLATES.get(type);
assert size == 4 || size == 8;
- if (size == 4) {
- arrayTemplate = new IndexDocValuesArray.FloatValues();
- } else {
- arrayTemplate = new IndexDocValuesArray.DoubleValues();
- }
}
@Override
@@ -97,19 +102,10 @@ public class Floats {
try {
return arrayTemplate.newFromInput(indexInput, maxDoc);
} finally {
- indexInput.close();
+ IOUtils.close(indexInput);
}
}
- public ValuesEnum getEnum(AttributeSource source) throws IOException {
- IndexInput indexInput = (IndexInput) datIn.clone();
- return arrayTemplate.getDirectEnum(source, indexInput, maxDoc);
- }
-
- @Override
- public ValueType type() {
- return arrayTemplate.type();
- }
}
}
\ No newline at end of file
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/IndexDocValues.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/IndexDocValues.java?rev=1179970&r1=1179969&r2=1179970&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/IndexDocValues.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/values/IndexDocValues.java Fri Oct 7 09:05:10 2011
@@ -26,7 +26,6 @@ import org.apache.lucene.index.FieldsEnu
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.codecs.CodecProvider;
-import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
/**
@@ -34,8 +33,8 @@ import org.apache.lucene.util.BytesRef;
* value access based on the lucene internal document id. {@link IndexDocValues}
* exposes two distinct APIs:
* <ul>
- * <li>via {@link Source} an entirely RAM resident API for random access</li>
- * <li>via {@link ValuesEnum} a disk resident API for sequential access</li>
+ * <li>via {@link #getSource()} providing RAM resident random access</li>
+ * <li>via {@link #getDirectSource()} providing on disk random access</li>
* </ul> {@link IndexDocValues} are exposed via
* {@link IndexReader#perDocValues()} on a per-segment basis. For best
* performance {@link IndexDocValues} should be consumed per-segment just like
@@ -52,47 +51,18 @@ import org.apache.lucene.util.BytesRef;
* @lucene.experimental
*/
public abstract class IndexDocValues implements Closeable {
- /*
- * TODO: it might be useful to add another Random Access enum for some
- * implementations like packed ints and only return such a random access enum
- * if the impl supports random access. For super large segments it might be
- * useful or even required in certain environements to have disc based random
- * access
- */
- public static final IndexDocValues[] EMPTY_ARRAY = new IndexDocValues[0];
-
- private SourceCache cache = new SourceCache.DirectSourceCache();
- /**
- * Returns an iterator that steps through all documents values for this
- * {@link IndexDocValues} field instance. {@link ValuesEnum} will skip document
- * without a value if applicable.
- */
- public ValuesEnum getEnum() throws IOException {
- return getEnum(null);
- }
-
- /**
- * Returns an iterator that steps through all documents values for this
- * {@link IndexDocValues} field instance. {@link ValuesEnum} will skip document
- * without a value if applicable.
- * <p>
- * If an {@link AttributeSource} is supplied to this method the
- * {@link ValuesEnum} will use the given source to access implementation
- * related attributes.
- */
- public abstract ValuesEnum getEnum(AttributeSource attrSource)
- throws IOException;
+ public static final IndexDocValues[] EMPTY_ARRAY = new IndexDocValues[0];
+ private volatile SourceCache cache = new SourceCache.DirectSourceCache();
+ private final Object cacheLock = new Object();
+
/**
* Loads a new {@link Source} instance for this {@link IndexDocValues} field
* instance. Source instances returned from this method are not cached. It is
* the callers responsibility to maintain the instance and release its
* resources once the source is not needed anymore.
* <p>
- * This method will return null iff this {@link IndexDocValues} represent a
- * {@link SortedSource}.
- * <p>
* For managed {@link Source} instances see {@link #getSource()}.
*
* @see #getSource()
@@ -111,63 +81,18 @@ public abstract class IndexDocValues imp
* from the cache once this {@link IndexDocValues} instance is closed by the
* {@link IndexReader}, {@link Fields} or {@link FieldsEnum} the
* {@link IndexDocValues} was created from.
- * <p>
- * This method will return null iff this {@link IndexDocValues} represent a
- * {@link SortedSource}.
*/
public Source getSource() throws IOException {
return cache.load(this);
}
/**
- * Returns a {@link SortedSource} instance for this {@link IndexDocValues} field
- * instance like {@link #getSource()}.
- * <p>
- * This method will return null iff this {@link IndexDocValues} represent a
- * {@link Source} instead of a {@link SortedSource}.
+ * Returns a disk resident {@link Source} instance. Direct Sources are not
+ * cached in the {@link SourceCache} and should not be shared between threads.
*/
- public SortedSource getSortedSorted(Comparator<BytesRef> comparator)
- throws IOException {
- return cache.loadSorted(this, comparator);
- }
-
- /**
- * Returns a {@link SortedSource} instance using a default {@link BytesRef}
- * comparator for this {@link IndexDocValues} field instance like
- * {@link #getSource()}.
- * <p>
- * This method will return null iff this {@link IndexDocValues} represent a
- * {@link Source} instead of a {@link SortedSource}.
- */
- public SortedSource getSortedSorted() throws IOException {
- return getSortedSorted(null);
- }
+ public abstract Source getDirectSource() throws IOException;
/**
- * Loads and returns a {@link SortedSource} instance for this
- * {@link IndexDocValues} field instance like {@link #load()}.
- * <p>
- * This method will return null iff this {@link IndexDocValues} represent a
- * {@link Source} instead of a {@link SortedSource}.
- */
- public SortedSource loadSorted(Comparator<BytesRef> comparator)
- throws IOException {
- throw new UnsupportedOperationException();
- }
-
- /**
- * Loads and returns a {@link SortedSource} instance using a default
- * {@link BytesRef} comparator for this {@link IndexDocValues} field instance
- * like {@link #load()}.
- * <p>
- * This method will return null iff this {@link IndexDocValues} represent a
- * {@link Source} instead of a {@link SortedSource}.
- */
- public SortedSource loadSorted() throws IOException {
- return loadSorted(null);
- }
-
- /**
* Returns the {@link ValueType} of this {@link IndexDocValues} instance
*/
public abstract ValueType type();
@@ -183,13 +108,10 @@ public abstract class IndexDocValues imp
/**
* Sets the {@link SourceCache} used by this {@link IndexDocValues} instance. This
- * method should be called before {@link #load()} or
- * {@link #loadSorted(Comparator)} is called. All {@link Source} or
- * {@link SortedSource} instances in the currently used cache will be closed
+ * method should be called before {@link #load()} is called. All {@link Source} instances in the currently used cache will be closed
* before the new cache is installed.
* <p>
- * Note: All instances previously obtained from {@link #load()} or
- * {@link #loadSorted(Comparator)} will be closed.
+ * Note: All instances previously obtained from {@link #load()} will be lost.
*
* @throws IllegalArgumentException
* if the given cache is <code>null</code>
@@ -198,9 +120,10 @@ public abstract class IndexDocValues imp
public void setCache(SourceCache cache) {
if (cache == null)
throw new IllegalArgumentException("cache must not be null");
- synchronized (this.cache) {
- this.cache.close(this);
+ synchronized (cacheLock) {
+ SourceCache toClose = this.cache;
this.cache = cache;
+ toClose.close(this);
}
}
@@ -208,12 +131,17 @@ public abstract class IndexDocValues imp
* Source of per document values like long, double or {@link BytesRef}
* depending on the {@link IndexDocValues} fields {@link ValueType}. Source
* implementations provide random access semantics similar to array lookups
- * and typically are entirely memory resident.
* <p>
- * {@link Source} defines 3 {@link ValueType} //TODO finish this
+ * @see IndexDocValues#getSource()
+ * @see IndexDocValues#getDirectSource()
*/
public static abstract class Source {
+
+ protected final ValueType type;
+ protected Source(ValueType type) {
+ this.type = type;
+ }
/**
* Returns a <tt>long</tt> for the given document id or throws an
* {@link UnsupportedOperationException} if this source doesn't support
@@ -242,6 +170,7 @@ public abstract class IndexDocValues imp
* Returns a {@link BytesRef} for the given document id or throws an
* {@link UnsupportedOperationException} if this source doesn't support
* <tt>byte[]</tt> values.
+ * @throws IOException
*
* @throws UnsupportedOperationException
* if this source doesn't support <tt>byte[]</tt> values.
@@ -251,35 +180,15 @@ public abstract class IndexDocValues imp
}
/**
- * Returns number of unique values. Some implementations may throw
- * UnsupportedOperationException.
- */
- public int getValueCount() {
- throw new UnsupportedOperationException();
- }
-
- /**
- * Returns a {@link ValuesEnum} for this source.
- */
- public ValuesEnum getEnum() throws IOException {
- return getEnum(null);
- }
-
- /**
* Returns the {@link ValueType} of this source.
*
* @return the {@link ValueType} of this source.
*/
- public abstract ValueType type();
+ public ValueType type() {
+ return type;
+ }
/**
- * Returns a {@link ValuesEnum} for this source which uses the given
- * {@link AttributeSource}.
- */
- public abstract ValuesEnum getEnum(AttributeSource attrSource)
- throws IOException;
-
- /**
* Returns <code>true</code> iff this {@link Source} exposes an array via
* {@link #getArray()} otherwise <code>false</code>.
*
@@ -297,61 +206,29 @@ public abstract class IndexDocValues imp
public Object getArray() {
return null;
}
- }
-
- /**
- * {@link ValuesEnum} utility for {@link Source} implemenations.
- *
- */
- public abstract static class SourceEnum extends ValuesEnum {
- protected final Source source;
- protected final int numDocs;
- protected int pos = -1;
-
+
/**
- * Creates a new {@link SourceEnum}
- *
- * @param attrs
- * the {@link AttributeSource} for this enum
- * @param type
- * the enums {@link ValueType}
- * @param source
- * the source this enum operates on
- * @param numDocs
- * the number of documents within the source
+ * If this {@link Source} is sorted, this method returns an instance of
+ * {@link SortedSource}; otherwise it returns <code>null</code>.
*/
- protected SourceEnum(AttributeSource attrs, ValueType type, Source source,
- int numDocs) {
- super(attrs, type);
- this.source = source;
- this.numDocs = numDocs;
- }
-
- @Override
- public void close() throws IOException {
- }
-
- @Override
- public int docID() {
- return pos;
- }
-
- @Override
- public int nextDoc() throws IOException {
- if (pos == NO_MORE_DOCS)
- return NO_MORE_DOCS;
- return advance(pos + 1);
+ public SortedSource asSortedSource() {
+ return null;
}
}
-
+
/**
* A sorted variant of {@link Source} for <tt>byte[]</tt> values per document.
* <p>
- * Note: {@link ValuesEnum} obtained from a {@link SortedSource} will
- * enumerate values in document order and not in sorted order.
*/
public static abstract class SortedSource extends Source {
+ private final Comparator<BytesRef> comparator;
+
+ protected SortedSource(ValueType type, Comparator<BytesRef> comparator) {
+ super(type);
+ this.comparator = comparator;
+ }
+
@Override
public BytesRef getBytes(int docID, BytesRef bytesRef) {
final int ord = ord(docID);
@@ -364,8 +241,7 @@ public abstract class IndexDocValues imp
}
/**
- * Returns ord for specified docID. If this docID had not been added to the
- * Writer, the ord is 0. Ord is dense, ie, starts at 0, then increments by 1
+ * Returns the ord for the specified docID. Ord is dense, i.e., it starts at 0, then increments by 1
* for the next (as defined by {@link Comparator} value.
*/
public abstract int ord(int docID);
@@ -373,28 +249,13 @@ public abstract class IndexDocValues imp
/** Returns value for specified ord. */
public abstract BytesRef getByOrd(int ord, BytesRef bytesRef);
-
- /**
- * Finds the ordinal whose value is greater or equal to the given value.
- *
- * @return the given values ordinal if found or otherwise
- * <code>(-(ord)-1)</code>, defined as the ordinal of the first
- * element that is greater than the given value. This guarantees
- * that the return value will always be >= 0 if the given value
- * is found.
- *
- */
- public final int getByValue(BytesRef value) {
- return getByValue(value, new BytesRef());
- }
-
/**
* Performs a lookup by value.
*
* @param value
* the value to look up
- * @param tmpRef
- * a temporary {@link BytesRef} instance used to compare internal
+ * @param spare
+ * a spare {@link BytesRef} instance used to compare internal
* values to the given value. Must not be <code>null</code>
* @return the given values ordinal if found or otherwise
* <code>(-(ord)-1)</code>, defined as the ordinal of the first
@@ -402,6 +263,37 @@ public abstract class IndexDocValues imp
* that the return value will always be >= 0 if the given value
* is found.
*/
- public abstract int getByValue(BytesRef value, BytesRef tmpRef);
+ public int getByValue(BytesRef value, BytesRef spare) {
+ return binarySearch(value, spare, 0, getValueCount() - 1);
+ }
+
+ protected int binarySearch(BytesRef b, BytesRef bytesRef, int low,
+ int high) {
+ int mid = 0;
+ while (low <= high) {
+ mid = (low + high) >>> 1;
+ getByOrd(mid, bytesRef);
+ final int cmp = comparator.compare(bytesRef, b);
+ if (cmp < 0) {
+ low = mid + 1;
+ } else if (cmp > 0) {
+ high = mid - 1;
+ } else {
+ return mid;
+ }
+ }
+ assert comparator.compare(bytesRef, b) != 0;
+ return -(low + 1);
+ }
+
+ @Override
+ public SortedSource asSortedSource() {
+ return this;
+ }
+
+ /**
+ * Returns the number of unique values in this sorted source.
+ */
+ public abstract int getValueCount();
}
}