You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/02/20 12:19:24 UTC
svn commit: r1448085 [2/3] - in /lucene/dev/branches/branch_4x: ./
dev-tools/ lucene/ lucene/codecs/
lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/
lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/ lucene/core/
lucene/core/src/java/o...
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java?rev=1448085&r1=1448084&r2=1448085&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java Wed Feb 20 11:19:22 2013
@@ -44,6 +44,7 @@ import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CommandLineUtil;
import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.OpenBitSet;
import org.apache.lucene.util.StringHelper;
/**
@@ -1291,7 +1292,8 @@ public class CheckIndex {
} else {
if (reader.getBinaryDocValues(fieldInfo.name) != null ||
reader.getNumericDocValues(fieldInfo.name) != null ||
- reader.getSortedDocValues(fieldInfo.name) != null) {
+ reader.getSortedDocValues(fieldInfo.name) != null ||
+ reader.getSortedSetDocValues(fieldInfo.name) != null) {
throw new RuntimeException("field: " + fieldInfo.name + " has docvalues but should omit them!");
}
}
@@ -1349,6 +1351,47 @@ public class CheckIndex {
}
}
+ private static void checkSortedSetDocValues(String fieldName, AtomicReader reader, SortedSetDocValues dv) {
+ final long maxOrd = dv.getValueCount()-1;
+ OpenBitSet seenOrds = new OpenBitSet(dv.getValueCount());
+ long maxOrd2 = -1;
+ for (int i = 0; i < reader.maxDoc(); i++) {
+ dv.setDocument(i);
+ long lastOrd = -1;
+ long ord;
+ while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
+ if (ord <= lastOrd) {
+ throw new RuntimeException("ords out of order: " + ord + " <= " + lastOrd + " for doc: " + i);
+ }
+ if (ord < 0 || ord > maxOrd) {
+ throw new RuntimeException("ord out of bounds: " + ord);
+ }
+ lastOrd = ord;
+ maxOrd2 = Math.max(maxOrd2, ord);
+ seenOrds.set(ord);
+ }
+ }
+ if (maxOrd != maxOrd2) {
+ throw new RuntimeException("dv for field: " + fieldName + " reports wrong maxOrd=" + maxOrd + " but this is not the case: " + maxOrd2);
+ }
+ if (seenOrds.cardinality() != dv.getValueCount()) {
+ throw new RuntimeException("dv for field: " + fieldName + " has holes in its ords, valueCount=" + dv.getValueCount() + " but only used: " + seenOrds.cardinality());
+ }
+
+ BytesRef lastValue = null;
+ BytesRef scratch = new BytesRef();
+ for (long i = 0; i <= maxOrd; i++) {
+ dv.lookupOrd(i, scratch);
+ assert scratch.isValid();
+ if (lastValue != null) {
+ if (scratch.compareTo(lastValue) <= 0) {
+ throw new RuntimeException("dv for field: " + fieldName + " has ords out of order: " + lastValue + " >=" + scratch);
+ }
+ }
+ lastValue = BytesRef.deepCopyOf(scratch);
+ }
+ }
+
private static void checkNumericDocValues(String fieldName, AtomicReader reader, NumericDocValues ndv) {
for (int i = 0; i < reader.maxDoc(); i++) {
ndv.get(i);
@@ -1359,12 +1402,35 @@ public class CheckIndex {
switch(fi.getDocValuesType()) {
case SORTED:
checkSortedDocValues(fi.name, reader, reader.getSortedDocValues(fi.name));
+ if (reader.getBinaryDocValues(fi.name) != null ||
+ reader.getNumericDocValues(fi.name) != null ||
+ reader.getSortedSetDocValues(fi.name) != null) {
+ throw new RuntimeException(fi.name + " returns multiple docvalues types!");
+ }
+ break;
+ case SORTED_SET:
+ checkSortedSetDocValues(fi.name, reader, reader.getSortedSetDocValues(fi.name));
+ if (reader.getBinaryDocValues(fi.name) != null ||
+ reader.getNumericDocValues(fi.name) != null ||
+ reader.getSortedDocValues(fi.name) != null) {
+ throw new RuntimeException(fi.name + " returns multiple docvalues types!");
+ }
break;
case BINARY:
checkBinaryDocValues(fi.name, reader, reader.getBinaryDocValues(fi.name));
+ if (reader.getNumericDocValues(fi.name) != null ||
+ reader.getSortedDocValues(fi.name) != null ||
+ reader.getSortedSetDocValues(fi.name) != null) {
+ throw new RuntimeException(fi.name + " returns multiple docvalues types!");
+ }
break;
case NUMERIC:
checkNumericDocValues(fi.name, reader, reader.getNumericDocValues(fi.name));
+ if (reader.getBinaryDocValues(fi.name) != null ||
+ reader.getSortedDocValues(fi.name) != null ||
+ reader.getSortedSetDocValues(fi.name) != null) {
+ throw new RuntimeException(fi.name + " returns multiple docvalues types!");
+ }
break;
default:
throw new AssertionError();
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java?rev=1448085&r1=1448084&r2=1448085&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java Wed Feb 20 11:19:22 2013
@@ -265,6 +265,10 @@ public class DocTermOrds {
/** Call this only once (if you subclass!) */
protected void uninvert(final AtomicReader reader, final BytesRef termPrefix) throws IOException {
+ final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
+ if (info != null && info.hasDocValues()) {
+ throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
+ }
//System.out.println("DTO uninvert field=" + field + " prefix=" + termPrefix);
final long startTime = System.currentTimeMillis();
prefix = termPrefix == null ? null : BytesRef.deepCopyOf(termPrefix);
@@ -596,93 +600,6 @@ public class DocTermOrds {
return pos;
}
- /** Iterates over the ords for a single document. */
- public class TermOrdsIterator {
- private int tnum;
- private int upto;
- private byte[] arr;
-
- TermOrdsIterator() {
- }
-
- /** Buffer must be at least 5 ints long. Returns number
- * of term ords placed into buffer; if this count is
- * less than buffer.length then that is the end. */
- public int read(int[] buffer) {
- int bufferUpto = 0;
- if (arr == null) {
- // code is inlined into upto
- //System.out.println("inlined");
- int code = upto;
- int delta = 0;
- for (;;) {
- delta = (delta << 7) | (code & 0x7f);
- if ((code & 0x80)==0) {
- if (delta==0) break;
- tnum += delta - TNUM_OFFSET;
- buffer[bufferUpto++] = ordBase+tnum;
- //System.out.println(" tnum=" + tnum);
- delta = 0;
- }
- code >>>= 8;
- }
- } else {
- // code is a pointer
- for(;;) {
- int delta = 0;
- for(;;) {
- byte b = arr[upto++];
- delta = (delta << 7) | (b & 0x7f);
- //System.out.println(" cycle: upto=" + upto + " delta=" + delta + " b=" + b);
- if ((b & 0x80) == 0) break;
- }
- //System.out.println(" delta=" + delta);
- if (delta == 0) break;
- tnum += delta - TNUM_OFFSET;
- //System.out.println(" tnum=" + tnum);
- buffer[bufferUpto++] = ordBase+tnum;
- if (bufferUpto == buffer.length) {
- break;
- }
- }
- }
-
- return bufferUpto;
- }
-
- /** Reset the iterator on a new document. */
- public TermOrdsIterator reset(int docID) {
- //System.out.println(" reset docID=" + docID);
- tnum = 0;
- final int code = index[docID];
- if ((code & 0xff)==1) {
- // a pointer
- upto = code>>>8;
- //System.out.println(" pointer! upto=" + upto);
- int whichArray = (docID >>> 16) & 0xff;
- arr = tnums[whichArray];
- } else {
- //System.out.println(" inline!");
- arr = null;
- upto = code;
- }
- return this;
- }
- }
-
- /** Returns an iterator to step through the term ords for
- * this document. It's also possible to subclass this
- * class and directly access members. */
- public TermOrdsIterator lookup(int doc, TermOrdsIterator reuse) {
- final TermOrdsIterator ret;
- if (reuse != null) {
- ret = reuse;
- } else {
- ret = new TermOrdsIterator();
- }
- return ret.reset(doc);
- }
-
/* Only used if original IndexReader doesn't implement
* ord; in this case we "wrap" our own terms index
* around it. */
@@ -847,4 +764,124 @@ public class DocTermOrds {
termsEnum.seekExact(ord);
return termsEnum.term();
}
+
+ /** Returns a SortedSetDocValues view of this instance */
+ public SortedSetDocValues iterator(TermsEnum termsEnum) throws IOException {
+ if (isEmpty()) {
+ return SortedSetDocValues.EMPTY;
+ } else {
+ return new Iterator(termsEnum);
+ }
+ }
+
+ private class Iterator extends SortedSetDocValues {
+ final TermsEnum te;
+ // currently we read 5 at a time (using the logic of the old iterator)
+ final int buffer[] = new int[5];
+ int bufferUpto;
+ int bufferLength;
+
+ private int tnum;
+ private int upto;
+ private byte[] arr;
+
+ Iterator(TermsEnum te) {
+ this.te = te;
+ }
+
+ @Override
+ public long nextOrd() {
+ while (bufferUpto == bufferLength) {
+ if (bufferLength < buffer.length) {
+ return NO_MORE_ORDS;
+ } else {
+ bufferLength = read(buffer);
+ bufferUpto = 0;
+ }
+ }
+ return buffer[bufferUpto++];
+ }
+
+ /** Buffer must be at least 5 ints long. Returns number
+ * of term ords placed into buffer; if this count is
+ * less than buffer.length then that is the end. */
+ int read(int[] buffer) {
+ int bufferUpto = 0;
+ if (arr == null) {
+ // code is inlined into upto
+ //System.out.println("inlined");
+ int code = upto;
+ int delta = 0;
+ for (;;) {
+ delta = (delta << 7) | (code & 0x7f);
+ if ((code & 0x80)==0) {
+ if (delta==0) break;
+ tnum += delta - TNUM_OFFSET;
+ buffer[bufferUpto++] = ordBase+tnum;
+ //System.out.println(" tnum=" + tnum);
+ delta = 0;
+ }
+ code >>>= 8;
+ }
+ } else {
+ // code is a pointer
+ for(;;) {
+ int delta = 0;
+ for(;;) {
+ byte b = arr[upto++];
+ delta = (delta << 7) | (b & 0x7f);
+ //System.out.println(" cycle: upto=" + upto + " delta=" + delta + " b=" + b);
+ if ((b & 0x80) == 0) break;
+ }
+ //System.out.println(" delta=" + delta);
+ if (delta == 0) break;
+ tnum += delta - TNUM_OFFSET;
+ //System.out.println(" tnum=" + tnum);
+ buffer[bufferUpto++] = ordBase+tnum;
+ if (bufferUpto == buffer.length) {
+ break;
+ }
+ }
+ }
+
+ return bufferUpto;
+ }
+
+ @Override
+ public void setDocument(int docID) {
+ tnum = 0;
+ final int code = index[docID];
+ if ((code & 0xff)==1) {
+ // a pointer
+ upto = code>>>8;
+ //System.out.println(" pointer! upto=" + upto);
+ int whichArray = (docID >>> 16) & 0xff;
+ arr = tnums[whichArray];
+ } else {
+ //System.out.println(" inline!");
+ arr = null;
+ upto = code;
+ }
+ bufferUpto = 0;
+ bufferLength = read(buffer);
+ }
+
+ @Override
+ public void lookupOrd(long ord, BytesRef result) {
+ BytesRef ref = null;
+ try {
+ ref = DocTermOrds.this.lookupTerm(te, (int) ord);
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ result.bytes = ref.bytes;
+ result.offset = ref.offset;
+ result.length = ref.length;
+ }
+
+ @Override
+ public long getValueCount() {
+ return numTerms();
+ }
+ }
}
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/DocValuesProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/DocValuesProcessor.java?rev=1448085&r1=1448084&r2=1448085&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/DocValuesProcessor.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/DocValuesProcessor.java Wed Feb 20 11:19:22 2013
@@ -57,6 +57,8 @@ final class DocValuesProcessor extends S
addBinaryField(fieldInfo, docID, field.binaryValue());
} else if (dvType == DocValuesType.SORTED) {
addSortedField(fieldInfo, docID, field.binaryValue());
+ } else if (dvType == DocValuesType.SORTED_SET) {
+ addSortedSetField(fieldInfo, docID, field.binaryValue());
} else if (dvType == DocValuesType.NUMERIC) {
if (!(field.numericValue() instanceof Long)) {
throw new IllegalArgumentException("illegal type " + field.numericValue().getClass() + ": DocValues types must be Long");
@@ -122,6 +124,20 @@ final class DocValuesProcessor extends S
}
sortedWriter.addValue(docID, value);
}
+
+ void addSortedSetField(FieldInfo fieldInfo, int docID, BytesRef value) {
+ DocValuesWriter writer = writers.get(fieldInfo.name);
+ SortedSetDocValuesWriter sortedSetWriter;
+ if (writer == null) {
+ sortedSetWriter = new SortedSetDocValuesWriter(fieldInfo, bytesUsed);
+ writers.put(fieldInfo.name, sortedSetWriter);
+ } else if (!(writer instanceof SortedSetDocValuesWriter)) {
+ throw new IllegalArgumentException("Incompatible DocValues type: field \"" + fieldInfo.name + "\" changed from " + getTypeDesc(writer) + " to sorted");
+ } else {
+ sortedSetWriter = (SortedSetDocValuesWriter) writer;
+ }
+ sortedSetWriter.addValue(docID, value);
+ }
void addNumericField(FieldInfo fieldInfo, int docID, long value) {
DocValuesWriter writer = writers.get(fieldInfo.name);
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/FieldInfo.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/FieldInfo.java?rev=1448085&r1=1448084&r2=1448085&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/FieldInfo.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/FieldInfo.java Wed Feb 20 11:19:22 2013
@@ -101,7 +101,14 @@ public final class FieldInfo {
* byte[]. The stored byte[] is presorted and allows access via document id,
* ordinal and by-value.
*/
- SORTED
+ SORTED,
+ /**
+ * A pre-sorted Set&lt;byte[]&gt;. Fields with this type only store distinct byte values
+ * and store additional offset pointers per document to dereference the shared
+ * byte[]s. The stored byte[] is presorted and allows access via document id,
+ * ordinal and by-value.
+ */
+ SORTED_SET
};
/**
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java?rev=1448085&r1=1448084&r2=1448085&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java Wed Feb 20 11:19:22 2013
@@ -429,6 +429,12 @@ public class FilterAtomicReader extends
}
@Override
+ public SortedSetDocValues getSortedSetDocValues(String field) throws IOException {
+ ensureOpen();
+ return in.getSortedSetDocValues(field);
+ }
+
+ @Override
public NumericDocValues getNormValues(String field) throws IOException {
ensureOpen();
return in.getNormValues(field);
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java?rev=1448085&r1=1448084&r2=1448085&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java Wed Feb 20 11:19:22 2013
@@ -24,6 +24,7 @@ import org.apache.lucene.index.MultiTerm
import org.apache.lucene.index.MultiTermsEnum.TermsEnumWithSlice;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.packed.AppendingLongBuffer;
+import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
/**
* A wrapper for CompositeIndexReader providing access to DocValues.
@@ -214,61 +215,151 @@ public class MultiDocValues {
if (!anyReal) {
return null;
} else {
- OrdinalMap mapping = new OrdinalMap(r.getCoreCacheKey(), values);
+ TermsEnum enums[] = new TermsEnum[values.length];
+ for (int i = 0; i < values.length; i++) {
+ enums[i] = new SortedDocValuesTermsEnum(values[i]);
+ }
+ OrdinalMap mapping = new OrdinalMap(r.getCoreCacheKey(), enums);
return new MultiSortedDocValues(values, starts, mapping);
}
}
+ /** Returns a SortedSetDocValues for a reader's docvalues (potentially doing extremely slow things).
+ * <p>
+ * This is an extremely slow way to access sorted set values. Instead, access them per-segment
+ * with {@link AtomicReader#getSortedSetDocValues(String)}
+ * </p>
+ */
+ public static SortedSetDocValues getSortedSetValues(final IndexReader r, final String field) throws IOException {
+ final List<AtomicReaderContext> leaves = r.leaves();
+ final int size = leaves.size();
+
+ if (size == 0) {
+ return null;
+ } else if (size == 1) {
+ return leaves.get(0).reader().getSortedSetDocValues(field);
+ }
+
+ boolean anyReal = false;
+ final SortedSetDocValues[] values = new SortedSetDocValues[size];
+ final int[] starts = new int[size+1];
+ for (int i = 0; i < size; i++) {
+ AtomicReaderContext context = leaves.get(i);
+ SortedSetDocValues v = context.reader().getSortedSetDocValues(field);
+ if (v == null) {
+ v = SortedSetDocValues.EMPTY;
+ } else {
+ anyReal = true;
+ }
+ values[i] = v;
+ starts[i] = context.docBase;
+ }
+ starts[size] = r.maxDoc();
+
+ if (!anyReal) {
+ return null;
+ } else {
+ TermsEnum enums[] = new TermsEnum[values.length];
+ for (int i = 0; i < values.length; i++) {
+ enums[i] = new SortedSetDocValuesTermsEnum(values[i]);
+ }
+ OrdinalMap mapping = new OrdinalMap(r.getCoreCacheKey(), enums);
+ return new MultiSortedSetDocValues(values, starts, mapping);
+ }
+ }
+
/** maps per-segment ordinals to/from global ordinal space */
- // TODO: use more efficient packed ints structures (these are all positive values!)
- static class OrdinalMap {
+ // TODO: use more efficient packed ints structures?
+ // TODO: pull this out? it's pretty generic (maps between N ord()-enabled TermsEnums)
+ public static class OrdinalMap {
// cache key of whoever asked for this aweful thing
final Object owner;
// globalOrd -> (globalOrd - segmentOrd)
- final AppendingLongBuffer globalOrdDeltas;
+ final MonotonicAppendingLongBuffer globalOrdDeltas;
// globalOrd -> sub index
final AppendingLongBuffer subIndexes;
// segmentOrd -> (globalOrd - segmentOrd)
- final AppendingLongBuffer ordDeltas[];
+ final MonotonicAppendingLongBuffer ordDeltas[];
- OrdinalMap(Object owner, SortedDocValues subs[]) throws IOException {
+ /**
+ * Creates an ordinal map that allows mapping ords to/from a merged
+ * space from <code>subs</code>.
+ * @param owner a cache key
+ * @param subs TermsEnums that support {@link TermsEnum#ord()}. They need
+ * not be dense (e.g. can be FilteredTermsEnums).
+ * @throws IOException if an I/O error occurred.
+ */
+ public OrdinalMap(Object owner, TermsEnum subs[]) throws IOException {
// create the ordinal mappings by pulling a termsenum over each sub's
// unique terms, and walking a multitermsenum over those
this.owner = owner;
- globalOrdDeltas = new AppendingLongBuffer();
+ globalOrdDeltas = new MonotonicAppendingLongBuffer();
subIndexes = new AppendingLongBuffer();
- ordDeltas = new AppendingLongBuffer[subs.length];
+ ordDeltas = new MonotonicAppendingLongBuffer[subs.length];
for (int i = 0; i < ordDeltas.length; i++) {
- ordDeltas[i] = new AppendingLongBuffer();
+ ordDeltas[i] = new MonotonicAppendingLongBuffer();
}
- int segmentOrds[] = new int[subs.length];
+ long segmentOrds[] = new long[subs.length];
ReaderSlice slices[] = new ReaderSlice[subs.length];
TermsEnumIndex indexes[] = new TermsEnumIndex[slices.length];
for (int i = 0; i < slices.length; i++) {
slices[i] = new ReaderSlice(0, 0, i);
- indexes[i] = new TermsEnumIndex(new SortedDocValuesTermsEnum(subs[i]), i);
+ indexes[i] = new TermsEnumIndex(subs[i], i);
}
MultiTermsEnum mte = new MultiTermsEnum(slices);
mte.reset(indexes);
- int globalOrd = 0;
+ long globalOrd = 0;
while (mte.next() != null) {
TermsEnumWithSlice matches[] = mte.getMatchArray();
for (int i = 0; i < mte.getMatchCount(); i++) {
int subIndex = matches[i].index;
- int delta = globalOrd - segmentOrds[subIndex];
- assert delta >= 0;
+ long segmentOrd = matches[i].terms.ord();
+ long delta = globalOrd - segmentOrd;
// for each unique term, just mark the first subindex/delta where it occurs
if (i == 0) {
subIndexes.add(subIndex);
globalOrdDeltas.add(delta);
}
// for each per-segment ord, map it back to the global term.
- ordDeltas[subIndex].add(delta);
- segmentOrds[subIndex]++;
+ while (segmentOrds[subIndex] <= segmentOrd) {
+ ordDeltas[subIndex].add(delta);
+ segmentOrds[subIndex]++;
+ }
}
globalOrd++;
}
}
+
+ /**
+ * Given a segment number and segment ordinal, returns
+ * the corresponding global ordinal.
+ */
+ public long getGlobalOrd(int subIndex, long segmentOrd) {
+ return segmentOrd + ordDeltas[subIndex].get(segmentOrd);
+ }
+
+ /**
+ * Given a segment number and global ordinal, returns
+ * the corresponding segment ordinal.
+ */
+ public long getSegmentOrd(int subIndex, long globalOrd) {
+ return globalOrd - globalOrdDeltas.get(globalOrd);
+ }
+
+ /**
+ * Given a global ordinal, returns the index of the first
+ * sub that contains this term.
+ */
+ public int getSegmentNumber(long globalOrd) {
+ return (int) subIndexes.get(globalOrd);
+ }
+
+ /**
+ * Returns the total number of unique terms in global ord space.
+ */
+ public long getValueCount() {
+ return globalOrdDeltas.size();
+ }
}
/** implements SortedDocValues over n subs, using an OrdinalMap */
@@ -289,19 +380,63 @@ public class MultiDocValues {
public int getOrd(int docID) {
int subIndex = ReaderUtil.subIndex(docID, docStarts);
int segmentOrd = values[subIndex].getOrd(docID - docStarts[subIndex]);
- return (int) (segmentOrd + mapping.ordDeltas[subIndex].get(segmentOrd));
+ return (int) mapping.getGlobalOrd(subIndex, segmentOrd);
}
@Override
public void lookupOrd(int ord, BytesRef result) {
- int subIndex = (int) mapping.subIndexes.get(ord);
- int segmentOrd = (int) (ord - mapping.globalOrdDeltas.get(ord));
+ int subIndex = mapping.getSegmentNumber(ord);
+ int segmentOrd = (int) mapping.getSegmentOrd(subIndex, ord);
values[subIndex].lookupOrd(segmentOrd, result);
}
@Override
public int getValueCount() {
- return mapping.globalOrdDeltas.size();
+ return (int) mapping.getValueCount();
+ }
+ }
+
+ /** implements SortedSetDocValues over n subs, using an OrdinalMap */
+ static class MultiSortedSetDocValues extends SortedSetDocValues {
+ final int docStarts[];
+ final SortedSetDocValues values[];
+ final OrdinalMap mapping;
+ int currentSubIndex;
+
+ MultiSortedSetDocValues(SortedSetDocValues values[], int docStarts[], OrdinalMap mapping) throws IOException {
+ assert values.length == mapping.ordDeltas.length;
+ assert docStarts.length == values.length + 1;
+ this.values = values;
+ this.docStarts = docStarts;
+ this.mapping = mapping;
+ }
+
+ @Override
+ public long nextOrd() {
+ long segmentOrd = values[currentSubIndex].nextOrd();
+ if (segmentOrd == NO_MORE_ORDS) {
+ return segmentOrd;
+ } else {
+ return mapping.getGlobalOrd(currentSubIndex, segmentOrd);
+ }
+ }
+
+ @Override
+ public void setDocument(int docID) {
+ currentSubIndex = ReaderUtil.subIndex(docID, docStarts);
+ values[currentSubIndex].setDocument(docID - docStarts[currentSubIndex]);
+ }
+
+ @Override
+ public void lookupOrd(long ord, BytesRef result) {
+ int subIndex = mapping.getSegmentNumber(ord);
+ long segmentOrd = mapping.getSegmentOrd(subIndex, ord);
+ values[subIndex].lookupOrd(segmentOrd, result);
+ }
+
+ @Override
+ public long getValueCount() {
+ return mapping.getValueCount();
}
}
}
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java?rev=1448085&r1=1448084&r2=1448085&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java Wed Feb 20 11:19:22 2013
@@ -497,7 +497,7 @@ public final class MultiTermsEnum extend
final static class TermsEnumWithSlice {
private final ReaderSlice subSlice;
- private TermsEnum terms;
+ TermsEnum terms;
public BytesRef current;
final int index;
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java?rev=1448085&r1=1448084&r2=1448085&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java Wed Feb 20 11:19:22 2013
@@ -50,7 +50,7 @@ class NumericDocValuesWriter extends Doc
}
// Fill in any holes:
- for (int i = pending.size(); i < docID; ++i) {
+ for (int i = (int)pending.size(); i < docID; ++i) {
pending.add(MISSING);
}
@@ -90,7 +90,7 @@ class NumericDocValuesWriter extends Doc
// iterates over the values we have in ram
private class NumericIterator implements Iterator<Number> {
final AppendingLongBuffer.Iterator iter = pending.iterator();
- final int size = pending.size();
+ final int size = (int)pending.size();
final int maxDoc;
int upto;
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/ParallelAtomicReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/ParallelAtomicReader.java?rev=1448085&r1=1448084&r2=1448085&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/ParallelAtomicReader.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/ParallelAtomicReader.java Wed Feb 20 11:19:22 2013
@@ -285,6 +285,13 @@ public final class ParallelAtomicReader
}
@Override
+ public SortedSetDocValues getSortedSetDocValues(String field) throws IOException {
+ ensureOpen();
+ AtomicReader reader = fieldToReader.get(field);
+ return reader == null ? null : reader.getSortedSetDocValues(field);
+ }
+
+ @Override
public NumericDocValues getNormValues(String field) throws IOException {
ensureOpen();
AtomicReader reader = fieldToReader.get(field);
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java?rev=1448085&r1=1448084&r2=1448085&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java Wed Feb 20 11:19:22 2013
@@ -253,6 +253,34 @@ final class SegmentCoreReaders {
return dvs;
}
+
+ SortedSetDocValues getSortedSetDocValues(String field) throws IOException {
+ FieldInfo fi = fieldInfos.fieldInfo(field);
+ if (fi == null) {
+ // Field does not exist
+ return null;
+ }
+ if (fi.getDocValuesType() == null) {
+ // Field was not indexed with doc values
+ return null;
+ }
+ if (fi.getDocValuesType() != DocValuesType.SORTED_SET) {
+ // DocValues were not of type SORTED_SET
+ return null;
+ }
+
+ assert dvProducer != null;
+
+ Map<String,Object> dvFields = docValuesLocal.get();
+
+ SortedSetDocValues dvs = (SortedSetDocValues) dvFields.get(field);
+ if (dvs == null) {
+ dvs = dvProducer.getSortedSet(fi);
+ dvFields.put(field, dvs);
+ }
+
+ return dvs;
+ }
NumericDocValues getNormValues(String field) throws IOException {
FieldInfo fi = fieldInfos.fieldInfo(field);
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java?rev=1448085&r1=1448084&r2=1448085&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java Wed Feb 20 11:19:22 2013
@@ -197,6 +197,16 @@ final class SegmentMerger {
toMerge.add(values);
}
consumer.mergeSortedField(field, mergeState, toMerge);
+ } else if (type == DocValuesType.SORTED_SET) {
+ List<SortedSetDocValues> toMerge = new ArrayList<SortedSetDocValues>();
+ for (AtomicReader reader : mergeState.readers) {
+ SortedSetDocValues values = reader.getSortedSetDocValues(field.name);
+ if (values == null) {
+ values = SortedSetDocValues.EMPTY;
+ }
+ toMerge.add(values);
+ }
+ consumer.mergeSortedSetField(field, mergeState, toMerge);
} else {
throw new AssertionError("type=" + type);
}
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java?rev=1448085&r1=1448084&r2=1448085&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java Wed Feb 20 11:19:22 2013
@@ -248,6 +248,12 @@ public final class SegmentReader extends
}
@Override
+ public SortedSetDocValues getSortedSetDocValues(String field) throws IOException {
+ ensureOpen();
+ return core.getSortedSetDocValues(field);
+ }
+
+ @Override
public NumericDocValues getNormValues(String field) throws IOException {
ensureOpen();
return core.getNormValues(field);
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SlowCompositeReaderWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SlowCompositeReaderWrapper.java?rev=1448085&r1=1448084&r2=1448085&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SlowCompositeReaderWrapper.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SlowCompositeReaderWrapper.java Wed Feb 20 11:19:22 2013
@@ -24,7 +24,9 @@ import java.util.Map;
import org.apache.lucene.util.Bits;
import org.apache.lucene.index.DirectoryReader; // javadoc
+import org.apache.lucene.index.FieldInfo.DocValuesType;
import org.apache.lucene.index.MultiDocValues.MultiSortedDocValues;
+import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues;
import org.apache.lucene.index.MultiDocValues.OrdinalMap;
import org.apache.lucene.index.MultiReader; // javadoc
@@ -113,8 +115,10 @@ public final class SlowCompositeReaderWr
return dv;
}
}
- // cached multi dv
- assert map != null;
+ // cached ordinal map
+ if (getFieldInfos().fieldInfo(field).getDocValuesType() != DocValuesType.SORTED) {
+ return null;
+ }
int size = in.leaves().size();
final SortedDocValues[] values = new SortedDocValues[size];
final int[] starts = new int[size+1];
@@ -131,6 +135,45 @@ public final class SlowCompositeReaderWr
return new MultiSortedDocValues(values, starts, map);
}
+ @Override
+ public SortedSetDocValues getSortedSetDocValues(String field) throws IOException {
+ ensureOpen();
+ OrdinalMap map = null;
+ synchronized (cachedOrdMaps) {
+ map = cachedOrdMaps.get(field);
+ if (map == null) {
+ // uncached, or not a multi dv
+ SortedSetDocValues dv = MultiDocValues.getSortedSetValues(in, field);
+ if (dv instanceof MultiSortedSetDocValues) {
+ map = ((MultiSortedSetDocValues)dv).mapping;
+ if (map.owner == getCoreCacheKey()) {
+ cachedOrdMaps.put(field, map);
+ }
+ }
+ return dv;
+ }
+ }
+ // cached ordinal map
+ if (getFieldInfos().fieldInfo(field).getDocValuesType() != DocValuesType.SORTED_SET) {
+ return null;
+ }
+ assert map != null;
+ int size = in.leaves().size();
+ final SortedSetDocValues[] values = new SortedSetDocValues[size];
+ final int[] starts = new int[size+1];
+ for (int i = 0; i < size; i++) {
+ AtomicReaderContext context = in.leaves().get(i);
+ SortedSetDocValues v = context.reader().getSortedSetDocValues(field);
+ if (v == null) {
+ v = SortedSetDocValues.EMPTY;
+ }
+ values[i] = v;
+ starts[i] = context.docBase;
+ }
+ starts[size] = maxDoc();
+ return new MultiSortedSetDocValues(values, starts, map);
+ }
+
// TODO: this could really be a weak map somewhere else on the coreCacheKey,
// but do we really need to optimize slow-wrapper any more?
private final Map<String,OrdinalMap> cachedOrdMaps = new HashMap<String,OrdinalMap>();
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/FieldCache.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/FieldCache.java?rev=1448085&r1=1448084&r2=1448085&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/FieldCache.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/FieldCache.java Wed Feb 20 11:19:22 2013
@@ -29,6 +29,7 @@ import org.apache.lucene.index.AtomicRea
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocTermOrds;
import org.apache.lucene.index.SortedDocValues;
+import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.Bits;
@@ -670,7 +671,7 @@ public interface FieldCache {
* @return a {@link DocTermOrds} instance
* @throws IOException If any error occurs.
*/
- public DocTermOrds getDocTermOrds(AtomicReader reader, String field) throws IOException;
+ public SortedSetDocValues getDocTermOrds(AtomicReader reader, String field) throws IOException;
/**
* EXPERT: A unique Identifier/Description for each item in the FieldCache.
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java?rev=1448085&r1=1448084&r2=1448085&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java Wed Feb 20 11:19:22 2013
@@ -33,7 +33,9 @@ import org.apache.lucene.index.FieldInfo
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SegmentReader;
+import org.apache.lucene.index.SingletonSortedSetDocValues;
import org.apache.lucene.index.SortedDocValues;
+import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.ArrayUtil;
@@ -1363,8 +1365,30 @@ class FieldCacheImpl implements FieldCac
}
}
- public DocTermOrds getDocTermOrds(AtomicReader reader, String field) throws IOException {
- return (DocTermOrds) caches.get(DocTermOrds.class).get(reader, new CacheKey(field, null), false);
+ // TODO: if DocTermsIndex was already created, we
+ // should share it...
+ public SortedSetDocValues getDocTermOrds(AtomicReader reader, String field) throws IOException {
+ SortedSetDocValues dv = reader.getSortedSetDocValues(field);
+ if (dv != null) {
+ return dv;
+ }
+
+ SortedDocValues sdv = reader.getSortedDocValues(field);
+ if (sdv != null) {
+ return new SingletonSortedSetDocValues(sdv);
+ }
+
+ final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
+ if (info == null) {
+ return SortedSetDocValues.EMPTY;
+ } else if (info.hasDocValues()) {
+ throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
+ } else if (!info.isIndexed()) {
+ return SortedSetDocValues.EMPTY;
+ }
+
+ DocTermOrds dto = (DocTermOrds) caches.get(DocTermOrds.class).get(reader, new CacheKey(field, null), false);
+ return dto.iterator(dto.getOrdTermsEnum(reader));
}
static final class DocTermOrdsCache extends Cache {
@@ -1375,7 +1399,6 @@ class FieldCacheImpl implements FieldCac
@Override
protected Object createValue(AtomicReader reader, CacheKey key, boolean setDocsWithField /* ignored */)
throws IOException {
- // No DocValues impl yet (DocValues are single valued...):
return new DocTermOrds(reader, key.field);
}
}
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/AppendingLongBuffer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/AppendingLongBuffer.java?rev=1448085&r1=1448084&r2=1448085&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/AppendingLongBuffer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/AppendingLongBuffer.java Wed Feb 20 11:19:22 2013
@@ -19,72 +19,33 @@ package org.apache.lucene.util.packed;
import java.util.Arrays;
-import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.RamUsageEstimator;
-
/**
* Utility class to buffer a list of signed longs in memory. This class only
- * supports appending.
+ * supports appending and is optimized for the case where values are close to
+ * each other.
* @lucene.internal
*/
-public class AppendingLongBuffer {
-
- private static final int BLOCK_BITS = 10;
- private static final int MAX_PENDING_COUNT = 1 << BLOCK_BITS;
- private static final int BLOCK_MASK = MAX_PENDING_COUNT - 1;
-
- private long[] minValues;
- private PackedInts.Reader[] values;
- private long valuesBytes;
- private int valuesOff;
- private long[] pending;
- private int pendingOff;
+public final class AppendingLongBuffer extends AbstractAppendingLongBuffer {
/** Sole constructor. */
public AppendingLongBuffer() {
- minValues = new long[16];
- values = new PackedInts.Reader[16];
- pending = new long[MAX_PENDING_COUNT];
- valuesOff = 0;
- pendingOff = 0;
+ super(16);
}
- /** Append a value to this buffer. */
- public void add(long l) {
- if (pendingOff == MAX_PENDING_COUNT) {
- packPendingValues();
- }
- pending[pendingOff++] = l;
- }
-
- /** Get a value from this buffer.
- * <p>
- * <b>NOTE</b>: This class is not really designed for random access!
- * You will likely get better performance by using packed ints in another way! */
- public long get(int index) {
- assert index < size(); // TODO: do a better check, and throw IndexOutOfBoundsException?
- // This class is currently only used by the indexer.
- int block = index >> BLOCK_BITS;
- int element = index & BLOCK_MASK;
+ @Override
+ long get(int block, int element) {
if (block == valuesOff) {
return pending[element];
- } else if (values[block] == null) {
+ } else if (deltas[block] == null) {
return minValues[block];
} else {
- return minValues[block] + values[block].get(element);
+ return minValues[block] + deltas[block].get(element);
}
}
- private void packPendingValues() {
+ void packPendingValues() {
assert pendingOff == MAX_PENDING_COUNT;
- // check size
- if (values.length == valuesOff) {
- final int newLength = ArrayUtil.oversize(valuesOff + 1, 8);
- minValues = Arrays.copyOf(minValues, newLength);
- values = Arrays.copyOf(values, newLength);
- }
-
// compute max delta
long minValue = pending[0];
long maxValue = pending[0];
@@ -105,18 +66,8 @@ public class AppendingLongBuffer {
for (int i = 0; i < pendingOff; ) {
i += mutable.set(i, pending, i, pendingOff - i);
}
- values[valuesOff] = mutable;
- valuesBytes += mutable.ramBytesUsed();
+ deltas[valuesOff] = mutable;
}
- ++valuesOff;
-
- // reset pending buffer
- pendingOff = 0;
- }
-
- /** Get the number of values that have been added to the buffer. */
- public int size() {
- return valuesOff * MAX_PENDING_COUNT + pendingOff;
}
/** Return an iterator over the values of this buffer. */
@@ -125,29 +76,20 @@ public class AppendingLongBuffer {
}
/** A long iterator. */
- public class Iterator {
-
- long[] currentValues;
- int vOff, pOff;
+ public final class Iterator extends AbstractAppendingLongBuffer.Iterator {
private Iterator() {
- vOff = pOff = 0;
- if (valuesOff == 0) {
- currentValues = pending;
- } else {
- currentValues = new long[MAX_PENDING_COUNT];
- fillValues();
- }
+ super();
}
- private void fillValues() {
+ void fillValues() {
if (vOff == valuesOff) {
currentValues = pending;
- } else if (values[vOff] == null) {
+ } else if (deltas[vOff] == null) {
Arrays.fill(currentValues, minValues[vOff]);
} else {
for (int k = 0; k < MAX_PENDING_COUNT; ) {
- k += values[vOff].get(k, currentValues, k, MAX_PENDING_COUNT - k);
+ k += deltas[vOff].get(k, currentValues, k, MAX_PENDING_COUNT - k);
}
for (int k = 0; k < MAX_PENDING_COUNT; ++k) {
currentValues[k] += minValues[vOff];
@@ -155,42 +97,6 @@ public class AppendingLongBuffer {
}
}
- /** Whether or not there are remaining values. */
- public boolean hasNext() {
- return vOff < valuesOff || (vOff == valuesOff && pOff < pendingOff);
- }
-
- /** Return the next long in the buffer. */
- public long next() {
- assert hasNext();
- long result = currentValues[pOff++];
- if (pOff == MAX_PENDING_COUNT) {
- vOff += 1;
- pOff = 0;
- if (vOff <= valuesOff) {
- fillValues();
- }
- }
- return result;
- }
-
- }
-
- /**
- * Return the number of bytes used by this instance.
- */
- public long ramBytesUsed() {
- // TODO: this is called per-doc-per-norms/dv-field, can we optimize this?
- long bytesUsed = RamUsageEstimator.alignObjectSize(
- RamUsageEstimator.NUM_BYTES_OBJECT_HEADER
- + 3 * RamUsageEstimator.NUM_BYTES_OBJECT_REF // the 3 arrays
- + 2 * RamUsageEstimator.NUM_BYTES_INT) // the 2 offsets
- + RamUsageEstimator.NUM_BYTES_LONG // valuesBytes
- + RamUsageEstimator.sizeOf(pending)
- + RamUsageEstimator.sizeOf(minValues)
- + RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + (long) RamUsageEstimator.NUM_BYTES_OBJECT_REF * values.length); // values
-
- return bytesUsed + valuesBytes;
}
}
Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestDocTermOrds.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestDocTermOrds.java?rev=1448085&r1=1448084&r2=1448085&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestDocTermOrds.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestDocTermOrds.java Wed Feb 20 11:19:22 2013
@@ -29,7 +29,6 @@ import org.apache.lucene.codecs.Postings
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntField;
-import org.apache.lucene.index.DocTermOrds.TermOrdsIterator;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
@@ -63,25 +62,26 @@ public class TestDocTermOrds extends Luc
final IndexReader r = w.getReader();
w.close();
- final DocTermOrds dto = new DocTermOrds(SlowCompositeReaderWrapper.wrap(r), "field");
-
- TermOrdsIterator iter = dto.lookup(0, null);
- final int[] buffer = new int[5];
- assertEquals(3, iter.read(buffer));
- assertEquals(0, buffer[0]);
- assertEquals(1, buffer[1]);
- assertEquals(2, buffer[2]);
-
- iter = dto.lookup(1, iter);
- assertEquals(3, iter.read(buffer));
- assertEquals(3, buffer[0]);
- assertEquals(4, buffer[1]);
- assertEquals(5, buffer[2]);
-
- iter = dto.lookup(2, iter);
- assertEquals(2, iter.read(buffer));
- assertEquals(0, buffer[0]);
- assertEquals(5, buffer[1]);
+ final AtomicReader ar = SlowCompositeReaderWrapper.wrap(r);
+ final DocTermOrds dto = new DocTermOrds(ar, "field");
+ SortedSetDocValues iter = dto.iterator(ar.terms("field").iterator(null));
+
+ iter.setDocument(0);
+ assertEquals(0, iter.nextOrd());
+ assertEquals(1, iter.nextOrd());
+ assertEquals(2, iter.nextOrd());
+ assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd());
+
+ iter.setDocument(1);
+ assertEquals(3, iter.nextOrd());
+ assertEquals(4, iter.nextOrd());
+ assertEquals(5, iter.nextOrd());
+ assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd());
+
+ iter.setDocument(2);
+ assertEquals(0, iter.nextOrd());
+ assertEquals(5, iter.nextOrd());
+ assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd());
r.close();
dir.close();
@@ -352,31 +352,24 @@ public class TestDocTermOrds extends Luc
}
}
- TermOrdsIterator iter = null;
- final int[] buffer = new int[5];
+ SortedSetDocValues iter = dto.iterator(te);
for(int docID=0;docID<r.maxDoc();docID++) {
if (VERBOSE) {
System.out.println("TEST: docID=" + docID + " of " + r.maxDoc() + " (id=" + docIDToID.get(docID) + ")");
}
- iter = dto.lookup(docID, iter);
+ iter.setDocument(docID);
final int[] answers = idToOrds[docIDToID.get(docID)];
int upto = 0;
- while(true) {
- final int chunk = iter.read(buffer);
- for(int idx=0;idx<chunk;idx++) {
- te.seekExact((long) buffer[idx]);
- final BytesRef expected = termsArray[answers[upto++]];
- if (VERBOSE) {
- System.out.println(" exp=" + expected.utf8ToString() + " actual=" + te.term().utf8ToString());
- }
- assertEquals("expected=" + expected.utf8ToString() + " actual=" + te.term().utf8ToString() + " ord=" + buffer[idx], expected, te.term());
- }
-
- if (chunk < buffer.length) {
- assertEquals(answers.length, upto);
- break;
+ long ord;
+ while ((ord = iter.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
+ te.seekExact(ord);
+ final BytesRef expected = termsArray[answers[upto++]];
+ if (VERBOSE) {
+ System.out.println(" exp=" + expected.utf8ToString() + " actual=" + te.term().utf8ToString());
}
+ assertEquals("expected=" + expected.utf8ToString() + " actual=" + te.term().utf8ToString() + " ord=" + ord, expected, te.term());
}
+ assertEquals(answers.length, upto);
}
}
}
Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestDocValuesIndexing.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestDocValuesIndexing.java?rev=1448085&r1=1448084&r2=1448085&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestDocValuesIndexing.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestDocValuesIndexing.java Wed Feb 20 11:19:22 2013
@@ -28,6 +28,7 @@ import org.apache.lucene.document.Docume
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
+import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.store.Directory;
@@ -374,6 +375,30 @@ public class TestDocValuesIndexing exten
iwriter.close();
directory.close();
}
+
+ public void testTooLargeTermSortedSetBytes() throws IOException {
+ assumeTrue("codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
+ Analyzer analyzer = new MockAnalyzer(random());
+
+ Directory directory = newDirectory();
+ // we don't use RandomIndexWriter because it might add more docvalues than we expect
+ IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
+ iwc.setMergePolicy(newLogMergePolicy());
+ IndexWriter iwriter = new IndexWriter(directory, iwc);
+ Document doc = new Document();
+ byte bytes[] = new byte[100000];
+ BytesRef b = new BytesRef(bytes);
+ random().nextBytes(bytes);
+ doc.add(new SortedSetDocValuesField("dv", b));
+ try {
+ iwriter.addDocument(doc);
+ fail("did not get expected exception");
+ } catch (IllegalArgumentException expected) {
+ // expected
+ }
+ iwriter.close();
+ directory.close();
+ }
// Two documents across segments
public void testMixedTypesDifferentSegments() throws Exception {
Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java?rev=1448085&r1=1448084&r2=1448085&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java Wed Feb 20 11:19:22 2013
@@ -31,6 +31,7 @@ import org.apache.lucene.analysis.Analyz
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.document.Document;
+import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.TermsEnum.SeekStatus;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.Directory;
@@ -103,6 +104,10 @@ public class TestDuelingCodecs extends L
rightReader = maybeWrapReader(rightWriter.getReader());
rightWriter.close();
+ // check that our readers are valid
+ _TestUtil.checkReader(leftReader);
+ _TestUtil.checkReader(rightReader);
+
info = "left: " + leftCodec.toString() + " / right: " + rightCodec.toString();
}
@@ -136,7 +141,14 @@ public class TestDuelingCodecs extends L
// TODO: we should add other fields that use things like docs&freqs but omit positions,
// because linefiledocs doesn't cover all the possibilities.
for (int i = 0; i < numdocs; i++) {
- writer.addDocument(lineFileDocs.nextDoc());
+ Document document = lineFileDocs.nextDoc();
+ // grab the title and add some SortedSet instances for fun
+ String title = document.get("titleTokenized");
+ String split[] = title.split("\\s+");
+ for (String trash : split) {
+ document.add(new SortedSetDocValuesField("sortedset", new BytesRef(trash)));
+ }
+ writer.addDocument(document);
}
lineFileDocs.close();
@@ -688,6 +700,36 @@ public class TestDuelingCodecs extends L
assertNull(info, rightValues);
}
}
+
+ {
+ SortedSetDocValues leftValues = MultiDocValues.getSortedSetValues(leftReader, field);
+ SortedSetDocValues rightValues = MultiDocValues.getSortedSetValues(rightReader, field);
+ if (leftValues != null && rightValues != null) {
+ // numOrds
+ assertEquals(info, leftValues.getValueCount(), rightValues.getValueCount());
+ // ords
+ BytesRef scratchLeft = new BytesRef();
+ BytesRef scratchRight = new BytesRef();
+ for (int i = 0; i < leftValues.getValueCount(); i++) {
+ leftValues.lookupOrd(i, scratchLeft);
+ rightValues.lookupOrd(i, scratchRight);
+ assertEquals(info, scratchLeft, scratchRight);
+ }
+ // ord lists
+ for(int docID=0;docID<leftReader.maxDoc();docID++) {
+ leftValues.setDocument(docID);
+ rightValues.setDocument(docID);
+ long ord;
+ while ((ord = leftValues.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
+ assertEquals(info, ord, rightValues.nextOrd());
+ }
+ assertEquals(info, SortedSetDocValues.NO_MORE_ORDS, rightValues.nextOrd());
+ }
+ } else {
+ assertNull(info, leftValues);
+ assertNull(info, rightValues);
+ }
+ }
}
}
Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java?rev=1448085&r1=1448084&r2=1448085&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java Wed Feb 20 11:19:22 2013
@@ -40,6 +40,7 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
+import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
@@ -1025,6 +1026,10 @@ public class TestIndexWriter extends Luc
doc.add(new NumericDocValuesField("numericdv", 500));
doc.add(new SortedDocValuesField("sorteddv", new BytesRef("500")));
}
+ if (defaultCodecSupportsSortedSet()) {
+ doc.add(new SortedSetDocValuesField("sortedsetdv", new BytesRef("one")));
+ doc.add(new SortedSetDocValuesField("sortedsetdv", new BytesRef("two")));
+ }
w.addDocument(doc);
doc = new Document();
doc.add(newStringField(random, "id", "501", Field.Store.NO));
@@ -1034,6 +1039,10 @@ public class TestIndexWriter extends Luc
doc.add(new NumericDocValuesField("numericdv", 501));
doc.add(new SortedDocValuesField("sorteddv", new BytesRef("501")));
}
+ if (defaultCodecSupportsSortedSet()) {
+ doc.add(new SortedSetDocValuesField("sortedsetdv", new BytesRef("two")));
+ doc.add(new SortedSetDocValuesField("sortedsetdv", new BytesRef("three")));
+ }
w.addDocument(doc);
w.deleteDocuments(new Term("id", "500"));
w.close();
@@ -1061,6 +1070,7 @@ public class TestIndexWriter extends Luc
Field binaryDVField = null;
Field numericDVField = null;
Field sortedDVField = null;
+ Field sortedSetDVField = new SortedSetDocValuesField("sortedsetdv", new BytesRef());
doc.add(idField);
doc.add(newField(random, "field", "some text contents", storedTextType));
if (defaultCodecSupportsDocValues()) {
@@ -1071,6 +1081,9 @@ public class TestIndexWriter extends Luc
doc.add(numericDVField);
doc.add(sortedDVField);
}
+ if (defaultCodecSupportsSortedSet()) {
+ doc.add(sortedSetDVField);
+ }
for(int i=0;i<100;i++) {
idField.setStringValue(Integer.toString(i));
if (defaultCodecSupportsDocValues()) {
@@ -1078,6 +1091,7 @@ public class TestIndexWriter extends Luc
numericDVField.setLongValue(i);
sortedDVField.setBytesValue(new BytesRef(idField.stringValue()));
}
+ sortedSetDVField.setBytesValue(new BytesRef(idField.stringValue()));
int action = random.nextInt(100);
if (action == 17) {
w.addIndexes(adder);
Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java?rev=1448085&r1=1448084&r2=1448085&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java Wed Feb 20 11:19:22 2013
@@ -33,6 +33,7 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
+import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
@@ -145,6 +146,10 @@ public class TestIndexWriterExceptions e
doc.add(new BinaryDocValuesField("binarydv", new BytesRef("hello")));
doc.add(new SortedDocValuesField("sorteddv", new BytesRef("world")));
}
+ if (defaultCodecSupportsSortedSet()) {
+ doc.add(new SortedSetDocValuesField("sortedsetdv", new BytesRef("hellllo")));
+ doc.add(new SortedSetDocValuesField("sortedsetdv", new BytesRef("again")));
+ }
doc.add(newField(r, "content7", "aaa bbb ccc ddd", DocCopyIterator.custom4));
Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestMultiDocValues.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestMultiDocValues.java?rev=1448085&r1=1448084&r2=1448085&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestMultiDocValues.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestMultiDocValues.java Wed Feb 20 11:19:22 2013
@@ -17,11 +17,14 @@ package org.apache.lucene.index;
* limitations under the License.
*/
+import java.util.ArrayList;
+
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
+import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
@@ -191,4 +194,133 @@ public class TestMultiDocValues extends
ir2.close();
dir.close();
}
+
+ public void testSortedSet() throws Exception {
+ assumeTrue("codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
+ Directory dir = newDirectory();
+
+ IndexWriterConfig iwc = newIndexWriterConfig(random(), TEST_VERSION_CURRENT, null);
+ iwc.setMergePolicy(newLogMergePolicy());
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+
+ int numDocs = atLeast(500);
+ for (int i = 0; i < numDocs; i++) {
+ Document doc = new Document();
+ int numValues = random().nextInt(5);
+ for (int j = 0; j < numValues; j++) {
+ doc.add(new SortedSetDocValuesField("bytes", new BytesRef(_TestUtil.randomUnicodeString(random()))));
+ }
+ iw.addDocument(doc);
+ if (random().nextInt(17) == 0) {
+ iw.commit();
+ }
+ }
+ DirectoryReader ir = iw.getReader();
+ iw.forceMerge(1);
+ DirectoryReader ir2 = iw.getReader();
+ AtomicReader merged = getOnlySegmentReader(ir2);
+ iw.close();
+
+ SortedSetDocValues multi = MultiDocValues.getSortedSetValues(ir, "bytes");
+ SortedSetDocValues single = merged.getSortedSetDocValues("bytes");
+ if (multi == null) {
+ assertNull(single);
+ } else {
+ assertEquals(single.getValueCount(), multi.getValueCount());
+ BytesRef actual = new BytesRef();
+ BytesRef expected = new BytesRef();
+ // check values
+ for (long i = 0; i < single.getValueCount(); i++) {
+ single.lookupOrd(i, expected);
+ multi.lookupOrd(i, actual);
+ assertEquals(expected, actual);
+ }
+ // check ord list
+ for (int i = 0; i < numDocs; i++) {
+ single.setDocument(i);
+ ArrayList<Long> expectedList = new ArrayList<Long>();
+ long ord;
+ while ((ord = single.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
+ expectedList.add(ord);
+ }
+
+ multi.setDocument(i);
+ int upto = 0;
+ while ((ord = multi.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
+ assertEquals(expectedList.get(upto).longValue(), ord);
+ upto++;
+ }
+ assertEquals(expectedList.size(), upto);
+ }
+ }
+
+ ir.close();
+ ir2.close();
+ dir.close();
+ }
+
+ // tries to make more dups than testSortedSet
+ public void testSortedSetWithDups() throws Exception {
+ assumeTrue("codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
+ Directory dir = newDirectory();
+
+ IndexWriterConfig iwc = newIndexWriterConfig(random(), TEST_VERSION_CURRENT, null);
+ iwc.setMergePolicy(newLogMergePolicy());
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+
+ int numDocs = atLeast(500);
+ for (int i = 0; i < numDocs; i++) {
+ Document doc = new Document();
+ int numValues = random().nextInt(5);
+ for (int j = 0; j < numValues; j++) {
+ doc.add(new SortedSetDocValuesField("bytes", new BytesRef(_TestUtil.randomSimpleString(random(), 2))));
+ }
+ iw.addDocument(doc);
+ if (random().nextInt(17) == 0) {
+ iw.commit();
+ }
+ }
+ DirectoryReader ir = iw.getReader();
+ iw.forceMerge(1);
+ DirectoryReader ir2 = iw.getReader();
+ AtomicReader merged = getOnlySegmentReader(ir2);
+ iw.close();
+
+ SortedSetDocValues multi = MultiDocValues.getSortedSetValues(ir, "bytes");
+ SortedSetDocValues single = merged.getSortedSetDocValues("bytes");
+ if (multi == null) {
+ assertNull(single);
+ } else {
+ assertEquals(single.getValueCount(), multi.getValueCount());
+ BytesRef actual = new BytesRef();
+ BytesRef expected = new BytesRef();
+ // check values
+ for (long i = 0; i < single.getValueCount(); i++) {
+ single.lookupOrd(i, expected);
+ multi.lookupOrd(i, actual);
+ assertEquals(expected, actual);
+ }
+ // check ord list
+ for (int i = 0; i < numDocs; i++) {
+ single.setDocument(i);
+ ArrayList<Long> expectedList = new ArrayList<Long>();
+ long ord;
+ while ((ord = single.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
+ expectedList.add(ord);
+ }
+
+ multi.setDocument(i);
+ int upto = 0;
+ while ((ord = multi.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
+ assertEquals(expectedList.get(upto).longValue(), ord);
+ upto++;
+ }
+ assertEquals(expectedList.size(), upto);
+ }
+ }
+
+ ir.close();
+ ir2.close();
+ dir.close();
+ }
}
Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java?rev=1448085&r1=1448084&r2=1448085&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java Wed Feb 20 11:19:22 2013
@@ -29,12 +29,14 @@ import java.util.concurrent.atomic.Atomi
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
+import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.*;
@@ -263,45 +265,33 @@ public class TestFieldCache extends Luce
terms = cache.getTerms(reader, "bogusfield");
// getDocTermOrds
- DocTermOrds termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField");
- TermsEnum termsEnum = termOrds.getOrdTermsEnum(reader);
- assertSame("Second request to cache return same DocTermOrds", termOrds, cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField"));
- DocTermOrds.TermOrdsIterator reuse = null;
+ SortedSetDocValues termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField");
+ int numEntries = cache.getCacheEntries().length;
+ // ask for it again, and check that we didnt create any additional entries:
+ termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField");
+ assertEquals(numEntries, cache.getCacheEntries().length);
+
for (int i = 0; i < NUM_DOCS; i++) {
- reuse = termOrds.lookup(i, reuse);
- final int[] buffer = new int[5];
+ termOrds.setDocument(i);
// This will remove identical terms. A DocTermOrds doesn't return duplicate ords for a docId
List<BytesRef> values = new ArrayList<BytesRef>(new LinkedHashSet<BytesRef>(Arrays.asList(multiValued[i])));
- for (;;) {
- int chunk = reuse.read(buffer);
- if (chunk == 0) {
- for (int ord = 0; ord < values.size(); ord++) {
- BytesRef term = values.get(ord);
- assertNull(String.format(Locale.ROOT, "Document[%d] misses field must be null. Has value %s for ord %d", i, term, ord), term);
- }
- break;
- }
-
- for(int idx=0; idx < chunk; idx++) {
- int key = buffer[idx];
- termsEnum.seekExact((long) key);
- String actual = termsEnum.term().utf8ToString();
- String expected = values.get(idx).utf8ToString();
- if (!expected.equals(actual)) {
- reuse = termOrds.lookup(i, reuse);
- reuse.read(buffer);
- }
- assertTrue(String.format(Locale.ROOT, "Expected value %s for doc %d and ord %d, but was %s", expected, i, idx, actual), expected.equals(actual));
- }
-
- if (chunk <= buffer.length) {
+ for (BytesRef v : values) {
+ if (v == null) {
+ // why does this test use null values... instead of an empty list: confusing
break;
}
+ long ord = termOrds.nextOrd();
+ assert ord != SortedSetDocValues.NO_MORE_ORDS;
+ BytesRef scratch = new BytesRef();
+ termOrds.lookupOrd(ord, scratch);
+ assertEquals(v, scratch);
}
+ assertEquals(SortedSetDocValues.NO_MORE_ORDS, termOrds.nextOrd());
}
// test bad field
termOrds = cache.getDocTermOrds(reader, "bogusfield");
+ assertTrue(termOrds.getValueCount() == 0);
FieldCache.DEFAULT.purge(reader);
}
@@ -445,11 +435,16 @@ public class TestFieldCache extends Luce
public void testDocValuesIntegration() throws Exception {
assumeTrue("3.x does not support docvalues", defaultCodecSupportsDocValues());
Directory dir = newDirectory();
- RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
+ IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
Document doc = new Document();
doc.add(new BinaryDocValuesField("binary", new BytesRef("binary value")));
doc.add(new SortedDocValuesField("sorted", new BytesRef("sorted value")));
doc.add(new NumericDocValuesField("numeric", 42));
+ if (defaultCodecSupportsSortedSet()) {
+ doc.add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value1")));
+ doc.add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value2")));
+ }
iw.addDocument(doc);
DirectoryReader ir = iw.getReader();
iw.close();
@@ -472,15 +467,30 @@ public class TestFieldCache extends Luce
fail();
} catch (IllegalStateException expected) {}
+ try {
+ FieldCache.DEFAULT.getDocTermOrds(ar, "binary");
+ fail();
+ } catch (IllegalStateException expected) {}
+
+ try {
+ new DocTermOrds(ar, "binary");
+ fail();
+ } catch (IllegalStateException expected) {}
+
Bits bits = FieldCache.DEFAULT.getDocsWithField(ar, "binary");
assertTrue(bits instanceof Bits.MatchAllBits);
- // Sorted type: can be retrieved via getTerms() or getTermsIndex()
+ // Sorted type: can be retrieved via getTerms(), getTermsIndex(), getDocTermOrds()
try {
FieldCache.DEFAULT.getInts(ar, "sorted", false);
fail();
} catch (IllegalStateException expected) {}
+ try {
+ new DocTermOrds(ar, "sorted");
+ fail();
+ } catch (IllegalStateException expected) {}
+
binary = FieldCache.DEFAULT.getTerms(ar, "sorted");
binary.get(0, scratch);
assertEquals("sorted value", scratch.utf8ToString());
@@ -491,6 +501,12 @@ public class TestFieldCache extends Luce
sorted.get(0, scratch);
assertEquals("sorted value", scratch.utf8ToString());
+ SortedSetDocValues sortedSet = FieldCache.DEFAULT.getDocTermOrds(ar, "sorted");
+ sortedSet.setDocument(0);
+ assertEquals(0, sortedSet.nextOrd());
+ assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());
+ assertEquals(1, sortedSet.getValueCount());
+
bits = FieldCache.DEFAULT.getDocsWithField(ar, "sorted");
assertTrue(bits instanceof Bits.MatchAllBits);
@@ -508,9 +524,52 @@ public class TestFieldCache extends Luce
fail();
} catch (IllegalStateException expected) {}
+ try {
+ FieldCache.DEFAULT.getDocTermOrds(ar, "numeric");
+ fail();
+ } catch (IllegalStateException expected) {}
+
+ try {
+ new DocTermOrds(ar, "numeric");
+ fail();
+ } catch (IllegalStateException expected) {}
+
bits = FieldCache.DEFAULT.getDocsWithField(ar, "numeric");
assertTrue(bits instanceof Bits.MatchAllBits);
+ // SortedSet type: can be retrieved via getDocTermOrds()
+ if (defaultCodecSupportsSortedSet()) {
+ try {
+ FieldCache.DEFAULT.getInts(ar, "sortedset", false);
+ fail();
+ } catch (IllegalStateException expected) {}
+
+ try {
+ FieldCache.DEFAULT.getTerms(ar, "sortedset");
+ fail();
+ } catch (IllegalStateException expected) {}
+
+ try {
+ FieldCache.DEFAULT.getTermsIndex(ar, "sortedset");
+ fail();
+ } catch (IllegalStateException expected) {}
+
+ try {
+ new DocTermOrds(ar, "sortedset");
+ fail();
+ } catch (IllegalStateException expected) {}
+
+ sortedSet = FieldCache.DEFAULT.getDocTermOrds(ar, "sortedset");
+ sortedSet.setDocument(0);
+ assertEquals(0, sortedSet.nextOrd());
+ assertEquals(1, sortedSet.nextOrd());
+ assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());
+ assertEquals(2, sortedSet.getValueCount());
+
+ bits = FieldCache.DEFAULT.getDocsWithField(ar, "sortedset");
+ assertTrue(bits instanceof Bits.MatchAllBits);
+ }
+
ir.close();
dir.close();
}
@@ -557,6 +616,10 @@ public class TestFieldCache extends Luce
sorted.get(0, scratch);
assertTrue(scratch.bytes == BinaryDocValues.MISSING);
+ SortedSetDocValues sortedSet = cache.getDocTermOrds(ar, "bogusmultivalued");
+ sortedSet.setDocument(0);
+ assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());
+
Bits bits = cache.getDocsWithField(ar, "bogusbits");
assertFalse(bits.get(0));
@@ -578,6 +641,7 @@ public class TestFieldCache extends Luce
doc.add(new StoredField("bogusdoubles", "bogus"));
doc.add(new StoredField("bogusterms", "bogus"));
doc.add(new StoredField("bogustermsindex", "bogus"));
+ doc.add(new StoredField("bogusmultivalued", "bogus"));
doc.add(new StoredField("bogusbits", "bogus"));
iw.addDocument(doc);
DirectoryReader ir = iw.getReader();
@@ -617,6 +681,10 @@ public class TestFieldCache extends Luce
sorted.get(0, scratch);
assertTrue(scratch.bytes == BinaryDocValues.MISSING);
+ SortedSetDocValues sortedSet = cache.getDocTermOrds(ar, "bogusmultivalued");
+ sortedSet.setDocument(0);
+ assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());
+
Bits bits = cache.getDocsWithField(ar, "bogusbits");
assertFalse(bits.get(0));
Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java?rev=1448085&r1=1448084&r2=1448085&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java Wed Feb 20 11:19:22 2013
@@ -805,42 +805,55 @@ public class TestPackedInts extends Luce
}
public void testAppendingLongBuffer() {
- final long[] arr = new long[RandomInts.randomIntBetween(random(), 1, 2000000)];
- for (int bpv : new int[] {0, 1, 63, 64, RandomInts.randomIntBetween(random(), 2, 61)}) {
- if (bpv == 0) {
- Arrays.fill(arr, random().nextLong());
- } else if (bpv == 64) {
+ final long[] arr = new long[RandomInts.randomIntBetween(random(), 1, 1000000)];
+ for (int bpv : new int[] {0, 1, 63, 64, RandomInts.randomIntBetween(random(), 2, 62)}) {
+ for (boolean monotonic : new boolean[] {true, false}) {
+ AbstractAppendingLongBuffer buf;
+ final int inc;
+ if (monotonic) {
+ buf = new MonotonicAppendingLongBuffer();
+ inc = _TestUtil.nextInt(random(), -1000, 1000);
+ } else {
+ buf = new AppendingLongBuffer();
+ inc = 0;
+ }
+ if (bpv == 0) {
+ arr[0] = random().nextLong();
+ for (int i = 1; i < arr.length; ++i) {
+ arr[i] = arr[i-1] + inc;
+ }
+ } else if (bpv == 64) {
+ for (int i = 0; i < arr.length; ++i) {
+ arr[i] = random().nextLong();
+ }
+ } else {
+ final long minValue = _TestUtil.nextLong(random(), Long.MIN_VALUE, Long.MAX_VALUE - PackedInts.maxValue(bpv));
+ for (int i = 0; i < arr.length; ++i) {
+ arr[i] = minValue + inc * i + random().nextLong() & PackedInts.maxValue(bpv); // _TestUtil.nextLong is too slow
+ }
+ }
for (int i = 0; i < arr.length; ++i) {
- arr[i] = random().nextLong();
+ buf.add(arr[i]);
}
- } else {
- final long minValue = _TestUtil.nextLong(random(), Long.MIN_VALUE, Long.MAX_VALUE - PackedInts.maxValue(bpv));
+ assertEquals(arr.length, buf.size());
+ final AbstractAppendingLongBuffer.Iterator it = buf.iterator();
for (int i = 0; i < arr.length; ++i) {
- arr[i] = minValue + random().nextLong() & PackedInts.maxValue(bpv); // _TestUtil.nextLong is too slow
+ if (random().nextBoolean()) {
+ assertTrue(it.hasNext());
+ }
+ assertEquals(arr[i], it.next());
}
- }
- AppendingLongBuffer buf = new AppendingLongBuffer();
- for (int i = 0; i < arr.length; ++i) {
- buf.add(arr[i]);
- }
- assertEquals(arr.length, buf.size());
- final AppendingLongBuffer.Iterator it = buf.iterator();
- for (int i = 0; i < arr.length; ++i) {
- if (random().nextBoolean()) {
- assertTrue(it.hasNext());
+ assertFalse(it.hasNext());
+
+ for (int i = 0; i < arr.length; ++i) {
+ assertEquals(arr[i], buf.get(i));
}
- assertEquals(arr[i], it.next());
- }
- assertFalse(it.hasNext());
-
- for (int i = 0; i < arr.length; ++i) {
- assertEquals(arr[i], buf.get(i));
+
+ final long expectedBytesUsed = RamUsageEstimator.sizeOf(buf);
+ final long computedBytesUsed = buf.ramBytesUsed();
+ assertEquals("got " + computedBytesUsed + ", expected: " + expectedBytesUsed,
+ expectedBytesUsed, computedBytesUsed);
}
-
- final long expectedBytesUsed = RamUsageEstimator.sizeOf(buf);
- final long computedBytesUsed = buf.ramBytesUsed();
- assertEquals("got " + computedBytesUsed + ", expected: " + expectedBytesUsed,
- expectedBytesUsed, computedBytesUsed);
}
}
Modified: lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42DocValuesConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42DocValuesConsumer.java?rev=1448085&r1=1448084&r2=1448085&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42DocValuesConsumer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42DocValuesConsumer.java Wed Feb 20 11:19:22 2013
@@ -98,6 +98,11 @@ public class Facet42DocValuesConsumer ex
}
@Override
+ public void addSortedSetField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrdCount, Iterable<Number> ords) throws IOException {
+ throw new UnsupportedOperationException("FacetsDocValues can only handle binary fields");
+ }
+
+ @Override
public void close() throws IOException {
boolean success = false;
try {
Modified: lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42DocValuesProducer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42DocValuesProducer.java?rev=1448085&r1=1448084&r2=1448085&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42DocValuesProducer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42DocValuesProducer.java Wed Feb 20 11:19:22 2013
@@ -29,6 +29,7 @@ import org.apache.lucene.index.IndexFile
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SortedDocValues;
+import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.IOUtils;
@@ -75,6 +76,11 @@ class Facet42DocValuesProducer extends D
}
@Override
+ public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
+ throw new UnsupportedOperationException("FacetsDocValues only implements binary");
+ }
+
+ @Override
public void close() throws IOException {
}
}