You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2016/10/03 07:34:17 UTC
[1/2] lucene-solr:master: LUCENE-7457: Make Lucene54DocValuesFormat's
sparse case actually implement an iterator.
Repository: lucene-solr
Updated Branches:
refs/heads/master cc4c78022 -> 2f88bc80c
LUCENE-7457: Make Lucene54DocValuesFormat's sparse case actually implement an iterator.
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/2f88bc80
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/2f88bc80
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/2f88bc80
Branch: refs/heads/master
Commit: 2f88bc80c2c1afed975199adb3f340fcec8179aa
Parents: d0ff2d2
Author: Adrien Grand <jp...@gmail.com>
Authored: Mon Oct 3 09:20:29 2016 +0200
Committer: Adrien Grand <jp...@gmail.com>
Committed: Mon Oct 3 09:33:59 2016 +0200
----------------------------------------------------------------------
.../lucene54/Lucene54DocValuesProducer.java | 262 +++++++++++--------
.../lucene54/TestLucene54DocValuesFormat.java | 55 ++--
2 files changed, 195 insertions(+), 122 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2f88bc80/lucene/core/src/java/org/apache/lucene/codecs/lucene54/Lucene54DocValuesProducer.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene54/Lucene54DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene54/Lucene54DocValuesProducer.java
index 62e631c..9356aed 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene54/Lucene54DocValuesProducer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene54/Lucene54DocValuesProducer.java
@@ -443,8 +443,7 @@ final class Lucene54DocValuesProducer extends DocValuesProducer implements Close
Bits docsWithField;
if (entry.format == SPARSE_COMPRESSED) {
- // TODO: make a real iterator in this case!
- docsWithField = getSparseLiveBits(entry);
+ return getSparseNumericDocValues(entry);
} else {
if (entry.missingOffset == ALL_MISSING) {
return DocValues.emptyNumeric();
@@ -566,8 +565,7 @@ final class Lucene54DocValuesProducer extends DocValuesProducer implements Close
};
}
case SPARSE_COMPRESSED:
- final SparseBits docsWithField = getSparseLiveBits(entry);
- final LongValues values = getNumeric(entry.nonMissingValues);
+ final SparseNumericDocValues values = getSparseNumericDocValues(entry);
final long missingValue;
switch (entry.numberType) {
case ORDINAL:
@@ -579,141 +577,125 @@ final class Lucene54DocValuesProducer extends DocValuesProducer implements Close
default:
throw new AssertionError();
}
- return new SparseLongValues(docsWithField, values, missingValue);
+ return new SparseNumericDocValuesRandomAccessWrapper(values, missingValue);
default:
throw new AssertionError();
}
}
- static final class SparseBits implements Bits {
+ static final class SparseNumericDocValues extends NumericDocValues {
- final long maxDoc, docIDsLength, firstDocId;
- final LongValues docIds;
+ final int docIDsLength;
+ final LongValues docIds, values;
- long index; // index of docId in docIds
- long docId; // doc ID at index
- long nextDocId; // doc ID at (index+1)
+ int index, doc;
- SparseBits(long maxDoc, long docIDsLength, LongValues docIDs) {
- if (docIDsLength > 0 && maxDoc <= docIDs.get(docIDsLength - 1)) {
- throw new IllegalArgumentException("maxDoc must be > the last element of docIDs");
- }
- this.maxDoc = maxDoc;
+ SparseNumericDocValues(int docIDsLength, LongValues docIDs, LongValues values) {
this.docIDsLength = docIDsLength;
this.docIds = docIDs;
- this.firstDocId = docIDsLength == 0 ? maxDoc : docIDs.get(0);
+ this.values = values;
reset();
}
- private void reset() {
+ void reset() {
index = -1;
- this.docId = -1;
- this.nextDocId = firstDocId;
+ doc = -1;
+ }
+
+ @Override
+ public int docID() {
+ return doc;
}
- /** Gallop forward and stop as soon as an index is found that is greater than
- * the given docId. {@code index} will store an index that stores a value
- * that is <= {@code docId} while the return value will give an index
- * that stores a value that is > {@code docId}. These indices can then be
- * used to binary search. */
- private long gallop(long docId) {
- index++;
- this.docId = nextDocId;
- long hiIndex = index + 1;
-
- while (true) {
+ @Override
+ public int nextDoc() throws IOException {
+ if (index >= docIDsLength - 1) {
+ index = docIDsLength;
+ return doc = NO_MORE_DOCS;
+ }
+ return doc = (int) docIds.get(++index);
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+ long loIndex = index;
+ long step = 1;
+ long hiIndex;
+ int hiDoc;
+
+ // gallop forward by exponentially growing the interval
+ // in order to find an interval so that the target doc
+ // is in ]lo, hi]. Compared to a regular binary search,
+ // this optimizes the case that the caller performs many
+ // advance calls by small deltas
+ do {
+ hiIndex = index + step;
if (hiIndex >= docIDsLength) {
hiIndex = docIDsLength;
- nextDocId = maxDoc;
+ hiDoc = NO_MORE_DOCS;
break;
}
-
- final long hiDocId = docIds.get(hiIndex);
- if (hiDocId > docId) {
- nextDocId = hiDocId;
+ hiDoc = (int) docIds.get(hiIndex);
+ if (hiDoc >= target) {
break;
}
-
- final long delta = hiIndex - index;
- index = hiIndex;
- this.docId = hiDocId;
- hiIndex += delta << 1; // double the step each time
- }
- return hiIndex;
- }
-
- private void binarySearch(long hiIndex, long docId) {
- while (index + 1 < hiIndex) {
- final long midIndex = (index + hiIndex) >>> 1;
- final long midDocId = docIds.get(midIndex);
- if (midDocId > docId) {
+ step <<= 1;
+ } while (true);
+
+ // now binary search
+ while (loIndex + 1 < hiIndex) {
+ final long midIndex = (loIndex + 1 + hiIndex) >>> 1;
+ final int midDoc = (int) docIds.get(midIndex);
+ if (midDoc >= target) {
hiIndex = midIndex;
- nextDocId = midDocId;
+ hiDoc = midDoc;
} else {
- index = midIndex;
- this.docId = midDocId;
+ loIndex = midIndex;
}
}
- }
- private boolean checkInvariants(long nextIndex, long docId) {
- assert this.docId <= docId;
- assert this.nextDocId > docId;
- assert (index == -1 && this.docId == -1) || this.docId == docIds.get(index);
- assert (nextIndex == docIDsLength && nextDocId == maxDoc) || nextDocId == docIds.get(nextIndex);
- return true;
- }
-
- private void exponentialSearch(long docId) {
- // seek forward by doubling the interval on each iteration
- final long hiIndex = gallop(docId);
- assert checkInvariants(hiIndex, docId);
-
- // now perform the actual binary search
- binarySearch(hiIndex, docId);
- }
-
- boolean get(final long docId) {
- if (docId < this.docId) {
- // reading doc IDs backward, go back to the start
- reset();
- }
-
- if (docId >= nextDocId) {
- exponentialSearch(docId);
- }
-
- assert checkInvariants(index + 1, docId);
- return docId == this.docId;
+ index = (int) hiIndex;
+ return doc = hiDoc;
}
@Override
- public boolean get(int index) {
- return get((long) index);
+ public long longValue() {
+ assert index >= 0;
+ assert index < docIDsLength;
+ return values.get(index);
}
@Override
- public int length() {
- return Math.toIntExact(maxDoc);
+ public long cost() {
+ return docIDsLength;
}
}
- static class SparseLongValues extends LongValues {
+ static class SparseNumericDocValuesRandomAccessWrapper extends LongValues {
- final SparseBits docsWithField;
- final LongValues values;
+ final SparseNumericDocValues values;
final long missingValue;
- SparseLongValues(SparseBits docsWithField, LongValues values, long missingValue) {
- this.docsWithField = docsWithField;
+ SparseNumericDocValuesRandomAccessWrapper(SparseNumericDocValues values, long missingValue) {
this.values = values;
this.missingValue = missingValue;
}
@Override
- public long get(long docId) {
- if (docsWithField.get(docId)) {
- return values.get(docsWithField.index);
+ public long get(long longIndex) {
+ final int index = Math.toIntExact(longIndex);
+ int doc = values.docID();
+ if (doc >= index) {
+ values.reset();
+ }
+ assert values.docID() < index;
+ try {
+ doc = values.advance(index);
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ if (doc == index) {
+ return values.longValue();
} else {
return missingValue;
}
@@ -837,6 +819,47 @@ final class Lucene54DocValuesProducer extends DocValuesProducer implements Close
final LegacyBinaryDocValues binary = getLegacyBinary(field);
NumericEntry entry = ords.get(field.name);
final LongValues ordinals = getNumeric(entry);
+ if (entry.format == SPARSE_COMPRESSED) {
+ final SparseNumericDocValues sparseValues = ((SparseNumericDocValuesRandomAccessWrapper) ordinals).values;
+ return new SortedDocValues() {
+
+ @Override
+ public int ordValue() {
+ return (int) sparseValues.longValue();
+ }
+
+ @Override
+ public BytesRef lookupOrd(int ord) {
+ return binary.get(ord);
+ }
+
+ @Override
+ public int getValueCount() {
+ return valueCount;
+ }
+
+ @Override
+ public int docID() {
+ return sparseValues.docID();
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+ return sparseValues.nextDoc();
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+ return sparseValues.advance(target);
+ }
+
+ @Override
+ public long cost() {
+ return sparseValues.cost();
+ }
+
+ };
+ }
return new SortedDocValues() {
private int docID = -1;
private int ord;
@@ -927,12 +950,43 @@ final class Lucene54DocValuesProducer extends DocValuesProducer implements Close
if (ss.format == SORTED_SINGLE_VALUED) {
NumericEntry numericEntry = numerics.get(field.name);
final LongValues values = getNumeric(numericEntry);
- final Bits docsWithField;
if (numericEntry.format == SPARSE_COMPRESSED) {
- docsWithField = ((SparseLongValues) values).docsWithField;
- } else {
- docsWithField = getLiveBits(numericEntry.missingOffset, maxDoc);
+ SparseNumericDocValues sparseValues = ((SparseNumericDocValuesRandomAccessWrapper) values).values;
+ return new SortedNumericDocValues() {
+
+ @Override
+ public long nextValue() throws IOException {
+ return sparseValues.longValue();
+ }
+
+ @Override
+ public int docValueCount() {
+ return 1;
+ }
+
+ @Override
+ public int docID() {
+ return sparseValues.docID();
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+ return sparseValues.nextDoc();
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+ return sparseValues.advance(target);
+ }
+
+ @Override
+ public long cost() {
+ return sparseValues.cost();
+ }
+
+ };
}
+ final Bits docsWithField = getLiveBits(numericEntry.missingOffset, maxDoc);
return new SortedNumericDocValues() {
int docID = -1;
@@ -949,7 +1003,7 @@ final class Lucene54DocValuesProducer extends DocValuesProducer implements Close
docID = NO_MORE_DOCS;
break;
}
-
+
if (docsWithField.get(docID)) {
// TODO: use .nextSetBit here, at least!!
break;
@@ -1192,7 +1246,8 @@ final class Lucene54DocValuesProducer extends DocValuesProducer implements Close
private SortedSetDocValues getSortedSetTable(FieldInfo field, SortedSetEntry ss) throws IOException {
final long valueCount = binaries.get(field.name).count;
final LongBinaryDocValues binary = (LongBinaryDocValues) getLegacyBinary(field);
- final LongValues ordinals = getNumeric(ords.get(field.name));
+ final NumericEntry ordinalsEntry = ords.get(field.name);
+ final LongValues ordinals = getNumeric(ordinalsEntry);
final long[] table = ss.table;
final int[] offsets = ss.tableOffsets;
@@ -1273,10 +1328,11 @@ final class Lucene54DocValuesProducer extends DocValuesProducer implements Close
}
}
- private SparseBits getSparseLiveBits(NumericEntry entry) throws IOException {
+ private SparseNumericDocValues getSparseNumericDocValues(NumericEntry entry) throws IOException {
final RandomAccessInput docIdsData = this.data.randomAccessSlice(entry.missingOffset, entry.offset - entry.missingOffset);
final LongValues docIDs = DirectMonotonicReader.getInstance(entry.monotonicMeta, docIdsData);
- return new SparseBits(maxDoc, entry.numDocsWithValue, docIDs);
+ final LongValues values = getNumeric(entry.nonMissingValues); // cannot be sparse
+ return new SparseNumericDocValues(Math.toIntExact(entry.numDocsWithValue), docIDs, values);
}
@Override
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2f88bc80/lucene/core/src/test/org/apache/lucene/codecs/lucene54/TestLucene54DocValuesFormat.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene54/TestLucene54DocValuesFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene54/TestLucene54DocValuesFormat.java
index f798148..dd7cdcc 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/lucene54/TestLucene54DocValuesFormat.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene54/TestLucene54DocValuesFormat.java
@@ -31,8 +31,8 @@ import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.asserting.AssertingCodec;
-import org.apache.lucene.codecs.lucene54.Lucene54DocValuesProducer.SparseBits;
-import org.apache.lucene.codecs.lucene54.Lucene54DocValuesProducer.SparseLongValues;
+import org.apache.lucene.codecs.lucene54.Lucene54DocValuesProducer.SparseNumericDocValues;
+import org.apache.lucene.codecs.lucene54.Lucene54DocValuesProducer.SparseNumericDocValuesRandomAccessWrapper;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@@ -61,6 +61,7 @@ import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum.SeekStatus;
+import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMFile;
@@ -427,13 +428,13 @@ public class TestLucene54DocValuesFormat extends BaseCompressingDocValuesFormatT
}
}
- public void testSparseLongValues() {
+ public void testSparseLongValues() throws IOException {
final int iters = atLeast(5);
for (int iter = 0; iter < iters; ++iter) {
final int numDocs = TestUtil.nextInt(random(), 0, 100);
- final long[] docIds = new long[numDocs];
+ final int[] docIds = new int[numDocs];
final long[] values = new long[numDocs];
- final long maxDoc;
+ final int maxDoc;
if (numDocs == 0) {
maxDoc = 1 + random().nextInt(10);
} else {
@@ -459,35 +460,51 @@ public class TestLucene54DocValuesFormat extends BaseCompressingDocValuesFormatT
return values[Math.toIntExact(index)];
}
};
- final SparseBits liveBits = new SparseBits(maxDoc, numDocs, docIdsValues);
- // random-access
- for (int i = 0; i < 2000; ++i) {
- final long docId = TestUtil.nextLong(random(), 0, maxDoc - 1);
- final boolean exists = liveBits.get(Math.toIntExact(docId));
- assertEquals(Arrays.binarySearch(docIds, docId) >= 0, exists);
- }
+ final SparseNumericDocValues sparseValues = new SparseNumericDocValues(numDocs, docIdsValues, valuesValues);
+
// sequential access
- for (int docId = 0; docId < maxDoc; docId += random().nextInt(3)) {
- final boolean exists = liveBits.get(Math.toIntExact(docId));
- assertEquals(Arrays.binarySearch(docIds, docId) >= 0, exists);
+ assertEquals(-1, sparseValues.docID());
+ for (int i = 0; i < docIds.length; ++i) {
+ assertEquals(docIds[i], sparseValues.nextDoc());
+ }
+ assertEquals(DocIdSetIterator.NO_MORE_DOCS, sparseValues.nextDoc());
+
+ // advance
+ for (int i = 0; i < 2000; ++i) {
+ final int target = TestUtil.nextInt(random(), 0, (int) maxDoc);
+ int index = Arrays.binarySearch(docIds, target);
+ if (index < 0) {
+ index = -1 - index;
+ }
+ sparseValues.reset();
+ if (index > 0) {
+ assertEquals(docIds[index - 1], sparseValues.advance(Math.toIntExact(docIds[index - 1])));
+ }
+ if (index == docIds.length) {
+ assertEquals(DocIdSetIterator.NO_MORE_DOCS, sparseValues.advance(target));
+ } else {
+ assertEquals(docIds[index], sparseValues.advance(target));
+ }
}
- final SparseLongValues sparseValues = new SparseLongValues(liveBits, valuesValues, missingValue);
+ final SparseNumericDocValuesRandomAccessWrapper raWrapper = new SparseNumericDocValuesRandomAccessWrapper(sparseValues, missingValue);
+
// random-access
for (int i = 0; i < 2000; ++i) {
- final long docId = TestUtil.nextLong(random(), 0, maxDoc - 1);
+ final int docId = TestUtil.nextInt(random(), 0, maxDoc - 1);
final int idx = Arrays.binarySearch(docIds, docId);
- final long value = sparseValues.get(docId);
+ final long value = raWrapper.get(docId);
if (idx >= 0) {
assertEquals(values[idx], value);
} else {
assertEquals(missingValue, value);
}
}
+
// sequential access
for (int docId = 0; docId < maxDoc; docId += random().nextInt(3)) {
final int idx = Arrays.binarySearch(docIds, docId);
- final long value = sparseValues.get(docId);
+ final long value = raWrapper.get(docId);
if (idx >= 0) {
assertEquals(values[idx], value);
} else {
[2/2] lucene-solr:master: LUCENE-7459: LegacyNumericDocValuesWrapper
should check the value before the bits for docs that have a value.
Posted by jp...@apache.org.
LUCENE-7459: LegacyNumericDocValuesWrapper should check the value before the bits for docs that have a value.
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/d0ff2d27
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/d0ff2d27
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/d0ff2d27
Branch: refs/heads/master
Commit: d0ff2d2735170b226e1fead100b9a4c0c0dcb50a
Parents: cc4c780
Author: Adrien Grand <jp...@gmail.com>
Authored: Mon Oct 3 09:14:06 2016 +0200
Committer: Adrien Grand <jp...@gmail.com>
Committed: Mon Oct 3 09:33:59 2016 +0200
----------------------------------------------------------------------
.../org/apache/lucene/index/LegacyNumericDocValuesWrapper.java | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d0ff2d27/lucene/core/src/java/org/apache/lucene/index/LegacyNumericDocValuesWrapper.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/LegacyNumericDocValuesWrapper.java b/lucene/core/src/java/org/apache/lucene/index/LegacyNumericDocValuesWrapper.java
index d9c997c..64108e1 100644
--- a/lucene/core/src/java/org/apache/lucene/index/LegacyNumericDocValuesWrapper.java
+++ b/lucene/core/src/java/org/apache/lucene/index/LegacyNumericDocValuesWrapper.java
@@ -30,6 +30,7 @@ public final class LegacyNumericDocValuesWrapper extends NumericDocValues {
private final LegacyNumericDocValues values;
private final int maxDoc;
private int docID = -1;
+ private long value;
public LegacyNumericDocValuesWrapper(Bits docsWithField, LegacyNumericDocValues values) {
this.docsWithField = docsWithField;
@@ -51,7 +52,8 @@ public final class LegacyNumericDocValuesWrapper extends NumericDocValues {
public int nextDoc() {
docID++;
while (docID < maxDoc) {
- if (docsWithField.get(docID)) {
+ value = values.get(docID);
+ if (value != 0 || docsWithField.get(docID)) {
return docID;
}
docID++;
@@ -82,7 +84,7 @@ public final class LegacyNumericDocValuesWrapper extends NumericDocValues {
@Override
public long longValue() {
- return values.get(docID);
+ return value;
}
@Override