You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by gs...@apache.org on 2022/12/10 20:23:38 UTC
[lucene] branch main updated: Some minor code cleanup in IndexSortSortedNumericDocValuesRangeQuery (#12003)
This is an automated email from the ASF dual-hosted git repository.
gsmiller pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/main by this push:
new 8671e29929c Some minor code cleanup in IndexSortSortedNumericDocValuesRangeQuery (#12003)
8671e29929c is described below
commit 8671e29929ccd8c610a874b071a3ec373892ad25
Author: Greg Miller <gs...@gmail.com>
AuthorDate: Sat Dec 10 12:23:31 2022 -0800
Some minor code cleanup in IndexSortSortedNumericDocValuesRangeQuery (#12003)
* Leverage DISI static factory methods more over custom DISI impl where possible.
* Assert points field is a single-dim.
* Bound cost estimate by the cost of the doc values field (for sparse fields).
---
lucene/CHANGES.txt | 2 +
.../IndexSortSortedNumericDocValuesRangeQuery.java | 229 ++++++++++++---------
2 files changed, 130 insertions(+), 101 deletions(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 86e7df77a1b..a3f09567f04 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -157,6 +157,8 @@ Improvements
* GITHUB#11860: Improve storage efficiency of connections in the HNSW graph that Lucene uses for
vector search. (Ben Trent)
+* GITHUB#12003: Minor cleanup/improvements to IndexSortSortedNumericDocValuesRangeQuery. (Greg Miller)
+
Bug Fixes
---------------------
* GITHUB#11726: Indexing term vectors on large documents could fail due to
diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/IndexSortSortedNumericDocValuesRangeQuery.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/IndexSortSortedNumericDocValuesRangeQuery.java
index 8a3f964d496..e27f64c507f 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/IndexSortSortedNumericDocValuesRangeQuery.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/IndexSortSortedNumericDocValuesRangeQuery.java
@@ -176,8 +176,9 @@ public class IndexSortSortedNumericDocValuesRangeQuery extends Query {
@Override
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
final Weight weight = this;
- DocIdSetIterator disi = getDocIdSetIteratorOrNull(context);
- if (disi != null) {
+ IteratorAndCount itAndCount = getDocIdSetIteratorOrNull(context);
+ if (itAndCount != null) {
+ DocIdSetIterator disi = itAndCount.it;
return new ScorerSupplier() {
@Override
public Scorer get(long leadCost) throws IOException {
@@ -212,9 +213,9 @@ public class IndexSortSortedNumericDocValuesRangeQuery extends Query {
@Override
public int count(LeafReaderContext context) throws IOException {
if (context.reader().hasDeletions() == false) {
- BoundedDocIdSetIterator disi = getDocIdSetIteratorOrNull(context);
- if (disi != null && disi.delegate == null) {
- return disi.lastDoc - disi.firstDoc;
+ IteratorAndCount itAndCount = getDocIdSetIteratorOrNull(context);
+ if (itAndCount != null && itAndCount.count != -1) {
+ return itAndCount.count;
}
}
return fallbackWeight.count(context);
@@ -406,119 +407,123 @@ public class IndexSortSortedNumericDocValuesRangeQuery extends Query {
private boolean matchNone(PointValues points, byte[] queryLowerPoint, byte[] queryUpperPoint)
throws IOException {
+ assert points.getNumDimensions() == 1;
final ByteArrayComparator comparator =
ArrayUtil.getUnsignedComparator(points.getBytesPerDimension());
- for (int dim = 0; dim < points.getNumDimensions(); dim++) {
- int offset = dim * points.getBytesPerDimension();
- if (comparator.compare(points.getMinPackedValue(), offset, queryUpperPoint, offset) > 0
- || comparator.compare(points.getMaxPackedValue(), offset, queryLowerPoint, offset) < 0) {
- return true;
- }
- }
- return false;
+ return comparator.compare(points.getMinPackedValue(), 0, queryUpperPoint, 0) > 0
+ || comparator.compare(points.getMaxPackedValue(), 0, queryLowerPoint, 0) < 0;
}
private boolean matchAll(PointValues points, byte[] queryLowerPoint, byte[] queryUpperPoint)
throws IOException {
+ assert points.getNumDimensions() == 1;
final ByteArrayComparator comparator =
ArrayUtil.getUnsignedComparator(points.getBytesPerDimension());
- for (int dim = 0; dim < points.getNumDimensions(); dim++) {
- int offset = dim * points.getBytesPerDimension();
- if (comparator.compare(points.getMinPackedValue(), offset, queryLowerPoint, offset) >= 0
- && comparator.compare(points.getMaxPackedValue(), offset, queryUpperPoint, offset) <= 0) {
- return true;
- }
- }
- return false;
+ return comparator.compare(points.getMinPackedValue(), 0, queryLowerPoint, 0) >= 0
+ && comparator.compare(points.getMaxPackedValue(), 0, queryUpperPoint, 0) <= 0;
}
- private BoundedDocIdSetIterator getDocIdSetIteratorOrNullFromBkd(
+ private IteratorAndCount getDocIdSetIteratorOrNullFromBkd(
LeafReaderContext context, DocIdSetIterator delegate) throws IOException {
Sort indexSort = context.reader().getMetaData().getSort();
- if (indexSort != null
- && indexSort.getSort().length > 0
- && indexSort.getSort()[0].getField().equals(field)) {
- final boolean reverse = indexSort.getSort()[0].getReverse();
- PointValues points = context.reader().getPointValues(field);
- if (points == null) {
- return null;
- }
+ if (indexSort == null
+ || indexSort.getSort().length == 0
+ || indexSort.getSort()[0].getField().equals(field) == false) {
+ return null;
+ }
- if (points.getNumDimensions() != 1) {
- return null;
- }
+ final boolean reverse = indexSort.getSort()[0].getReverse();
- if (points.getBytesPerDimension() != Long.BYTES
- && points.getBytesPerDimension() != Integer.BYTES) {
- return null;
- }
+ PointValues points = context.reader().getPointValues(field);
+ if (points == null) {
+ return null;
+ }
- if (points.size() != points.getDocCount()) {
- return null;
- }
+ if (points.getNumDimensions() != 1) {
+ return null;
+ }
- byte[] queryLowerPoint;
- byte[] queryUpperPoint;
- if (points.getBytesPerDimension() == Integer.BYTES) {
- queryLowerPoint = IntPoint.pack((int) lowerValue).bytes;
- queryUpperPoint = IntPoint.pack((int) upperValue).bytes;
+ if (points.getBytesPerDimension() != Long.BYTES
+ && points.getBytesPerDimension() != Integer.BYTES) {
+ return null;
+ }
+
+ if (points.size() != points.getDocCount()) {
+ return null;
+ }
+
+ assert lowerValue <= upperValue;
+ byte[] queryLowerPoint;
+ byte[] queryUpperPoint;
+ if (points.getBytesPerDimension() == Integer.BYTES) {
+ queryLowerPoint = IntPoint.pack((int) lowerValue).bytes;
+ queryUpperPoint = IntPoint.pack((int) upperValue).bytes;
+ } else {
+ queryLowerPoint = LongPoint.pack(lowerValue).bytes;
+ queryUpperPoint = LongPoint.pack(upperValue).bytes;
+ }
+ if (matchNone(points, queryLowerPoint, queryUpperPoint)) {
+ return IteratorAndCount.empty();
+ }
+ if (matchAll(points, queryLowerPoint, queryUpperPoint)) {
+ int maxDoc = context.reader().maxDoc();
+ if (points.getDocCount() == maxDoc) {
+ return IteratorAndCount.all(maxDoc);
} else {
- queryLowerPoint = LongPoint.pack(lowerValue).bytes;
- queryUpperPoint = LongPoint.pack(upperValue).bytes;
+ return IteratorAndCount.sparseRange(0, maxDoc, delegate);
}
- if (lowerValue > upperValue || matchNone(points, queryLowerPoint, queryUpperPoint)) {
- return new BoundedDocIdSetIterator(0, 0, null);
- }
- int minDocId, maxDocId;
- if (matchAll(points, queryLowerPoint, queryUpperPoint)) {
- minDocId = 0;
- maxDocId = context.reader().maxDoc();
- } else {
- final ByteArrayComparator comparator =
- ArrayUtil.getUnsignedComparator(points.getBytesPerDimension());
-
- if (reverse) {
- minDocId = nextDoc(points.getPointTree(), queryUpperPoint, false, comparator, true) + 1;
- } else {
- minDocId = nextDoc(points.getPointTree(), queryLowerPoint, true, comparator, false);
- if (minDocId == -1) {
- // No matches
- return new BoundedDocIdSetIterator(0, 0, null);
- }
- }
+ }
- if (reverse) {
- maxDocId = nextDoc(points.getPointTree(), queryLowerPoint, true, comparator, true) + 1;
- if (maxDocId == 0) {
- // No matches
- return new BoundedDocIdSetIterator(0, 0, null);
- }
- } else {
- maxDocId = nextDoc(points.getPointTree(), queryUpperPoint, false, comparator, false);
- if (maxDocId == -1) {
- maxDocId = context.reader().maxDoc();
- }
- }
+ int minDocId, maxDocId;
+ final ByteArrayComparator comparator =
+ ArrayUtil.getUnsignedComparator(points.getBytesPerDimension());
+
+ if (reverse) {
+ minDocId = nextDoc(points.getPointTree(), queryUpperPoint, false, comparator, true) + 1;
+ } else {
+ minDocId = nextDoc(points.getPointTree(), queryLowerPoint, true, comparator, false);
+ if (minDocId == -1) {
+ // No matches
+ return IteratorAndCount.empty();
}
+ }
- if ((points.getDocCount() == context.reader().maxDoc())) {
- return new BoundedDocIdSetIterator(minDocId, maxDocId, null);
- } else {
- return new BoundedDocIdSetIterator(minDocId, maxDocId, delegate);
+ if (reverse) {
+ maxDocId = nextDoc(points.getPointTree(), queryLowerPoint, true, comparator, true) + 1;
+ if (maxDocId == 0) {
+ // No matches
+ return IteratorAndCount.empty();
+ }
+ } else {
+ maxDocId = nextDoc(points.getPointTree(), queryUpperPoint, false, comparator, false);
+ if (maxDocId == -1) {
+ maxDocId = context.reader().maxDoc();
}
}
- return null;
+
+ if (minDocId == maxDocId) {
+ return IteratorAndCount.empty();
+ }
+
+ if ((points.getDocCount() == context.reader().maxDoc())) {
+ return IteratorAndCount.denseRange(minDocId, maxDocId);
+ } else {
+ return IteratorAndCount.sparseRange(minDocId, maxDocId, delegate);
+ }
}
- private BoundedDocIdSetIterator getDocIdSetIteratorOrNull(LeafReaderContext context)
- throws IOException {
+ private IteratorAndCount getDocIdSetIteratorOrNull(LeafReaderContext context) throws IOException {
+ if (lowerValue > upperValue) {
+ return IteratorAndCount.empty();
+ }
+
SortedNumericDocValues sortedNumericValues =
DocValues.getSortedNumeric(context.reader(), field);
NumericDocValues numericValues = DocValues.unwrapSingleton(sortedNumericValues);
if (numericValues != null) {
- BoundedDocIdSetIterator iterator = getDocIdSetIteratorOrNullFromBkd(context, numericValues);
- if (iterator != null) {
- return iterator;
+ IteratorAndCount itAndCount = getDocIdSetIteratorOrNullFromBkd(context, numericValues);
+ if (itAndCount != null) {
+ return itAndCount;
}
Sort indexSort = context.reader().getMetaData().getSort();
if (indexSort != null
@@ -548,7 +553,7 @@ public class IndexSortSortedNumericDocValuesRangeQuery extends Query {
* {@link DocIdSetIterator} makes sure to wrap the original docvalues to skip over documents with
* no value.
*/
- private BoundedDocIdSetIterator getDocIdSetIterator(
+ private IteratorAndCount getDocIdSetIterator(
SortField sortField,
SortField.Type sortFieldType,
LeafReaderContext context,
@@ -592,19 +597,22 @@ public class IndexSortSortedNumericDocValuesRangeQuery extends Query {
}
int lastDocIdExclusive = high + 1;
+
+ if (firstDocIdInclusive == lastDocIdExclusive) {
+ return IteratorAndCount.empty();
+ }
+
Object missingValue = sortField.getMissingValue();
- BoundedDocIdSetIterator disi;
LeafReader reader = context.reader();
PointValues pointValues = reader.getPointValues(field);
final long missingLongValue = missingValue == null ? 0L : (long) missingValue;
// all documents have docValues or missing value falls outside the range
if ((pointValues != null && pointValues.getDocCount() == reader.maxDoc())
|| (missingLongValue < lowerValue || missingLongValue > upperValue)) {
- disi = new BoundedDocIdSetIterator(firstDocIdInclusive, lastDocIdExclusive, null);
+ return IteratorAndCount.denseRange(firstDocIdInclusive, lastDocIdExclusive);
} else {
- disi = new BoundedDocIdSetIterator(firstDocIdInclusive, lastDocIdExclusive, delegate);
+ return IteratorAndCount.sparseRange(firstDocIdInclusive, lastDocIdExclusive, delegate);
}
- return disi;
}
/** Compares the given document's value with a stored reference value. */
@@ -643,6 +651,29 @@ public class IndexSortSortedNumericDocValuesRangeQuery extends Query {
}
}
+ /**
+ * Provides a {@code DocIdSetIterator} along with an accurate count of documents provided by the
+ * iterator (or {@code -1} if an accurate count is unknown).
+ */
+ private record IteratorAndCount(DocIdSetIterator it, int count) {
+
+ static IteratorAndCount empty() {
+ return new IteratorAndCount(DocIdSetIterator.empty(), 0);
+ }
+
+ static IteratorAndCount all(int maxDoc) {
+ return new IteratorAndCount(DocIdSetIterator.all(maxDoc), maxDoc);
+ }
+
+ static IteratorAndCount denseRange(int minDoc, int maxDoc) {
+ return new IteratorAndCount(DocIdSetIterator.range(minDoc, maxDoc), maxDoc - minDoc);
+ }
+
+ static IteratorAndCount sparseRange(int minDoc, int maxDoc, DocIdSetIterator delegate) {
+ return new IteratorAndCount(new BoundedDocIdSetIterator(minDoc, maxDoc, delegate), -1);
+ }
+ }
+
/**
* A doc ID set iterator that wraps a delegate iterator and only returns doc IDs in the range
* [firstDocInclusive, lastDoc).
@@ -655,6 +686,7 @@ public class IndexSortSortedNumericDocValuesRangeQuery extends Query {
private int docID = -1;
BoundedDocIdSetIterator(int firstDoc, int lastDoc, DocIdSetIterator delegate) {
+ assert delegate != null;
this.firstDoc = firstDoc;
this.lastDoc = lastDoc;
this.delegate = delegate;
@@ -676,12 +708,7 @@ public class IndexSortSortedNumericDocValuesRangeQuery extends Query {
target = firstDoc;
}
- int result;
- if (delegate != null) {
- result = delegate.advance(target);
- } else {
- result = target;
- }
+ int result = delegate.advance(target);
if (result < lastDoc) {
docID = result;
} else {
@@ -692,7 +719,7 @@ public class IndexSortSortedNumericDocValuesRangeQuery extends Query {
@Override
public long cost() {
- return lastDoc - firstDoc;
+ return Math.min(delegate.cost(), lastDoc - firstDoc);
}
}
}