You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by gs...@apache.org on 2022/12/10 20:23:38 UTC

[lucene] branch main updated: Some minor code cleanup in IndexSortSortedNumericDocValuesRangeQuery (#12003)

This is an automated email from the ASF dual-hosted git repository.

gsmiller pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/main by this push:
     new 8671e29929c Some minor code cleanup in IndexSortSortedNumericDocValuesRangeQuery (#12003)
8671e29929c is described below

commit 8671e29929ccd8c610a874b071a3ec373892ad25
Author: Greg Miller <gs...@gmail.com>
AuthorDate: Sat Dec 10 12:23:31 2022 -0800

    Some minor code cleanup in IndexSortSortedNumericDocValuesRangeQuery (#12003)
    
    * Leverage DISI static factory methods more over custom DISI impl where possible.
    * Assert points field is a single-dim.
    * Bound cost estimate by the cost of the doc values field (for sparse fields).
---
 lucene/CHANGES.txt                                 |   2 +
 .../IndexSortSortedNumericDocValuesRangeQuery.java | 229 ++++++++++++---------
 2 files changed, 130 insertions(+), 101 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 86e7df77a1b..a3f09567f04 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -157,6 +157,8 @@ Improvements
 * GITHUB#11860: Improve storage efficiency of connections in the HNSW graph that Lucene uses for
   vector search. (Ben Trent)
 
+* GITHUB#12003: Minor cleanup/improvements to IndexSortSortedNumericDocValuesRangeQuery. (Greg Miller)
+
 Bug Fixes
 ---------------------
 * GITHUB#11726: Indexing term vectors on large documents could fail due to
diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/IndexSortSortedNumericDocValuesRangeQuery.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/IndexSortSortedNumericDocValuesRangeQuery.java
index 8a3f964d496..e27f64c507f 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/IndexSortSortedNumericDocValuesRangeQuery.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/IndexSortSortedNumericDocValuesRangeQuery.java
@@ -176,8 +176,9 @@ public class IndexSortSortedNumericDocValuesRangeQuery extends Query {
       @Override
       public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
         final Weight weight = this;
-        DocIdSetIterator disi = getDocIdSetIteratorOrNull(context);
-        if (disi != null) {
+        IteratorAndCount itAndCount = getDocIdSetIteratorOrNull(context);
+        if (itAndCount != null) {
+          DocIdSetIterator disi = itAndCount.it;
           return new ScorerSupplier() {
             @Override
             public Scorer get(long leadCost) throws IOException {
@@ -212,9 +213,9 @@ public class IndexSortSortedNumericDocValuesRangeQuery extends Query {
       @Override
       public int count(LeafReaderContext context) throws IOException {
         if (context.reader().hasDeletions() == false) {
-          BoundedDocIdSetIterator disi = getDocIdSetIteratorOrNull(context);
-          if (disi != null && disi.delegate == null) {
-            return disi.lastDoc - disi.firstDoc;
+          IteratorAndCount itAndCount = getDocIdSetIteratorOrNull(context);
+          if (itAndCount != null && itAndCount.count != -1) {
+            return itAndCount.count;
           }
         }
         return fallbackWeight.count(context);
@@ -406,119 +407,123 @@ public class IndexSortSortedNumericDocValuesRangeQuery extends Query {
 
   private boolean matchNone(PointValues points, byte[] queryLowerPoint, byte[] queryUpperPoint)
       throws IOException {
+    assert points.getNumDimensions() == 1;
     final ByteArrayComparator comparator =
         ArrayUtil.getUnsignedComparator(points.getBytesPerDimension());
-    for (int dim = 0; dim < points.getNumDimensions(); dim++) {
-      int offset = dim * points.getBytesPerDimension();
-      if (comparator.compare(points.getMinPackedValue(), offset, queryUpperPoint, offset) > 0
-          || comparator.compare(points.getMaxPackedValue(), offset, queryLowerPoint, offset) < 0) {
-        return true;
-      }
-    }
-    return false;
+    return comparator.compare(points.getMinPackedValue(), 0, queryUpperPoint, 0) > 0
+        || comparator.compare(points.getMaxPackedValue(), 0, queryLowerPoint, 0) < 0;
   }
 
   private boolean matchAll(PointValues points, byte[] queryLowerPoint, byte[] queryUpperPoint)
       throws IOException {
+    assert points.getNumDimensions() == 1;
     final ByteArrayComparator comparator =
         ArrayUtil.getUnsignedComparator(points.getBytesPerDimension());
-    for (int dim = 0; dim < points.getNumDimensions(); dim++) {
-      int offset = dim * points.getBytesPerDimension();
-      if (comparator.compare(points.getMinPackedValue(), offset, queryLowerPoint, offset) >= 0
-          && comparator.compare(points.getMaxPackedValue(), offset, queryUpperPoint, offset) <= 0) {
-        return true;
-      }
-    }
-    return false;
+    return comparator.compare(points.getMinPackedValue(), 0, queryLowerPoint, 0) >= 0
+        && comparator.compare(points.getMaxPackedValue(), 0, queryUpperPoint, 0) <= 0;
   }
 
-  private BoundedDocIdSetIterator getDocIdSetIteratorOrNullFromBkd(
+  private IteratorAndCount getDocIdSetIteratorOrNullFromBkd(
       LeafReaderContext context, DocIdSetIterator delegate) throws IOException {
     Sort indexSort = context.reader().getMetaData().getSort();
-    if (indexSort != null
-        && indexSort.getSort().length > 0
-        && indexSort.getSort()[0].getField().equals(field)) {
-      final boolean reverse = indexSort.getSort()[0].getReverse();
-      PointValues points = context.reader().getPointValues(field);
-      if (points == null) {
-        return null;
-      }
+    if (indexSort == null
+        || indexSort.getSort().length == 0
+        || indexSort.getSort()[0].getField().equals(field) == false) {
+      return null;
+    }
 
-      if (points.getNumDimensions() != 1) {
-        return null;
-      }
+    final boolean reverse = indexSort.getSort()[0].getReverse();
 
-      if (points.getBytesPerDimension() != Long.BYTES
-          && points.getBytesPerDimension() != Integer.BYTES) {
-        return null;
-      }
+    PointValues points = context.reader().getPointValues(field);
+    if (points == null) {
+      return null;
+    }
 
-      if (points.size() != points.getDocCount()) {
-        return null;
-      }
+    if (points.getNumDimensions() != 1) {
+      return null;
+    }
 
-      byte[] queryLowerPoint;
-      byte[] queryUpperPoint;
-      if (points.getBytesPerDimension() == Integer.BYTES) {
-        queryLowerPoint = IntPoint.pack((int) lowerValue).bytes;
-        queryUpperPoint = IntPoint.pack((int) upperValue).bytes;
+    if (points.getBytesPerDimension() != Long.BYTES
+        && points.getBytesPerDimension() != Integer.BYTES) {
+      return null;
+    }
+
+    if (points.size() != points.getDocCount()) {
+      return null;
+    }
+
+    assert lowerValue <= upperValue;
+    byte[] queryLowerPoint;
+    byte[] queryUpperPoint;
+    if (points.getBytesPerDimension() == Integer.BYTES) {
+      queryLowerPoint = IntPoint.pack((int) lowerValue).bytes;
+      queryUpperPoint = IntPoint.pack((int) upperValue).bytes;
+    } else {
+      queryLowerPoint = LongPoint.pack(lowerValue).bytes;
+      queryUpperPoint = LongPoint.pack(upperValue).bytes;
+    }
+    if (matchNone(points, queryLowerPoint, queryUpperPoint)) {
+      return IteratorAndCount.empty();
+    }
+    if (matchAll(points, queryLowerPoint, queryUpperPoint)) {
+      int maxDoc = context.reader().maxDoc();
+      if (points.getDocCount() == maxDoc) {
+        return IteratorAndCount.all(maxDoc);
       } else {
-        queryLowerPoint = LongPoint.pack(lowerValue).bytes;
-        queryUpperPoint = LongPoint.pack(upperValue).bytes;
+        return IteratorAndCount.sparseRange(0, maxDoc, delegate);
       }
-      if (lowerValue > upperValue || matchNone(points, queryLowerPoint, queryUpperPoint)) {
-        return new BoundedDocIdSetIterator(0, 0, null);
-      }
-      int minDocId, maxDocId;
-      if (matchAll(points, queryLowerPoint, queryUpperPoint)) {
-        minDocId = 0;
-        maxDocId = context.reader().maxDoc();
-      } else {
-        final ByteArrayComparator comparator =
-            ArrayUtil.getUnsignedComparator(points.getBytesPerDimension());
-
-        if (reverse) {
-          minDocId = nextDoc(points.getPointTree(), queryUpperPoint, false, comparator, true) + 1;
-        } else {
-          minDocId = nextDoc(points.getPointTree(), queryLowerPoint, true, comparator, false);
-          if (minDocId == -1) {
-            // No matches
-            return new BoundedDocIdSetIterator(0, 0, null);
-          }
-        }
+    }
 
-        if (reverse) {
-          maxDocId = nextDoc(points.getPointTree(), queryLowerPoint, true, comparator, true) + 1;
-          if (maxDocId == 0) {
-            // No matches
-            return new BoundedDocIdSetIterator(0, 0, null);
-          }
-        } else {
-          maxDocId = nextDoc(points.getPointTree(), queryUpperPoint, false, comparator, false);
-          if (maxDocId == -1) {
-            maxDocId = context.reader().maxDoc();
-          }
-        }
+    int minDocId, maxDocId;
+    final ByteArrayComparator comparator =
+        ArrayUtil.getUnsignedComparator(points.getBytesPerDimension());
+
+    if (reverse) {
+      minDocId = nextDoc(points.getPointTree(), queryUpperPoint, false, comparator, true) + 1;
+    } else {
+      minDocId = nextDoc(points.getPointTree(), queryLowerPoint, true, comparator, false);
+      if (minDocId == -1) {
+        // No matches
+        return IteratorAndCount.empty();
       }
+    }
 
-      if ((points.getDocCount() == context.reader().maxDoc())) {
-        return new BoundedDocIdSetIterator(minDocId, maxDocId, null);
-      } else {
-        return new BoundedDocIdSetIterator(minDocId, maxDocId, delegate);
+    if (reverse) {
+      maxDocId = nextDoc(points.getPointTree(), queryLowerPoint, true, comparator, true) + 1;
+      if (maxDocId == 0) {
+        // No matches
+        return IteratorAndCount.empty();
+      }
+    } else {
+      maxDocId = nextDoc(points.getPointTree(), queryUpperPoint, false, comparator, false);
+      if (maxDocId == -1) {
+        maxDocId = context.reader().maxDoc();
       }
     }
-    return null;
+
+    if (minDocId == maxDocId) {
+      return IteratorAndCount.empty();
+    }
+
+    if ((points.getDocCount() == context.reader().maxDoc())) {
+      return IteratorAndCount.denseRange(minDocId, maxDocId);
+    } else {
+      return IteratorAndCount.sparseRange(minDocId, maxDocId, delegate);
+    }
   }
 
-  private BoundedDocIdSetIterator getDocIdSetIteratorOrNull(LeafReaderContext context)
-      throws IOException {
+  private IteratorAndCount getDocIdSetIteratorOrNull(LeafReaderContext context) throws IOException {
+    if (lowerValue > upperValue) {
+      return IteratorAndCount.empty();
+    }
+
     SortedNumericDocValues sortedNumericValues =
         DocValues.getSortedNumeric(context.reader(), field);
     NumericDocValues numericValues = DocValues.unwrapSingleton(sortedNumericValues);
     if (numericValues != null) {
-      BoundedDocIdSetIterator iterator = getDocIdSetIteratorOrNullFromBkd(context, numericValues);
-      if (iterator != null) {
-        return iterator;
+      IteratorAndCount itAndCount = getDocIdSetIteratorOrNullFromBkd(context, numericValues);
+      if (itAndCount != null) {
+        return itAndCount;
       }
       Sort indexSort = context.reader().getMetaData().getSort();
       if (indexSort != null
@@ -548,7 +553,7 @@ public class IndexSortSortedNumericDocValuesRangeQuery extends Query {
    * {@link DocIdSetIterator} makes sure to wrap the original docvalues to skip over documents with
    * no value.
    */
-  private BoundedDocIdSetIterator getDocIdSetIterator(
+  private IteratorAndCount getDocIdSetIterator(
       SortField sortField,
       SortField.Type sortFieldType,
       LeafReaderContext context,
@@ -592,19 +597,22 @@ public class IndexSortSortedNumericDocValuesRangeQuery extends Query {
     }
 
     int lastDocIdExclusive = high + 1;
+
+    if (firstDocIdInclusive == lastDocIdExclusive) {
+      return IteratorAndCount.empty();
+    }
+
     Object missingValue = sortField.getMissingValue();
-    BoundedDocIdSetIterator disi;
     LeafReader reader = context.reader();
     PointValues pointValues = reader.getPointValues(field);
     final long missingLongValue = missingValue == null ? 0L : (long) missingValue;
     // all documents have docValues or missing value falls outside the range
     if ((pointValues != null && pointValues.getDocCount() == reader.maxDoc())
         || (missingLongValue < lowerValue || missingLongValue > upperValue)) {
-      disi = new BoundedDocIdSetIterator(firstDocIdInclusive, lastDocIdExclusive, null);
+      return IteratorAndCount.denseRange(firstDocIdInclusive, lastDocIdExclusive);
     } else {
-      disi = new BoundedDocIdSetIterator(firstDocIdInclusive, lastDocIdExclusive, delegate);
+      return IteratorAndCount.sparseRange(firstDocIdInclusive, lastDocIdExclusive, delegate);
     }
-    return disi;
   }
 
   /** Compares the given document's value with a stored reference value. */
@@ -643,6 +651,29 @@ public class IndexSortSortedNumericDocValuesRangeQuery extends Query {
     }
   }
 
+  /**
+   * Provides a {@code DocIdSetIterator} along with an accurate count of documents provided by the
+   * iterator (or {@code -1} if an accurate count is unknown).
+   */
+  private record IteratorAndCount(DocIdSetIterator it, int count) {
+
+    static IteratorAndCount empty() {
+      return new IteratorAndCount(DocIdSetIterator.empty(), 0);
+    }
+
+    static IteratorAndCount all(int maxDoc) {
+      return new IteratorAndCount(DocIdSetIterator.all(maxDoc), maxDoc);
+    }
+
+    static IteratorAndCount denseRange(int minDoc, int maxDoc) {
+      return new IteratorAndCount(DocIdSetIterator.range(minDoc, maxDoc), maxDoc - minDoc);
+    }
+
+    static IteratorAndCount sparseRange(int minDoc, int maxDoc, DocIdSetIterator delegate) {
+      return new IteratorAndCount(new BoundedDocIdSetIterator(minDoc, maxDoc, delegate), -1);
+    }
+  }
+
   /**
    * A doc ID set iterator that wraps a delegate iterator and only returns doc IDs in the range
    * [firstDocInclusive, lastDoc).
@@ -655,6 +686,7 @@ public class IndexSortSortedNumericDocValuesRangeQuery extends Query {
     private int docID = -1;
 
     BoundedDocIdSetIterator(int firstDoc, int lastDoc, DocIdSetIterator delegate) {
+      assert delegate != null;
       this.firstDoc = firstDoc;
       this.lastDoc = lastDoc;
       this.delegate = delegate;
@@ -676,12 +708,7 @@ public class IndexSortSortedNumericDocValuesRangeQuery extends Query {
         target = firstDoc;
       }
 
-      int result;
-      if (delegate != null) {
-        result = delegate.advance(target);
-      } else {
-        result = target;
-      }
+      int result = delegate.advance(target);
       if (result < lastDoc) {
         docID = result;
       } else {
@@ -692,7 +719,7 @@ public class IndexSortSortedNumericDocValuesRangeQuery extends Query {
 
     @Override
     public long cost() {
-      return lastDoc - firstDoc;
+      return Math.min(delegate.cost(), lastDoc - firstDoc);
     }
   }
 }