You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by gs...@apache.org on 2021/11/30 22:35:14 UTC

[lucene] branch branch_9_0 updated: LUCENE-10232: Fix MultiRangeQuery to confirm all dimensions for a given range match (#495)

This is an automated email from the ASF dual-hosted git repository.

gsmiller pushed a commit to branch branch_9_0
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/branch_9_0 by this push:
     new 2bcc8ff  LUCENE-10232: Fix MultiRangeQuery to confirm all dimensions for a given range match (#495)
2bcc8ff is described below

commit 2bcc8ff106820fb79e7e874780640deffbe3642e
Author: Greg Miller <gs...@gmail.com>
AuthorDate: Tue Nov 30 14:35:09 2021 -0800

    LUCENE-10232: Fix MultiRangeQuery to confirm all dimensions for a given range match (#495)
---
 lucene/CHANGES.txt                                 |   2 +
 .../lucene/sandbox/search/MultiRangeQuery.java     | 103 +++++++--------------
 .../sandbox/search/TestMultiRangeQueries.java      |  82 +++++++++++++---
 3 files changed, 107 insertions(+), 80 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 617c1a1..a599283 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -395,6 +395,8 @@ Changes in Backwards Compatibility Policy
   semantic changes like analysis or certain encoding on top of the file format are only supported on
   a best effort basis. (Simon Willnauer)
 
+* LUCENE-10232: Fix MultiRangeQuery to confirm all dimensions for a given range match. (Greg Miller)
+
 Build
 ---------------------
 
diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/MultiRangeQuery.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/MultiRangeQuery.java
index ebc6b25..f0583bd 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/MultiRangeQuery.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/MultiRangeQuery.java
@@ -149,6 +149,8 @@ public abstract class MultiRangeQuery extends Query {
   public final Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost)
       throws IOException {
 
+    final ArrayUtil.ByteArrayComparator comparator = ArrayUtil.getUnsignedComparator(bytesPerDim);
+
     // We don't use RandomAccessWeight here: it's no good to approximate with "match all docs".
     // This is an inverted structure and should be used in the first pass:
 
@@ -172,31 +174,21 @@ public abstract class MultiRangeQuery extends Query {
           @Override
           public void visit(int docID, byte[] packedValue) {
             // If a single OR clause has the value in range, the entire query accepts the value
+            continueRange:
             for (RangeClause rangeClause : rangeClauses) {
               for (int dim = 0; dim < numDims; dim++) {
                 int offset = dim * bytesPerDim;
-                if ((Arrays.compareUnsigned(
-                            packedValue,
-                            offset,
-                            offset + bytesPerDim,
-                            rangeClause.lowerValue,
-                            offset,
-                            offset + bytesPerDim)
-                        >= 0)
-                    && (Arrays.compareUnsigned(
-                            packedValue,
-                            offset,
-                            offset + bytesPerDim,
-                            rangeClause.upperValue,
-                            offset,
-                            offset + bytesPerDim)
-                        <= 0)) {
-                  // Doc is in-bounds. Add and short circuit
-                  adder.add(docID);
-                  return;
+                if (comparator.compare(packedValue, offset, rangeClause.lowerValue, offset) < 0) {
+                  // Doc value is too low in this dim:
+                  continue continueRange;
+                }
+                if (comparator.compare(packedValue, offset, rangeClause.upperValue, offset) > 0) {
+                  // Doc value is too high in this dim:
+                  continue continueRange;
                 }
-                // Iterate till we have any OR clauses remaining
               }
+              // Doc matched on all dimensions:
+              adder.add(docID);
             }
           }
 
@@ -211,47 +203,35 @@ public abstract class MultiRangeQuery extends Query {
              * as inside and atleast one range sees the point as CROSSES, return CROSSES 3) If none
              * of the above, return OUTSIDE
              */
+            continueRange:
             for (RangeClause rangeClause : rangeClauses) {
+              boolean rangeCrosses = false;
+
               for (int dim = 0; dim < numDims; dim++) {
                 int offset = dim * bytesPerDim;
 
-                if ((Arrays.compareUnsigned(
-                            minPackedValue,
-                            offset,
-                            offset + bytesPerDim,
-                            rangeClause.lowerValue,
-                            offset,
-                            offset + bytesPerDim)
-                        >= 0)
-                    && (Arrays.compareUnsigned(
-                            maxPackedValue,
-                            offset,
-                            offset + bytesPerDim,
-                            rangeClause.upperValue,
-                            offset,
-                            offset + bytesPerDim)
-                        <= 0)) {
-                  return PointValues.Relation.CELL_INSIDE_QUERY;
+                if (comparator.compare(minPackedValue, offset, rangeClause.upperValue, offset) > 0
+                    || comparator.compare(maxPackedValue, offset, rangeClause.lowerValue, offset)
+                        < 0) {
+                  continue continueRange;
                 }
 
-                crosses |=
-                    Arrays.compareUnsigned(
-                                minPackedValue,
-                                offset,
-                                offset + bytesPerDim,
-                                rangeClause.lowerValue,
-                                offset,
-                                offset + bytesPerDim)
-                            < 0
-                        || Arrays.compareUnsigned(
-                                maxPackedValue,
-                                offset,
-                                offset + bytesPerDim,
-                                rangeClause.upperValue,
-                                offset,
-                                offset + bytesPerDim)
+                rangeCrosses |=
+                    comparator.compare(minPackedValue, offset, rangeClause.lowerValue, offset) < 0
+                        || comparator.compare(
+                                maxPackedValue, offset, rangeClause.upperValue, offset)
                             > 0;
               }
+
+              if (rangeCrosses == false) {
+                // At this point we know that the cell is fully inside the range clause, so we
+                // return early:
+                return PointValues.Relation.CELL_INSIDE_QUERY;
+              } else {
+                // This range clause crosses the cell, but we'll keep checking more ranges to see if
+                // one fully contains the cell:
+                crosses = true;
+              }
             }
 
             if (crosses) {
@@ -300,21 +280,8 @@ public abstract class MultiRangeQuery extends Query {
           for (RangeClause rangeClause : rangeClauses) {
             for (int i = 0; i < numDims; ++i) {
               int offset = i * bytesPerDim;
-              if (Arrays.compareUnsigned(
-                          rangeClause.lowerValue,
-                          offset,
-                          offset + bytesPerDim,
-                          fieldPackedLower,
-                          offset,
-                          offset + bytesPerDim)
-                      > 0
-                  || Arrays.compareUnsigned(
-                          rangeClause.upperValue,
-                          offset,
-                          offset + bytesPerDim,
-                          fieldPackedUpper,
-                          offset,
-                          offset + bytesPerDim)
+              if (comparator.compare(rangeClause.lowerValue, offset, fieldPackedLower, offset) > 0
+                  || comparator.compare(rangeClause.upperValue, offset, fieldPackedUpper, offset)
                       < 0) {
                 allDocsMatch = false;
                 break;
diff --git a/lucene/sandbox/src/test/org/apache/lucene/sandbox/search/TestMultiRangeQueries.java b/lucene/sandbox/src/test/org/apache/lucene/sandbox/search/TestMultiRangeQueries.java
index e7498ff..420847e 100644
--- a/lucene/sandbox/src/test/org/apache/lucene/sandbox/search/TestMultiRangeQueries.java
+++ b/lucene/sandbox/src/test/org/apache/lucene/sandbox/search/TestMultiRangeQueries.java
@@ -17,6 +17,7 @@
 
 package org.apache.lucene.sandbox.search;
 
+import com.carrotsearch.randomizedtesting.generators.RandomNumbers;
 import java.io.IOException;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.DoublePoint;
@@ -29,14 +30,71 @@ import org.apache.lucene.sandbox.document.DoublePointMultiRangeBuilder;
 import org.apache.lucene.sandbox.document.FloatPointMultiRangeBuilder;
 import org.apache.lucene.sandbox.document.IntPointMultiRangeBuilder;
 import org.apache.lucene.sandbox.document.LongPointMultiRangeBuilder;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.TestUtil;
 
 public class TestMultiRangeQueries extends LuceneTestCase {
 
+  public void testDuelWithStandardDisjunction() throws IOException {
+    int iterations = LuceneTestCase.TEST_NIGHTLY ? atLeast(100) : 10;
+    for (int iter = 0; iter < iterations; iter++) {
+      Directory dir = newDirectory();
+      RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+
+      int dims = RandomNumbers.randomIntBetween(random(), 1, 8);
+
+      long[] scratch = new long[dims];
+      for (int i = 0; i < 100; i++) {
+        int numPoints = RandomNumbers.randomIntBetween(random(), 1, 10);
+        Document doc = new Document();
+        for (int j = 0; j < numPoints; j++) {
+          for (int v = 0; v < dims; v++) {
+            scratch[v] = RandomNumbers.randomLongBetween(random(), 0, 100);
+          }
+          doc.add(new LongPoint("point", scratch));
+        }
+        w.addDocument(doc);
+      }
+
+      IndexReader reader = w.getReader();
+      IndexSearcher searcher = newSearcher(reader);
+
+      int numRanges = RandomNumbers.randomIntBetween(random(), 1, 20);
+      LongPointMultiRangeBuilder builder1 = new LongPointMultiRangeBuilder("point", dims);
+      BooleanQuery.Builder builder2 = new BooleanQuery.Builder();
+      for (int i = 0; i < numRanges; i++) {
+        long[] lower = new long[dims];
+        long[] upper = new long[dims];
+        for (int j = 0; j < dims; j++) {
+          lower[j] = RandomNumbers.randomLongBetween(random(), -100, 200);
+          upper[j] = lower[j] + RandomNumbers.randomLongBetween(random(), 0, 100);
+        }
+        builder1.add(lower, upper);
+        builder2.add(LongPoint.newRangeQuery("point", lower, upper), BooleanClause.Occur.SHOULD);
+      }
+
+      Query query1 = builder1.build();
+      Query query2 = builder2.build();
+      TopDocs result1 = searcher.search(query1, 10, Sort.INDEXORDER);
+      TopDocs result2 = searcher.search(query2, 10, Sort.INDEXORDER);
+      assertEquals(result2.totalHits, result1.totalHits);
+      assertEquals(result2.scoreDocs.length, result1.scoreDocs.length);
+      for (int i = 0; i < result2.scoreDocs.length; i++) {
+        assertEquals(result2.scoreDocs[i].doc, result1.scoreDocs[i].doc);
+      }
+
+      IOUtils.close(reader, w, dir);
+    }
+  }
+
   public void testDoubleRandomMultiRangeQuery() throws IOException {
     final int numDims = TestUtil.nextInt(random(), 1, 3);
     final int numVals = TestUtil.nextInt(random(), 3, 8);
@@ -123,9 +181,9 @@ public class TestMultiRangeQueries extends LuceneTestCase {
 
     assertEquals(searcher.count(query), 2);
 
-    // None match
-    double[] nonMatchingFirstRangeLower = {1.3, 3.5, 2.7};
-    double[] nonMatchingFirstRangeUpper = {5.2, 8.3, 7.8};
+    // None match (one dimension is in range but the rest aren't)
+    double[] nonMatchingFirstRangeLower = {111.3, 3.5, 2.7};
+    double[] nonMatchingFirstRangeUpper = {117.3, 8.3, 7.8};
 
     double[] nonMatchingSecondRangeLower = {11246.3, 19388.7, 21248.4};
     double[] nonMatchingSecondRangeUpper = {13242.9, 20214.2, 23236.5};
@@ -244,9 +302,9 @@ public class TestMultiRangeQueries extends LuceneTestCase {
 
     assertEquals(searcher.count(query), 2);
 
-    // None match
-    long[] nonMatchingFirstRangeLower = {1, 3, 2};
-    long[] nonMatchingFirstRangeUpper = {5, 8, 7};
+    // None match (one dimension is in range but the rest aren't)
+    long[] nonMatchingFirstRangeLower = {111, 3, 2};
+    long[] nonMatchingFirstRangeUpper = {117, 8, 7};
 
     long[] nonMatchingSecondRangeLower = {11246, 19388, 21248};
     long[] nonMatchingSecondRangeUpper = {13242, 20214, 23236};
@@ -365,9 +423,9 @@ public class TestMultiRangeQueries extends LuceneTestCase {
 
     assertEquals(searcher.count(query), 2);
 
-    // None Match
-    float[] nonMatchingFirstRangeLower = {1.4f, 3.3f, 2.7f};
-    float[] nonMatchingFirstRangeUpper = {5.4f, 8.2f, 7.3f};
+    // None match (one dimension is in range but the rest aren't)
+    float[] nonMatchingFirstRangeLower = {111.3f, 3.3f, 2.7f};
+    float[] nonMatchingFirstRangeUpper = {117.3f, 8.2f, 7.3f};
 
     float[] nonMatchingSecondRangeLower = {11246.2f, 19388.6f, 21248.3f};
     float[] nonMatchingSecondRangeUpper = {13242.4f, 20214.7f, 23236.3f};
@@ -486,9 +544,9 @@ public class TestMultiRangeQueries extends LuceneTestCase {
 
     assertEquals(searcher.count(query), 2);
 
-    // None match
-    int[] nonMatchingFirstRangeLower = {1, 3, 2};
-    int[] nonMatchingFirstRangeUpper = {5, 8, 7};
+    // None match (one dimension is in range but the rest aren't)
+    int[] nonMatchingFirstRangeLower = {111, 3, 2};
+    int[] nonMatchingFirstRangeUpper = {117, 8, 7};
 
     int[] nonMatchingSecondRangeLower = {11246, 19388, 21248};
     int[] nonMatchingSecondRangeUpper = {13242, 20214, 23236};