You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by gs...@apache.org on 2021/11/30 22:35:14 UTC
[lucene] branch branch_9_0 updated: LUCENE-10232: Fix MultiRangeQuery to confirm all dimensions for a given range match (#495)
This is an automated email from the ASF dual-hosted git repository.
gsmiller pushed a commit to branch branch_9_0
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/branch_9_0 by this push:
new 2bcc8ff LUCENE-10232: Fix MultiRangeQuery to confirm all dimensions for a given range match (#495)
2bcc8ff is described below
commit 2bcc8ff106820fb79e7e874780640deffbe3642e
Author: Greg Miller <gs...@gmail.com>
AuthorDate: Tue Nov 30 14:35:09 2021 -0800
LUCENE-10232: Fix MultiRangeQuery to confirm all dimensions for a given range match (#495)
---
lucene/CHANGES.txt | 2 +
.../lucene/sandbox/search/MultiRangeQuery.java | 103 +++++++--------------
.../sandbox/search/TestMultiRangeQueries.java | 82 +++++++++++++---
3 files changed, 107 insertions(+), 80 deletions(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 617c1a1..a599283 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -395,6 +395,8 @@ Changes in Backwards Compatibility Policy
semantic changes like analysis or certain encoding on top of the file format are only supported on
a best effort basis. (Simon Willnauer)
+* LUCENE-10232: Fix MultiRangeQuery to confirm all dimensions for a given range match. (Greg Miller)
+
Build
---------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/MultiRangeQuery.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/MultiRangeQuery.java
index ebc6b25..f0583bd 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/MultiRangeQuery.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/MultiRangeQuery.java
@@ -149,6 +149,8 @@ public abstract class MultiRangeQuery extends Query {
public final Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost)
throws IOException {
+ final ArrayUtil.ByteArrayComparator comparator = ArrayUtil.getUnsignedComparator(bytesPerDim);
+
// We don't use RandomAccessWeight here: it's no good to approximate with "match all docs".
// This is an inverted structure and should be used in the first pass:
@@ -172,31 +174,21 @@ public abstract class MultiRangeQuery extends Query {
@Override
public void visit(int docID, byte[] packedValue) {
// If a single OR clause has the value in range, the entire query accepts the value
+ continueRange:
for (RangeClause rangeClause : rangeClauses) {
for (int dim = 0; dim < numDims; dim++) {
int offset = dim * bytesPerDim;
- if ((Arrays.compareUnsigned(
- packedValue,
- offset,
- offset + bytesPerDim,
- rangeClause.lowerValue,
- offset,
- offset + bytesPerDim)
- >= 0)
- && (Arrays.compareUnsigned(
- packedValue,
- offset,
- offset + bytesPerDim,
- rangeClause.upperValue,
- offset,
- offset + bytesPerDim)
- <= 0)) {
- // Doc is in-bounds. Add and short circuit
- adder.add(docID);
- return;
+ if (comparator.compare(packedValue, offset, rangeClause.lowerValue, offset) < 0) {
+ // Doc value is too low in this dim:
+ continue continueRange;
+ }
+ if (comparator.compare(packedValue, offset, rangeClause.upperValue, offset) > 0) {
+ // Doc value is too high in this dim:
+ continue continueRange;
}
- // Iterate till we have any OR clauses remaining
}
+ // Doc matched on all dimensions:
+ adder.add(docID);
}
}
@@ -211,47 +203,35 @@ public abstract class MultiRangeQuery extends Query {
* as inside and atleast one range sees the point as CROSSES, return CROSSES 3) If none
* of the above, return OUTSIDE
*/
+ continueRange:
for (RangeClause rangeClause : rangeClauses) {
+ boolean rangeCrosses = false;
+
for (int dim = 0; dim < numDims; dim++) {
int offset = dim * bytesPerDim;
- if ((Arrays.compareUnsigned(
- minPackedValue,
- offset,
- offset + bytesPerDim,
- rangeClause.lowerValue,
- offset,
- offset + bytesPerDim)
- >= 0)
- && (Arrays.compareUnsigned(
- maxPackedValue,
- offset,
- offset + bytesPerDim,
- rangeClause.upperValue,
- offset,
- offset + bytesPerDim)
- <= 0)) {
- return PointValues.Relation.CELL_INSIDE_QUERY;
+ if (comparator.compare(minPackedValue, offset, rangeClause.upperValue, offset) > 0
+ || comparator.compare(maxPackedValue, offset, rangeClause.lowerValue, offset)
+ < 0) {
+ continue continueRange;
}
- crosses |=
- Arrays.compareUnsigned(
- minPackedValue,
- offset,
- offset + bytesPerDim,
- rangeClause.lowerValue,
- offset,
- offset + bytesPerDim)
- < 0
- || Arrays.compareUnsigned(
- maxPackedValue,
- offset,
- offset + bytesPerDim,
- rangeClause.upperValue,
- offset,
- offset + bytesPerDim)
+ rangeCrosses |=
+ comparator.compare(minPackedValue, offset, rangeClause.lowerValue, offset) < 0
+ || comparator.compare(
+ maxPackedValue, offset, rangeClause.upperValue, offset)
> 0;
}
+
+ if (rangeCrosses == false) {
+ // At this point we know that the cell is fully inside the range clause, so we
+ // return early:
+ return PointValues.Relation.CELL_INSIDE_QUERY;
+ } else {
+ // This range clause crosses the cell, but we'll keep checking more ranges to see if
+ // one fully contains the cell:
+ crosses = true;
+ }
}
if (crosses) {
@@ -300,21 +280,8 @@ public abstract class MultiRangeQuery extends Query {
for (RangeClause rangeClause : rangeClauses) {
for (int i = 0; i < numDims; ++i) {
int offset = i * bytesPerDim;
- if (Arrays.compareUnsigned(
- rangeClause.lowerValue,
- offset,
- offset + bytesPerDim,
- fieldPackedLower,
- offset,
- offset + bytesPerDim)
- > 0
- || Arrays.compareUnsigned(
- rangeClause.upperValue,
- offset,
- offset + bytesPerDim,
- fieldPackedUpper,
- offset,
- offset + bytesPerDim)
+ if (comparator.compare(rangeClause.lowerValue, offset, fieldPackedLower, offset) > 0
+ || comparator.compare(rangeClause.upperValue, offset, fieldPackedUpper, offset)
< 0) {
allDocsMatch = false;
break;
diff --git a/lucene/sandbox/src/test/org/apache/lucene/sandbox/search/TestMultiRangeQueries.java b/lucene/sandbox/src/test/org/apache/lucene/sandbox/search/TestMultiRangeQueries.java
index e7498ff..420847e 100644
--- a/lucene/sandbox/src/test/org/apache/lucene/sandbox/search/TestMultiRangeQueries.java
+++ b/lucene/sandbox/src/test/org/apache/lucene/sandbox/search/TestMultiRangeQueries.java
@@ -17,6 +17,7 @@
package org.apache.lucene.sandbox.search;
+import com.carrotsearch.randomizedtesting.generators.RandomNumbers;
import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoublePoint;
@@ -29,14 +30,71 @@ import org.apache.lucene.sandbox.document.DoublePointMultiRangeBuilder;
import org.apache.lucene.sandbox.document.FloatPointMultiRangeBuilder;
import org.apache.lucene.sandbox.document.IntPointMultiRangeBuilder;
import org.apache.lucene.sandbox.document.LongPointMultiRangeBuilder;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
public class TestMultiRangeQueries extends LuceneTestCase {
+ public void testDuelWithStandardDisjunction() throws IOException {
+ int iterations = LuceneTestCase.TEST_NIGHTLY ? atLeast(100) : 10;
+ for (int iter = 0; iter < iterations; iter++) {
+ Directory dir = newDirectory();
+ RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+
+ int dims = RandomNumbers.randomIntBetween(random(), 1, 8);
+
+ long[] scratch = new long[dims];
+ for (int i = 0; i < 100; i++) {
+ int numPoints = RandomNumbers.randomIntBetween(random(), 1, 10);
+ Document doc = new Document();
+ for (int j = 0; j < numPoints; j++) {
+ for (int v = 0; v < dims; v++) {
+ scratch[v] = RandomNumbers.randomLongBetween(random(), 0, 100);
+ }
+ doc.add(new LongPoint("point", scratch));
+ }
+ w.addDocument(doc);
+ }
+
+ IndexReader reader = w.getReader();
+ IndexSearcher searcher = newSearcher(reader);
+
+ int numRanges = RandomNumbers.randomIntBetween(random(), 1, 20);
+ LongPointMultiRangeBuilder builder1 = new LongPointMultiRangeBuilder("point", dims);
+ BooleanQuery.Builder builder2 = new BooleanQuery.Builder();
+ for (int i = 0; i < numRanges; i++) {
+ long[] lower = new long[dims];
+ long[] upper = new long[dims];
+ for (int j = 0; j < dims; j++) {
+ lower[j] = RandomNumbers.randomLongBetween(random(), -100, 200);
+ upper[j] = lower[j] + RandomNumbers.randomLongBetween(random(), 0, 100);
+ }
+ builder1.add(lower, upper);
+ builder2.add(LongPoint.newRangeQuery("point", lower, upper), BooleanClause.Occur.SHOULD);
+ }
+
+ Query query1 = builder1.build();
+ Query query2 = builder2.build();
+ TopDocs result1 = searcher.search(query1, 10, Sort.INDEXORDER);
+ TopDocs result2 = searcher.search(query2, 10, Sort.INDEXORDER);
+ assertEquals(result2.totalHits, result1.totalHits);
+ assertEquals(result2.scoreDocs.length, result1.scoreDocs.length);
+ for (int i = 0; i < result2.scoreDocs.length; i++) {
+ assertEquals(result2.scoreDocs[i].doc, result1.scoreDocs[i].doc);
+ }
+
+ IOUtils.close(reader, w, dir);
+ }
+ }
+
public void testDoubleRandomMultiRangeQuery() throws IOException {
final int numDims = TestUtil.nextInt(random(), 1, 3);
final int numVals = TestUtil.nextInt(random(), 3, 8);
@@ -123,9 +181,9 @@ public class TestMultiRangeQueries extends LuceneTestCase {
assertEquals(searcher.count(query), 2);
- // None match
- double[] nonMatchingFirstRangeLower = {1.3, 3.5, 2.7};
- double[] nonMatchingFirstRangeUpper = {5.2, 8.3, 7.8};
+ // None match (one dimension is in range but the rest aren't)
+ double[] nonMatchingFirstRangeLower = {111.3, 3.5, 2.7};
+ double[] nonMatchingFirstRangeUpper = {117.3, 8.3, 7.8};
double[] nonMatchingSecondRangeLower = {11246.3, 19388.7, 21248.4};
double[] nonMatchingSecondRangeUpper = {13242.9, 20214.2, 23236.5};
@@ -244,9 +302,9 @@ public class TestMultiRangeQueries extends LuceneTestCase {
assertEquals(searcher.count(query), 2);
- // None match
- long[] nonMatchingFirstRangeLower = {1, 3, 2};
- long[] nonMatchingFirstRangeUpper = {5, 8, 7};
+ // None match (one dimension is in range but the rest aren't)
+ long[] nonMatchingFirstRangeLower = {111, 3, 2};
+ long[] nonMatchingFirstRangeUpper = {117, 8, 7};
long[] nonMatchingSecondRangeLower = {11246, 19388, 21248};
long[] nonMatchingSecondRangeUpper = {13242, 20214, 23236};
@@ -365,9 +423,9 @@ public class TestMultiRangeQueries extends LuceneTestCase {
assertEquals(searcher.count(query), 2);
- // None Match
- float[] nonMatchingFirstRangeLower = {1.4f, 3.3f, 2.7f};
- float[] nonMatchingFirstRangeUpper = {5.4f, 8.2f, 7.3f};
+ // None match (one dimension is in range but the rest aren't)
+ float[] nonMatchingFirstRangeLower = {111.3f, 3.3f, 2.7f};
+ float[] nonMatchingFirstRangeUpper = {117.3f, 8.2f, 7.3f};
float[] nonMatchingSecondRangeLower = {11246.2f, 19388.6f, 21248.3f};
float[] nonMatchingSecondRangeUpper = {13242.4f, 20214.7f, 23236.3f};
@@ -486,9 +544,9 @@ public class TestMultiRangeQueries extends LuceneTestCase {
assertEquals(searcher.count(query), 2);
- // None match
- int[] nonMatchingFirstRangeLower = {1, 3, 2};
- int[] nonMatchingFirstRangeUpper = {5, 8, 7};
+ // None match (one dimension is in range but the rest aren't)
+ int[] nonMatchingFirstRangeLower = {111, 3, 2};
+ int[] nonMatchingFirstRangeUpper = {117, 8, 7};
int[] nonMatchingSecondRangeLower = {11246, 19388, 21248};
int[] nonMatchingSecondRangeUpper = {13242, 20214, 23236};