You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by iv...@apache.org on 2020/09/18 05:54:56 UTC
[lucene-solr] branch branch_8x updated: LUCENE-9523: Speed up query
shapes for geometries that generate multiple points (#1866)
This is an automated email from the ASF dual-hosted git repository.
ivera pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/branch_8x by this push:
new 47215d4 LUCENE-9523: Speed up query shapes for geometries that generate multiple points (#1866)
47215d4 is described below
commit 47215d4a858c5959ea5c745249057db356f2cf16
Author: Ignacio Vera <iv...@apache.org>
AuthorDate: Fri Sep 18 07:50:58 2020 +0200
LUCENE-9523: Speed up query shapes for geometries that generate multiple points (#1866)
In query shapes over shape fields, skip points while traversing the BKD tree when the relationship with the document is already known
---
lucene/CHANGES.txt | 3 +
.../org/apache/lucene/document/ShapeQuery.java | 95 +++++++++++++++++-----
2 files changed, 78 insertions(+), 20 deletions(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 674626f..6bd927b 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -63,6 +63,9 @@ Improvements
heap consumption is taken into account when IndexWriter stalls or should flush
DWPTs. (Simon Willnauer)
+* LUCENE-9523: In query shapes over shape fields, skip points while traversing the
+ BKD tree when the relationship with the document is already known. (Ignacio Vera)
+
Optimizations
---------------------
diff --git a/lucene/core/src/java/org/apache/lucene/document/ShapeQuery.java b/lucene/core/src/java/org/apache/lucene/document/ShapeQuery.java
index a6fa340..e8ea282 100644
--- a/lucene/core/src/java/org/apache/lucene/document/ShapeQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/document/ShapeQuery.java
@@ -264,10 +264,20 @@ abstract class ShapeQuery extends Query {
final DocIdSetIterator iterator = new BitSetIterator(result, cost[0]);
return new ConstantScoreScorer(weight, boost, scoreMode, iterator);
}
- final DocIdSetBuilder docIdSetBuilder = new DocIdSetBuilder(reader.maxDoc(), values, query.getField());
- values.intersect(getSparseVisitor(query, docIdSetBuilder));
- final DocIdSetIterator iterator = docIdSetBuilder.build().iterator();
- return new ConstantScoreScorer(weight, boost, scoreMode, iterator);
+ if (values.getDocCount() < (values.size() >>> 2)) {
+ // we use a dense structure so we can skip already visited documents
+ final FixedBitSet result = new FixedBitSet(reader.maxDoc());
+ final long[] cost = new long[]{0};
+ values.intersect(getIntersectsDenseVisitor(query, result, cost));
+ assert cost[0] > 0 || result.cardinality() == 0;
+ final DocIdSetIterator iterator = cost[0] == 0 ? DocIdSetIterator.empty() : new BitSetIterator(result, cost[0]);
+ return new ConstantScoreScorer(weight, boost, scoreMode, iterator);
+ } else {
+ final DocIdSetBuilder docIdSetBuilder = new DocIdSetBuilder(reader.maxDoc(), values, query.getField());
+ values.intersect(getSparseVisitor(query, docIdSetBuilder));
+ final DocIdSetIterator iterator = docIdSetBuilder.build().iterator();
+ return new ConstantScoreScorer(weight, boost, scoreMode, iterator);
+ }
}
/** Scorer used for WITHIN and DISJOINT **/
@@ -292,8 +302,8 @@ abstract class ShapeQuery extends Query {
// process still reads the leaf nodes.
values.intersect(getShallowInverseDenseVisitor(query, result));
}
- assert cost[0] > 0;
- final DocIdSetIterator iterator = new BitSetIterator(result, cost[0]);
+ assert cost[0] > 0 || result.cardinality() == 0;
+ final DocIdSetIterator iterator = cost[0] == 0 ? DocIdSetIterator.empty() : new BitSetIterator(result, cost[0]);
return new ConstantScoreScorer(weight, boost, scoreMode, iterator);
}
@@ -304,7 +314,8 @@ abstract class ShapeQuery extends Query {
final FixedBitSet excluded = new FixedBitSet(reader.maxDoc());
values.intersect(getContainsDenseVisitor(query, result, excluded, cost));
result.andNot(excluded);
- final DocIdSetIterator iterator = new BitSetIterator(result, cost[0]);
+ assert cost[0] > 0 || result.cardinality() == 0;
+ final DocIdSetIterator iterator = cost[0] == 0 ? DocIdSetIterator.empty() : new BitSetIterator(result, cost[0]);
return new ConstantScoreScorer(weight, boost, scoreMode, iterator);
}
@@ -339,7 +350,8 @@ abstract class ShapeQuery extends Query {
};
}
- /** create a visitor that adds documents that match the query using a sparse bitset. (Used by INTERSECT) */
+ /** create a visitor that adds documents that match the query using a sparse bitset. (Used by INTERSECT
+ * when the number of docs <= 4 * number of points ) */
private static IntersectVisitor getSparseVisitor(final ShapeQuery query, final DocIdSetBuilder result) {
return new IntersectVisitor() {
final ShapeField.DecodedTriangle scratchTriangle = new ShapeField.DecodedTriangle();
@@ -379,6 +391,43 @@ abstract class ShapeQuery extends Query {
};
}
+ /** Scorer used for INTERSECTS when the number of points > 4 * number of docs **/
+ private static IntersectVisitor getIntersectsDenseVisitor(final ShapeQuery query, final FixedBitSet result, final long[] cost) {
+ return new IntersectVisitor() {
+ final ShapeField.DecodedTriangle scratchTriangle = new ShapeField.DecodedTriangle();
+
+ @Override
+ public void visit(int docID) {
+ result.set(docID);
+ cost[0]++;
+ }
+
+ @Override
+ public void visit(int docID, byte[] t) {
+ if (result.get(docID) == false) {
+ if (query.queryMatches(t, scratchTriangle, query.getQueryRelation())) {
+ visit(docID);
+ }
+ }
+ }
+
+ @Override
+ public void visit(DocIdSetIterator iterator, byte[] t) throws IOException {
+ if (query.queryMatches(t, scratchTriangle, query.getQueryRelation())) {
+ int docID;
+ while ((docID = iterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+ visit(docID);
+ }
+ }
+ }
+
+ @Override
+ public Relation compare(byte[] minTriangle, byte[] maxTriangle) {
+ return query.relateRangeToQuery(minTriangle, maxTriangle, query.getQueryRelation());
+ }
+ };
+ }
+
/** create a visitor that adds documents that match the query using a dense bitset; used with WITHIN & DISJOINT */
private static IntersectVisitor getDenseVisitor(final ShapeQuery query, final FixedBitSet result, final FixedBitSet excluded, final long[] cost) {
return new IntersectVisitor() {
@@ -392,10 +441,12 @@ abstract class ShapeQuery extends Query {
@Override
public void visit(int docID, byte[] t) {
- if (query.queryMatches(t, scratchTriangle, query.getQueryRelation())) {
- visit(docID);
- } else {
- excluded.set(docID);
+ if (excluded.get(docID) == false) {
+ if (query.queryMatches(t, scratchTriangle, query.getQueryRelation())) {
+ visit(docID);
+ } else {
+ excluded.set(docID);
+ }
}
}
@@ -431,12 +482,14 @@ abstract class ShapeQuery extends Query {
@Override
public void visit(int docID, byte[] t) {
- Component2D.WithinRelation within = query.queryWithin(t, scratchTriangle);
- if (within == Component2D.WithinRelation.CANDIDATE) {
- cost[0]++;
- result.set(docID);
- } else if (within == Component2D.WithinRelation.NOTWITHIN) {
- excluded.set(docID);
+ if (excluded.get(docID) == false) {
+ Component2D.WithinRelation within = query.queryWithin(t, scratchTriangle);
+ if (within == Component2D.WithinRelation.CANDIDATE) {
+ cost[0]++;
+ result.set(docID);
+ } else if (within == Component2D.WithinRelation.NOTWITHIN) {
+ excluded.set(docID);
+ }
}
}
@@ -474,8 +527,10 @@ abstract class ShapeQuery extends Query {
@Override
public void visit(int docID, byte[] packedTriangle) {
- if (query.queryMatches(packedTriangle, scratchTriangle, query.getQueryRelation()) == false) {
- visit(docID);
+ if (result.get(docID)) {
+ if (query.queryMatches(packedTriangle, scratchTriangle, query.getQueryRelation()) == false) {
+ visit(docID);
+ }
}
}