You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by iv...@apache.org on 2020/09/18 05:54:56 UTC

[lucene-solr] branch branch_8x updated: LUCENE-9523: Speed up query shapes for geometries that generate multiple points (#1866)

This is an automated email from the ASF dual-hosted git repository.

ivera pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/branch_8x by this push:
     new 47215d4  LUCENE-9523: Speed up query shapes for geometries that generate multiple points (#1866)
47215d4 is described below

commit 47215d4a858c5959ea5c745249057db356f2cf16
Author: Ignacio Vera <iv...@apache.org>
AuthorDate: Fri Sep 18 07:50:58 2020 +0200

    LUCENE-9523: Speed up query shapes for geometries that generate multiple points (#1866)
    
    In query shapes over shape fields, skip points while traversing the BKD tree when the relationship with the document is already known
---
 lucene/CHANGES.txt                                 |  3 +
 .../org/apache/lucene/document/ShapeQuery.java     | 95 +++++++++++++++++-----
 2 files changed, 78 insertions(+), 20 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 674626f..6bd927b 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -63,6 +63,9 @@ Improvements
   heap consumption is taken into account when IndexWriter stalls or should flush
   DWPTs. (Simon Willnauer)
 
+* LUCENE-9523: In query shapes over shape fields, skip points while traversing the
+  BKD tree when the relationship with the document is already known. (Ignacio Vera)
+
 Optimizations
 ---------------------
 
diff --git a/lucene/core/src/java/org/apache/lucene/document/ShapeQuery.java b/lucene/core/src/java/org/apache/lucene/document/ShapeQuery.java
index a6fa340..e8ea282 100644
--- a/lucene/core/src/java/org/apache/lucene/document/ShapeQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/document/ShapeQuery.java
@@ -264,10 +264,20 @@ abstract class ShapeQuery extends Query {
         final DocIdSetIterator iterator = new BitSetIterator(result, cost[0]);
         return new ConstantScoreScorer(weight, boost, scoreMode, iterator);
       }
-      final DocIdSetBuilder docIdSetBuilder = new DocIdSetBuilder(reader.maxDoc(), values, query.getField());
-      values.intersect(getSparseVisitor(query, docIdSetBuilder));
-      final DocIdSetIterator iterator = docIdSetBuilder.build().iterator();
-      return new ConstantScoreScorer(weight, boost, scoreMode, iterator);
+      if (values.getDocCount() < (values.size() >>> 2)) {
+        // we use a dense structure so we can skip already visited documents
+        final FixedBitSet result = new FixedBitSet(reader.maxDoc());
+        final long[] cost = new long[]{0};
+        values.intersect(getIntersectsDenseVisitor(query, result, cost));
+        assert cost[0] > 0 || result.cardinality() == 0;
+        final DocIdSetIterator iterator = cost[0] == 0 ? DocIdSetIterator.empty() : new BitSetIterator(result, cost[0]);
+        return new ConstantScoreScorer(weight, boost, scoreMode, iterator);
+      } else {
+        final DocIdSetBuilder docIdSetBuilder = new DocIdSetBuilder(reader.maxDoc(), values, query.getField());
+        values.intersect(getSparseVisitor(query, docIdSetBuilder));
+        final DocIdSetIterator iterator = docIdSetBuilder.build().iterator();
+        return new ConstantScoreScorer(weight, boost, scoreMode, iterator);
+      }
     }
 
     /** Scorer used for WITHIN and DISJOINT **/
@@ -292,8 +302,8 @@ abstract class ShapeQuery extends Query {
         // process still reads the leaf nodes.
         values.intersect(getShallowInverseDenseVisitor(query, result));
       }
-      assert cost[0] > 0;
-      final DocIdSetIterator iterator = new BitSetIterator(result, cost[0]);
+      assert cost[0] > 0 || result.cardinality() == 0;
+      final DocIdSetIterator iterator = cost[0] == 0 ? DocIdSetIterator.empty() : new BitSetIterator(result, cost[0]);
       return new ConstantScoreScorer(weight, boost, scoreMode, iterator);
     }
 
@@ -304,7 +314,8 @@ abstract class ShapeQuery extends Query {
       final FixedBitSet excluded = new FixedBitSet(reader.maxDoc());
       values.intersect(getContainsDenseVisitor(query, result, excluded, cost));
       result.andNot(excluded);
-      final DocIdSetIterator iterator = new BitSetIterator(result, cost[0]);
+      assert cost[0] > 0 || result.cardinality() == 0;
+      final DocIdSetIterator iterator = cost[0] == 0 ? DocIdSetIterator.empty() : new BitSetIterator(result, cost[0]);
       return new ConstantScoreScorer(weight, boost, scoreMode, iterator);
     }
 
@@ -339,7 +350,8 @@ abstract class ShapeQuery extends Query {
     };
   }
 
-  /** create a visitor that adds documents that match the query using a sparse bitset. (Used by INTERSECT) */
+  /** create a visitor that adds documents that match the query using a sparse bitset. (Used by INTERSECT
+   * when the number of docs <= 4 * number of points ) */
   private static IntersectVisitor getSparseVisitor(final ShapeQuery query, final DocIdSetBuilder result) {
     return new IntersectVisitor() {
       final ShapeField.DecodedTriangle scratchTriangle = new ShapeField.DecodedTriangle();
@@ -379,6 +391,43 @@ abstract class ShapeQuery extends Query {
     };
   }
 
+  /** Scorer used for INTERSECTS when the number of points > 4 * number of docs **/
+  private static IntersectVisitor getIntersectsDenseVisitor(final ShapeQuery query, final FixedBitSet result, final long[] cost) {
+    return new IntersectVisitor() {
+      final ShapeField.DecodedTriangle scratchTriangle = new ShapeField.DecodedTriangle();
+
+      @Override
+      public void visit(int docID) {
+        result.set(docID);
+        cost[0]++;
+      }
+
+      @Override
+      public void visit(int docID, byte[] t) {
+        if (result.get(docID) == false) {
+          if (query.queryMatches(t, scratchTriangle, query.getQueryRelation())) {
+            visit(docID);
+          }
+        }
+      }
+
+      @Override
+      public void visit(DocIdSetIterator iterator, byte[] t) throws IOException {
+        if (query.queryMatches(t, scratchTriangle, query.getQueryRelation())) {
+          int docID;
+          while ((docID = iterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+              visit(docID);
+          }
+        }
+      }
+
+      @Override
+      public Relation compare(byte[] minTriangle, byte[] maxTriangle) {
+        return query.relateRangeToQuery(minTriangle, maxTriangle, query.getQueryRelation());
+      }
+    };
+  }
+
   /** create a visitor that adds documents that match the query using a dense bitset; used with WITHIN & DISJOINT */
   private static IntersectVisitor getDenseVisitor(final ShapeQuery query, final FixedBitSet result, final FixedBitSet excluded, final long[] cost) {
     return new IntersectVisitor() {
@@ -392,10 +441,12 @@ abstract class ShapeQuery extends Query {
 
       @Override
       public void visit(int docID, byte[] t) {
-        if (query.queryMatches(t, scratchTriangle, query.getQueryRelation())) {
-          visit(docID);
-        } else {
-          excluded.set(docID);
+        if (excluded.get(docID) == false) {
+          if (query.queryMatches(t, scratchTriangle, query.getQueryRelation())) {
+            visit(docID);
+          } else {
+            excluded.set(docID);
+          }
         }
       }
 
@@ -431,12 +482,14 @@ abstract class ShapeQuery extends Query {
 
       @Override
       public void visit(int docID, byte[] t) {
-        Component2D.WithinRelation within = query.queryWithin(t, scratchTriangle);
-        if (within == Component2D.WithinRelation.CANDIDATE) {
-          cost[0]++;
-          result.set(docID);
-        } else if (within == Component2D.WithinRelation.NOTWITHIN) {
-          excluded.set(docID);
+        if (excluded.get(docID) == false) {
+          Component2D.WithinRelation within = query.queryWithin(t, scratchTriangle);
+          if (within == Component2D.WithinRelation.CANDIDATE) {
+            cost[0]++;
+            result.set(docID);
+          } else if (within == Component2D.WithinRelation.NOTWITHIN) {
+            excluded.set(docID);
+          }
         }
       }
 
@@ -474,8 +527,10 @@ abstract class ShapeQuery extends Query {
 
       @Override
       public void visit(int docID, byte[] packedTriangle) {
-        if (query.queryMatches(packedTriangle, scratchTriangle, query.getQueryRelation()) == false) {
-          visit(docID);
+        if (result.get(docID)) {
+          if (query.queryMatches(packedTriangle, scratchTriangle, query.getQueryRelation()) == false) {
+            visit(docID);
+          }
         }
       }