You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2016/03/07 10:08:04 UTC

[2/2] lucene-solr git commit: LUCENE-7066: Optimize PointRangeQuery for the case that all documents have a value and all points from the segment match.

LUCENE-7066: Optimize PointRangeQuery for the case that all documents have a value and all points from the segment match.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/8c363479
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/8c363479
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/8c363479

Branch: refs/heads/branch_6_0
Commit: 8c36347928efd20977acc120690ab5f8d5ccd4b7
Parents: 1f34d7a
Author: Adrien Grand <jp...@gmail.com>
Authored: Mon Mar 7 09:55:39 2016 +0100
Committer: Adrien Grand <jp...@gmail.com>
Committed: Mon Mar 7 09:57:57 2016 +0100

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |   3 +
 .../apache/lucene/search/PointRangeQuery.java   | 145 +++++++++++--------
 .../apache/lucene/search/TestPointQueries.java  |  43 +++++-
 3 files changed, 130 insertions(+), 61 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8c363479/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 7ba6fcf..d575b63 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -132,6 +132,9 @@ Optimizations
 * LUCENE-7050: TermsQuery is now cached more aggressively by the default
   query caching policy. (Adrien Grand)
 
+* LUCENE-7066: PointRangeQuery got optimized for the case that all documents
+  have a value and all points from the segment match. (Adrien Grand)
+
 Changes in Runtime Behavior
 
 * LUCENE-6789: IndexSearcher's default Similarity is changed to BM25Similarity.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8c363479/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java
index 85c486e..777c133 100644
--- a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java
@@ -125,6 +125,68 @@ public abstract class PointRangeQuery extends Query {
 
     return new ConstantScoreWeight(this) {
 
+      private DocIdSet buildMatchingDocIdSet(LeafReader reader, PointValues values,
+          byte[] packedLower, byte[] packedUpper) throws IOException {
+        DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc());
+
+        values.intersect(field,
+            new IntersectVisitor() {
+
+              @Override
+              public void grow(int count) {
+                result.grow(count);
+              }
+
+              @Override
+              public void visit(int docID) {
+                result.add(docID);
+              }
+
+              @Override
+              public void visit(int docID, byte[] packedValue) {
+                for(int dim=0;dim<numDims;dim++) {
+                  int offset = dim*bytesPerDim;
+                  if (StringHelper.compare(bytesPerDim, packedValue, offset, packedLower, offset) < 0) {
+                    // Doc's value is too low, in this dimension
+                    return;
+                  }
+                  if (StringHelper.compare(bytesPerDim, packedValue, offset, packedUpper, offset) > 0) {
+                    // Doc's value is too high, in this dimension
+                    return;
+                  }
+                }
+
+                // Doc is in-bounds
+                result.add(docID);
+              }
+
+              @Override
+              public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
+
+                boolean crosses = false;
+
+                for(int dim=0;dim<numDims;dim++) {
+                  int offset = dim*bytesPerDim;
+
+                  if (StringHelper.compare(bytesPerDim, minPackedValue, offset, packedUpper, offset) > 0 ||
+                      StringHelper.compare(bytesPerDim, maxPackedValue, offset, packedLower, offset) < 0) {
+                    return Relation.CELL_OUTSIDE_QUERY;
+                  }
+
+                  crosses |= StringHelper.compare(bytesPerDim, minPackedValue, offset, packedLower, offset) < 0 ||
+                    StringHelper.compare(bytesPerDim, maxPackedValue, offset, packedUpper, offset) > 0;
+                }
+
+                if (crosses) {
+                  return Relation.CELL_CROSSES_QUERY;
+                } else {
+                  return Relation.CELL_INSIDE_QUERY;
+                }
+              }
+            });
+        return result.build();
+      }
+
       @Override
       public Scorer scorer(LeafReaderContext context) throws IOException {
         LeafReader reader = context.reader();
@@ -155,67 +217,32 @@ public abstract class PointRangeQuery extends Query {
           System.arraycopy(upperPoint[dim], 0, packedUpper, dim*bytesPerDim, bytesPerDim);
         }
 
-        // Now packedLowerIncl and packedUpperIncl are inclusive, and non-empty space:
+        boolean allDocsMatch;
+        if (values.getDocCount(field) == reader.maxDoc()) {
+          final byte[] fieldPackedLower = values.getMinPackedValue(field);
+          final byte[] fieldPackedUpper = values.getMaxPackedValue(field);
+          allDocsMatch = true;
+          for (int i = 0; i < numDims; ++i) {
+            int offset = i * bytesPerDim;
+            if (StringHelper.compare(bytesPerDim, packedLower, offset, fieldPackedLower, offset) > 0
+                || StringHelper.compare(bytesPerDim, packedUpper, offset, fieldPackedUpper, offset) < 0) {
+              allDocsMatch = false;
+              break;
+            }
+          }
+        } else {
+          allDocsMatch = false;
+        }
 
-        DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc());
+        DocIdSetIterator iterator;
+        if (allDocsMatch) {
+          // all docs have a value and all points are within bounds, so everything matches
+          iterator = DocIdSetIterator.all(reader.maxDoc());
+        } else {
+          iterator = buildMatchingDocIdSet(reader, values, packedLower, packedUpper).iterator();
+        }
 
-        values.intersect(field,
-                         new IntersectVisitor() {
-
-                           @Override
-                           public void grow(int count) {
-                             result.grow(count);
-                           }
-
-                           @Override
-                           public void visit(int docID) {
-                             result.add(docID);
-                           }
-
-                           @Override
-                           public void visit(int docID, byte[] packedValue) {
-                             for(int dim=0;dim<numDims;dim++) {
-                               int offset = dim*bytesPerDim;
-                               if (StringHelper.compare(bytesPerDim, packedValue, offset, packedLower, offset) < 0) {
-                                 // Doc's value is too low, in this dimension
-                                 return;
-                               }
-                               if (StringHelper.compare(bytesPerDim, packedValue, offset, packedUpper, offset) > 0) {
-                                 // Doc's value is too high, in this dimension
-                                 return;
-                               }
-                             }
-
-                             // Doc is in-bounds
-                             result.add(docID);
-                           }
-
-                           @Override
-                           public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
-
-                             boolean crosses = false;
-
-                             for(int dim=0;dim<numDims;dim++) {
-                               int offset = dim*bytesPerDim;
-
-                               if (StringHelper.compare(bytesPerDim, minPackedValue, offset, packedUpper, offset) > 0 ||
-                                   StringHelper.compare(bytesPerDim, maxPackedValue, offset, packedLower, offset) < 0) {
-                                 return Relation.CELL_OUTSIDE_QUERY;
-                               }
-
-                               crosses |= StringHelper.compare(bytesPerDim, minPackedValue, offset, packedLower, offset) < 0 ||
-                                 StringHelper.compare(bytesPerDim, maxPackedValue, offset, packedUpper, offset) > 0;
-                             }
-
-                             if (crosses) {
-                               return Relation.CELL_CROSSES_QUERY;
-                             } else {
-                               return Relation.CELL_INSIDE_QUERY;
-                             }
-                           }
-                         });
-
-        return new ConstantScoreScorer(this, score(), result.build().iterator());
+        return new ConstantScoreScorer(this, score(), iterator);
       }
     };
   }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8c363479/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java b/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java
index cd0d719..5a3483b 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java
@@ -53,13 +53,11 @@ import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.MultiDocValues;
-import org.apache.lucene.index.MultiReader;
 import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.index.PointValues;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SegmentWriteState;
-import org.apache.lucene.index.SlowCompositeReaderWrapper;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
@@ -1847,4 +1845,45 @@ public class TestPointQueries extends LuceneTestCase {
     // binary
     assertEquals("bytes:{[12] [2a]}", BinaryPoint.newSetQuery("bytes", new byte[] {42}, new byte[] {18}).toString());
   }
+
+  public void testRangeOptimizesIfAllPointsMatch() throws IOException {
+    final int numDims = TestUtil.nextInt(random(), 1, 3);
+    Directory dir = newDirectory();
+    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+    Document doc = new Document();
+    int[] value = new int[numDims];
+    for (int i = 0; i < numDims; ++i) {
+      value[i] = TestUtil.nextInt(random(), 1, 10);
+    }
+    doc.add(new IntPoint("point", value));
+    w.addDocument(doc);
+    IndexReader reader = w.getReader();
+    IndexSearcher searcher = new IndexSearcher(reader);
+    searcher.setQueryCache(null);
+    int[] lowerBound = new int[numDims];
+    int[] upperBound = new int[numDims];
+    for (int i = 0; i < numDims; ++i) {
+      lowerBound[i] = value[i] - random().nextInt(1);
+      upperBound[i] = value[i] + random().nextInt(1);
+    }
+    Query query = IntPoint.newRangeQuery("point", lowerBound, upperBound);
+    Weight weight = searcher.createNormalizedWeight(query, false);
+    Scorer scorer = weight.scorer(searcher.getIndexReader().leaves().get(0));
+    assertEquals(DocIdSetIterator.all(1).getClass(), scorer.iterator().getClass());
+
+    // When not all documents in the query have a value, the optimization is not applicable
+    reader.close();
+    w.addDocument(new Document());
+    w.forceMerge(1);
+    reader = w.getReader();
+    searcher = new IndexSearcher(reader);
+    searcher.setQueryCache(null);
+    weight = searcher.createNormalizedWeight(query, false);
+    scorer = weight.scorer(searcher.getIndexReader().leaves().get(0));
+    assertFalse(DocIdSetIterator.all(1).getClass().equals(scorer.iterator().getClass()));
+
+    reader.close();
+    w.close();
+    dir.close();
+  }
 }