You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2016/03/07 10:08:04 UTC
[2/2] lucene-solr git commit: LUCENE-7066: Optimize PointRangeQuery
for the case that all documents have a value and all points from the segment
match.
LUCENE-7066: Optimize PointRangeQuery for the case that all documents have a value and all points from the segment match.
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/8c363479
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/8c363479
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/8c363479
Branch: refs/heads/branch_6_0
Commit: 8c36347928efd20977acc120690ab5f8d5ccd4b7
Parents: 1f34d7a
Author: Adrien Grand <jp...@gmail.com>
Authored: Mon Mar 7 09:55:39 2016 +0100
Committer: Adrien Grand <jp...@gmail.com>
Committed: Mon Mar 7 09:57:57 2016 +0100
----------------------------------------------------------------------
lucene/CHANGES.txt | 3 +
.../apache/lucene/search/PointRangeQuery.java | 145 +++++++++++--------
.../apache/lucene/search/TestPointQueries.java | 43 +++++-
3 files changed, 130 insertions(+), 61 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8c363479/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 7ba6fcf..d575b63 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -132,6 +132,9 @@ Optimizations
* LUCENE-7050: TermsQuery is now cached more aggressively by the default
query caching policy. (Adrien Grand)
+* LUCENE-7066: PointRangeQuery got optimized for the case that all documents
+ have a value and all points from the segment match. (Adrien Grand)
+
Changes in Runtime Behavior
* LUCENE-6789: IndexSearcher's default Similarity is changed to BM25Similarity.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8c363479/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java
index 85c486e..777c133 100644
--- a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java
@@ -125,6 +125,68 @@ public abstract class PointRangeQuery extends Query {
return new ConstantScoreWeight(this) {
+ private DocIdSet buildMatchingDocIdSet(LeafReader reader, PointValues values,
+ byte[] packedLower, byte[] packedUpper) throws IOException {
+ DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc());
+
+ values.intersect(field,
+ new IntersectVisitor() {
+
+ @Override
+ public void grow(int count) {
+ result.grow(count);
+ }
+
+ @Override
+ public void visit(int docID) {
+ result.add(docID);
+ }
+
+ @Override
+ public void visit(int docID, byte[] packedValue) {
+ for(int dim=0;dim<numDims;dim++) {
+ int offset = dim*bytesPerDim;
+ if (StringHelper.compare(bytesPerDim, packedValue, offset, packedLower, offset) < 0) {
+ // Doc's value is too low, in this dimension
+ return;
+ }
+ if (StringHelper.compare(bytesPerDim, packedValue, offset, packedUpper, offset) > 0) {
+ // Doc's value is too high, in this dimension
+ return;
+ }
+ }
+
+ // Doc is in-bounds
+ result.add(docID);
+ }
+
+ @Override
+ public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
+
+ boolean crosses = false;
+
+ for(int dim=0;dim<numDims;dim++) {
+ int offset = dim*bytesPerDim;
+
+ if (StringHelper.compare(bytesPerDim, minPackedValue, offset, packedUpper, offset) > 0 ||
+ StringHelper.compare(bytesPerDim, maxPackedValue, offset, packedLower, offset) < 0) {
+ return Relation.CELL_OUTSIDE_QUERY;
+ }
+
+ crosses |= StringHelper.compare(bytesPerDim, minPackedValue, offset, packedLower, offset) < 0 ||
+ StringHelper.compare(bytesPerDim, maxPackedValue, offset, packedUpper, offset) > 0;
+ }
+
+ if (crosses) {
+ return Relation.CELL_CROSSES_QUERY;
+ } else {
+ return Relation.CELL_INSIDE_QUERY;
+ }
+ }
+ });
+ return result.build();
+ }
+
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
LeafReader reader = context.reader();
@@ -155,67 +217,32 @@ public abstract class PointRangeQuery extends Query {
System.arraycopy(upperPoint[dim], 0, packedUpper, dim*bytesPerDim, bytesPerDim);
}
- // Now packedLowerIncl and packedUpperIncl are inclusive, and non-empty space:
+ boolean allDocsMatch;
+ if (values.getDocCount(field) == reader.maxDoc()) {
+ final byte[] fieldPackedLower = values.getMinPackedValue(field);
+ final byte[] fieldPackedUpper = values.getMaxPackedValue(field);
+ allDocsMatch = true;
+ for (int i = 0; i < numDims; ++i) {
+ int offset = i * bytesPerDim;
+ if (StringHelper.compare(bytesPerDim, packedLower, offset, fieldPackedLower, offset) > 0
+ || StringHelper.compare(bytesPerDim, packedUpper, offset, fieldPackedUpper, offset) < 0) {
+ allDocsMatch = false;
+ break;
+ }
+ }
+ } else {
+ allDocsMatch = false;
+ }
- DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc());
+ DocIdSetIterator iterator;
+ if (allDocsMatch) {
+ // all docs have a value and all points are within bounds, so everything matches
+ iterator = DocIdSetIterator.all(reader.maxDoc());
+ } else {
+ iterator = buildMatchingDocIdSet(reader, values, packedLower, packedUpper).iterator();
+ }
- values.intersect(field,
- new IntersectVisitor() {
-
- @Override
- public void grow(int count) {
- result.grow(count);
- }
-
- @Override
- public void visit(int docID) {
- result.add(docID);
- }
-
- @Override
- public void visit(int docID, byte[] packedValue) {
- for(int dim=0;dim<numDims;dim++) {
- int offset = dim*bytesPerDim;
- if (StringHelper.compare(bytesPerDim, packedValue, offset, packedLower, offset) < 0) {
- // Doc's value is too low, in this dimension
- return;
- }
- if (StringHelper.compare(bytesPerDim, packedValue, offset, packedUpper, offset) > 0) {
- // Doc's value is too high, in this dimension
- return;
- }
- }
-
- // Doc is in-bounds
- result.add(docID);
- }
-
- @Override
- public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
-
- boolean crosses = false;
-
- for(int dim=0;dim<numDims;dim++) {
- int offset = dim*bytesPerDim;
-
- if (StringHelper.compare(bytesPerDim, minPackedValue, offset, packedUpper, offset) > 0 ||
- StringHelper.compare(bytesPerDim, maxPackedValue, offset, packedLower, offset) < 0) {
- return Relation.CELL_OUTSIDE_QUERY;
- }
-
- crosses |= StringHelper.compare(bytesPerDim, minPackedValue, offset, packedLower, offset) < 0 ||
- StringHelper.compare(bytesPerDim, maxPackedValue, offset, packedUpper, offset) > 0;
- }
-
- if (crosses) {
- return Relation.CELL_CROSSES_QUERY;
- } else {
- return Relation.CELL_INSIDE_QUERY;
- }
- }
- });
-
- return new ConstantScoreScorer(this, score(), result.build().iterator());
+ return new ConstantScoreScorer(this, score(), iterator);
}
};
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8c363479/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java b/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java
index cd0d719..5a3483b 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java
@@ -53,13 +53,11 @@ import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
-import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
-import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
@@ -1847,4 +1845,45 @@ public class TestPointQueries extends LuceneTestCase {
// binary
assertEquals("bytes:{[12] [2a]}", BinaryPoint.newSetQuery("bytes", new byte[] {42}, new byte[] {18}).toString());
}
+
+ public void testRangeOptimizesIfAllPointsMatch() throws IOException {
+ final int numDims = TestUtil.nextInt(random(), 1, 3);
+ Directory dir = newDirectory();
+ RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+ Document doc = new Document();
+ int[] value = new int[numDims];
+ for (int i = 0; i < numDims; ++i) {
+ value[i] = TestUtil.nextInt(random(), 1, 10);
+ }
+ doc.add(new IntPoint("point", value));
+ w.addDocument(doc);
+ IndexReader reader = w.getReader();
+ IndexSearcher searcher = new IndexSearcher(reader);
+ searcher.setQueryCache(null);
+ int[] lowerBound = new int[numDims];
+ int[] upperBound = new int[numDims];
+ for (int i = 0; i < numDims; ++i) {
+ lowerBound[i] = value[i] - random().nextInt(1);
+ upperBound[i] = value[i] + random().nextInt(1);
+ }
+ Query query = IntPoint.newRangeQuery("point", lowerBound, upperBound);
+ Weight weight = searcher.createNormalizedWeight(query, false);
+ Scorer scorer = weight.scorer(searcher.getIndexReader().leaves().get(0));
+ assertEquals(DocIdSetIterator.all(1).getClass(), scorer.iterator().getClass());
+
+ // When not all documents in the query have a value, the optimization is not applicable
+ reader.close();
+ w.addDocument(new Document());
+ w.forceMerge(1);
+ reader = w.getReader();
+ searcher = new IndexSearcher(reader);
+ searcher.setQueryCache(null);
+ weight = searcher.createNormalizedWeight(query, false);
+ scorer = weight.scorer(searcher.getIndexReader().leaves().get(0));
+ assertFalse(DocIdSetIterator.all(1).getClass().equals(scorer.iterator().getClass()));
+
+ reader.close();
+ w.close();
+ dir.close();
+ }
}