You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2016/02/25 19:41:55 UTC
[02/18] lucene-solr git commit: factor out some private helper classes
factor out some private helper classes
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/96ed42bb
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/96ed42bb
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/96ed42bb
Branch: refs/heads/master
Commit: 96ed42bb135d22494bfe76ec5b85ca580ee8fb84
Parents: 013abea
Author: Mike McCandless <mi...@apache.org>
Authored: Tue Feb 23 16:48:35 2016 -0500
Committer: Mike McCandless <mi...@apache.org>
Committed: Tue Feb 23 16:48:35 2016 -0500
----------------------------------------------------------------------
.../apache/lucene/search/PointInSetQuery.java | 304 ++++++++++---------
1 file changed, 167 insertions(+), 137 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/96ed42bb/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java b/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java
index 4403e45..bbc9a54 100644
--- a/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java
@@ -64,6 +64,7 @@ public class PointInSetQuery extends Query {
BytesRefBuilder previous = null;
BytesRef current;
while ((current = packedPoints.next()) != null) {
+ // nocommit make sure a test tests this:
if (current.length != numDims * bytesPerDim) {
throw new IllegalArgumentException("packed point length should be " + (numDims * bytesPerDim) + " but got " + current.length + "; field=\"" + field + "\", numDims=" + numDims + " bytesPerDim=" + bytesPerDim);
}
@@ -145,148 +146,21 @@ public class PointInSetQuery extends Query {
DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc());
int[] hitCount = new int[1];
- final TermIterator iterator = sortedPackedPoints.iterator();
- byte[] pointBytes = new byte[bytesPerDim * numDims];
if (numDims == 1) {
- final BytesRef scratch = new BytesRef();
- scratch.length = bytesPerDim;
-
- // Optimize this common case, effectively doing a merge sort of the indexed values vs the queried set:
- values.intersect(field,
- new IntersectVisitor() {
-
- private BytesRef nextQueryPoint = iterator.next();
-
- @Override
- public void grow(int count) {
- result.grow(count);
- }
-
- @Override
- public void visit(int docID) {
- hitCount[0]++;
- result.add(docID);
- }
-
- @Override
- public void visit(int docID, byte[] packedValue) {
- scratch.bytes = packedValue;
- while (nextQueryPoint != null) {
- int cmp = nextQueryPoint.compareTo(scratch);
- if (cmp == 0) {
- // Query point equals index point, so collect and return
- hitCount[0]++;
- result.add(docID);
- break;
- } else if (cmp < 0) {
- // Query point is before index point, so we move to next query point
- nextQueryPoint = iterator.next();
- } else {
- // Query point is after index point, so we don't collect and we return:
- break;
- }
- }
- }
-
- @Override
- public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
-
- while (nextQueryPoint != null) {
- scratch.bytes = minPackedValue;
- int cmpMin = nextQueryPoint.compareTo(scratch);
- if (cmpMin < 0) {
- // query point is before the start of this cell
- nextQueryPoint = iterator.next();
- continue;
- }
- scratch.bytes = maxPackedValue;
- int cmpMax = nextQueryPoint.compareTo(scratch);
- if (cmpMax > 0) {
- // query point is after the end of this cell
- return Relation.CELL_OUTSIDE_QUERY;
- }
-
- if (cmpMin == 0 && cmpMax == 0) {
- // NOTE: we only hit this if we are on a cell whose min and max values are exactly equal to our point,
- // which can easily happen if many (> 1024) docs share this one value
- return Relation.CELL_INSIDE_QUERY;
- } else {
- return Relation.CELL_CROSSES_QUERY;
- }
- }
-
- // We exhausted all points in the query:
- return Relation.CELL_OUTSIDE_QUERY;
- }
- });
+ // We optimize this common case, effectively doing a merge sort of the indexed values vs the queried set:
+ values.intersect(field, new MergePointVisitor(sortedPackedPoints.iterator(), hitCount, result));
+
} else {
+ // NOTE: this is naive implementation, where for each point we re-walk the KD tree to intersect. We could instead do a similar
+ // optimization as the 1D case, but I think it'd mean building a query-time KD tree so we could efficiently intersect against the
+ // index, which is probably tricky!
+ SinglePointVisitor visitor = new SinglePointVisitor(hitCount, result);
+ TermIterator iterator = sortedPackedPoints.iterator();
for (BytesRef point = iterator.next(); point != null; point = iterator.next()) {
- // nocommit make sure a test tests this:
- assert point.length == pointBytes.length;
- System.arraycopy(point.bytes, point.offset, pointBytes, 0, pointBytes.length);
-
- final BytesRef finalPoint = point;
-
- values.intersect(field,
- // nocommit don't make new instance of this for each point?
- new IntersectVisitor() {
-
- @Override
- public void grow(int count) {
- result.grow(count);
- }
-
- @Override
- public void visit(int docID) {
- hitCount[0]++;
- result.add(docID);
- }
-
- @Override
- public void visit(int docID, byte[] packedValue) {
- assert packedValue.length == finalPoint.length;
- if (Arrays.equals(packedValue, pointBytes)) {
- // The point for this doc matches the point we are querying on
- hitCount[0]++;
- result.add(docID);
- }
- }
-
- @Override
- public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
-
- boolean crosses = false;
-
- for(int dim=0;dim<numDims;dim++) {
- int offset = dim*bytesPerDim;
-
- int cmpMin = StringHelper.compare(bytesPerDim, minPackedValue, offset, pointBytes, offset);
- if (cmpMin > 0) {
- return Relation.CELL_OUTSIDE_QUERY;
- }
-
- int cmpMax = StringHelper.compare(bytesPerDim, maxPackedValue, offset, pointBytes, offset);
- if (cmpMax < 0) {
- return Relation.CELL_OUTSIDE_QUERY;
- }
-
- if (cmpMin != 0 || cmpMax != 0) {
- crosses = true;
- }
- }
-
- if (crosses) {
- return Relation.CELL_CROSSES_QUERY;
- } else {
- // nocommit make sure tests hit this case:
- // NOTE: we only hit this if we are on a cell whose min and max values are exactly equal to our point,
- // which can easily happen if many docs share this one value
- return Relation.CELL_INSIDE_QUERY;
- }
- }
- });
+ visitor.setPoint(point);
+ values.intersect(field, visitor);
}
}
@@ -296,6 +170,162 @@ public class PointInSetQuery extends Query {
};
}
+ /** Essentially does a merge sort, only collecting hits when the indexed point and query point are the same. This is an optimization,
+ * used in the 1D case. */
+ private class MergePointVisitor implements IntersectVisitor {
+
+ private final DocIdSetBuilder result;
+ private final int[] hitCount;
+ private final TermIterator iterator;
+ private BytesRef nextQueryPoint;
+ private final BytesRef scratch = new BytesRef();
+
+ public MergePointVisitor(TermIterator iterator, int[] hitCount, DocIdSetBuilder result) throws IOException {
+ this.hitCount = hitCount;
+ this.result = result;
+ this.iterator = iterator;
+ nextQueryPoint = iterator.next();
+ scratch.length = bytesPerDim;
+ }
+
+ @Override
+ public void grow(int count) {
+ result.grow(count);
+ }
+
+ @Override
+ public void visit(int docID) {
+ hitCount[0]++;
+ result.add(docID);
+ }
+
+ @Override
+ public void visit(int docID, byte[] packedValue) {
+ scratch.bytes = packedValue;
+ while (nextQueryPoint != null) {
+ int cmp = nextQueryPoint.compareTo(scratch);
+ if (cmp == 0) {
+ // Query point equals index point, so collect and return
+ hitCount[0]++;
+ result.add(docID);
+ break;
+ } else if (cmp < 0) {
+ // Query point is before index point, so we move to next query point
+ nextQueryPoint = iterator.next();
+ } else {
+ // Query point is after index point, so we don't collect and we return:
+ break;
+ }
+ }
+ }
+
+ @Override
+ public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
+
+ while (nextQueryPoint != null) {
+ scratch.bytes = minPackedValue;
+ int cmpMin = nextQueryPoint.compareTo(scratch);
+ if (cmpMin < 0) {
+ // query point is before the start of this cell
+ nextQueryPoint = iterator.next();
+ continue;
+ }
+ scratch.bytes = maxPackedValue;
+ int cmpMax = nextQueryPoint.compareTo(scratch);
+ if (cmpMax > 0) {
+ // query point is after the end of this cell
+ return Relation.CELL_OUTSIDE_QUERY;
+ }
+
+ if (cmpMin == 0 && cmpMax == 0) {
+ // NOTE: we only hit this if we are on a cell whose min and max values are exactly equal to our point,
+ // which can easily happen if many (> 1024) docs share this one value
+ return Relation.CELL_INSIDE_QUERY;
+ } else {
+ return Relation.CELL_CROSSES_QUERY;
+ }
+ }
+
+ // We exhausted all points in the query:
+ return Relation.CELL_OUTSIDE_QUERY;
+ }
+ }
+
+ /** IntersectVisitor that queries against a highly degenerate shape: a single point. This is used in the > 1D case. */
+ private class SinglePointVisitor implements IntersectVisitor {
+
+ private final DocIdSetBuilder result;
+ private final int[] hitCount;
+ public BytesRef point;
+ private final byte[] pointBytes;
+
+ public SinglePointVisitor(int[] hitCount, DocIdSetBuilder result) {
+ this.hitCount = hitCount;
+ this.result = result;
+ this.pointBytes = new byte[bytesPerDim * numDims];
+ }
+
+ public void setPoint(BytesRef point) {
+ // we verified this up front in query's ctor:
+ assert point.length == pointBytes.length;
+ System.arraycopy(point.bytes, point.offset, pointBytes, 0, pointBytes.length);
+ }
+
+ @Override
+ public void grow(int count) {
+ result.grow(count);
+ }
+
+ @Override
+ public void visit(int docID) {
+ hitCount[0]++;
+ result.add(docID);
+ }
+
+ @Override
+ public void visit(int docID, byte[] packedValue) {
+ assert packedValue.length == point.length;
+ if (Arrays.equals(packedValue, pointBytes)) {
+ // The point for this doc matches the point we are querying on
+ hitCount[0]++;
+ result.add(docID);
+ }
+ }
+
+ @Override
+ public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
+
+ boolean crosses = false;
+
+ for(int dim=0;dim<numDims;dim++) {
+ int offset = dim*bytesPerDim;
+
+ int cmpMin = StringHelper.compare(bytesPerDim, minPackedValue, offset, pointBytes, offset);
+ if (cmpMin > 0) {
+ return Relation.CELL_OUTSIDE_QUERY;
+ }
+
+ int cmpMax = StringHelper.compare(bytesPerDim, maxPackedValue, offset, pointBytes, offset);
+ if (cmpMax < 0) {
+ return Relation.CELL_OUTSIDE_QUERY;
+ }
+
+ if (cmpMin != 0 || cmpMax != 0) {
+ crosses = true;
+ }
+ }
+
+ if (crosses) {
+ return Relation.CELL_CROSSES_QUERY;
+ } else {
+ // nocommit make sure tests hit this case:
+ // NOTE: we only hit this if we are on a cell whose min and max values are exactly equal to our point,
+ // which can easily happen if many docs share this one value
+ return Relation.CELL_INSIDE_QUERY;
+ }
+ }
+ }
+
@Override
public int hashCode() {
int hash = super.hashCode();