You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2016/04/26 15:18:35 UTC
lucene-solr:branch_6x: LUCENE-7254: (sandbox/ only) Don't let abuse
cases slow down spatial queries
Repository: lucene-solr
Updated Branches:
refs/heads/branch_6x 89857653c -> 111107b3b
LUCENE-7254: (sandbox/ only) Don't let abuse cases slow down spatial queries
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/111107b3
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/111107b3
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/111107b3
Branch: refs/heads/branch_6x
Commit: 111107b3bd00473b0ddb452b5a059c95dfde272d
Parents: 8985765
Author: Robert Muir <rm...@apache.org>
Authored: Tue Apr 26 09:16:27 2016 -0400
Committer: Robert Muir <rm...@apache.org>
Committed: Tue Apr 26 09:17:48 2016 -0400
----------------------------------------------------------------------
.../org/apache/lucene/document/LatLonPoint.java | 3 +-
.../lucene/document/LatLonPointBoxQuery.java | 287 +++++++++++++++++++
.../document/LatLonPointDistanceQuery.java | 17 +-
.../document/LatLonPointInPolygonQuery.java | 18 +-
.../apache/lucene/document/MatchingPoints.java | 90 ++++++
5 files changed, 382 insertions(+), 33 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/111107b3/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPoint.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPoint.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPoint.java
index 426a702..a63e4bd 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPoint.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPoint.java
@@ -34,7 +34,6 @@ import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MatchNoDocsQuery;
-import org.apache.lucene.search.PointRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopFieldDocs;
@@ -229,7 +228,7 @@ public class LatLonPoint extends Field {
}
private static Query newBoxInternal(String field, byte[] min, byte[] max) {
- return new PointRangeQuery(field, min, max, 2) {
+ return new LatLonPointBoxQuery(field, min, max, 2) {
@Override
protected String toString(int dimension, byte[] value) {
if (dimension == 0) {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/111107b3/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointBoxQuery.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointBoxQuery.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointBoxQuery.java
new file mode 100644
index 0000000..423af05
--- /dev/null
+++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointBoxQuery.java
@@ -0,0 +1,287 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.document;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Objects;
+
+import org.apache.lucene.index.PointValues;
+import org.apache.lucene.index.PointValues.IntersectVisitor;
+import org.apache.lucene.index.PointValues.Relation;
+import org.apache.lucene.search.ConstantScoreScorer;
+import org.apache.lucene.search.ConstantScoreWeight;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.PointRangeQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.Weight;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.util.StringHelper;
+
+/**
+ * Fast version of {@link PointRangeQuery}. It is fast for actual range queries!
+ * @lucene.experimental
+ */
+abstract class LatLonPointBoxQuery extends Query {
+ final String field; // name of the point field this query targets
+ final int numDims; // number of dimensions packed into each bound
+ final int bytesPerDim; // width in bytes of one dimension: lowerPoint.length / numDims
+ final byte[] lowerPoint; // packed lower bound (inclusive): numDims slices of bytesPerDim bytes each
+ final byte[] upperPoint; // packed upper bound (inclusive): same length and layout as lowerPoint
+
+  /**
+   * Expert: create a multidimensional range query for point values.
+   * <p>
+   * Both bounds are packed values: {@code numDims} consecutive slices of equal width, one per
+   * dimension. The width of one slice ({@code bytesPerDim}) is derived here as
+   * {@code lowerPoint.length / numDims}.
+   * <p>
+   * The bound arrays are copied, so changes the caller later makes to the arrays it passed in
+   * cannot alter this query's bounds, its {@link #hashCode()} or its {@link #equals(Object)}.
+   *
+   * @param field field name. must not be {@code null}.
+   * @param lowerPoint lower portion of the range (inclusive). must not be {@code null}.
+   * @param upperPoint upper portion of the range (inclusive). must not be {@code null}.
+   * @param numDims number of dimensions. must be positive.
+   * @throws IllegalArgumentException if {@code field}, {@code lowerPoint} or {@code upperPoint} is null,
+   *         if {@code numDims} is not positive, if {@code lowerPoint} is empty or its length is not a
+   *         multiple of {@code numDims}, or if {@code lowerPoint.length != upperPoint.length}
+   */
+  protected LatLonPointBoxQuery(String field, byte[] lowerPoint, byte[] upperPoint, int numDims) {
+    // The checks run in the same order as before, so the first failing check (and its message)
+    // is unchanged.
+    checkArgs(field, lowerPoint, upperPoint);
+    if (numDims <= 0) {
+      throw new IllegalArgumentException("numDims must be positive, got " + numDims);
+    }
+    if (lowerPoint.length == 0) {
+      throw new IllegalArgumentException("lowerPoint has length of zero");
+    }
+    if (lowerPoint.length % numDims != 0) {
+      throw new IllegalArgumentException("lowerPoint is not a fixed multiple of numDims");
+    }
+    if (lowerPoint.length != upperPoint.length) {
+      throw new IllegalArgumentException("lowerPoint has length=" + lowerPoint.length + " but upperPoint has different length=" + upperPoint.length);
+    }
+
+    this.field = field;
+    this.numDims = numDims;
+    this.bytesPerDim = lowerPoint.length / numDims;
+
+    // Defensive copies: equals() and hashCode() are computed from these arrays, so the query
+    // must not share them with the caller.
+    this.lowerPoint = lowerPoint.clone();
+    this.upperPoint = upperPoint.clone();
+  }
+
+  /**
+   * Verifies the arguments shared by all factory methods.
+   * <p>
+   * The arguments are examined in declaration order, so when several of them are {@code null}
+   * the exception names the first one.
+   *
+   * @param field field name
+   * @param lowerPoint lower bound of the range
+   * @param upperPoint upper bound of the range
+   * @throws IllegalArgumentException if {@code field}, {@code lowerPoint} or {@code upperPoint} are null.
+   */
+  public static void checkArgs(String field, Object lowerPoint, Object upperPoint) {
+    final String problem;
+    if (field == null) {
+      problem = "field must not be null";
+    } else if (lowerPoint == null) {
+      problem = "lowerPoint must not be null";
+    } else if (upperPoint == null) {
+      problem = "upperPoint must not be null";
+    } else {
+      // every argument is present
+      problem = null;
+    }
+
+    if (problem != null) {
+      throw new IllegalArgumentException(problem);
+    }
+  }
+
+ @Override
+ public final Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException { // neither argument is read in this method
+
+ // We don't use RandomAccessWeight here: it's no good to approximate with "match all docs".
+ // This is an inverted structure and should be used in the first pass:
+
+ return new ConstantScoreWeight(this) {
+
+ private DocIdSetIterator buildMatchingIterator(LeafReader reader, PointValues values) throws IOException { // gathers the docs whose point lies inside [lowerPoint, upperPoint]
+ MatchingPoints result = new MatchingPoints(reader, field);
+
+ values.intersect(field,
+ new IntersectVisitor() {
+
+ @Override
+ public void visit(int docID) { // no value is supplied, so the doc is recorded without a bounds check (presumably only used for cells reported as CELL_INSIDE_QUERY -- confirm against the IntersectVisitor contract)
+ result.add(docID);
+ }
+
+ @Override
+ public void visit(int docID, byte[] packedValue) { // records the doc only if the value is within bounds in every dimension
+ for(int dim=0;dim<numDims;dim++) {
+ int offset = dim*bytesPerDim; // start of this dimension's slice inside each packed array
+ if (StringHelper.compare(bytesPerDim, packedValue, offset, lowerPoint, offset) < 0) {
+ // Doc's value is too low, in this dimension
+ return;
+ }
+ if (StringHelper.compare(bytesPerDim, packedValue, offset, upperPoint, offset) > 0) {
+ // Doc's value is too high, in this dimension
+ return;
+ }
+ }
+
+ // Doc is in-bounds
+ result.add(docID);
+ }
+
+ @Override
+ public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { // classifies the cell [minPackedValue, maxPackedValue] against the query box
+
+ boolean crosses = false; // becomes true once the cell extends past the query range in any dimension
+
+ for(int dim=0;dim<numDims;dim++) {
+ int offset = dim*bytesPerDim;
+
+ if (StringHelper.compare(bytesPerDim, minPackedValue, offset, upperPoint, offset) > 0 ||
+ StringHelper.compare(bytesPerDim, maxPackedValue, offset, lowerPoint, offset) < 0) {
+ return Relation.CELL_OUTSIDE_QUERY; // no overlap in this dimension, so no overlap at all
+ }
+
+ crosses |= StringHelper.compare(bytesPerDim, minPackedValue, offset, lowerPoint, offset) < 0 ||
+ StringHelper.compare(bytesPerDim, maxPackedValue, offset, upperPoint, offset) > 0; // cell starts below the lower bound or ends above the upper bound
+ }
+
+ if (crosses) {
+ return Relation.CELL_CROSSES_QUERY;
+ } else {
+ return Relation.CELL_INSIDE_QUERY; // in every dimension the cell lies within [lowerPoint, upperPoint]
+ }
+ }
+ });
+ return result.iterator();
+ }
+
+ @Override
+ public Scorer scorer(LeafReaderContext context) throws IOException { // returns null when the segment cannot contain a match for this field
+ LeafReader reader = context.reader();
+ PointValues values = reader.getPointValues();
+ if (values == null) {
+ // No docs in this segment indexed any points
+ return null;
+ }
+ FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
+ if (fieldInfo == null) {
+ // No docs in this segment indexed this field at all
+ return null;
+ }
+ if (fieldInfo.getPointDimensionCount() != numDims) { // the field's indexed layout must agree with the layout of this query's bounds
+ throw new IllegalArgumentException("field=\"" + field + "\" was indexed with numDims=" + fieldInfo.getPointDimensionCount() + " but this query has numDims=" + numDims);
+ }
+ if (bytesPerDim != fieldInfo.getPointNumBytes()) {
+ throw new IllegalArgumentException("field=\"" + field + "\" was indexed with bytesPerDim=" + fieldInfo.getPointNumBytes() + " but this query has bytesPerDim=" + bytesPerDim);
+ }
+
+ boolean allDocsMatch; // shortcut: true when the intersection can be skipped entirely
+ if (values.getDocCount(field) == reader.maxDoc()) { // the field's doc count equals the segment's maxDoc
+ final byte[] fieldPackedLower = values.getMinPackedValue(field);
+ final byte[] fieldPackedUpper = values.getMaxPackedValue(field);
+ allDocsMatch = true;
+ for (int i = 0; i < numDims; ++i) {
+ int offset = i * bytesPerDim;
+ if (StringHelper.compare(bytesPerDim, lowerPoint, offset, fieldPackedLower, offset) > 0
+ || StringHelper.compare(bytesPerDim, upperPoint, offset, fieldPackedUpper, offset) < 0) { // the query box does not contain the field's whole [min, max] range in this dimension
+ allDocsMatch = false;
+ break;
+ }
+ }
+ } else {
+ allDocsMatch = false;
+ }
+
+ DocIdSetIterator iterator;
+ if (allDocsMatch) {
+ // all docs have a value and all points are within bounds, so everything matches
+ iterator = DocIdSetIterator.all(reader.maxDoc());
+ } else {
+ iterator = buildMatchingIterator(reader, values); // walk the point values and collect the matching docs
+ }
+
+ return new ConstantScoreScorer(this, score(), iterator);
+ }
+ };
+ }
+
+  /**
+   * Folds the superclass hash together with the field name, both packed bounds, the dimension
+   * count and the per-dimension width, i.e. the same state that {@link #equals(Object)} compares.
+   */
+  @Override
+  public final int hashCode() {
+    // components are listed in the order in which they are mixed in
+    final int[] components = {
+      field.hashCode(),
+      Arrays.hashCode(lowerPoint),
+      Arrays.hashCode(upperPoint),
+      numDims,
+      Objects.hashCode(bytesPerDim)
+    };
+
+    int result = super.hashCode();
+    for (int component : components) {
+      result = 31 * result + component;
+    }
+    return result;
+  }
+
+  /**
+   * Two box queries are equal when the superclass considers them equal and they agree on field
+   * name, dimension count, per-dimension width and the contents of both packed bounds.
+   */
+  @Override
+  public final boolean equals(Object other) {
+    if (super.equals(other) == false) {
+      return false;
+    }
+
+    // NOTE(review): the cast presumably relies on super.equals() having rejected null and
+    // objects of a different class -- confirm against Query.equals().
+    final LatLonPointBoxQuery that = (LatLonPointBoxQuery) other;
+    return field.equals(that.field)
+        && that.numDims == numDims
+        && that.bytesPerDim == bytesPerDim
+        && Arrays.equals(lowerPoint, that.lowerPoint)
+        && Arrays.equals(upperPoint, that.upperPoint);
+  }
+
+  /**
+   * Renders the query as one {@code [lower TO upper]} range per dimension, separated by commas.
+   * The output is prefixed with this query's field name and a colon unless that name equals the
+   * {@code field} argument.
+   *
+   * @param field the field name for which the prefix is omitted
+   * @return the textual form of this query
+   */
+  @Override
+  public final String toString(String field) {
+    final StringBuilder out = new StringBuilder();
+    if (this.field.equals(field) == false) {
+      out.append(this.field).append(':');
+    }
+
+    // "from" tracks the start of the current dimension's slice in both packed bounds
+    for (int dim = 0, from = 0; dim < numDims; dim++, from += bytesPerDim) {
+      final int to = from + bytesPerDim;
+      if (dim > 0) {
+        out.append(',');
+      }
+      out.append('[')
+          .append(toString(dim, Arrays.copyOfRange(lowerPoint, from, to)))
+          .append(" TO ")
+          .append(toString(dim, Arrays.copyOfRange(upperPoint, from, to)))
+          .append(']');
+    }
+
+    return out.toString();
+  }
+
+ /**
+ * Returns a string of a single value in a human-readable format for debugging.
+ * This is used by {@link #toString(String)}, which calls it twice for every dimension:
+ * once with that dimension's lower bound and once with its upper bound.
+ *
+ * @param dimension zero-based dimension of the particular value
+ * @param value single value, never null; a copy holding only this dimension's bytes
+ * @return human readable value for debugging
+ */
+ protected abstract String toString(int dimension, byte[] value);
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/111107b3/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java
index 9bd78fe..0759ce1 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java
@@ -28,13 +28,10 @@ import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
-import org.apache.lucene.search.DocIdSet;
-import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
-import org.apache.lucene.util.DocIdSetBuilder;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.SloppyMath;
import org.apache.lucene.util.StringHelper;
@@ -120,16 +117,11 @@ final class LatLonPointDistanceQuery extends Query {
LatLonPoint.checkCompatible(fieldInfo);
// matching docids
- DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc());
+ MatchingPoints result = new MatchingPoints(reader, field);
values.intersect(field,
new IntersectVisitor() {
@Override
- public void grow(int count) {
- result.grow(count);
- }
-
- @Override
public void visit(int docID) {
result.add(docID);
}
@@ -209,12 +201,7 @@ final class LatLonPointDistanceQuery extends Query {
}
});
- DocIdSet set = result.build();
- final DocIdSetIterator disi = set.iterator();
- if (disi == null) {
- return null;
- }
- return new ConstantScoreScorer(this, score(), disi);
+ return new ConstantScoreScorer(this, score(), result.iterator());
}
};
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/111107b3/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java
index ee7c1e8..506e6b9 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java
@@ -24,8 +24,6 @@ import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
-import org.apache.lucene.search.DocIdSet;
-import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
@@ -34,7 +32,6 @@ import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.util.DocIdSetBuilder;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.geo.Polygon;
@@ -113,16 +110,11 @@ final class LatLonPointInPolygonQuery extends Query {
LatLonPoint.checkCompatible(fieldInfo);
// matching docids
- DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc());
+ MatchingPoints result = new MatchingPoints(reader, field);
values.intersect(field,
new IntersectVisitor() {
@Override
- public void grow(int count) {
- result.grow(count);
- }
-
- @Override
public void visit(int docID) {
result.add(docID);
}
@@ -154,13 +146,7 @@ final class LatLonPointInPolygonQuery extends Query {
}
});
- DocIdSet set = result.build();
- final DocIdSetIterator disi = set.iterator();
- if (disi == null) {
- return null;
- }
-
- return new ConstantScoreScorer(this, score(), disi);
+ return new ConstantScoreScorer(this, score(), result.iterator());
}
};
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/111107b3/lucene/sandbox/src/java/org/apache/lucene/document/MatchingPoints.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/MatchingPoints.java b/lucene/sandbox/src/java/org/apache/lucene/document/MatchingPoints.java
new file mode 100644
index 0000000..2b6c124
--- /dev/null
+++ b/lucene/sandbox/src/java/org/apache/lucene/document/MatchingPoints.java
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.document;
+
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.PointValues;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.util.BitSet;
+import org.apache.lucene.util.BitSetIterator;
+import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.SparseFixedBitSet;
+
+/**
+ * Accumulates matching hits for points.
+ * <p>
+ * Add matches with ({@link #add(int)}) and call {@link #iterator()} for
+ * an iterator over the results.
+ * <p>
+ * This implementation currently optimizes bitset structure (sparse vs dense)
+ * and {@link DocIdSetIterator#cost()} (cardinality) based on index statistics.
+ * This API may change as point values evolve.
+ *
+ * @lucene.experimental
+ */
+final class MatchingPoints {
+  /** bitset we collect into */
+  private final BitSet bits;
+  /** number of documents containing a value for the points field */
+  private final int docCount;
+  /** number of values indexed for the points field */
+  private final long numPoints;
+  /** number of documents in the index segment */
+  private final int maxDoc;
+  /** counter of hits seen; incremented on every {@link #add(int)}, even for a doc already recorded */
+  private long counter;
+
+  /**
+   * Creates a new accumulator.
+   * @param reader reader to collect point matches from
+   * @param field field name.
+   * @throws IllegalStateException if {@code reader} has no point values at all; callers are
+   *         expected to have checked for that before creating an accumulator
+   */
+  public MatchingPoints(LeafReader reader, String field) {
+    maxDoc = reader.maxDoc();
+    PointValues values = reader.getPointValues();
+    if (values == null) {
+      throw new IllegalStateException("the query is missing null checks");
+    }
+    docCount = values.getDocCount(field);
+    numPoints = values.size(field);
+    // heuristic: if the field is really sparse (fewer than 1% of the docs have a value),
+    // use a sparse impl. A negative docCount is treated as "unknown" and gets the dense impl.
+    if (docCount >= 0 && docCount * 100L < maxDoc) {
+      bits = new SparseFixedBitSet(maxDoc);
+    } else {
+      bits = new FixedBitSet(maxDoc);
+    }
+  }
+
+  /**
+   * Record a matching docid.
+   * <p>
+   * NOTE: doc IDs do not need to be provided in any order.
+   *
+   * @param doc the matching document id
+   */
+  public void add(int doc) {
+    bits.set(doc);
+    counter++;
+  }
+
+  /**
+   * Returns an iterator over the recorded matches.
+   * <p>
+   * The iterator's cost is the number of recorded hits scaled by {@code docCount / numPoints},
+   * which turns a count of matching values into an estimated count of matching documents.
+   * When the field statistics cannot be used for that (negative {@code docCount} or
+   * non-positive {@code numPoints}), the number of hits, capped at {@code maxDoc}, is used.
+   *
+   * @return an iterator over every doc id passed to {@link #add(int)}
+   */
+  public DocIdSetIterator iterator() {
+    final long cost;
+    if (docCount < 0 || numPoints <= 0) {
+      // Scaling would yield a negative cost or divide by zero. The hit count is an upper
+      // bound on the number of matching docs, and so is maxDoc.
+      cost = Math.min(counter, maxDoc);
+    } else {
+      // if single-valued (docCount == numPoints), then this is exact
+      // otherwise it's approximate based on field stats
+      cost = (long) (counter * (docCount / (double) numPoints));
+    }
+    return new BitSetIterator(bits, cost);
+  }
+}