You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2016/04/26 15:17:28 UTC

lucene-solr:master: LUCENE-7254: (sandbox/ only) Don't let abuse cases slow down spatial queries

Repository: lucene-solr
Updated Branches:
  refs/heads/master 89c65af2a -> 6fa5166e4


LUCENE-7254: (sandbox/ only) Don't let abuse cases slow down spatial queries


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/6fa5166e
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/6fa5166e
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/6fa5166e

Branch: refs/heads/master
Commit: 6fa5166e41652fc58a5f18db4796e230b1354dbd
Parents: 89c65af
Author: Robert Muir <rm...@apache.org>
Authored: Tue Apr 26 09:16:27 2016 -0400
Committer: Robert Muir <rm...@apache.org>
Committed: Tue Apr 26 09:17:21 2016 -0400

----------------------------------------------------------------------
 .../org/apache/lucene/document/LatLonPoint.java |   3 +-
 .../lucene/document/LatLonPointBoxQuery.java    | 287 +++++++++++++++++++
 .../document/LatLonPointDistanceQuery.java      |  17 +-
 .../document/LatLonPointInPolygonQuery.java     |  18 +-
 .../apache/lucene/document/MatchingPoints.java  |  90 ++++++
 5 files changed, 382 insertions(+), 33 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6fa5166e/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPoint.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPoint.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPoint.java
index 426a702..a63e4bd 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPoint.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPoint.java
@@ -34,7 +34,6 @@ import org.apache.lucene.search.FieldDoc;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.search.MatchNoDocsQuery;
-import org.apache.lucene.search.PointRangeQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.TopFieldDocs;
@@ -229,7 +228,7 @@ public class LatLonPoint extends Field {
   }
   
   private static Query newBoxInternal(String field, byte[] min, byte[] max) {
-    return new PointRangeQuery(field, min, max, 2) {
+    return new LatLonPointBoxQuery(field, min, max, 2) {
       @Override
       protected String toString(int dimension, byte[] value) {
         if (dimension == 0) {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6fa5166e/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointBoxQuery.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointBoxQuery.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointBoxQuery.java
new file mode 100644
index 0000000..423af05
--- /dev/null
+++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointBoxQuery.java
@@ -0,0 +1,287 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.document;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Objects;
+
+import org.apache.lucene.index.PointValues;
+import org.apache.lucene.index.PointValues.IntersectVisitor;
+import org.apache.lucene.index.PointValues.Relation;
+import org.apache.lucene.search.ConstantScoreScorer;
+import org.apache.lucene.search.ConstantScoreWeight;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.PointRangeQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.Weight;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.util.StringHelper;
+
+/** 
+ * Fast version of {@link PointRangeQuery}. It is fast for actual range queries!
+ * @lucene.experimental
+ */
+abstract class LatLonPointBoxQuery extends Query {
+  final String field;
+  final int numDims;
+  final int bytesPerDim;
+  final byte[] lowerPoint;
+  final byte[] upperPoint;
+
+  /** 
+   * Expert: create a multidimensional range query for point values.
+   *
+   * @param field field name. must not be {@code null}.
+   * @param lowerPoint lower portion of the range (inclusive).
+   * @param upperPoint upper portion of the range (inclusive).
+   * @param numDims number of dimensions.
+   * @throws IllegalArgumentException if {@code field} is null, or if {@code lowerValue.length != upperValue.length}
+   */
+  protected LatLonPointBoxQuery(String field, byte[] lowerPoint, byte[] upperPoint, int numDims) {
+    checkArgs(field, lowerPoint, upperPoint);
+    this.field = field;
+    if (numDims <= 0) {
+      throw new IllegalArgumentException("numDims must be positive, got " + numDims);
+    }
+    if (lowerPoint.length == 0) {
+      throw new IllegalArgumentException("lowerPoint has length of zero");
+    }
+    if (lowerPoint.length % numDims != 0) {
+      throw new IllegalArgumentException("lowerPoint is not a fixed multiple of numDims");
+    }
+    if (lowerPoint.length != upperPoint.length) {
+      throw new IllegalArgumentException("lowerPoint has length=" + lowerPoint.length + " but upperPoint has different length=" + upperPoint.length);
+    }
+    this.numDims = numDims;
+    this.bytesPerDim = lowerPoint.length / numDims;
+
+    this.lowerPoint = lowerPoint;
+    this.upperPoint = upperPoint;
+  }
+
+  /** 
+   * Check preconditions for all factory methods
+   * @throws IllegalArgumentException if {@code field}, {@code lowerPoint} or {@code upperPoint} are null.
+   */
+  public static void checkArgs(String field, Object lowerPoint, Object upperPoint) {
+    if (field == null) {
+      throw new IllegalArgumentException("field must not be null");
+    }
+    if (lowerPoint == null) {
+      throw new IllegalArgumentException("lowerPoint must not be null");
+    }
+    if (upperPoint == null) {
+      throw new IllegalArgumentException("upperPoint must not be null");
+    }
+  }
+
+  @Override
+  public final Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
+
+    // We don't use RandomAccessWeight here: it's no good to approximate with "match all docs".
+    // This is an inverted structure and should be used in the first pass:
+
+    return new ConstantScoreWeight(this) {
+
+      private DocIdSetIterator buildMatchingIterator(LeafReader reader, PointValues values) throws IOException {
+        MatchingPoints result = new MatchingPoints(reader, field);
+
+        values.intersect(field,
+            new IntersectVisitor() {
+
+              @Override
+              public void visit(int docID) {
+                result.add(docID);
+              }
+
+              @Override
+              public void visit(int docID, byte[] packedValue) {
+                for(int dim=0;dim<numDims;dim++) {
+                  int offset = dim*bytesPerDim;
+                  if (StringHelper.compare(bytesPerDim, packedValue, offset, lowerPoint, offset) < 0) {
+                    // Doc's value is too low, in this dimension
+                    return;
+                  }
+                  if (StringHelper.compare(bytesPerDim, packedValue, offset, upperPoint, offset) > 0) {
+                    // Doc's value is too high, in this dimension
+                    return;
+                  }
+                }
+
+                // Doc is in-bounds
+                result.add(docID);
+              }
+
+              @Override
+              public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
+
+                boolean crosses = false;
+
+                for(int dim=0;dim<numDims;dim++) {
+                  int offset = dim*bytesPerDim;
+
+                  if (StringHelper.compare(bytesPerDim, minPackedValue, offset, upperPoint, offset) > 0 ||
+                      StringHelper.compare(bytesPerDim, maxPackedValue, offset, lowerPoint, offset) < 0) {
+                    return Relation.CELL_OUTSIDE_QUERY;
+                  }
+
+                  crosses |= StringHelper.compare(bytesPerDim, minPackedValue, offset, lowerPoint, offset) < 0 ||
+                    StringHelper.compare(bytesPerDim, maxPackedValue, offset, upperPoint, offset) > 0;
+                }
+
+                if (crosses) {
+                  return Relation.CELL_CROSSES_QUERY;
+                } else {
+                  return Relation.CELL_INSIDE_QUERY;
+                }
+              }
+            });
+        return result.iterator();
+      }
+
+      @Override
+      public Scorer scorer(LeafReaderContext context) throws IOException {
+        LeafReader reader = context.reader();
+        PointValues values = reader.getPointValues();
+        if (values == null) {
+          // No docs in this segment indexed any points
+          return null;
+        }
+        FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
+        if (fieldInfo == null) {
+          // No docs in this segment indexed this field at all
+          return null;
+        }
+        if (fieldInfo.getPointDimensionCount() != numDims) {
+          throw new IllegalArgumentException("field=\"" + field + "\" was indexed with numDims=" + fieldInfo.getPointDimensionCount() + " but this query has numDims=" + numDims);
+        }
+        if (bytesPerDim != fieldInfo.getPointNumBytes()) {
+          throw new IllegalArgumentException("field=\"" + field + "\" was indexed with bytesPerDim=" + fieldInfo.getPointNumBytes() + " but this query has bytesPerDim=" + bytesPerDim);
+        }
+
+        boolean allDocsMatch;
+        if (values.getDocCount(field) == reader.maxDoc()) {
+          final byte[] fieldPackedLower = values.getMinPackedValue(field);
+          final byte[] fieldPackedUpper = values.getMaxPackedValue(field);
+          allDocsMatch = true;
+          for (int i = 0; i < numDims; ++i) {
+            int offset = i * bytesPerDim;
+            if (StringHelper.compare(bytesPerDim, lowerPoint, offset, fieldPackedLower, offset) > 0
+                || StringHelper.compare(bytesPerDim, upperPoint, offset, fieldPackedUpper, offset) < 0) {
+              allDocsMatch = false;
+              break;
+            }
+          }
+        } else {
+          allDocsMatch = false;
+        }
+
+        DocIdSetIterator iterator;
+        if (allDocsMatch) {
+          // all docs have a value and all points are within bounds, so everything matches
+          iterator = DocIdSetIterator.all(reader.maxDoc());
+        } else {
+          iterator = buildMatchingIterator(reader, values);
+        }
+
+        return new ConstantScoreScorer(this, score(), iterator);
+      }
+    };
+  }
+
+  @Override
+  public final int hashCode() {
+    int hash = super.hashCode();
+    hash = 31 * hash + field.hashCode();
+    hash = 31 * hash + Arrays.hashCode(lowerPoint);
+    hash = 31 * hash + Arrays.hashCode(upperPoint);
+    hash = 31 * hash + numDims;
+    hash = 31 * hash + Objects.hashCode(bytesPerDim);
+    return hash;
+  }
+
+  @Override
+  public final boolean equals(Object other) {
+    if (super.equals(other) == false) {
+      return false;
+    }
+
+    final LatLonPointBoxQuery q = (LatLonPointBoxQuery) other;
+    if (field.equals(q.field) == false) {
+      return false;
+    }
+
+    if (q.numDims != numDims) {
+      return false;
+    }
+
+    if (q.bytesPerDim != bytesPerDim) {
+      return false;
+    }
+
+    if (Arrays.equals(lowerPoint, q.lowerPoint) == false) {
+      return false;
+    }
+    
+    if (Arrays.equals(upperPoint, q.upperPoint) == false) {
+      return false;
+    }
+
+    return true;
+  }
+
+  @Override
+  public final String toString(String field) {
+    final StringBuilder sb = new StringBuilder();
+    if (this.field.equals(field) == false) {
+      sb.append(this.field);
+      sb.append(':');
+    }
+
+    // print ourselves as "range per dimension"
+    for (int i = 0; i < numDims; i++) {
+      if (i > 0) {
+        sb.append(',');
+      }
+      
+      int startOffset = bytesPerDim * i;
+
+      sb.append('[');
+      sb.append(toString(i, Arrays.copyOfRange(lowerPoint, startOffset, startOffset + bytesPerDim)));
+      sb.append(" TO ");
+      sb.append(toString(i, Arrays.copyOfRange(upperPoint, startOffset, startOffset + bytesPerDim)));
+      sb.append(']');
+    }
+
+    return sb.toString();
+  }
+
+  /**
+   * Returns a string of a single value in a human-readable format for debugging.
+   * This is used by {@link #toString()}.
+   *
+   * @param dimension dimension of the particular value
+   * @param value single value, never null
+   * @return human readable value for debugging
+   */
+  protected abstract String toString(int dimension, byte[] value);
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6fa5166e/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java
index 9bd78fe..0759ce1 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java
@@ -28,13 +28,10 @@ import org.apache.lucene.index.PointValues.IntersectVisitor;
 import org.apache.lucene.index.PointValues.Relation;
 import org.apache.lucene.search.ConstantScoreScorer;
 import org.apache.lucene.search.ConstantScoreWeight;
-import org.apache.lucene.search.DocIdSet;
-import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.Scorer;
 import org.apache.lucene.search.Weight;
-import org.apache.lucene.util.DocIdSetBuilder;
 import org.apache.lucene.util.NumericUtils;
 import org.apache.lucene.util.SloppyMath;
 import org.apache.lucene.util.StringHelper;
@@ -120,16 +117,11 @@ final class LatLonPointDistanceQuery extends Query {
         LatLonPoint.checkCompatible(fieldInfo);
         
         // matching docids
-        DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc());
+        MatchingPoints result = new MatchingPoints(reader, field);
 
         values.intersect(field,
                          new IntersectVisitor() {
                            @Override
-                           public void grow(int count) {
-                             result.grow(count);
-                           }
-
-                           @Override
                            public void visit(int docID) {
                              result.add(docID);
                            }
@@ -209,12 +201,7 @@ final class LatLonPointDistanceQuery extends Query {
                            }
                          });
 
-        DocIdSet set = result.build();
-        final DocIdSetIterator disi = set.iterator();
-        if (disi == null) {
-          return null;
-        }
-        return new ConstantScoreScorer(this, score(), disi);
+        return new ConstantScoreScorer(this, score(), result.iterator());
       }
     };
   }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6fa5166e/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java
index ee7c1e8..506e6b9 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java
@@ -24,8 +24,6 @@ import org.apache.lucene.index.PointValues.IntersectVisitor;
 import org.apache.lucene.index.PointValues.Relation;
 import org.apache.lucene.search.ConstantScoreScorer;
 import org.apache.lucene.search.ConstantScoreWeight;
-import org.apache.lucene.search.DocIdSet;
-import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.Scorer;
@@ -34,7 +32,6 @@ import org.apache.lucene.index.PointValues;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.util.DocIdSetBuilder;
 import org.apache.lucene.util.NumericUtils;
 import org.apache.lucene.util.StringHelper;
 import org.apache.lucene.geo.Polygon;
@@ -113,16 +110,11 @@ final class LatLonPointInPolygonQuery extends Query {
         LatLonPoint.checkCompatible(fieldInfo);
 
         // matching docids
-        DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc());
+        MatchingPoints result = new MatchingPoints(reader, field);
 
         values.intersect(field, 
                          new IntersectVisitor() {
                            @Override
-                           public void grow(int count) {
-                             result.grow(count);
-                           }
-
-                           @Override
                            public void visit(int docID) {
                              result.add(docID);
                            }
@@ -154,13 +146,7 @@ final class LatLonPointInPolygonQuery extends Query {
                            }
                          });
 
-        DocIdSet set = result.build();
-        final DocIdSetIterator disi = set.iterator();
-        if (disi == null) {
-          return null;
-        }
-
-        return new ConstantScoreScorer(this, score(), disi);
+        return new ConstantScoreScorer(this, score(), result.iterator());
       }
     };
   }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6fa5166e/lucene/sandbox/src/java/org/apache/lucene/document/MatchingPoints.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/MatchingPoints.java b/lucene/sandbox/src/java/org/apache/lucene/document/MatchingPoints.java
new file mode 100644
index 0000000..2b6c124
--- /dev/null
+++ b/lucene/sandbox/src/java/org/apache/lucene/document/MatchingPoints.java
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.document;
+
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.PointValues;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.util.BitSet;
+import org.apache.lucene.util.BitSetIterator;
+import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.SparseFixedBitSet;
+
+/**
+ * Accumulates matching hits for points.
+ * <p>
+ * Add matches with ({@link #add(int)}) and call {@link #iterator()} for
+ * an iterator over the results. 
+ * <p>
+ * This implementation currently optimizes bitset structure (sparse vs dense)
+ * and {@link DocIdSetIterator#cost()} (cardinality) based on index statistics.
+ * This API may change as point values evolves.
+ * 
+ * @lucene.experimental
+ */
+final class MatchingPoints {
+  /** bitset we collect into */
+  private final BitSet bits;
+  /** number of documents containing a value for the points field */
+  private final int docCount;
+  /** number of values indexed for the points field */
+  private final long numPoints;
+  /** number of documents in the index segment */
+  private final int maxDoc;
+  /** counter of hits seen */
+  private long counter;
+
+  /**
+   * Creates a new accumulator.
+   * @param reader reader to collect point matches from
+   * @param field field name.
+   */
+  public MatchingPoints(LeafReader reader, String field) {
+    maxDoc = reader.maxDoc();
+    PointValues values = reader.getPointValues();
+    if (values == null) {
+      throw new IllegalStateException("the query is missing null checks");
+    }
+    docCount = values.getDocCount(field);
+    numPoints = values.size(field);
+    // heuristic: if the field is really sparse, use a sparse impl
+    if (docCount >= 0 && docCount * 100L < maxDoc) {
+      bits = new SparseFixedBitSet(maxDoc);
+    } else {
+      bits = new FixedBitSet(maxDoc);
+    }
+  }
+
+  /**
+   * Record a matching docid.
+   * <p>
+   * NOTE: doc IDs do not need to be provided in any order.
+   */
+  public void add(int doc) {
+    bits.set(doc);
+    counter++;
+  }
+  
+  /**
+   * Returns an iterator over the recorded matches.
+   */
+  public DocIdSetIterator iterator() {
+    // if single-valued (docCount == numPoints), then this is exact
+    // otherwise its approximate based on field stats
+    return new BitSetIterator(bits, (long) (counter * (docCount / (double) numPoints)));
+  }
+}