You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2016/03/16 14:28:13 UTC

lucene-solr:branch_6x: LUCENE-7109: LatLonPoint.newPolygonQuery should use two-phase iterator

Repository: lucene-solr
Updated Branches:
  refs/heads/branch_6x 4deb4cd1b -> e68dc4a33


LUCENE-7109: LatLonPoint.newPolygonQuery should use two-phase iterator


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/e68dc4a3
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/e68dc4a3
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/e68dc4a3

Branch: refs/heads/branch_6x
Commit: e68dc4a330bed0d3cc90167b74b83261ed29fd0a
Parents: 4deb4cd
Author: Robert Muir <rm...@apache.org>
Authored: Wed Mar 16 09:25:06 2016 -0400
Committer: Robert Muir <rm...@apache.org>
Committed: Wed Mar 16 09:26:47 2016 -0400

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |  3 +
 .../document/LatLonPointInPolygonQuery.java     | 65 +++++++++++++++++---
 .../document/TestLatLonPointInPolygonQuery.java | 49 +++++++++++++++
 3 files changed, 107 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e68dc4a3/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 2a01ddb..4941dee 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -20,6 +20,9 @@ Optimizations
 
 * LUCENE-7105: Optimize LatLonPoint's newDistanceQuery. (Robert Muir)
 
+* LUCENE-7109: LatLonPoint's newPolygonQuery supports two-phase 
+  iteration. (Robert Muir)
+
 * LUCENE-7097: IntroSorter now recurses to 2 * log_2(count) quicksort
   stack depth before switching to heapsort (Adrien Grand, Mike McCandless)
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e68dc4a3/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java
index fb9189f..cb98895 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java
@@ -23,15 +23,23 @@ import org.apache.lucene.index.PointValues.IntersectVisitor;
 import org.apache.lucene.index.PointValues.Relation;
 import org.apache.lucene.search.ConstantScoreScorer;
 import org.apache.lucene.search.ConstantScoreWeight;
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.TwoPhaseIterator;
 import org.apache.lucene.search.Weight;
 import org.apache.lucene.index.PointValues;
+import org.apache.lucene.index.SortedNumericDocValues;
+import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.util.BitSet;
 import org.apache.lucene.util.DocIdSetBuilder;
+import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.SparseFixedBitSet;
 import org.apache.lucene.spatial.util.GeoRelationUtils;
 import org.apache.lucene.spatial.util.GeoUtils;
 
@@ -110,9 +118,6 @@ final class LatLonPointInPolygonQuery extends Query {
     // I don't use RandomAccessWeight here: it's no good to approximate with "match all docs"; this is an inverted structure and should be
     // used in the first pass:
 
-    // TODO: except that the polygon verify is costly!  The approximation should be all docs in all overlapping cells, and matches() should
-    // then check the polygon
-
     return new ConstantScoreWeight(this) {
 
       @Override
@@ -130,22 +135,28 @@ final class LatLonPointInPolygonQuery extends Query {
         }
         LatLonPoint.checkCompatible(fieldInfo);
 
+        // approximation (postfiltering has not yet been applied)
         DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc());
+        // subset of documents that need no postfiltering, this is purely an optimization
+        final BitSet preApproved;
+        // dumb heuristic: if the field is really sparse, use a sparse impl
+        if (values.getDocCount(field) * 100L < reader.maxDoc()) {
+          preApproved = new SparseFixedBitSet(reader.maxDoc());
+        } else {
+          preApproved = new FixedBitSet(reader.maxDoc());
+        }
         values.intersect(field,
                          new IntersectVisitor() {
                            @Override
                            public void visit(int docID) {
                              result.add(docID);
+                             preApproved.set(docID);
                            }
 
                            @Override
                            public void visit(int docID, byte[] packedValue) {
-                             assert packedValue.length == 8;
-                             double lat = LatLonPoint.decodeLatitude(packedValue, 0);
-                             double lon = LatLonPoint.decodeLongitude(packedValue, Integer.BYTES);
-                             if (GeoRelationUtils.pointInPolygon(polyLons, polyLats, lat, lon)) {
-                               result.add(docID);
-                             }
+                             // TODO: range checks
+                             result.add(docID);
                            }
 
                            @Override
@@ -172,7 +183,41 @@ final class LatLonPointInPolygonQuery extends Query {
                            }
                          });
 
-        return new ConstantScoreScorer(this, score(), result.build().iterator());
+        DocIdSet set = result.build();
+        final DocIdSetIterator disi = set.iterator();
+        if (disi == null) {
+          return null;
+        }
+
+        // return two-phase iterator using docvalues to postfilter candidates
+        SortedNumericDocValues docValues = DocValues.getSortedNumeric(reader, field);
+        TwoPhaseIterator iterator = new TwoPhaseIterator(disi) {
+          @Override
+          public boolean matches() throws IOException {
+            int docId = disi.docID();
+            if (preApproved.get(docId)) {
+              return true;
+            } else {
+              docValues.setDocument(docId);
+              int count = docValues.count();
+              for (int i = 0; i < count; i++) {
+                long encoded = docValues.valueAt(i);
+                double docLatitude = LatLonPoint.decodeLatitude((int)(encoded >> 32));
+                double docLongitude = LatLonPoint.decodeLongitude((int)(encoded & 0xFFFFFFFF));
+                if (GeoRelationUtils.pointInPolygon(polyLons, polyLats, docLatitude, docLongitude)) {
+                  return true;
+                }
+              }
+              return false;
+            }
+          }
+
+          @Override
+          public float matchCost() {
+            return 20 * polyLons.length; // TODO: make this fancier, but currently linear with number of vertices
+          }
+        };
+        return new ConstantScoreScorer(this, score(), iterator);
       }
     };
   }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e68dc4a3/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonPointInPolygonQuery.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonPointInPolygonQuery.java b/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonPointInPolygonQuery.java
new file mode 100644
index 0000000..de87027
--- /dev/null
+++ b/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonPointInPolygonQuery.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.document;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.LuceneTestCase;
+
+/** Simple tests for {@link LatLonPoint#newPolygonQuery} */
+public class TestLatLonPointInPolygonQuery extends LuceneTestCase {
+
+  /** test we can search for a polygon */
+  public void testBasics() throws Exception {
+    Directory dir = newDirectory();
+    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
+
+    // add a doc with a point
+    Document document = new Document();
+    document.add(new LatLonPoint("field", 18.313694, -65.227444));
+    writer.addDocument(document);
+    
+    // search and verify we found our doc
+    IndexReader reader = writer.getReader();
+    IndexSearcher searcher = newSearcher(reader);
+    assertEquals(1, searcher.count(LatLonPoint.newPolygonQuery("field",
+                                                               new double[] { 18, 18, 19, 19, 18 },
+                                                               new double[] { -66, -65, -65, -66, -66 })));
+
+    reader.close();
+    writer.close();
+    dir.close();
+  }
+}