You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2016/03/16 14:28:13 UTC
lucene-solr:branch_6x: LUCENE-7109: LatLonPoint.newPolygonQuery
should use two-phase iterator
Repository: lucene-solr
Updated Branches:
refs/heads/branch_6x 4deb4cd1b -> e68dc4a33
LUCENE-7109: LatLonPoint.newPolygonQuery should use two-phase iterator
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/e68dc4a3
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/e68dc4a3
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/e68dc4a3
Branch: refs/heads/branch_6x
Commit: e68dc4a330bed0d3cc90167b74b83261ed29fd0a
Parents: 4deb4cd
Author: Robert Muir <rm...@apache.org>
Authored: Wed Mar 16 09:25:06 2016 -0400
Committer: Robert Muir <rm...@apache.org>
Committed: Wed Mar 16 09:26:47 2016 -0400
----------------------------------------------------------------------
lucene/CHANGES.txt | 3 +
.../document/LatLonPointInPolygonQuery.java | 65 +++++++++++++++++---
.../document/TestLatLonPointInPolygonQuery.java | 49 +++++++++++++++
3 files changed, 107 insertions(+), 10 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e68dc4a3/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 2a01ddb..4941dee 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -20,6 +20,9 @@ Optimizations
* LUCENE-7105: Optimize LatLonPoint's newDistanceQuery. (Robert Muir)
+* LUCENE-7109: LatLonPoint's newPolygonQuery supports two-phase
+ iteration. (Robert Muir)
+
* LUCENE-7097: IntroSorter now recurses to 2 * log_2(count) quicksort
stack depth before switching to heapsort (Adrien Grand, Mike McCandless)
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e68dc4a3/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java
index fb9189f..cb98895 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java
@@ -23,15 +23,23 @@ import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.Weight;
import org.apache.lucene.index.PointValues;
+import org.apache.lucene.index.SortedNumericDocValues;
+import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.DocIdSetBuilder;
+import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.SparseFixedBitSet;
import org.apache.lucene.spatial.util.GeoRelationUtils;
import org.apache.lucene.spatial.util.GeoUtils;
@@ -110,9 +118,6 @@ final class LatLonPointInPolygonQuery extends Query {
// I don't use RandomAccessWeight here: it's no good to approximate with "match all docs"; this is an inverted structure and should be
// used in the first pass:
- // TODO: except that the polygon verify is costly! The approximation should be all docs in all overlapping cells, and matches() should
- // then check the polygon
-
return new ConstantScoreWeight(this) {
@Override
@@ -130,22 +135,28 @@ final class LatLonPointInPolygonQuery extends Query {
}
LatLonPoint.checkCompatible(fieldInfo);
+ // approximation (postfiltering has not yet been applied)
DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc());
+ // subset of documents that need no postfiltering, this is purely an optimization
+ final BitSet preApproved;
+ // dumb heuristic: if the field is really sparse, use a sparse impl
+ if (values.getDocCount(field) * 100L < reader.maxDoc()) {
+ preApproved = new SparseFixedBitSet(reader.maxDoc());
+ } else {
+ preApproved = new FixedBitSet(reader.maxDoc());
+ }
values.intersect(field,
new IntersectVisitor() {
@Override
public void visit(int docID) {
result.add(docID);
+ preApproved.set(docID);
}
@Override
public void visit(int docID, byte[] packedValue) {
- assert packedValue.length == 8;
- double lat = LatLonPoint.decodeLatitude(packedValue, 0);
- double lon = LatLonPoint.decodeLongitude(packedValue, Integer.BYTES);
- if (GeoRelationUtils.pointInPolygon(polyLons, polyLats, lat, lon)) {
- result.add(docID);
- }
+ // TODO: range checks
+ result.add(docID);
}
@Override
@@ -172,7 +183,41 @@ final class LatLonPointInPolygonQuery extends Query {
}
});
- return new ConstantScoreScorer(this, score(), result.build().iterator());
+ DocIdSet set = result.build();
+ final DocIdSetIterator disi = set.iterator();
+ if (disi == null) {
+ return null;
+ }
+
+ // return two-phase iterator using docvalues to postfilter candidates
+ SortedNumericDocValues docValues = DocValues.getSortedNumeric(reader, field);
+ TwoPhaseIterator iterator = new TwoPhaseIterator(disi) {
+ @Override
+ public boolean matches() throws IOException {
+ int docId = disi.docID();
+ if (preApproved.get(docId)) {
+ return true;
+ } else {
+ docValues.setDocument(docId);
+ int count = docValues.count();
+ for (int i = 0; i < count; i++) {
+ long encoded = docValues.valueAt(i);
+ double docLatitude = LatLonPoint.decodeLatitude((int)(encoded >> 32));
+ double docLongitude = LatLonPoint.decodeLongitude((int)(encoded & 0xFFFFFFFF));
+ if (GeoRelationUtils.pointInPolygon(polyLons, polyLats, docLatitude, docLongitude)) {
+ return true;
+ }
+ }
+ return false;
+ }
+ }
+
+ @Override
+ public float matchCost() {
+ return 20 * polyLons.length; // TODO: make this fancier, but currently linear with number of vertices
+ }
+ };
+ return new ConstantScoreScorer(this, score(), iterator);
}
};
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e68dc4a3/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonPointInPolygonQuery.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonPointInPolygonQuery.java b/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonPointInPolygonQuery.java
new file mode 100644
index 0000000..de87027
--- /dev/null
+++ b/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonPointInPolygonQuery.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.document;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.LuceneTestCase;
+
+/** Simple tests for {@link LatLonPoint#newPolygonQuery} */
+public class TestLatLonPointInPolygonQuery extends LuceneTestCase {
+
+ /** test we can search for a polygon */
+ public void testBasics() throws Exception {
+ Directory dir = newDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
+
+ // add a doc with a point
+ Document document = new Document();
+ document.add(new LatLonPoint("field", 18.313694, -65.227444));
+ writer.addDocument(document);
+
+ // search and verify we found our doc
+ IndexReader reader = writer.getReader();
+ IndexSearcher searcher = newSearcher(reader);
+ assertEquals(1, searcher.count(LatLonPoint.newPolygonQuery("field",
+ new double[] { 18, 18, 19, 19, 18 },
+ new double[] { -66, -65, -65, -66, -66 })));
+
+ reader.close();
+ writer.close();
+ dir.close();
+ }
+}