You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2016/03/12 17:00:28 UTC
lucene-solr git commit: LUCENE-7099: use two-phase iteration in
LatLonPoint.newDistanceQuery
Repository: lucene-solr
Updated Branches:
refs/heads/master 0ff341f74 -> 576a40596
LUCENE-7099: use two-phase iteration in LatLonPoint.newDistanceQuery
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/576a4059
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/576a4059
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/576a4059
Branch: refs/heads/master
Commit: 576a40596db5f65f53f2d9e9c494fc71e7e7bade
Parents: 0ff341f
Author: Robert Muir <rm...@apache.org>
Authored: Sat Mar 12 10:58:32 2016 -0500
Committer: Robert Muir <rm...@apache.org>
Committed: Sat Mar 12 10:59:43 2016 -0500
----------------------------------------------------------------------
lucene/CHANGES.txt | 3 +
.../org/apache/lucene/document/LatLonPoint.java | 14 +++--
.../document/LatLonPointDistanceQuery.java | 66 +++++++++++++++++---
.../apache/lucene/document/TestLatLonPoint.java | 10 +++
4 files changed, 80 insertions(+), 13 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/576a4059/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 82d22fe..cd9fac2 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -18,6 +18,9 @@ Optimizations
* LUCENE-7071: Reduce bytes copying in OfflineSorter, giving ~10%
speedup on merging 2D LatLonPoint values (Mike McCandless)
+* LUCENE-7099: LatLonPoint's newDistanceQuery supports two-phase
+ iteration. (Robert Muir)
+
Other
* LUCENE-7087: Let MemoryIndex#fromDocument(...) accept 'Iterable<? extends IndexableField>'
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/576a4059/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPoint.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPoint.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPoint.java
index e8c2f17..ebf850c 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPoint.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPoint.java
@@ -96,8 +96,10 @@ public class LatLonPoint extends Field {
}
private static final int BITS = 32;
- private static final double LONGITUDE_SCALE = (0x1L<<BITS)/360.0D;
- private static final double LATITUDE_SCALE = (0x1L<<BITS)/180.0D;
+ private static final double LONGITUDE_ENCODE = (0x1L<<BITS)/360.0D;
+ private static final double LONGITUDE_DECODE = 1/LONGITUDE_ENCODE;
+ private static final double LATITUDE_ENCODE = (0x1L<<BITS)/180.0D;
+ private static final double LATITUDE_DECODE = 1/LATITUDE_ENCODE;
@Override
public String toString() {
@@ -142,7 +144,7 @@ public class LatLonPoint extends Field {
if (latitude == 90.0D) {
latitude = Math.nextDown(latitude);
}
- return Math.toIntExact((long) (latitude * LATITUDE_SCALE));
+ return Math.toIntExact((long) (latitude * LATITUDE_ENCODE));
}
/**
@@ -159,7 +161,7 @@ public class LatLonPoint extends Field {
if (longitude == 180.0D) {
longitude = Math.nextDown(longitude);
}
- return Math.toIntExact((long) (longitude * LONGITUDE_SCALE));
+ return Math.toIntExact((long) (longitude * LONGITUDE_ENCODE));
}
/**
@@ -168,7 +170,7 @@ public class LatLonPoint extends Field {
* @return decoded latitude value.
*/
public static double decodeLatitude(int encoded) {
- double result = encoded / LATITUDE_SCALE;
+ double result = encoded * LATITUDE_DECODE;
assert GeoUtils.isValidLat(result);
return result;
}
@@ -189,7 +191,7 @@ public class LatLonPoint extends Field {
* @return decoded longitude value.
*/
public static double decodeLongitude(int encoded) {
- double result = encoded / LONGITUDE_SCALE;
+ double result = encoded * LONGITUDE_DECODE;
assert GeoUtils.isValidLon(result);
return result;
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/576a4059/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java
index 9374b17..9d23986 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java
@@ -18,22 +18,30 @@ package org.apache.lucene.document;
import java.io.IOException;
+import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PointValues;
+import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.Weight;
import org.apache.lucene.spatial.util.GeoDistanceUtils;
import org.apache.lucene.spatial.util.GeoRect;
import org.apache.lucene.spatial.util.GeoUtils;
+import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.DocIdSetBuilder;
+import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.SparseFixedBitSet;
/**
* Distance query for {@link LatLonPoint}.
@@ -95,22 +103,32 @@ final class LatLonPointDistanceQuery extends Query {
}
LatLonPoint.checkCompatible(fieldInfo);
+ // approximation (postfiltering has not yet been applied)
DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc());
+ // subset of documents that need no postfiltering, this is purely an optimization
+ final BitSet preApproved;
+ // dumb heuristic: if the field is really sparse, use a sparse impl
+ if (values.getDocCount(field) * 100L < reader.maxDoc()) {
+ preApproved = new SparseFixedBitSet(reader.maxDoc());
+ } else {
+ preApproved = new FixedBitSet(reader.maxDoc());
+ }
values.intersect(field,
new IntersectVisitor() {
@Override
+ public void grow(int count) {
+ result.grow(count);
+ }
+
+ @Override
public void visit(int docID) {
result.add(docID);
+ preApproved.set(docID);
}
@Override
public void visit(int docID, byte[] packedValue) {
- assert packedValue.length == 8;
- double lat = LatLonPoint.decodeLatitude(packedValue, 0);
- double lon = LatLonPoint.decodeLongitude(packedValue, Integer.BYTES);
- if (GeoDistanceUtils.haversin(latitude, longitude, lat, lon) <= radiusMeters) {
- visit(docID);
- }
+ result.add(docID);
}
// algorithm: we create a bounding box (two bounding boxes if we cross the dateline).
@@ -142,7 +160,41 @@ final class LatLonPointDistanceQuery extends Query {
}
});
- return new ConstantScoreScorer(this, score(), result.build().iterator());
+ DocIdSet set = result.build();
+ final DocIdSetIterator disi = set.iterator();
+ if (disi == null) {
+ return null;
+ }
+
+ // return two-phase iterator using docvalues to postfilter candidates
+ SortedNumericDocValues docValues = DocValues.getSortedNumeric(reader, field);
+ TwoPhaseIterator iterator = new TwoPhaseIterator(disi) {
+ @Override
+ public boolean matches() throws IOException {
+ int docId = disi.docID();
+ if (preApproved.get(docId)) {
+ return true;
+ } else {
+ docValues.setDocument(docId);
+ int count = docValues.count();
+ for (int i = 0; i < count; i++) {
+ long encoded = docValues.valueAt(i);
+ double docLatitude = LatLonPoint.decodeLatitude((int)(encoded >> 32));
+ double docLongitude = LatLonPoint.decodeLongitude((int)(encoded & 0xFFFFFFFF));
+ if (GeoDistanceUtils.haversin(latitude, longitude, docLatitude, docLongitude) <= radiusMeters) {
+ return true;
+ }
+ }
+ return false;
+ }
+ }
+
+ @Override
+ public float matchCost() {
+ return 20; // TODO: make this fancier
+ }
+ };
+ return new ConstantScoreScorer(this, score(), iterator);
}
};
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/576a4059/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonPoint.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonPoint.java b/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonPoint.java
index 519478f..d180d58 100644
--- a/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonPoint.java
+++ b/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonPoint.java
@@ -159,6 +159,16 @@ public class TestLatLonPoint extends LuceneTestCase {
assertEquals(-180.0, LatLonPoint.decodeLongitude(LatLonPoint.encodeLongitude(-180.0)), ENCODING_TOLERANCE);
}
+ public void testEncodeDecodeExtremeValues() throws Exception {
+ assertEquals(Integer.MIN_VALUE, LatLonPoint.encodeLatitude(-90.0));
+ assertEquals(0, LatLonPoint.encodeLatitude(0.0));
+ assertEquals(Integer.MAX_VALUE, LatLonPoint.encodeLatitude(90.0));
+
+ assertEquals(Integer.MIN_VALUE, LatLonPoint.encodeLongitude(-180.0));
+ assertEquals(0, LatLonPoint.encodeLatitude(0.0));
+ assertEquals(Integer.MAX_VALUE, LatLonPoint.encodeLongitude(180.0));
+ }
+
public void testEncodeDecodeIsStable() throws Exception {
int iters = atLeast(1000);
for(int iter=0;iter<iters;iter++) {