You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by no...@apache.org on 2016/03/18 09:29:52 UTC

[05/50] lucene-solr:apiv2: LUCENE-7099: use two-phase iteration in LatLonPoint.newDistanceQuery

LUCENE-7099: use two-phase iteration in LatLonPoint.newDistanceQuery


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/576a4059
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/576a4059
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/576a4059

Branch: refs/heads/apiv2
Commit: 576a40596db5f65f53f2d9e9c494fc71e7e7bade
Parents: 0ff341f
Author: Robert Muir <rm...@apache.org>
Authored: Sat Mar 12 10:58:32 2016 -0500
Committer: Robert Muir <rm...@apache.org>
Committed: Sat Mar 12 10:59:43 2016 -0500

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |  3 +
 .../org/apache/lucene/document/LatLonPoint.java | 14 +++--
 .../document/LatLonPointDistanceQuery.java      | 66 +++++++++++++++++---
 .../apache/lucene/document/TestLatLonPoint.java | 10 +++
 4 files changed, 80 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/576a4059/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 82d22fe..cd9fac2 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -18,6 +18,9 @@ Optimizations
 * LUCENE-7071: Reduce bytes copying in OfflineSorter, giving ~10%
   speedup on merging 2D LatLonPoint values (Mike McCandless)
 
+* LUCENE-7099: LatLonPoint's newDistanceQuery supports two-phase
+  iteration. (Robert Muir)
+
 Other
 
 * LUCENE-7087: Let MemoryIndex#fromDocument(...) accept 'Iterable<? extends IndexableField>'

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/576a4059/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPoint.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPoint.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPoint.java
index e8c2f17..ebf850c 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPoint.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPoint.java
@@ -96,8 +96,10 @@ public class LatLonPoint extends Field {
   }
 
   private static final int BITS = 32;
-  private static final double LONGITUDE_SCALE = (0x1L<<BITS)/360.0D;
-  private static final double LATITUDE_SCALE = (0x1L<<BITS)/180.0D;
+  private static final double LONGITUDE_ENCODE = (0x1L<<BITS)/360.0D;
+  private static final double LONGITUDE_DECODE = 1/LONGITUDE_ENCODE;
+  private static final double LATITUDE_ENCODE  = (0x1L<<BITS)/180.0D;
+  private static final double LATITUDE_DECODE  =  1/LATITUDE_ENCODE;
   
   @Override
   public String toString() {
@@ -142,7 +144,7 @@ public class LatLonPoint extends Field {
     if (latitude == 90.0D) {
       latitude = Math.nextDown(latitude);
     }
-    return Math.toIntExact((long) (latitude * LATITUDE_SCALE));
+    return Math.toIntExact((long) (latitude * LATITUDE_ENCODE));
   }
 
   /** 
@@ -159,7 +161,7 @@ public class LatLonPoint extends Field {
     if (longitude == 180.0D) {
       longitude = Math.nextDown(longitude);
     }
-    return Math.toIntExact((long) (longitude * LONGITUDE_SCALE));
+    return Math.toIntExact((long) (longitude * LONGITUDE_ENCODE));
   }
 
   /** 
@@ -168,7 +170,7 @@ public class LatLonPoint extends Field {
    * @return decoded latitude value.
    */
   public static double decodeLatitude(int encoded) {
-    double result = encoded / LATITUDE_SCALE;
+    double result = encoded * LATITUDE_DECODE;
     assert GeoUtils.isValidLat(result);
     return result;
   }
@@ -189,7 +191,7 @@ public class LatLonPoint extends Field {
    * @return decoded longitude value.
    */  
   public static double decodeLongitude(int encoded) {
-    double result = encoded / LONGITUDE_SCALE;
+    double result = encoded * LONGITUDE_DECODE;
     assert GeoUtils.isValidLon(result);
     return result;
   }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/576a4059/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java
index 9374b17..9d23986 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java
@@ -18,22 +18,30 @@ package org.apache.lucene.document;
 
 import java.io.IOException;
 
+import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.PointValues;
+import org.apache.lucene.index.SortedNumericDocValues;
 import org.apache.lucene.index.PointValues.IntersectVisitor;
 import org.apache.lucene.index.PointValues.Relation;
 import org.apache.lucene.search.ConstantScoreScorer;
 import org.apache.lucene.search.ConstantScoreWeight;
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.TwoPhaseIterator;
 import org.apache.lucene.search.Weight;
 import org.apache.lucene.spatial.util.GeoDistanceUtils;
 import org.apache.lucene.spatial.util.GeoRect;
 import org.apache.lucene.spatial.util.GeoUtils;
+import org.apache.lucene.util.BitSet;
 import org.apache.lucene.util.DocIdSetBuilder;
+import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.SparseFixedBitSet;
 
 /**
  * Distance query for {@link LatLonPoint}.
@@ -95,22 +103,32 @@ final class LatLonPointDistanceQuery extends Query {
         }
         LatLonPoint.checkCompatible(fieldInfo);
         
+        // approximation (postfiltering has not yet been applied)
         DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc());
+        // subset of documents that need no postfiltering, this is purely an optimization
+        final BitSet preApproved;
+        // dumb heuristic: if the field is really sparse, use a sparse impl
+        if (values.getDocCount(field) * 100L < reader.maxDoc()) {
+          preApproved = new SparseFixedBitSet(reader.maxDoc());
+        } else {
+          preApproved = new FixedBitSet(reader.maxDoc());
+        }
         values.intersect(field,
                          new IntersectVisitor() {
                            @Override
+                           public void grow(int count) {
+                             result.grow(count);
+                           }
+
+                           @Override
                            public void visit(int docID) {
                              result.add(docID);
+                             preApproved.set(docID);
                            }
 
                            @Override
                            public void visit(int docID, byte[] packedValue) {
-                             assert packedValue.length == 8;
-                             double lat = LatLonPoint.decodeLatitude(packedValue, 0);
-                             double lon = LatLonPoint.decodeLongitude(packedValue, Integer.BYTES);
-                             if (GeoDistanceUtils.haversin(latitude, longitude, lat, lon) <= radiusMeters) {
-                               visit(docID);
-                             }
+                             result.add(docID);
                            }
                            
                            // algorithm: we create a bounding box (two bounding boxes if we cross the dateline).
@@ -142,7 +160,41 @@ final class LatLonPointDistanceQuery extends Query {
                            }
                          });
 
-        return new ConstantScoreScorer(this, score(), result.build().iterator());
+        DocIdSet set = result.build();
+        final DocIdSetIterator disi = set.iterator();
+        if (disi == null) {
+          return null;
+        }
+
+        // return two-phase iterator using docvalues to postfilter candidates
+        SortedNumericDocValues docValues = DocValues.getSortedNumeric(reader, field);
+        TwoPhaseIterator iterator = new TwoPhaseIterator(disi) {
+          @Override
+          public boolean matches() throws IOException {
+            int docId = disi.docID();
+            if (preApproved.get(docId)) {
+              return true;
+            } else {
+              docValues.setDocument(docId);
+              int count = docValues.count();
+              for (int i = 0; i < count; i++) {
+                long encoded = docValues.valueAt(i);
+                double docLatitude = LatLonPoint.decodeLatitude((int)(encoded >> 32));
+                double docLongitude = LatLonPoint.decodeLongitude((int)(encoded & 0xFFFFFFFF));
+                if (GeoDistanceUtils.haversin(latitude, longitude, docLatitude, docLongitude) <= radiusMeters) {
+                  return true;
+                }
+              }
+              return false;
+            }
+          }
+
+          @Override
+          public float matchCost() {
+            return 20; // TODO: make this fancier
+          }
+        };
+        return new ConstantScoreScorer(this, score(), iterator);
       }
     };
   }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/576a4059/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonPoint.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonPoint.java b/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonPoint.java
index 519478f..d180d58 100644
--- a/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonPoint.java
+++ b/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonPoint.java
@@ -159,6 +159,16 @@ public class TestLatLonPoint extends LuceneTestCase {
     assertEquals(-180.0, LatLonPoint.decodeLongitude(LatLonPoint.encodeLongitude(-180.0)), ENCODING_TOLERANCE);
   }
 
+  public void testEncodeDecodeExtremeValues() throws Exception {
+    assertEquals(Integer.MIN_VALUE, LatLonPoint.encodeLatitude(-90.0));
+    assertEquals(0, LatLonPoint.encodeLatitude(0.0));
+    assertEquals(Integer.MAX_VALUE, LatLonPoint.encodeLatitude(90.0));
+
+    assertEquals(Integer.MIN_VALUE, LatLonPoint.encodeLongitude(-180.0));
+    assertEquals(0, LatLonPoint.encodeLatitude(0.0));
+    assertEquals(Integer.MAX_VALUE, LatLonPoint.encodeLongitude(180.0));
+  }
+
   public void testEncodeDecodeIsStable() throws Exception {
     int iters = atLeast(1000);
     for(int iter=0;iter<iters;iter++) {