You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2016/03/15 00:08:09 UTC

lucene-solr git commit: LUCENE-7104: remove "sort missing first" from LatLonPoint.newDistanceSort and simplify/speedup code

Repository: lucene-solr
Updated Branches:
  refs/heads/master 1660b5630 -> 02bb6c015


LUCENE-7104: remove "sort missing first" from LatLonPoint.newDistanceSort and simplify/speedup code


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/02bb6c01
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/02bb6c01
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/02bb6c01

Branch: refs/heads/master
Commit: 02bb6c01550771fb0d7a75d4b283c897024c923b
Parents: 1660b56
Author: Robert Muir <rm...@apache.org>
Authored: Mon Mar 14 19:07:30 2016 -0400
Committer: Robert Muir <rm...@apache.org>
Committed: Mon Mar 14 19:07:30 2016 -0400

----------------------------------------------------------------------
 .../org/apache/lucene/document/LatLonPoint.java |   3 +-
 .../document/LatLonPointDistanceComparator.java | 116 +++++++------------
 .../document/LatLonPointDistanceQuery.java      |   8 +-
 .../lucene/document/LatLonPointSortField.java   |  14 +--
 .../document/TestLatLonPointDistanceSort.java   |  57 +++------
 5 files changed, 69 insertions(+), 129 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/02bb6c01/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPoint.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPoint.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPoint.java
index 9677baa..f5541bd 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPoint.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPoint.java
@@ -333,8 +333,7 @@ public class LatLonPoint extends Field {
    * the hits contains a Double instance with the distance in meters.
    * <p>
    * If a document is missing the field, then by default it is treated as having {@link Double#POSITIVE_INFINITY} distance
-   * (missing values sort last). You can change this to sort missing values first by calling 
-   * {@link SortField#setMissingValue(Object) setMissingValue(Double.NEGATIVE_INFINITY)} on the returned SortField. 
+   * (missing values sort last).
    * <p>
    * If a document contains multiple values for the field, the <i>closest</i> distance to the location is used.
    * <p>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/02bb6c01/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceComparator.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceComparator.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceComparator.java
index ef4c3f3..2102d81 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceComparator.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceComparator.java
@@ -42,7 +42,6 @@ class LatLonPointDistanceComparator extends FieldComparator<Double> implements L
   final String field;
   final double latitude;
   final double longitude;
-  final double missingValue;
 
   final double[] values;
   double bottom;
@@ -52,27 +51,22 @@ class LatLonPointDistanceComparator extends FieldComparator<Double> implements L
   // current bounding box(es) for the bottom distance on the PQ.
   // these are pre-encoded with LatLonPoint's encoding and 
   // used to exclude uncompetitive hits faster.
-  int minLon;
-  int maxLon;
-  int minLat;
-  int maxLat;
-
-  // crossesDateLine is true, then we have a second box to check
-  boolean crossesDateLine;
-  int minLon2;
-  int maxLon2;
-  int minLat2;
-  int maxLat2;
+  int minLon = Integer.MIN_VALUE;
+  int maxLon = Integer.MAX_VALUE;
+  int minLat = Integer.MIN_VALUE;
+  int maxLat = Integer.MAX_VALUE;
+
+  // second set of longitude ranges to check (for cross-dateline case)
+  int minLon2 = Integer.MAX_VALUE;
 
   // the number of times setBottom has been called (adversary protection)
   int setBottomCounter = 0;
 
-  public LatLonPointDistanceComparator(String field, double latitude, double longitude, int numHits, double missingValue) {
+  public LatLonPointDistanceComparator(String field, double latitude, double longitude, int numHits) {
     this.field = field;
     this.latitude = latitude;
     this.longitude = longitude;
     this.values = new double[numHits];
-    this.missingValue = missingValue;
   }
   
   @Override
@@ -90,53 +84,22 @@ class LatLonPointDistanceComparator extends FieldComparator<Double> implements L
     // sampling if we get called way too much: don't make gobs of bounding
     // boxes if comparator hits a worst case order (e.g. backwards distance order)
     if (setBottomCounter < 1024 || (setBottomCounter & 0x3F) == 0x3F) {
-      // don't pass infinite values to circleToBBox: just make a complete box.
-      if (bottom == missingValue) {
-        minLat = minLon = Integer.MIN_VALUE;
-        maxLat = maxLon = Integer.MAX_VALUE;
-        crossesDateLine = false;
+      GeoRect box = GeoUtils.circleToBBox(longitude, latitude, haversin2(bottom));
+      // pre-encode our box to our integer encoding, so we don't have to decode 
+      // to double values for uncompetitive hits. This has some cost!
+      minLat = LatLonPoint.encodeLatitude(box.minLat);
+      maxLat = LatLonPoint.encodeLatitude(box.maxLat);
+      if (box.crossesDateline()) {
+        // box1
+        minLon = Integer.MIN_VALUE;
+        maxLon = LatLonPoint.encodeLongitude(box.maxLon);
+        // box2
+        minLon2 = LatLonPoint.encodeLongitude(box.minLon);
       } else {
-        assert Double.isFinite(bottom);
-        GeoRect box = GeoUtils.circleToBBox(longitude, latitude, haversin2(bottom));
-        // pre-encode our box to our integer encoding, so we don't have to decode 
-        // to double values for uncompetitive hits. This has some cost!
-        int minLatEncoded = LatLonPoint.encodeLatitude(box.minLat);
-        int maxLatEncoded = LatLonPoint.encodeLatitude(box.maxLat);
-        int minLonEncoded = LatLonPoint.encodeLongitude(box.minLon);
-        int maxLonEncoded = LatLonPoint.encodeLongitude(box.maxLon);
-        // be sure to not introduce quantization error in our optimization, just 
-        // round up our encoded box safely in all directions.
-        if (minLatEncoded != Integer.MIN_VALUE) {
-          minLatEncoded--;
-        }
-        if (minLonEncoded != Integer.MIN_VALUE) {
-          minLonEncoded--;
-        }
-        if (maxLatEncoded != Integer.MAX_VALUE) {
-          maxLatEncoded++;
-        }
-        if (maxLonEncoded != Integer.MAX_VALUE) {
-          maxLonEncoded++;
-        }
-        crossesDateLine = box.crossesDateline();
-        // crosses dateline: split
-        if (crossesDateLine) {
-          // box1
-          minLon = Integer.MIN_VALUE;
-          maxLon = maxLonEncoded;
-          minLat = minLatEncoded;
-          maxLat = maxLatEncoded;
-          // box2
-          minLon2 = minLonEncoded;
-          maxLon2 = Integer.MAX_VALUE;
-          minLat2 = minLatEncoded;
-          maxLat2 = maxLatEncoded;
-        } else {
-          minLon = minLonEncoded;
-          maxLon = maxLonEncoded;
-          minLat = minLatEncoded;
-          maxLat = maxLatEncoded;
-        }
+        minLon = LatLonPoint.encodeLongitude(box.minLon);
+        maxLon = LatLonPoint.encodeLongitude(box.maxLon);
+        // disable box2
+        minLon2 = Integer.MAX_VALUE;
       }
     }
     setBottomCounter++;
@@ -153,24 +116,33 @@ class LatLonPointDistanceComparator extends FieldComparator<Double> implements L
 
     int numValues = currentDocs.count();
     if (numValues == 0) {
-      return Double.compare(bottom, missingValue);
+      return Double.compare(bottom, Double.POSITIVE_INFINITY);
     }
 
-    double minValue = Double.POSITIVE_INFINITY;
+    int cmp = -1;
     for (int i = 0; i < numValues; i++) {
       long encoded = currentDocs.valueAt(i);
+
+      // test bounding box
       int latitudeBits = (int)(encoded >> 32);
+      if (latitudeBits < minLat || latitudeBits > maxLat) {
+        continue;
+      }
       int longitudeBits = (int)(encoded & 0xFFFFFFFF);
-      boolean outsideBox = ((latitudeBits < minLat || longitudeBits < minLon || latitudeBits > maxLat || longitudeBits > maxLon) &&
-            (crossesDateLine == false || latitudeBits < minLat2 || longitudeBits < minLon2 || latitudeBits > maxLat2 || longitudeBits > maxLon2));
+      if ((longitudeBits < minLon || longitudeBits > maxLon) && (longitudeBits < minLon2)) {
+        continue;
+      }
+
       // only compute actual distance if its inside "competitive bounding box"
-      if (outsideBox == false) {
-        double docLatitude = LatLonPoint.decodeLatitude(latitudeBits);
-        double docLongitude = LatLonPoint.decodeLongitude(longitudeBits);
-        minValue = Math.min(minValue, haversin1(latitude, longitude, docLatitude, docLongitude));
+      double docLatitude = LatLonPoint.decodeLatitude(latitudeBits);
+      double docLongitude = LatLonPoint.decodeLongitude(longitudeBits);
+      cmp = Math.max(cmp, Double.compare(bottom, haversin1(latitude, longitude, docLatitude, docLongitude)));
+      // once we compete in the PQ, no need to continue.
+      if (cmp > 0) {
+        return cmp;
       }
     }
-    return Double.compare(bottom, minValue);
+    return cmp;
   }
   
   @Override
@@ -204,12 +176,8 @@ class LatLonPointDistanceComparator extends FieldComparator<Double> implements L
   double sortKey(int doc) {
     currentDocs.setDocument(doc);
 
-    int numValues = currentDocs.count();
-    if (numValues == 0) {
-      return missingValue;
-    }
-
     double minValue = Double.POSITIVE_INFINITY;
+    int numValues = currentDocs.count();
     for (int i = 0; i < numValues; i++) {
       long encoded = currentDocs.valueAt(i);
       double docLatitude = LatLonPoint.decodeLatitude((int)(encoded >> 32));

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/02bb6c01/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java
index 9d23986..3f86f1e 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java
@@ -142,9 +142,11 @@ final class LatLonPointDistanceQuery extends Query {
                              double latMax = LatLonPoint.decodeLatitude(maxPackedValue, 0);
                              double lonMax = LatLonPoint.decodeLongitude(maxPackedValue, Integer.BYTES);
                              
-                             if ((latMax < box1.minLat || lonMax < box1.minLon || latMin > box1.maxLat || lonMin > box1.maxLon) && 
-                                 (box2 == null || latMax < box2.minLat || lonMax < box2.minLon || latMin > box2.maxLat || lonMin > box2.maxLon)) {
-                               // we are fully outside of bounding box(es), don't proceed any further.
+                             if (latMax < box1.minLat || latMin > box1.maxLat) {
+                               // latitude out of bounding box range
+                               return Relation.CELL_OUTSIDE_QUERY;
+                             } else if ((lonMax < box1.minLon || lonMin > box1.maxLon) && (box2 == null || lonMax < box2.minLon)) {
+                               // longitude out of bounding box range
                                return Relation.CELL_OUTSIDE_QUERY;
                              } else if (lonMax - longitude < 90 && longitude - lonMin < 90 &&
                                  GeoDistanceUtils.haversin(latitude, longitude, latMin, lonMin) <= radiusMeters &&

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/02bb6c01/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointSortField.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointSortField.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointSortField.java
index da90b86..f883043 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointSortField.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointSortField.java
@@ -47,7 +47,7 @@ final class LatLonPointSortField extends SortField {
   
   @Override
   public FieldComparator<?> getComparator(int numHits, int sortPos) throws IOException {
-    return new LatLonPointDistanceComparator(getField(), latitude, longitude, numHits, getMissingValue());
+    return new LatLonPointDistanceComparator(getField(), latitude, longitude, numHits);
   }
 
   @Override
@@ -57,16 +57,10 @@ final class LatLonPointSortField extends SortField {
 
   @Override
   public void setMissingValue(Object missingValue) {
-    if (missingValue == null) {
-      throw new IllegalArgumentException("Missing value cannot be null");
+    if (Double.valueOf(Double.POSITIVE_INFINITY).equals(missingValue) == false) {
+      throw new IllegalArgumentException("Missing value can only be Double.POSITIVE_INFINITY (missing values last), but got " + missingValue);
     }
-    if (missingValue.getClass() != Double.class)
-      throw new IllegalArgumentException("Missing value can only be of type java.lang.Double, but got " + missingValue.getClass());
-    Double value = (Double) missingValue;
-    if (!Double.isInfinite(value)) {
-      throw new IllegalArgumentException("Missing value can only be Double.NEGATIVE_INFINITY (missing values first) or Double.POSITIVE_INFINITY (missing values last), but got " + value);
-    }
-    this.missingValue = value;
+    this.missingValue = missingValue;
   }
   
   @Override

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/02bb6c01/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonPointDistanceSort.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonPointDistanceSort.java b/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonPointDistanceSort.java
index 7df956f..4376313 100644
--- a/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonPointDistanceSort.java
+++ b/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonPointDistanceSort.java
@@ -28,6 +28,8 @@ import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.SortField;
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.spatial.util.GeoDistanceUtils;
+import org.apache.lucene.spatial.util.GeoRect;
+import org.apache.lucene.spatial.util.GeoUtils;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.TestUtil;
@@ -110,45 +112,6 @@ public class TestLatLonPointDistanceSort extends LuceneTestCase {
     dir.close();
   }
   
-  /** Add two points (one doc missing) and sort by distance */
-  public void testMissingFirst() throws Exception {
-    Directory dir = newDirectory();
-    RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
-    
-    // missing
-    Document doc = new Document();
-    iw.addDocument(doc);
-    
-    doc = new Document();
-    doc.add(new LatLonPoint("location", 40.718266, -74.007819));
-    iw.addDocument(doc);
-    
-    doc = new Document();
-    doc.add(new LatLonPoint("location", 40.7051157, -74.0088305));
-    iw.addDocument(doc);
-    
-    IndexReader reader = iw.getReader();
-    IndexSearcher searcher = newSearcher(reader);
-    iw.close();
-
-    SortField sortField = LatLonPoint.newDistanceSort("location", 40.7143528, -74.0059731);
-    sortField.setMissingValue(Double.NEGATIVE_INFINITY);
-    Sort sort = new Sort(sortField);
-    TopDocs td = searcher.search(new MatchAllDocsQuery(), 3, sort);
-
-    FieldDoc d = (FieldDoc) td.scoreDocs[0];
-    assertEquals(Double.NEGATIVE_INFINITY, (Double)d.fields[0], 0.0D);
-    
-    d = (FieldDoc) td.scoreDocs[1];
-    assertEquals(462.61748421408186D, (Double)d.fields[0], 0.0D);
-    
-    d = (FieldDoc) td.scoreDocs[2];
-    assertEquals(1056.1630445911035D, (Double)d.fields[0], 0.0D);
-    
-    reader.close();
-    dir.close();
-  }
-  
   /** Run a few iterations with just 10 docs, hopefully easy to debug */
   public void testRandom() throws Exception {
     for (int iters = 0; iters < 100; iters++) {
@@ -239,7 +202,7 @@ public class TestLatLonPointDistanceSort extends LuceneTestCase {
     for (int i = 0; i < numQueries; i++) {
       double lat = -90 + 180.0 * random().nextDouble();
       double lon = -180 + 360.0 * random().nextDouble();
-      double missingValue = random().nextBoolean() ? Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY;
+      double missingValue = Double.POSITIVE_INFINITY;
 
       Result expected[] = new Result[reader.maxDoc()];
       
@@ -309,4 +272,18 @@ public class TestLatLonPointDistanceSort extends LuceneTestCase {
       assertEquals(expected, actual);
     }
   }
+  
+  /** Test infinite radius covers whole earth */
+  public void testInfiniteRect() {
+    for (int i = 0; i < 100000; i++) {
+      double centerLat = -90 + 180.0 * random().nextDouble();
+      double centerLon = -180 + 360.0 * random().nextDouble();
+      GeoRect rect = GeoUtils.circleToBBox(centerLat, centerLon, Double.POSITIVE_INFINITY);
+      assertEquals(-180.0, rect.minLon, 0.0D);
+      assertEquals(180.0, rect.maxLon, 0.0D);
+      assertEquals(-90.0, rect.minLat, 0.0D);
+      assertEquals(90.0, rect.maxLat, 0.0D);
+      assertFalse(rect.crossesDateline());
+    }
+  }
 }