You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ho...@apache.org on 2016/03/21 01:43:41 UTC

[19/50] lucene-solr:jira/SOLR-445: LUCENE-7102: LatLonPoint.newDistanceSort fails with "sort missing first"

LUCENE-7102: LatLonPoint.newDistanceSort fails with "sort missing first"


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/0f949c81
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/0f949c81
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/0f949c81

Branch: refs/heads/jira/SOLR-445
Commit: 0f949c815343c853499d518e7d565d642d93ce63
Parents: 80fe00b
Author: Robert Muir <rm...@apache.org>
Authored: Mon Mar 14 14:08:25 2016 -0400
Committer: Robert Muir <rm...@apache.org>
Committed: Mon Mar 14 14:08:25 2016 -0400

----------------------------------------------------------------------
 .../org/apache/lucene/document/LatLonPoint.java |   4 +-
 .../document/LatLonPointDistanceComparator.java |  84 +++++++-------
 .../lucene/document/LatLonPointSortField.java   |   6 +-
 .../document/TestLatLonPointDistanceSort.java   | 111 +++++++++++++++++--
 4 files changed, 152 insertions(+), 53 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0f949c81/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPoint.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPoint.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPoint.java
index ebf850c..9677baa 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPoint.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPoint.java
@@ -333,8 +333,8 @@ public class LatLonPoint extends Field {
    * the hits contains a Double instance with the distance in meters.
    * <p>
    * If a document is missing the field, then by default it is treated as having {@link Double#POSITIVE_INFINITY} distance
-   * (missing last). You can change this by calling {@link SortField#setMissingValue(Object)} on the returned SortField 
-   * to a different Double value.
+   * (missing values sort last). You can change this to sort missing values first by calling 
+   * {@link SortField#setMissingValue(Object) setMissingValue(Double.NEGATIVE_INFINITY)} on the returned SortField. 
    * <p>
    * If a document contains multiple values for the field, the <i>closest</i> distance to the location is used.
    * <p>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0f949c81/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceComparator.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceComparator.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceComparator.java
index e64f4b0..86c9134 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceComparator.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceComparator.java
@@ -89,45 +89,53 @@ class LatLonPointDistanceComparator extends FieldComparator<Double> implements L
     // sampling if we get called way too much: don't make gobs of bounding
     // boxes if comparator hits a worst case order (e.g. backwards distance order)
     if (setBottomCounter < 1024 || (setBottomCounter & 0x3F) == 0x3F) {
-      GeoRect box = GeoUtils.circleToBBox(longitude, latitude, bottom);
-      // pre-encode our box to our integer encoding, so we don't have to decode 
-      // to double values for uncompetitive hits. This has some cost!
-      int minLatEncoded = LatLonPoint.encodeLatitude(box.minLat);
-      int maxLatEncoded = LatLonPoint.encodeLatitude(box.maxLat);
-      int minLonEncoded = LatLonPoint.encodeLongitude(box.minLon);
-      int maxLonEncoded = LatLonPoint.encodeLongitude(box.maxLon);
-      // be sure to not introduce quantization error in our optimization, just 
-      // round up our encoded box safely in all directions.
-      if (minLatEncoded != Integer.MIN_VALUE) {
-        minLatEncoded--;
-      }
-      if (minLonEncoded != Integer.MIN_VALUE) {
-        minLonEncoded--;
-      }
-      if (maxLatEncoded != Integer.MAX_VALUE) {
-        maxLatEncoded++;
-      }
-      if (maxLonEncoded != Integer.MAX_VALUE) {
-        maxLonEncoded++;
-      }
-      crossesDateLine = box.crossesDateline();
-      // crosses dateline: split
-      if (crossesDateLine) {
-        // box1
-        minLon = Integer.MIN_VALUE;
-        maxLon = maxLonEncoded;
-        minLat = minLatEncoded;
-        maxLat = maxLatEncoded;
-        // box2
-        minLon2 = minLonEncoded;
-        maxLon2 = Integer.MAX_VALUE;
-        minLat2 = minLatEncoded;
-        maxLat2 = maxLatEncoded;
+      // don't pass infinite values to circleToBBox: just make a complete box.
+      if (bottom == missingValue) {
+        minLat = minLon = Integer.MIN_VALUE;
+        maxLat = maxLon = Integer.MAX_VALUE;
+        crossesDateLine = false;
       } else {
-        minLon = minLonEncoded;
-        maxLon = maxLonEncoded;
-        minLat = minLatEncoded;
-        maxLat = maxLatEncoded;
+        assert Double.isFinite(bottom);
+        GeoRect box = GeoUtils.circleToBBox(longitude, latitude, bottom);
+        // pre-encode our box to our integer encoding, so we don't have to decode 
+        // to double values for uncompetitive hits. This has some cost!
+        int minLatEncoded = LatLonPoint.encodeLatitude(box.minLat);
+        int maxLatEncoded = LatLonPoint.encodeLatitude(box.maxLat);
+        int minLonEncoded = LatLonPoint.encodeLongitude(box.minLon);
+        int maxLonEncoded = LatLonPoint.encodeLongitude(box.maxLon);
+        // be sure to not introduce quantization error in our optimization, just 
+        // round up our encoded box safely in all directions.
+        if (minLatEncoded != Integer.MIN_VALUE) {
+          minLatEncoded--;
+        }
+        if (minLonEncoded != Integer.MIN_VALUE) {
+          minLonEncoded--;
+        }
+        if (maxLatEncoded != Integer.MAX_VALUE) {
+          maxLatEncoded++;
+        }
+        if (maxLonEncoded != Integer.MAX_VALUE) {
+          maxLonEncoded++;
+        }
+        crossesDateLine = box.crossesDateline();
+        // crosses dateline: split
+        if (crossesDateLine) {
+          // box1
+          minLon = Integer.MIN_VALUE;
+          maxLon = maxLonEncoded;
+          minLat = minLatEncoded;
+          maxLat = maxLatEncoded;
+          // box2
+          minLon2 = minLonEncoded;
+          maxLon2 = Integer.MAX_VALUE;
+          minLat2 = minLatEncoded;
+          maxLat2 = maxLatEncoded;
+        } else {
+          minLon = minLonEncoded;
+          maxLon = maxLonEncoded;
+          minLat = minLatEncoded;
+          maxLat = maxLatEncoded;
+        }
       }
     }
     setBottomCounter++;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0f949c81/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointSortField.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointSortField.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointSortField.java
index 9d3928e..da90b86 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointSortField.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointSortField.java
@@ -62,7 +62,11 @@ final class LatLonPointSortField extends SortField {
     }
     if (missingValue.getClass() != Double.class)
       throw new IllegalArgumentException("Missing value can only be of type java.lang.Double, but got " + missingValue.getClass());
-    this.missingValue = missingValue;
+    Double value = (Double) missingValue;
+    if (!Double.isInfinite(value)) {
+      throw new IllegalArgumentException("Missing value can only be Double.NEGATIVE_INFINITY (missing values first) or Double.POSITIVE_INFINITY (missing values last), but got " + value);
+    }
+    this.missingValue = value;
   }
   
   @Override

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0f949c81/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonPointDistanceSort.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonPointDistanceSort.java b/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonPointDistanceSort.java
index ea36ea6..a776b3f 100644
--- a/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonPointDistanceSort.java
+++ b/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonPointDistanceSort.java
@@ -73,6 +73,82 @@ public class TestLatLonPointDistanceSort extends LuceneTestCase {
     dir.close();
   }
   
+  /** Add two points (one doc missing) and sort by distance */
+  public void testMissingLast() throws Exception {
+    Directory dir = newDirectory();
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
+    
+    // missing
+    Document doc = new Document();
+    iw.addDocument(doc);
+    
+    doc = new Document();
+    doc.add(new LatLonPoint("location", 40.718266, -74.007819));
+    iw.addDocument(doc);
+    
+    doc = new Document();
+    doc.add(new LatLonPoint("location", 40.7051157, -74.0088305));
+    iw.addDocument(doc);
+    
+    IndexReader reader = iw.getReader();
+    IndexSearcher searcher = new IndexSearcher(reader);
+    iw.close();
+
+    Sort sort = new Sort(LatLonPoint.newDistanceSort("location", 40.7143528, -74.0059731));
+    TopDocs td = searcher.search(new MatchAllDocsQuery(), 3, sort);
+    
+    FieldDoc d = (FieldDoc) td.scoreDocs[0];
+    assertEquals(462.61748421408186D, (Double)d.fields[0], 0.0D);
+    
+    d = (FieldDoc) td.scoreDocs[1];
+    assertEquals(1056.1630445911035D, (Double)d.fields[0], 0.0D);
+    
+    d = (FieldDoc) td.scoreDocs[2];
+    assertEquals(Double.POSITIVE_INFINITY, (Double)d.fields[0], 0.0D);
+    
+    reader.close();
+    dir.close();
+  }
+  
+  /** Add two points (one doc missing) and sort by distance */
+  public void testMissingFirst() throws Exception {
+    Directory dir = newDirectory();
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
+    
+    // missing
+    Document doc = new Document();
+    iw.addDocument(doc);
+    
+    doc = new Document();
+    doc.add(new LatLonPoint("location", 40.718266, -74.007819));
+    iw.addDocument(doc);
+    
+    doc = new Document();
+    doc.add(new LatLonPoint("location", 40.7051157, -74.0088305));
+    iw.addDocument(doc);
+    
+    IndexReader reader = iw.getReader();
+    IndexSearcher searcher = new IndexSearcher(reader);
+    iw.close();
+
+    SortField sortField = LatLonPoint.newDistanceSort("location", 40.7143528, -74.0059731);
+    sortField.setMissingValue(Double.NEGATIVE_INFINITY);
+    Sort sort = new Sort(sortField);
+    TopDocs td = searcher.search(new MatchAllDocsQuery(), 3, sort);
+
+    FieldDoc d = (FieldDoc) td.scoreDocs[0];
+    assertEquals(Double.NEGATIVE_INFINITY, (Double)d.fields[0], 0.0D);
+    
+    d = (FieldDoc) td.scoreDocs[1];
+    assertEquals(462.61748421408186D, (Double)d.fields[0], 0.0D);
+    
+    d = (FieldDoc) td.scoreDocs[2];
+    assertEquals(1056.1630445911035D, (Double)d.fields[0], 0.0D);
+    
+    reader.close();
+    dir.close();
+  }
+  
   /** Run a few iterations with just 10 docs, hopefully easy to debug */
   public void testRandom() throws Exception {
     for (int iters = 0; iters < 100; iters++) {
@@ -141,17 +217,20 @@ public class TestLatLonPointDistanceSort extends LuceneTestCase {
     RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
 
     for (int i = 0; i < numDocs; i++) {
-      double latRaw = -90 + 180.0 * random().nextDouble();
-      double lonRaw = -180 + 360.0 * random().nextDouble();
-      // pre-normalize up front, so we can just use quantized value for testing and do simple exact comparisons
-      double lat = LatLonPoint.decodeLatitude(LatLonPoint.encodeLatitude(latRaw));
-      double lon = LatLonPoint.decodeLongitude(LatLonPoint.encodeLongitude(lonRaw));
       Document doc = new Document();
       doc.add(new StoredField("id", i));
       doc.add(new NumericDocValuesField("id", i));
-      doc.add(new LatLonPoint("field", lat, lon));
-      doc.add(new StoredField("lat", lat));
-      doc.add(new StoredField("lon", lon));
+      if (random().nextInt(10) > 7) {
+        double latRaw = -90 + 180.0 * random().nextDouble();
+        double lonRaw = -180 + 360.0 * random().nextDouble();
+        // pre-normalize up front, so we can just use quantized value for testing and do simple exact comparisons
+        double lat = LatLonPoint.decodeLatitude(LatLonPoint.encodeLatitude(latRaw));
+        double lon = LatLonPoint.decodeLongitude(LatLonPoint.encodeLongitude(lonRaw));
+
+        doc.add(new LatLonPoint("field", lat, lon));
+        doc.add(new StoredField("lat", lat));
+        doc.add(new StoredField("lon", lon));
+      } // otherwise "missing"
       writer.addDocument(doc);
     }
     IndexReader reader = writer.getReader();
@@ -160,14 +239,20 @@ public class TestLatLonPointDistanceSort extends LuceneTestCase {
     for (int i = 0; i < numQueries; i++) {
       double lat = -90 + 180.0 * random().nextDouble();
       double lon = -180 + 360.0 * random().nextDouble();
+      double missingValue = random().nextBoolean() ? Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY;
 
       Result expected[] = new Result[reader.maxDoc()];
       
       for (int doc = 0; doc < reader.maxDoc(); doc++) {
         Document targetDoc = reader.document(doc);
-        double docLatitude = targetDoc.getField("lat").numericValue().doubleValue();
-        double docLongitude = targetDoc.getField("lon").numericValue().doubleValue();
-        double distance = GeoDistanceUtils.haversin(lat, lon, docLatitude, docLongitude);
+        final double distance;
+        if (targetDoc.getField("lat") == null) {
+          distance = missingValue; // missing
+        } else {
+          double docLatitude = targetDoc.getField("lat").numericValue().doubleValue();
+          double docLongitude = targetDoc.getField("lon").numericValue().doubleValue();
+          distance = GeoDistanceUtils.haversin(lat, lon, docLatitude, docLongitude);
+        }
         int id = targetDoc.getField("id").numericValue().intValue();
         expected[doc] = new Result(id, distance);
       }
@@ -177,7 +262,9 @@ public class TestLatLonPointDistanceSort extends LuceneTestCase {
       // randomize the topN a bit
       int topN = TestUtil.nextInt(random(), 1, reader.maxDoc());
       // sort by distance, then ID
-      Sort sort = new Sort(LatLonPoint.newDistanceSort("field", lat, lon), 
+      SortField distanceSort = LatLonPoint.newDistanceSort("field", lat, lon);
+      distanceSort.setMissingValue(missingValue);
+      Sort sort = new Sort(distanceSort, 
                            new SortField("id", SortField.Type.INT));
 
       TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), topN, sort);