You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ds...@apache.org on 2018/03/17 16:24:28 UTC

lucene-solr:branch_7x: SOLR-11731: LatLonPointSpatialField now supports docValue retrieval Closes #323

Repository: lucene-solr
Updated Branches:
  refs/heads/branch_7x 911fda2ef -> 20d7d993f


SOLR-11731: LatLonPointSpatialField now supports docValue retrieval
Closes #323

(cherry picked from commit 4b08efc)


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/20d7d993
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/20d7d993
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/20d7d993

Branch: refs/heads/branch_7x
Commit: 20d7d993f9abc0ca4ba898b3f5729cd103cc755b
Parents: 911fda2
Author: David Smiley <ds...@apache.org>
Authored: Sat Mar 17 12:21:53 2018 -0400
Committer: David Smiley <ds...@apache.org>
Committed: Sat Mar 17 12:24:23 2018 -0400

----------------------------------------------------------------------
 solr/CHANGES.txt                                |   3 +
 .../solr/schema/LatLonPointSpatialField.java    |  24 ++++
 .../apache/solr/search/SolrDocumentFetcher.java |  19 ++-
 .../solr/collection1/conf/schema-spatial.xml    |   3 +
 .../apache/solr/search/TestSolr4Spatial2.java   | 144 +++++++++++++++++--
 5 files changed, 176 insertions(+), 17 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/20d7d993/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index c5f71f6..93a6714 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -48,6 +48,9 @@ Optimizations
   differential fetching now speeds up recovery times when full index replication is needed, but only
   a few segments diverge. (Ishan Chattopadhyaya, Shaun Sabo, John Gallagher)
 
+* SOLR-11731: LatLonPointSpatialField can now decode points from docValues when stored=false docValues=true,
+  albeit with maximum precision of 1.33cm (Karthik Ramachandran, David Smiley)
+
 Other Changes
 ----------------------
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/20d7d993/solr/core/src/java/org/apache/solr/schema/LatLonPointSpatialField.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/schema/LatLonPointSpatialField.java b/solr/core/src/java/org/apache/solr/schema/LatLonPointSpatialField.java
index a92fc00..f612a4a 100644
--- a/solr/core/src/java/org/apache/solr/schema/LatLonPointSpatialField.java
+++ b/solr/core/src/java/org/apache/solr/schema/LatLonPointSpatialField.java
@@ -18,11 +18,13 @@
 package org.apache.solr.schema;
 
 import java.io.IOException;
+import java.math.BigDecimal;
 import java.util.Objects;
 
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.LatLonDocValuesField;
 import org.apache.lucene.document.LatLonPoint;
+import org.apache.lucene.geo.GeoEncodingUtils;
 import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.queries.function.ValueSource;
@@ -46,6 +48,8 @@ import org.locationtech.spatial4j.shape.Point;
 import org.locationtech.spatial4j.shape.Rectangle;
 import org.locationtech.spatial4j.shape.Shape;
 
+import static java.math.RoundingMode.CEILING;
+
 /**
  * A spatial implementation based on Lucene's {@code LatLonPoint} and {@code LatLonDocValuesField}. The
  * first is based on Lucene's "Points" API, which is a BKD Index.  This field type is strictly limited to
@@ -71,6 +75,26 @@ public class LatLonPointSpatialField extends AbstractSpatialFieldType implements
     SchemaField schemaField = schema.getField(fieldName); // TODO change AbstractSpatialFieldType so we get schemaField?
     return new LatLonPointSpatialStrategy(ctx, fieldName, schemaField.indexed(), schemaField.hasDocValues());
   }
+  
+  /**
+   * Decodes the docValues number into latitude and longitude components, formatting as "lat,lon".
+   * The encoding is governed by {@code LatLonDocValuesField}.  The decimal output representation is reflective
+   * of the available precision.
+   * @param value Non-null; stored location field data
+   * @return Non-null; "lat, lon" with 6 decimal point precision
+   */
+  public static String decodeDocValueToString(long value) {
+    final double latDouble = GeoEncodingUtils.decodeLatitude((int) (value >> 32));
+    final double lonDouble = GeoEncodingUtils.decodeLongitude((int) (value & 0xFFFFFFFFL));
+    // 7 decimal places maximizes our available precision to just over a centimeter; we have a test for it.
+    // CEILING round-trips (decode then re-encode then decode to get identical results). Others did not. It also
+    //   reverses the "floor" that occurs when we encode.
+    BigDecimal latitudeDecoded = BigDecimal.valueOf(latDouble).setScale(7, CEILING);
+    BigDecimal longitudeDecoded = BigDecimal.valueOf(lonDouble).setScale(7, CEILING);
+    return latitudeDecoded.stripTrailingZeros().toPlainString() + ","
+        + longitudeDecoded.stripTrailingZeros().toPlainString();
+    // return ((float)latDouble) + "," + ((float)lonDouble);  crude but not quite as accurate
+  }
 
   // TODO move to Lucene-spatial-extras once LatLonPoint & LatLonDocValuesField moves out of sandbox
   public static class LatLonPointSpatialStrategy extends SpatialStrategy {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/20d7d993/solr/core/src/java/org/apache/solr/search/SolrDocumentFetcher.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/SolrDocumentFetcher.java b/solr/core/src/java/org/apache/solr/search/SolrDocumentFetcher.java
index 7c71b64..dbb8bd7 100644
--- a/solr/core/src/java/org/apache/solr/search/SolrDocumentFetcher.java
+++ b/solr/core/src/java/org/apache/solr/search/SolrDocumentFetcher.java
@@ -56,6 +56,7 @@ import org.apache.lucene.util.NumericUtils;
 import org.apache.solr.common.SolrDocumentBase;
 import org.apache.solr.core.SolrConfig;
 import org.apache.solr.schema.BoolField;
+import org.apache.solr.schema.LatLonPointSpatialField;
 import org.apache.solr.schema.AbstractEnumField;
 import org.apache.solr.schema.NumberType;
 import org.apache.solr.schema.SchemaField;
@@ -490,8 +491,16 @@ public class SolrDocumentFetcher {
             long number = numericDv.nextValue();
             Object value = decodeNumberFromDV(schemaField, number, true);
             // return immediately if the number is not decodable, hence won't return an empty list.
-            if (value == null) return null;
-            else outValues.add(value);
+            if (value == null) {
+              return null;
+            }
+            // normally never true but LatLonPointSpatialField uses SORTED_NUMERIC even when single valued
+            else if (schemaField.multiValued() == false) {
+              return value;
+            }
+            else {
+              outValues.add(value);
+            }
           }
           assert outValues.size() > 0;
           return outValues;
@@ -515,6 +524,12 @@ public class SolrDocumentFetcher {
   }
 
   private Object decodeNumberFromDV(SchemaField schemaField, long value, boolean sortableNumeric) {
+    // note: This special-case is unfortunate; if we have to add any more than perhaps the fieldType should
+    //  have this method so that specific field types can customize it.
+    if (schemaField.getType() instanceof LatLonPointSpatialField) {
+      return LatLonPointSpatialField.decodeDocValueToString(value);
+    }
+    
     if (schemaField.getType().getNumberType() == null) {
       log.warn("Couldn't decode docValues for field: [{}], schemaField: [{}], numberType is unknown",
           schemaField.getName(), schemaField);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/20d7d993/solr/core/src/test-files/solr/collection1/conf/schema-spatial.xml
----------------------------------------------------------------------
diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-spatial.xml b/solr/core/src/test-files/solr/collection1/conf/schema-spatial.xml
index d973fb0..50823e0 100644
--- a/solr/core/src/test-files/solr/collection1/conf/schema-spatial.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/schema-spatial.xml
@@ -86,8 +86,11 @@
   <field name="llp_idx" type="llp" indexed="true" docValues="false" />
   <field name="llp_dv" type="llp" indexed="false" docValues="true" />
   <field name="llp_1_dv_st" type="llp" indexed="false" docValues="true" stored="true" multiValued="false"/>
+  <field name="llp_N_dv_st" type="llp" indexed="false" docValues="true" stored="true" multiValued="true"/>
   <field name="llp_1_dv" type="llp" indexed="false" docValues="true" stored="false" multiValued="false" useDocValuesAsStored="false"/>
+  <field name="llp_N_dv" type="llp" indexed="false" docValues="true" stored="false" multiValued="true" useDocValuesAsStored="false"/>
   <field name="llp_1_dv_dvasst" type="llp" indexed="false" docValues="true" stored="false" multiValued="false"  useDocValuesAsStored="true"/>
+  <field name="llp_N_dv_dvasst" type="llp" indexed="false" docValues="true" stored="false" multiValued="true"  useDocValuesAsStored="true"/>
 
   <dynamicField name="bboxD_*" type="bbox" indexed="true"/>
   <dynamicField name="str_*" type="string" indexed="true" stored="true"/>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/20d7d993/solr/core/src/test/org/apache/solr/search/TestSolr4Spatial2.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/search/TestSolr4Spatial2.java b/solr/core/src/test/org/apache/solr/search/TestSolr4Spatial2.java
index dc21173..78d569a 100644
--- a/solr/core/src/test/org/apache/solr/search/TestSolr4Spatial2.java
+++ b/solr/core/src/test/org/apache/solr/search/TestSolr4Spatial2.java
@@ -16,15 +16,31 @@
  */
 package org.apache.solr.search;
 
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.stream.Collectors;
+
+import com.carrotsearch.randomizedtesting.annotations.Repeat;
+import org.apache.lucene.geo.GeoTestUtil;
 import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.client.solrj.SolrClient;
+import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
 import org.apache.solr.common.SolrException;
+import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.params.FacetParams;
 import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.metrics.MetricsMap;
 import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.util.SpatialUtils;
+import org.apache.solr.util.TestUtils;
 import org.junit.Before;
 import org.junit.BeforeClass;
 import org.junit.Test;
+import org.locationtech.spatial4j.context.SpatialContext;
+import org.locationtech.spatial4j.distance.DistanceUtils;
+import org.locationtech.spatial4j.shape.Point;
 
 //Unlike TestSolr4Spatial, not parametrized / not generic.
 public class TestSolr4Spatial2 extends SolrTestCaseJ4 {
@@ -117,24 +133,122 @@ public class TestSolr4Spatial2 extends SolrTestCaseJ4 {
         "q", "{!cache=false field f=" + fieldName + "}Intersects(" + polygonWKT + ")",
         "sort", "id asc"), "/response/numFound==2");
   }
-  
+
+  @Test @Repeat(iterations = 10)
+  public void testLLPDecodeIsStableAndPrecise() throws Exception {
+    // test that LatLonPointSpatialField decode of docValue will round-trip (re-index then re-decode) to the same value
+    @SuppressWarnings({"resource", "IOResourceOpenedButNotSafelyClosed"})
+    SolrClient client = new EmbeddedSolrServer(h.getCore());// do NOT close it; it will close Solr
+
+    final String fld = "llp_1_dv_dvasst";
+    String ptOrig = GeoTestUtil.nextLatitude() + "," + GeoTestUtil.nextLongitude();
+    assertU(adoc("id", "0", fld, ptOrig));
+    assertU(commit());
+    // retrieve it (probably less precision
+    String ptDecoded1 = (String) client.query(params("q", "id:0")).getResults().get(0).get(fld);
+    // now write it back
+    assertU(adoc("id", "0", fld, ptDecoded1));
+    assertU(commit());
+    // retrieve it and hopefully the same
+    String ptDecoded2 = (String) client.query(params("q", "id:0")).getResults().get(0).get(fld);
+    assertEquals("orig:" + ptOrig, ptDecoded1, ptDecoded2);
+
+    // test that the representation is pretty accurate
+    final Point ptOrigObj = SpatialUtils.parsePoint(ptOrig, SpatialContext.GEO);
+    final Point ptDecodedObj = SpatialUtils.parsePoint(ptDecoded1, SpatialContext.GEO);
+    double deltaCentimeters = SpatialContext.GEO.calcDistance(ptOrigObj, ptDecodedObj) * DistanceUtils.DEG_TO_KM * 1000.0 * 100.0;
+//    //See javadocs of LatLonDocValuesField
+//    final Point absErrorPt = SpatialContext.GEO.getShapeFactory().pointXY(8.381903171539307E-8, 4.190951585769653E-8);
+//    double deltaCentimetersMax
+//        = SpatialContext.GEO.calcDistance(absErrorPt, 0,0) * DistanceUtils.DEG_TO_KM * 1000.0 * 100.0;
+//    //  equals 1.0420371840922256   which is a bit lower than what we're able to do
+
+    assertTrue("deltaCm too high: " + deltaCentimeters, deltaCentimeters < 1.33);
+  }
+
   @Test
   public void testLatLonRetrieval() throws Exception {
-    assertU(adoc("id", "0",
-        "llp_1_dv_st", "-75,41",
-        "llp_1_dv", "-80,20",
-        "llp_1_dv_dvasst", "10,-30"));
+    final String ptHighPrecision = "40.2996543270,-74.0824956673";
+    final String ptLossOfPrecision = "40.2996544,-74.0824957"; // rounded version of the one above, losing precision
+
+    // "_1" is single, "_N" is multiValued
+    // "_dv" is docValues (otherwise not),  "_dvasst" is useDocValuesAsStored (otherwise not)
+    // "_st" is stored" (otherwise not)
+
+    List<RetrievalCombo> combos = Arrays.asList(
+        new RetrievalCombo("llp_1_dv_st", ptHighPrecision),
+        new RetrievalCombo("llp_N_dv_st", Arrays.asList("-40,40", "-45,45")),
+        new RetrievalCombo("llp_N_dv_st", Arrays.asList("-40,40")), // multiValued but 1 value
+
+        new RetrievalCombo("llp_1_dv_dvasst", ptHighPrecision, ptLossOfPrecision),
+        // this one comes back in a different order since it gets sorted low to high
+        new RetrievalCombo("llp_N_dv_dvasst", Arrays.asList("-40,40", "-45,45"), Arrays.asList("-45,45", "-40,40")),
+        new RetrievalCombo("llp_N_dv_dvasst", Arrays.asList("-40,40")), // multiValued but 1 value
+        // edge cases.  (note we sorted it as Lucene will internally)
+        new RetrievalCombo("llp_N_dv_dvasst", Arrays.asList(
+            "-90,180", "-90,-180",
+            "0,0", "0,180", "0,-180",
+            "90,0", "90,180", "90,-180")),
+
+        new RetrievalCombo("llp_1_dv", ptHighPrecision, ptLossOfPrecision),
+        new RetrievalCombo("llp_N_dv", Arrays.asList("-45,45", "-40,40"))
+
+        );
+    Collections.shuffle(combos, random());
+
+    // add and commit
+    for (RetrievalCombo combo : combos) {
+      SolrInputDocument doc = new SolrInputDocument();
+      doc.addField("id", "" + combo.id);
+      for (String indexValue : combo.indexValues) {
+        doc.addField(combo.fieldName, indexValue);
+      }
+      assertU(adoc(doc));
+      if (TestUtils.rarely()) { // induce segments to potentially change internal behavior
+        assertU(commit());
+      }
+    }
     assertU(commit());
-    assertJQ(req("q","*:*", "fl","*"),
-        "response/docs/[0]/llp_1_dv_st=='-75,41'",
-        // Right now we do not support decoding point value from dv field
-        "!response/docs/[0]/llp_1_dv=='-80,20'",
-        "!response/docs/[0]/llp_1_dv_dvasst=='10,-30'");
-    assertJQ(req("q","*:*", "fl","llp_1_dv_st, llp_1_dv, llp_1_dv_dvasst"),
-        "response/docs/[0]/llp_1_dv_st=='-75,41'",
-        // Even when these fields are specified, we won't return them
-        "!response/docs/[0]/llp_1_dv=='-80,20'",
-        "!response/docs/[0]/llp_1_dv_dvasst=='10,-30'");
+
+    // create an assertJQ assertion string, once for fl=*, another for when the field is listed
+    List<String> assertJQsFlListed = new ArrayList<>();
+    List<String> assertJQsFlStar = new ArrayList<>();
+    for (RetrievalCombo combo : combos) {
+      String expect = "response/docs/[" + combo.id + "]/" + combo.fieldName + "==" + combo.expectReturnJSON;
+      assertJQsFlListed.add(expect);
+      if (combo.fieldName.endsWith("_dv")) {
+        expect =  "response/docs/[" + combo.id + "]=={'id':'" + combo.id + "'}"; // only the id, nothing else
+      }
+      assertJQsFlStar.add(expect);
+    }
+    // check
+    assertJQ(req("q","*:*", "sort", "id asc",
+        "fl","*"),
+        assertJQsFlStar.toArray(new String[0]));
+    assertJQ(req("q","*:*", "sort", "id asc",
+        "fl", "id," + combos.stream().map(c -> c.fieldName).collect(Collectors.joining(","))),
+        assertJQsFlListed.toArray(new String[0]));
+  }
+
+  private static class RetrievalCombo {
+    static int idCounter = 0;
+    final int id = idCounter++;
+    final String fieldName;
+    final List<String> indexValues;
+    final String expectReturnJSON; //or null if not expected in response
+
+    RetrievalCombo(String fieldName, List<String> indexValues) { this(fieldName, indexValues, indexValues);}
+    RetrievalCombo(String fieldName, List<String> indexValues, List<String> returnValues) {
+      this.fieldName = fieldName;
+      this.indexValues = indexValues;
+      this.expectReturnJSON = returnValues.stream().collect(Collectors.joining("', '", "['", "']"));
+    }
+    RetrievalCombo(String fieldName, String indexValue) { this(fieldName, indexValue, indexValue); }
+    RetrievalCombo(String fieldName, String indexValue, String returnValue) {
+      this.fieldName = fieldName;
+      this.indexValues = Collections.singletonList(indexValue);
+      this.expectReturnJSON = "'" + returnValue + "'";
+    }
   }
 
   private void testRptWithGeometryField(String fieldName) throws Exception {