You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ds...@apache.org on 2019/08/20 21:20:23 UTC

[lucene-solr] branch branch_8x updated: LUCENE-8755: Spatial-extras quad and packed-quad trees now index points a little faster, and also fix an edge case bug. Fixes #824

This is an automated email from the ASF dual-hosted git repository.

dsmiley pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/branch_8x by this push:
     new c09ddcb  LUCENE-8755: Spatial-extras quad and packed-quad trees now index  points a little faster, and also fix an edge case bug.  Fixes #824
c09ddcb is described below

commit c09ddcb7180ccc4abaef25f654ffc053421353a4
Author: nppoly <np...@foxmail.com>
AuthorDate: Wed Aug 7 10:35:55 2019 +0200

    LUCENE-8755: Spatial-extras quad and packed-quad trees now index
     points a little faster, and also fix an edge case bug.
     Fixes #824
    
    (cherry picked from commit 26628b2717a73235db56fde94f7f5b64cbc5b8b2)
---
 lucene/CHANGES.txt                                 | 13 +++
 .../spatial/prefix/tree/PackedQuadPrefixTree.java  | 68 ++++++++++++---
 .../lucene/spatial/prefix/tree/QuadPrefixTree.java | 97 ++++++++++++++++++----
 .../prefix/tree/SpatialPrefixTreeFactory.java      | 33 +++++++-
 .../schema/AbstractSpatialPrefixTreeFieldType.java |  2 +
 .../solr/collection1/conf/schema-spatial.xml       |  8 +-
 .../org/apache/solr/search/TestSolr4Spatial2.java  |  7 ++
 7 files changed, 194 insertions(+), 34 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 9c3f11e..db29fa4 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -10,6 +10,11 @@ API Changes
 * LUCENE-8909: IndexWriter#getFieldNames() method is used to get fields present in index. After LUCENE-8316, this
   method is no longer required. Hence, deprecate IndexWriter#getFieldNames() method. (Adrien Grand, Munendra S N)
 
+* LUCENE-8755: SpatialPrefixTreeFactory now consumes the "version" parsed with Lucene's Version class.  The quad
+  and packed quad prefix trees are sensitive to this.  It's recommended to pass the version like you
+  should do likewise for analysis components for tokenized text, or else changes to the encoding in future versions
+  may be incompatible with older indexes.  (Chongchen Chen, David Smiley)
+
 New Features
 
 * LUCENE-8936: Add SpanishMinimalStemFilter (vinod kumar via Tomoko Uchida)
@@ -56,6 +61,14 @@ the total hits is not requested.
 * LUCENE-8941: Matches on wildcard queries will defer building their full
   disjunction until a MatchesIterator is pulled (Alan Woodward)
 
+* LUCENE-8755: spatial-extras quad and packed quad prefix trees now index points faster.
+  (Chongchen Chen, David Smiley)
+
+Bug Fixes
+
+* LUCENE-8755: spatial-extras quad and packed quad prefix trees could throw a
+  NullPointerException for certain cell edge coordinates (Chongchen Chen, David Smiley)
+
 Other
 
 * LUCENE-8778 LUCENE-8911: Define analyzer SPI names as static final fields and document the names in Javadocs.
diff --git a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/PackedQuadPrefixTree.java b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/PackedQuadPrefixTree.java
index 6046ed2..a7174a4 100644
--- a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/PackedQuadPrefixTree.java
+++ b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/PackedQuadPrefixTree.java
@@ -22,6 +22,7 @@ import java.util.List;
 import java.util.NoSuchElementException;
 
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.Version;
 import org.locationtech.spatial4j.context.SpatialContext;
 import org.locationtech.spatial4j.shape.Point;
 import org.locationtech.spatial4j.shape.Rectangle;
@@ -60,7 +61,9 @@ public class PackedQuadPrefixTree extends QuadPrefixTree {
   public static class Factory extends QuadPrefixTree.Factory {
     @Override
     protected SpatialPrefixTree newSPT() {
-      return new PackedQuadPrefixTree(ctx, maxLevels != null ? maxLevels : MAX_LEVELS_POSSIBLE);
+      PackedQuadPrefixTree tree = new PackedQuadPrefixTree(ctx, maxLevels != null ? maxLevels : MAX_LEVELS_POSSIBLE);
+      tree.robust = getVersion().onOrAfter(Version.LUCENE_8_3_0);
+      return tree;
     }
   }
 
@@ -83,25 +86,66 @@ public class PackedQuadPrefixTree extends QuadPrefixTree {
 
   @Override
   public Cell getCell(Point p, int level) {
-    List<Cell> cells = new ArrayList<>(1);
-    build(xmid, ymid, 0, cells, 0x0L, ctx.makePoint(p.getX(), p.getY()), level);
-    return cells.get(0);//note cells could be longer if p on edge
+    if (!robust) { // old method
+      List<Cell> cells = new ArrayList<>(1);
+      buildNotRobustly(xmid, ymid, 0, cells, 0x0L, ctx.makePoint(p.getX(), p.getY()), level);
+      if (!cells.isEmpty()) {
+        return cells.get(0);//note cells could be longer if p on edge
+      }
+    }
+
+    double currentXmid = xmid;
+    double currentYmid = ymid;
+    double xp = p.getX();
+    double yp = p.getY();
+    long  term = 0L;
+    int levelLimit = level > maxLevels ? maxLevels : level;
+    SpatialRelation rel = SpatialRelation.CONTAINS;
+    for (int lvl = 0; lvl < levelLimit; lvl++){
+      int quad = battenberg(currentXmid, currentYmid, xp, yp);
+      double halfWidth = levelW[lvl + 1];
+      double halfHeight = levelH[lvl + 1];
+      switch(quad){
+        case 0:
+          currentXmid -= halfWidth;
+          currentYmid += halfHeight;
+          break;
+        case 1:
+          currentXmid += halfWidth;
+          currentYmid += halfHeight;
+          break;
+        case 2:
+          currentXmid -= halfWidth;
+          currentYmid -= halfHeight;
+          break;
+        case 3:
+          currentXmid += halfWidth;
+          currentYmid -= halfHeight;
+          break;
+        default:
+      }
+      // set bits for next level
+      term |= (((long)(quad))<<(64-((lvl + 1)<<1)));
+      // increment level
+      term = ((term>>>1)+1)<<1;
+    }
+    return new PackedQuadCell(term, rel);
   }
 
-  protected void build(double x, double y, int level, List<Cell> matches, long term, Shape shape, int maxLevel) {
+  protected void buildNotRobustly(double x, double y, int level, List<Cell> matches, long term, Shape shape, int maxLevel) {
     double w = levelW[level] / 2;
     double h = levelH[level] / 2;
 
     // Z-Order
     // http://en.wikipedia.org/wiki/Z-order_%28curve%29
-    checkBattenberg(QUAD[0], x - w, y + h, level, matches, term, shape, maxLevel);
-    checkBattenberg(QUAD[1], x + w, y + h, level, matches, term, shape, maxLevel);
-    checkBattenberg(QUAD[2], x - w, y - h, level, matches, term, shape, maxLevel);
-    checkBattenberg(QUAD[3], x + w, y - h, level, matches, term, shape, maxLevel);
+    checkBattenbergNotRobustly(QUAD[0], x - w, y + h, level, matches, term, shape, maxLevel);
+    checkBattenbergNotRobustly(QUAD[1], x + w, y + h, level, matches, term, shape, maxLevel);
+    checkBattenbergNotRobustly(QUAD[2], x - w, y - h, level, matches, term, shape, maxLevel);
+    checkBattenbergNotRobustly(QUAD[3], x + w, y - h, level, matches, term, shape, maxLevel);
   }
 
-  protected void checkBattenberg(byte quad, double cx, double cy, int level, List<Cell> matches,
-                               long term, Shape shape, int maxLevel) {
+  protected void checkBattenbergNotRobustly(byte quad, double cx, double cy, int level, List<Cell> matches,
+                                 long term, Shape shape, int maxLevel) {
     // short-circuit if we find a match for the point (no need to continue recursion)
     if (shape instanceof Point && !matches.isEmpty())
       return;
@@ -122,7 +166,7 @@ public class PackedQuadPrefixTree extends QuadPrefixTree {
     if (SpatialRelation.CONTAINS == v || (level >= maxLevel)) {
       matches.add(new PackedQuadCell(term, v.transpose()));
     } else {// SpatialRelation.WITHIN, SpatialRelation.INTERSECTS
-      build(cx, cy, level, matches, term, shape, maxLevel);
+      buildNotRobustly(cx, cy, level, matches, term, shape, maxLevel);
     }
   }
 
diff --git a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/QuadPrefixTree.java b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/QuadPrefixTree.java
index 9bc947f..6133e26 100644
--- a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/QuadPrefixTree.java
+++ b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/QuadPrefixTree.java
@@ -23,6 +23,7 @@ import java.util.Collection;
 import java.util.List;
 import java.util.Locale;
 
+import org.apache.lucene.util.Version;
 import org.locationtech.spatial4j.context.SpatialContext;
 import org.locationtech.spatial4j.shape.Point;
 import org.locationtech.spatial4j.shape.Rectangle;
@@ -43,17 +44,17 @@ public class QuadPrefixTree extends LegacyPrefixTree {
    * Factory for creating {@link QuadPrefixTree} instances with useful defaults
    */
   public static class Factory extends SpatialPrefixTreeFactory {
-
     @Override
     protected int getLevelForDistance(double degrees) {
-      QuadPrefixTree grid = new QuadPrefixTree(ctx, MAX_LEVELS_POSSIBLE);
-      return grid.getLevelForDistance(degrees);
+      return newSPT().getLevelForDistance(degrees);
     }
 
     @Override
     protected SpatialPrefixTree newSPT() {
-      return new QuadPrefixTree(ctx,
+      QuadPrefixTree tree = new QuadPrefixTree(ctx,
           maxLevels != null ? maxLevels : MAX_LEVELS_POSSIBLE);
+      tree.robust = getVersion().onOrAfter(Version.LUCENE_8_3_0);
+      return tree;
     }
   }
 
@@ -75,6 +76,8 @@ public class QuadPrefixTree extends LegacyPrefixTree {
   final int[]    levelS; // side
   final int[]    levelN; // number
 
+  protected boolean robust = true; // for backward compatibility, use the old method if user specified old version.
+
   public QuadPrefixTree(
       SpatialContext ctx, Rectangle bounds, int maxLevels) {
     super(ctx, maxLevels);
@@ -83,10 +86,10 @@ public class QuadPrefixTree extends LegacyPrefixTree {
     this.ymin = bounds.getMinY();
     this.ymax = bounds.getMaxY();
 
-    levelW = new double[maxLevels];
-    levelH = new double[maxLevels];
-    levelS = new int[maxLevels];
-    levelN = new int[maxLevels];
+    levelW = new double[maxLevels + 1];
+    levelH = new double[maxLevels + 1];
+    levelS = new int[maxLevels + 1];
+    levelN = new int[maxLevels + 1];
 
     gridW = xmax - xmin;
     gridH = ymax - ymin;
@@ -146,12 +149,50 @@ public class QuadPrefixTree extends LegacyPrefixTree {
 
   @Override
   public Cell getCell(Point p, int level) {
-    List<Cell> cells = new ArrayList<>(1);
-    build(xmid, ymid, 0, cells, new BytesRef(maxLevels+1), ctx.makePoint(p.getX(),p.getY()), level);
-    return cells.get(0);//note cells could be longer if p on edge
+    if (!robust) { // old method
+      List<Cell> cells = new ArrayList<>(1);
+      buildNotRobustly(xmid, ymid, 0, cells, new BytesRef(maxLevels+1), ctx.makePoint(p.getX(),p.getY()), level);
+      if (!cells.isEmpty()) {
+        return cells.get(0);//note cells could be longer if p on edge
+      }
+    }
+
+    double currentXmid = xmid;
+    double currentYmid = ymid;
+    double xp = p.getX();
+    double yp = p.getY();
+    BytesRef str = new BytesRef(maxLevels+1);
+    int levelLimit = level > maxLevels ? maxLevels : level;
+    SpatialRelation rel = SpatialRelation.CONTAINS;
+    for (int lvl = 0; lvl < levelLimit; lvl++){
+      int c = battenberg(currentXmid, currentYmid, xp, yp);
+      double halfWidth = levelW[lvl + 1];
+      double halfHeight = levelH[lvl + 1];
+      switch(c){
+        case 0:
+          currentXmid -= halfWidth;
+          currentYmid += halfHeight;
+          break;
+        case 1:
+          currentXmid += halfWidth;
+          currentYmid += halfHeight;
+          break;
+        case 2:
+          currentXmid -= halfWidth;
+          currentYmid -= halfHeight;
+          break;
+        case 3:
+          currentXmid += halfWidth;
+          currentYmid -= halfHeight;
+          break;
+        default:
+      }
+      str.bytes[str.length++] = (byte)('A' + c);
+    }
+    return new QuadCell(str, rel);
   }
 
-  private void build(
+  private void buildNotRobustly(
       double x,
       double y,
       int level,
@@ -165,10 +206,10 @@ public class QuadPrefixTree extends LegacyPrefixTree {
 
     // Z-Order
     // http://en.wikipedia.org/wiki/Z-order_%28curve%29
-    checkBattenberg('A', x - w, y + h, level, matches, str, shape, maxLevel);
-    checkBattenberg('B', x + w, y + h, level, matches, str, shape, maxLevel);
-    checkBattenberg('C', x - w, y - h, level, matches, str, shape, maxLevel);
-    checkBattenberg('D', x + w, y - h, level, matches, str, shape, maxLevel);
+    checkBattenbergNotRobustly('A', x - w, y + h, level, matches, str, shape, maxLevel);
+    checkBattenbergNotRobustly('B', x + w, y + h, level, matches, str, shape, maxLevel);
+    checkBattenbergNotRobustly('C', x - w, y - h, level, matches, str, shape, maxLevel);
+    checkBattenbergNotRobustly('D', x + w, y - h, level, matches, str, shape, maxLevel);
 
     // possibly consider hilbert curve
     // http://en.wikipedia.org/wiki/Hilbert_curve
@@ -176,7 +217,7 @@ public class QuadPrefixTree extends LegacyPrefixTree {
     // if we actually use the range property in the query, this could be useful
   }
 
-  protected void checkBattenberg(
+  protected void checkBattenbergNotRobustly(
       char c,
       double cx,
       double cy,
@@ -207,12 +248,32 @@ public class QuadPrefixTree extends LegacyPrefixTree {
         //str.append(SpatialPrefixGrid.INTERSECTS);
         matches.add(new QuadCell(BytesRef.deepCopyOf(str), v.transpose()));
       } else {
-        build(cx, cy, nextLevel, matches, str, shape, maxLevel);
+        buildNotRobustly(cx, cy, nextLevel, matches, str, shape, maxLevel);
       }
     }
     str.length = strlen;
   }
 
+  /** Returns a Z-Order quadrant [0-3]. */
+  protected int battenberg(double xmid, double ymid, double xp, double yp){
+    // http://en.wikipedia.org/wiki/Z-order_%28curve%29
+    if (ymid <= yp) {
+      if (xmid >= xp) {
+        return 0;
+      }
+      return 1;
+    } else {
+      if (xmid >= xp) {
+        return 2;
+      }
+      return 3;
+    }
+    // possibly consider hilbert curve
+    // http://en.wikipedia.org/wiki/Hilbert_curve
+    // http://blog.notdot.net/2009/11/Damn-Cool-Algorithms-Spatial-indexing-with-Quadtrees-and-Hilbert-Curves
+    // if we actually use the range property in the query, this could be useful
+  }
+
   protected class QuadCell extends LegacyCell {
 
     QuadCell(byte[] bytes, int off, int len) {
diff --git a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTreeFactory.java b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTreeFactory.java
index d88e41a..cdc3758 100644
--- a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTreeFactory.java
+++ b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTreeFactory.java
@@ -16,8 +16,10 @@
  */
 package org.apache.lucene.spatial.prefix.tree;
 
+import java.text.ParseException;
 import java.util.Map;
 
+import org.apache.lucene.util.Version;
 import org.locationtech.spatial4j.context.SpatialContext;
 import org.locationtech.spatial4j.distance.DistanceUtils;
 
@@ -33,14 +35,17 @@ public abstract class SpatialPrefixTreeFactory {
   public static final String PREFIX_TREE = "prefixTree";
   public static final String MAX_LEVELS = "maxLevels";
   public static final String MAX_DIST_ERR = "maxDistErr";
+  public static final String VERSION = "version";
 
   protected Map<String, String> args;
   protected SpatialContext ctx;
   protected Integer maxLevels;
+  private Version version;
 
   /**
-   * The factory  is looked up via "prefixTree" in args, expecting "geohash" or "quad".
+   * The factory is looked up via "prefixTree" in args, expecting "geohash" or "quad".
    * If it's neither of these, then "geohash" is chosen for a geo context, otherwise "quad" is chosen.
+   * The "version" arg, if present, is parsed with {@link Version} and the prefix tree might be sensitive to it.
    */
   public static SpatialPrefixTree makeSPT(Map<String,String> args, ClassLoader classLoader, SpatialContext ctx) {
     //TODO refactor to use Java SPI like how Lucene already does for codecs/postingsFormats, etc
@@ -64,16 +69,26 @@ public abstract class SpatialPrefixTreeFactory {
         throw new RuntimeException(e);
       }
     }
-    instance.init(args,ctx);
+    instance.init(args, ctx);
     return instance.newSPT();
   }
 
   protected void init(Map<String, String> args, SpatialContext ctx) {
     this.args = args;
     this.ctx = ctx;
+    initVersion();
     initMaxLevels();
   }
 
+  protected void initVersion() {
+    String versionStr = args.get(VERSION);
+    try {
+      setVersion(versionStr == null ? Version.LATEST : Version.parseLeniently(versionStr));
+    } catch (ParseException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
   protected void initMaxLevels() {
     String mlStr = args.get(MAX_LEVELS);
     if (mlStr != null) {
@@ -94,6 +109,20 @@ public abstract class SpatialPrefixTreeFactory {
     maxLevels = getLevelForDistance(degrees);
   }
 
+  /**
+   * Set the version of Lucene this tree should mimic the behavior for for analysis.
+   */
+  public void setVersion(Version v) {
+    version = v;
+  }
+
+  /**
+   * Return the version of Lucene this tree will mimic the behavior of for analysis.
+   */
+  public Version getVersion() {
+    return version;
+  }
+
   /** Calls {@link SpatialPrefixTree#getLevelForDistance(double)}. */
   protected abstract int getLevelForDistance(double degrees);
 
diff --git a/solr/core/src/java/org/apache/solr/schema/AbstractSpatialPrefixTreeFieldType.java b/solr/core/src/java/org/apache/solr/schema/AbstractSpatialPrefixTreeFieldType.java
index b6e9106..70c7b3d 100644
--- a/solr/core/src/java/org/apache/solr/schema/AbstractSpatialPrefixTreeFieldType.java
+++ b/solr/core/src/java/org/apache/solr/schema/AbstractSpatialPrefixTreeFieldType.java
@@ -49,6 +49,8 @@ public abstract class AbstractSpatialPrefixTreeFieldType<T extends PrefixTreeStr
   protected void init(IndexSchema schema, Map<String, String> args) {
     super.init(schema, args);
 
+    args.putIfAbsent(SpatialPrefixTreeFactory.VERSION, schema.getDefaultLuceneMatchVersion().toString());
+
     // Convert the maxDistErr to degrees (based on distanceUnits) since Lucene spatial layer depends on degrees
     if(args.containsKey(SpatialPrefixTreeFactory.MAX_DIST_ERR)) {
       double maxDistErrOriginal = Double.parseDouble(args.get(SpatialPrefixTreeFactory.MAX_DIST_ERR));
diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-spatial.xml b/solr/core/src/test-files/solr/collection1/conf/schema-spatial.xml
index 535c851..3f09e3a 100644
--- a/solr/core/src/test-files/solr/collection1/conf/schema-spatial.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/schema-spatial.xml
@@ -72,9 +72,13 @@
              numberType="pdouble" distanceUnits="degrees" storeSubFields="false"/>
 
   <fieldType name="llp" class="solr.LatLonPointSpatialField" distanceUnits="degrees" multiValued="true" />
-
+  <fieldType name="oslocation" class="solr.SpatialRecursivePrefixTreeFieldType" geo="false" maxDistErr="0.000009"
+             worldBounds="ENVELOPE(0,700000,1300000,0)" distErrPct="0.15"/>
+  <fieldType name="oslocationold" class="solr.SpatialRecursivePrefixTreeFieldType" geo="false" maxDistErr="0.000009"
+             worldBounds="ENVELOPE(0,700000,1300000,0)" distErrPct="0.15" version="8.2.0"/>
   <field name="id" type="string" required="true"/>
-
+  <field name="oslocation" type="oslocation" />
+  <field name="oslocationold" type="oslocationold" />
   <field name="srpt_geohash" type="srpt_geohash" multiValued="true"/>
   <field name="srpt_quad" type="srpt_quad" multiValued="true"/>
   <field name="srpt_packedquad" type="srpt_packedquad" multiValued="true"/>
diff --git a/solr/core/src/test/org/apache/solr/search/TestSolr4Spatial2.java b/solr/core/src/test/org/apache/solr/search/TestSolr4Spatial2.java
index d7fde70..cbbba42 100644
--- a/solr/core/src/test/org/apache/solr/search/TestSolr4Spatial2.java
+++ b/solr/core/src/test/org/apache/solr/search/TestSolr4Spatial2.java
@@ -61,6 +61,13 @@ public class TestSolr4Spatial2 extends SolrTestCaseJ4 {
   }
 
   @Test
+  public void testQuadTreeRobustness() {
+    assertU(adoc("id", "0", "oslocation", "244502.06 639062.07"));
+    // old (pre 8.3.0) still works
+    assertU(adoc("id", "0", "oslocationold", "244502.06 639062.07"));
+  }
+
+  @Test
   public void testBBox() throws Exception {
     String fieldName = random().nextBoolean() ? "bbox" : "bboxD_dynamic";
     assertU(adoc("id", "0"));//nothing