You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ds...@apache.org on 2019/08/20 21:20:23 UTC
[lucene-solr] branch branch_8x updated: LUCENE-8755: Spatial-extras
quad and packed-quad trees now index points a little faster,
and also fix an edge case bug. Fixes #824
This is an automated email from the ASF dual-hosted git repository.
dsmiley pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/branch_8x by this push:
new c09ddcb LUCENE-8755: Spatial-extras quad and packed-quad trees now index points a little faster, and also fix an edge case bug. Fixes #824
c09ddcb is described below
commit c09ddcb7180ccc4abaef25f654ffc053421353a4
Author: nppoly <np...@foxmail.com>
AuthorDate: Wed Aug 7 10:35:55 2019 +0200
LUCENE-8755: Spatial-extras quad and packed-quad trees now index
points a little faster, and also fix an edge case bug.
Fixes #824
(cherry picked from commit 26628b2717a73235db56fde94f7f5b64cbc5b8b2)
---
lucene/CHANGES.txt | 13 +++
.../spatial/prefix/tree/PackedQuadPrefixTree.java | 68 ++++++++++++---
.../lucene/spatial/prefix/tree/QuadPrefixTree.java | 97 ++++++++++++++++++----
.../prefix/tree/SpatialPrefixTreeFactory.java | 33 +++++++-
.../schema/AbstractSpatialPrefixTreeFieldType.java | 2 +
.../solr/collection1/conf/schema-spatial.xml | 8 +-
.../org/apache/solr/search/TestSolr4Spatial2.java | 7 ++
7 files changed, 194 insertions(+), 34 deletions(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 9c3f11e..db29fa4 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -10,6 +10,11 @@ API Changes
* LUCENE-8909: IndexWriter#getFieldNames() method is used to get fields present in index. After LUCENE-8316, this
method is no longer required. Hence, deprecate IndexWriter#getFieldNames() method. (Adrien Grand, Munendra S N)
+* LUCENE-8755: SpatialPrefixTreeFactory now consumes the "version" parsed with Lucene's Version class. The quad
+ and packed quad prefix trees are sensitive to this. It's recommended to pass the version like you
+ should do likewise for analysis components for tokenized text, or else changes to the encoding in future versions
+ may be incompatible with older indexes. (Chongchen Chen, David Smiley)
+
New Features
* LUCENE-8936: Add SpanishMinimalStemFilter (vinod kumar via Tomoko Uchida)
@@ -56,6 +61,14 @@ the total hits is not requested.
* LUCENE-8941: Matches on wildcard queries will defer building their full
disjunction until a MatchesIterator is pulled (Alan Woodward)
+* LUCENE-8755: spatial-extras quad and packed quad prefix trees now index points faster.
+ (Chongchen Chen, David Smiley)
+
+Bug Fixes
+
+* LUCENE-8755: spatial-extras quad and packed quad prefix trees could throw a
+ NullPointerException for certain cell edge coordinates (Chongchen Chen, David Smiley)
+
Other
* LUCENE-8778 LUCENE-8911: Define analyzer SPI names as static final fields and document the names in Javadocs.
diff --git a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/PackedQuadPrefixTree.java b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/PackedQuadPrefixTree.java
index 6046ed2..a7174a4 100644
--- a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/PackedQuadPrefixTree.java
+++ b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/PackedQuadPrefixTree.java
@@ -22,6 +22,7 @@ import java.util.List;
import java.util.NoSuchElementException;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.Version;
import org.locationtech.spatial4j.context.SpatialContext;
import org.locationtech.spatial4j.shape.Point;
import org.locationtech.spatial4j.shape.Rectangle;
@@ -60,7 +61,9 @@ public class PackedQuadPrefixTree extends QuadPrefixTree {
public static class Factory extends QuadPrefixTree.Factory {
@Override
protected SpatialPrefixTree newSPT() {
- return new PackedQuadPrefixTree(ctx, maxLevels != null ? maxLevels : MAX_LEVELS_POSSIBLE);
+ PackedQuadPrefixTree tree = new PackedQuadPrefixTree(ctx, maxLevels != null ? maxLevels : MAX_LEVELS_POSSIBLE);
+ tree.robust = getVersion().onOrAfter(Version.LUCENE_8_3_0);
+ return tree;
}
}
@@ -83,25 +86,66 @@ public class PackedQuadPrefixTree extends QuadPrefixTree {
@Override
public Cell getCell(Point p, int level) {
- List<Cell> cells = new ArrayList<>(1);
- build(xmid, ymid, 0, cells, 0x0L, ctx.makePoint(p.getX(), p.getY()), level);
- return cells.get(0);//note cells could be longer if p on edge
+ if (!robust) { // old method
+ List<Cell> cells = new ArrayList<>(1);
+ buildNotRobustly(xmid, ymid, 0, cells, 0x0L, ctx.makePoint(p.getX(), p.getY()), level);
+ if (!cells.isEmpty()) {
+ return cells.get(0);//note cells could be longer if p on edge
+ }
+ }
+
+ double currentXmid = xmid;
+ double currentYmid = ymid;
+ double xp = p.getX();
+ double yp = p.getY();
+ long term = 0L;
+ int levelLimit = level > maxLevels ? maxLevels : level;
+ SpatialRelation rel = SpatialRelation.CONTAINS;
+ for (int lvl = 0; lvl < levelLimit; lvl++){
+ int quad = battenberg(currentXmid, currentYmid, xp, yp);
+ double halfWidth = levelW[lvl + 1];
+ double halfHeight = levelH[lvl + 1];
+ switch(quad){
+ case 0:
+ currentXmid -= halfWidth;
+ currentYmid += halfHeight;
+ break;
+ case 1:
+ currentXmid += halfWidth;
+ currentYmid += halfHeight;
+ break;
+ case 2:
+ currentXmid -= halfWidth;
+ currentYmid -= halfHeight;
+ break;
+ case 3:
+ currentXmid += halfWidth;
+ currentYmid -= halfHeight;
+ break;
+ default:
+ }
+ // set bits for next level
+ term |= (((long)(quad))<<(64-((lvl + 1)<<1)));
+ // increment level
+ term = ((term>>>1)+1)<<1;
+ }
+ return new PackedQuadCell(term, rel);
}
- protected void build(double x, double y, int level, List<Cell> matches, long term, Shape shape, int maxLevel) {
+ protected void buildNotRobustly(double x, double y, int level, List<Cell> matches, long term, Shape shape, int maxLevel) {
double w = levelW[level] / 2;
double h = levelH[level] / 2;
// Z-Order
// http://en.wikipedia.org/wiki/Z-order_%28curve%29
- checkBattenberg(QUAD[0], x - w, y + h, level, matches, term, shape, maxLevel);
- checkBattenberg(QUAD[1], x + w, y + h, level, matches, term, shape, maxLevel);
- checkBattenberg(QUAD[2], x - w, y - h, level, matches, term, shape, maxLevel);
- checkBattenberg(QUAD[3], x + w, y - h, level, matches, term, shape, maxLevel);
+ checkBattenbergNotRobustly(QUAD[0], x - w, y + h, level, matches, term, shape, maxLevel);
+ checkBattenbergNotRobustly(QUAD[1], x + w, y + h, level, matches, term, shape, maxLevel);
+ checkBattenbergNotRobustly(QUAD[2], x - w, y - h, level, matches, term, shape, maxLevel);
+ checkBattenbergNotRobustly(QUAD[3], x + w, y - h, level, matches, term, shape, maxLevel);
}
- protected void checkBattenberg(byte quad, double cx, double cy, int level, List<Cell> matches,
- long term, Shape shape, int maxLevel) {
+ protected void checkBattenbergNotRobustly(byte quad, double cx, double cy, int level, List<Cell> matches,
+ long term, Shape shape, int maxLevel) {
// short-circuit if we find a match for the point (no need to continue recursion)
if (shape instanceof Point && !matches.isEmpty())
return;
@@ -122,7 +166,7 @@ public class PackedQuadPrefixTree extends QuadPrefixTree {
if (SpatialRelation.CONTAINS == v || (level >= maxLevel)) {
matches.add(new PackedQuadCell(term, v.transpose()));
} else {// SpatialRelation.WITHIN, SpatialRelation.INTERSECTS
- build(cx, cy, level, matches, term, shape, maxLevel);
+ buildNotRobustly(cx, cy, level, matches, term, shape, maxLevel);
}
}
diff --git a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/QuadPrefixTree.java b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/QuadPrefixTree.java
index 9bc947f..6133e26 100644
--- a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/QuadPrefixTree.java
+++ b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/QuadPrefixTree.java
@@ -23,6 +23,7 @@ import java.util.Collection;
import java.util.List;
import java.util.Locale;
+import org.apache.lucene.util.Version;
import org.locationtech.spatial4j.context.SpatialContext;
import org.locationtech.spatial4j.shape.Point;
import org.locationtech.spatial4j.shape.Rectangle;
@@ -43,17 +44,17 @@ public class QuadPrefixTree extends LegacyPrefixTree {
* Factory for creating {@link QuadPrefixTree} instances with useful defaults
*/
public static class Factory extends SpatialPrefixTreeFactory {
-
@Override
protected int getLevelForDistance(double degrees) {
- QuadPrefixTree grid = new QuadPrefixTree(ctx, MAX_LEVELS_POSSIBLE);
- return grid.getLevelForDistance(degrees);
+ return newSPT().getLevelForDistance(degrees);
}
@Override
protected SpatialPrefixTree newSPT() {
- return new QuadPrefixTree(ctx,
+ QuadPrefixTree tree = new QuadPrefixTree(ctx,
maxLevels != null ? maxLevels : MAX_LEVELS_POSSIBLE);
+ tree.robust = getVersion().onOrAfter(Version.LUCENE_8_3_0);
+ return tree;
}
}
@@ -75,6 +76,8 @@ public class QuadPrefixTree extends LegacyPrefixTree {
final int[] levelS; // side
final int[] levelN; // number
+ protected boolean robust = true; // for backward compatibility, use the old method if user specified old version.
+
public QuadPrefixTree(
SpatialContext ctx, Rectangle bounds, int maxLevels) {
super(ctx, maxLevels);
@@ -83,10 +86,10 @@ public class QuadPrefixTree extends LegacyPrefixTree {
this.ymin = bounds.getMinY();
this.ymax = bounds.getMaxY();
- levelW = new double[maxLevels];
- levelH = new double[maxLevels];
- levelS = new int[maxLevels];
- levelN = new int[maxLevels];
+ levelW = new double[maxLevels + 1];
+ levelH = new double[maxLevels + 1];
+ levelS = new int[maxLevels + 1];
+ levelN = new int[maxLevels + 1];
gridW = xmax - xmin;
gridH = ymax - ymin;
@@ -146,12 +149,50 @@ public class QuadPrefixTree extends LegacyPrefixTree {
@Override
public Cell getCell(Point p, int level) {
- List<Cell> cells = new ArrayList<>(1);
- build(xmid, ymid, 0, cells, new BytesRef(maxLevels+1), ctx.makePoint(p.getX(),p.getY()), level);
- return cells.get(0);//note cells could be longer if p on edge
+ if (!robust) { // old method
+ List<Cell> cells = new ArrayList<>(1);
+ buildNotRobustly(xmid, ymid, 0, cells, new BytesRef(maxLevels+1), ctx.makePoint(p.getX(),p.getY()), level);
+ if (!cells.isEmpty()) {
+ return cells.get(0);//note cells could be longer if p on edge
+ }
+ }
+
+ double currentXmid = xmid;
+ double currentYmid = ymid;
+ double xp = p.getX();
+ double yp = p.getY();
+ BytesRef str = new BytesRef(maxLevels+1);
+ int levelLimit = level > maxLevels ? maxLevels : level;
+ SpatialRelation rel = SpatialRelation.CONTAINS;
+ for (int lvl = 0; lvl < levelLimit; lvl++){
+ int c = battenberg(currentXmid, currentYmid, xp, yp);
+ double halfWidth = levelW[lvl + 1];
+ double halfHeight = levelH[lvl + 1];
+ switch(c){
+ case 0:
+ currentXmid -= halfWidth;
+ currentYmid += halfHeight;
+ break;
+ case 1:
+ currentXmid += halfWidth;
+ currentYmid += halfHeight;
+ break;
+ case 2:
+ currentXmid -= halfWidth;
+ currentYmid -= halfHeight;
+ break;
+ case 3:
+ currentXmid += halfWidth;
+ currentYmid -= halfHeight;
+ break;
+ default:
+ }
+ str.bytes[str.length++] = (byte)('A' + c);
+ }
+ return new QuadCell(str, rel);
}
- private void build(
+ private void buildNotRobustly(
double x,
double y,
int level,
@@ -165,10 +206,10 @@ public class QuadPrefixTree extends LegacyPrefixTree {
// Z-Order
// http://en.wikipedia.org/wiki/Z-order_%28curve%29
- checkBattenberg('A', x - w, y + h, level, matches, str, shape, maxLevel);
- checkBattenberg('B', x + w, y + h, level, matches, str, shape, maxLevel);
- checkBattenberg('C', x - w, y - h, level, matches, str, shape, maxLevel);
- checkBattenberg('D', x + w, y - h, level, matches, str, shape, maxLevel);
+ checkBattenbergNotRobustly('A', x - w, y + h, level, matches, str, shape, maxLevel);
+ checkBattenbergNotRobustly('B', x + w, y + h, level, matches, str, shape, maxLevel);
+ checkBattenbergNotRobustly('C', x - w, y - h, level, matches, str, shape, maxLevel);
+ checkBattenbergNotRobustly('D', x + w, y - h, level, matches, str, shape, maxLevel);
// possibly consider hilbert curve
// http://en.wikipedia.org/wiki/Hilbert_curve
@@ -176,7 +217,7 @@ public class QuadPrefixTree extends LegacyPrefixTree {
// if we actually use the range property in the query, this could be useful
}
- protected void checkBattenberg(
+ protected void checkBattenbergNotRobustly(
char c,
double cx,
double cy,
@@ -207,12 +248,32 @@ public class QuadPrefixTree extends LegacyPrefixTree {
//str.append(SpatialPrefixGrid.INTERSECTS);
matches.add(new QuadCell(BytesRef.deepCopyOf(str), v.transpose()));
} else {
- build(cx, cy, nextLevel, matches, str, shape, maxLevel);
+ buildNotRobustly(cx, cy, nextLevel, matches, str, shape, maxLevel);
}
}
str.length = strlen;
}
+ /** Returns a Z-Order quadrant [0-3]. */
+ protected int battenberg(double xmid, double ymid, double xp, double yp){
+ // http://en.wikipedia.org/wiki/Z-order_%28curve%29
+ if (ymid <= yp) {
+ if (xmid >= xp) {
+ return 0;
+ }
+ return 1;
+ } else {
+ if (xmid >= xp) {
+ return 2;
+ }
+ return 3;
+ }
+ // possibly consider hilbert curve
+ // http://en.wikipedia.org/wiki/Hilbert_curve
+ // http://blog.notdot.net/2009/11/Damn-Cool-Algorithms-Spatial-indexing-with-Quadtrees-and-Hilbert-Curves
+ // if we actually use the range property in the query, this could be useful
+ }
+
protected class QuadCell extends LegacyCell {
QuadCell(byte[] bytes, int off, int len) {
diff --git a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTreeFactory.java b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTreeFactory.java
index d88e41a..cdc3758 100644
--- a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTreeFactory.java
+++ b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTreeFactory.java
@@ -16,8 +16,10 @@
*/
package org.apache.lucene.spatial.prefix.tree;
+import java.text.ParseException;
import java.util.Map;
+import org.apache.lucene.util.Version;
import org.locationtech.spatial4j.context.SpatialContext;
import org.locationtech.spatial4j.distance.DistanceUtils;
@@ -33,14 +35,17 @@ public abstract class SpatialPrefixTreeFactory {
public static final String PREFIX_TREE = "prefixTree";
public static final String MAX_LEVELS = "maxLevels";
public static final String MAX_DIST_ERR = "maxDistErr";
+ public static final String VERSION = "version";
protected Map<String, String> args;
protected SpatialContext ctx;
protected Integer maxLevels;
+ private Version version;
/**
- * The factory is looked up via "prefixTree" in args, expecting "geohash" or "quad".
+ * The factory is looked up via "prefixTree" in args, expecting "geohash" or "quad".
* If it's neither of these, then "geohash" is chosen for a geo context, otherwise "quad" is chosen.
+ * The "version" arg, if present, is parsed with {@link Version} and the prefix tree might be sensitive to it.
*/
public static SpatialPrefixTree makeSPT(Map<String,String> args, ClassLoader classLoader, SpatialContext ctx) {
//TODO refactor to use Java SPI like how Lucene already does for codecs/postingsFormats, etc
@@ -64,16 +69,26 @@ public abstract class SpatialPrefixTreeFactory {
throw new RuntimeException(e);
}
}
- instance.init(args,ctx);
+ instance.init(args, ctx);
return instance.newSPT();
}
protected void init(Map<String, String> args, SpatialContext ctx) {
this.args = args;
this.ctx = ctx;
+ initVersion();
initMaxLevels();
}
+ protected void initVersion() {
+ String versionStr = args.get(VERSION);
+ try {
+ setVersion(versionStr == null ? Version.LATEST : Version.parseLeniently(versionStr));
+ } catch (ParseException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
protected void initMaxLevels() {
String mlStr = args.get(MAX_LEVELS);
if (mlStr != null) {
@@ -94,6 +109,20 @@ public abstract class SpatialPrefixTreeFactory {
maxLevels = getLevelForDistance(degrees);
}
+ /**
+ * Set the version of Lucene this tree should mimic the behavior for for analysis.
+ */
+ public void setVersion(Version v) {
+ version = v;
+ }
+
+ /**
+ * Return the version of Lucene this tree will mimic the behavior of for analysis.
+ */
+ public Version getVersion() {
+ return version;
+ }
+
/** Calls {@link SpatialPrefixTree#getLevelForDistance(double)}. */
protected abstract int getLevelForDistance(double degrees);
diff --git a/solr/core/src/java/org/apache/solr/schema/AbstractSpatialPrefixTreeFieldType.java b/solr/core/src/java/org/apache/solr/schema/AbstractSpatialPrefixTreeFieldType.java
index b6e9106..70c7b3d 100644
--- a/solr/core/src/java/org/apache/solr/schema/AbstractSpatialPrefixTreeFieldType.java
+++ b/solr/core/src/java/org/apache/solr/schema/AbstractSpatialPrefixTreeFieldType.java
@@ -49,6 +49,8 @@ public abstract class AbstractSpatialPrefixTreeFieldType<T extends PrefixTreeStr
protected void init(IndexSchema schema, Map<String, String> args) {
super.init(schema, args);
+ args.putIfAbsent(SpatialPrefixTreeFactory.VERSION, schema.getDefaultLuceneMatchVersion().toString());
+
// Convert the maxDistErr to degrees (based on distanceUnits) since Lucene spatial layer depends on degrees
if(args.containsKey(SpatialPrefixTreeFactory.MAX_DIST_ERR)) {
double maxDistErrOriginal = Double.parseDouble(args.get(SpatialPrefixTreeFactory.MAX_DIST_ERR));
diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-spatial.xml b/solr/core/src/test-files/solr/collection1/conf/schema-spatial.xml
index 535c851..3f09e3a 100644
--- a/solr/core/src/test-files/solr/collection1/conf/schema-spatial.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/schema-spatial.xml
@@ -72,9 +72,13 @@
numberType="pdouble" distanceUnits="degrees" storeSubFields="false"/>
<fieldType name="llp" class="solr.LatLonPointSpatialField" distanceUnits="degrees" multiValued="true" />
-
+ <fieldType name="oslocation" class="solr.SpatialRecursivePrefixTreeFieldType" geo="false" maxDistErr="0.000009"
+ worldBounds="ENVELOPE(0,700000,1300000,0)" distErrPct="0.15"/>
+ <fieldType name="oslocationold" class="solr.SpatialRecursivePrefixTreeFieldType" geo="false" maxDistErr="0.000009"
+ worldBounds="ENVELOPE(0,700000,1300000,0)" distErrPct="0.15" version="8.2.0"/>
<field name="id" type="string" required="true"/>
-
+ <field name="oslocation" type="oslocation" />
+ <field name="oslocationold" type="oslocationold" />
<field name="srpt_geohash" type="srpt_geohash" multiValued="true"/>
<field name="srpt_quad" type="srpt_quad" multiValued="true"/>
<field name="srpt_packedquad" type="srpt_packedquad" multiValued="true"/>
diff --git a/solr/core/src/test/org/apache/solr/search/TestSolr4Spatial2.java b/solr/core/src/test/org/apache/solr/search/TestSolr4Spatial2.java
index d7fde70..cbbba42 100644
--- a/solr/core/src/test/org/apache/solr/search/TestSolr4Spatial2.java
+++ b/solr/core/src/test/org/apache/solr/search/TestSolr4Spatial2.java
@@ -61,6 +61,13 @@ public class TestSolr4Spatial2 extends SolrTestCaseJ4 {
}
@Test
+ public void testQuadTreeRobustness() {
+ assertU(adoc("id", "0", "oslocation", "244502.06 639062.07"));
+ // old (pre 8.3.0) still works
+ assertU(adoc("id", "0", "oslocationold", "244502.06 639062.07"));
+ }
+
+ @Test
public void testBBox() throws Exception {
String fieldName = random().nextBoolean() ? "bbox" : "bboxD_dynamic";
assertU(adoc("id", "0"));//nothing