You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ds...@apache.org on 2014/03/24 20:40:31 UTC
svn commit: r1581017 - in /lucene/dev/branches/lucene_solr_4_7: ./ lucene/
lucene/spatial/ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/
lucene/spatial/src/test/org/apache/lucene/spatial/prefix/
Author: dsmiley
Date: Mon Mar 24 19:40:31 2014
New Revision: 1581017
URL: http://svn.apache.org/r1581017
Log:
LUCENE-4978: spatial grid false-negatives at edge
Modified:
lucene/dev/branches/lucene_solr_4_7/ (props changed)
lucene/dev/branches/lucene_solr_4_7/lucene/ (props changed)
lucene/dev/branches/lucene_solr_4_7/lucene/CHANGES.txt (contents, props changed)
lucene/dev/branches/lucene_solr_4_7/lucene/spatial/ (props changed)
lucene/dev/branches/lucene_solr_4_7/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/IntersectsPrefixTreeFilter.java
lucene/dev/branches/lucene_solr_4_7/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/SpatialOpRecursivePrefixTreeTest.java
lucene/dev/branches/lucene_solr_4_7/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/TestRecursivePrefixTreeStrategy.java
Modified: lucene/dev/branches/lucene_solr_4_7/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_7/lucene/CHANGES.txt?rev=1581017&r1=1581016&r2=1581017&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_7/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/lucene_solr_4_7/lucene/CHANGES.txt Mon Mar 24 19:40:31 2014
@@ -52,6 +52,10 @@ Bug Fixes
* LUCENE-5544: Exceptions during IndexWriter.rollback could leak file handles
and the write lock. (Robert Muir)
+* LUCENE-4978: Spatial RecursivePrefixTree queries could result in false-negatives for
+ indexed shapes within 1/2 maxDistErr from the edge of the query shape. This meant
+ searching for a point by the same point as a query rarely worked. (David Smiley)
+
Build
* LUCENE-5511: "ant precommit" / "ant check-svn-working-copy" now work again
Modified: lucene/dev/branches/lucene_solr_4_7/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/IntersectsPrefixTreeFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_7/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/IntersectsPrefixTreeFilter.java?rev=1581017&r1=1581016&r2=1581017&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_7/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/IntersectsPrefixTreeFilter.java (original)
+++ lucene/dev/branches/lucene_solr_4_7/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/IntersectsPrefixTreeFilter.java Mon Mar 24 19:40:31 2014
@@ -81,15 +81,7 @@ public class IntersectsPrefixTreeFilter
@Override
protected void visitScanned(Cell cell) throws IOException {
- Shape cShape;
- //if this cell represents a point, use the cell center vs the box
- // TODO this behavior is debatable; might want to be configurable
- // (points never have isLeaf())
- if (cell.getLevel() == grid.getMaxLevels() && !cell.isLeaf())
- cShape = cell.getCenter();
- else
- cShape = cell.getShape();
- if (queryShape.relate(cShape).intersects())
+ if (queryShape.relate(cell.getShape()).intersects())
collectDocs(results);
}
Modified: lucene/dev/branches/lucene_solr_4_7/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/SpatialOpRecursivePrefixTreeTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_7/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/SpatialOpRecursivePrefixTreeTest.java?rev=1581017&r1=1581016&r2=1581017&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_7/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/SpatialOpRecursivePrefixTreeTest.java (original)
+++ lucene/dev/branches/lucene_solr_4_7/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/SpatialOpRecursivePrefixTreeTest.java Mon Mar 24 19:40:31 2014
@@ -18,6 +18,7 @@ package org.apache.lucene.spatial.prefix
*/
import com.carrotsearch.randomizedtesting.annotations.Repeat;
+import com.spatial4j.core.context.SpatialContext;
import com.spatial4j.core.context.SpatialContextFactory;
import com.spatial4j.core.shape.Rectangle;
import com.spatial4j.core.shape.Shape;
@@ -31,6 +32,7 @@ import org.apache.lucene.document.String
import org.apache.lucene.search.Query;
import org.apache.lucene.spatial.StrategyTestCase;
import org.apache.lucene.spatial.prefix.tree.Cell;
+import org.apache.lucene.spatial.prefix.tree.GeohashPrefixTree;
import org.apache.lucene.spatial.prefix.tree.QuadPrefixTree;
import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
import org.apache.lucene.spatial.query.SpatialArgs;
@@ -50,6 +52,7 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
+import static com.carrotsearch.randomizedtesting.RandomizedTest.randomBoolean;
import static com.carrotsearch.randomizedtesting.RandomizedTest.randomInt;
import static com.carrotsearch.randomizedtesting.RandomizedTest.randomIntBetween;
import static com.spatial4j.core.shape.SpatialRelation.CONTAINS;
@@ -69,7 +72,17 @@ public class SpatialOpRecursivePrefixTre
deleteAll();
}
- public void mySetup(int maxLevels) throws IOException {
+ public void setupGrid(int maxLevels) throws IOException {
+ if (randomBoolean())
+ setupQuadGrid(maxLevels);
+ else
+ setupGeohashGrid(maxLevels);
+ //((PrefixTreeStrategy) strategy).setDistErrPct(0);//fully precise to grid
+
+ System.out.println("Strategy: " + strategy.toString());
+ }
+
+ private void setupQuadGrid(int maxLevels) {
//non-geospatial makes this test a little easier (in gridSnap), and using boundary values 2^X raises
// the prospect of edge conditions we want to test, plus makes for simpler numbers (no decimals).
SpatialContextFactory factory = new SpatialContextFactory();
@@ -78,46 +91,52 @@ public class SpatialOpRecursivePrefixTre
this.ctx = factory.newSpatialContext();
//A fairly shallow grid, and default 2.5% distErrPct
if (maxLevels == -1)
- maxLevels = randomIntBetween(1, 8);
+ maxLevels = randomIntBetween(1, 8);//max 64k cells (4^8), also 256*256
this.grid = new QuadPrefixTree(ctx, maxLevels);
this.strategy = new RecursivePrefixTreeStrategy(grid, getClass().getSimpleName());
- //((PrefixTreeStrategy) strategy).setDistErrPct(0);//fully precise to grid
+ }
- System.out.println("Strategy: " + strategy.toString());
+ public void setupGeohashGrid(int maxLevels) {
+ this.ctx = SpatialContext.GEO;
+ //A fairly shallow grid, and default 2.5% distErrPct
+ if (maxLevels == -1)
+ maxLevels = randomIntBetween(1, 3);//max 16k cells (32^3)
+ this.grid = new GeohashPrefixTree(ctx, maxLevels);
+ this.strategy = new RecursivePrefixTreeStrategy(grid, getClass().getSimpleName());
}
@Test
@Repeat(iterations = ITERATIONS)
public void testIntersects() throws IOException {
- mySetup(-1);
+ setupGrid(-1);
doTest(SpatialOperation.Intersects);
}
@Test
@Repeat(iterations = ITERATIONS)
public void testWithin() throws IOException {
- mySetup(-1);
+ setupGrid(-1);
doTest(SpatialOperation.IsWithin);
}
@Test
@Repeat(iterations = ITERATIONS)
public void testContains() throws IOException {
- mySetup(-1);
+ setupGrid(-1);
doTest(SpatialOperation.Contains);
}
@Test
@Repeat(iterations = ITERATIONS)
public void testDisjoint() throws IOException {
- mySetup(-1);
+ setupGrid(-1);
doTest(SpatialOperation.IsDisjointTo);
}
/** See LUCENE-5062, {@link ContainsPrefixTreeFilter#multiOverlappingIndexedShapes}. */
@Test
public void testContainsPairOverlap() throws IOException {
- mySetup(3);
+ setupQuadGrid(3);
adoc("0", new ShapePair(ctx.makeRectangle(0, 33, -128, 128), ctx.makeRectangle(33, 128, -128, 128), true));
commit();
Query query = strategy.makeQuery(new SpatialArgs(SpatialOperation.Contains,
@@ -128,7 +147,7 @@ public class SpatialOpRecursivePrefixTre
@Test
public void testWithinDisjointParts() throws IOException {
- mySetup(7);
+ setupQuadGrid(7);
//one shape comprised of two parts, quite separated apart
adoc("0", new ShapePair(ctx.makeRectangle(0, 10, -120, -100), ctx.makeRectangle(220, 240, 110, 125), false));
commit();
@@ -142,7 +161,7 @@ public class SpatialOpRecursivePrefixTre
@Test /** LUCENE-4916 */
public void testWithinLeafApproxRule() throws IOException {
- mySetup(2);//4x4 grid
+ setupQuadGrid(2);//4x4 grid
//indexed shape will simplify to entire right half (2 top cells)
adoc("0", ctx.makeRectangle(192, 204, -128, 128));
commit();
@@ -199,31 +218,29 @@ public class SpatialOpRecursivePrefixTre
final boolean biasContains = (operation == SpatialOperation.Contains);
+ //Main index loop:
Map<String, Shape> indexedShapes = new LinkedHashMap<String, Shape>();
Map<String, Shape> indexedShapesGS = new LinkedHashMap<String, Shape>();//grid snapped
final int numIndexedShapes = randomIntBetween(1, 6);
+ boolean indexedAtLeastOneShapePair = false;
for (int i = 0; i < numIndexedShapes; i++) {
String id = "" + i;
Shape indexedShape;
- Shape indexedShapeGS; //(grid-snapped)
int R = random().nextInt(12);
if (R == 0) {//1 in 12
- indexedShape = null; //no shape for this doc
- indexedShapeGS = null;
- } else if (R % 3 == 0) {//4 in 12 (0,3,6,9)
+ indexedShape = null;
+ } else if (R == 1) {//1 in 12
+ indexedShape = randomPoint();//just one point
+ } else if (R <= 4) {//3 in 12
//comprised of more than one shape
- Rectangle shape1 = randomRectangle();
- Rectangle shape2 = randomRectangle();
- indexedShape = new ShapePair(shape1, shape2, biasContains);
- indexedShapeGS = new ShapePair(gridSnap(shape1), gridSnap(shape2), biasContains);
+ indexedShape = randomShapePairRect(biasContains);
+ indexedAtLeastOneShapePair = true;
} else {
- //just one shape
- indexedShape = randomRectangle();
- indexedShapeGS = gridSnap(indexedShape);
+ indexedShape = randomRectangle();//just one rect
}
- //TODO sometimes index a point. Need to fix LUCENE-4978 first though.
+
indexedShapes.put(id, indexedShape);
- indexedShapesGS.put(id, indexedShapeGS);
+ indexedShapesGS.put(id, gridSnap(indexedShape));
adoc(id, indexedShape);
@@ -244,12 +261,23 @@ public class SpatialOpRecursivePrefixTre
commit();
+ //Main query loop:
final int numQueryShapes = atLeast(20);
for (int i = 0; i < numQueryShapes; i++) {
int scanLevel = randomInt(grid.getMaxLevels());
((RecursivePrefixTreeStrategy) strategy).setPrefixGridScanLevel(scanLevel);
- final Shape queryShape = randomRectangle();
+ final Shape queryShape;
+ switch (randomInt(10)) {
+ case 0: queryShape = randomPoint(); break;
+ case 1:case 2:case 3:
+ if (!indexedAtLeastOneShapePair) { // avoids ShapePair.relate(ShapePair), which isn't reliable
+ queryShape = randomShapePairRect(biasContains);
+ break;
+ }
+ default: queryShape = randomRectangle();
+ }
+ final Shape queryShapeGS = gridSnap(queryShape);
final boolean opIsDisjoint = operation == SpatialOperation.IsDisjointTo;
@@ -271,7 +299,7 @@ public class SpatialOpRecursivePrefixTre
if (opIsDisjoint) {
//if no longer intersect after buffering them, for disjoint, remember this
indexedShapeCompare = indexedShapesGS.get(id);
- queryShapeCompare = gridSnap(queryShape);
+ queryShapeCompare = queryShapeGS;
if (!operation.evaluate(indexedShapeCompare, queryShapeCompare))
secondaryIds.add(id);
}
@@ -279,14 +307,14 @@ public class SpatialOpRecursivePrefixTre
//buffer either the indexed or query shape (via gridSnap) and try again
if (operation == SpatialOperation.Intersects) {
indexedShapeCompare = indexedShapesGS.get(id);
- queryShapeCompare = gridSnap(queryShape);
+ queryShapeCompare = queryShapeGS;
//TODO Unfortunately, grid-snapping both can result in intersections that otherwise
// wouldn't happen when the grids are adjacent. Not a big deal but our test is just a
// bit more lenient.
} else if (operation == SpatialOperation.Contains) {
indexedShapeCompare = indexedShapesGS.get(id);
} else if (operation == SpatialOperation.IsWithin) {
- queryShapeCompare = gridSnap(queryShape);
+ queryShapeCompare = queryShapeGS;
}
if (operation.evaluate(indexedShapeCompare, queryShapeCompare))
secondaryIds.add(id);
@@ -295,6 +323,8 @@ public class SpatialOpRecursivePrefixTre
//Search and verify results
SpatialArgs args = new SpatialArgs(operation, queryShape);
+ if (queryShape instanceof ShapePair)
+ args.setDistErrPct(0.0);//a hack; we want to be more detailed than gridSnap(queryShape)
Query query = strategy.makeQuery(args);
SearchResults got = executeQuery(query, 100);
Set<String> remainingExpectedIds = new LinkedHashSet<String>(expectedIds);
@@ -314,19 +344,30 @@ public class SpatialOpRecursivePrefixTre
}
}
+ private Shape randomShapePairRect(boolean biasContains) {
+ Rectangle shape1 = randomRectangle();
+ Rectangle shape2 = randomRectangle();
+ return new ShapePair(shape1, shape2, biasContains);
+ }
+
private void fail(String label, String id, Map<String, Shape> indexedShapes, Map<String, Shape> indexedShapesGS, Shape queryShape) {
System.err.println("Ig:" + indexedShapesGS.get(id) + " Qg:" + gridSnap(queryShape));
- fail(label + " I #" + id + ":" + indexedShapes.get(id) + " Q:" + queryShape);
+ fail(label + " I#" + id + ":" + indexedShapes.get(id) + " Q:" + queryShape);
}
-
// private Rectangle inset(Rectangle r) {
// //typically inset by 1 (whole numbers are easy to read)
// double d = Math.min(1.0, grid.getDistanceForLevel(grid.getMaxLevels()) / 4);
// return ctx.makeRectangle(r.getMinX() + d, r.getMaxX() - d, r.getMinY() + d, r.getMaxY() - d);
// }
- protected Rectangle gridSnap(Shape snapMe) {
+ protected Shape gridSnap(Shape snapMe) {
+ if (snapMe == null)
+ return null;
+ if (snapMe instanceof ShapePair) {
+ ShapePair me = (ShapePair) snapMe;
+ return new ShapePair(gridSnap(me.shape1), gridSnap(me.shape2), me.biasContainsThenWithin);
+ }
//The next 4 lines mimic PrefixTreeStrategy.createIndexableFields()
double distErrPct = ((PrefixTreeStrategy) strategy).getDistErrPct();
double distErr = SpatialArgs.calcDistanceFromErrPct(snapMe, distErrPct, ctx);
@@ -348,12 +389,12 @@ public class SpatialOpRecursivePrefixTre
* The tests here are sensitive to these matters, although in practice ShapeCollection
* is fine.
*/
- private class ShapePair extends ShapeCollection<Rectangle> {
+ private class ShapePair extends ShapeCollection<Shape> {
- final Rectangle shape1, shape2;
+ final Shape shape1, shape2;
final boolean biasContainsThenWithin;//a hack
- public ShapePair(Rectangle shape1, Rectangle shape2, boolean containsThenWithin) {
+ public ShapePair(Shape shape1, Shape shape2, boolean containsThenWithin) {
super(Arrays.asList(shape1, shape2), ctx);
this.shape1 = shape1;
this.shape2 = shape2;
Modified: lucene/dev/branches/lucene_solr_4_7/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/TestRecursivePrefixTreeStrategy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_7/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/TestRecursivePrefixTreeStrategy.java?rev=1581017&r1=1581016&r2=1581017&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_7/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/TestRecursivePrefixTreeStrategy.java (original)
+++ lucene/dev/branches/lucene_solr_4_7/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/TestRecursivePrefixTreeStrategy.java Mon Mar 24 19:40:31 2014
@@ -19,9 +19,7 @@ package org.apache.lucene.spatial.prefix
import com.spatial4j.core.context.SpatialContext;
import com.spatial4j.core.distance.DistanceUtils;
-import com.spatial4j.core.io.GeohashUtils;
import com.spatial4j.core.shape.Point;
-import com.spatial4j.core.shape.Rectangle;
import com.spatial4j.core.shape.Shape;
import org.apache.lucene.spatial.SpatialMatchConcern;
import org.apache.lucene.spatial.StrategyTestCase;
@@ -31,10 +29,7 @@ import org.apache.lucene.spatial.query.S
import org.junit.Test;
import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
import java.util.HashSet;
-import java.util.List;
import java.util.Set;
public class TestRecursivePrefixTreeStrategy extends StrategyTestCase {
@@ -104,75 +99,6 @@ public class TestRecursivePrefixTreeStra
checkHits(q(qPt, 34 * KM2DEG, distErrPct), 0, null);
}
- @Test
- public void geohashRecursiveRandom() throws IOException {
- init(12);
-
- //1. Iterate test with the cluster at some worldly point of interest
- Point[] clusterCenters = new Point[]{ctx.makePoint(-180,0), ctx.makePoint(0,90), ctx.makePoint(0,-90)};
- for (Point clusterCenter : clusterCenters) {
- //2. Iterate on size of cluster (a really small one and a large one)
- String hashCenter = GeohashUtils.encodeLatLon(clusterCenter.getY(), clusterCenter.getX(), maxLength);
- //calculate the number of degrees in the smallest grid box size (use for both lat & lon)
- String smallBox = hashCenter.substring(0,hashCenter.length()-1);//chop off leaf precision
- Rectangle clusterDims = GeohashUtils.decodeBoundary(smallBox,ctx);
- double smallRadius = Math.max(clusterDims.getMaxX()-clusterDims.getMinX(),clusterDims.getMaxY()-clusterDims.getMinY());
- assert smallRadius < 1;
- double largeRadius = 20d;//good large size; don't use >=45 for this test code to work
- double[] radiusDegs = {largeRadius,smallRadius};
- for (double radiusDeg : radiusDegs) {
- //3. Index random points in this cluster circle
- deleteAll();
- List<Point> points = new ArrayList<Point>();
- for(int i = 0; i < 20; i++) {
- //Note that this will not result in randomly distributed points in the
- // circle, they will be concentrated towards the center a little. But
- // it's good enough.
- Point pt = ctx.getDistCalc().pointOnBearing(clusterCenter,
- random().nextDouble() * radiusDeg, random().nextInt() * 360, ctx, null);
- pt = alignGeohash(pt);
- points.add(pt);
- addDocument(newDoc("" + i, pt));
- }
- commit();
-
- //3. Use some query centers. Each is twice the cluster's radius away.
- for(int ri = 0; ri < 4; ri++) {
- Point queryCenter = ctx.getDistCalc().pointOnBearing(clusterCenter,
- radiusDeg*2, random().nextInt(360), ctx, null);
- queryCenter = alignGeohash(queryCenter);
- //4.1 Query a small box getting nothing
- checkHits(q(queryCenter, radiusDeg - smallRadius/2), 0, null);
- //4.2 Query a large box enclosing the cluster, getting everything
- checkHits(q(queryCenter, radiusDeg*3 + smallRadius/2), points.size(), null);
- //4.3 Query a medium box getting some (calculate the correct solution and verify)
- double queryDist = radiusDeg * 2;
-
- //Find matching points. Put into int[] of doc ids which is the same thing as the index into points list.
- int[] ids = new int[points.size()];
- int ids_sz = 0;
- for (int i = 0; i < points.size(); i++) {
- Point point = points.get(i);
- if (ctx.getDistCalc().distance(queryCenter, point) <= queryDist)
- ids[ids_sz++] = i;
- }
- ids = Arrays.copyOf(ids, ids_sz);
- //assert ids_sz > 0 (can't because randomness keeps us from being able to)
-
- checkHits(q(queryCenter, queryDist), ids.length, ids);
- }
-
- }//for radiusDeg
-
- }//for clusterCenter
-
- }//randomTest()
-
- /** Query point-distance (in degrees) with zero error percent. */
- private SpatialArgs q(Point pt, double distDEG) {
- return q(pt, distDEG, 0.0);
- }
-
private SpatialArgs q(Point pt, double distDEG, double distErrPct) {
Shape shape = ctx.makeCircle(pt, distDEG);
SpatialArgs args = new SpatialArgs(SpatialOperation.Intersects,shape);
@@ -194,8 +120,4 @@ public class TestRecursivePrefixTreeStra
}
}
- /** NGeohash round-trip for given precision. */
- private Point alignGeohash(Point p) {
- return GeohashUtils.decode(GeohashUtils.encodeLatLon(p.getY(), p.getX(), maxLength), ctx);
- }
}