You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2017/01/30 10:20:46 UTC
[1/6] lucene-solr:master: LUCENE-7661: Speed up for
LatLonPointInPolygonQuery by pre-computing the relation of the polygon with a
grid.
Repository: lucene-solr
Updated Branches:
refs/heads/branch_6x 9e98100d1 -> b6b44d44d
refs/heads/master aa5e048cb -> 9d5dc0cf5
LUCENE-7661: Speed up for LatLonPointInPolygonQuery by pre-computing the relation of the polygon with a grid.
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/74240be0
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/74240be0
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/74240be0
Branch: refs/heads/master
Commit: 74240be0f5a7f9de373bc53d01d5a43cd6c5bb05
Parents: aa5e048
Author: Adrien Grand <jp...@gmail.com>
Authored: Mon Jan 30 09:44:48 2017 +0100
Committer: Adrien Grand <jp...@gmail.com>
Committed: Mon Jan 30 09:45:06 2017 +0100
----------------------------------------------------------------------
lucene/CHANGES.txt | 3 +
.../org/apache/lucene/geo/GeoEncodingUtils.java | 145 +++++++++++++++----
.../document/LatLonDocValuesDistanceQuery.java | 2 +-
.../document/LatLonPointDistanceQuery.java | 2 +-
.../document/LatLonPointInPolygonQuery.java | 6 +-
5 files changed, 128 insertions(+), 30 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/74240be0/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index e59744a..0be102c 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -128,6 +128,9 @@ Optimizations
* LUCENE-7656: Speed up for LatLonPointDistanceQuery by computing distances even
less often. (Adrien Grand)
+* LUCENE-7661: Speed up for LatLonPointInPolygonQuery by pre-computing the
+ relation of the polygon with a grid. (Adrien Grand)
+
Build
* LUCENE-7651: Fix Javadocs build for Java 8u121 by injecting "Google Code
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/74240be0/lucene/core/src/java/org/apache/lucene/geo/GeoEncodingUtils.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/geo/GeoEncodingUtils.java b/lucene/core/src/java/org/apache/lucene/geo/GeoEncodingUtils.java
index cc75fbf..663cb2e 100644
--- a/lucene/core/src/java/org/apache/lucene/geo/GeoEncodingUtils.java
+++ b/lucene/core/src/java/org/apache/lucene/geo/GeoEncodingUtils.java
@@ -27,6 +27,8 @@ import static org.apache.lucene.geo.GeoUtils.MIN_LAT_INCL;
import static org.apache.lucene.geo.GeoUtils.checkLatitude;
import static org.apache.lucene.geo.GeoUtils.checkLongitude;
+import java.util.function.Function;
+
/**
* reusable geopoint encoding methods
*
@@ -158,11 +160,48 @@ public final class GeoEncodingUtils {
* @lucene.internal */
public static DistancePredicate createDistancePredicate(double lat, double lon, double radiusMeters) {
final Rectangle boundingBox = Rectangle.fromPointDistance(lat, lon, radiusMeters);
+ final double axisLat = Rectangle.axisLat(lat, radiusMeters);
+ final double distanceSortKey = GeoUtils.distanceQuerySortKey(radiusMeters);
+ final Function<Rectangle, Relation> boxToRelation = box -> GeoUtils.relate(
+ box.minLat, box.maxLat, box.minLon, box.maxLon, lat, lon, distanceSortKey, axisLat);
+ final Grid subBoxes = createSubBoxes(boundingBox, boxToRelation);
+
+ return new DistancePredicate(
+ subBoxes.latShift, subBoxes.lonShift,
+ subBoxes.latBase, subBoxes.lonBase,
+ subBoxes.maxLatDelta, subBoxes.maxLonDelta,
+ subBoxes.relations,
+ lat, lon, distanceSortKey);
+ }
+
+ /** Create a predicate that checks whether points are within a polygon.
+ * It works the same way as {@link #createDistancePredicate}.
+ * @lucene.internal */
+ public static PolygonPredicate createPolygonPredicate(Polygon[] polygons, Polygon2D tree) {
+ final Rectangle boundingBox = Rectangle.fromPolygon(polygons);
+ final Function<Rectangle, Relation> boxToRelation = box -> tree.relate(
+ box.minLat, box.maxLat, box.minLon, box.maxLon);
+ final Grid subBoxes = createSubBoxes(boundingBox, boxToRelation);
+
+ return new PolygonPredicate(
+ subBoxes.latShift, subBoxes.lonShift,
+ subBoxes.latBase, subBoxes.lonBase,
+ subBoxes.maxLatDelta, subBoxes.maxLonDelta,
+ subBoxes.relations,
+ tree);
+ }
+
+ private static Grid createSubBoxes(Rectangle boundingBox, Function<Rectangle, Relation> boxToRelation) {
final int minLat = encodeLatitudeCeil(boundingBox.minLat);
final int maxLat = encodeLatitude(boundingBox.maxLat);
final int minLon = encodeLongitudeCeil(boundingBox.minLon);
final int maxLon = encodeLongitude(boundingBox.maxLon);
+ if (maxLat < minLat || (boundingBox.crossesDateline() == false && maxLon < minLon)) {
+ // the box cannot match any quantized point
+ return new Grid(1, 1, 0, 0, 0, 0, new byte[0]);
+ }
+
final int latShift, lonShift;
final int latBase, lonBase;
final int maxLatDelta, maxLonDelta;
@@ -186,8 +225,6 @@ public final class GeoEncodingUtils {
assert maxLonDelta > 0;
}
- final double axisLat = Rectangle.axisLat(lat, radiusMeters);
- final double distanceSortKey = GeoUtils.distanceQuerySortKey(radiusMeters);
final byte[] relations = new byte[maxLatDelta * maxLonDelta];
for (int i = 0; i < maxLatDelta; ++i) {
for (int j = 0; j < maxLonDelta; ++j) {
@@ -196,55 +233,54 @@ public final class GeoEncodingUtils {
final int boxMaxLat = boxMinLat + (1 << latShift) - 1;
final int boxMaxLon = boxMinLon + (1 << lonShift) - 1;
- relations[i * maxLonDelta + j] = (byte) GeoUtils.relate(
+ relations[i * maxLonDelta + j] = (byte) boxToRelation.apply(new Rectangle(
decodeLatitude(boxMinLat), decodeLatitude(boxMaxLat),
- decodeLongitude(boxMinLon), decodeLongitude(boxMaxLon),
- lat, lon, distanceSortKey, axisLat).ordinal();
+ decodeLongitude(boxMinLon), decodeLongitude(boxMaxLon))).ordinal();
}
}
- return new DistancePredicate(
+ return new Grid(
latShift, lonShift,
latBase, lonBase,
maxLatDelta, maxLonDelta,
- relations,
- lat, lon, distanceSortKey);
+ relations);
}
/** Compute the minimum shift value so that
* {@code (b>>>shift)-(a>>>shift)} is less that {@code ARITY}. */
private static int computeShift(long a, long b) {
- assert a < b;
+ assert a <= b;
// We enforce a shift of at least 1 so that when we work with unsigned ints
// by doing (lat - MIN_VALUE), the result of the shift (lat - MIN_VALUE) >>> shift
// can be used for comparisons without particular care: the sign bit has
// been cleared so comparisons work the same for signed and unsigned ints
for (int shift = 1; ; ++shift) {
final long delta = (b >>> shift) - (a >>> shift);
- if (delta >= 0 && delta < DistancePredicate.ARITY) {
+ if (delta >= 0 && delta < Grid.ARITY) {
return shift;
}
}
}
- /** A predicate that checks whether a given point is within a distance of another point. */
- public static class DistancePredicate {
-
- private static final int ARITY = 64;
+ private static class Grid {
+ static final int ARITY = 64;
- private final int latShift, lonShift;
- private final int latBase, lonBase;
- private final int maxLatDelta, maxLonDelta;
- private final byte[] relations;
- private final double lat, lon;
- private final double distanceKey;
+ final int latShift, lonShift;
+ final int latBase, lonBase;
+ final int maxLatDelta, maxLonDelta;
+ final byte[] relations;
- private DistancePredicate(
+ private Grid(
int latShift, int lonShift,
int latBase, int lonBase,
int maxLatDelta, int maxLonDelta,
- byte[] relations,
- double lat, double lon, double distanceKey) {
+ byte[] relations) {
+ if (latShift < 1 || latShift > 31) {
+ throw new IllegalArgumentException();
+ }
+ if (lonShift < 1 || lonShift > 31) {
+ throw new IllegalArgumentException();
+ }
this.latShift = latShift;
this.lonShift = lonShift;
this.latBase = latBase;
@@ -252,6 +288,22 @@ public final class GeoEncodingUtils {
this.maxLatDelta = maxLatDelta;
this.maxLonDelta = maxLonDelta;
this.relations = relations;
+ }
+ }
+
+ /** A predicate that checks whether a given point is within a distance of another point. */
+ public static class DistancePredicate extends Grid {
+
+ private final double lat, lon;
+ private final double distanceKey;
+
+ private DistancePredicate(
+ int latShift, int lonShift,
+ int latBase, int lonBase,
+ int maxLatDelta, int maxLonDelta,
+ byte[] relations,
+ double lat, double lon, double distanceKey) {
+ super(latShift, lonShift, latBase, lonBase, maxLatDelta, maxLonDelta, relations);
this.lat = lat;
this.lon = lon;
this.distanceKey = distanceKey;
@@ -259,16 +311,17 @@ public final class GeoEncodingUtils {
/** Check whether the given point is within a distance of another point.
* NOTE: this operates directly on the encoded representation of points. */
- public boolean apply(int lat, int lon) {
+ public boolean test(int lat, int lon) {
final int lat2 = ((lat - Integer.MIN_VALUE) >>> latShift);
if (lat2 < latBase || lat2 >= latBase + maxLatDelta) {
return false;
}
int lon2 = ((lon - Integer.MIN_VALUE) >>> lonShift);
if (lon2 < lonBase) { // wrap
- lon2 += 1L << (32 - lonShift);
- assert lon2 >= lonBase;
+ lon2 += 1 << (32 - lonShift);
}
+ assert Integer.toUnsignedLong(lon2) >= lonBase;
+ assert lon2 - lonBase >= 0;
if (lon2 - lonBase >= maxLonDelta) {
return false;
}
@@ -284,4 +337,44 @@ public final class GeoEncodingUtils {
}
}
+ /** A predicate that checks whether a given point is within a polygon. */
+ public static class PolygonPredicate extends Grid {
+
+ private final Polygon2D tree;
+
+ private PolygonPredicate(
+ int latShift, int lonShift,
+ int latBase, int lonBase,
+ int maxLatDelta, int maxLonDelta,
+ byte[] relations,
+ Polygon2D tree) {
+ super(latShift, lonShift, latBase, lonBase, maxLatDelta, maxLonDelta, relations);
+ this.tree = tree;
+ }
+
+ /** Check whether the given point is within the considered polygon.
+ * NOTE: this operates directly on the encoded representation of points. */
+ public boolean test(int lat, int lon) {
+ final int lat2 = ((lat - Integer.MIN_VALUE) >>> latShift);
+ if (lat2 < latBase || lat2 >= latBase + maxLatDelta) {
+ return false;
+ }
+ int lon2 = ((lon - Integer.MIN_VALUE) >>> lonShift);
+ if (lon2 < lonBase) { // wrap
+ lon2 += 1 << (32 - lonShift);
+ }
+ assert Integer.toUnsignedLong(lon2) >= lonBase;
+ assert lon2 - lonBase >= 0;
+ if (lon2 - lonBase >= maxLonDelta) {
+ return false;
+ }
+
+ final int relation = relations[(lat2 - latBase) * maxLonDelta + (lon2 - lonBase)];
+ if (relation == Relation.CELL_CROSSES_QUERY.ordinal()) {
+ return tree.contains(decodeLatitude(lat), decodeLongitude(lon));
+ } else {
+ return relation == Relation.CELL_INSIDE_QUERY.ordinal();
+ }
+ }
+ }
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/74240be0/lucene/sandbox/src/java/org/apache/lucene/document/LatLonDocValuesDistanceQuery.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonDocValuesDistanceQuery.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonDocValuesDistanceQuery.java
index ab05804..e38d9fe 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonDocValuesDistanceQuery.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonDocValuesDistanceQuery.java
@@ -111,7 +111,7 @@ final class LatLonDocValuesDistanceQuery extends Query {
final long value = values.nextValue();
final int lat = (int) (value >>> 32);
final int lon = (int) (value & 0xFFFFFFFF);
- if (distancePredicate.apply(lat, lon)) {
+ if (distancePredicate.test(lat, lon)) {
return true;
}
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/74240be0/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java
index 3d0b82e..71ddf3d 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java
@@ -164,7 +164,7 @@ final class LatLonPointDistanceQuery extends Query {
int docLatitude = NumericUtils.sortableBytesToInt(packedValue, 0);
int docLongitude = NumericUtils.sortableBytesToInt(packedValue, Integer.BYTES);
- if (distancePredicate.apply(docLatitude, docLongitude)) {
+ if (distancePredicate.test(docLatitude, docLongitude)) {
adder.add(docID);
}
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/74240be0/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java
index ec7c682..c272b4d 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java
@@ -35,6 +35,7 @@ import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.util.DocIdSetBuilder;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.StringHelper;
+import org.apache.lucene.geo.GeoEncodingUtils;
import org.apache.lucene.geo.Polygon;
import org.apache.lucene.geo.Polygon2D;
@@ -92,6 +93,7 @@ final class LatLonPointInPolygonQuery extends Query {
NumericUtils.intToSortableBytes(encodeLongitude(box.maxLon), maxLon, 0);
final Polygon2D tree = Polygon2D.create(polygons);
+ final GeoEncodingUtils.PolygonPredicate polygonPredicate = GeoEncodingUtils.createPolygonPredicate(polygons, tree);
return new ConstantScoreWeight(this, boost) {
@@ -130,8 +132,8 @@ final class LatLonPointInPolygonQuery extends Query {
@Override
public void visit(int docID, byte[] packedValue) {
- if (tree.contains(decodeLatitude(packedValue, 0),
- decodeLongitude(packedValue, Integer.BYTES))) {
+ if (polygonPredicate.test(NumericUtils.sortableBytesToInt(packedValue, 0),
+ NumericUtils.sortableBytesToInt(packedValue, Integer.BYTES))) {
adder.add(docID);
}
}
[4/6] lucene-solr:branch_6x: LUCENE-7661: Speed up for
LatLonPointInPolygonQuery by pre-computing the relation of the polygon with a
grid.
Posted by jp...@apache.org.
LUCENE-7661: Speed up for LatLonPointInPolygonQuery by pre-computing the relation of the polygon with a grid.
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/d2051e3f
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/d2051e3f
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/d2051e3f
Branch: refs/heads/branch_6x
Commit: d2051e3f9d4edeb5d6c74f71684c14596453b4a2
Parents: 9e98100
Author: Adrien Grand <jp...@gmail.com>
Authored: Mon Jan 30 09:44:48 2017 +0100
Committer: Adrien Grand <jp...@gmail.com>
Committed: Mon Jan 30 11:16:31 2017 +0100
----------------------------------------------------------------------
lucene/CHANGES.txt | 3 +
.../org/apache/lucene/geo/GeoEncodingUtils.java | 145 +++++++++++++++----
.../document/LatLonDocValuesDistanceQuery.java | 2 +-
.../document/LatLonPointDistanceQuery.java | 2 +-
.../document/LatLonPointInPolygonQuery.java | 6 +-
5 files changed, 128 insertions(+), 30 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d2051e3f/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index cd77115..1bc5046 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -70,6 +70,9 @@ Optimizations
* LUCENE-7656: Speed up for LatLonPointDistanceQuery by computing distances even
less often. (Adrien Grand)
+* LUCENE-7661: Speed up for LatLonPointInPolygonQuery by pre-computing the
+ relation of the polygon with a grid. (Adrien Grand)
+
Build
* LUCENE-7651: Fix Javadocs build for Java 8u121 by injecting "Google Code
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d2051e3f/lucene/core/src/java/org/apache/lucene/geo/GeoEncodingUtils.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/geo/GeoEncodingUtils.java b/lucene/core/src/java/org/apache/lucene/geo/GeoEncodingUtils.java
index cc75fbf..663cb2e 100644
--- a/lucene/core/src/java/org/apache/lucene/geo/GeoEncodingUtils.java
+++ b/lucene/core/src/java/org/apache/lucene/geo/GeoEncodingUtils.java
@@ -27,6 +27,8 @@ import static org.apache.lucene.geo.GeoUtils.MIN_LAT_INCL;
import static org.apache.lucene.geo.GeoUtils.checkLatitude;
import static org.apache.lucene.geo.GeoUtils.checkLongitude;
+import java.util.function.Function;
+
/**
* reusable geopoint encoding methods
*
@@ -158,11 +160,48 @@ public final class GeoEncodingUtils {
* @lucene.internal */
public static DistancePredicate createDistancePredicate(double lat, double lon, double radiusMeters) {
final Rectangle boundingBox = Rectangle.fromPointDistance(lat, lon, radiusMeters);
+ final double axisLat = Rectangle.axisLat(lat, radiusMeters);
+ final double distanceSortKey = GeoUtils.distanceQuerySortKey(radiusMeters);
+ final Function<Rectangle, Relation> boxToRelation = box -> GeoUtils.relate(
+ box.minLat, box.maxLat, box.minLon, box.maxLon, lat, lon, distanceSortKey, axisLat);
+ final Grid subBoxes = createSubBoxes(boundingBox, boxToRelation);
+
+ return new DistancePredicate(
+ subBoxes.latShift, subBoxes.lonShift,
+ subBoxes.latBase, subBoxes.lonBase,
+ subBoxes.maxLatDelta, subBoxes.maxLonDelta,
+ subBoxes.relations,
+ lat, lon, distanceSortKey);
+ }
+
+ /** Create a predicate that checks whether points are within a polygon.
+ * It works the same way as {@link #createDistancePredicate}.
+ * @lucene.internal */
+ public static PolygonPredicate createPolygonPredicate(Polygon[] polygons, Polygon2D tree) {
+ final Rectangle boundingBox = Rectangle.fromPolygon(polygons);
+ final Function<Rectangle, Relation> boxToRelation = box -> tree.relate(
+ box.minLat, box.maxLat, box.minLon, box.maxLon);
+ final Grid subBoxes = createSubBoxes(boundingBox, boxToRelation);
+
+ return new PolygonPredicate(
+ subBoxes.latShift, subBoxes.lonShift,
+ subBoxes.latBase, subBoxes.lonBase,
+ subBoxes.maxLatDelta, subBoxes.maxLonDelta,
+ subBoxes.relations,
+ tree);
+ }
+
+ private static Grid createSubBoxes(Rectangle boundingBox, Function<Rectangle, Relation> boxToRelation) {
final int minLat = encodeLatitudeCeil(boundingBox.minLat);
final int maxLat = encodeLatitude(boundingBox.maxLat);
final int minLon = encodeLongitudeCeil(boundingBox.minLon);
final int maxLon = encodeLongitude(boundingBox.maxLon);
+ if (maxLat < minLat || (boundingBox.crossesDateline() == false && maxLon < minLon)) {
+ // the box cannot match any quantized point
+ return new Grid(1, 1, 0, 0, 0, 0, new byte[0]);
+ }
+
final int latShift, lonShift;
final int latBase, lonBase;
final int maxLatDelta, maxLonDelta;
@@ -186,8 +225,6 @@ public final class GeoEncodingUtils {
assert maxLonDelta > 0;
}
- final double axisLat = Rectangle.axisLat(lat, radiusMeters);
- final double distanceSortKey = GeoUtils.distanceQuerySortKey(radiusMeters);
final byte[] relations = new byte[maxLatDelta * maxLonDelta];
for (int i = 0; i < maxLatDelta; ++i) {
for (int j = 0; j < maxLonDelta; ++j) {
@@ -196,55 +233,54 @@ public final class GeoEncodingUtils {
final int boxMaxLat = boxMinLat + (1 << latShift) - 1;
final int boxMaxLon = boxMinLon + (1 << lonShift) - 1;
- relations[i * maxLonDelta + j] = (byte) GeoUtils.relate(
+ relations[i * maxLonDelta + j] = (byte) boxToRelation.apply(new Rectangle(
decodeLatitude(boxMinLat), decodeLatitude(boxMaxLat),
- decodeLongitude(boxMinLon), decodeLongitude(boxMaxLon),
- lat, lon, distanceSortKey, axisLat).ordinal();
+ decodeLongitude(boxMinLon), decodeLongitude(boxMaxLon))).ordinal();
}
}
- return new DistancePredicate(
+ return new Grid(
latShift, lonShift,
latBase, lonBase,
maxLatDelta, maxLonDelta,
- relations,
- lat, lon, distanceSortKey);
+ relations);
}
/** Compute the minimum shift value so that
* {@code (b>>>shift)-(a>>>shift)} is less that {@code ARITY}. */
private static int computeShift(long a, long b) {
- assert a < b;
+ assert a <= b;
// We enforce a shift of at least 1 so that when we work with unsigned ints
// by doing (lat - MIN_VALUE), the result of the shift (lat - MIN_VALUE) >>> shift
// can be used for comparisons without particular care: the sign bit has
// been cleared so comparisons work the same for signed and unsigned ints
for (int shift = 1; ; ++shift) {
final long delta = (b >>> shift) - (a >>> shift);
- if (delta >= 0 && delta < DistancePredicate.ARITY) {
+ if (delta >= 0 && delta < Grid.ARITY) {
return shift;
}
}
}
- /** A predicate that checks whether a given point is within a distance of another point. */
- public static class DistancePredicate {
-
- private static final int ARITY = 64;
+ private static class Grid {
+ static final int ARITY = 64;
- private final int latShift, lonShift;
- private final int latBase, lonBase;
- private final int maxLatDelta, maxLonDelta;
- private final byte[] relations;
- private final double lat, lon;
- private final double distanceKey;
+ final int latShift, lonShift;
+ final int latBase, lonBase;
+ final int maxLatDelta, maxLonDelta;
+ final byte[] relations;
- private DistancePredicate(
+ private Grid(
int latShift, int lonShift,
int latBase, int lonBase,
int maxLatDelta, int maxLonDelta,
- byte[] relations,
- double lat, double lon, double distanceKey) {
+ byte[] relations) {
+ if (latShift < 1 || latShift > 31) {
+ throw new IllegalArgumentException();
+ }
+ if (lonShift < 1 || lonShift > 31) {
+ throw new IllegalArgumentException();
+ }
this.latShift = latShift;
this.lonShift = lonShift;
this.latBase = latBase;
@@ -252,6 +288,22 @@ public final class GeoEncodingUtils {
this.maxLatDelta = maxLatDelta;
this.maxLonDelta = maxLonDelta;
this.relations = relations;
+ }
+ }
+
+ /** A predicate that checks whether a given point is within a distance of another point. */
+ public static class DistancePredicate extends Grid {
+
+ private final double lat, lon;
+ private final double distanceKey;
+
+ private DistancePredicate(
+ int latShift, int lonShift,
+ int latBase, int lonBase,
+ int maxLatDelta, int maxLonDelta,
+ byte[] relations,
+ double lat, double lon, double distanceKey) {
+ super(latShift, lonShift, latBase, lonBase, maxLatDelta, maxLonDelta, relations);
this.lat = lat;
this.lon = lon;
this.distanceKey = distanceKey;
@@ -259,16 +311,17 @@ public final class GeoEncodingUtils {
/** Check whether the given point is within a distance of another point.
* NOTE: this operates directly on the encoded representation of points. */
- public boolean apply(int lat, int lon) {
+ public boolean test(int lat, int lon) {
final int lat2 = ((lat - Integer.MIN_VALUE) >>> latShift);
if (lat2 < latBase || lat2 >= latBase + maxLatDelta) {
return false;
}
int lon2 = ((lon - Integer.MIN_VALUE) >>> lonShift);
if (lon2 < lonBase) { // wrap
- lon2 += 1L << (32 - lonShift);
- assert lon2 >= lonBase;
+ lon2 += 1 << (32 - lonShift);
}
+ assert Integer.toUnsignedLong(lon2) >= lonBase;
+ assert lon2 - lonBase >= 0;
if (lon2 - lonBase >= maxLonDelta) {
return false;
}
@@ -284,4 +337,44 @@ public final class GeoEncodingUtils {
}
}
+ /** A predicate that checks whether a given point is within a polygon. */
+ public static class PolygonPredicate extends Grid {
+
+ private final Polygon2D tree;
+
+ private PolygonPredicate(
+ int latShift, int lonShift,
+ int latBase, int lonBase,
+ int maxLatDelta, int maxLonDelta,
+ byte[] relations,
+ Polygon2D tree) {
+ super(latShift, lonShift, latBase, lonBase, maxLatDelta, maxLonDelta, relations);
+ this.tree = tree;
+ }
+
+ /** Check whether the given point is within the considered polygon.
+ * NOTE: this operates directly on the encoded representation of points. */
+ public boolean test(int lat, int lon) {
+ final int lat2 = ((lat - Integer.MIN_VALUE) >>> latShift);
+ if (lat2 < latBase || lat2 >= latBase + maxLatDelta) {
+ return false;
+ }
+ int lon2 = ((lon - Integer.MIN_VALUE) >>> lonShift);
+ if (lon2 < lonBase) { // wrap
+ lon2 += 1 << (32 - lonShift);
+ }
+ assert Integer.toUnsignedLong(lon2) >= lonBase;
+ assert lon2 - lonBase >= 0;
+ if (lon2 - lonBase >= maxLonDelta) {
+ return false;
+ }
+
+ final int relation = relations[(lat2 - latBase) * maxLonDelta + (lon2 - lonBase)];
+ if (relation == Relation.CELL_CROSSES_QUERY.ordinal()) {
+ return tree.contains(decodeLatitude(lat), decodeLongitude(lon));
+ } else {
+ return relation == Relation.CELL_INSIDE_QUERY.ordinal();
+ }
+ }
+ }
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d2051e3f/lucene/sandbox/src/java/org/apache/lucene/document/LatLonDocValuesDistanceQuery.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonDocValuesDistanceQuery.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonDocValuesDistanceQuery.java
index 6febedc..df40eef 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonDocValuesDistanceQuery.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonDocValuesDistanceQuery.java
@@ -113,7 +113,7 @@ final class LatLonDocValuesDistanceQuery extends Query {
final long value = values.valueAt(i);
final int lat = (int) (value >>> 32);
final int lon = (int) (value & 0xFFFFFFFF);
- if (distancePredicate.apply(lat, lon)) {
+ if (distancePredicate.test(lat, lon)) {
return true;
}
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d2051e3f/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java
index 421858d..6028086 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java
@@ -164,7 +164,7 @@ final class LatLonPointDistanceQuery extends Query {
int docLatitude = NumericUtils.sortableBytesToInt(packedValue, 0);
int docLongitude = NumericUtils.sortableBytesToInt(packedValue, Integer.BYTES);
- if (distancePredicate.apply(docLatitude, docLongitude)) {
+ if (distancePredicate.test(docLatitude, docLongitude)) {
adder.add(docID);
}
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d2051e3f/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java
index 8db8296..4609d35 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java
@@ -35,6 +35,7 @@ import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.util.DocIdSetBuilder;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.StringHelper;
+import org.apache.lucene.geo.GeoEncodingUtils;
import org.apache.lucene.geo.Polygon;
import org.apache.lucene.geo.Polygon2D;
@@ -92,6 +93,7 @@ final class LatLonPointInPolygonQuery extends Query {
NumericUtils.intToSortableBytes(encodeLongitude(box.maxLon), maxLon, 0);
final Polygon2D tree = Polygon2D.create(polygons);
+ final GeoEncodingUtils.PolygonPredicate polygonPredicate = GeoEncodingUtils.createPolygonPredicate(polygons, tree);
return new ConstantScoreWeight(this) {
@@ -130,8 +132,8 @@ final class LatLonPointInPolygonQuery extends Query {
@Override
public void visit(int docID, byte[] packedValue) {
- if (tree.contains(decodeLatitude(packedValue, 0),
- decodeLongitude(packedValue, Integer.BYTES))) {
+ if (polygonPredicate.test(NumericUtils.sortableBytesToInt(packedValue, 0),
+ NumericUtils.sortableBytesToInt(packedValue, Integer.BYTES))) {
adder.add(docID);
}
}
[3/6] lucene-solr:master: LUCENE-7654: To-parent block joins should
implement two-phase iteration.
Posted by jp...@apache.org.
LUCENE-7654: To-parent block joins should implement two-phase iteration.
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/9d5dc0cf
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/9d5dc0cf
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/9d5dc0cf
Branch: refs/heads/master
Commit: 9d5dc0cf573cf8fc75dd7799844db2cb0fa52da8
Parents: 076662d
Author: Adrien Grand <jp...@gmail.com>
Authored: Mon Jan 30 10:46:22 2017 +0100
Committer: Adrien Grand <jp...@gmail.com>
Committed: Mon Jan 30 10:46:22 2017 +0100
----------------------------------------------------------------------
lucene/CHANGES.txt | 4 +
.../search/join/ToParentBlockJoinQuery.java | 336 ++++++++++---------
.../lucene/search/join/TestBlockJoin.java | 5 +-
.../search/join/TestBlockJoinValidation.java | 19 --
4 files changed, 189 insertions(+), 175 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9d5dc0cf/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 6a0d17d..17a528e 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -135,6 +135,10 @@ Optimizations
whether BKD cells are entirely within the distance close to the dateline.
(Adrien Grand)
+* LUCENE-7654: ToParentBlockJoinQuery now implements two-phase iteration and
+ computes scores lazily in order to be faster when used in conjunctions.
+ (Adrien Grand)
+
Build
* LUCENE-7651: Fix Javadocs build for Java 8u121 by injecting "Google Code
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9d5dc0cf/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
----------------------------------------------------------------------
diff --git a/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java b/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
index 6369eea..2837423 100644
--- a/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
+++ b/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
@@ -20,15 +20,18 @@ import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.Locale;
+
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.search.FilterWeight;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
+import org.apache.lucene.search.FilterWeight;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.ScorerSupplier;
+import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.BitSet;
@@ -74,7 +77,7 @@ public class ToParentBlockJoinQuery extends Query {
private final ScoreMode scoreMode;
/** Create a ToParentBlockJoinQuery.
- *
+ *
* @param childQuery Query matching child documents.
* @param parentsFilter Filter identifying the parent documents.
* @param scoreMode How to aggregate multiple child scores
@@ -100,7 +103,7 @@ public class ToParentBlockJoinQuery extends Query {
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
return new BlockJoinWeight(this, childQuery.createWeight(searcher, needsScores, boost), parentsFilter, needsScores ? scoreMode : ScoreMode.None);
}
-
+
/** Return our child query. */
public Query getChildQuery() {
return childQuery;
@@ -116,33 +119,44 @@ public class ToParentBlockJoinQuery extends Query {
this.scoreMode = scoreMode;
}
- // NOTE: acceptDocs applies (and is checked) only in the
- // parent document space
@Override
- public Scorer scorer(LeafReaderContext readerContext) throws IOException {
-
- final Scorer childScorer = in.scorer(readerContext);
- if (childScorer == null) {
- // No matches
+ public Scorer scorer(LeafReaderContext context) throws IOException {
+ final ScorerSupplier scorerSupplier = scorerSupplier(context);
+ if (scorerSupplier == null) {
return null;
}
+ return scorerSupplier.get(false);
+ }
- final int firstChildDoc = childScorer.iterator().nextDoc();
- if (firstChildDoc == DocIdSetIterator.NO_MORE_DOCS) {
- // No matches
+ // NOTE: acceptDocs applies (and is checked) only in the
+ // parent document space
+ @Override
+ public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
+ final ScorerSupplier childScorerSupplier = in.scorerSupplier(context);
+ if (childScorerSupplier == null) {
return null;
}
// NOTE: this does not take accept docs into account, the responsibility
// to not match deleted docs is on the scorer
- final BitSet parents = parentsFilter.getBitSet(readerContext);
-
+ final BitSet parents = parentsFilter.getBitSet(context);
if (parents == null) {
// No matches
return null;
}
- return new BlockJoinScorer(this, childScorer, parents, firstChildDoc, scoreMode);
+ return new ScorerSupplier() {
+
+ @Override
+ public Scorer get(boolean randomAccess) throws IOException {
+ return new BlockJoinScorer(BlockJoinWeight.this, childScorerSupplier.get(randomAccess), parents, scoreMode);
+ }
+
+ @Override
+ public long cost() {
+ return childScorerSupplier.cost();
+ }
+ };
}
@Override
@@ -154,180 +168,191 @@ public class ToParentBlockJoinQuery extends Query {
return Explanation.noMatch("Not a match");
}
}
-
- static class BlockJoinScorer extends Scorer {
- private final Scorer childScorer;
+
+ private static class ParentApproximation extends DocIdSetIterator {
+
+ private final DocIdSetIterator childApproximation;
private final BitSet parentBits;
- private final ScoreMode scoreMode;
- private int parentDoc = -1;
- private int prevParentDoc;
- private float parentScore;
- private int parentFreq;
- private int nextChildDoc;
- private int childDocUpto;
-
- public BlockJoinScorer(Weight weight, Scorer childScorer, BitSet parentBits, int firstChildDoc, ScoreMode scoreMode) {
- super(weight);
- //System.out.println("Q.init firstChildDoc=" + firstChildDoc);
+ private int doc = -1;
+
+ ParentApproximation(DocIdSetIterator childApproximation, BitSet parentBits) {
+ this.childApproximation = childApproximation;
this.parentBits = parentBits;
- this.childScorer = childScorer;
- this.scoreMode = scoreMode;
- nextChildDoc = firstChildDoc;
}
@Override
- public Collection<ChildScorer> getChildren() {
- return Collections.singleton(new ChildScorer(childScorer, "BLOCK_JOIN"));
+ public int docID() {
+ return doc;
}
@Override
- public DocIdSetIterator iterator() {
- return new DocIdSetIterator() {
- final DocIdSetIterator childIt = childScorer.iterator();
-
- @Override
- public int nextDoc() throws IOException {
- //System.out.println("Q.nextDoc() nextChildDoc=" + nextChildDoc);
- if (nextChildDoc == NO_MORE_DOCS) {
- //System.out.println(" end");
- return parentDoc = NO_MORE_DOCS;
- }
-
- // Gather all children sharing the same parent as
- // nextChildDoc
-
- parentDoc = parentBits.nextSetBit(nextChildDoc);
-
- // Parent & child docs are supposed to be
- // orthogonal:
- checkOrthogonal(nextChildDoc, parentDoc);
-
- //System.out.println(" parentDoc=" + parentDoc);
- assert parentDoc != DocIdSetIterator.NO_MORE_DOCS;
-
- float totalScore = 0;
- float maxScore = Float.NEGATIVE_INFINITY;
- float minScore = Float.POSITIVE_INFINITY;
-
- childDocUpto = 0;
- parentFreq = 0;
- do {
-
- //System.out.println(" c=" + nextChildDoc);
- if (scoreMode != ScoreMode.None) {
- // TODO: specialize this into dedicated classes per-scoreMode
- final float childScore = childScorer.score();
- final int childFreq = childScorer.freq();
- maxScore = Math.max(childScore, maxScore);
- minScore = Math.min(childScore, minScore);
- totalScore += childScore;
- parentFreq += childFreq;
- }
- childDocUpto++;
- nextChildDoc = childIt.nextDoc();
- } while (nextChildDoc < parentDoc);
-
- // Parent & child docs are supposed to be
- // orthogonal:
- checkOrthogonal(nextChildDoc, parentDoc);
-
- switch(scoreMode) {
- case Avg:
- parentScore = totalScore / childDocUpto;
- break;
- case Max:
- parentScore = maxScore;
- break;
- case Min:
- parentScore = minScore;
- break;
- case Total:
- parentScore = totalScore;
- break;
- case None:
- break;
- }
-
- //System.out.println(" return parentDoc=" + parentDoc + " childDocUpto=" + childDocUpto);
- return parentDoc;
- }
-
- @Override
- public int advance(int parentTarget) throws IOException {
+ public int nextDoc() throws IOException {
+ return advance(doc + 1);
+ }
- //System.out.println("Q.advance parentTarget=" + parentTarget);
- if (parentTarget == NO_MORE_DOCS) {
- return parentDoc = NO_MORE_DOCS;
- }
+ @Override
+ public int advance(int target) throws IOException {
+ if (target >= parentBits.length()) {
+ return doc = NO_MORE_DOCS;
+ }
+ final int firstChildTarget = target == 0 ? 0 : parentBits.prevSetBit(target - 1) + 1;
+ int childDoc = childApproximation.docID();
+ if (childDoc < firstChildTarget) {
+ childDoc = childApproximation.advance(firstChildTarget);
+ }
+ if (childDoc >= parentBits.length() - 1) {
+ return doc = NO_MORE_DOCS;
+ }
+ return doc = parentBits.nextSetBit(childDoc + 1);
+ }
- if (parentTarget == 0) {
- // Callers should only be passing in a docID from
- // the parent space, so this means this parent
- // has no children (it got docID 0), so it cannot
- // possibly match. We must handle this case
- // separately otherwise we pass invalid -1 to
- // prevSetBit below:
- return nextDoc();
- }
+ @Override
+ public long cost() {
+ return childApproximation.cost();
+ }
+ }
- prevParentDoc = parentBits.prevSetBit(parentTarget-1);
+ private static class ParentTwoPhase extends TwoPhaseIterator {
- //System.out.println(" rolled back to prevParentDoc=" + prevParentDoc + " vs parentDoc=" + parentDoc);
- assert prevParentDoc >= parentDoc;
- if (prevParentDoc > nextChildDoc) {
- nextChildDoc = childIt.advance(prevParentDoc);
- // System.out.println(" childScorer advanced to child docID=" + nextChildDoc);
- //} else {
- //System.out.println(" skip childScorer advance");
- }
+ private final ParentApproximation parentApproximation;
+ private final DocIdSetIterator childApproximation;
+ private final TwoPhaseIterator childTwoPhase;
- // Parent & child docs are supposed to be orthogonal:
- checkOrthogonal(nextChildDoc, prevParentDoc);
+ ParentTwoPhase(ParentApproximation parentApproximation, TwoPhaseIterator childTwoPhase) {
+ super(parentApproximation);
+ this.parentApproximation = parentApproximation;
+ this.childApproximation = childTwoPhase.approximation();
+ this.childTwoPhase = childTwoPhase;
+ }
- final int nd = nextDoc();
- //System.out.println(" return nextParentDoc=" + nd);
- return nd;
+ @Override
+ public boolean matches() throws IOException {
+ assert childApproximation.docID() < parentApproximation.docID();
+ do {
+ if (childTwoPhase.matches()) {
+ return true;
}
+ } while (childApproximation.nextDoc() < parentApproximation.docID());
+ return false;
+ }
- @Override
- public int docID() {
- return parentDoc;
- }
+ @Override
+ public float matchCost() {
+ // TODO: how could we compute a match cost?
+ return childTwoPhase.matchCost() + 10;
+ }
+ }
- @Override
- public long cost() {
- return childIt.cost();
- }
- };
+ static class BlockJoinScorer extends Scorer {
+ private final Scorer childScorer;
+ private final BitSet parentBits;
+ private final ScoreMode scoreMode;
+ private final DocIdSetIterator childApproximation;
+ private final TwoPhaseIterator childTwoPhase;
+ private final ParentApproximation parentApproximation;
+ private final ParentTwoPhase parentTwoPhase;
+ private float score;
+ private int freq;
+
+ public BlockJoinScorer(Weight weight, Scorer childScorer, BitSet parentBits, ScoreMode scoreMode) {
+ super(weight);
+ //System.out.println("Q.init firstChildDoc=" + firstChildDoc);
+ this.parentBits = parentBits;
+ this.childScorer = childScorer;
+ this.scoreMode = scoreMode;
+ childTwoPhase = childScorer.twoPhaseIterator();
+ if (childTwoPhase == null) {
+ childApproximation = childScorer.iterator();
+ parentApproximation = new ParentApproximation(childApproximation, parentBits);
+ parentTwoPhase = null;
+ } else {
+ childApproximation = childTwoPhase.approximation();
+ parentApproximation = new ParentApproximation(childTwoPhase.approximation(), parentBits);
+ parentTwoPhase = new ParentTwoPhase(parentApproximation, childTwoPhase);
+ }
}
- private void checkOrthogonal(int childDoc, int parentDoc) {
- if (childDoc==parentDoc) {
- throw new IllegalStateException("Child query must not match same docs with parent filter. "
- + "Combine them as must clauses (+) to find a problem doc. "
- + "docId=" + nextChildDoc + ", " + childScorer.getClass());
-
+ @Override
+ public Collection<ChildScorer> getChildren() {
+ return Collections.singleton(new ChildScorer(childScorer, "BLOCK_JOIN"));
+ }
+
+ @Override
+ public DocIdSetIterator iterator() {
+ if (parentTwoPhase == null) {
+ // the approximation is exact
+ return parentApproximation;
+ } else {
+ return TwoPhaseIterator.asDocIdSetIterator(parentTwoPhase);
}
}
@Override
+ public TwoPhaseIterator twoPhaseIterator() {
+ return parentTwoPhase;
+ }
+
+ @Override
public int docID() {
- return parentDoc;
+ return parentApproximation.docID();
}
@Override
public float score() throws IOException {
- return parentScore;
+ setScoreAndFreq();
+ return score;
}
@Override
- public int freq() {
- return parentFreq;
+ public int freq() throws IOException {
+ setScoreAndFreq();
+ return freq;
+ }
+
+ private void setScoreAndFreq() throws IOException {
+ if (childApproximation.docID() >= parentApproximation.docID()) {
+ return;
+ }
+ double score = scoreMode == ScoreMode.None ? 0 : childScorer.score();
+ int freq = 1;
+ while (childApproximation.nextDoc() < parentApproximation.docID()) {
+ if (childTwoPhase == null || childTwoPhase.matches()) {
+ final float childScore = childScorer.score();
+ freq += 1;
+ switch (scoreMode) {
+ case Total:
+ case Avg:
+ score += childScore;
+ break;
+ case Min:
+ score = Math.min(score, childScore);
+ break;
+ case Max:
+ score = Math.min(score, childScore);
+ break;
+ case None:
+ break;
+ default:
+ throw new AssertionError();
+ }
+ }
+ }
+ if (childApproximation.docID() == parentApproximation.docID() && (childTwoPhase == null || childTwoPhase.matches())) {
+ throw new IllegalStateException("Child query must not match same docs with parent filter. "
+ + "Combine them as must clauses (+) to find a problem doc. "
+ + "docId=" + parentApproximation.docID() + ", " + childScorer.getClass());
+ }
+ if (scoreMode == ScoreMode.Avg) {
+ score /= freq;
+ }
+ this.score = (float) score;
+ this.freq = freq;
}
public Explanation explain(LeafReaderContext context, Weight childWeight) throws IOException {
+ int prevParentDoc = parentBits.prevSetBit(parentApproximation.docID() - 1);
int start = context.docBase + prevParentDoc + 1; // +1 b/c prevParentDoc is previous parent doc
- int end = context.docBase + parentDoc - 1; // -1 b/c parentDoc is parent doc
+ int end = context.docBase + parentApproximation.docID() - 1; // -1 b/c parentDoc is parent doc
Explanation bestChild = null;
int matches = 0;
@@ -341,6 +366,7 @@ public class ToParentBlockJoinQuery extends Query {
}
}
+ assert freq() == matches;
return Explanation.match(score(), String.format(Locale.ROOT,
"Score based on %d child docs in range from %d to %d, best match:", matches, start, end), bestChild
);
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9d5dc0cf/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java
----------------------------------------------------------------------
diff --git a/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java b/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java
index f508f84..a13e66f 100644
--- a/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java
+++ b/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java
@@ -671,7 +671,7 @@ public class TestBlockJoin extends LuceneTestCase {
System.out.println("TEST: iter=" + (1+iter) + " of " + iters);
}
- final Query childQuery;
+ Query childQuery;
if (random().nextInt(3) == 2) {
final int childFieldID = random().nextInt(childFields.length);
childQuery = new TermQuery(new Term("child" + childFieldID,
@@ -706,6 +706,9 @@ public class TestBlockJoin extends LuceneTestCase {
random().nextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT);
childQuery = bq.build();
}
+ if (random().nextBoolean()) {
+ childQuery = new RandomApproximationQuery(childQuery, random());
+ }
final ScoreMode agg = ScoreMode.values()[random().nextInt(ScoreMode.values().length)];
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9d5dc0cf/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoinValidation.java
----------------------------------------------------------------------
diff --git a/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoinValidation.java b/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoinValidation.java
index aa68d09..cb3762c 100644
--- a/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoinValidation.java
+++ b/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoinValidation.java
@@ -87,25 +87,6 @@ public class TestBlockJoinValidation extends LuceneTestCase {
assertTrue(expected.getMessage() != null && expected.getMessage().contains("Child query must not match same docs with parent filter"));
}
- public void testAdvanceValidationForToParentBjq() throws Exception {
- int randomChildNumber = getRandomChildNumber(0);
- // we need to make advance method meet wrong document, so random child number
- // in BJQ must be greater than child number in Boolean clause
- int nextRandomChildNumber = getRandomChildNumber(randomChildNumber);
- Query parentQueryWithRandomChild = createChildrenQueryWithOneParent(nextRandomChildNumber);
- ToParentBlockJoinQuery blockJoinQuery = new ToParentBlockJoinQuery(parentQueryWithRandomChild, parentsFilter, ScoreMode.None);
- // advance() method is used by ConjunctionScorer, so we need to create Boolean conjunction query
- BooleanQuery.Builder conjunctionQuery = new BooleanQuery.Builder();
- WildcardQuery childQuery = new WildcardQuery(new Term("child", createFieldValue(randomChildNumber)));
- conjunctionQuery.add(new BooleanClause(childQuery, BooleanClause.Occur.MUST));
- conjunctionQuery.add(new BooleanClause(blockJoinQuery, BooleanClause.Occur.MUST));
-
- IllegalStateException expected = expectThrows(IllegalStateException.class, () -> {
- indexSearcher.search(conjunctionQuery.build(), 1);
- });
- assertTrue(expected.getMessage() != null && expected.getMessage().contains("Child query must not match same docs with parent filter"));
- }
-
public void testNextDocValidationForToChildBjq() throws Exception {
Query parentQueryWithRandomChild = createParentsQueryWithOneChild(getRandomChildNumber(0));
[5/6] lucene-solr:branch_6x: LUCENE-7660: LatLonPointDistanceQuery
could skip distance computations more often.
Posted by jp...@apache.org.
LUCENE-7660: LatLonPointDistanceQuery could skip distance computations more often.
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/1332f0f0
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/1332f0f0
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/1332f0f0
Branch: refs/heads/branch_6x
Commit: 1332f0f05b265879073f2879b67da9172b7f203b
Parents: d2051e3
Author: Adrien Grand <jp...@gmail.com>
Authored: Mon Jan 30 09:47:51 2017 +0100
Committer: Adrien Grand <jp...@gmail.com>
Committed: Mon Jan 30 11:16:31 2017 +0100
----------------------------------------------------------------------
lucene/CHANGES.txt | 4 ++++
.../core/src/java/org/apache/lucene/geo/GeoUtils.java | 13 ++++++++++++-
.../src/test/org/apache/lucene/geo/TestGeoUtils.java | 12 ++++++++++++
3 files changed, 28 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1332f0f0/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 1bc5046..e1af086 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -73,6 +73,10 @@ Optimizations
* LUCENE-7661: Speed up for LatLonPointInPolygonQuery by pre-computing the
relation of the polygon with a grid. (Adrien Grand)
+* LUCENE-7660: Speed up LatLonPointDistanceQuery by improving the detection of
+ whether BKD cells are entirely within the distance close to the dateline.
+ (Adrien Grand)
+
Build
* LUCENE-7651: Fix Javadocs build for Java 8u121 by injecting "Google Code
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1332f0f0/lucene/core/src/java/org/apache/lucene/geo/GeoUtils.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/geo/GeoUtils.java b/lucene/core/src/java/org/apache/lucene/geo/GeoUtils.java
index fa6f7a5..3695d01 100644
--- a/lucene/core/src/java/org/apache/lucene/geo/GeoUtils.java
+++ b/lucene/core/src/java/org/apache/lucene/geo/GeoUtils.java
@@ -153,7 +153,7 @@ public final class GeoUtils {
}
}
- if (maxLon - lon < 90 && lon - minLon < 90 &&
+ if (within90LonDegrees(lon, minLon, maxLon) &&
SloppyMath.haversinSortKey(lat, lon, minLat, minLon) <= distanceSortKey &&
SloppyMath.haversinSortKey(lat, lon, minLat, maxLon) <= distanceSortKey &&
SloppyMath.haversinSortKey(lat, lon, maxLat, minLon) <= distanceSortKey &&
@@ -164,4 +164,15 @@ public final class GeoUtils {
return Relation.CELL_CROSSES_QUERY;
}
+
+ /** Return whether all points of {@code [minLon,maxLon]} are within 90 degrees of {@code lon}. */
+ static boolean within90LonDegrees(double lon, double minLon, double maxLon) {
+ if (maxLon <= lon - 180) {
+ lon -= 360;
+ } else if (minLon >= lon + 180) {
+ lon += 360;
+ }
+ return maxLon - lon < 90 && lon - minLon < 90;
+ }
+
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1332f0f0/lucene/core/src/test/org/apache/lucene/geo/TestGeoUtils.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/geo/TestGeoUtils.java b/lucene/core/src/test/org/apache/lucene/geo/TestGeoUtils.java
index 2cfb2f8..3d95a6d 100644
--- a/lucene/core/src/test/org/apache/lucene/geo/TestGeoUtils.java
+++ b/lucene/core/src/test/org/apache/lucene/geo/TestGeoUtils.java
@@ -293,4 +293,16 @@ public class TestGeoUtils extends LuceneTestCase {
return false;
}
+
+ public void testWithin90LonDegrees() {
+ assertTrue(GeoUtils.within90LonDegrees(0, -80, 80));
+ assertFalse(GeoUtils.within90LonDegrees(0, -100, 80));
+ assertFalse(GeoUtils.within90LonDegrees(0, -80, 100));
+
+ assertTrue(GeoUtils.within90LonDegrees(-150, 140, 170));
+ assertFalse(GeoUtils.within90LonDegrees(-150, 120, 150));
+
+ assertTrue(GeoUtils.within90LonDegrees(150, -170, -140));
+ assertFalse(GeoUtils.within90LonDegrees(150, -150, -120));
+ }
}
[6/6] lucene-solr:branch_6x: LUCENE-7654: To-parent block joins
should implement two-phase iteration.
Posted by jp...@apache.org.
LUCENE-7654: To-parent block joins should implement two-phase iteration.
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/b6b44d44
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/b6b44d44
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/b6b44d44
Branch: refs/heads/branch_6x
Commit: b6b44d44d8277c533c7df975e05dd1c313cf3f23
Parents: 1332f0f
Author: Adrien Grand <jp...@gmail.com>
Authored: Mon Jan 30 10:46:22 2017 +0100
Committer: Adrien Grand <jp...@gmail.com>
Committed: Mon Jan 30 11:16:41 2017 +0100
----------------------------------------------------------------------
lucene/CHANGES.txt | 4 +
.../search/join/ToParentBlockJoinQuery.java | 336 ++++++++++---------
.../lucene/search/join/TestBlockJoin.java | 5 +-
.../search/join/TestBlockJoinValidation.java | 19 --
4 files changed, 189 insertions(+), 175 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b6b44d44/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index e1af086..c26b8b9 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -77,6 +77,10 @@ Optimizations
whether BKD cells are entirely within the distance close to the dateline.
(Adrien Grand)
+* LUCENE-7654: ToParentBlockJoinQuery now implements two-phase iteration and
+ computes scores lazily in order to be faster when used in conjunctions.
+ (Adrien Grand)
+
Build
* LUCENE-7651: Fix Javadocs build for Java 8u121 by injecting "Google Code
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b6b44d44/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
----------------------------------------------------------------------
diff --git a/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java b/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
index 254917f..0e15ac2 100644
--- a/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
+++ b/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
@@ -20,15 +20,18 @@ import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.Locale;
+
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.search.FilterWeight;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
+import org.apache.lucene.search.FilterWeight;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.ScorerSupplier;
+import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.BitSet;
@@ -74,7 +77,7 @@ public class ToParentBlockJoinQuery extends Query {
private final ScoreMode scoreMode;
/** Create a ToParentBlockJoinQuery.
- *
+ *
* @param childQuery Query matching child documents.
* @param parentsFilter Filter identifying the parent documents.
* @param scoreMode How to aggregate multiple child scores
@@ -100,7 +103,7 @@ public class ToParentBlockJoinQuery extends Query {
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
return new BlockJoinWeight(this, childQuery.createWeight(searcher, needsScores), parentsFilter, needsScores ? scoreMode : ScoreMode.None);
}
-
+
/** Return our child query. */
public Query getChildQuery() {
return childQuery;
@@ -116,33 +119,44 @@ public class ToParentBlockJoinQuery extends Query {
this.scoreMode = scoreMode;
}
- // NOTE: acceptDocs applies (and is checked) only in the
- // parent document space
@Override
- public Scorer scorer(LeafReaderContext readerContext) throws IOException {
-
- final Scorer childScorer = in.scorer(readerContext);
- if (childScorer == null) {
- // No matches
+ public Scorer scorer(LeafReaderContext context) throws IOException {
+ final ScorerSupplier scorerSupplier = scorerSupplier(context);
+ if (scorerSupplier == null) {
return null;
}
+ return scorerSupplier.get(false);
+ }
- final int firstChildDoc = childScorer.iterator().nextDoc();
- if (firstChildDoc == DocIdSetIterator.NO_MORE_DOCS) {
- // No matches
+ // NOTE: acceptDocs applies (and is checked) only in the
+ // parent document space
+ @Override
+ public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
+ final ScorerSupplier childScorerSupplier = in.scorerSupplier(context);
+ if (childScorerSupplier == null) {
return null;
}
// NOTE: this does not take accept docs into account, the responsibility
// to not match deleted docs is on the scorer
- final BitSet parents = parentsFilter.getBitSet(readerContext);
-
+ final BitSet parents = parentsFilter.getBitSet(context);
if (parents == null) {
// No matches
return null;
}
- return new BlockJoinScorer(this, childScorer, parents, firstChildDoc, scoreMode);
+ return new ScorerSupplier() {
+
+ @Override
+ public Scorer get(boolean randomAccess) throws IOException {
+ return new BlockJoinScorer(BlockJoinWeight.this, childScorerSupplier.get(randomAccess), parents, scoreMode);
+ }
+
+ @Override
+ public long cost() {
+ return childScorerSupplier.cost();
+ }
+ };
}
@Override
@@ -154,180 +168,191 @@ public class ToParentBlockJoinQuery extends Query {
return Explanation.noMatch("Not a match");
}
}
-
- static class BlockJoinScorer extends Scorer {
- private final Scorer childScorer;
+
+ private static class ParentApproximation extends DocIdSetIterator {
+
+ private final DocIdSetIterator childApproximation;
private final BitSet parentBits;
- private final ScoreMode scoreMode;
- private int parentDoc = -1;
- private int prevParentDoc;
- private float parentScore;
- private int parentFreq;
- private int nextChildDoc;
- private int childDocUpto;
-
- public BlockJoinScorer(Weight weight, Scorer childScorer, BitSet parentBits, int firstChildDoc, ScoreMode scoreMode) {
- super(weight);
- //System.out.println("Q.init firstChildDoc=" + firstChildDoc);
+ private int doc = -1;
+
+ ParentApproximation(DocIdSetIterator childApproximation, BitSet parentBits) {
+ this.childApproximation = childApproximation;
this.parentBits = parentBits;
- this.childScorer = childScorer;
- this.scoreMode = scoreMode;
- nextChildDoc = firstChildDoc;
}
@Override
- public Collection<ChildScorer> getChildren() {
- return Collections.singleton(new ChildScorer(childScorer, "BLOCK_JOIN"));
+ public int docID() {
+ return doc;
}
@Override
- public DocIdSetIterator iterator() {
- return new DocIdSetIterator() {
- final DocIdSetIterator childIt = childScorer.iterator();
-
- @Override
- public int nextDoc() throws IOException {
- //System.out.println("Q.nextDoc() nextChildDoc=" + nextChildDoc);
- if (nextChildDoc == NO_MORE_DOCS) {
- //System.out.println(" end");
- return parentDoc = NO_MORE_DOCS;
- }
-
- // Gather all children sharing the same parent as
- // nextChildDoc
-
- parentDoc = parentBits.nextSetBit(nextChildDoc);
-
- // Parent & child docs are supposed to be
- // orthogonal:
- checkOrthogonal(nextChildDoc, parentDoc);
-
- //System.out.println(" parentDoc=" + parentDoc);
- assert parentDoc != DocIdSetIterator.NO_MORE_DOCS;
-
- float totalScore = 0;
- float maxScore = Float.NEGATIVE_INFINITY;
- float minScore = Float.POSITIVE_INFINITY;
-
- childDocUpto = 0;
- parentFreq = 0;
- do {
-
- //System.out.println(" c=" + nextChildDoc);
- if (scoreMode != ScoreMode.None) {
- // TODO: specialize this into dedicated classes per-scoreMode
- final float childScore = childScorer.score();
- final int childFreq = childScorer.freq();
- maxScore = Math.max(childScore, maxScore);
- minScore = Math.min(childScore, minScore);
- totalScore += childScore;
- parentFreq += childFreq;
- }
- childDocUpto++;
- nextChildDoc = childIt.nextDoc();
- } while (nextChildDoc < parentDoc);
-
- // Parent & child docs are supposed to be
- // orthogonal:
- checkOrthogonal(nextChildDoc, parentDoc);
-
- switch(scoreMode) {
- case Avg:
- parentScore = totalScore / childDocUpto;
- break;
- case Max:
- parentScore = maxScore;
- break;
- case Min:
- parentScore = minScore;
- break;
- case Total:
- parentScore = totalScore;
- break;
- case None:
- break;
- }
-
- //System.out.println(" return parentDoc=" + parentDoc + " childDocUpto=" + childDocUpto);
- return parentDoc;
- }
-
- @Override
- public int advance(int parentTarget) throws IOException {
+ public int nextDoc() throws IOException {
+ return advance(doc + 1);
+ }
- //System.out.println("Q.advance parentTarget=" + parentTarget);
- if (parentTarget == NO_MORE_DOCS) {
- return parentDoc = NO_MORE_DOCS;
- }
+ @Override
+ public int advance(int target) throws IOException {
+ if (target >= parentBits.length()) {
+ return doc = NO_MORE_DOCS;
+ }
+ final int firstChildTarget = target == 0 ? 0 : parentBits.prevSetBit(target - 1) + 1;
+ int childDoc = childApproximation.docID();
+ if (childDoc < firstChildTarget) {
+ childDoc = childApproximation.advance(firstChildTarget);
+ }
+ if (childDoc >= parentBits.length() - 1) {
+ return doc = NO_MORE_DOCS;
+ }
+ return doc = parentBits.nextSetBit(childDoc + 1);
+ }
- if (parentTarget == 0) {
- // Callers should only be passing in a docID from
- // the parent space, so this means this parent
- // has no children (it got docID 0), so it cannot
- // possibly match. We must handle this case
- // separately otherwise we pass invalid -1 to
- // prevSetBit below:
- return nextDoc();
- }
+ @Override
+ public long cost() {
+ return childApproximation.cost();
+ }
+ }
- prevParentDoc = parentBits.prevSetBit(parentTarget-1);
+ private static class ParentTwoPhase extends TwoPhaseIterator {
- //System.out.println(" rolled back to prevParentDoc=" + prevParentDoc + " vs parentDoc=" + parentDoc);
- assert prevParentDoc >= parentDoc;
- if (prevParentDoc > nextChildDoc) {
- nextChildDoc = childIt.advance(prevParentDoc);
- // System.out.println(" childScorer advanced to child docID=" + nextChildDoc);
- //} else {
- //System.out.println(" skip childScorer advance");
- }
+ private final ParentApproximation parentApproximation;
+ private final DocIdSetIterator childApproximation;
+ private final TwoPhaseIterator childTwoPhase;
- // Parent & child docs are supposed to be orthogonal:
- checkOrthogonal(nextChildDoc, prevParentDoc);
+ ParentTwoPhase(ParentApproximation parentApproximation, TwoPhaseIterator childTwoPhase) {
+ super(parentApproximation);
+ this.parentApproximation = parentApproximation;
+ this.childApproximation = childTwoPhase.approximation();
+ this.childTwoPhase = childTwoPhase;
+ }
- final int nd = nextDoc();
- //System.out.println(" return nextParentDoc=" + nd);
- return nd;
+ @Override
+ public boolean matches() throws IOException {
+ assert childApproximation.docID() < parentApproximation.docID();
+ do {
+ if (childTwoPhase.matches()) {
+ return true;
}
+ } while (childApproximation.nextDoc() < parentApproximation.docID());
+ return false;
+ }
- @Override
- public int docID() {
- return parentDoc;
- }
+ @Override
+ public float matchCost() {
+ // TODO: how could we compute a match cost?
+ return childTwoPhase.matchCost() + 10;
+ }
+ }
- @Override
- public long cost() {
- return childIt.cost();
- }
- };
+ static class BlockJoinScorer extends Scorer {
+ private final Scorer childScorer;
+ private final BitSet parentBits;
+ private final ScoreMode scoreMode;
+ private final DocIdSetIterator childApproximation;
+ private final TwoPhaseIterator childTwoPhase;
+ private final ParentApproximation parentApproximation;
+ private final ParentTwoPhase parentTwoPhase;
+ private float score;
+ private int freq;
+
+ public BlockJoinScorer(Weight weight, Scorer childScorer, BitSet parentBits, ScoreMode scoreMode) {
+ super(weight);
+ //System.out.println("Q.init firstChildDoc=" + firstChildDoc);
+ this.parentBits = parentBits;
+ this.childScorer = childScorer;
+ this.scoreMode = scoreMode;
+ childTwoPhase = childScorer.twoPhaseIterator();
+ if (childTwoPhase == null) {
+ childApproximation = childScorer.iterator();
+ parentApproximation = new ParentApproximation(childApproximation, parentBits);
+ parentTwoPhase = null;
+ } else {
+ childApproximation = childTwoPhase.approximation();
+ parentApproximation = new ParentApproximation(childTwoPhase.approximation(), parentBits);
+ parentTwoPhase = new ParentTwoPhase(parentApproximation, childTwoPhase);
+ }
}
- private void checkOrthogonal(int childDoc, int parentDoc) {
- if (childDoc==parentDoc) {
- throw new IllegalStateException("Child query must not match same docs with parent filter. "
- + "Combine them as must clauses (+) to find a problem doc. "
- + "docId=" + nextChildDoc + ", " + childScorer.getClass());
-
+ @Override
+ public Collection<ChildScorer> getChildren() {
+ return Collections.singleton(new ChildScorer(childScorer, "BLOCK_JOIN"));
+ }
+
+ @Override
+ public DocIdSetIterator iterator() {
+ if (parentTwoPhase == null) {
+ // the approximation is exact
+ return parentApproximation;
+ } else {
+ return TwoPhaseIterator.asDocIdSetIterator(parentTwoPhase);
}
}
@Override
+ public TwoPhaseIterator twoPhaseIterator() {
+ return parentTwoPhase;
+ }
+
+ @Override
public int docID() {
- return parentDoc;
+ return parentApproximation.docID();
}
@Override
public float score() throws IOException {
- return parentScore;
+ setScoreAndFreq();
+ return score;
}
@Override
- public int freq() {
- return parentFreq;
+ public int freq() throws IOException {
+ setScoreAndFreq();
+ return freq;
+ }
+
+ private void setScoreAndFreq() throws IOException {
+ if (childApproximation.docID() >= parentApproximation.docID()) {
+ return;
+ }
+ double score = scoreMode == ScoreMode.None ? 0 : childScorer.score();
+ int freq = 1;
+ while (childApproximation.nextDoc() < parentApproximation.docID()) {
+ if (childTwoPhase == null || childTwoPhase.matches()) {
+ final float childScore = childScorer.score();
+ freq += 1;
+ switch (scoreMode) {
+ case Total:
+ case Avg:
+ score += childScore;
+ break;
+ case Min:
+ score = Math.min(score, childScore);
+ break;
+ case Max:
+ score = Math.min(score, childScore);
+ break;
+ case None:
+ break;
+ default:
+ throw new AssertionError();
+ }
+ }
+ }
+ if (childApproximation.docID() == parentApproximation.docID() && (childTwoPhase == null || childTwoPhase.matches())) {
+ throw new IllegalStateException("Child query must not match same docs with parent filter. "
+ + "Combine them as must clauses (+) to find a problem doc. "
+ + "docId=" + parentApproximation.docID() + ", " + childScorer.getClass());
+ }
+ if (scoreMode == ScoreMode.Avg) {
+ score /= freq;
+ }
+ this.score = (float) score;
+ this.freq = freq;
}
public Explanation explain(LeafReaderContext context, Weight childWeight) throws IOException {
+ int prevParentDoc = parentBits.prevSetBit(parentApproximation.docID() - 1);
int start = context.docBase + prevParentDoc + 1; // +1 b/c prevParentDoc is previous parent doc
- int end = context.docBase + parentDoc - 1; // -1 b/c parentDoc is parent doc
+ int end = context.docBase + parentApproximation.docID() - 1; // -1 b/c parentDoc is parent doc
Explanation bestChild = null;
int matches = 0;
@@ -341,6 +366,7 @@ public class ToParentBlockJoinQuery extends Query {
}
}
+ assert freq() == matches;
return Explanation.match(score(), String.format(Locale.ROOT,
"Score based on %d child docs in range from %d to %d, best match:", matches, start, end), bestChild
);
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b6b44d44/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java
----------------------------------------------------------------------
diff --git a/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java b/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java
index 02f6b23..9a6ead4 100644
--- a/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java
+++ b/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java
@@ -671,7 +671,7 @@ public class TestBlockJoin extends LuceneTestCase {
System.out.println("TEST: iter=" + (1+iter) + " of " + iters);
}
- final Query childQuery;
+ Query childQuery;
if (random().nextInt(3) == 2) {
final int childFieldID = random().nextInt(childFields.length);
childQuery = new TermQuery(new Term("child" + childFieldID,
@@ -706,6 +706,9 @@ public class TestBlockJoin extends LuceneTestCase {
random().nextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT);
childQuery = bq.build();
}
+ if (random().nextBoolean()) {
+ childQuery = new RandomApproximationQuery(childQuery, random());
+ }
final ScoreMode agg = ScoreMode.values()[random().nextInt(ScoreMode.values().length)];
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b6b44d44/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoinValidation.java
----------------------------------------------------------------------
diff --git a/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoinValidation.java b/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoinValidation.java
index aa68d09..cb3762c 100644
--- a/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoinValidation.java
+++ b/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoinValidation.java
@@ -87,25 +87,6 @@ public class TestBlockJoinValidation extends LuceneTestCase {
assertTrue(expected.getMessage() != null && expected.getMessage().contains("Child query must not match same docs with parent filter"));
}
- public void testAdvanceValidationForToParentBjq() throws Exception {
- int randomChildNumber = getRandomChildNumber(0);
- // we need to make advance method meet wrong document, so random child number
- // in BJQ must be greater than child number in Boolean clause
- int nextRandomChildNumber = getRandomChildNumber(randomChildNumber);
- Query parentQueryWithRandomChild = createChildrenQueryWithOneParent(nextRandomChildNumber);
- ToParentBlockJoinQuery blockJoinQuery = new ToParentBlockJoinQuery(parentQueryWithRandomChild, parentsFilter, ScoreMode.None);
- // advance() method is used by ConjunctionScorer, so we need to create Boolean conjunction query
- BooleanQuery.Builder conjunctionQuery = new BooleanQuery.Builder();
- WildcardQuery childQuery = new WildcardQuery(new Term("child", createFieldValue(randomChildNumber)));
- conjunctionQuery.add(new BooleanClause(childQuery, BooleanClause.Occur.MUST));
- conjunctionQuery.add(new BooleanClause(blockJoinQuery, BooleanClause.Occur.MUST));
-
- IllegalStateException expected = expectThrows(IllegalStateException.class, () -> {
- indexSearcher.search(conjunctionQuery.build(), 1);
- });
- assertTrue(expected.getMessage() != null && expected.getMessage().contains("Child query must not match same docs with parent filter"));
- }
-
public void testNextDocValidationForToChildBjq() throws Exception {
Query parentQueryWithRandomChild = createParentsQueryWithOneChild(getRandomChildNumber(0));
[2/6] lucene-solr:master: LUCENE-7660: LatLonPointDistanceQuery could
skip distance computations more often.
Posted by jp...@apache.org.
LUCENE-7660: LatLonPointDistanceQuery could skip distance computations more often.
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/076662d1
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/076662d1
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/076662d1
Branch: refs/heads/master
Commit: 076662d1b23414b94e332206dd7ff73e8d9f9d0b
Parents: 74240be
Author: Adrien Grand <jp...@gmail.com>
Authored: Mon Jan 30 09:47:51 2017 +0100
Committer: Adrien Grand <jp...@gmail.com>
Committed: Mon Jan 30 09:47:51 2017 +0100
----------------------------------------------------------------------
lucene/CHANGES.txt | 4 ++++
.../core/src/java/org/apache/lucene/geo/GeoUtils.java | 13 ++++++++++++-
.../src/test/org/apache/lucene/geo/TestGeoUtils.java | 12 ++++++++++++
3 files changed, 28 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/076662d1/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 0be102c..6a0d17d 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -131,6 +131,10 @@ Optimizations
* LUCENE-7661: Speed up for LatLonPointInPolygonQuery by pre-computing the
relation of the polygon with a grid. (Adrien Grand)
+* LUCENE-7660: Speed up LatLonPointDistanceQuery by improving the detection of
+ whether BKD cells are entirely within the distance close to the dateline.
+ (Adrien Grand)
+
Build
* LUCENE-7651: Fix Javadocs build for Java 8u121 by injecting "Google Code
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/076662d1/lucene/core/src/java/org/apache/lucene/geo/GeoUtils.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/geo/GeoUtils.java b/lucene/core/src/java/org/apache/lucene/geo/GeoUtils.java
index defcb48..aaca549 100644
--- a/lucene/core/src/java/org/apache/lucene/geo/GeoUtils.java
+++ b/lucene/core/src/java/org/apache/lucene/geo/GeoUtils.java
@@ -152,7 +152,7 @@ public final class GeoUtils {
}
}
- if (maxLon - lon < 90 && lon - minLon < 90 &&
+ if (within90LonDegrees(lon, minLon, maxLon) &&
SloppyMath.haversinSortKey(lat, lon, minLat, minLon) <= distanceSortKey &&
SloppyMath.haversinSortKey(lat, lon, minLat, maxLon) <= distanceSortKey &&
SloppyMath.haversinSortKey(lat, lon, maxLat, minLon) <= distanceSortKey &&
@@ -163,4 +163,15 @@ public final class GeoUtils {
return Relation.CELL_CROSSES_QUERY;
}
+
+ /** Return whether all points of {@code [minLon,maxLon]} are within 90 degrees of {@code lon}. */
+ static boolean within90LonDegrees(double lon, double minLon, double maxLon) {
+ if (maxLon <= lon - 180) {
+ lon -= 360;
+ } else if (minLon >= lon + 180) {
+ lon += 360;
+ }
+ return maxLon - lon < 90 && lon - minLon < 90;
+ }
+
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/076662d1/lucene/core/src/test/org/apache/lucene/geo/TestGeoUtils.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/geo/TestGeoUtils.java b/lucene/core/src/test/org/apache/lucene/geo/TestGeoUtils.java
index 2cfb2f8..3d95a6d 100644
--- a/lucene/core/src/test/org/apache/lucene/geo/TestGeoUtils.java
+++ b/lucene/core/src/test/org/apache/lucene/geo/TestGeoUtils.java
@@ -293,4 +293,16 @@ public class TestGeoUtils extends LuceneTestCase {
return false;
}
+
+ public void testWithin90LonDegrees() {
+ assertTrue(GeoUtils.within90LonDegrees(0, -80, 80));
+ assertFalse(GeoUtils.within90LonDegrees(0, -100, 80));
+ assertFalse(GeoUtils.within90LonDegrees(0, -80, 100));
+
+ assertTrue(GeoUtils.within90LonDegrees(-150, 140, 170));
+ assertFalse(GeoUtils.within90LonDegrees(-150, 120, 150));
+
+ assertTrue(GeoUtils.within90LonDegrees(150, -170, -140));
+ assertFalse(GeoUtils.within90LonDegrees(150, -150, -120));
+ }
}