You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2017/01/30 10:20:46 UTC

[1/6] lucene-solr:master: LUCENE-7661: Speed up for LatLonPointInPolygonQuery by pre-computing the relation of the polygon with a grid.

Repository: lucene-solr
Updated Branches:
  refs/heads/branch_6x 9e98100d1 -> b6b44d44d
  refs/heads/master aa5e048cb -> 9d5dc0cf5


LUCENE-7661: Speed up for LatLonPointInPolygonQuery by pre-computing the relation of the polygon with a grid.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/74240be0
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/74240be0
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/74240be0

Branch: refs/heads/master
Commit: 74240be0f5a7f9de373bc53d01d5a43cd6c5bb05
Parents: aa5e048
Author: Adrien Grand <jp...@gmail.com>
Authored: Mon Jan 30 09:44:48 2017 +0100
Committer: Adrien Grand <jp...@gmail.com>
Committed: Mon Jan 30 09:45:06 2017 +0100

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |   3 +
 .../org/apache/lucene/geo/GeoEncodingUtils.java | 145 +++++++++++++++----
 .../document/LatLonDocValuesDistanceQuery.java  |   2 +-
 .../document/LatLonPointDistanceQuery.java      |   2 +-
 .../document/LatLonPointInPolygonQuery.java     |   6 +-
 5 files changed, 128 insertions(+), 30 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/74240be0/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index e59744a..0be102c 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -128,6 +128,9 @@ Optimizations
 * LUCENE-7656: Speed up for LatLonPointDistanceQuery by computing distances even
   less often. (Adrien Grand)
 
+* LUCENE-7661: Speed up for LatLonPointInPolygonQuery by pre-computing the
+  relation of the polygon with a grid. (Adrien Grand)
+
 Build
 
 * LUCENE-7651: Fix Javadocs build for Java 8u121 by injecting "Google Code

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/74240be0/lucene/core/src/java/org/apache/lucene/geo/GeoEncodingUtils.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/geo/GeoEncodingUtils.java b/lucene/core/src/java/org/apache/lucene/geo/GeoEncodingUtils.java
index cc75fbf..663cb2e 100644
--- a/lucene/core/src/java/org/apache/lucene/geo/GeoEncodingUtils.java
+++ b/lucene/core/src/java/org/apache/lucene/geo/GeoEncodingUtils.java
@@ -27,6 +27,8 @@ import static org.apache.lucene.geo.GeoUtils.MIN_LAT_INCL;
 import static org.apache.lucene.geo.GeoUtils.checkLatitude;
 import static org.apache.lucene.geo.GeoUtils.checkLongitude;
 
+import java.util.function.Function;
+
 /**
  * reusable geopoint encoding methods
  *
@@ -158,11 +160,48 @@ public final class GeoEncodingUtils {
    *  @lucene.internal */
   public static DistancePredicate createDistancePredicate(double lat, double lon, double radiusMeters) {
     final Rectangle boundingBox = Rectangle.fromPointDistance(lat, lon, radiusMeters);
+    final double axisLat = Rectangle.axisLat(lat, radiusMeters);
+    final double distanceSortKey = GeoUtils.distanceQuerySortKey(radiusMeters);
+    final Function<Rectangle, Relation> boxToRelation = box -> GeoUtils.relate(
+        box.minLat, box.maxLat, box.minLon, box.maxLon, lat, lon, distanceSortKey, axisLat);
+    final Grid subBoxes = createSubBoxes(boundingBox, boxToRelation);
+
+    return new DistancePredicate(
+        subBoxes.latShift, subBoxes.lonShift,
+        subBoxes.latBase, subBoxes.lonBase,
+        subBoxes.maxLatDelta, subBoxes.maxLonDelta,
+        subBoxes.relations,
+        lat, lon, distanceSortKey);
+  }
+
+  /** Create a predicate that checks whether points are within a polygon.
+   *  It works the same way as {@link #createDistancePredicate}.
+   *  @lucene.internal */
+  public static PolygonPredicate createPolygonPredicate(Polygon[] polygons, Polygon2D tree) {
+    final Rectangle boundingBox = Rectangle.fromPolygon(polygons);
+    final Function<Rectangle, Relation> boxToRelation = box -> tree.relate(
+        box.minLat, box.maxLat, box.minLon, box.maxLon);
+    final Grid subBoxes = createSubBoxes(boundingBox, boxToRelation);
+
+    return new PolygonPredicate(
+        subBoxes.latShift, subBoxes.lonShift,
+        subBoxes.latBase, subBoxes.lonBase,
+        subBoxes.maxLatDelta, subBoxes.maxLonDelta,
+        subBoxes.relations,
+        tree);
+  }
+
+  private static Grid createSubBoxes(Rectangle boundingBox, Function<Rectangle, Relation> boxToRelation) {
     final int minLat = encodeLatitudeCeil(boundingBox.minLat);
     final int maxLat = encodeLatitude(boundingBox.maxLat);
     final int minLon = encodeLongitudeCeil(boundingBox.minLon);
     final int maxLon = encodeLongitude(boundingBox.maxLon);
 
+    if (maxLat < minLat || (boundingBox.crossesDateline() == false && maxLon < minLon)) {
+      // the box cannot match any quantized point
+      return new Grid(1, 1, 0, 0, 0, 0, new byte[0]);
+    }
+
     final int latShift, lonShift;
     final int latBase, lonBase;
     final int maxLatDelta, maxLonDelta;
@@ -186,8 +225,6 @@ public final class GeoEncodingUtils {
       assert maxLonDelta > 0;
     }
 
-    final double axisLat = Rectangle.axisLat(lat, radiusMeters);
-    final double distanceSortKey = GeoUtils.distanceQuerySortKey(radiusMeters);
     final byte[] relations = new byte[maxLatDelta * maxLonDelta];
     for (int i = 0; i < maxLatDelta; ++i) {
       for (int j = 0; j < maxLonDelta; ++j) {
@@ -196,55 +233,54 @@ public final class GeoEncodingUtils {
         final int boxMaxLat = boxMinLat + (1 << latShift) - 1;
         final int boxMaxLon = boxMinLon + (1 << lonShift) - 1;
 
-        relations[i * maxLonDelta + j] = (byte) GeoUtils.relate(
+        relations[i * maxLonDelta + j] = (byte) boxToRelation.apply(new Rectangle(
             decodeLatitude(boxMinLat), decodeLatitude(boxMaxLat),
-            decodeLongitude(boxMinLon), decodeLongitude(boxMaxLon),
-            lat, lon, distanceSortKey, axisLat).ordinal();
+            decodeLongitude(boxMinLon), decodeLongitude(boxMaxLon))).ordinal();
       }
     }
 
-    return new DistancePredicate(
+    return new Grid(
         latShift, lonShift,
         latBase, lonBase,
         maxLatDelta, maxLonDelta,
-        relations,
-        lat, lon, distanceSortKey);
+        relations);
   }
 
   /** Compute the minimum shift value so that
    * {@code (b>>>shift)-(a>>>shift)} is less that {@code ARITY}. */
   private static int computeShift(long a, long b) {
-    assert a < b;
+    assert a <= b;
     // We enforce a shift of at least 1 so that when we work with unsigned ints
     // by doing (lat - MIN_VALUE), the result of the shift (lat - MIN_VALUE) >>> shift
     // can be used for comparisons without particular care: the sign bit has
     // been cleared so comparisons work the same for signed and unsigned ints
     for (int shift = 1; ; ++shift) {
       final long delta = (b >>> shift) - (a >>> shift);
-      if (delta >= 0 && delta < DistancePredicate.ARITY) {
+      if (delta >= 0 && delta < Grid.ARITY) {
         return shift;
       }
     }
   }
 
-  /** A predicate that checks whether a given point is within a distance of another point. */
-  public static class DistancePredicate {
-
-    private static final int ARITY = 64;
+  private static class Grid {
+    static final int ARITY = 64;
 
-    private final int latShift, lonShift;
-    private final int latBase, lonBase;
-    private final int maxLatDelta, maxLonDelta;
-    private final byte[] relations;
-    private final double lat, lon;
-    private final double distanceKey;
+    final int latShift, lonShift;
+    final int latBase, lonBase;
+    final int maxLatDelta, maxLonDelta;
+    final byte[] relations;
 
-    private DistancePredicate(
+    private Grid(
         int latShift, int lonShift,
         int latBase, int lonBase,
         int maxLatDelta, int maxLonDelta,
-        byte[] relations,
-        double lat, double lon, double distanceKey) {
+        byte[] relations) {
+      if (latShift < 1 || latShift > 31) {
+        throw new IllegalArgumentException();
+      }
+      if (lonShift < 1 || lonShift > 31) {
+        throw new IllegalArgumentException();
+      }
       this.latShift = latShift;
       this.lonShift = lonShift;
       this.latBase = latBase;
@@ -252,6 +288,22 @@ public final class GeoEncodingUtils {
       this.maxLatDelta = maxLatDelta;
       this.maxLonDelta = maxLonDelta;
       this.relations = relations;
+    }
+  }
+
+  /** A predicate that checks whether a given point is within a distance of another point. */
+  public static class DistancePredicate extends Grid {
+
+    private final double lat, lon;
+    private final double distanceKey;
+
+    private DistancePredicate(
+        int latShift, int lonShift,
+        int latBase, int lonBase,
+        int maxLatDelta, int maxLonDelta,
+        byte[] relations,
+        double lat, double lon, double distanceKey) {
+      super(latShift, lonShift, latBase, lonBase, maxLatDelta, maxLonDelta, relations);
       this.lat = lat;
       this.lon = lon;
       this.distanceKey = distanceKey;
@@ -259,16 +311,17 @@ public final class GeoEncodingUtils {
 
     /** Check whether the given point is within a distance of another point.
      *  NOTE: this operates directly on the encoded representation of points. */
-    public boolean apply(int lat, int lon) {
+    public boolean test(int lat, int lon) {
       final int lat2 = ((lat - Integer.MIN_VALUE) >>> latShift);
       if (lat2 < latBase || lat2 >= latBase + maxLatDelta) {
         return false;
       }
       int lon2 = ((lon - Integer.MIN_VALUE) >>> lonShift);
       if (lon2 < lonBase) { // wrap
-        lon2 += 1L << (32 - lonShift);
-        assert lon2 >= lonBase;
+        lon2 += 1 << (32 - lonShift);
       }
+      assert Integer.toUnsignedLong(lon2) >= lonBase;
+      assert lon2 - lonBase >= 0;
       if (lon2 - lonBase >= maxLonDelta) {
         return false;
       }
@@ -284,4 +337,44 @@ public final class GeoEncodingUtils {
     }
   }
 
+  /** A predicate that checks whether a given point is within a polygon. */
+  public static class PolygonPredicate extends Grid {
+
+    private final Polygon2D tree;
+
+    private PolygonPredicate(
+        int latShift, int lonShift,
+        int latBase, int lonBase,
+        int maxLatDelta, int maxLonDelta,
+        byte[] relations,
+        Polygon2D tree) {
+      super(latShift, lonShift, latBase, lonBase, maxLatDelta, maxLonDelta, relations);
+      this.tree = tree;
+    }
+
+    /** Check whether the given point is within the considered polygon.
+     *  NOTE: this operates directly on the encoded representation of points. */
+    public boolean test(int lat, int lon) {
+      final int lat2 = ((lat - Integer.MIN_VALUE) >>> latShift);
+      if (lat2 < latBase || lat2 >= latBase + maxLatDelta) {
+        return false;
+      }
+      int lon2 = ((lon - Integer.MIN_VALUE) >>> lonShift);
+      if (lon2 < lonBase) { // wrap
+        lon2 += 1 << (32 - lonShift);
+      }
+      assert Integer.toUnsignedLong(lon2) >= lonBase;
+      assert lon2 - lonBase >= 0;
+      if (lon2 - lonBase >= maxLonDelta) {
+        return false;
+      }
+
+      final int relation = relations[(lat2 - latBase) * maxLonDelta + (lon2 - lonBase)];
+      if (relation == Relation.CELL_CROSSES_QUERY.ordinal()) {
+        return tree.contains(decodeLatitude(lat), decodeLongitude(lon));
+      } else {
+        return relation == Relation.CELL_INSIDE_QUERY.ordinal();
+      }
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/74240be0/lucene/sandbox/src/java/org/apache/lucene/document/LatLonDocValuesDistanceQuery.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonDocValuesDistanceQuery.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonDocValuesDistanceQuery.java
index ab05804..e38d9fe 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonDocValuesDistanceQuery.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonDocValuesDistanceQuery.java
@@ -111,7 +111,7 @@ final class LatLonDocValuesDistanceQuery extends Query {
               final long value = values.nextValue();
               final int lat = (int) (value >>> 32);
               final int lon = (int) (value & 0xFFFFFFFF);
-              if (distancePredicate.apply(lat, lon)) {
+              if (distancePredicate.test(lat, lon)) {
                 return true;
               }
             }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/74240be0/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java
index 3d0b82e..71ddf3d 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java
@@ -164,7 +164,7 @@ final class LatLonPointDistanceQuery extends Query {
 
                              int docLatitude = NumericUtils.sortableBytesToInt(packedValue, 0);
                              int docLongitude = NumericUtils.sortableBytesToInt(packedValue, Integer.BYTES);
-                             if (distancePredicate.apply(docLatitude, docLongitude)) {
+                             if (distancePredicate.test(docLatitude, docLongitude)) {
                                adder.add(docID);
                              }
                            }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/74240be0/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java
index ec7c682..c272b4d 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java
@@ -35,6 +35,7 @@ import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.util.DocIdSetBuilder;
 import org.apache.lucene.util.NumericUtils;
 import org.apache.lucene.util.StringHelper;
+import org.apache.lucene.geo.GeoEncodingUtils;
 import org.apache.lucene.geo.Polygon;
 import org.apache.lucene.geo.Polygon2D;
 
@@ -92,6 +93,7 @@ final class LatLonPointInPolygonQuery extends Query {
     NumericUtils.intToSortableBytes(encodeLongitude(box.maxLon), maxLon, 0);
 
     final Polygon2D tree = Polygon2D.create(polygons);
+    final GeoEncodingUtils.PolygonPredicate polygonPredicate = GeoEncodingUtils.createPolygonPredicate(polygons, tree);
 
     return new ConstantScoreWeight(this, boost) {
 
@@ -130,8 +132,8 @@ final class LatLonPointInPolygonQuery extends Query {
 
                            @Override
                            public void visit(int docID, byte[] packedValue) {
-                             if (tree.contains(decodeLatitude(packedValue, 0), 
-                                               decodeLongitude(packedValue, Integer.BYTES))) {
+                             if (polygonPredicate.test(NumericUtils.sortableBytesToInt(packedValue, 0),
+                                                       NumericUtils.sortableBytesToInt(packedValue, Integer.BYTES))) {
                                adder.add(docID);
                              }
                            }


[4/6] lucene-solr:branch_6x: LUCENE-7661: Speed up for LatLonPointInPolygonQuery by pre-computing the relation of the polygon with a grid.

Posted by jp...@apache.org.
LUCENE-7661: Speed up for LatLonPointInPolygonQuery by pre-computing the relation of the polygon with a grid.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/d2051e3f
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/d2051e3f
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/d2051e3f

Branch: refs/heads/branch_6x
Commit: d2051e3f9d4edeb5d6c74f71684c14596453b4a2
Parents: 9e98100
Author: Adrien Grand <jp...@gmail.com>
Authored: Mon Jan 30 09:44:48 2017 +0100
Committer: Adrien Grand <jp...@gmail.com>
Committed: Mon Jan 30 11:16:31 2017 +0100

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |   3 +
 .../org/apache/lucene/geo/GeoEncodingUtils.java | 145 +++++++++++++++----
 .../document/LatLonDocValuesDistanceQuery.java  |   2 +-
 .../document/LatLonPointDistanceQuery.java      |   2 +-
 .../document/LatLonPointInPolygonQuery.java     |   6 +-
 5 files changed, 128 insertions(+), 30 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d2051e3f/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index cd77115..1bc5046 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -70,6 +70,9 @@ Optimizations
 * LUCENE-7656: Speed up for LatLonPointDistanceQuery by computing distances even
   less often. (Adrien Grand)
 
+* LUCENE-7661: Speed up for LatLonPointInPolygonQuery by pre-computing the
+  relation of the polygon with a grid. (Adrien Grand)
+
 Build
 
 * LUCENE-7651: Fix Javadocs build for Java 8u121 by injecting "Google Code

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d2051e3f/lucene/core/src/java/org/apache/lucene/geo/GeoEncodingUtils.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/geo/GeoEncodingUtils.java b/lucene/core/src/java/org/apache/lucene/geo/GeoEncodingUtils.java
index cc75fbf..663cb2e 100644
--- a/lucene/core/src/java/org/apache/lucene/geo/GeoEncodingUtils.java
+++ b/lucene/core/src/java/org/apache/lucene/geo/GeoEncodingUtils.java
@@ -27,6 +27,8 @@ import static org.apache.lucene.geo.GeoUtils.MIN_LAT_INCL;
 import static org.apache.lucene.geo.GeoUtils.checkLatitude;
 import static org.apache.lucene.geo.GeoUtils.checkLongitude;
 
+import java.util.function.Function;
+
 /**
  * reusable geopoint encoding methods
  *
@@ -158,11 +160,48 @@ public final class GeoEncodingUtils {
    *  @lucene.internal */
   public static DistancePredicate createDistancePredicate(double lat, double lon, double radiusMeters) {
     final Rectangle boundingBox = Rectangle.fromPointDistance(lat, lon, radiusMeters);
+    final double axisLat = Rectangle.axisLat(lat, radiusMeters);
+    final double distanceSortKey = GeoUtils.distanceQuerySortKey(radiusMeters);
+    final Function<Rectangle, Relation> boxToRelation = box -> GeoUtils.relate(
+        box.minLat, box.maxLat, box.minLon, box.maxLon, lat, lon, distanceSortKey, axisLat);
+    final Grid subBoxes = createSubBoxes(boundingBox, boxToRelation);
+
+    return new DistancePredicate(
+        subBoxes.latShift, subBoxes.lonShift,
+        subBoxes.latBase, subBoxes.lonBase,
+        subBoxes.maxLatDelta, subBoxes.maxLonDelta,
+        subBoxes.relations,
+        lat, lon, distanceSortKey);
+  }
+
+  /** Create a predicate that checks whether points are within a polygon.
+   *  It works the same way as {@link #createDistancePredicate}.
+   *  @lucene.internal */
+  public static PolygonPredicate createPolygonPredicate(Polygon[] polygons, Polygon2D tree) {
+    final Rectangle boundingBox = Rectangle.fromPolygon(polygons);
+    final Function<Rectangle, Relation> boxToRelation = box -> tree.relate(
+        box.minLat, box.maxLat, box.minLon, box.maxLon);
+    final Grid subBoxes = createSubBoxes(boundingBox, boxToRelation);
+
+    return new PolygonPredicate(
+        subBoxes.latShift, subBoxes.lonShift,
+        subBoxes.latBase, subBoxes.lonBase,
+        subBoxes.maxLatDelta, subBoxes.maxLonDelta,
+        subBoxes.relations,
+        tree);
+  }
+
+  private static Grid createSubBoxes(Rectangle boundingBox, Function<Rectangle, Relation> boxToRelation) {
     final int minLat = encodeLatitudeCeil(boundingBox.minLat);
     final int maxLat = encodeLatitude(boundingBox.maxLat);
     final int minLon = encodeLongitudeCeil(boundingBox.minLon);
     final int maxLon = encodeLongitude(boundingBox.maxLon);
 
+    if (maxLat < minLat || (boundingBox.crossesDateline() == false && maxLon < minLon)) {
+      // the box cannot match any quantized point
+      return new Grid(1, 1, 0, 0, 0, 0, new byte[0]);
+    }
+
     final int latShift, lonShift;
     final int latBase, lonBase;
     final int maxLatDelta, maxLonDelta;
@@ -186,8 +225,6 @@ public final class GeoEncodingUtils {
       assert maxLonDelta > 0;
     }
 
-    final double axisLat = Rectangle.axisLat(lat, radiusMeters);
-    final double distanceSortKey = GeoUtils.distanceQuerySortKey(radiusMeters);
     final byte[] relations = new byte[maxLatDelta * maxLonDelta];
     for (int i = 0; i < maxLatDelta; ++i) {
       for (int j = 0; j < maxLonDelta; ++j) {
@@ -196,55 +233,54 @@ public final class GeoEncodingUtils {
         final int boxMaxLat = boxMinLat + (1 << latShift) - 1;
         final int boxMaxLon = boxMinLon + (1 << lonShift) - 1;
 
-        relations[i * maxLonDelta + j] = (byte) GeoUtils.relate(
+        relations[i * maxLonDelta + j] = (byte) boxToRelation.apply(new Rectangle(
             decodeLatitude(boxMinLat), decodeLatitude(boxMaxLat),
-            decodeLongitude(boxMinLon), decodeLongitude(boxMaxLon),
-            lat, lon, distanceSortKey, axisLat).ordinal();
+            decodeLongitude(boxMinLon), decodeLongitude(boxMaxLon))).ordinal();
       }
     }
 
-    return new DistancePredicate(
+    return new Grid(
         latShift, lonShift,
         latBase, lonBase,
         maxLatDelta, maxLonDelta,
-        relations,
-        lat, lon, distanceSortKey);
+        relations);
   }
 
   /** Compute the minimum shift value so that
    * {@code (b>>>shift)-(a>>>shift)} is less that {@code ARITY}. */
   private static int computeShift(long a, long b) {
-    assert a < b;
+    assert a <= b;
     // We enforce a shift of at least 1 so that when we work with unsigned ints
     // by doing (lat - MIN_VALUE), the result of the shift (lat - MIN_VALUE) >>> shift
     // can be used for comparisons without particular care: the sign bit has
     // been cleared so comparisons work the same for signed and unsigned ints
     for (int shift = 1; ; ++shift) {
       final long delta = (b >>> shift) - (a >>> shift);
-      if (delta >= 0 && delta < DistancePredicate.ARITY) {
+      if (delta >= 0 && delta < Grid.ARITY) {
         return shift;
       }
     }
   }
 
-  /** A predicate that checks whether a given point is within a distance of another point. */
-  public static class DistancePredicate {
-
-    private static final int ARITY = 64;
+  private static class Grid {
+    static final int ARITY = 64;
 
-    private final int latShift, lonShift;
-    private final int latBase, lonBase;
-    private final int maxLatDelta, maxLonDelta;
-    private final byte[] relations;
-    private final double lat, lon;
-    private final double distanceKey;
+    final int latShift, lonShift;
+    final int latBase, lonBase;
+    final int maxLatDelta, maxLonDelta;
+    final byte[] relations;
 
-    private DistancePredicate(
+    private Grid(
         int latShift, int lonShift,
         int latBase, int lonBase,
         int maxLatDelta, int maxLonDelta,
-        byte[] relations,
-        double lat, double lon, double distanceKey) {
+        byte[] relations) {
+      if (latShift < 1 || latShift > 31) {
+        throw new IllegalArgumentException();
+      }
+      if (lonShift < 1 || lonShift > 31) {
+        throw new IllegalArgumentException();
+      }
       this.latShift = latShift;
       this.lonShift = lonShift;
       this.latBase = latBase;
@@ -252,6 +288,22 @@ public final class GeoEncodingUtils {
       this.maxLatDelta = maxLatDelta;
       this.maxLonDelta = maxLonDelta;
       this.relations = relations;
+    }
+  }
+
+  /** A predicate that checks whether a given point is within a distance of another point. */
+  public static class DistancePredicate extends Grid {
+
+    private final double lat, lon;
+    private final double distanceKey;
+
+    private DistancePredicate(
+        int latShift, int lonShift,
+        int latBase, int lonBase,
+        int maxLatDelta, int maxLonDelta,
+        byte[] relations,
+        double lat, double lon, double distanceKey) {
+      super(latShift, lonShift, latBase, lonBase, maxLatDelta, maxLonDelta, relations);
       this.lat = lat;
       this.lon = lon;
       this.distanceKey = distanceKey;
@@ -259,16 +311,17 @@ public final class GeoEncodingUtils {
 
     /** Check whether the given point is within a distance of another point.
      *  NOTE: this operates directly on the encoded representation of points. */
-    public boolean apply(int lat, int lon) {
+    public boolean test(int lat, int lon) {
       final int lat2 = ((lat - Integer.MIN_VALUE) >>> latShift);
       if (lat2 < latBase || lat2 >= latBase + maxLatDelta) {
         return false;
       }
       int lon2 = ((lon - Integer.MIN_VALUE) >>> lonShift);
       if (lon2 < lonBase) { // wrap
-        lon2 += 1L << (32 - lonShift);
-        assert lon2 >= lonBase;
+        lon2 += 1 << (32 - lonShift);
       }
+      assert Integer.toUnsignedLong(lon2) >= lonBase;
+      assert lon2 - lonBase >= 0;
       if (lon2 - lonBase >= maxLonDelta) {
         return false;
       }
@@ -284,4 +337,44 @@ public final class GeoEncodingUtils {
     }
   }
 
+  /** A predicate that checks whether a given point is within a polygon. */
+  public static class PolygonPredicate extends Grid {
+
+    private final Polygon2D tree;
+
+    private PolygonPredicate(
+        int latShift, int lonShift,
+        int latBase, int lonBase,
+        int maxLatDelta, int maxLonDelta,
+        byte[] relations,
+        Polygon2D tree) {
+      super(latShift, lonShift, latBase, lonBase, maxLatDelta, maxLonDelta, relations);
+      this.tree = tree;
+    }
+
+    /** Check whether the given point is within the considered polygon.
+     *  NOTE: this operates directly on the encoded representation of points. */
+    public boolean test(int lat, int lon) {
+      final int lat2 = ((lat - Integer.MIN_VALUE) >>> latShift);
+      if (lat2 < latBase || lat2 >= latBase + maxLatDelta) {
+        return false;
+      }
+      int lon2 = ((lon - Integer.MIN_VALUE) >>> lonShift);
+      if (lon2 < lonBase) { // wrap
+        lon2 += 1 << (32 - lonShift);
+      }
+      assert Integer.toUnsignedLong(lon2) >= lonBase;
+      assert lon2 - lonBase >= 0;
+      if (lon2 - lonBase >= maxLonDelta) {
+        return false;
+      }
+
+      final int relation = relations[(lat2 - latBase) * maxLonDelta + (lon2 - lonBase)];
+      if (relation == Relation.CELL_CROSSES_QUERY.ordinal()) {
+        return tree.contains(decodeLatitude(lat), decodeLongitude(lon));
+      } else {
+        return relation == Relation.CELL_INSIDE_QUERY.ordinal();
+      }
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d2051e3f/lucene/sandbox/src/java/org/apache/lucene/document/LatLonDocValuesDistanceQuery.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonDocValuesDistanceQuery.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonDocValuesDistanceQuery.java
index 6febedc..df40eef 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonDocValuesDistanceQuery.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonDocValuesDistanceQuery.java
@@ -113,7 +113,7 @@ final class LatLonDocValuesDistanceQuery extends Query {
               final long value = values.valueAt(i);
               final int lat = (int) (value >>> 32);
               final int lon = (int) (value & 0xFFFFFFFF);
-              if (distancePredicate.apply(lat, lon)) {
+              if (distancePredicate.test(lat, lon)) {
                 return true;
               }
             }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d2051e3f/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java
index 421858d..6028086 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java
@@ -164,7 +164,7 @@ final class LatLonPointDistanceQuery extends Query {
 
                              int docLatitude = NumericUtils.sortableBytesToInt(packedValue, 0);
                              int docLongitude = NumericUtils.sortableBytesToInt(packedValue, Integer.BYTES);
-                             if (distancePredicate.apply(docLatitude, docLongitude)) {
+                             if (distancePredicate.test(docLatitude, docLongitude)) {
                                adder.add(docID);
                              }
                            }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d2051e3f/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java
index 8db8296..4609d35 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java
@@ -35,6 +35,7 @@ import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.util.DocIdSetBuilder;
 import org.apache.lucene.util.NumericUtils;
 import org.apache.lucene.util.StringHelper;
+import org.apache.lucene.geo.GeoEncodingUtils;
 import org.apache.lucene.geo.Polygon;
 import org.apache.lucene.geo.Polygon2D;
 
@@ -92,6 +93,7 @@ final class LatLonPointInPolygonQuery extends Query {
     NumericUtils.intToSortableBytes(encodeLongitude(box.maxLon), maxLon, 0);
 
     final Polygon2D tree = Polygon2D.create(polygons);
+    final GeoEncodingUtils.PolygonPredicate polygonPredicate = GeoEncodingUtils.createPolygonPredicate(polygons, tree);
 
     return new ConstantScoreWeight(this) {
 
@@ -130,8 +132,8 @@ final class LatLonPointInPolygonQuery extends Query {
 
                            @Override
                            public void visit(int docID, byte[] packedValue) {
-                             if (tree.contains(decodeLatitude(packedValue, 0), 
-                                               decodeLongitude(packedValue, Integer.BYTES))) {
+                             if (polygonPredicate.test(NumericUtils.sortableBytesToInt(packedValue, 0),
+                                                       NumericUtils.sortableBytesToInt(packedValue, Integer.BYTES))) {
                                adder.add(docID);
                              }
                            }


[3/6] lucene-solr:master: LUCENE-7654: To-parent block joins should implement two-phase iteration.

Posted by jp...@apache.org.
LUCENE-7654: To-parent block joins should implement two-phase iteration.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/9d5dc0cf
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/9d5dc0cf
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/9d5dc0cf

Branch: refs/heads/master
Commit: 9d5dc0cf573cf8fc75dd7799844db2cb0fa52da8
Parents: 076662d
Author: Adrien Grand <jp...@gmail.com>
Authored: Mon Jan 30 10:46:22 2017 +0100
Committer: Adrien Grand <jp...@gmail.com>
Committed: Mon Jan 30 10:46:22 2017 +0100

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |   4 +
 .../search/join/ToParentBlockJoinQuery.java     | 336 ++++++++++---------
 .../lucene/search/join/TestBlockJoin.java       |   5 +-
 .../search/join/TestBlockJoinValidation.java    |  19 --
 4 files changed, 189 insertions(+), 175 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9d5dc0cf/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 6a0d17d..17a528e 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -135,6 +135,10 @@ Optimizations
   whether BKD cells are entirely within the distance close to the dateline.
   (Adrien Grand)
 
+* LUCENE-7654: ToParentBlockJoinQuery now implements two-phase iteration and
+  computes scores lazily in order to be faster when used in conjunctions.
+  (Adrien Grand)
+
 Build
 
 * LUCENE-7651: Fix Javadocs build for Java 8u121 by injecting "Google Code

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9d5dc0cf/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
----------------------------------------------------------------------
diff --git a/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java b/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
index 6369eea..2837423 100644
--- a/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
+++ b/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
@@ -20,15 +20,18 @@ import java.io.IOException;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.Locale;
+
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.search.FilterWeight;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.Explanation;
+import org.apache.lucene.search.FilterWeight;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.ScorerSupplier;
+import org.apache.lucene.search.TwoPhaseIterator;
 import org.apache.lucene.search.Weight;
 import org.apache.lucene.util.BitSet;
 
@@ -74,7 +77,7 @@ public class ToParentBlockJoinQuery extends Query {
   private final ScoreMode scoreMode;
 
   /** Create a ToParentBlockJoinQuery.
-   * 
+   *
    * @param childQuery Query matching child documents.
    * @param parentsFilter Filter identifying the parent documents.
    * @param scoreMode How to aggregate multiple child scores
@@ -100,7 +103,7 @@ public class ToParentBlockJoinQuery extends Query {
   public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
     return new BlockJoinWeight(this, childQuery.createWeight(searcher, needsScores, boost), parentsFilter, needsScores ? scoreMode : ScoreMode.None);
   }
-  
+
   /** Return our child query. */
   public Query getChildQuery() {
     return childQuery;
@@ -116,33 +119,44 @@ public class ToParentBlockJoinQuery extends Query {
       this.scoreMode = scoreMode;
     }
 
-    // NOTE: acceptDocs applies (and is checked) only in the
-    // parent document space
     @Override
-    public Scorer scorer(LeafReaderContext readerContext) throws IOException {
-
-      final Scorer childScorer = in.scorer(readerContext);
-      if (childScorer == null) {
-        // No matches
+    public Scorer scorer(LeafReaderContext context) throws IOException {
+      final ScorerSupplier scorerSupplier = scorerSupplier(context);
+      if (scorerSupplier == null) {
         return null;
       }
+      return scorerSupplier.get(false);
+    }
 
-      final int firstChildDoc = childScorer.iterator().nextDoc();
-      if (firstChildDoc == DocIdSetIterator.NO_MORE_DOCS) {
-        // No matches
+    // NOTE: acceptDocs applies (and is checked) only in the
+    // parent document space
+    @Override
+    public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
+      final ScorerSupplier childScorerSupplier = in.scorerSupplier(context);
+      if (childScorerSupplier == null) {
         return null;
       }
 
       // NOTE: this does not take accept docs into account, the responsibility
       // to not match deleted docs is on the scorer
-      final BitSet parents = parentsFilter.getBitSet(readerContext);
-
+      final BitSet parents = parentsFilter.getBitSet(context);
       if (parents == null) {
         // No matches
         return null;
       }
 
-      return new BlockJoinScorer(this, childScorer, parents, firstChildDoc, scoreMode);
+      return new ScorerSupplier() {
+
+        @Override
+        public Scorer get(boolean randomAccess) throws IOException {
+          return new BlockJoinScorer(BlockJoinWeight.this, childScorerSupplier.get(randomAccess), parents, scoreMode);
+        }
+
+        @Override
+        public long cost() {
+          return childScorerSupplier.cost();
+        }
+      };
     }
 
     @Override
@@ -154,180 +168,191 @@ public class ToParentBlockJoinQuery extends Query {
       return Explanation.noMatch("Not a match");
     }
   }
-  
-  static class BlockJoinScorer extends Scorer {
-    private final Scorer childScorer;
+
+  private static class ParentApproximation extends DocIdSetIterator {
+
+    private final DocIdSetIterator childApproximation;
     private final BitSet parentBits;
-    private final ScoreMode scoreMode;
-    private int parentDoc = -1;
-    private int prevParentDoc;
-    private float parentScore;
-    private int parentFreq;
-    private int nextChildDoc;
-    private int childDocUpto;
-
-    public BlockJoinScorer(Weight weight, Scorer childScorer, BitSet parentBits, int firstChildDoc, ScoreMode scoreMode) {
-      super(weight);
-      //System.out.println("Q.init firstChildDoc=" + firstChildDoc);
+    private int doc = -1;
+
+    ParentApproximation(DocIdSetIterator childApproximation, BitSet parentBits) {
+      this.childApproximation = childApproximation;
       this.parentBits = parentBits;
-      this.childScorer = childScorer;
-      this.scoreMode = scoreMode;
-      nextChildDoc = firstChildDoc;
     }
 
     @Override
-    public Collection<ChildScorer> getChildren() {
-      return Collections.singleton(new ChildScorer(childScorer, "BLOCK_JOIN"));
+    public int docID() {
+      return doc;
     }
 
     @Override
-    public DocIdSetIterator iterator() {
-      return new DocIdSetIterator() {
-        final DocIdSetIterator childIt = childScorer.iterator();
-
-        @Override
-        public int nextDoc() throws IOException {
-          //System.out.println("Q.nextDoc() nextChildDoc=" + nextChildDoc);
-          if (nextChildDoc == NO_MORE_DOCS) {
-            //System.out.println("  end");
-            return parentDoc = NO_MORE_DOCS;
-          }
-
-          // Gather all children sharing the same parent as
-          // nextChildDoc
-
-          parentDoc = parentBits.nextSetBit(nextChildDoc);
-
-          // Parent & child docs are supposed to be
-          // orthogonal:
-          checkOrthogonal(nextChildDoc, parentDoc);
-
-          //System.out.println("  parentDoc=" + parentDoc);
-          assert parentDoc != DocIdSetIterator.NO_MORE_DOCS;
-
-          float totalScore = 0;
-          float maxScore = Float.NEGATIVE_INFINITY;
-          float minScore = Float.POSITIVE_INFINITY;
-
-          childDocUpto = 0;
-          parentFreq = 0;
-          do {
-
-            //System.out.println("  c=" + nextChildDoc);
-            if (scoreMode != ScoreMode.None) {
-              // TODO: specialize this into dedicated classes per-scoreMode
-              final float childScore = childScorer.score();
-              final int childFreq = childScorer.freq();
-              maxScore = Math.max(childScore, maxScore);
-              minScore = Math.min(childScore, minScore);
-              totalScore += childScore;
-              parentFreq += childFreq;
-            }
-            childDocUpto++;
-            nextChildDoc = childIt.nextDoc();
-          } while (nextChildDoc < parentDoc);
-
-          // Parent & child docs are supposed to be
-          // orthogonal:
-          checkOrthogonal(nextChildDoc, parentDoc);
-
-          switch(scoreMode) {
-          case Avg:
-            parentScore = totalScore / childDocUpto;
-            break;
-          case Max:
-            parentScore = maxScore;
-            break;
-          case Min:
-            parentScore = minScore;
-            break;
-          case Total:
-            parentScore = totalScore;
-            break;
-          case None:
-            break;
-          }
-
-          //System.out.println("  return parentDoc=" + parentDoc + " childDocUpto=" + childDocUpto);
-          return parentDoc;
-        }
-
-        @Override
-        public int advance(int parentTarget) throws IOException {
+    public int nextDoc() throws IOException {
+      return advance(doc + 1);
+    }
 
-          //System.out.println("Q.advance parentTarget=" + parentTarget);
-          if (parentTarget == NO_MORE_DOCS) {
-            return parentDoc = NO_MORE_DOCS;
-          }
+    @Override
+    public int advance(int target) throws IOException {
+      if (target >= parentBits.length()) {
+        return doc = NO_MORE_DOCS;
+      }
+      final int firstChildTarget = target == 0 ? 0 : parentBits.prevSetBit(target - 1) + 1;
+      int childDoc = childApproximation.docID();
+      if (childDoc < firstChildTarget) {
+        childDoc = childApproximation.advance(firstChildTarget);
+      }
+      if (childDoc >= parentBits.length() - 1) {
+        return doc = NO_MORE_DOCS;
+      }
+      return doc = parentBits.nextSetBit(childDoc + 1);
+    }
 
-          if (parentTarget == 0) {
-            // Callers should only be passing in a docID from
-            // the parent space, so this means this parent
-            // has no children (it got docID 0), so it cannot
-            // possibly match.  We must handle this case
-            // separately otherwise we pass invalid -1 to
-            // prevSetBit below:
-            return nextDoc();
-          }
+    @Override
+    public long cost() {
+      return childApproximation.cost();
+    }
+  }
 
-          prevParentDoc = parentBits.prevSetBit(parentTarget-1);
+  private static class ParentTwoPhase extends TwoPhaseIterator {
 
-          //System.out.println("  rolled back to prevParentDoc=" + prevParentDoc + " vs parentDoc=" + parentDoc);
-          assert prevParentDoc >= parentDoc;
-          if (prevParentDoc > nextChildDoc) {
-            nextChildDoc = childIt.advance(prevParentDoc);
-            // System.out.println("  childScorer advanced to child docID=" + nextChildDoc);
-          //} else {
-            //System.out.println("  skip childScorer advance");
-          }
+    private final ParentApproximation parentApproximation;
+    private final DocIdSetIterator childApproximation;
+    private final TwoPhaseIterator childTwoPhase;
 
-          // Parent & child docs are supposed to be orthogonal:
-          checkOrthogonal(nextChildDoc, prevParentDoc);
+    ParentTwoPhase(ParentApproximation parentApproximation, TwoPhaseIterator childTwoPhase) {
+      super(parentApproximation);
+      this.parentApproximation = parentApproximation;
+      this.childApproximation = childTwoPhase.approximation();
+      this.childTwoPhase = childTwoPhase;
+    }
 
-          final int nd = nextDoc();
-          //System.out.println("  return nextParentDoc=" + nd);
-          return nd;
+    @Override
+    public boolean matches() throws IOException {
+      assert childApproximation.docID() < parentApproximation.docID();
+      do {
+        if (childTwoPhase.matches()) {
+          return true;
         }
+      } while (childApproximation.nextDoc() < parentApproximation.docID());
+      return false;
+    }
 
-        @Override
-        public int docID() {
-          return parentDoc;
-        }
+    @Override
+    public float matchCost() {
+      // TODO: how could we compute a match cost?
+      return childTwoPhase.matchCost() + 10;
+    }
+  }
 
-        @Override
-        public long cost() {
-          return childIt.cost();
-        }
-      };
+  static class BlockJoinScorer extends Scorer {
+    private final Scorer childScorer;
+    private final BitSet parentBits;
+    private final ScoreMode scoreMode;
+    private final DocIdSetIterator childApproximation;
+    private final TwoPhaseIterator childTwoPhase;
+    private final ParentApproximation parentApproximation;
+    private final ParentTwoPhase parentTwoPhase;
+    private float score;
+    private int freq;
+
+    public BlockJoinScorer(Weight weight, Scorer childScorer, BitSet parentBits, ScoreMode scoreMode) {
+      super(weight);
+      //System.out.println("Q.init firstChildDoc=" + firstChildDoc);
+      this.parentBits = parentBits;
+      this.childScorer = childScorer;
+      this.scoreMode = scoreMode;
+      childTwoPhase = childScorer.twoPhaseIterator();
+      if (childTwoPhase == null) {
+        childApproximation = childScorer.iterator();
+        parentApproximation = new ParentApproximation(childApproximation, parentBits);
+        parentTwoPhase = null;
+      } else {
+        childApproximation = childTwoPhase.approximation();
+        parentApproximation = new ParentApproximation(childTwoPhase.approximation(), parentBits);
+        parentTwoPhase = new ParentTwoPhase(parentApproximation, childTwoPhase);
+      }
     }
 
-    private void checkOrthogonal(int childDoc, int parentDoc) {
-      if (childDoc==parentDoc) {
-        throw new IllegalStateException("Child query must not match same docs with parent filter. "
-             + "Combine them as must clauses (+) to find a problem doc. "
-             + "docId=" + nextChildDoc + ", " + childScorer.getClass());
-        
+    @Override
+    public Collection<ChildScorer> getChildren() {
+      return Collections.singleton(new ChildScorer(childScorer, "BLOCK_JOIN"));
+    }
+
+    @Override
+    public DocIdSetIterator iterator() {
+      if (parentTwoPhase == null) {
+        // the approximation is exact
+        return parentApproximation;
+      } else {
+        return TwoPhaseIterator.asDocIdSetIterator(parentTwoPhase);
       }
     }
 
     @Override
+    public TwoPhaseIterator twoPhaseIterator() {
+      return parentTwoPhase;
+    }
+
+    @Override
     public int docID() {
-      return parentDoc;
+      return parentApproximation.docID();
     }
 
     @Override
     public float score() throws IOException {
-      return parentScore;
+      setScoreAndFreq();
+      return score;
     }
     
     @Override
-    public int freq() {
-      return parentFreq;
+    public int freq() throws IOException {
+      setScoreAndFreq();
+      return freq;
+    }
+
+    private void setScoreAndFreq() throws IOException {
+      if (childApproximation.docID() >= parentApproximation.docID()) {
+        return;
+      }
+      double score = scoreMode == ScoreMode.None ? 0 : childScorer.score();
+      int freq = 1;
+      while (childApproximation.nextDoc() < parentApproximation.docID()) {
+        if (childTwoPhase == null || childTwoPhase.matches()) {
+          final float childScore = childScorer.score();
+          freq += 1;
+          switch (scoreMode) {
+            case Total:
+            case Avg:
+              score += childScore;
+              break;
+            case Min:
+              score = Math.min(score, childScore);
+              break;
+            case Max:
+              score = Math.min(score, childScore);
+              break;
+            case None:
+              break;
+            default:
+              throw new AssertionError();
+          }
+        }
+      }
+      if (childApproximation.docID() == parentApproximation.docID() && (childTwoPhase == null || childTwoPhase.matches())) {
+        throw new IllegalStateException("Child query must not match same docs with parent filter. "
+            + "Combine them as must clauses (+) to find a problem doc. "
+            + "docId=" + parentApproximation.docID() + ", " + childScorer.getClass());
+      }
+      if (scoreMode == ScoreMode.Avg) {
+        score /= freq;
+      }
+      this.score = (float) score;
+      this.freq = freq;
     }
 
     public Explanation explain(LeafReaderContext context, Weight childWeight) throws IOException {
+      int prevParentDoc = parentBits.prevSetBit(parentApproximation.docID() - 1);
       int start = context.docBase + prevParentDoc + 1; // +1 b/c prevParentDoc is previous parent doc
-      int end = context.docBase + parentDoc - 1; // -1 b/c parentDoc is parent doc
+      int end = context.docBase + parentApproximation.docID() - 1; // -1 b/c parentDoc is parent doc
 
       Explanation bestChild = null;
       int matches = 0;
@@ -341,6 +366,7 @@ public class ToParentBlockJoinQuery extends Query {
         }
       }
 
+      assert freq() == matches;
       return Explanation.match(score(), String.format(Locale.ROOT,
           "Score based on %d child docs in range from %d to %d, best match:", matches, start, end), bestChild
       );

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9d5dc0cf/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java
----------------------------------------------------------------------
diff --git a/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java b/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java
index f508f84..a13e66f 100644
--- a/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java
+++ b/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java
@@ -671,7 +671,7 @@ public class TestBlockJoin extends LuceneTestCase {
         System.out.println("TEST: iter=" + (1+iter) + " of " + iters);
       }
 
-      final Query childQuery;
+      Query childQuery;
       if (random().nextInt(3) == 2) {
         final int childFieldID = random().nextInt(childFields.length);
         childQuery = new TermQuery(new Term("child" + childFieldID,
@@ -706,6 +706,9 @@ public class TestBlockJoin extends LuceneTestCase {
                random().nextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT);
         childQuery = bq.build();
       }
+      if (random().nextBoolean()) {
+        childQuery = new RandomApproximationQuery(childQuery, random());
+      }
 
 
       final ScoreMode agg = ScoreMode.values()[random().nextInt(ScoreMode.values().length)];

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9d5dc0cf/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoinValidation.java
----------------------------------------------------------------------
diff --git a/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoinValidation.java b/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoinValidation.java
index aa68d09..cb3762c 100644
--- a/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoinValidation.java
+++ b/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoinValidation.java
@@ -87,25 +87,6 @@ public class TestBlockJoinValidation extends LuceneTestCase {
     assertTrue(expected.getMessage() != null && expected.getMessage().contains("Child query must not match same docs with parent filter"));
   }
 
-  public void testAdvanceValidationForToParentBjq() throws Exception {
-    int randomChildNumber = getRandomChildNumber(0);
-    // we need to make advance method meet wrong document, so random child number
-    // in BJQ must be greater than child number in Boolean clause
-    int nextRandomChildNumber = getRandomChildNumber(randomChildNumber);
-    Query parentQueryWithRandomChild = createChildrenQueryWithOneParent(nextRandomChildNumber);
-    ToParentBlockJoinQuery blockJoinQuery = new ToParentBlockJoinQuery(parentQueryWithRandomChild, parentsFilter, ScoreMode.None);
-    // advance() method is used by ConjunctionScorer, so we need to create Boolean conjunction query
-    BooleanQuery.Builder conjunctionQuery = new BooleanQuery.Builder();
-    WildcardQuery childQuery = new WildcardQuery(new Term("child", createFieldValue(randomChildNumber)));
-    conjunctionQuery.add(new BooleanClause(childQuery, BooleanClause.Occur.MUST));
-    conjunctionQuery.add(new BooleanClause(blockJoinQuery, BooleanClause.Occur.MUST));
-    
-    IllegalStateException expected = expectThrows(IllegalStateException.class, () -> {
-      indexSearcher.search(conjunctionQuery.build(), 1);
-    });
-    assertTrue(expected.getMessage() != null && expected.getMessage().contains("Child query must not match same docs with parent filter"));
-  }
-
   public void testNextDocValidationForToChildBjq() throws Exception {
     Query parentQueryWithRandomChild = createParentsQueryWithOneChild(getRandomChildNumber(0));
 


[5/6] lucene-solr:branch_6x: LUCENE-7660: LatLonPointDistanceQuery could skip distance computations more often.

Posted by jp...@apache.org.
LUCENE-7660: LatLonPointDistanceQuery could skip distance computations more often.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/1332f0f0
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/1332f0f0
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/1332f0f0

Branch: refs/heads/branch_6x
Commit: 1332f0f05b265879073f2879b67da9172b7f203b
Parents: d2051e3
Author: Adrien Grand <jp...@gmail.com>
Authored: Mon Jan 30 09:47:51 2017 +0100
Committer: Adrien Grand <jp...@gmail.com>
Committed: Mon Jan 30 11:16:31 2017 +0100

----------------------------------------------------------------------
 lucene/CHANGES.txt                                     |  4 ++++
 .../core/src/java/org/apache/lucene/geo/GeoUtils.java  | 13 ++++++++++++-
 .../src/test/org/apache/lucene/geo/TestGeoUtils.java   | 12 ++++++++++++
 3 files changed, 28 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1332f0f0/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 1bc5046..e1af086 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -73,6 +73,10 @@ Optimizations
 * LUCENE-7661: Speed up for LatLonPointInPolygonQuery by pre-computing the
   relation of the polygon with a grid. (Adrien Grand)
 
+* LUCENE-7660: Speed up LatLonPointDistanceQuery by improving the detection of
+  whether BKD cells are entirely within the distance close to the dateline.
+  (Adrien Grand)
+
 Build
 
 * LUCENE-7651: Fix Javadocs build for Java 8u121 by injecting "Google Code

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1332f0f0/lucene/core/src/java/org/apache/lucene/geo/GeoUtils.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/geo/GeoUtils.java b/lucene/core/src/java/org/apache/lucene/geo/GeoUtils.java
index fa6f7a5..3695d01 100644
--- a/lucene/core/src/java/org/apache/lucene/geo/GeoUtils.java
+++ b/lucene/core/src/java/org/apache/lucene/geo/GeoUtils.java
@@ -153,7 +153,7 @@ public final class GeoUtils {
       }
     }
 
-    if (maxLon - lon < 90 && lon - minLon < 90 &&
+    if (within90LonDegrees(lon, minLon, maxLon) &&
         SloppyMath.haversinSortKey(lat, lon, minLat, minLon) <= distanceSortKey &&
         SloppyMath.haversinSortKey(lat, lon, minLat, maxLon) <= distanceSortKey &&
         SloppyMath.haversinSortKey(lat, lon, maxLat, minLon) <= distanceSortKey &&
@@ -164,4 +164,15 @@ public final class GeoUtils {
 
     return Relation.CELL_CROSSES_QUERY;
   }
+
+  /** Return whether all points of {@code [minLon,maxLon]} are within 90 degrees of {@code lon}. */
+  static boolean within90LonDegrees(double lon, double minLon, double maxLon) {
+    if (maxLon <= lon - 180) {
+      lon -= 360;
+    } else if (minLon >= lon + 180) {
+      lon += 360;
+    }
+    return maxLon - lon < 90 && lon - minLon < 90;
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1332f0f0/lucene/core/src/test/org/apache/lucene/geo/TestGeoUtils.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/geo/TestGeoUtils.java b/lucene/core/src/test/org/apache/lucene/geo/TestGeoUtils.java
index 2cfb2f8..3d95a6d 100644
--- a/lucene/core/src/test/org/apache/lucene/geo/TestGeoUtils.java
+++ b/lucene/core/src/test/org/apache/lucene/geo/TestGeoUtils.java
@@ -293,4 +293,16 @@ public class TestGeoUtils extends LuceneTestCase {
 
     return false;
   }
+
+  public void testWithin90LonDegrees() {
+    assertTrue(GeoUtils.within90LonDegrees(0, -80, 80));
+    assertFalse(GeoUtils.within90LonDegrees(0, -100, 80));
+    assertFalse(GeoUtils.within90LonDegrees(0, -80, 100));
+
+    assertTrue(GeoUtils.within90LonDegrees(-150, 140, 170));
+    assertFalse(GeoUtils.within90LonDegrees(-150, 120, 150));
+
+    assertTrue(GeoUtils.within90LonDegrees(150, -170, -140));
+    assertFalse(GeoUtils.within90LonDegrees(150, -150, -120));
+  }
 }


[6/6] lucene-solr:branch_6x: LUCENE-7654: To-parent block joins should implement two-phase iteration.

Posted by jp...@apache.org.
LUCENE-7654: To-parent block joins should implement two-phase iteration.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/b6b44d44
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/b6b44d44
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/b6b44d44

Branch: refs/heads/branch_6x
Commit: b6b44d44d8277c533c7df975e05dd1c313cf3f23
Parents: 1332f0f
Author: Adrien Grand <jp...@gmail.com>
Authored: Mon Jan 30 10:46:22 2017 +0100
Committer: Adrien Grand <jp...@gmail.com>
Committed: Mon Jan 30 11:16:41 2017 +0100

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |   4 +
 .../search/join/ToParentBlockJoinQuery.java     | 336 ++++++++++---------
 .../lucene/search/join/TestBlockJoin.java       |   5 +-
 .../search/join/TestBlockJoinValidation.java    |  19 --
 4 files changed, 189 insertions(+), 175 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b6b44d44/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index e1af086..c26b8b9 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -77,6 +77,10 @@ Optimizations
   whether BKD cells are entirely within the distance close to the dateline.
   (Adrien Grand)
 
+* LUCENE-7654: ToParentBlockJoinQuery now implements two-phase iteration and
+  computes scores lazily in order to be faster when used in conjunctions.
+  (Adrien Grand)
+
 Build
 
 * LUCENE-7651: Fix Javadocs build for Java 8u121 by injecting "Google Code

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b6b44d44/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
----------------------------------------------------------------------
diff --git a/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java b/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
index 254917f..0e15ac2 100644
--- a/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
+++ b/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
@@ -20,15 +20,18 @@ import java.io.IOException;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.Locale;
+
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.search.FilterWeight;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.Explanation;
+import org.apache.lucene.search.FilterWeight;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.ScorerSupplier;
+import org.apache.lucene.search.TwoPhaseIterator;
 import org.apache.lucene.search.Weight;
 import org.apache.lucene.util.BitSet;
 
@@ -74,7 +77,7 @@ public class ToParentBlockJoinQuery extends Query {
   private final ScoreMode scoreMode;
 
   /** Create a ToParentBlockJoinQuery.
-   * 
+   *
    * @param childQuery Query matching child documents.
    * @param parentsFilter Filter identifying the parent documents.
    * @param scoreMode How to aggregate multiple child scores
@@ -100,7 +103,7 @@ public class ToParentBlockJoinQuery extends Query {
   public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
     return new BlockJoinWeight(this, childQuery.createWeight(searcher, needsScores), parentsFilter, needsScores ? scoreMode : ScoreMode.None);
   }
-  
+
   /** Return our child query. */
   public Query getChildQuery() {
     return childQuery;
@@ -116,33 +119,44 @@ public class ToParentBlockJoinQuery extends Query {
       this.scoreMode = scoreMode;
     }
 
-    // NOTE: acceptDocs applies (and is checked) only in the
-    // parent document space
     @Override
-    public Scorer scorer(LeafReaderContext readerContext) throws IOException {
-
-      final Scorer childScorer = in.scorer(readerContext);
-      if (childScorer == null) {
-        // No matches
+    public Scorer scorer(LeafReaderContext context) throws IOException {
+      final ScorerSupplier scorerSupplier = scorerSupplier(context);
+      if (scorerSupplier == null) {
         return null;
       }
+      return scorerSupplier.get(false);
+    }
 
-      final int firstChildDoc = childScorer.iterator().nextDoc();
-      if (firstChildDoc == DocIdSetIterator.NO_MORE_DOCS) {
-        // No matches
+    // NOTE: acceptDocs applies (and is checked) only in the
+    // parent document space
+    @Override
+    public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
+      final ScorerSupplier childScorerSupplier = in.scorerSupplier(context);
+      if (childScorerSupplier == null) {
         return null;
       }
 
       // NOTE: this does not take accept docs into account, the responsibility
       // to not match deleted docs is on the scorer
-      final BitSet parents = parentsFilter.getBitSet(readerContext);
-
+      final BitSet parents = parentsFilter.getBitSet(context);
       if (parents == null) {
         // No matches
         return null;
       }
 
-      return new BlockJoinScorer(this, childScorer, parents, firstChildDoc, scoreMode);
+      return new ScorerSupplier() {
+
+        @Override
+        public Scorer get(boolean randomAccess) throws IOException {
+          return new BlockJoinScorer(BlockJoinWeight.this, childScorerSupplier.get(randomAccess), parents, scoreMode);
+        }
+
+        @Override
+        public long cost() {
+          return childScorerSupplier.cost();
+        }
+      };
     }
 
     @Override
@@ -154,180 +168,191 @@ public class ToParentBlockJoinQuery extends Query {
       return Explanation.noMatch("Not a match");
     }
   }
-  
-  static class BlockJoinScorer extends Scorer {
-    private final Scorer childScorer;
+
+  private static class ParentApproximation extends DocIdSetIterator {
+
+    private final DocIdSetIterator childApproximation;
     private final BitSet parentBits;
-    private final ScoreMode scoreMode;
-    private int parentDoc = -1;
-    private int prevParentDoc;
-    private float parentScore;
-    private int parentFreq;
-    private int nextChildDoc;
-    private int childDocUpto;
-
-    public BlockJoinScorer(Weight weight, Scorer childScorer, BitSet parentBits, int firstChildDoc, ScoreMode scoreMode) {
-      super(weight);
-      //System.out.println("Q.init firstChildDoc=" + firstChildDoc);
+    private int doc = -1;
+
+    ParentApproximation(DocIdSetIterator childApproximation, BitSet parentBits) {
+      this.childApproximation = childApproximation;
       this.parentBits = parentBits;
-      this.childScorer = childScorer;
-      this.scoreMode = scoreMode;
-      nextChildDoc = firstChildDoc;
     }
 
     @Override
-    public Collection<ChildScorer> getChildren() {
-      return Collections.singleton(new ChildScorer(childScorer, "BLOCK_JOIN"));
+    public int docID() {
+      return doc;
     }
 
     @Override
-    public DocIdSetIterator iterator() {
-      return new DocIdSetIterator() {
-        final DocIdSetIterator childIt = childScorer.iterator();
-
-        @Override
-        public int nextDoc() throws IOException {
-          //System.out.println("Q.nextDoc() nextChildDoc=" + nextChildDoc);
-          if (nextChildDoc == NO_MORE_DOCS) {
-            //System.out.println("  end");
-            return parentDoc = NO_MORE_DOCS;
-          }
-
-          // Gather all children sharing the same parent as
-          // nextChildDoc
-
-          parentDoc = parentBits.nextSetBit(nextChildDoc);
-
-          // Parent & child docs are supposed to be
-          // orthogonal:
-          checkOrthogonal(nextChildDoc, parentDoc);
-
-          //System.out.println("  parentDoc=" + parentDoc);
-          assert parentDoc != DocIdSetIterator.NO_MORE_DOCS;
-
-          float totalScore = 0;
-          float maxScore = Float.NEGATIVE_INFINITY;
-          float minScore = Float.POSITIVE_INFINITY;
-
-          childDocUpto = 0;
-          parentFreq = 0;
-          do {
-
-            //System.out.println("  c=" + nextChildDoc);
-            if (scoreMode != ScoreMode.None) {
-              // TODO: specialize this into dedicated classes per-scoreMode
-              final float childScore = childScorer.score();
-              final int childFreq = childScorer.freq();
-              maxScore = Math.max(childScore, maxScore);
-              minScore = Math.min(childScore, minScore);
-              totalScore += childScore;
-              parentFreq += childFreq;
-            }
-            childDocUpto++;
-            nextChildDoc = childIt.nextDoc();
-          } while (nextChildDoc < parentDoc);
-
-          // Parent & child docs are supposed to be
-          // orthogonal:
-          checkOrthogonal(nextChildDoc, parentDoc);
-
-          switch(scoreMode) {
-          case Avg:
-            parentScore = totalScore / childDocUpto;
-            break;
-          case Max:
-            parentScore = maxScore;
-            break;
-          case Min:
-            parentScore = minScore;
-            break;
-          case Total:
-            parentScore = totalScore;
-            break;
-          case None:
-            break;
-          }
-
-          //System.out.println("  return parentDoc=" + parentDoc + " childDocUpto=" + childDocUpto);
-          return parentDoc;
-        }
-
-        @Override
-        public int advance(int parentTarget) throws IOException {
+    public int nextDoc() throws IOException {
+      return advance(doc + 1);
+    }
 
-          //System.out.println("Q.advance parentTarget=" + parentTarget);
-          if (parentTarget == NO_MORE_DOCS) {
-            return parentDoc = NO_MORE_DOCS;
-          }
+    @Override
+    public int advance(int target) throws IOException {
+      if (target >= parentBits.length()) {
+        return doc = NO_MORE_DOCS;
+      }
+      final int firstChildTarget = target == 0 ? 0 : parentBits.prevSetBit(target - 1) + 1;
+      int childDoc = childApproximation.docID();
+      if (childDoc < firstChildTarget) {
+        childDoc = childApproximation.advance(firstChildTarget);
+      }
+      if (childDoc >= parentBits.length() - 1) {
+        return doc = NO_MORE_DOCS;
+      }
+      return doc = parentBits.nextSetBit(childDoc + 1);
+    }
 
-          if (parentTarget == 0) {
-            // Callers should only be passing in a docID from
-            // the parent space, so this means this parent
-            // has no children (it got docID 0), so it cannot
-            // possibly match.  We must handle this case
-            // separately otherwise we pass invalid -1 to
-            // prevSetBit below:
-            return nextDoc();
-          }
+    @Override
+    public long cost() {
+      return childApproximation.cost();
+    }
+  }
 
-          prevParentDoc = parentBits.prevSetBit(parentTarget-1);
+  private static class ParentTwoPhase extends TwoPhaseIterator {
 
-          //System.out.println("  rolled back to prevParentDoc=" + prevParentDoc + " vs parentDoc=" + parentDoc);
-          assert prevParentDoc >= parentDoc;
-          if (prevParentDoc > nextChildDoc) {
-            nextChildDoc = childIt.advance(prevParentDoc);
-            // System.out.println("  childScorer advanced to child docID=" + nextChildDoc);
-          //} else {
-            //System.out.println("  skip childScorer advance");
-          }
+    private final ParentApproximation parentApproximation;
+    private final DocIdSetIterator childApproximation;
+    private final TwoPhaseIterator childTwoPhase;
 
-          // Parent & child docs are supposed to be orthogonal:
-          checkOrthogonal(nextChildDoc, prevParentDoc);
+    ParentTwoPhase(ParentApproximation parentApproximation, TwoPhaseIterator childTwoPhase) {
+      super(parentApproximation);
+      this.parentApproximation = parentApproximation;
+      this.childApproximation = childTwoPhase.approximation();
+      this.childTwoPhase = childTwoPhase;
+    }
 
-          final int nd = nextDoc();
-          //System.out.println("  return nextParentDoc=" + nd);
-          return nd;
+    @Override
+    public boolean matches() throws IOException {
+      assert childApproximation.docID() < parentApproximation.docID();
+      do {
+        if (childTwoPhase.matches()) {
+          return true;
         }
+      } while (childApproximation.nextDoc() < parentApproximation.docID());
+      return false;
+    }
 
-        @Override
-        public int docID() {
-          return parentDoc;
-        }
+    @Override
+    public float matchCost() {
+      // TODO: how could we compute a match cost?
+      return childTwoPhase.matchCost() + 10;
+    }
+  }
 
-        @Override
-        public long cost() {
-          return childIt.cost();
-        }
-      };
+  static class BlockJoinScorer extends Scorer {
+    private final Scorer childScorer;
+    private final BitSet parentBits;
+    private final ScoreMode scoreMode;
+    private final DocIdSetIterator childApproximation;
+    private final TwoPhaseIterator childTwoPhase;
+    private final ParentApproximation parentApproximation;
+    private final ParentTwoPhase parentTwoPhase;
+    private float score;
+    private int freq;
+
+    public BlockJoinScorer(Weight weight, Scorer childScorer, BitSet parentBits, ScoreMode scoreMode) {
+      super(weight);
+      //System.out.println("Q.init firstChildDoc=" + firstChildDoc);
+      this.parentBits = parentBits;
+      this.childScorer = childScorer;
+      this.scoreMode = scoreMode;
+      childTwoPhase = childScorer.twoPhaseIterator();
+      if (childTwoPhase == null) {
+        childApproximation = childScorer.iterator();
+        parentApproximation = new ParentApproximation(childApproximation, parentBits);
+        parentTwoPhase = null;
+      } else {
+        childApproximation = childTwoPhase.approximation();
+        parentApproximation = new ParentApproximation(childTwoPhase.approximation(), parentBits);
+        parentTwoPhase = new ParentTwoPhase(parentApproximation, childTwoPhase);
+      }
     }
 
-    private void checkOrthogonal(int childDoc, int parentDoc) {
-      if (childDoc==parentDoc) {
-        throw new IllegalStateException("Child query must not match same docs with parent filter. "
-             + "Combine them as must clauses (+) to find a problem doc. "
-             + "docId=" + nextChildDoc + ", " + childScorer.getClass());
-        
+    @Override
+    public Collection<ChildScorer> getChildren() {
+      return Collections.singleton(new ChildScorer(childScorer, "BLOCK_JOIN"));
+    }
+
+    @Override
+    public DocIdSetIterator iterator() {
+      if (parentTwoPhase == null) {
+        // the approximation is exact
+        return parentApproximation;
+      } else {
+        return TwoPhaseIterator.asDocIdSetIterator(parentTwoPhase);
       }
     }
 
     @Override
+    public TwoPhaseIterator twoPhaseIterator() {
+      return parentTwoPhase;
+    }
+
+    @Override
     public int docID() {
-      return parentDoc;
+      return parentApproximation.docID();
     }
 
     @Override
     public float score() throws IOException {
-      return parentScore;
+      setScoreAndFreq();
+      return score;
     }
     
     @Override
-    public int freq() {
-      return parentFreq;
+    public int freq() throws IOException {
+      setScoreAndFreq();
+      return freq;
+    }
+
+    private void setScoreAndFreq() throws IOException {
+      if (childApproximation.docID() >= parentApproximation.docID()) {
+        return;
+      }
+      double score = scoreMode == ScoreMode.None ? 0 : childScorer.score();
+      int freq = 1;
+      while (childApproximation.nextDoc() < parentApproximation.docID()) {
+        if (childTwoPhase == null || childTwoPhase.matches()) {
+          final float childScore = childScorer.score();
+          freq += 1;
+          switch (scoreMode) {
+            case Total:
+            case Avg:
+              score += childScore;
+              break;
+            case Min:
+              score = Math.min(score, childScore);
+              break;
+            case Max:
+              score = Math.min(score, childScore);
+              break;
+            case None:
+              break;
+            default:
+              throw new AssertionError();
+          }
+        }
+      }
+      if (childApproximation.docID() == parentApproximation.docID() && (childTwoPhase == null || childTwoPhase.matches())) {
+        throw new IllegalStateException("Child query must not match same docs with parent filter. "
+            + "Combine them as must clauses (+) to find a problem doc. "
+            + "docId=" + parentApproximation.docID() + ", " + childScorer.getClass());
+      }
+      if (scoreMode == ScoreMode.Avg) {
+        score /= freq;
+      }
+      this.score = (float) score;
+      this.freq = freq;
     }
 
     public Explanation explain(LeafReaderContext context, Weight childWeight) throws IOException {
+      int prevParentDoc = parentBits.prevSetBit(parentApproximation.docID() - 1);
       int start = context.docBase + prevParentDoc + 1; // +1 b/c prevParentDoc is previous parent doc
-      int end = context.docBase + parentDoc - 1; // -1 b/c parentDoc is parent doc
+      int end = context.docBase + parentApproximation.docID() - 1; // -1 b/c parentDoc is parent doc
 
       Explanation bestChild = null;
       int matches = 0;
@@ -341,6 +366,7 @@ public class ToParentBlockJoinQuery extends Query {
         }
       }
 
+      assert freq() == matches;
       return Explanation.match(score(), String.format(Locale.ROOT,
           "Score based on %d child docs in range from %d to %d, best match:", matches, start, end), bestChild
       );

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b6b44d44/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java
----------------------------------------------------------------------
diff --git a/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java b/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java
index 02f6b23..9a6ead4 100644
--- a/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java
+++ b/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java
@@ -671,7 +671,7 @@ public class TestBlockJoin extends LuceneTestCase {
         System.out.println("TEST: iter=" + (1+iter) + " of " + iters);
       }
 
-      final Query childQuery;
+      Query childQuery;
       if (random().nextInt(3) == 2) {
         final int childFieldID = random().nextInt(childFields.length);
         childQuery = new TermQuery(new Term("child" + childFieldID,
@@ -706,6 +706,9 @@ public class TestBlockJoin extends LuceneTestCase {
                random().nextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT);
         childQuery = bq.build();
       }
+      if (random().nextBoolean()) {
+        childQuery = new RandomApproximationQuery(childQuery, random());
+      }
 
 
       final ScoreMode agg = ScoreMode.values()[random().nextInt(ScoreMode.values().length)];

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b6b44d44/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoinValidation.java
----------------------------------------------------------------------
diff --git a/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoinValidation.java b/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoinValidation.java
index aa68d09..cb3762c 100644
--- a/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoinValidation.java
+++ b/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoinValidation.java
@@ -87,25 +87,6 @@ public class TestBlockJoinValidation extends LuceneTestCase {
     assertTrue(expected.getMessage() != null && expected.getMessage().contains("Child query must not match same docs with parent filter"));
   }
 
-  public void testAdvanceValidationForToParentBjq() throws Exception {
-    int randomChildNumber = getRandomChildNumber(0);
-    // we need to make advance method meet wrong document, so random child number
-    // in BJQ must be greater than child number in Boolean clause
-    int nextRandomChildNumber = getRandomChildNumber(randomChildNumber);
-    Query parentQueryWithRandomChild = createChildrenQueryWithOneParent(nextRandomChildNumber);
-    ToParentBlockJoinQuery blockJoinQuery = new ToParentBlockJoinQuery(parentQueryWithRandomChild, parentsFilter, ScoreMode.None);
-    // advance() method is used by ConjunctionScorer, so we need to create Boolean conjunction query
-    BooleanQuery.Builder conjunctionQuery = new BooleanQuery.Builder();
-    WildcardQuery childQuery = new WildcardQuery(new Term("child", createFieldValue(randomChildNumber)));
-    conjunctionQuery.add(new BooleanClause(childQuery, BooleanClause.Occur.MUST));
-    conjunctionQuery.add(new BooleanClause(blockJoinQuery, BooleanClause.Occur.MUST));
-    
-    IllegalStateException expected = expectThrows(IllegalStateException.class, () -> {
-      indexSearcher.search(conjunctionQuery.build(), 1);
-    });
-    assertTrue(expected.getMessage() != null && expected.getMessage().contains("Child query must not match same docs with parent filter"));
-  }
-
   public void testNextDocValidationForToChildBjq() throws Exception {
     Query parentQueryWithRandomChild = createParentsQueryWithOneChild(getRandomChildNumber(0));
 


[2/6] lucene-solr:master: LUCENE-7660: LatLonPointDistanceQuery could skip distance computations more often.

Posted by jp...@apache.org.
LUCENE-7660: LatLonPointDistanceQuery could skip distance computations more often.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/076662d1
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/076662d1
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/076662d1

Branch: refs/heads/master
Commit: 076662d1b23414b94e332206dd7ff73e8d9f9d0b
Parents: 74240be
Author: Adrien Grand <jp...@gmail.com>
Authored: Mon Jan 30 09:47:51 2017 +0100
Committer: Adrien Grand <jp...@gmail.com>
Committed: Mon Jan 30 09:47:51 2017 +0100

----------------------------------------------------------------------
 lucene/CHANGES.txt                                     |  4 ++++
 .../core/src/java/org/apache/lucene/geo/GeoUtils.java  | 13 ++++++++++++-
 .../src/test/org/apache/lucene/geo/TestGeoUtils.java   | 12 ++++++++++++
 3 files changed, 28 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/076662d1/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 0be102c..6a0d17d 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -131,6 +131,10 @@ Optimizations
 * LUCENE-7661: Speed up for LatLonPointInPolygonQuery by pre-computing the
   relation of the polygon with a grid. (Adrien Grand)
 
+* LUCENE-7660: Speed up LatLonPointDistanceQuery by improving the detection of
+  whether BKD cells are entirely within the distance close to the dateline.
+  (Adrien Grand)
+
 Build
 
 * LUCENE-7651: Fix Javadocs build for Java 8u121 by injecting "Google Code

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/076662d1/lucene/core/src/java/org/apache/lucene/geo/GeoUtils.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/geo/GeoUtils.java b/lucene/core/src/java/org/apache/lucene/geo/GeoUtils.java
index defcb48..aaca549 100644
--- a/lucene/core/src/java/org/apache/lucene/geo/GeoUtils.java
+++ b/lucene/core/src/java/org/apache/lucene/geo/GeoUtils.java
@@ -152,7 +152,7 @@ public final class GeoUtils {
       }
     }
 
-    if (maxLon - lon < 90 && lon - minLon < 90 &&
+    if (within90LonDegrees(lon, minLon, maxLon) &&
         SloppyMath.haversinSortKey(lat, lon, minLat, minLon) <= distanceSortKey &&
         SloppyMath.haversinSortKey(lat, lon, minLat, maxLon) <= distanceSortKey &&
         SloppyMath.haversinSortKey(lat, lon, maxLat, minLon) <= distanceSortKey &&
@@ -163,4 +163,15 @@ public final class GeoUtils {
 
     return Relation.CELL_CROSSES_QUERY;
   }
+
+  /** Return whether all points of {@code [minLon,maxLon]} are within 90 degrees of {@code lon}. */
+  static boolean within90LonDegrees(double lon, double minLon, double maxLon) {
+    if (maxLon <= lon - 180) {
+      lon -= 360;
+    } else if (minLon >= lon + 180) {
+      lon += 360;
+    }
+    return maxLon - lon < 90 && lon - minLon < 90;
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/076662d1/lucene/core/src/test/org/apache/lucene/geo/TestGeoUtils.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/geo/TestGeoUtils.java b/lucene/core/src/test/org/apache/lucene/geo/TestGeoUtils.java
index 2cfb2f8..3d95a6d 100644
--- a/lucene/core/src/test/org/apache/lucene/geo/TestGeoUtils.java
+++ b/lucene/core/src/test/org/apache/lucene/geo/TestGeoUtils.java
@@ -293,4 +293,16 @@ public class TestGeoUtils extends LuceneTestCase {
 
     return false;
   }
+
+  public void testWithin90LonDegrees() {
+    assertTrue(GeoUtils.within90LonDegrees(0, -80, 80));
+    assertFalse(GeoUtils.within90LonDegrees(0, -100, 80));
+    assertFalse(GeoUtils.within90LonDegrees(0, -80, 100));
+
+    assertTrue(GeoUtils.within90LonDegrees(-150, 140, 170));
+    assertFalse(GeoUtils.within90LonDegrees(-150, 120, 150));
+
+    assertTrue(GeoUtils.within90LonDegrees(150, -170, -140));
+    assertFalse(GeoUtils.within90LonDegrees(150, -150, -120));
+  }
 }