You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by th...@apache.org on 2016/04/14 01:22:01 UTC

[45/50] lucene-solr:jira/SOLR-8908: LUCENE-7214: Remove two-phase iteration from LatLonPoint.newDistanceQuery

LUCENE-7214: Remove two-phase iteration from LatLonPoint.newDistanceQuery


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/fc19c99e
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/fc19c99e
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/fc19c99e

Branch: refs/heads/jira/SOLR-8908
Commit: fc19c99e0efc4cf7a0015974de118b2ae220e9cf
Parents: 689e966
Author: Robert Muir <rm...@apache.org>
Authored: Wed Apr 13 15:56:32 2016 -0400
Committer: Robert Muir <rm...@apache.org>
Committed: Wed Apr 13 15:57:04 2016 -0400

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |  3 -
 .../document/LatLonPointDistanceQuery.java      | 79 +++++---------------
 2 files changed, 18 insertions(+), 64 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/fc19c99e/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 58eee6a..ac68f78 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -39,9 +39,6 @@ Optimizations
 * LUCENE-7071: Reduce bytes copying in OfflineSorter, giving ~10%
   speedup on merging 2D LatLonPoint values (Mike McCandless)
 
-* LUCENE-7099: LatLonPoint's newDistanceQuery supports two-phase
-  iteration. (Robert Muir)
-
 * LUCENE-7105: Optimize LatLonPoint's newDistanceQuery. (Robert Muir)
 
 * LUCENE-7109: LatLonPoint's newPolygonQuery supports two-phase 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/fc19c99e/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java
index 843421b..604886b 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java
@@ -18,13 +18,12 @@ package org.apache.lucene.document;
 
 import java.io.IOException;
 
+import org.apache.lucene.geo.GeoUtils;
 import org.apache.lucene.geo.Rectangle;
-import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.PointValues;
-import org.apache.lucene.index.SortedNumericDocValues;
 import org.apache.lucene.index.PointValues.IntersectVisitor;
 import org.apache.lucene.index.PointValues.Relation;
 import org.apache.lucene.search.ConstantScoreScorer;
@@ -34,15 +33,10 @@ import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.Scorer;
-import org.apache.lucene.search.TwoPhaseIterator;
 import org.apache.lucene.search.Weight;
-import org.apache.lucene.geo.GeoUtils;
-import org.apache.lucene.util.BitSet;
 import org.apache.lucene.util.DocIdSetBuilder;
-import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.NumericUtils;
 import org.apache.lucene.util.SloppyMath;
-import org.apache.lucene.util.SparseFixedBitSet;
 import org.apache.lucene.util.StringHelper;
 
 import static org.apache.lucene.geo.GeoEncodingUtils.decodeLatitude;
@@ -104,7 +98,7 @@ final class LatLonPointDistanceQuery extends Query {
     }
 
     // compute a maximum partial haversin: unless our box is crazy, we can use this bound
-    // to reject edge cases faster in matches()
+    // to reject edge cases faster in visit()
     final double maxPartialDistance;
     if (box.maxLon - longitude < 90 && longitude - box.minLon < 90) {
       maxPartialDistance = Math.max(SloppyMath.haversinSortKey(latitude, longitude, latitude, box.maxLon),
@@ -132,16 +126,9 @@ final class LatLonPointDistanceQuery extends Query {
         }
         LatLonPoint.checkCompatible(fieldInfo);
         
-        // approximation (postfiltering has not yet been applied)
+        // matching docids
         DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc());
-        // subset of documents that need no postfiltering, this is purely an optimization
-        final BitSet preApproved;
-        // dumb heuristic: if the field is really sparse, use a sparse impl
-        if (values.getDocCount(field) * 100L < reader.maxDoc()) {
-          preApproved = new SparseFixedBitSet(reader.maxDoc());
-        } else {
-          preApproved = new FixedBitSet(reader.maxDoc());
-        }
+
         values.intersect(field,
                          new IntersectVisitor() {
                            @Override
@@ -152,14 +139,11 @@ final class LatLonPointDistanceQuery extends Query {
                            @Override
                            public void visit(int docID) {
                              result.add(docID);
-                             preApproved.set(docID);
                            }
 
                            @Override
                            public void visit(int docID, byte[] packedValue) {
-                             // we bounds check individual values, as subtrees may cross, but we are being sent the values anyway:
-                             // this reduces the amount of docvalues fetches (improves approximation)
-
+                             // bounding box check
                              if (StringHelper.compare(Integer.BYTES, packedValue, 0, maxLat, 0) > 0 ||
                                  StringHelper.compare(Integer.BYTES, packedValue, 0, minLat, 0) < 0) {
                                // latitude out of bounding box range
@@ -173,13 +157,23 @@ final class LatLonPointDistanceQuery extends Query {
                                return;
                              }
 
-                             result.add(docID);
+                             double docLatitude = decodeLatitude(packedValue, 0);
+                             double docLongitude = decodeLongitude(packedValue, Integer.BYTES);
+
+                             // first check the partial distance, if its more than that, it can't be <= radiusMeters
+                             double h1 = SloppyMath.haversinSortKey(latitude, longitude, docLatitude, docLongitude);
+                             if (h1 <= maxPartialDistance) {
+                               // fully confirm with part 2:
+                               if (SloppyMath.haversinMeters(h1) <= radiusMeters) {
+                                 result.add(docID);
+                               }
+                             }
                            }
                            
                            // algorithm: we create a bounding box (two bounding boxes if we cross the dateline).
                            // 1. check our bounding box(es) first. if the subtree is entirely outside of those, bail.
                            // 2. check if the subtree is disjoint. it may cross the bounding box but not intersect with circle
-                           // 3. see if the subtree is fully contained. if the subtree is enormous along the x axis, wrapping half way around the world, etc: then this can't work, just go to step 3.
+                           // 3. see if the subtree is fully contained. if the subtree is enormous along the x axis, wrapping half way around the world, etc: then this can't work, just go to step 4.
                            // 4. recurse naively (subtrees crossing over circle edge)
                            @Override
                            public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
@@ -231,44 +225,7 @@ final class LatLonPointDistanceQuery extends Query {
         if (disi == null) {
           return null;
         }
-
-        // return two-phase iterator using docvalues to postfilter candidates
-        SortedNumericDocValues docValues = DocValues.getSortedNumeric(reader, field);
-        TwoPhaseIterator iterator = new TwoPhaseIterator(disi) {
-          @Override
-          public boolean matches() throws IOException {
-            int docId = disi.docID();
-            if (preApproved.get(docId)) {
-              return true;
-            } else {
-              docValues.setDocument(docId);
-              int count = docValues.count();
-              for (int i = 0; i < count; i++) {
-                long encoded = docValues.valueAt(i);
-                double docLatitude = decodeLatitude((int)(encoded >> 32));
-                double docLongitude = decodeLongitude((int)(encoded & 0xFFFFFFFF));
-
-                // first check the partial distance, if its more than that, it can't be <= radiusMeters
-                double h1 = SloppyMath.haversinSortKey(latitude, longitude, docLatitude, docLongitude);
-                if (h1 > maxPartialDistance) {
-                  continue;
-                }
-
-                // fully confirm with part 2:
-                if (SloppyMath.haversinMeters(h1) <= radiusMeters) {
-                  return true;
-                }
-              }
-              return false;
-            }
-          }
-
-          @Override
-          public float matchCost() {
-            return 20; // TODO: make this fancier
-          }
-        };
-        return new ConstantScoreScorer(this, score(), iterator);
+        return new ConstantScoreScorer(this, score(), disi);
       }
     };
   }