You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by ki...@apache.org on 2020/12/21 17:17:18 UTC

[incubator-pinot] branch h3-index updated: Removing H3WITHIN UDF and adding logic to use h3 index for st_distance udf

This is an automated email from the ASF dual-hosted git repository.

kishoreg pushed a commit to branch h3-index
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git


The following commit(s) were added to refs/heads/h3-index by this push:
     new bab10b9  Removing H3WITHIN UDF and adding logic to use h3 index for st_distance udf
bab10b9 is described below

commit bab10b9536c22cce3e21a974077860e79148bb24
Author: kishoreg <g....@gmail.com>
AuthorDate: Mon Dec 21 09:16:46 2020 -0800

    Removing H3WITHIN UDF and adding logic to use h3 index for st_distance udf
---
 .../operator/filter/H3IndexFilterOperator.java     | 73 +++++++++++++++++-----
 .../org/apache/pinot/core/plan/FilterPlanNode.java | 32 +++++-----
 .../request/context/predicate/GeoPredicate.java    | 12 +---
 .../creator/impl/geospatial/H3IndexCreator.java    | 10 +++
 .../batch/starbucksStores/rawdata/data.csv         |  2 +-
 5 files changed, 87 insertions(+), 42 deletions(-)

diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/H3IndexFilterOperator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/H3IndexFilterOperator.java
index 7528b7e..0b65dd7 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/H3IndexFilterOperator.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/H3IndexFilterOperator.java
@@ -23,10 +23,19 @@ import com.uber.h3core.LengthUnit;
 import java.io.IOException;
 import java.util.List;
 import org.apache.pinot.core.common.DataSource;
+import org.apache.pinot.core.geospatial.serde.GeometrySerializer;
+import org.apache.pinot.core.geospatial.transform.function.StPointFunction;
+import org.apache.pinot.core.indexsegment.IndexSegment;
 import org.apache.pinot.core.operator.blocks.FilterBlock;
 import org.apache.pinot.core.operator.docidsets.BitmapDocIdSet;
+import org.apache.pinot.core.query.request.context.ExpressionContext;
+import org.apache.pinot.core.query.request.context.FunctionContext;
 import org.apache.pinot.core.query.request.context.predicate.GeoPredicate;
+import org.apache.pinot.core.query.request.context.predicate.Predicate;
+import org.apache.pinot.core.query.request.context.predicate.RangePredicate;
 import org.apache.pinot.core.segment.index.readers.geospatial.H3IndexReader;
+import org.apache.pinot.spi.utils.BytesUtils;
+import org.locationtech.jts.geom.Geometry;
 import org.roaringbitmap.buffer.ImmutableRoaringBitmap;
 import org.roaringbitmap.buffer.MutableRoaringBitmap;
 
@@ -36,14 +45,51 @@ public class H3IndexFilterOperator extends BaseFilterOperator {
 
   // NOTE: Range index can only apply to dictionary-encoded columns for now
   // TODO: Support raw index columns
-  private final GeoPredicate _geoPredicate;
-  private final DataSource _dataSource;
   private final int _numDocs;
   private final H3Core _h3Core;
+  private final H3IndexReader _h3IndexReader;
+  private Geometry _geometry;
+  private double _distance;
 
-  public H3IndexFilterOperator(GeoPredicate geoPredicate, DataSource dataSource, int numDocs) {
-    _geoPredicate = geoPredicate;
-    _dataSource = dataSource;
+  public H3IndexFilterOperator(Predicate predicate, IndexSegment indexSegment, int numDocs) {
+    FunctionContext function = predicate.getLhs().getFunction();
+    String columnName;
+
+    if (function.getArguments().get(0).getType() == ExpressionContext.Type.IDENTIFIER) {
+      columnName = function.getArguments().get(0).getIdentifier();
+      byte[] bytes = BytesUtils.toBytes(function.getArguments().get(1).getLiteral());
+      _geometry = GeometrySerializer.deserialize(bytes);
+    } else if (function.getArguments().get(1).getType() == ExpressionContext.Type.IDENTIFIER) {
+      columnName = function.getArguments().get(1).getIdentifier();
+      byte[] bytes = BytesUtils.toBytes(function.getArguments().get(0).getLiteral());
+      _geometry = GeometrySerializer.deserialize(bytes);
+    } else {
+      throw new RuntimeException("Expecting one of the arguments of ST_DISTANCE to be an identifier");
+    }
+    DataSource dataSource = indexSegment.getDataSource(columnName);
+    _h3IndexReader = dataSource.getH3Index();
+    switch (predicate.getType()) {
+      case EQ:
+        break;
+      case NOT_EQ:
+        break;
+      case IN:
+        break;
+      case NOT_IN:
+        break;
+      case RANGE:
+        RangePredicate rangePredicate = (RangePredicate) predicate;
+        _distance = Double.parseDouble(rangePredicate.getUpperBound());
+        break;
+      case REGEXP_LIKE:
+        break;
+      case TEXT_MATCH:
+        break;
+      case IS_NULL:
+        break;
+      case IS_NOT_NULL:
+        break;
+    }
     _numDocs = numDocs;
     try {
       _h3Core = H3Core.newInstance();
@@ -54,34 +100,31 @@ public class H3IndexFilterOperator extends BaseFilterOperator {
 
   @Override
   protected FilterBlock getNextBlock() {
-    H3IndexReader h3IndexReader = _dataSource.getH3Index();
     //todo: this needs to come from somewhere?
     int resolution = 5;
-    long h3Id = _h3Core
-        .geoToH3(_geoPredicate.getGeometry().getCoordinate().x, _geoPredicate.getGeometry().getCoordinate().y,
-            resolution);
-    assert h3IndexReader != null;
+    long h3Id = _h3Core.geoToH3(_geometry.getCoordinate().x, _geometry.getCoordinate().y, resolution);
+    assert _h3IndexReader != null;
 
-    //find the number of rings based on geopredicate.distance
+    //find the number of rings based on distance
     //FullMatch
     double edgeLength = _h3Core.edgeLength(resolution, LengthUnit.km);
-    int numFullMatchedRings = (int) (_geoPredicate.getDistance() / edgeLength);
+    int numFullMatchedRings = (int) (_distance / edgeLength);
     List<Long> fullMatchRings = _h3Core.kRing(h3Id, numFullMatchedRings);
     fullMatchRings.add(h3Id);
     MutableRoaringBitmap fullMatchedDocIds = new MutableRoaringBitmap();
     for (long id : fullMatchRings) {
-      ImmutableRoaringBitmap docIds = h3IndexReader.getDocIds(id);
+      ImmutableRoaringBitmap docIds = _h3IndexReader.getDocIds(id);
       fullMatchedDocIds.or(docIds);
     }
 
     //partial matchedRings
-    int numPartialMatchedRings = (int) (_geoPredicate.getDistance() / edgeLength);
+    int numPartialMatchedRings = (int) ((_distance + edgeLength) / edgeLength);
     List<Long> partialMatchedRings = _h3Core.kRing(h3Id, numPartialMatchedRings);
     partialMatchedRings.add(h3Id);
     final MutableRoaringBitmap partialMatchDocIds = new MutableRoaringBitmap();
     partialMatchedRings.removeAll(fullMatchRings);
     for (long id : partialMatchedRings) {
-      ImmutableRoaringBitmap docIds = h3IndexReader.getDocIds(id);
+      ImmutableRoaringBitmap docIds = _h3IndexReader.getDocIds(id);
       partialMatchDocIds.or(docIds);
     }
 
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/plan/FilterPlanNode.java b/pinot-core/src/main/java/org/apache/pinot/core/plan/FilterPlanNode.java
index 43b2174..4455a24 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/plan/FilterPlanNode.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/plan/FilterPlanNode.java
@@ -23,8 +23,10 @@ import java.util.Arrays;
 import java.util.List;
 import java.util.Map;
 import javax.annotation.Nullable;
+import org.apache.pinot.common.request.Identifier;
 import org.apache.pinot.core.common.DataSource;
 import org.apache.pinot.core.geospatial.GeometryUtils;
+import org.apache.pinot.core.geospatial.transform.function.StDistanceFunction;
 import org.apache.pinot.core.indexsegment.IndexSegment;
 import org.apache.pinot.core.operator.filter.BaseFilterOperator;
 import org.apache.pinot.core.operator.filter.BitmapBasedFilterOperator;
@@ -128,24 +130,22 @@ public class FilterPlanNode implements PlanNode {
         ExpressionContext lhs = predicate.getLhs();
         if (lhs.getType() == ExpressionContext.Type.FUNCTION) {
           FunctionContext function = lhs.getFunction();
-          if (function.getFunctionName().equalsIgnoreCase("H3_WITHIN")) {
+
+          boolean canApplyH3Index = false;
+          if (function.getFunctionName().equalsIgnoreCase(StDistanceFunction.FUNCTION_NAME)) {
             String columnName = function.getArguments().get(0).getIdentifier();
-            GeoPredicate geoPredicate = new GeoPredicate();
-            geoPredicate.setType(GeoPredicate.Type.WITHIN);
-            float lat = Float.parseFloat(function.getArguments().get(1).getLiteral());
-            float lon = Float.parseFloat(function.getArguments().get(2).getLiteral());
-            float distance = Float.parseFloat(function.getArguments().get(3).getLiteral());
-//            float resolution =Float.parseFloat(function.getArguments().get(4).getLiteral());
-            Point point = GeometryUtils.GEOMETRY_FACTORY.createPoint(new Coordinate(lat, lon));
-            geoPredicate.setGeometry(point);
-            geoPredicate.setDistance(distance);
-            //set geo predicate
-            return new H3IndexFilterOperator(geoPredicate, _indexSegment.getDataSource(columnName), _numDocs);
-          } else {
-            // TODO: ExpressionFilterOperator does not support predicate types without PredicateEvaluator (IS_NULL,
-            //       IS_NOT_NULL, TEXT_MATCH)
-            return new ExpressionFilterOperator(_indexSegment, predicate, _numDocs);
+            DataSource dataSource = _indexSegment.getDataSource(columnName);
+            if (dataSource.getH3Index() != null) {
+              canApplyH3Index = true;
+            }
+          }
+
+          if (canApplyH3Index) {
+            return new H3IndexFilterOperator(predicate, _indexSegment, _numDocs);
           }
+          // TODO: ExpressionFilterOperator does not support predicate types without PredicateEvaluator (IS_NULL,
+          //       IS_NOT_NULL, TEXT_MATCH)
+          return new ExpressionFilterOperator(_indexSegment, predicate, _numDocs);
         } else {
           DataSource dataSource = _indexSegment.getDataSource(lhs.getIdentifier());
           switch (predicate.getType()) {
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/query/request/context/predicate/GeoPredicate.java b/pinot-core/src/main/java/org/apache/pinot/core/query/request/context/predicate/GeoPredicate.java
index f47ace2..473be7c 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/query/request/context/predicate/GeoPredicate.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/query/request/context/predicate/GeoPredicate.java
@@ -10,13 +10,13 @@ public class GeoPredicate {
   //this is the column name
   ExpressionContext _lhs;
 
-  Type type;
+  Predicate type;
 
   Geometry _geometry;
 
   double _distance;
 
-  public enum Type {
+  public enum Pre {
     WITHIN, OVERLAP;
   }
 
@@ -28,14 +28,6 @@ public class GeoPredicate {
     _lhs = lhs;
   }
 
-  public Type getType() {
-    return type;
-  }
-
-  public void setType(Type type) {
-    this.type = type;
-  }
-
   public Geometry getGeometry() {
     return _geometry;
   }
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/segment/creator/impl/geospatial/H3IndexCreator.java b/pinot-core/src/main/java/org/apache/pinot/core/segment/creator/impl/geospatial/H3IndexCreator.java
index 67b3fc9..ec7ef6f 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/segment/creator/impl/geospatial/H3IndexCreator.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/segment/creator/impl/geospatial/H3IndexCreator.java
@@ -18,11 +18,15 @@ import java.util.PriorityQueue;
 import java.util.Random;
 import java.util.TreeMap;
 import org.apache.commons.io.FileUtils;
+import org.apache.pinot.core.geospatial.GeometryUtils;
 import org.apache.pinot.core.segment.creator.GeoSpatialIndexCreator;
 import org.apache.pinot.core.segment.index.readers.geospatial.H3IndexReader;
 import org.apache.pinot.core.segment.memory.PinotDataBuffer;
 import org.apache.pinot.spi.data.DimensionFieldSpec;
 import org.apache.pinot.spi.data.FieldSpec;
+import org.locationtech.jts.geom.Coordinate;
+import org.locationtech.jts.geom.Geometry;
+import org.locationtech.jts.geom.Point;
 import org.roaringbitmap.buffer.ImmutableRoaringBitmap;
 import org.roaringbitmap.buffer.MutableRoaringBitmap;
 
@@ -272,6 +276,10 @@ public class H3IndexCreator implements GeoSpatialIndexCreator {
 
   public static void main(String[] args)
       throws Exception {
+    Point point1 = GeometryUtils.GEOMETRY_FACTORY.createPoint(new Coordinate(37.3861, -122.0839));
+    Point point2 = GeometryUtils.GEOMETRY_FACTORY.createPoint(new Coordinate(37.368832, -122.036346));
+    System.out.println("point1.distance(point2) = " + point1.distance(point2));
+    System.exit(0);
     File indexDir = new File(System.getProperty("java.io.tmpdir"), "h3IndexDir");
     FileUtils.deleteDirectory(indexDir);
     indexDir.mkdirs();
@@ -310,5 +318,7 @@ public class H3IndexCreator implements GeoSpatialIndexCreator {
         System.out.printf("Matched: expected: %d actual: %d for h3:%d \n", map.get(h3), docIds.getCardinality(), h3);
       }
     }
+
+
   }
 }
diff --git a/pinot-tools/src/main/resources/examples/batch/starbucksStores/rawdata/data.csv b/pinot-tools/src/main/resources/examples/batch/starbucksStores/rawdata/data.csv
index 3fee0f5..646aac6 100644
--- a/pinot-tools/src/main/resources/examples/batch/starbucksStores/rawdata/data.csv
+++ b/pinot-tools/src/main/resources/examples/batch/starbucksStores/rawdata/data.csv
@@ -1,4 +1,4 @@
-lat,long,name,address
+lon,lat,name,address
 -149.8935557,61.21759217,Starbucks - AK - Anchorage  00001,"601 West Street_601 West 5th Avenue_Anchorage, Alaska 99501_907-277-2477"
 -149.9054948,61.19533942,Starbucks - AK - Anchorage  00002,"Carrs-Anchorage #1805_1650 W Northern Lights Blvd_Anchorage, Alaska 99503_907-339-0500"
 -149.7522,61.2297,Starbucks - AK - Anchorage  00003,"Elmendorf AFB_Bldg 5800 Westover Avenue_Anchorage, Alaska 99506"


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org