You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by ki...@apache.org on 2020/12/21 17:17:18 UTC
[incubator-pinot] branch h3-index updated: Removing H3WITHIN UDF and adding logic to use h3 index for st_distance udf
This is an automated email from the ASF dual-hosted git repository.
kishoreg pushed a commit to branch h3-index
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git
The following commit(s) were added to refs/heads/h3-index by this push:
new bab10b9 Removing H3WITHIN UDF and adding logic to use h3 index for st_distance udf
bab10b9 is described below
commit bab10b9536c22cce3e21a974077860e79148bb24
Author: kishoreg <g....@gmail.com>
AuthorDate: Mon Dec 21 09:16:46 2020 -0800
Removing H3WITHIN UDF and adding logic to use h3 index for st_distance udf
---
.../operator/filter/H3IndexFilterOperator.java | 73 +++++++++++++++++-----
.../org/apache/pinot/core/plan/FilterPlanNode.java | 32 +++++-----
.../request/context/predicate/GeoPredicate.java | 12 +---
.../creator/impl/geospatial/H3IndexCreator.java | 10 +++
.../batch/starbucksStores/rawdata/data.csv | 2 +-
5 files changed, 87 insertions(+), 42 deletions(-)
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/H3IndexFilterOperator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/H3IndexFilterOperator.java
index 7528b7e..0b65dd7 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/H3IndexFilterOperator.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/H3IndexFilterOperator.java
@@ -23,10 +23,19 @@ import com.uber.h3core.LengthUnit;
import java.io.IOException;
import java.util.List;
import org.apache.pinot.core.common.DataSource;
+import org.apache.pinot.core.geospatial.serde.GeometrySerializer;
+import org.apache.pinot.core.geospatial.transform.function.StPointFunction;
+import org.apache.pinot.core.indexsegment.IndexSegment;
import org.apache.pinot.core.operator.blocks.FilterBlock;
import org.apache.pinot.core.operator.docidsets.BitmapDocIdSet;
+import org.apache.pinot.core.query.request.context.ExpressionContext;
+import org.apache.pinot.core.query.request.context.FunctionContext;
import org.apache.pinot.core.query.request.context.predicate.GeoPredicate;
+import org.apache.pinot.core.query.request.context.predicate.Predicate;
+import org.apache.pinot.core.query.request.context.predicate.RangePredicate;
import org.apache.pinot.core.segment.index.readers.geospatial.H3IndexReader;
+import org.apache.pinot.spi.utils.BytesUtils;
+import org.locationtech.jts.geom.Geometry;
import org.roaringbitmap.buffer.ImmutableRoaringBitmap;
import org.roaringbitmap.buffer.MutableRoaringBitmap;
@@ -36,14 +45,51 @@ public class H3IndexFilterOperator extends BaseFilterOperator {
// NOTE: Range index can only apply to dictionary-encoded columns for now
// TODO: Support raw index columns
- private final GeoPredicate _geoPredicate;
- private final DataSource _dataSource;
private final int _numDocs;
private final H3Core _h3Core;
+ private final H3IndexReader _h3IndexReader;
+ private Geometry _geometry;
+ private double _distance;
- public H3IndexFilterOperator(GeoPredicate geoPredicate, DataSource dataSource, int numDocs) {
- _geoPredicate = geoPredicate;
- _dataSource = dataSource;
+ public H3IndexFilterOperator(Predicate predicate, IndexSegment indexSegment, int numDocs) {
+ FunctionContext function = predicate.getLhs().getFunction();
+ String columnName;
+
+ if (function.getArguments().get(0).getType() == ExpressionContext.Type.IDENTIFIER) {
+ columnName = function.getArguments().get(0).getIdentifier();
+ byte[] bytes = BytesUtils.toBytes(function.getArguments().get(1).getLiteral());
+ _geometry = GeometrySerializer.deserialize(bytes);
+ } else if (function.getArguments().get(1).getType() == ExpressionContext.Type.IDENTIFIER) {
+ columnName = function.getArguments().get(1).getIdentifier();
+ byte[] bytes = BytesUtils.toBytes(function.getArguments().get(0).getLiteral());
+ _geometry = GeometrySerializer.deserialize(bytes);
+ } else {
+ throw new RuntimeException("Expecting one of the arguments of ST_DISTANCE to be an identifier");
+ }
+ DataSource dataSource = indexSegment.getDataSource(columnName);
+ _h3IndexReader = dataSource.getH3Index();
+ switch (predicate.getType()) {
+ case EQ:
+ break;
+ case NOT_EQ:
+ break;
+ case IN:
+ break;
+ case NOT_IN:
+ break;
+ case RANGE:
+ RangePredicate rangePredicate = (RangePredicate) predicate;
+ _distance = Double.parseDouble(rangePredicate.getUpperBound());
+ break;
+ case REGEXP_LIKE:
+ break;
+ case TEXT_MATCH:
+ break;
+ case IS_NULL:
+ break;
+ case IS_NOT_NULL:
+ break;
+ }
_numDocs = numDocs;
try {
_h3Core = H3Core.newInstance();
@@ -54,34 +100,31 @@ public class H3IndexFilterOperator extends BaseFilterOperator {
@Override
protected FilterBlock getNextBlock() {
- H3IndexReader h3IndexReader = _dataSource.getH3Index();
//todo: this needs to come from somewhere?
int resolution = 5;
- long h3Id = _h3Core
- .geoToH3(_geoPredicate.getGeometry().getCoordinate().x, _geoPredicate.getGeometry().getCoordinate().y,
- resolution);
- assert h3IndexReader != null;
+ long h3Id = _h3Core.geoToH3(_geometry.getCoordinate().x, _geometry.getCoordinate().y, resolution);
+ assert _h3IndexReader != null;
- //find the number of rings based on geopredicate.distance
+ //find the number of rings based on distance
//FullMatch
double edgeLength = _h3Core.edgeLength(resolution, LengthUnit.km);
- int numFullMatchedRings = (int) (_geoPredicate.getDistance() / edgeLength);
+ int numFullMatchedRings = (int) (_distance / edgeLength);
List<Long> fullMatchRings = _h3Core.kRing(h3Id, numFullMatchedRings);
fullMatchRings.add(h3Id);
MutableRoaringBitmap fullMatchedDocIds = new MutableRoaringBitmap();
for (long id : fullMatchRings) {
- ImmutableRoaringBitmap docIds = h3IndexReader.getDocIds(id);
+ ImmutableRoaringBitmap docIds = _h3IndexReader.getDocIds(id);
fullMatchedDocIds.or(docIds);
}
//partial matchedRings
- int numPartialMatchedRings = (int) (_geoPredicate.getDistance() / edgeLength);
+ int numPartialMatchedRings = (int) ((_distance + edgeLength) / edgeLength);
List<Long> partialMatchedRings = _h3Core.kRing(h3Id, numPartialMatchedRings);
partialMatchedRings.add(h3Id);
final MutableRoaringBitmap partialMatchDocIds = new MutableRoaringBitmap();
partialMatchedRings.removeAll(fullMatchRings);
for (long id : partialMatchedRings) {
- ImmutableRoaringBitmap docIds = h3IndexReader.getDocIds(id);
+ ImmutableRoaringBitmap docIds = _h3IndexReader.getDocIds(id);
partialMatchDocIds.or(docIds);
}
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/plan/FilterPlanNode.java b/pinot-core/src/main/java/org/apache/pinot/core/plan/FilterPlanNode.java
index 43b2174..4455a24 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/plan/FilterPlanNode.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/plan/FilterPlanNode.java
@@ -23,8 +23,10 @@ import java.util.Arrays;
import java.util.List;
import java.util.Map;
import javax.annotation.Nullable;
+import org.apache.pinot.common.request.Identifier;
import org.apache.pinot.core.common.DataSource;
import org.apache.pinot.core.geospatial.GeometryUtils;
+import org.apache.pinot.core.geospatial.transform.function.StDistanceFunction;
import org.apache.pinot.core.indexsegment.IndexSegment;
import org.apache.pinot.core.operator.filter.BaseFilterOperator;
import org.apache.pinot.core.operator.filter.BitmapBasedFilterOperator;
@@ -128,24 +130,22 @@ public class FilterPlanNode implements PlanNode {
ExpressionContext lhs = predicate.getLhs();
if (lhs.getType() == ExpressionContext.Type.FUNCTION) {
FunctionContext function = lhs.getFunction();
- if (function.getFunctionName().equalsIgnoreCase("H3_WITHIN")) {
+
+ boolean canApplyH3Index = false;
+ if (function.getFunctionName().equalsIgnoreCase(StDistanceFunction.FUNCTION_NAME)) {
String columnName = function.getArguments().get(0).getIdentifier();
- GeoPredicate geoPredicate = new GeoPredicate();
- geoPredicate.setType(GeoPredicate.Type.WITHIN);
- float lat = Float.parseFloat(function.getArguments().get(1).getLiteral());
- float lon = Float.parseFloat(function.getArguments().get(2).getLiteral());
- float distance = Float.parseFloat(function.getArguments().get(3).getLiteral());
-// float resolution =Float.parseFloat(function.getArguments().get(4).getLiteral());
- Point point = GeometryUtils.GEOMETRY_FACTORY.createPoint(new Coordinate(lat, lon));
- geoPredicate.setGeometry(point);
- geoPredicate.setDistance(distance);
- //set geo predicate
- return new H3IndexFilterOperator(geoPredicate, _indexSegment.getDataSource(columnName), _numDocs);
- } else {
- // TODO: ExpressionFilterOperator does not support predicate types without PredicateEvaluator (IS_NULL,
- // IS_NOT_NULL, TEXT_MATCH)
- return new ExpressionFilterOperator(_indexSegment, predicate, _numDocs);
+ DataSource dataSource = _indexSegment.getDataSource(columnName);
+ if (dataSource.getH3Index() != null) {
+ canApplyH3Index = true;
+ }
+ }
+
+ if (canApplyH3Index) {
+ return new H3IndexFilterOperator(predicate, _indexSegment, _numDocs);
}
+ // TODO: ExpressionFilterOperator does not support predicate types without PredicateEvaluator (IS_NULL,
+ // IS_NOT_NULL, TEXT_MATCH)
+ return new ExpressionFilterOperator(_indexSegment, predicate, _numDocs);
} else {
DataSource dataSource = _indexSegment.getDataSource(lhs.getIdentifier());
switch (predicate.getType()) {
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/query/request/context/predicate/GeoPredicate.java b/pinot-core/src/main/java/org/apache/pinot/core/query/request/context/predicate/GeoPredicate.java
index f47ace2..473be7c 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/query/request/context/predicate/GeoPredicate.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/query/request/context/predicate/GeoPredicate.java
@@ -10,13 +10,13 @@ public class GeoPredicate {
//this is the column name
ExpressionContext _lhs;
- Type type;
+ Predicate type;
Geometry _geometry;
double _distance;
- public enum Type {
+ public enum Pre {
WITHIN, OVERLAP;
}
@@ -28,14 +28,6 @@ public class GeoPredicate {
_lhs = lhs;
}
- public Type getType() {
- return type;
- }
-
- public void setType(Type type) {
- this.type = type;
- }
-
public Geometry getGeometry() {
return _geometry;
}
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/segment/creator/impl/geospatial/H3IndexCreator.java b/pinot-core/src/main/java/org/apache/pinot/core/segment/creator/impl/geospatial/H3IndexCreator.java
index 67b3fc9..ec7ef6f 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/segment/creator/impl/geospatial/H3IndexCreator.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/segment/creator/impl/geospatial/H3IndexCreator.java
@@ -18,11 +18,15 @@ import java.util.PriorityQueue;
import java.util.Random;
import java.util.TreeMap;
import org.apache.commons.io.FileUtils;
+import org.apache.pinot.core.geospatial.GeometryUtils;
import org.apache.pinot.core.segment.creator.GeoSpatialIndexCreator;
import org.apache.pinot.core.segment.index.readers.geospatial.H3IndexReader;
import org.apache.pinot.core.segment.memory.PinotDataBuffer;
import org.apache.pinot.spi.data.DimensionFieldSpec;
import org.apache.pinot.spi.data.FieldSpec;
+import org.locationtech.jts.geom.Coordinate;
+import org.locationtech.jts.geom.Geometry;
+import org.locationtech.jts.geom.Point;
import org.roaringbitmap.buffer.ImmutableRoaringBitmap;
import org.roaringbitmap.buffer.MutableRoaringBitmap;
@@ -272,6 +276,10 @@ public class H3IndexCreator implements GeoSpatialIndexCreator {
public static void main(String[] args)
throws Exception {
+ Point point1 = GeometryUtils.GEOMETRY_FACTORY.createPoint(new Coordinate(37.3861, -122.0839));
+ Point point2 = GeometryUtils.GEOMETRY_FACTORY.createPoint(new Coordinate(37.368832, -122.036346));
+ System.out.println("point1.distance(point2) = " + point1.distance(point2));
+ System.exit(0);
File indexDir = new File(System.getProperty("java.io.tmpdir"), "h3IndexDir");
FileUtils.deleteDirectory(indexDir);
indexDir.mkdirs();
@@ -310,5 +318,7 @@ public class H3IndexCreator implements GeoSpatialIndexCreator {
System.out.printf("Matched: expected: %d actual: %d for h3:%d \n", map.get(h3), docIds.getCardinality(), h3);
}
}
+
+
}
}
diff --git a/pinot-tools/src/main/resources/examples/batch/starbucksStores/rawdata/data.csv b/pinot-tools/src/main/resources/examples/batch/starbucksStores/rawdata/data.csv
index 3fee0f5..646aac6 100644
--- a/pinot-tools/src/main/resources/examples/batch/starbucksStores/rawdata/data.csv
+++ b/pinot-tools/src/main/resources/examples/batch/starbucksStores/rawdata/data.csv
@@ -1,4 +1,4 @@
-lat,long,name,address
+lon,lat,name,address
-149.8935557,61.21759217,Starbucks - AK - Anchorage 00001,"601 West Street_601 West 5th Avenue_Anchorage, Alaska 99501_907-277-2477"
-149.9054948,61.19533942,Starbucks - AK - Anchorage 00002,"Carrs-Anchorage #1805_1650 W Northern Lights Blvd_Anchorage, Alaska 99503_907-339-0500"
-149.7522,61.2297,Starbucks - AK - Anchorage 00003,"Elmendorf AFB_Bldg 5800 Westover Avenue_Anchorage, Alaska 99506"
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org