You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by GitBox <gi...@apache.org> on 2022/04/21 17:44:51 UTC

[GitHub] [pinot] Jackie-Jiang commented on a diff in pull request #8498: Support st_contains using H3 index

Jackie-Jiang commented on code in PR #8498:
URL: https://github.com/apache/pinot/pull/8498#discussion_r855430611


##########
pinot-core/src/main/java/org/apache/pinot/core/operator/filter/H3InclusionIndexFilterOperator.java:
##########
@@ -0,0 +1,160 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.core.operator.filter;
+
+import it.unimi.dsi.fastutil.longs.LongSet;
+import java.util.Collections;
+import java.util.List;
+import org.apache.commons.lang3.tuple.Pair;
+import org.apache.pinot.common.request.context.ExpressionContext;
+import org.apache.pinot.common.request.context.ExpressionContext.Type;
+import org.apache.pinot.common.request.context.predicate.EqPredicate;
+import org.apache.pinot.common.request.context.predicate.Predicate;
+import org.apache.pinot.core.common.Operator;
+import org.apache.pinot.core.operator.blocks.FilterBlock;
+import org.apache.pinot.core.operator.dociditerators.ScanBasedDocIdIterator;
+import org.apache.pinot.core.operator.docidsets.BitmapDocIdSet;
+import org.apache.pinot.segment.local.utils.GeometrySerializer;
+import org.apache.pinot.segment.local.utils.H3Utils;
+import org.apache.pinot.segment.spi.IndexSegment;
+import org.apache.pinot.segment.spi.index.reader.H3IndexReader;
+import org.apache.pinot.spi.exception.BadQueryRequestException;
+import org.apache.pinot.spi.utils.BytesUtils;
+import org.locationtech.jts.geom.Geometry;
+import org.roaringbitmap.buffer.BufferFastAggregation;
+import org.roaringbitmap.buffer.ImmutableRoaringBitmap;
+import org.roaringbitmap.buffer.MutableRoaringBitmap;
+
+
+/**
+ * A filter operator that uses H3 index for geospatial data inclusion
+ */
+public class H3InclusionIndexFilterOperator extends BaseFilterOperator {
+
+  private static final String EXPLAIN_NAME = "INCLUSION_FILTER_H3_INDEX";
+
+  private static final String OPERATOR_NAME = "H3InclusionIndexFilterOperator";
+
+  private final IndexSegment _segment;
+  private final Predicate _predicate;
+  private final int _numDocs;
+  private final H3IndexReader _h3IndexReader;
+  private final LongSet _fullyCoverH3Cells;
+  private final LongSet _potentialCoverH3Cells;
+  private final boolean _isPositiveCheck;
+
+  public H3InclusionIndexFilterOperator(IndexSegment segment, Predicate predicate, int numDocs) {
+    _segment = segment;
+    _predicate = predicate;
+    _numDocs = numDocs;
+
+    List<ExpressionContext> arguments = predicate.getLhs().getFunction().getArguments();
+    Geometry geometry;
+    // Assume first argument is Literal, and second argument is IDENTIFIER for St_Contains.
+    assert arguments.get(1).getType() == Type.IDENTIFIER;
+    assert arguments.get(0).getType() == Type.LITERAL;
+    EqPredicate eqPredicate = (EqPredicate) predicate;
+    if (eqPredicate.getValue().equals("1")) {

Review Comment:
   It can be either `1` or `true`. Use `BooleanUtils.toBoolean(eqPredicate.getValue())`



##########
pinot-core/src/main/java/org/apache/pinot/core/plan/FilterPlanNode.java:
##########
@@ -108,23 +114,24 @@ public Map<Predicate, PredicateEvaluator> getPredicateEvaluatorMap() {
   /**
    * H3 index can be applied iff:
    * <ul>
-   *   <li>Predicate is of type RANGE</li>
-   *   <li>Left-hand-side of the predicate is an ST_Distance function</li>
-   *   <li>One argument of the ST_Distance function is an identifier, the other argument is an literal</li>
+   *   <li>Predicate is of type RANGE or EQ</li>
+   *   <li>Left-hand-side of the predicate is an ST_Distance or ST_Contains function</li>
+   *   <li>One argument of the ST_Distance or ST_Contains function is an identifier, the other argument is an
+   *   literal</li>
    *   <li>The identifier column has H3 index</li>
    * </ul>
    */
   private boolean canApplyH3Index(Predicate predicate, FunctionContext function) {

Review Comment:
   Suggest having separate check methods for `ST_Distance` and `ST_Contains` to prevent mismatched combinations (e.g. `ST_Distance` with `EQ` is not supported), and also to avoid the extra check after the method returns `true`.



##########
pinot-core/src/main/java/org/apache/pinot/core/operator/filter/H3InclusionIndexFilterOperator.java:
##########
@@ -0,0 +1,160 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.core.operator.filter;
+
+import it.unimi.dsi.fastutil.longs.LongSet;
+import java.util.Collections;
+import java.util.List;
+import org.apache.commons.lang3.tuple.Pair;
+import org.apache.pinot.common.request.context.ExpressionContext;
+import org.apache.pinot.common.request.context.ExpressionContext.Type;
+import org.apache.pinot.common.request.context.predicate.EqPredicate;
+import org.apache.pinot.common.request.context.predicate.Predicate;
+import org.apache.pinot.core.common.Operator;
+import org.apache.pinot.core.operator.blocks.FilterBlock;
+import org.apache.pinot.core.operator.dociditerators.ScanBasedDocIdIterator;
+import org.apache.pinot.core.operator.docidsets.BitmapDocIdSet;
+import org.apache.pinot.segment.local.utils.GeometrySerializer;
+import org.apache.pinot.segment.local.utils.H3Utils;
+import org.apache.pinot.segment.spi.IndexSegment;
+import org.apache.pinot.segment.spi.index.reader.H3IndexReader;
+import org.apache.pinot.spi.exception.BadQueryRequestException;
+import org.apache.pinot.spi.utils.BytesUtils;
+import org.locationtech.jts.geom.Geometry;
+import org.roaringbitmap.buffer.BufferFastAggregation;
+import org.roaringbitmap.buffer.ImmutableRoaringBitmap;
+import org.roaringbitmap.buffer.MutableRoaringBitmap;
+
+
+/**
+ * A filter operator that uses H3 index for geospatial data inclusion
+ */
+public class H3InclusionIndexFilterOperator extends BaseFilterOperator {
+
+  private static final String EXPLAIN_NAME = "INCLUSION_FILTER_H3_INDEX";
+
+  private static final String OPERATOR_NAME = "H3InclusionIndexFilterOperator";
+
+  private final IndexSegment _segment;
+  private final Predicate _predicate;
+  private final int _numDocs;
+  private final H3IndexReader _h3IndexReader;
+  private final LongSet _fullyCoverH3Cells;
+  private final LongSet _potentialCoverH3Cells;
+  private final boolean _isPositiveCheck;
+
+  public H3InclusionIndexFilterOperator(IndexSegment segment, Predicate predicate, int numDocs) {
+    _segment = segment;
+    _predicate = predicate;
+    _numDocs = numDocs;
+
+    List<ExpressionContext> arguments = predicate.getLhs().getFunction().getArguments();
+    Geometry geometry;
+    // Assume first argument is Literal, and second argument is IDENTIFIER for St_Contains.
+    assert arguments.get(1).getType() == Type.IDENTIFIER;
+    assert arguments.get(0).getType() == Type.LITERAL;
+    EqPredicate eqPredicate = (EqPredicate) predicate;
+    if (eqPredicate.getValue().equals("1")) {
+      _isPositiveCheck = true;
+    } else if (eqPredicate.getValue().equals("0")) {
+      _isPositiveCheck = false;
+    } else {
+      throw new BadQueryRequestException("Expected value for ST_Contain is 0 or 1");
+    }
+    // look up arg1's h3 indices
+    _h3IndexReader = segment.getDataSource(arguments.get(1).getIdentifier()).getH3Index();
+    // arg0 is the literal
+    geometry = GeometrySerializer.deserialize(BytesUtils.toBytes(arguments.get(0).getLiteral()));
+    // must be some h3 index
+    assert _h3IndexReader != null;
+
+    // get the set of H3 cells at the specified resolution which completely cover the input shape and potential cover.
+    Pair<LongSet, LongSet> fullCoverAndPotentialCoverCells =
+        H3Utils.coverGeometryInH3(geometry, _h3IndexReader.getH3IndexResolution().getLowestResolution());
+
+    _fullyCoverH3Cells = fullCoverAndPotentialCoverCells.getLeft();
+    _potentialCoverH3Cells = fullCoverAndPotentialCoverCells.getRight();
+  }
+
+  @Override
+  protected FilterBlock getNextBlock() {
+    // have list of h3 cell ids for polygon provided
+    // return filtered num_docs
+    ImmutableRoaringBitmap[] potentialMatchDocIds = new ImmutableRoaringBitmap[_potentialCoverH3Cells.size()];
+    int i = 0;
+

Review Comment:
   (minor) Remove the empty lines in this method. I feel the empty lines actually hurt readability.



##########
pinot-core/src/main/java/org/apache/pinot/core/plan/FilterPlanNode.java:
##########
@@ -56,6 +58,10 @@
 
 
 public class FilterPlanNode implements PlanNode {
+
+  private static final Set<String> CAN_APPLY_H3_INDEX_FUNCTION_NAMES =
+      ImmutableSet.of("st_distance", "stdistance", "st_contains", "stcontains");

Review Comment:
   `st_within` can also be handled if the second argument is a literal.



##########
pinot-core/src/main/java/org/apache/pinot/core/operator/filter/H3InclusionIndexFilterOperator.java:
##########
@@ -0,0 +1,160 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.core.operator.filter;
+
+import it.unimi.dsi.fastutil.longs.LongSet;
+import java.util.Collections;
+import java.util.List;
+import org.apache.commons.lang3.tuple.Pair;
+import org.apache.pinot.common.request.context.ExpressionContext;
+import org.apache.pinot.common.request.context.ExpressionContext.Type;
+import org.apache.pinot.common.request.context.predicate.EqPredicate;
+import org.apache.pinot.common.request.context.predicate.Predicate;
+import org.apache.pinot.core.common.Operator;
+import org.apache.pinot.core.operator.blocks.FilterBlock;
+import org.apache.pinot.core.operator.dociditerators.ScanBasedDocIdIterator;
+import org.apache.pinot.core.operator.docidsets.BitmapDocIdSet;
+import org.apache.pinot.segment.local.utils.GeometrySerializer;
+import org.apache.pinot.segment.local.utils.H3Utils;
+import org.apache.pinot.segment.spi.IndexSegment;
+import org.apache.pinot.segment.spi.index.reader.H3IndexReader;
+import org.apache.pinot.spi.exception.BadQueryRequestException;
+import org.apache.pinot.spi.utils.BytesUtils;
+import org.locationtech.jts.geom.Geometry;
+import org.roaringbitmap.buffer.BufferFastAggregation;
+import org.roaringbitmap.buffer.ImmutableRoaringBitmap;
+import org.roaringbitmap.buffer.MutableRoaringBitmap;
+
+
+/**
+ * A filter operator that uses H3 index for geospatial data inclusion
+ */
+public class H3InclusionIndexFilterOperator extends BaseFilterOperator {
+
+  private static final String EXPLAIN_NAME = "INCLUSION_FILTER_H3_INDEX";
+
+  private static final String OPERATOR_NAME = "H3InclusionIndexFilterOperator";
+
+  private final IndexSegment _segment;
+  private final Predicate _predicate;
+  private final int _numDocs;
+  private final H3IndexReader _h3IndexReader;
+  private final LongSet _fullyCoverH3Cells;
+  private final LongSet _potentialCoverH3Cells;
+  private final boolean _isPositiveCheck;
+
+  public H3InclusionIndexFilterOperator(IndexSegment segment, Predicate predicate, int numDocs) {
+    _segment = segment;
+    _predicate = predicate;
+    _numDocs = numDocs;
+
+    List<ExpressionContext> arguments = predicate.getLhs().getFunction().getArguments();
+    Geometry geometry;
+    // Assume first argument is Literal, and second argument is IDENTIFIER for St_Contains.
+    assert arguments.get(1).getType() == Type.IDENTIFIER;

Review Comment:
   We cannot make such an assumption because it is not checked in the planning phase.



##########
pinot-core/src/main/java/org/apache/pinot/core/operator/filter/H3InclusionIndexFilterOperator.java:
##########
@@ -0,0 +1,160 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.core.operator.filter;
+
+import it.unimi.dsi.fastutil.longs.LongSet;
+import java.util.Collections;
+import java.util.List;
+import org.apache.commons.lang3.tuple.Pair;
+import org.apache.pinot.common.request.context.ExpressionContext;
+import org.apache.pinot.common.request.context.ExpressionContext.Type;
+import org.apache.pinot.common.request.context.predicate.EqPredicate;
+import org.apache.pinot.common.request.context.predicate.Predicate;
+import org.apache.pinot.core.common.Operator;
+import org.apache.pinot.core.operator.blocks.FilterBlock;
+import org.apache.pinot.core.operator.dociditerators.ScanBasedDocIdIterator;
+import org.apache.pinot.core.operator.docidsets.BitmapDocIdSet;
+import org.apache.pinot.segment.local.utils.GeometrySerializer;
+import org.apache.pinot.segment.local.utils.H3Utils;
+import org.apache.pinot.segment.spi.IndexSegment;
+import org.apache.pinot.segment.spi.index.reader.H3IndexReader;
+import org.apache.pinot.spi.exception.BadQueryRequestException;
+import org.apache.pinot.spi.utils.BytesUtils;
+import org.locationtech.jts.geom.Geometry;
+import org.roaringbitmap.buffer.BufferFastAggregation;
+import org.roaringbitmap.buffer.ImmutableRoaringBitmap;
+import org.roaringbitmap.buffer.MutableRoaringBitmap;
+
+
+/**
+ * A filter operator that uses H3 index for geospatial data inclusion
+ */
+public class H3InclusionIndexFilterOperator extends BaseFilterOperator {
+
+  private static final String EXPLAIN_NAME = "INCLUSION_FILTER_H3_INDEX";
+
+  private static final String OPERATOR_NAME = "H3InclusionIndexFilterOperator";
+
+  private final IndexSegment _segment;
+  private final Predicate _predicate;
+  private final int _numDocs;
+  private final H3IndexReader _h3IndexReader;
+  private final LongSet _fullyCoverH3Cells;
+  private final LongSet _potentialCoverH3Cells;
+  private final boolean _isPositiveCheck;
+
+  public H3InclusionIndexFilterOperator(IndexSegment segment, Predicate predicate, int numDocs) {
+    _segment = segment;
+    _predicate = predicate;
+    _numDocs = numDocs;
+
+    List<ExpressionContext> arguments = predicate.getLhs().getFunction().getArguments();
+    Geometry geometry;
+    // Assume first argument is Literal, and second argument is IDENTIFIER for St_Contains.
+    assert arguments.get(1).getType() == Type.IDENTIFIER;
+    assert arguments.get(0).getType() == Type.LITERAL;
+    EqPredicate eqPredicate = (EqPredicate) predicate;
+    if (eqPredicate.getValue().equals("1")) {
+      _isPositiveCheck = true;
+    } else if (eqPredicate.getValue().equals("0")) {
+      _isPositiveCheck = false;
+    } else {
+      throw new BadQueryRequestException("Expected value for ST_Contain is 0 or 1");
+    }
+    // look up arg1's h3 indices
+    _h3IndexReader = segment.getDataSource(arguments.get(1).getIdentifier()).getH3Index();
+    // arg0 is the literal
+    geometry = GeometrySerializer.deserialize(BytesUtils.toBytes(arguments.get(0).getLiteral()));
+    // must be some h3 index
+    assert _h3IndexReader != null;
+
+    // get the set of H3 cells at the specified resolution which completely cover the input shape and potential cover.
+    Pair<LongSet, LongSet> fullCoverAndPotentialCoverCells =

Review Comment:
   Move the actual calculation into `getNextBlock()`, since it should happen in the execution phase instead of the planning phase.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org