You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by ja...@apache.org on 2024/02/24 18:24:40 UTC
(pinot) branch master updated: Fix the race condition for H3InclusionIndexFilterOperator (#12487)
This is an automated email from the ASF dual-hosted git repository.
jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new fb1b79da88 Fix the race condition for H3InclusionIndexFilterOperator (#12487)
fb1b79da88 is described below
commit fb1b79da88b11c67af104c0b92fde36d92653aff
Author: Xiaotian (Jackie) Jiang <17...@users.noreply.github.com>
AuthorDate: Sat Feb 24 10:24:34 2024 -0800
Fix the race condition for H3InclusionIndexFilterOperator (#12487)
---
.../filter/H3InclusionIndexFilterOperator.java | 34 ++++----
.../apache/pinot/segment/local/utils/H3Utils.java | 91 ++++++++++++----------
2 files changed, 67 insertions(+), 58 deletions(-)
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/H3InclusionIndexFilterOperator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/H3InclusionIndexFilterOperator.java
index a88889cb27..5861d027e7 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/H3InclusionIndexFilterOperator.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/H3InclusionIndexFilterOperator.java
@@ -80,31 +80,29 @@ public class H3InclusionIndexFilterOperator extends BaseFilterOperator {
@Override
protected BlockDocIdSet getTrues() {
- // get the set of H3 cells at the specified resolution which completely cover the input shape and potential cover.
- Pair<LongSet, LongSet> fullCoverAndPotentialCoverCells = _queryContext
- .getOrComputeSharedValue(Pair.class, LITERAL_H3_CELLS_CACHE_NAME,
+ // NOTE: LHS is the fully covered cells, RHS is the potentially covered cells (excluding fully covered cells).
+ Pair<LongSet, LongSet> fullyAndPotentiallyCoveredCells =
+ _queryContext.getOrComputeSharedValue(Pair.class, LITERAL_H3_CELLS_CACHE_NAME,
k -> H3Utils.coverGeometryInH3(_geometry, _h3IndexReader.getH3IndexResolution().getLowestResolution()));
- LongSet fullyCoverH3Cells = fullCoverAndPotentialCoverCells.getLeft();
- LongSet potentialCoverH3Cells = fullCoverAndPotentialCoverCells.getRight();
+ LongSet fullyCoveredCells = fullyAndPotentiallyCoveredCells.getLeft();
+ LongSet potentiallyCoveredCells = fullyAndPotentiallyCoveredCells.getRight();
- int i = 0;
- ImmutableRoaringBitmap[] fullMatchDocIds = new ImmutableRoaringBitmap[fullyCoverH3Cells.size()];
- LongIterator fullyCoverH3CellsIterator = fullyCoverH3Cells.iterator();
- while (fullyCoverH3CellsIterator.hasNext()) {
- fullMatchDocIds[i++] = _h3IndexReader.getDocIds(fullyCoverH3CellsIterator.nextLong());
+ int numFullyCoveredCells = fullyCoveredCells.size();
+ ImmutableRoaringBitmap[] fullMatchDocIds = new ImmutableRoaringBitmap[numFullyCoveredCells];
+ LongIterator fullyCoveredCellsIterator = fullyCoveredCells.iterator();
+ for (int i = 0; i < numFullyCoveredCells; i++) {
+ fullMatchDocIds[i] = _h3IndexReader.getDocIds(fullyCoveredCellsIterator.nextLong());
}
MutableRoaringBitmap fullMatch = BufferFastAggregation.or(fullMatchDocIds);
- // remove full match from potential match to get potential not match cells.
- i = 0;
- potentialCoverH3Cells.removeAll(fullyCoverH3Cells);
- ImmutableRoaringBitmap[] potentialNotMatchDocIds = new ImmutableRoaringBitmap[potentialCoverH3Cells.size()];
- LongIterator potentialMatchH3CellsIterator = potentialCoverH3Cells.iterator();
- while (potentialMatchH3CellsIterator.hasNext()) {
- potentialNotMatchDocIds[i++] = _h3IndexReader.getDocIds(potentialMatchH3CellsIterator.nextLong());
+ int numPotentiallyCoveredCells = potentiallyCoveredCells.size();
+ ImmutableRoaringBitmap[] potentialMatchDocIds = new ImmutableRoaringBitmap[numPotentiallyCoveredCells];
+ LongIterator potentiallyCoveredCellsIterator = potentiallyCoveredCells.iterator();
+ for (int i = 0; i < numPotentiallyCoveredCells; i++) {
+ potentialMatchDocIds[i] = _h3IndexReader.getDocIds(potentiallyCoveredCellsIterator.nextLong());
}
- MutableRoaringBitmap potentialMatch = BufferFastAggregation.or(potentialNotMatchDocIds);
+ MutableRoaringBitmap potentialMatch = BufferFastAggregation.or(potentialMatchDocIds);
if (_isPositiveCheck) {
return getFilterBlock(fullMatch, potentialMatch);
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/H3Utils.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/H3Utils.java
index 706a85e388..2c0b995199 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/H3Utils.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/H3Utils.java
@@ -20,10 +20,9 @@ package org.apache.pinot.segment.local.utils;
import com.uber.h3core.H3CoreV3;
import com.uber.h3core.util.LatLng;
-import it.unimi.dsi.fastutil.longs.LongArrayList;
-import it.unimi.dsi.fastutil.longs.LongList;
import it.unimi.dsi.fastutil.longs.LongOpenHashSet;
import it.unimi.dsi.fastutil.longs.LongSet;
+import it.unimi.dsi.fastutil.longs.LongSets;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
@@ -39,7 +38,6 @@ import org.locationtech.jts.geom.Polygon;
public class H3Utils {
-
private H3Utils() {
}
@@ -53,45 +51,57 @@ public class H3Utils {
}
}
+ /**
+ * Returns the H3 cells that is crossed by the line.
+ */
private static LongSet coverLineInH3(LineString lineString, int resolution) {
- LongSet coveringH3Cells = new LongOpenHashSet();
- LongList endpointH3Cells = new LongArrayList();
- for (Coordinate endpoint : lineString.getCoordinates()) {
- endpointH3Cells.add(H3_CORE.geoToH3(endpoint.y, endpoint.x, resolution));
+ Coordinate[] endPoints = lineString.getCoordinates();
+ int numEndPoints = endPoints.length;
+ if (numEndPoints == 0) {
+ return LongSets.EMPTY_SET;
}
- for (int i = 0; i < endpointH3Cells.size() - 1; i++) {
- try {
- coveringH3Cells.addAll(H3_CORE.h3Line(endpointH3Cells.getLong(i), endpointH3Cells.getLong(i + 1)));
- } catch (Exception e) {
- throw new RuntimeException(e);
- }
+ long previousCell = H3_CORE.geoToH3(endPoints[0].y, endPoints[0].x, resolution);
+ if (numEndPoints == 1) {
+ return LongSets.singleton(previousCell);
+ }
+ LongSet coveringCells = new LongOpenHashSet();
+ for (int i = 1; i < numEndPoints; i++) {
+ long currentCell = H3_CORE.geoToH3(endPoints[i].y, endPoints[i].x, resolution);
+ coveringCells.addAll(H3_CORE.h3Line(previousCell, currentCell));
+ previousCell = currentCell;
}
- return coveringH3Cells;
+ return coveringCells;
}
+ /**
+ * Returns the H3 cells that is fully covered and potentially covered (excluding fully covered) by the polygon.
+ */
private static Pair<LongSet, LongSet> coverPolygonInH3(Polygon polygon, int resolution) {
+ // TODO: this can be further optimized to use native H3 implementation. They have plan to support natively.
+ // https://github.com/apache/pinot/issues/8547
List<Long> polyfillCells = H3_CORE.polyfill(Arrays.stream(polygon.getExteriorRing().getCoordinates())
.map(coordinate -> new LatLng(coordinate.y, coordinate.x)).collect(Collectors.toList()),
Collections.emptyList(), resolution);
- // TODO: this can be further optimized to use native H3 implementation. They have plan to support natively.
- // https://github.com/apache/pinot/issues/8547
- LongSet potentialH3Cells = new LongOpenHashSet();
if (polyfillCells.isEmpty()) {
// If the polyfill cells are empty, meaning the polygon might be smaller than a single cell in the H3 system.
// So just get whatever one. here choose the first one. the follow up kRing(firstCell, 1) will cover the whole
// polygon if there is potential not covered by the first point's belonging cell.
// ref: https://github.com/uber/h3/issues/456#issuecomment-827760163
Coordinate represent = polygon.getCoordinate();
- potentialH3Cells.add(H3_CORE.geoToH3(represent.getY(), represent.getX(), resolution));
- } else {
- potentialH3Cells.addAll(polyfillCells);
+ return Pair.of(LongSets.EMPTY_SET,
+ new LongOpenHashSet(H3_CORE.kRing(H3_CORE.geoToH3(represent.y, represent.x, resolution), 1)));
+ }
+
+ LongSet fullyCoveredCells = new LongOpenHashSet();
+ LongSet potentiallyCoveredCells = new LongOpenHashSet(polyfillCells);
+ for (long cell : polyfillCells) {
+ if (polygon.contains(createPolygonFromH3Cell(cell))) {
+ fullyCoveredCells.add(cell);
+ }
+ potentiallyCoveredCells.addAll(H3_CORE.kRing(cell, 1));
}
- LongSet fullyContainedCell = new LongOpenHashSet(
- potentialH3Cells.stream().filter(h3Cell -> polygon.contains(createPolygonFromH3Cell(h3Cell)))
- .collect(Collectors.toSet()));
- potentialH3Cells
- .addAll(potentialH3Cells.stream().flatMap(cell -> H3_CORE.kRing(cell, 1).stream()).collect(Collectors.toSet()));
- return Pair.of(fullyContainedCell, potentialH3Cells);
+ potentiallyCoveredCells.removeAll(fullyCoveredCells);
+ return Pair.of(fullyCoveredCells, potentiallyCoveredCells);
}
private static Polygon createPolygonFromH3Cell(long h3Cell) {
@@ -101,27 +111,28 @@ public class H3Utils {
boundary.stream().map(geoCoord -> new Coordinate(geoCoord.lng, geoCoord.lat)).toArray(Coordinate[]::new));
}
- // Return a pair of cell ids: The first fully contain, the second is potential contain.
- // potential contains contain the fully contain.
+ /**
+ * Returns the H3 cells that is fully covered and potentially covered (excluding fully covered) by the geometry.
+ */
public static Pair<LongSet, LongSet> coverGeometryInH3(Geometry geometry, int resolution) {
if (geometry instanceof Point) {
- LongSet potentialCover = new LongOpenHashSet();
- potentialCover.add(H3_CORE.geoToH3(geometry.getCoordinate().y, geometry.getCoordinate().x, resolution));
- return Pair.of(new LongOpenHashSet(), potentialCover);
+ return Pair.of(LongSets.EMPTY_SET,
+ LongSets.singleton(H3_CORE.geoToH3(geometry.getCoordinate().y, geometry.getCoordinate().x, resolution)));
} else if (geometry instanceof LineString) {
- LongSet potentialCover = new LongOpenHashSet();
- potentialCover.addAll(coverLineInH3(((LineString) geometry), resolution));
- return Pair.of(new LongOpenHashSet(), potentialCover);
+ return Pair.of(LongSets.EMPTY_SET, coverLineInH3(((LineString) geometry), resolution));
} else if (geometry instanceof Polygon) {
return coverPolygonInH3(((Polygon) geometry), resolution);
} else if (geometry instanceof GeometryCollection) {
- LongOpenHashSet fullCover = new LongOpenHashSet();
- LongOpenHashSet potentialCover = new LongOpenHashSet();
- for (int i = 0; i < geometry.getNumGeometries(); i++) {
- fullCover.addAll(coverGeometryInH3(geometry.getGeometryN(i), resolution).getLeft());
- potentialCover.addAll(coverGeometryInH3(geometry.getGeometryN(i), resolution).getRight());
+ LongOpenHashSet fullyCoveredCells = new LongOpenHashSet();
+ LongOpenHashSet potentiallyCoveredCells = new LongOpenHashSet();
+ int numGeometries = geometry.getNumGeometries();
+ for (int i = 0; i < numGeometries; i++) {
+ Pair<LongSet, LongSet> pair = coverGeometryInH3(geometry.getGeometryN(i), resolution);
+ fullyCoveredCells.addAll(pair.getLeft());
+ potentiallyCoveredCells.addAll(pair.getRight());
}
- return Pair.of(fullCover, potentialCover);
+ potentiallyCoveredCells.removeAll(fullyCoveredCells);
+ return Pair.of(fullyCoveredCells, potentiallyCoveredCells);
} else {
throw new UnsupportedOperationException("Unexpected type: " + geometry.getGeometryType());
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org