You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sedona.apache.org by zo...@apache.org on 2023/02/12 15:26:28 UTC
[sedona] 01/01: add google-s2 library, function geometry to cell ids, add spark sql function
This is an automated email from the ASF dual-hosted git repository.
zongsizhang pushed a commit to branch feature/google-s2
in repository https://gitbox.apache.org/repos/asf/sedona.git
commit 5343711ff22ec1fbdd72185f1283c84acdd6bd9e
Author: zzs-wherobots <zz...@ZongsideMac-Studio.local>
AuthorDate: Tue Jan 31 00:07:37 2023 +0800
add google-s2 library, function geometry to cell ids, add spark sql function
---
common/pom.xml | 5 +
.../java/org/apache/sedona/common/Functions.java | 42 ++++++--
.../org/apache/sedona/common/utils/GeomUtils.java | 26 +++++
.../org/apache/sedona/common/utils/S2Utils.java | 99 ++++++++++++++++++
.../org/apache/sedona/common/FunctionsTest.java | 111 +++++++++++++++++++--
.../org/apache/sedona/common/GeometryUtilTest.java | 71 +++++++++++++
.../java/org/apache/sedona/common/S2UtilTest.java | 107 ++++++++++++++++++++
docs/api/sql/Function.md | 17 ++++
sql/pom.xml | 5 +
.../scala/org/apache/sedona/sql/UDF/Catalog.scala | 3 +-
.../sql/sedona_sql/expressions/Functions.scala | 8 ++
.../expressions/NullSafeExpressions.scala | 14 +++
.../sql/sedona_sql/expressions/st_functions.scala | 3 +
.../apache/sedona/sql/functions/STS2CellIDs.scala | 67 +++++++++++++
14 files changed, 556 insertions(+), 22 deletions(-)
diff --git a/common/pom.xml b/common/pom.xml
index 04ad4a2f..3d621a01 100644
--- a/common/pom.xml
+++ b/common/pom.xml
@@ -57,6 +57,11 @@
<groupId>org.wololo</groupId>
<artifactId>jts2geojson</artifactId>
</dependency>
+ <dependency>
+ <groupId>com.google.geometry</groupId>
+ <artifactId>s2-geometry</artifactId>
+ <version>2.0.0</version>
+ </dependency>
</dependencies>
<build>
<sourceDirectory>src/main/java</sourceDirectory>
diff --git a/common/src/main/java/org/apache/sedona/common/Functions.java b/common/src/main/java/org/apache/sedona/common/Functions.java
index ccc6f7a0..17809520 100644
--- a/common/src/main/java/org/apache/sedona/common/Functions.java
+++ b/common/src/main/java/org/apache/sedona/common/Functions.java
@@ -13,25 +13,21 @@
*/
package org.apache.sedona.common;
+import com.google.common.geometry.S2CellId;
+import com.google.common.geometry.S2Point;
+import com.google.common.geometry.S2Region;
+import com.google.common.geometry.S2RegionCoverer;
+import org.apache.commons.lang3.ArrayUtils;
import org.apache.sedona.common.geometryObjects.Circle;
import org.apache.sedona.common.utils.GeomUtils;
import org.apache.sedona.common.utils.GeometryGeoHashEncoder;
import org.apache.sedona.common.utils.GeometrySplitter;
+import org.apache.sedona.common.utils.S2Utils;
import org.geotools.geometry.jts.JTS;
import org.geotools.referencing.CRS;
import org.locationtech.jts.algorithm.MinimumBoundingCircle;
import org.locationtech.jts.algorithm.hull.ConcaveHull;
-import org.locationtech.jts.geom.Coordinate;
-import org.locationtech.jts.geom.Geometry;
-import org.locationtech.jts.geom.GeometryCollection;
-import org.locationtech.jts.geom.GeometryFactory;
-import org.locationtech.jts.geom.LineString;
-import org.locationtech.jts.geom.MultiLineString;
-import org.locationtech.jts.geom.MultiPoint;
-import org.locationtech.jts.geom.MultiPolygon;
-import org.locationtech.jts.geom.Point;
-import org.locationtech.jts.geom.Polygon;
-import org.locationtech.jts.geom.PrecisionModel;
+import org.locationtech.jts.geom.*;
import org.locationtech.jts.geom.util.GeometryFixer;
import org.locationtech.jts.io.gml2.GMLWriter;
import org.locationtech.jts.io.kml.KMLWriter;
@@ -50,7 +46,10 @@ import org.wololo.jts2geojson.GeoJSONWriter;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.HashSet;
import java.util.List;
+import java.util.function.Function;
+import java.util.stream.Collectors;
public class Functions {
@@ -543,4 +542,25 @@ public class Functions {
// check input geometry
return new GeometrySplitter(GEOMETRY_FACTORY).split(input, blade);
}
+
+
+ /**
+ * get the coordinates of a geometry and transform to Google s2 cell id
+ * @param input Geometry
+ * @param level integer, minimum level of cells covering the geometry
+ * @return List of coordinates
+ */
+ public static Long[] s2CellIDs(Geometry input, int level) {
+ HashSet<S2CellId> cellIds = new HashSet<>();
+ List<Geometry> geoms = GeomUtils.extractGeometryCollection(input);
+ for (Geometry geom : geoms) {
+ try {
+ cellIds.addAll(S2Utils.s2RegionToCellIDs(S2Utils.toS2Region(geom), 1, level, Integer.MAX_VALUE - 1));
+ } catch (IllegalArgumentException e) {
+ // the geometry can't be cast to region, we cover its coordinates, for example, Point
+ cellIds.addAll(Arrays.stream(geom.getCoordinates()).map(c -> S2Utils.coordinateToCellID(c, level)).collect(Collectors.toList()));
+ }
+ }
+ return S2Utils.roundCellsToSameLevel(new ArrayList<>(cellIds), level).stream().map(S2CellId::id).collect(Collectors.toList()).toArray(new Long[cellIds.size()]);
+ }
}
diff --git a/common/src/main/java/org/apache/sedona/common/utils/GeomUtils.java b/common/src/main/java/org/apache/sedona/common/utils/GeomUtils.java
index 5c0440cb..7dfccc7f 100644
--- a/common/src/main/java/org/apache/sedona/common/utils/GeomUtils.java
+++ b/common/src/main/java/org/apache/sedona/common/utils/GeomUtils.java
@@ -27,6 +27,8 @@ import org.locationtech.jts.operation.union.UnaryUnionOp;
import java.nio.ByteOrder;
import java.util.*;
+import java.util.concurrent.ArrayBlockingQueue;
+import java.util.concurrent.LinkedBlockingQueue;
import static org.locationtech.jts.geom.Coordinate.NULL_ORDINATE;
@@ -381,4 +383,28 @@ public class GeomUtils {
}
return pCount;
}
+
+ public static List<Geometry> extractGeometryCollection(Geometry geom){
+ ArrayList<Geometry> leafs = new ArrayList<>();
+ if (!(geom instanceof GeometryCollection)) {
+ leafs.add(geom);
+ return leafs;
+ }
+ LinkedList<GeometryCollection> parents = new LinkedList<>();
+ parents.add((GeometryCollection) geom);
+ while (!parents.isEmpty()) {
+ GeometryCollection parent = parents.removeFirst();
+ for (int i = 0;i < parent.getNumGeometries(); i++) {
+ Geometry child = parent.getGeometryN(i);
+ if (child instanceof GeometryCollection) {
+ parents.add((GeometryCollection) child);
+ } else {
+ leafs.add(child);
+ }
+ }
+ }
+ return leafs;
+ }
+
+
}
diff --git a/common/src/main/java/org/apache/sedona/common/utils/S2Utils.java b/common/src/main/java/org/apache/sedona/common/utils/S2Utils.java
new file mode 100644
index 00000000..3a59dc59
--- /dev/null
+++ b/common/src/main/java/org/apache/sedona/common/utils/S2Utils.java
@@ -0,0 +1,99 @@
+package org.apache.sedona.common.utils;
+
+import com.google.common.geometry.*;
+import org.locationtech.jts.algorithm.Orientation;
+import org.locationtech.jts.geom.*;
+
+import javax.swing.*;
+import java.util.*;
+import java.util.stream.Collectors;
+
+public class S2Utils {
+
+ /**
+ * @param coord Coordinate: convert a jts coordinate to a S2Point
+ * @return
+ */
+ public static S2Point toS2Point(Coordinate coord) {
+ return S2LatLng.fromDegrees(coord.y, coord.x).toPoint();
+ }
+
+ public static List<S2Point> toS2Points(Coordinate[] coords) {
+ return Arrays.stream(coords).map(S2Utils::toS2Point).collect(Collectors.toList());
+ }
+
+ /**
+ * @param line
+ * @return
+ */
+ public static S2Polyline toS2PolyLine(LineString line) {
+ return new S2Polyline(toS2Points(line.getCoordinates()));
+ }
+
+ public static S2Loop toS2Loop(LinearRing ring) {
+ return new S2Loop(
+ Orientation.isCCW(ring.getCoordinates()) ? toS2Points(ring.getCoordinates()) : toS2Points(ring.reverse().getCoordinates())
+ );
+ }
+
+ public static S2Polygon toS2Polygon(Polygon polygon) {
+ List<LinearRing> rings = new ArrayList<>();
+ rings.add(polygon.getExteriorRing());
+ for (int i = 0; i < polygon.getNumInteriorRing(); i++){
+ rings.add(polygon.getInteriorRingN(i));
+ }
+ List<S2Loop> s2Loops = rings.stream().map(
+ S2Utils::toS2Loop
+ ).collect(Collectors.toList());
+ return new S2Polygon(s2Loops);
+ }
+
+ public static List<S2CellId> s2RegionToCellIDs(S2Region region, int minLevel, int maxLevel, int maxNum) {
+ S2RegionCoverer.Builder coverBuilder = S2RegionCoverer.builder();
+ coverBuilder.setMinLevel(minLevel);
+ coverBuilder.setMaxLevel(maxLevel);
+ coverBuilder.setMaxCells(maxNum);
+ return coverBuilder.build().getCovering(region).cellIds();
+ }
+
+ public static S2CellId coordinateToCellID(Coordinate coordinate, int level) {
+ return S2CellId.fromPoint(S2Utils.toS2Point(coordinate)).parent(level);
+ }
+
+ public static List<S2CellId> roundCellsToSameLevel(List<S2CellId> cellIDs, int level) {
+ Set<Long> results = new HashSet<>();
+ for (S2CellId cellID : cellIDs) {
+ if (cellID.level() > level) {
+ results.add(cellID.parent(level).id());
+ } else if(cellID.level() < level) {
+ for (S2CellId c = cellID.childBegin(level); !c.equals(cellID.childEnd(level)); c = c.next()) {
+ results.add(c.id());
+ }
+ } else {
+ results.add(cellID.id());
+ }
+ }
+ return results.stream().map(S2CellId::new).collect(Collectors.toList());
+ }
+
+ public static Polygon toJTSPolygon(S2CellId cellId) {
+ S2LatLngRect bound = new S2Cell(cellId).getRectBound();
+ Coordinate[] coords = new Coordinate[5];
+ int[] iters = new int[] {0, 1, 2, 3, 0};
+ for (int i = 0;i < 5; i++) {
+ coords[i] = new Coordinate(bound.getVertex(iters[i]).lngDegrees(), bound.getVertex(iters[i]).latDegrees());
+ }
+ return new GeometryFactory().createPolygon(coords);
+ }
+
+ public static S2Region toS2Region(Geometry geom) throws IllegalArgumentException {
+ if (geom instanceof Polygon) {
+ return S2Utils.toS2Polygon((Polygon) geom);
+ } else if (geom instanceof LineString) {
+ return S2Utils.toS2PolyLine((LineString) geom);
+ }
+ throw new IllegalArgumentException(
+ "only object of Polygon, LinearRing, LineString type can be converted to S2Region"
+ );
+ }
+}
diff --git a/common/src/test/java/org/apache/sedona/common/FunctionsTest.java b/common/src/test/java/org/apache/sedona/common/FunctionsTest.java
index abe522f9..32f9086a 100644
--- a/common/src/test/java/org/apache/sedona/common/FunctionsTest.java
+++ b/common/src/test/java/org/apache/sedona/common/FunctionsTest.java
@@ -13,19 +13,16 @@
*/
package org.apache.sedona.common;
+import com.google.common.geometry.*;
+import org.apache.sedona.common.utils.GeomUtils;
+import org.apache.sedona.common.utils.S2Utils;
import org.junit.Test;
-import org.locationtech.jts.geom.Coordinate;
-import org.locationtech.jts.geom.Geometry;
-import org.locationtech.jts.geom.GeometryCollection;
-import org.locationtech.jts.geom.GeometryFactory;
-import org.locationtech.jts.geom.LinearRing;
-import org.locationtech.jts.geom.LineString;
-import org.locationtech.jts.geom.MultiLineString;
-import org.locationtech.jts.geom.MultiPoint;
-import org.locationtech.jts.geom.MultiPolygon;
-import org.locationtech.jts.geom.Polygon;
+import org.locationtech.jts.geom.*;
import org.locationtech.jts.io.ParseException;
+import java.util.*;
+import java.util.stream.Collectors;
+
import static org.junit.Assert.*;
public class FunctionsTest {
@@ -238,4 +235,98 @@ public class FunctionsTest {
assertNull(actualResult);
}
+
+ @Test
+ public void getGoogleS2CellIDsPoint() {
+ Point point = GEOMETRY_FACTORY.createPoint(new Coordinate(1, 2));
+ Long[] cid = Functions.s2CellIDs(point, 30);
+ Polygon reversedPolygon = S2Utils.toJTSPolygon(new S2CellId(cid[0]));
+ // cast the cell to a rectangle, it must be able to cover the points
+ assert(reversedPolygon.contains(point));
+ }
+
+ private static boolean intersects(Set<?> s1, Set<?> s2) {
+ Set<?> copy = new HashSet<>(s1);
+ copy.retainAll(s2);
+ return !copy.isEmpty();
+ }
+
+ @Test
+ public void getGoogleS2CellIDsPolygon() {
+ // polygon with holes
+ Polygon target = GEOMETRY_FACTORY.createPolygon(
+ GEOMETRY_FACTORY.createLinearRing(coordArray(0.1, 0.1, 0.5, 0.1, 1.0, 0.3, 1.0, 1.0, 0.1, 1.0, 0.1, 0.1)),
+ new LinearRing[] {
+ GEOMETRY_FACTORY.createLinearRing(coordArray(0.2, 0.2, 0.5, 0.2, 0.6, 0.7, 0.2, 0.6, 0.2, 0.2))
+ }
+ );
+ // polygon inside the hole, shouldn't intersect with the polygon
+ Polygon polygonInHole = GEOMETRY_FACTORY.createPolygon(coordArray(0.3, 0.3, 0.4, 0.3, 0.3, 0.4, 0.3, 0.3));
+ // mbr of the polygon that cover all
+ Geometry mbr = target.getEnvelope();
+ // the ring that surrounds the polygon, shouldn't intersect
+ LinearRing surroundRing = GEOMETRY_FACTORY.createLinearRing(coordArray(0.0, 0.0, 1.2, 0.0, 1.2, 1.2, 0.0, 1.2, 0.0, 0.0));
+ System.out.println(GeomUtils.getWKT(surroundRing));
+ HashSet<Long> targetCells = new HashSet<>(Arrays.asList(Functions.s2CellIDs(target, 10)));
+ HashSet<Long> inHoleCells = new HashSet<>(Arrays.asList(Functions.s2CellIDs(polygonInHole, 10)));
+ HashSet<Long> mbrCells = new HashSet<>(Arrays.asList(Functions.s2CellIDs(mbr, 10)));
+ HashSet<Long> surroundCells = new HashSet<>(Arrays.asList(Functions.s2CellIDs(surroundRing, 10)));
+ assert mbrCells.containsAll(targetCells);
+ assert !intersects(targetCells, inHoleCells);
+ assert mbrCells.containsAll(targetCells);
+ assert !intersects(targetCells, surroundCells);
+ }
+
+ @Test
+ public void getGoogleS2CellIDsLineString() {
+ // polygon with holes
+ LineString target = GEOMETRY_FACTORY.createLineString(coordArray(0.2, 0.2, 0.3, 0.4, 0.4, 0.6));
+ LinearRing surroundRring = GEOMETRY_FACTORY.createLinearRing(coordArray(0.0, 0.0, 0.6, 0.0, 0.6, 0.8, 0.0, 0.8, 0.0, 0.0));
+ LineString crossLine = GEOMETRY_FACTORY.createLineString(coordArray(0.4, 0.1, 0.1, 0.4));
+ // mbr of the polygon that cover all
+ Geometry mbr = target.getEnvelope();
+ // cover the target polygon, and convert cells back to polygons
+ HashSet<Long> targetCells = new HashSet<>(Arrays.asList(Functions.s2CellIDs(target, 15)));
+ HashSet<Long> surroundCells = new HashSet<>(Arrays.asList(Functions.s2CellIDs(surroundRring, 15)));
+ HashSet<Long> crossCells = new HashSet<>(Arrays.asList(Functions.s2CellIDs(crossLine, 15)));
+ HashSet<Long> mbrCells = new HashSet<>(Arrays.asList(Functions.s2CellIDs(mbr, 15)));
+ assert !intersects(targetCells, surroundCells);
+ assert intersects(targetCells, crossCells);
+ assert mbrCells.containsAll(targetCells);
+ }
+
+ @Test
+ public void getGoogleS2CellIDsMultiPolygon() {
+ // polygon with holes
+ MultiPolygon target = GEOMETRY_FACTORY.createMultiPolygon(
+ new Polygon[] {
+ GEOMETRY_FACTORY.createPolygon(coordArray(0.1, 0.1, 0.5, 0.1, 0.1, 0.6, 0.1, 0.1)),
+ GEOMETRY_FACTORY.createPolygon(coordArray(0.2, 0.1, 0.6, 0.3, 0.7, 0.6, 0.2, 0.5, 0.2, 0.1))
+ }
+ );
+ Geometry mbr = target.getEnvelope();
+ LinearRing surroundRing = GEOMETRY_FACTORY.createLinearRing(coordArray(0.0, 0.0, 0.8, 0.0, 0.8, 0.8, 0.0, 0.8, 0.0, 0.0));
+ System.out.println(GeomUtils.getWKT(surroundRing));
+ HashSet<Long> targetCells = new HashSet<>(Arrays.asList(Functions.s2CellIDs(target, 10)));
+ HashSet<Long> mbrCells = new HashSet<>(Arrays.asList(Functions.s2CellIDs(mbr, 10)));
+ HashSet<Long> surroundCells = new HashSet<>(Arrays.asList(Functions.s2CellIDs(surroundRing, 10)));
+ assert mbrCells.containsAll(targetCells);
+ assert !intersects(targetCells, surroundCells);
+ }
+
+ @Test
+ public void getGoogleS2CellIDsAllSameLevel() {
+ // polygon with holes
+ GeometryCollection target = GEOMETRY_FACTORY.createGeometryCollection(
+ new Geometry[] {
+ GEOMETRY_FACTORY.createPolygon(coordArray(0.3, 0.3, 0.4, 0.3, 0.3, 0.4, 0.3, 0.3)),
+ GEOMETRY_FACTORY.createPoint(new Coordinate(0.7, 1.2))
+ }
+ );
+ Long[] cellIds = Functions.s2CellIDs(target, 10);
+ HashSet<Integer> levels = Arrays.stream(cellIds).map(c -> new S2CellId(c).level()).collect(Collectors.toCollection(HashSet::new));
+ HashSet<Integer> expects = new HashSet<>();
+ expects.add(10);
+ assertEquals(expects, levels);
+ }
}
diff --git a/common/src/test/java/org/apache/sedona/common/GeometryUtilTest.java b/common/src/test/java/org/apache/sedona/common/GeometryUtilTest.java
new file mode 100644
index 00000000..bb3816b3
--- /dev/null
+++ b/common/src/test/java/org/apache/sedona/common/GeometryUtilTest.java
@@ -0,0 +1,71 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sedona.common;
+
+import com.google.common.geometry.S2CellId;
+import org.apache.sedona.common.utils.GeomUtils;
+import org.apache.sedona.common.utils.S2Utils;
+import org.junit.Test;
+import org.locationtech.jts.geom.*;
+import org.locationtech.jts.io.ParseException;
+import org.locationtech.jts.io.WKTReader;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Objects;
+import java.util.stream.Collectors;
+
+public class GeometryUtilTest {
+ public static final GeometryFactory GEOMETRY_FACTORY = new GeometryFactory();
+
+ private Coordinate[] coordArray(double... coordValues) {
+ Coordinate[] coords = new Coordinate[(int)(coordValues.length / 2)];
+ for (int i = 0; i < coordValues.length; i += 2) {
+ coords[(int)(i / 2)] = new Coordinate(coordValues[i], coordValues[i+1]);
+ }
+ return coords;
+ }
+
+ @Test
+ public void extractGeometryCollection() throws ParseException, IOException {
+ MultiPolygon multiPolygon = GEOMETRY_FACTORY.createMultiPolygon(
+ new Polygon[] {
+ GEOMETRY_FACTORY.createPolygon(coordArray(0, 1,3, 0,4, 3,0, 4,0, 1)),
+ GEOMETRY_FACTORY.createPolygon(coordArray(3, 4,6, 3,5, 5,3, 4))
+ }
+ );
+ Point point = GEOMETRY_FACTORY.createPoint(new Coordinate(5.0, 8.0));
+ GeometryCollection gc1 = GEOMETRY_FACTORY.createGeometryCollection(new Geometry[] {
+ multiPolygon, point
+ });
+ GeometryCollection gc = GEOMETRY_FACTORY.createGeometryCollection(
+ new Geometry[] {
+ multiPolygon.copy(),
+ gc1
+ }
+ );
+ List<Geometry> geoms = GeomUtils.extractGeometryCollection(gc);
+ assert (
+ Objects.equals(
+ GeomUtils.getWKT(
+ GEOMETRY_FACTORY.createGeometryCollection(
+ geoms.toArray(new Geometry[geoms.size()]))),
+ "GEOMETRYCOLLECTION (POLYGON ((0 1, 3 0, 4 3, 0 4, 0 1)), POLYGON ((3 4, 6 3, 5 5, 3 4)), POINT (5 8), POLYGON ((0 1, 3 0, 4 3, 0 4, 0 1)), POLYGON ((3 4, 6 3, 5 5, 3 4)))"
+ )
+ );
+
+ }
+
+
+}
diff --git a/common/src/test/java/org/apache/sedona/common/S2UtilTest.java b/common/src/test/java/org/apache/sedona/common/S2UtilTest.java
new file mode 100644
index 00000000..6957a0ce
--- /dev/null
+++ b/common/src/test/java/org/apache/sedona/common/S2UtilTest.java
@@ -0,0 +1,107 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sedona.common;
+
+import com.google.common.geometry.*;
+import org.apache.sedona.common.utils.GeomUtils;
+import org.apache.sedona.common.utils.S2Utils;
+import org.junit.Test;
+import org.locationtech.jts.geom.*;
+import org.locationtech.jts.io.ParseException;
+import org.locationtech.jts.io.WKTReader;
+
+import java.io.FileWriter;
+import java.io.IOException;
+import java.text.DecimalFormat;
+import java.util.Comparator;
+import java.util.List;
+import java.util.stream.Collectors;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+import static sun.misc.Version.println;
+
+public class S2UtilTest {
+ public static final GeometryFactory GEOMETRY_FACTORY = new GeometryFactory();
+
+ private Coordinate[] coordArray(double... coordValues) {
+ Coordinate[] coords = new Coordinate[(int)(coordValues.length / 2)];
+ for (int i = 0; i < coordValues.length; i += 2) {
+ coords[(int)(i / 2)] = new Coordinate(coordValues[i], coordValues[i+1]);
+ }
+ return coords;
+ }
+
+ @Test
+ public void toS2Point() throws ParseException {
+ Coordinate jtsCoord = new Coordinate(1, 2);
+ S2Point s2Point = S2Utils.toS2Point(jtsCoord);
+ S2LatLng latLng = new S2LatLng(s2Point);
+ assertEquals(Math.round(latLng.lngDegrees()), 1);
+ assertEquals(Math.round(latLng.latDegrees()), 2);
+ }
+
+ @Test
+ public void coverPolygon() throws ParseException {
+ Polygon polygon = (Polygon) new WKTReader().read("POLYGON ((0.5 0.5, 5 0, 6 3, 5 5, 0 5, 0.5 0.5))");
+ List<S2CellId> cellIds = S2Utils.s2RegionToCellIDs(S2Utils.toS2Polygon(polygon), 1, 5, Integer.MAX_VALUE);
+ assertEquals(5, cellIds.size());
+ assertEquals(cellIds.stream().max(Comparator.comparingLong(S2CellId::level)).get().level(), 5);
+ }
+
+ @Test
+ public void coverPolygonWithHole() throws ParseException {
+ Polygon polygon = (Polygon) new WKTReader().read("POLYGON((0.5 0.5,5 0,5 5,0 5,0.5 0.5), (1.5 1,4 1,4 3,1.5 1))");
+ Polygon hole = (Polygon) new WKTReader().read("POLYGON((1.5 1,4 1,4 3,1.5 1))");
+ List<S2CellId> cellIds = S2Utils.roundCellsToSameLevel(S2Utils.s2RegionToCellIDs(S2Utils.toS2Polygon(polygon), 1, 8, Integer.MAX_VALUE-1), 8);
+ S2CellId holeCentroidCell = S2Utils.coordinateToCellID(hole.getCentroid().getCoordinate(), 8);
+ S2CellId holeFirstVertexCell = S2Utils.coordinateToCellID(hole.getCoordinates()[0], 8);
+ assertEquals(8, cellIds.stream().max(Comparator.comparingLong(S2CellId::level)).get().level());
+ assert(!cellIds.contains(holeCentroidCell));
+ assert(cellIds.contains(holeFirstVertexCell));
+
+ }
+
+ @Test
+ public void coverLineString() throws ParseException {
+ LineString line = (LineString) new WKTReader().read("LINESTRING (1.5 2.45, 3.21 4)");
+ List<S2CellId> cellIds = S2Utils.s2RegionToCellIDs(S2Utils.toS2PolyLine(line), 1, 8, Integer.MAX_VALUE);
+ cellIds.forEach(c -> System.out.println(GeomUtils.getWKT(S2Utils.toJTSPolygon(c))));
+ assertEquals(12, cellIds.size());
+ assertEquals(cellIds.stream().max(Comparator.comparingLong(S2CellId::level)).get().level(), 8);
+ }
+
+ @Test
+ public void coverLinearLoop() throws ParseException {
+ LineString line = GEOMETRY_FACTORY.createLineString(new WKTReader().read("LINESTRING (1.5 2.45, 3.21 4, 5 2, 1.5 2.45)").getCoordinates());
+ List<S2CellId> cellIds = S2Utils.s2RegionToCellIDs(S2Utils.toS2PolyLine(line), 1, 8, Integer.MAX_VALUE);
+ cellIds.forEach(c -> System.out.println(GeomUtils.getWKT(S2Utils.toJTSPolygon(c))));
+ assertEquals(31, cellIds.size());
+ assertEquals(cellIds.stream().max(Comparator.comparingLong(S2CellId::level)).get().level(), 8);
+ }
+
+ @Test
+ public void toS2Loop() throws ParseException {
+ LinearRing ringCW = GEOMETRY_FACTORY.createLinearRing(new WKTReader().read("LINESTRING (1.5 2.45, 3.21 4, 5 2, 1.5 2.45)").getCoordinates());
+ LinearRing ringCCW = GEOMETRY_FACTORY.createLinearRing(new WKTReader().read("LINESTRING (1.5 2.45, 5 2, 3.21 4, 1.5 2.45)").getCoordinates());
+ assert(ringCCW != ringCW);
+ S2Loop s2Loop = S2Utils.toS2Loop(ringCW);
+ DecimalFormat df = new DecimalFormat("#.##");
+ LinearRing reversedRing = GEOMETRY_FACTORY.createLinearRing(
+ s2Loop.vertices().stream().map(S2LatLng::new).map(l -> new Coordinate(Double.parseDouble(df.format(l.lngDegrees())), Double.parseDouble(df.format(l.latDegrees())))).collect(Collectors.toList()).toArray(new Coordinate[4])
+ );
+ assertEquals(ringCCW, reversedRing);
+ }
+
+}
diff --git a/docs/api/sql/Function.md b/docs/api/sql/Function.md
index de0fa210..9bd8e91e 100644
--- a/docs/api/sql/Function.md
+++ b/docs/api/sql/Function.md
@@ -1536,3 +1536,20 @@ SELECT ST_ZMin(ST_GeomFromText('LINESTRING(1 3 4, 5 6 7)'))
```
Output: `4.0`
+
+## ST_GetGoogleS2CellIDs
+
+Introduction: Cover the geometry with Google S2 Cells, return the corresponding cell IDs with the given level.
+The level indicates the [size of cells](https://s2geometry.io/resources/s2cell_statistics.html). With a bigger level,
+the cells will be smaller, the coverage will be more accurate, but the result size will be exponentially increasing.
+
+Format: `ST_S2CellIDs(geom: geometry, level: Int)`
+
+Since: `v1.3.2`
+
+Spark SQL example:
+```SQL
+SELECT ST_GetGoogleS2CellIDs(geom: Geometry)
+```
+
+Output: `4.0`
\ No newline at end of file
diff --git a/sql/pom.xml b/sql/pom.xml
index 12461afd..53f6ba37 100644
--- a/sql/pom.xml
+++ b/sql/pom.xml
@@ -123,6 +123,11 @@
<groupId>org.scalatest</groupId>
<artifactId>scalatest_${scala.compat.version}</artifactId>
</dependency>
+ <dependency>
+ <groupId>com.google.geometry</groupId>
+ <artifactId>s2-geometry</artifactId>
+ <version>2.0.0</version>
+ </dependency>
</dependencies>
<build>
<sourceDirectory>src/main/scala</sourceDirectory>
diff --git a/sql/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala b/sql/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala
index 292bee07..56912a99 100644
--- a/sql/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala
+++ b/sql/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala
@@ -167,7 +167,8 @@ object Catalog {
function[RS_HTML](),
function[RS_Array](),
function[RS_Normalize](),
- function[RS_Append]()
+ function[RS_Append](),
+ function[ST_S2CellIDs](),
)
val aggregateExpressions: Seq[Aggregator[Geometry, Geometry, Geometry]] = Seq(
diff --git a/sql/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala b/sql/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala
index 5fdcc586..2b850cff 100644
--- a/sql/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala
+++ b/sql/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala
@@ -1055,3 +1055,11 @@ case class ST_Split(inputExpressions: Seq[Expression])
copy(inputExpressions = newChildren)
}
}
+
+case class ST_S2CellIDs(inputExpressions: Seq[Expression])
+ extends InferredBinaryExpression(Functions.s2CellIDs) with FoldableExpression {
+
+ protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]) = {
+ copy(inputExpressions = newChildren)
+ }
+}
diff --git a/sql/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/NullSafeExpressions.scala b/sql/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/NullSafeExpressions.scala
index ea30ecd3..4a29eb4a 100644
--- a/sql/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/NullSafeExpressions.scala
+++ b/sql/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/NullSafeExpressions.scala
@@ -21,6 +21,7 @@ package org.apache.spark.sql.sedona_sql.expressions
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
+import org.apache.spark.sql.catalyst.util.ArrayData
import org.apache.spark.sql.sedona_sql.expressions.implicits._
import org.apache.spark.sql.sedona_sql.UDT.GeometryUDT
import org.apache.spark.sql.types._
@@ -53,6 +54,8 @@ abstract class UnaryGeometryExpression extends Expression with ExpectsInputTypes
}
protected def nullSafeEval(geometry: Geometry): Any
+
+
}
abstract class BinaryGeometryExpression extends Expression with ExpectsInputTypes {
@@ -95,6 +98,8 @@ object InferrableType {
new InferrableType[String] {}
implicit val binaryInstance: InferrableType[Array[Byte]] =
new InferrableType[Array[Byte]] {}
+ implicit val longArrayInstance: InferrableType[Array[java.lang.Long]] =
+ new InferrableType[Array[java.lang.Long]] {}
}
object InferredTypes {
@@ -121,6 +126,13 @@ object InferredTypes {
} else {
null
}
+ } else if (typeOf[T] =:= typeOf[Array[java.lang.Long]]) {
+ output: T =>
+ if (output != null) {
+ ArrayData.toArrayData(output)
+ } else {
+ null
+ }
} else {
output: T => output
}
@@ -141,6 +153,8 @@ object InferredTypes {
StringType
} else if (typeOf[T] =:= typeOf[Array[Byte]]) {
BinaryType
+ } else if (typeOf[T] =:= typeOf[Array[java.lang.Long]]) {
+ DataTypes.createArrayType(LongType)
} else {
BooleanType
}
diff --git a/sql/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_functions.scala b/sql/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_functions.scala
index 48c5a446..c490654a 100644
--- a/sql/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_functions.scala
+++ b/sql/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_functions.scala
@@ -271,4 +271,7 @@ object st_functions extends DataFrameAPI {
def ST_ZMin(geometry: Column): Column = wrapExpression[ST_ZMin](geometry)
def ST_ZMin(geometry: String): Column = wrapExpression[ST_ZMin](geometry)
+
+ def ST_S2CellIDs(geometry: Column, level: Int): Column = wrapExpression[ST_S2CellIDs](geometry, level)
+ def ST_S2CellIDs(geometry: String, level: Int): Column = wrapExpression[ST_S2CellIDs](geometry, level)
}
diff --git a/sql/src/test/scala/org/apache/sedona/sql/functions/STS2CellIDs.scala b/sql/src/test/scala/org/apache/sedona/sql/functions/STS2CellIDs.scala
new file mode 100644
index 00000000..74e923cf
--- /dev/null
+++ b/sql/src/test/scala/org/apache/sedona/sql/functions/STS2CellIDs.scala
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.sedona.sql.functions
+
+import org.apache.sedona.sql.{GeometrySample, TestBaseScala}
+import org.apache.spark.sql.functions.{col, expr, lit, when}
+import org.scalatest.{GivenWhenThen, Matchers}
+
+import scala.collection.mutable
+
+class STS2CellIDs extends TestBaseScala with Matchers with GeometrySample with GivenWhenThen {
+ import sparkSession.implicits._
+
+ describe("should pass ST_S2CellIDs"){
+
+ it("should return null while using ST_S2CellIDs when geometry is empty") {
+ val geometryTable = sparkSession.sparkContext.parallelize(1 to 10).toDF()
+ .withColumn("geom", lit(null))
+
+ When("using ST_MakePolygon on null geometries")
+ val geometryTableWithCellIDs = geometryTable
+ .withColumn("cell_ids", expr("ST_S2CellIDs(geom, 4)"))
+ .select("cell_ids").collect().filter(
+ r => r.get(0)!= null
+ )
+
+ Then("no exception should be raised")
+ require(geometryTableWithCellIDs.isEmpty)
+ }
+
+ it("should correctly return array of cell ids use of ST_S2CellIDs"){
+ Given("DataFrame with valid line strings")
+ val geometryTable = Seq(
+ "POINT(1 2)",
+ "LINESTRING(-5 8, -6 1, -8 6, -2 5, -6 1, -5 8)",
+ "POLYGON ((75 29, 77 29, 77 29, 75 29))"
+
+ ).map(geom => Tuple1(wktReader.read(geom))).toDF("geom")
+
+ When("using ST_MakePolygon on those geometries")
+ val geometryDfWithCellIDs = geometryTable
+ .withColumn("cell_ids", expr("ST_S2CellIDs(geom, 5)"))
+
+ Then("valid should have cell ids returned")
+ geometryDfWithCellIDs.show(truncate=false)
+ geometryDfWithCellIDs.select("cell_ids").collect().foreach(
+ r => require(r.get(0).isInstanceOf[mutable.WrappedArray[Long]])
+ )
+ }
+ }
+}