You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sedona.apache.org by ji...@apache.org on 2022/02/04 17:48:03 UTC

[incubator-sedona] branch master updated: [SEDONA-65] Create ST_Difference function (#576)

This is an automated email from the ASF dual-hosted git repository.

jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-sedona.git


The following commit(s) were added to refs/heads/master by this push:
     new 3843382  [SEDONA-65] Create ST_Difference function (#576)
3843382 is described below

commit 3843382c5a38be9ba4e79120bc6f7270a5f0ae0a
Author: Magdalena <69...@users.noreply.github.com>
AuthorDate: Fri Feb 4 18:47:55 2022 +0100

    [SEDONA-65] Create ST_Difference function (#576)
---
 docs/api/sql/Function.md                           | 21 +++++++++++++
 python/tests/sql/test_function.py                  | 25 ++++++++++++++-
 .../scala/org/apache/sedona/sql/UDF/Catalog.scala  |  1 +
 .../sql/sedona_sql/expressions/Functions.scala     | 36 ++++++++++++++++++++++
 .../org/apache/sedona/sql/functionTestScala.scala  | 31 +++++++++++++++++++
 5 files changed, 113 insertions(+), 1 deletion(-)

diff --git a/docs/api/sql/Function.md b/docs/api/sql/Function.md
index e012305..fcf7cdc 100644
--- a/docs/api/sql/Function.md
+++ b/docs/api/sql/Function.md
@@ -904,6 +904,7 @@ Result:
 ```
 
 Example:
+
 ```SQL
 SELECT ST_Collect(
     Array(
@@ -921,4 +922,24 @@ Result:
 +---------------------------------------------------------------+
 |MULTIPOINT ((21.427834 52.042576573), (45.342524 56.342354355))|
 +---------------------------------------------------------------+
+```
+
+## ST_Difference
+
+Introduction: Returns the part of geometry A that does not intersect geometry B.
+
+Format: `ST_Difference (A:geometry, B:geometry)`
+
+Since: `v1.2.0`
+
+Example:
+
+```SQL
+SELECT ST_Difference(ST_GeomFromWKT('POLYGON ((-3 -3, 3 -3, 3 3, -3 3, -3 -3))'), ST_GeomFromWKT('POLYGON ((0 -4, 4 -4, 4 4, 0 4, 0 -4))'))
+```
+
+Result:
+
+```
+POLYGON ((0 -3, -3 -3, -3 3, 0 3, 0 -3))
 ```
\ No newline at end of file
diff --git a/python/tests/sql/test_function.py b/python/tests/sql/test_function.py
index ab8d87f..3fec4a1 100644
--- a/python/tests/sql/test_function.py
+++ b/python/tests/sql/test_function.py
@@ -270,13 +270,36 @@ class TestPredicateJoin(TestBase):
         wkt_df = self.spark.sql("select ST_AsText(countyshape) as wkt from polygondf")
         assert polygon_df.take(1)[0]["countyshape"].wkt == loads(wkt_df.take(1)[0]["wkt"]).wkt
 
-
     def test_st_n_points(self):
         test = self.spark.sql("SELECT ST_NPoints(ST_GeomFromText('LINESTRING(77.29 29.07,77.42 29.26,77.27 29.31,77.29 29.07)'))")
 
     def test_st_geometry_type(self):
         test = self.spark.sql("SELECT ST_GeometryType(ST_GeomFromText('LINESTRING(77.29 29.07,77.42 29.26,77.27 29.31,77.29 29.07)'))")
 
+    def test_st_difference_right_overlaps_left(self):
+        """b covers the x >= 0 part of square a, so only the x <= 0 half of a remains."""
+        self.spark.sql("select ST_GeomFromWKT('POLYGON ((-3 -3, 3 -3, 3 3, -3 3, -3 -3))') as a,ST_GeomFromWKT('POLYGON ((0 -4, 4 -4, 4 4, 0 4, 0 -4))') as b").createOrReplaceTempView("test_diff")
+        first_row = self.spark.sql("select ST_Difference(a,b) from test_diff").take(1)[0]
+        assert first_row[0].wkt == "POLYGON ((0 -3, -3 -3, -3 3, 0 3, 0 -3))"
+
+    def test_st_difference_right_not_overlaps_left(self):
+        """b lies entirely to the right of square a, so a is returned unchanged."""
+        self.spark.sql("select ST_GeomFromWKT('POLYGON ((-3 -3, 3 -3, 3 3, -3 3, -3 -3))') as a,ST_GeomFromWKT('POLYGON ((5 -3, 7 -3, 7 -1, 5 -1, 5 -3))') as b").createOrReplaceTempView("test_diff")
+        first_row = self.spark.sql("select ST_Difference(a,b) from test_diff").take(1)[0]
+        assert first_row[0].wkt == "POLYGON ((-3 -3, 3 -3, 3 3, -3 3, -3 -3))"
+
+    def test_st_difference_left_contains_right(self):
+        """b sits strictly inside square a, so the result is a with b cut out as a hole."""
+        self.spark.sql("select ST_GeomFromWKT('POLYGON ((-3 -3, 3 -3, 3 3, -3 3, -3 -3))') as a,ST_GeomFromWKT('POLYGON ((-1 -1, 1 -1, 1 1, -1 1, -1 -1))') as b").createOrReplaceTempView("test_diff")
+        first_row = self.spark.sql("select ST_Difference(a,b) from test_diff").take(1)[0]
+        assert first_row[0].wkt == "POLYGON ((-3 -3, -3 3, 3 3, 3 -3, -3 -3), (-1 -1, 1 -1, 1 1, -1 1, -1 -1))"
+
+    def test_st_difference_right_contains_left(self):
+        """a sits strictly inside square b, so nothing of a is left: POLYGON EMPTY."""
+        self.spark.sql("select ST_GeomFromWKT('POLYGON ((-1 -1, 1 -1, 1 1, -1 1, -1 -1))') as a,ST_GeomFromWKT('POLYGON ((-3 -3, 3 -3, 3 3, -3 3, -3 -3))') as b").createOrReplaceTempView("test_diff")
+        first_row = self.spark.sql("select ST_Difference(a,b) from test_diff").take(1)[0]
+        assert first_row[0].wkt == "POLYGON EMPTY"
+
     def test_st_azimuth(self):
         sample_points = create_sample_points(20)
         sample_pair_points = [[el, sample_points[1]] for el in sample_points]
diff --git a/sql/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala b/sql/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala
index 01d3c6b..54cfc14 100644
--- a/sql/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala
+++ b/sql/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala
@@ -51,6 +51,7 @@ object Catalog {
     ST_Centroid,
     ST_Transform,
     ST_Intersection,
+    ST_Difference,
     ST_IsValid,
     ST_PrecisionReduce,
     ST_Equals,
diff --git a/sql/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala b/sql/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala
index 39abeea..77f92d2 100644
--- a/sql/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala
+++ b/sql/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala
@@ -1559,3 +1559,39 @@ case class ST_GeoHash(inputExpressions: Seq[Expression])
     copy(inputExpressions = newChildren)
   }
 }
+
+case class ST_Difference(inputExpressions: Seq[Expression])
+  extends Expression with CodegenFallback {
+  assert(inputExpressions.length == 2)
+
+  lazy val GeometryFactory = new GeometryFactory()
+  // Result used whenever the right geometry contains the left one.
+  lazy val emptyPolygon = GeometryFactory.createPolygon(null, null)
+
+  // NOTE(review): eval below has no null guard on the child results -- confirm inputs can never be null.
+  override def nullable: Boolean = false
+
+  override def dataType: DataType = GeometryUDT
+
+  override def children: Seq[Expression] = inputExpressions
+
+  /** Deserializes both child results for the row and returns the serialized left geometry minus the right one. */
+  override def eval(inputRow: InternalRow): Any = {
+    val left = GeometrySerializer.deserialize(inputExpressions(0).eval(inputRow).asInstanceOf[ArrayData])
+    val right = GeometrySerializer.deserialize(inputExpressions(1).eval(inputRow).asInstanceOf[ArrayData])
+
+    val remainder =
+      if (!left.intersects(right)) {
+        left // no intersection: the left geometry is returned as-is
+      } else if (right.contains(left)) {
+        emptyPolygon // the containment test only runs once an intersection is known
+      } else {
+        left.difference(right)
+      }
+    new GenericArrayData(GeometrySerializer.serialize(remainder))
+  }
+
+  protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]) = {
+    copy(inputExpressions = newChildren)
+  }
+}
diff --git a/sql/src/test/scala/org/apache/sedona/sql/functionTestScala.scala b/sql/src/test/scala/org/apache/sedona/sql/functionTestScala.scala
index 7d9918f..c6bcf30 100644
--- a/sql/src/test/scala/org/apache/sedona/sql/functionTestScala.scala
+++ b/sql/src/test/scala/org/apache/sedona/sql/functionTestScala.scala
@@ -347,6 +347,37 @@ class functionTestScala extends TestBaseScala with Matchers with GeometrySample
       assert(test.take(1)(0).get(0).asInstanceOf[String].toUpperCase() == "ST_LINESTRING")
     }
 
+    it("Passed ST_Difference - part of right overlaps left") {
+      // b covers the x >= 0 part of square a, so only the x <= 0 half of a remains.
+      sparkSession.sql("select ST_GeomFromWKT('POLYGON ((-3 -3, 3 -3, 3 3, -3 3, -3 -3))') as a,ST_GeomFromWKT('POLYGON ((0 -4, 4 -4, 4 4, 0 4, 0 -4))') as b")
+        .createOrReplaceTempView("testtable")
+      val actualWkt = sparkSession.sql("select ST_Difference(a,b) from testtable").take(1)(0).get(0).asInstanceOf[Geometry].toText
+      assert(actualWkt == "POLYGON ((0 -3, -3 -3, -3 3, 0 3, 0 -3))")
+    }
+
+    it("Passed ST_Difference - right not overlaps left") {
+      // b lies entirely to the right of square a, so a is returned unchanged.
+      sparkSession.sql("select ST_GeomFromWKT('POLYGON ((-3 -3, 3 -3, 3 3, -3 3, -3 -3))') as a,ST_GeomFromWKT('POLYGON ((5 -3, 7 -3, 7 -1, 5 -1, 5 -3))') as b")
+        .createOrReplaceTempView("testtable")
+      val actualWkt = sparkSession.sql("select ST_Difference(a,b) from testtable").take(1)(0).get(0).asInstanceOf[Geometry].toText
+      assert(actualWkt == "POLYGON ((-3 -3, 3 -3, 3 3, -3 3, -3 -3))")
+    }
+
+    it("Passed ST_Difference - left contains right") {
+      // b sits strictly inside square a, so the result is a with b cut out as a hole.
+      sparkSession.sql("select ST_GeomFromWKT('POLYGON ((-3 -3, 3 -3, 3 3, -3 3, -3 -3))') as a,ST_GeomFromWKT('POLYGON ((-1 -1, 1 -1, 1 1, -1 1, -1 -1))') as b")
+        .createOrReplaceTempView("testtable")
+      val actualWkt = sparkSession.sql("select ST_Difference(a,b) from testtable").take(1)(0).get(0).asInstanceOf[Geometry].toText
+      assert(actualWkt == "POLYGON ((-3 -3, -3 3, 3 3, 3 -3, -3 -3), (-1 -1, 1 -1, 1 1, -1 1, -1 -1))")
+    }
+
+    it("Passed ST_Difference - right contains left") {
+      // a sits strictly inside square b, so nothing of a is left: POLYGON EMPTY.
+      sparkSession.sql("select ST_GeomFromWKT('POLYGON ((-1 -1, 1 -1, 1 1, -1 1, -1 -1))') as a,ST_GeomFromWKT('POLYGON ((-3 -3, 3 -3, 3 3, -3 3, -3 -3))') as b").createOrReplaceTempView("testtable")
+      val actualWkt = sparkSession.sql("select ST_Difference(a,b) from testtable").take(1)(0).get(0).asInstanceOf[Geometry].toText
+      assert(actualWkt == "POLYGON EMPTY")
+    }
+
 
     it("Passed ST_Azimuth") {