You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sedona.apache.org by ji...@apache.org on 2022/04/06 18:35:50 UTC
[incubator-sedona] branch master updated: [SEDONA-96] Refactor ST_MakeValid to use GeometryFixer. (#596)
This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-sedona.git
The following commit(s) were added to refs/heads/master by this push:
new 46fc348e [SEDONA-96] Refactor ST_MakeValid to use GeometryFixer. (#596)
46fc348e is described below
commit 46fc348e41db58a51eeb70c1c7e87c0f95abb1fa
Author: Martin Andersson <u....@gmail.com>
AuthorDate: Wed Apr 6 20:35:44 2022 +0200
[SEDONA-96] Refactor ST_MakeValid to use GeometryFixer. (#596)
---
docs/api/sql/Function.md | 35 ++++++++----
pom.xml | 2 +-
.../sql/sedona_sql/expressions/Functions.scala | 56 ++++++++-----------
.../org/apache/sedona/sql/functionTestScala.scala | 63 ++++++++--------------
4 files changed, 70 insertions(+), 86 deletions(-)
diff --git a/docs/api/sql/Function.md b/docs/api/sql/Function.md
index 7085cdee..71b2da8a 100644
--- a/docs/api/sql/Function.md
+++ b/docs/api/sql/Function.md
@@ -162,27 +162,40 @@ FROM polygondf
## ST_MakeValid
-Introduction: Given an invalid polygon or multipolygon and removeHoles boolean flag,
- create a valid representation of the geometry.
+Introduction: Given an invalid geometry, create a valid representation of the geometry.
-Format: `ST_MakeValid (A:geometry, removeHoles:Boolean)`
+Collapsed geometries are either converted to empty (keepCollaped=true) or a valid geometry of lower dimension (keepCollapsed=false).
+Default is keepCollapsed=false.
+
+Format: `ST_MakeValid (A:geometry)`
+
+Format: `ST_MakeValid (A:geometry, keepCollapsed:Boolean)`
Since: `v1.0.0`
Spark SQL example:
```SQL
-SELECT geometryValid.polygon
-FROM table
-LATERAL VIEW ST_MakeValid(polygon, false) geometryValid AS polygon
+WITH linestring AS (
+ SELECT ST_GeomFromWKT('LINESTRING(1 1, 1 1)') AS geom
+) SELECT ST_MakeValid(geom), ST_MakeValid(geom, true) FROM linestring
+```
+
+Result:
+```
++------------------+------------------------+
+|st_makevalid(geom)|st_makevalid(geom, true)|
++------------------+------------------------+
+| LINESTRING EMPTY| POINT (1 1)|
++------------------+------------------------+
```
!!!note
- Might return multiple polygons from a only one invalid polygon
- That's the reason why we need to use the LATERAL VIEW expression
-
-!!!note
- Throws an exception if the geometry isn't polygon or multipolygon
+ In Sedona up to and including version 1.2 the behaviour of ST_MakeValid was different.
+ Be sure to check you code when upgrading.
+ The previous implementation only worked for (multi)polygons and had a different interpretation of the second, boolean, argument.
+ It would also sometimes return multiple geometries for a single geomtry input.
+
## ST_PrecisionReduce
diff --git a/pom.xml b/pom.xml
index d150132a..b783351c 100644
--- a/pom.xml
+++ b/pom.xml
@@ -61,7 +61,7 @@
<geotools.version>24.0</geotools.version>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<dependency.scope>provided</dependency.scope>
- <jts.version>1.18.0</jts.version>
+ <jts.version>1.18.2</jts.version>
<jts2geojson.version>0.16.1</jts2geojson.version>
</properties>
diff --git a/sql/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala b/sql/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala
index effe50bb..fbe446d4 100644
--- a/sql/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala
+++ b/sql/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala
@@ -36,6 +36,7 @@ import org.apache.spark.unsafe.types.UTF8String
import org.geotools.geometry.jts.JTS
import org.geotools.referencing.CRS
import org.locationtech.jts.algorithm.MinimumBoundingCircle
+import org.locationtech.jts.geom.util.GeometryFixer
import org.locationtech.jts.geom.{PrecisionModel, _}
import org.locationtech.jts.io.{ByteOrderValues, WKBWriter, WKTWriter}
import org.locationtech.jts.linearref.LengthIndexedLine
@@ -343,49 +344,38 @@ case class ST_Intersection(inputExpressions: Seq[Expression])
}
/**
- * Given an invalid polygon or multipolygon and removeHoles boolean flag, create a valid representation of the geometry
+ * Given an invalid geometry, create a valid representation of the geometry.
+ * See: http://lin-ear-th-inking.blogspot.com/2021/05/fixing-invalid-geometry-with-jts.html
*
* @param inputExpressions
*/
case class ST_MakeValid(inputExpressions: Seq[Expression])
- extends Generator with CodegenFallback with UserDataGeneratator {
- assert(inputExpressions.length == 2)
-
- override def elementSchema: StructType = new StructType().add("Geometry", new GeometryUDT)
-
- override def toString: String = s" **${ST_MakeValid.getClass.getName}** "
-
- override def eval(input: InternalRow): TraversableOnce[InternalRow] = {
- val geometry = GeometrySerializer.deserialize(inputExpressions(0).eval(input).asInstanceOf[ArrayData])
- val removeHoles = inputExpressions(1).eval(input).asInstanceOf[Boolean]
-
- // in order to do flatMap on java collections(util.List[Polygon])
- import scala.jdk.CollectionConverters._
+ extends Expression with CodegenFallback {
+ assert(inputExpressions.length == 1 || inputExpressions.length == 2)
- // makeValid works only on polygon or multipolygon
- if (!geometry.getGeometryType.equalsIgnoreCase("POLYGON") && !geometry.getGeometryType.equalsIgnoreCase("MULTIPOLYGON")) {
- throw new IllegalArgumentException("ST_MakeValid works only on Polygons and MultiPolygons")
+ override def eval(input: InternalRow): Any = {
+ val geometry = inputExpressions.head.toGeometry(input)
+ val keepCollapsed = if (inputExpressions.length == 2) {
+ inputExpressions(1).eval(input).asInstanceOf[Boolean]
+ } else {
+ false
}
-
- val validGeometry = geometry match {
- case g: MultiPolygon =>
- (0 until g.getNumGeometries).flatMap(i => {
- val polygon = g.getGeometryN(i).asInstanceOf[Polygon]
- JTS.makeValid(polygon, removeHoles).asScala.iterator
- })
- case g: Polygon =>
- JTS.makeValid(g, removeHoles).asScala.iterator
- case _ => Nil
+ (geometry) match {
+ case (geometry: Geometry) => nullSafeEval(geometry, keepCollapsed)
+ case _ => null
}
+ }
- val result = validGeometry.map(g => {
- val serializedGeometry = GeometrySerializer.serialize(g.asInstanceOf[Geometry])
- InternalRow(new GenericArrayData(serializedGeometry))
- })
-
- result
+ private def nullSafeEval(geometry: Geometry, keepCollapsed: Boolean) = {
+ val fixer = new GeometryFixer(geometry)
+ fixer.setKeepCollapsed(keepCollapsed)
+ fixer.getResult.toGenericArrayData
}
+ override def nullable: Boolean = true
+
+ override def dataType: DataType = GeometryUDT
+
override def children: Seq[Expression] = inputExpressions
protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]) = {
diff --git a/sql/src/test/scala/org/apache/sedona/sql/functionTestScala.scala b/sql/src/test/scala/org/apache/sedona/sql/functionTestScala.scala
index 9612fd56..6f55c05a 100644
--- a/sql/src/test/scala/org/apache/sedona/sql/functionTestScala.scala
+++ b/sql/src/test/scala/org/apache/sedona/sql/functionTestScala.scala
@@ -22,6 +22,7 @@ package org.apache.sedona.sql
import org.apache.commons.codec.binary.Hex
import org.apache.sedona.sql.implicits._
import org.apache.spark.sql.functions._
+import org.apache.spark.sql.sedona_sql.expressions.ST_MakeValid
import org.apache.spark.sql.{DataFrame, Row}
import org.geotools.geometry.jts.WKTReader2
import org.locationtech.jts.algorithm.MinimumBoundingCircle
@@ -203,62 +204,42 @@ class functionTestScala extends TestBaseScala with Matchers with GeometrySample
it("Passed ST_MakeValid On Invalid Polygon") {
val df = sparkSession.sql("SELECT ST_GeomFromWKT('POLYGON((1 5, 1 1, 3 3, 5 3, 7 1, 7 5, 5 3, 3 3, 1 5))') AS polygon")
- df.createOrReplaceTempView("table")
- val result = sparkSession.sql(
- """
- |SELECT geometryValid.polygon
- |FROM table
- |LATERAL VIEW ST_MakeValid(polygon, false) geometryValid AS polygon
- |""".stripMargin
- ).collect()
-
- val wktReader = new WKTReader2()
- val firstValidGeometry = wktReader.read("POLYGON ((1 5, 3 3, 1 1, 1 5))")
- val secondValidGeometry = wktReader.read("POLYGON ((5 3, 7 5, 7 1, 5 3))")
-
- assert(result.exists(row => row.getAs[Geometry](0).equals(firstValidGeometry)))
- assert(result.exists(row => row.getAs[Geometry](0).equals(secondValidGeometry)))
+ val result = df.withColumn("polygon", expr("ST_MakeValid(polygon)")).collect()
+
+ assert(result.length == 1)
+ assert(result.take(1)(0).get(0).asInstanceOf[Geometry].toText() == "MULTIPOLYGON (((1 5, 3 3, 1 1, 1 5)), ((5 3, 7 5, 7 1, 5 3)))")
}
it("Passed ST_MakeValid On Invalid MultiPolygon") {
val df = sparkSession.sql("SELECT ST_GeomFromWKT('MULTIPOLYGON(((0 0, 3 0, 3 3, 0 3, 0 0)), ((3 0, 6 0, 6 3, 3 3, 3 0)))') AS multipolygon")
- df.createOrReplaceTempView("table")
- val result = sparkSession.sql(
- """
- |SELECT geometryValid.multipolygon
- |FROM table
- |LATERAL VIEW ST_MakeValid(multipolygon, false) geometryValid AS multipolygon
- |""".stripMargin
- ).collect()
-
- val wktReader = new WKTReader2()
- val firstValidGeometry = wktReader.read("POLYGON ((0 3, 3 3, 3 0, 0 0, 0 3))")
- val secondValidGeometry = wktReader.read("POLYGON ((3 3, 6 3, 6 0, 3 0, 3 3))")
-
- assert(result.exists(row => row.getAs[Geometry](0).equals(firstValidGeometry)))
- assert(result.exists(row => row.getAs[Geometry](0).equals(secondValidGeometry)))
+ val result = df.withColumn("multipolygon", expr("ST_MakeValid(multipolygon)")).collect()
+
+ assert(result.length == 1)
+ assert(result.take(1)(0).get(0).asInstanceOf[Geometry].toText() == "POLYGON ((0 3, 3 3, 6 3, 6 0, 3 0, 0 0, 0 3))")
}
it("Passed ST_MakeValid On Valid Polygon") {
val df = sparkSession.sql("SELECT ST_GeomFromWKT('POLYGON((1 1, 8 1, 8 8, 1 8, 1 1))') AS polygon")
- df.createOrReplaceTempView("table")
- val result = sparkSession.sql(
- """
- |SELECT geometryValid.polygon
- |FROM table
- |LATERAL VIEW ST_MakeValid(polygon, false) geometryValid AS polygon
- |""".stripMargin
- ).collect()
+ val result = df.withColumn("polygon", expr("ST_MakeValid(polygon)")).collect()
+
+ assert(result.length == 1)
+ assert(result.take(1)(0).get(0).asInstanceOf[Geometry].toText() == "POLYGON ((1 1, 1 8, 8 8, 8 1, 1 1))")
+ }
+
+ it("Passed ST_MakeValid on Invalid LineString") {
+
+ val df = sparkSession.sql("SELECT ST_GeomFromWKT('LINESTRING(1 1, 1 1)') AS geom")
- val wktReader = new WKTReader2()
- val validGeometry = wktReader.read("POLYGON((1 1, 8 1, 8 8, 1 8, 1 1))")
+ val result = df.selectExpr("ST_MakeValid(geom)", "ST_MakeValid(geom, true)").collect()
- assert(result.exists(row => row.getAs[Geometry](0).equals(validGeometry)))
+ assert(result.length == 1)
+ assert(result.take(1)(0).get(0).asInstanceOf[Geometry].toText() == "LINESTRING EMPTY")
+ assert(result.take(1)(0).get(1).asInstanceOf[Geometry].toText() == "POINT (1 1)")
}
it("Passed ST_SimplifyPreserveTopology") {