You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sedona.apache.org by ji...@apache.org on 2022/06/26 07:16:49 UTC
[incubator-sedona] branch master updated: [SEDONA-123] Fix check for invalid latitude/longitude values in ST_GeoHash (#637)
This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-sedona.git
The following commit(s) were added to refs/heads/master by this push:
new c56f5d44 [SEDONA-123] Fix check for invalid latitude/longitude values in ST_GeoHash (#637)
c56f5d44 is described below
commit c56f5d44e9ba303397757f5564e4869fbc506940
Author: Brian Rice <br...@gmail.com>
AuthorDate: Sun Jun 26 02:16:44 2022 -0500
[SEDONA-123] Fix check for invalid latitude/longitude values in ST_GeoHash (#637)
---
.../geohash/GeometryGeoHashEncoder.scala | 4 +-
.../sql/functions/geohash/TestStGeoHash.scala | 135 +++++++++++++++++++++
2 files changed, 138 insertions(+), 1 deletion(-)
diff --git a/sql/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/geohash/GeometryGeoHashEncoder.scala b/sql/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/geohash/GeometryGeoHashEncoder.scala
index 0709b27a..f1ecbc19 100644
--- a/sql/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/geohash/GeometryGeoHashEncoder.scala
+++ b/sql/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/geohash/GeometryGeoHashEncoder.scala
@@ -24,7 +24,9 @@ object GeometryGeoHashEncoder {
private val geometryFactory = new GeometryFactory()
def calculate(geom: Geometry, precision: Int): Option[String] = {
val gbox = geom.getEnvelope.getEnvelopeInternal
- if (gbox.getMinX < -180 || gbox.getMinY < -90 || gbox.getMaxX > 180 || gbox.getMaxX > 90) None
+ // Latitude can take values in [-90, 90]
+ // Longitude can take values in [-180, 180]
+ if (gbox.getMinX < -180 || gbox.getMinY < -90 || gbox.getMaxX > 180 || gbox.getMaxY > 90) None
else {
val lon = gbox.getMinX + (gbox.getMaxX - gbox.getMinX) / 2
val lat = gbox.getMinY + (gbox.getMaxY - gbox.getMinY) / 2
diff --git a/sql/src/test/scala/org/apache/sedona/sql/functions/geohash/TestStGeoHash.scala b/sql/src/test/scala/org/apache/sedona/sql/functions/geohash/TestStGeoHash.scala
index d9cd56d5..895172f1 100644
--- a/sql/src/test/scala/org/apache/sedona/sql/functions/geohash/TestStGeoHash.scala
+++ b/sql/src/test/scala/org/apache/sedona/sql/functions/geohash/TestStGeoHash.scala
@@ -108,5 +108,140 @@ class TestStGeoHash extends TestBaseScala with GeometrySample with GivenWhenThen
result.head shouldBe ""
}
+
+ it("should not return null for 90 < long < 180 (SEDONA-123)") {
+ Given("geometry df")
+ val geometryDf = Seq(
+ // Format: (long, lat)
+ (1, "POINT(120.0 50.0)")
+ ).map{
+ case (id, geomWkt) => (id, wktReader.read(geomWkt))
+ }.toDF("id", "geom")
+
+ When("calculating geohash")
+ val geoHashDf = geometryDf.withColumn("geohash", expr("ST_GeoHash(geom, 12)"))
+
+ Then("geohash should not be null / return expected result")
+ val result = geoHashDf.select("geohash").distinct().as[String].collect().toList
+
+ result.size shouldBe 1
+ result.head shouldBe "y8vk6wjr4et3"
+ }
+
+ it("should return expected value for boundary case of min lat/long") {
+ Given("geometry df")
+ val geometryDf = Seq(
+ // Format: (long, lat)
+ (1, "POINT(-180.0 -90.0)")
+ ).map{
+ case (id, geomWkt) => (id, wktReader.read(geomWkt))
+ }.toDF("id", "geom")
+
+ When("calculating geohash")
+ val geoHashDf = geometryDf.withColumn("geohash", expr("ST_GeoHash(geom, 12)"))
+
+ Then("geohash should return expected result")
+ val result = geoHashDf.select("geohash").distinct().as[String].collect().toList
+
+ result.size shouldBe 1
+ result.head shouldBe "000000000000"
+ }
+
+ it("should return expected value for boundary case of max lat/long") {
+ Given("geometry df")
+ val geometryDf = Seq(
+ // Format: (long, lat)
+ (1, "POINT(180.0 90.0)")
+ ).map{
+ case (id, geomWkt) => (id, wktReader.read(geomWkt))
+ }.toDF("id", "geom")
+
+ When("calculating geohash")
+ val geoHashDf = geometryDf.withColumn("geohash", expr("ST_GeoHash(geom, 12)"))
+
+ Then("geohash should return expected result")
+ val result = geoHashDf.select("geohash").distinct().as[String].collect().toList
+
+ result.size shouldBe 1
+ result.head shouldBe "zzzzzzzzzzzz"
+ }
+ }
+
+ describe("should return null when geometry contains invalid coordinates") {
+ it("should return null when longitude is less than -180") {
+ Given("geometry df")
+ val geometryDf = Seq(
+ // Format: (long, lat)
+ (1, "POINT(-190.0 50.0)")
+ ).map{
+ case (id, geomWkt) => (id, wktReader.read(geomWkt))
+ }.toDF("id", "geom")
+
+ When("calculating geohash")
+ val geoHashDf = geometryDf.withColumn("geohash", expr("ST_GeoHash(geom, 1)"))
+
+ Then("geohash should be null")
+ val result = geoHashDf.select("geohash").distinct().as[String].collect().toList
+
+ result.size shouldBe 1
+ result.head shouldBe null
+ }
+
+ it("should return null when longitude is greater than 180") {
+ Given("geometry df")
+ val geometryDf = Seq(
+ // Format: (long, lat)
+ (1, "POINT(190.0 50.0)")
+ ).map{
+ case (id, geomWkt) => (id, wktReader.read(geomWkt))
+ }.toDF("id", "geom")
+
+ When("calculating geohash")
+ val geoHashDf = geometryDf.withColumn("geohash", expr("ST_GeoHash(geom, 1)"))
+
+ Then("geohash should be null")
+ val result = geoHashDf.select("geohash").distinct().as[String].collect().toList
+
+ result.size shouldBe 1
+ result.head shouldBe null
+ }
+
+ it("should return null when latitude is less than -90") {
+ Given("geometry df")
+ val geometryDf = Seq(
+ // Format: (long, lat)
+ (1, "POINT(50.0 -100.0)")
+ ).map{
+ case (id, geomWkt) => (id, wktReader.read(geomWkt))
+ }.toDF("id", "geom")
+
+ When("calculating geohash")
+ val geoHashDf = geometryDf.withColumn("geohash", expr("ST_GeoHash(geom, 1)"))
+
+ Then("geohash should be null")
+ val result = geoHashDf.select("geohash").distinct().as[String].collect().toList
+
+ result.size shouldBe 1
+ result.head shouldBe null
+ }
+
+ it("should return null when latitude is greater than 90") {
+ Given("geometry df")
+ val geometryDf = Seq(
+ // Format: (long, lat)
+ (1, "POINT(50.0 100.0)")
+ ).map{
+ case (id, geomWkt) => (id, wktReader.read(geomWkt))
+ }.toDF("id", "geom")
+
+ When("calculating geohash")
+ val geoHashDf = geometryDf.withColumn("geohash", expr("ST_GeoHash(geom, 1)"))
+
+ Then("geohash should be null")
+ val result = geoHashDf.select("geohash").distinct().as[String].collect().toList
+
+ result.size shouldBe 1
+ result.head shouldBe null
+ }
}
}