You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sedona.apache.org by ji...@apache.org on 2022/06/26 07:16:49 UTC

[incubator-sedona] branch master updated: [SEDONA-123] Fix check for invalid latitude/longitude values in ST_GeoHash (#637)

This is an automated email from the ASF dual-hosted git repository.

jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-sedona.git


The following commit(s) were added to refs/heads/master by this push:
     new c56f5d44 [SEDONA-123] Fix check for invalid latitude/longitude values in ST_GeoHash (#637)
c56f5d44 is described below

commit c56f5d44e9ba303397757f5564e4869fbc506940
Author: Brian Rice <br...@gmail.com>
AuthorDate: Sun Jun 26 02:16:44 2022 -0500

    [SEDONA-123] Fix check for invalid latitude/longitude values in ST_GeoHash (#637)
---
 .../geohash/GeometryGeoHashEncoder.scala           |   4 +-
 .../sql/functions/geohash/TestStGeoHash.scala      | 135 +++++++++++++++++++++
 2 files changed, 138 insertions(+), 1 deletion(-)

diff --git a/sql/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/geohash/GeometryGeoHashEncoder.scala b/sql/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/geohash/GeometryGeoHashEncoder.scala
index 0709b27a..f1ecbc19 100644
--- a/sql/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/geohash/GeometryGeoHashEncoder.scala
+++ b/sql/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/geohash/GeometryGeoHashEncoder.scala
@@ -24,7 +24,9 @@ object GeometryGeoHashEncoder {
   private val geometryFactory = new GeometryFactory()
   def calculate(geom: Geometry, precision: Int): Option[String] = {
     val gbox = geom.getEnvelope.getEnvelopeInternal
-    if (gbox.getMinX < -180 || gbox.getMinY < -90 || gbox.getMaxX > 180 || gbox.getMaxX > 90) None
+    // Latitude can take values in [-90, 90]
+    // Longitude can take values in [-180, 180]
+    if (gbox.getMinX < -180 || gbox.getMinY < -90 || gbox.getMaxX > 180 || gbox.getMaxY > 90) None
     else {
       val lon = gbox.getMinX + (gbox.getMaxX - gbox.getMinX) / 2
       val lat = gbox.getMinY + (gbox.getMaxY - gbox.getMinY) / 2
diff --git a/sql/src/test/scala/org/apache/sedona/sql/functions/geohash/TestStGeoHash.scala b/sql/src/test/scala/org/apache/sedona/sql/functions/geohash/TestStGeoHash.scala
index d9cd56d5..895172f1 100644
--- a/sql/src/test/scala/org/apache/sedona/sql/functions/geohash/TestStGeoHash.scala
+++ b/sql/src/test/scala/org/apache/sedona/sql/functions/geohash/TestStGeoHash.scala
@@ -108,5 +108,140 @@ class TestStGeoHash extends TestBaseScala with GeometrySample with GivenWhenThen
 
       result.head shouldBe ""
     }
+
+    it("should not return null for 90 < long < 180 (SEDONA-123)") {
+      Given("geometry df")
+      val geometryDf = Seq(
+        // Format: (long, lat)
+        (1, "POINT(120.0 50.0)")
+      ).map{
+        case (id, geomWkt) => (id, wktReader.read(geomWkt))
+      }.toDF("id", "geom")
+
+      When("calculating geohash")
+      val geoHashDf = geometryDf.withColumn("geohash", expr("ST_GeoHash(geom, 12)"))
+
+      Then("geohash should not be null / return expected result")
+      val result = geoHashDf.select("geohash").distinct().as[String].collect().toList
+
+      result.size shouldBe 1
+      result.head shouldBe "y8vk6wjr4et3"
+    }
+
+    it("should return expected value for boundary case of min lat/long") {
+      Given("geometry df")
+      val geometryDf = Seq(
+        // Format: (long, lat)
+        (1, "POINT(-180.0 -90.0)")
+      ).map{
+        case (id, geomWkt) => (id, wktReader.read(geomWkt))
+      }.toDF("id", "geom")
+
+      When("calculating geohash")
+      val geoHashDf = geometryDf.withColumn("geohash", expr("ST_GeoHash(geom, 12)"))
+
+      Then("geohash should return expected result")
+      val result = geoHashDf.select("geohash").distinct().as[String].collect().toList
+
+      result.size shouldBe 1
+      result.head shouldBe "000000000000"
+    }
+
+    it("should return expected value for boundary case of max lat/long") {
+      Given("geometry df")
+      val geometryDf = Seq(
+        // Format: (long, lat)
+        (1, "POINT(180.0 90.0)")
+      ).map{
+        case (id, geomWkt) => (id, wktReader.read(geomWkt))
+      }.toDF("id", "geom")
+
+      When("calculating geohash")
+      val geoHashDf = geometryDf.withColumn("geohash", expr("ST_GeoHash(geom, 12)"))
+
+      Then("geohash should return expected result")
+      val result = geoHashDf.select("geohash").distinct().as[String].collect().toList
+
+      result.size shouldBe 1
+      result.head shouldBe "zzzzzzzzzzzz"
+    }
+  }
+
+  describe("should return null when geometry contains invalid coordinates") {
+    it("should return null when longitude is less than -180") {
+      Given("geometry df")
+      val geometryDf = Seq(
+        // Format: (long, lat)
+        (1, "POINT(-190.0 50.0)")
+      ).map{
+        case (id, geomWkt) => (id, wktReader.read(geomWkt))
+      }.toDF("id", "geom")
+
+      When("calculating geohash")
+      val geoHashDf = geometryDf.withColumn("geohash", expr("ST_GeoHash(geom, 1)"))
+
+      Then("geohash should be null")
+      val result = geoHashDf.select("geohash").distinct().as[String].collect().toList
+
+      result.size shouldBe 1
+      result.head shouldBe null
+    }
+
+    it("should return null when longitude is greater than 180") {
+      Given("geometry df")
+      val geometryDf = Seq(
+        // Format: (long, lat)
+        (1, "POINT(190.0 50.0)")
+      ).map{
+        case (id, geomWkt) => (id, wktReader.read(geomWkt))
+      }.toDF("id", "geom")
+
+      When("calculating geohash")
+      val geoHashDf = geometryDf.withColumn("geohash", expr("ST_GeoHash(geom, 1)"))
+
+      Then("geohash should be null")
+      val result = geoHashDf.select("geohash").distinct().as[String].collect().toList
+
+      result.size shouldBe 1
+      result.head shouldBe null
+    }
+
+    it("should return null when latitude is less than -90") {
+      Given("geometry df")
+      val geometryDf = Seq(
+        // Format: (long, lat)
+        (1, "POINT(50.0 -100.0)")
+      ).map{
+        case (id, geomWkt) => (id, wktReader.read(geomWkt))
+      }.toDF("id", "geom")
+
+      When("calculating geohash")
+      val geoHashDf = geometryDf.withColumn("geohash", expr("ST_GeoHash(geom, 1)"))
+
+      Then("geohash should be null")
+      val result = geoHashDf.select("geohash").distinct().as[String].collect().toList
+
+      result.size shouldBe 1
+      result.head shouldBe null
+    }
+
+    it("should return null when latitude is greater than 90") {
+      Given("geometry df")
+      val geometryDf = Seq(
+        // Format: (long, lat)
+        (1, "POINT(50.0 100.0)")
+      ).map{
+        case (id, geomWkt) => (id, wktReader.read(geomWkt))
+      }.toDF("id", "geom")
+
+      When("calculating geohash")
+      val geoHashDf = geometryDf.withColumn("geohash", expr("ST_GeoHash(geom, 1)"))
+
+      Then("geohash should be null")
+      val result = geoHashDf.select("geohash").distinct().as[String].collect().toList
+
+      result.size shouldBe 1
+      result.head shouldBe null
+    }
   }
 }