You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sedona.apache.org by ji...@apache.org on 2023/05/29 18:47:54 UTC

[sedona] branch change-distancesphere-value created (now 2585035e)

This is an automated email from the ASF dual-hosted git repository.

jiayu pushed a change to branch change-distancesphere-value
in repository https://gitbox.apache.org/repos/asf/sedona.git


      at 2585035e Change ST_DistanceSphere default radius to 6371008

This branch includes the following new commits:

     new 2585035e Change ST_DistanceSphere default radius to 6371008

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[sedona] 01/01: Change ST_DistanceSphere default radius to 6371008

Posted by ji...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

jiayu pushed a commit to branch change-distancesphere-value
in repository https://gitbox.apache.org/repos/asf/sedona.git

commit 2585035e44a050119b8622f1c7976217cd655357
Author: Jia Yu <ji...@apache.org>
AuthorDate: Mon May 29 11:47:43 2023 -0700

    Change ST_DistanceSphere default radius to 6371008
---
 .../java/org/apache/sedona/common/sphere/Haversine.java    |  2 +-
 .../test/java/org/apache/sedona/common/FunctionsTest.java  | 14 +++++++-------
 docs/api/flink/Function.md                                 |  4 ++--
 docs/api/sql/Function.md                                   |  8 ++++----
 .../test/java/org/apache/sedona/flink/FunctionTest.java    |  2 +-
 python/sedona/sql/st_functions.py                          |  4 ++--
 .../src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala |  2 +-
 .../spark/sql/sedona_sql/expressions/st_functions.scala    |  4 ++--
 .../org/apache/sedona/sql/dataFrameAPITestScala.scala      |  3 ++-
 .../scala/org/apache/sedona/sql/functionTestScala.scala    |  4 ++--
 10 files changed, 24 insertions(+), 23 deletions(-)

diff --git a/common/src/main/java/org/apache/sedona/common/sphere/Haversine.java b/common/src/main/java/org/apache/sedona/common/sphere/Haversine.java
index ab46aae7..7c8c15ac 100644
--- a/common/src/main/java/org/apache/sedona/common/sphere/Haversine.java
+++ b/common/src/main/java/org/apache/sedona/common/sphere/Haversine.java
@@ -60,6 +60,6 @@ public class Haversine
     // The radius of the earth is 6371.0 km
     public static double distance(Geometry geom1, Geometry geom2)
     {
-        return distance(geom1, geom2, 6378137.0);
+        return distance(geom1, geom2, 6371008.0);
     }
 }
diff --git a/common/src/test/java/org/apache/sedona/common/FunctionsTest.java b/common/src/test/java/org/apache/sedona/common/FunctionsTest.java
index b81e85a6..029c888a 100644
--- a/common/src/test/java/org/apache/sedona/common/FunctionsTest.java
+++ b/common/src/test/java/org/apache/sedona/common/FunctionsTest.java
@@ -449,33 +449,33 @@ public class FunctionsTest {
         // Basic check
         Point p1 = GEOMETRY_FACTORY.createPoint(new Coordinate(0, 90));
         Point p2 = GEOMETRY_FACTORY.createPoint(new Coordinate(0, 0));
-        assertEquals(1.0018754171394622E7, Haversine.distance(p1, p2), 0.1);
+        assertEquals(1.00075559643809E7, Haversine.distance(p1, p2), 0.1);
 
         p1 = GEOMETRY_FACTORY.createPoint(new Coordinate(51.3168, -0.56));
         p2 = GEOMETRY_FACTORY.createPoint(new Coordinate(55.9533, -3.1883));
-        assertEquals(544405.4459192449, Haversine.distance(p1, p2), 0.1);
+        assertEquals(543796.9506134904, Haversine.distance(p1, p2), 0.1);
 
         p1 = GEOMETRY_FACTORY.createPoint(new Coordinate(48.353889, 11.786111));
         p2 = GEOMETRY_FACTORY.createPoint(new Coordinate(50.033333, 8.570556));
-        assertEquals(299407.6894786948, Haversine.distance(p1, p2), 0.1);
+        assertEquals(299073.03416817175, Haversine.distance(p1, p2), 0.1);
 
         p1 = GEOMETRY_FACTORY.createPoint(new Coordinate(48.353889, 11.786111));
         p2 = GEOMETRY_FACTORY.createPoint(new Coordinate(52.559722, 13.287778));
-        assertEquals(480106.0821386384, Haversine.distance(p1, p2), 0.1);
+        assertEquals(479569.4558072244, Haversine.distance(p1, p2), 0.1);
 
         LineString l1 = GEOMETRY_FACTORY.createLineString(coordArray(0, 0, 0, 90));
         LineString l2 = GEOMETRY_FACTORY.createLineString(coordArray(0, 1, 0, 0));
-        assertEquals(4953717.340300673, Haversine.distance(l1, l2), 0.1);
+        assertEquals(4948180.449055, Haversine.distance(l1, l2), 0.1);
 
         // HK to Sydney
         p1 = GEOMETRY_FACTORY.createPoint(new Coordinate(22.308919, 113.914603));
         p2 = GEOMETRY_FACTORY.createPoint(new Coordinate(-33.946111, 151.177222));
-        assertEquals(7402166.655938837, Haversine.distance(p1, p2), 0.1);
+        assertEquals(7393893.072901942, Haversine.distance(p1, p2), 0.1);
 
         // HK to Toronto
         p1 = GEOMETRY_FACTORY.createPoint(new Coordinate(22.308919, 113.914603));
         p2 = GEOMETRY_FACTORY.createPoint(new Coordinate(43.677223, -79.630556));
-        assertEquals(1.2562590459399283E7, Haversine.distance(p1, p2), 0.1);
+        assertEquals(1.2548548944238186E7, Haversine.distance(p1, p2), 0.1);
     }
 
     @Test
diff --git a/docs/api/flink/Function.md b/docs/api/flink/Function.md
index 813ed1c1..e2f1d142 100644
--- a/docs/api/flink/Function.md
+++ b/docs/api/flink/Function.md
@@ -278,7 +278,7 @@ FROM polygondf
 
 ## ST_DistanceSphere
 
-Introduction: Return the haversine / great-circle distance of A using a given earth radius (default radius: 6378137.0). Unit is meter. Works better for large geometries (country level) compared to `ST_Distance` + `ST_Transform`. It is equivalent to PostGIS `ST_Distance(geography, use_spheroid=false)` and `ST_DistanceSphere` function and produces nearly identical results. It provides faster but less accurate result compared to `ST_DistanceSpheroid`.
+Introduction: Return the haversine / great-circle distance of A using a given earth radius (default radius: 6371008.0). Unit is meter. Works better for large geometries (country level) compared to `ST_Distance` + `ST_Transform`. It is equivalent to PostGIS `ST_Distance(geography, use_spheroid=false)` and `ST_DistanceSphere` function and produces nearly identical results. It provides faster but less accurate result compared to `ST_DistanceSpheroid`.
 
 Geometry must be in EPSG:4326 (WGS84) projection and must be in lat/lon order. You can use ==ST_FlipCoordinates== to swap lat and lon. For non-point data, we first take the centroids of both geometries and then compute the distance.
 
@@ -292,7 +292,7 @@ Example 1:
 SELECT ST_DistanceSphere(ST_GeomFromWKT('POINT (51.3168 -0.56)'), ST_GeomFromWKT('POINT (55.9533 -3.1883)'))
 ```
 
-Output: `544405.4459192449`
+Output: `543796.9506134904`
 
 Example 2:
 
diff --git a/docs/api/sql/Function.md b/docs/api/sql/Function.md
index b8643268..3eba9a03 100644
--- a/docs/api/sql/Function.md
+++ b/docs/api/sql/Function.md
@@ -51,7 +51,7 @@ FROM polygondf
 
 ## ST_AreaSpheroid
 
-Introduction: Return the geodesic area of A using WGS84 spheroid. Unit is meter. Works better for large geometries (country level) compared to `ST_Area` + `ST_Transform`. It is equivalent to PostGIS `ST_Area(geography, use_spheroid=true)` function and produces nearly identical results.
+Introduction: Return the geodesic area of A using WGS84 spheroid. Unit is square meter. Works better for large geometries (country level) compared to `ST_Area` + `ST_Transform`. It is equivalent to PostGIS `ST_Area(geography, use_spheroid=true)` function and produces nearly identical results.
 
 Geometry must be in EPSG:4326 (WGS84) projection and must be in ==lat/lon== order. You can use ==ST_FlipCoordinates== to swap lat and lon.
 
@@ -416,7 +416,7 @@ FROM polygondf
 
 ## ST_DistanceSphere
 
-Introduction: Return the haversine / great-circle distance of A using a given earth radius (default radius: 6378137.0). Unit is meter. Works better for large geometries (country level) compared to `ST_Distance` + `ST_Transform`. It is equivalent to PostGIS `ST_Distance(geography, use_spheroid=false)` and `ST_DistanceSphere` function and produces nearly identical results. It provides faster but less accurate result compared to `ST_DistanceSpheroid`.
+Introduction: Return the haversine / great-circle distance of A using a given earth radius (default radius: 6371008.0). Unit is meter. Compared to `ST_Distance` + `ST_Transform`, it works better for datasets that cover large regions such as continents or the entire planet. It is equivalent to PostGIS `ST_Distance(geography, use_spheroid=false)` and `ST_DistanceSphere` function and produces nearly identical results. It provides faster but less accurate result compared to `ST_DistanceSpheroid`.
 
 Geometry must be in EPSG:4326 (WGS84) projection and must be in ==lat/lon== order. You can use ==ST_FlipCoordinates== to swap lat and lon. For non-point data, we first take the centroids of both geometries and then compute the distance.
 
@@ -429,7 +429,7 @@ Spark SQL example 1:
 SELECT ST_DistanceSphere(ST_GeomFromWKT('POINT (51.3168 -0.56)'), ST_GeomFromWKT('POINT (55.9533 -3.1883)'))
 ```
 
-Output: `544405.4459192449`
+Output: `543796.9506134904`
 
 Spark SQL example 2:
 ```sql
@@ -441,7 +441,7 @@ Output: `544405.4459192449`
 
 ## ST_DistanceSpheroid
 
-Introduction: Return the geodesic distance of A using WGS84 spheroid. Unit is meter. Works better for large geometries (country level) compared to `ST_Distance` + `ST_Transform`. It is equivalent to PostGIS `ST_Distance(geography, use_spheroid=true)` and `ST_DistanceSpheroid` function and produces nearly identical results. It provides slower but more accurate result compared to `ST_DistanceSphere`.
+Introduction: Return the geodesic distance of A using WGS84 spheroid. Unit is meter. Compared to `ST_Distance` + `ST_Transform`, it works better for datasets that cover large regions such as continents or the entire planet. It is equivalent to PostGIS `ST_Distance(geography, use_spheroid=true)` and `ST_DistanceSpheroid` function and produces nearly identical results. It provides slower but more accurate result compared to `ST_DistanceSphere`.
 
 Geometry must be in EPSG:4326 (WGS84) projection and must be in ==lat/lon== order. You can use ==ST_FlipCoordinates== to swap lat and lon. For non-point data, we first take the centroids of both geometries and then compute the distance.
 
diff --git a/flink/src/test/java/org/apache/sedona/flink/FunctionTest.java b/flink/src/test/java/org/apache/sedona/flink/FunctionTest.java
index f04328e0..933e216f 100644
--- a/flink/src/test/java/org/apache/sedona/flink/FunctionTest.java
+++ b/flink/src/test/java/org/apache/sedona/flink/FunctionTest.java
@@ -175,7 +175,7 @@ public class FunctionTest extends TestBase{
     public void testDistanceSphere() {
         Table tbl = tableEnv.sqlQuery(
                 "SELECT ST_DistanceSphere(ST_GeomFromWKT('POINT (51.3168 -0.56)'), ST_GeomFromWKT('POINT (55.9533 -3.1883)'))");
-        Double expected = 544405.4459192449;
+        Double expected = 543796.9506134904;
         Double actual = (Double) first(tbl).getField(0);
         assertEquals(expected, actual, 0.1);
     }
diff --git a/python/sedona/sql/st_functions.py b/python/sedona/sql/st_functions.py
index 2c5196a3..f3cea50b 100644
--- a/python/sedona/sql/st_functions.py
+++ b/python/sedona/sql/st_functions.py
@@ -424,14 +424,14 @@ def ST_DistanceSpheroid(a: ColumnOrName, b: ColumnOrName) -> Column:
     return _call_st_function("ST_DistanceSpheroid", (a, b))
 
 @validate_argument_types
-def ST_DistanceSphere(a: ColumnOrName, b: ColumnOrName, radius: Optional[Union[ColumnOrName, float]] = 6378137.0) -> Column:
+def ST_DistanceSphere(a: ColumnOrName, b: ColumnOrName, radius: Optional[Union[ColumnOrName, float]] = 6371008.0) -> Column:
     """Calculate the haversine/great-circle distance between two geometry columns using a given radius.
 
     :param a: Geometry column to use in the calculation.
     :type a: ColumnOrName
     :param b: Other geometry column to use in the calculation.
     :type b: ColumnOrName
-    :param radius: Radius of the sphere, defaults to 6378137.0
+    :param radius: Radius of the sphere, defaults to 6371008.0
     :type radius: Optional[Union[ColumnOrName, float]], optional
     :return: Two-dimensional haversine/great-circle distance between a and b as a double column. Unit is meter.
     :rtype: Column
diff --git a/sql/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala b/sql/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala
index 0842f467..d50af2ab 100644
--- a/sql/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala
+++ b/sql/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala
@@ -143,7 +143,7 @@ object Catalog {
     function[ST_Split](),
     function[ST_S2CellIDs](),
     function[ST_GeometricMedian](1e-6, 1000, false),
-    function[ST_DistanceSphere](6378137.0),
+    function[ST_DistanceSphere](6371008.0),
     function[ST_DistanceSpheroid](),
     function[ST_AreaSpheroid](),
     function[ST_LengthSpheroid](),
diff --git a/sql/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_functions.scala b/sql/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_functions.scala
index f7a56bf8..ad29b854 100644
--- a/sql/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_functions.scala
+++ b/sql/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_functions.scala
@@ -285,8 +285,8 @@ object st_functions extends DataFrameAPI {
   def ST_GeometricMedian(geometry: Column, tolerance: Column, maxIter: Column, failIfNotConverged: Column): Column = wrapExpression[ST_GeometricMedian](geometry, tolerance, maxIter, failIfNotConverged)
   def ST_GeometricMedian(geometry: String, tolerance: Double, maxIter: Int, failIfNotConverged: Boolean): Column = wrapExpression[ST_GeometricMedian](geometry, tolerance, maxIter, failIfNotConverged)
 
-  def ST_DistanceSphere(a: Column, b: Column): Column = wrapExpression[ST_DistanceSphere](a, b, 6378137.0)
-  def ST_DistanceSphere(a: String, b: String): Column = wrapExpression[ST_DistanceSphere](a, b, 6378137.0)
+  def ST_DistanceSphere(a: Column, b: Column): Column = wrapExpression[ST_DistanceSphere](a, b, 6371008.0)
+  def ST_DistanceSphere(a: String, b: String): Column = wrapExpression[ST_DistanceSphere](a, b, 6371008.0)
   def ST_DistanceSphere(a: Column, b: Column, c: Column): Column = wrapExpression[ST_DistanceSphere](a, b, c)
   def ST_DistanceSphere(a: String, b: String, c: Double): Column = wrapExpression[ST_DistanceSphere](a, b, c)
 
diff --git a/sql/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala b/sql/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala
index 24a3d2b8..787a7300 100644
--- a/sql/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala
+++ b/sql/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala
@@ -917,11 +917,12 @@ class dataFrameAPITestScala extends TestBaseScala {
       val baseDf = sparkSession.sql("SELECT ST_GeomFromWKT('POINT (0 0)') AS geom1, ST_GeomFromWKT('POINT (0 90)') AS geom2")
       var df = baseDf.select(ST_DistanceSphere("geom1", "geom2"))
       var actualResult = df.take(1)(0).getDouble(0)
-      val expectedResult = 10018754.171394622
+      var expectedResult = 1.00075559643809E7
       assert(actualResult == expectedResult)
 
       df = baseDf.select(ST_DistanceSphere("geom1", "geom2", 6378137.0))
       actualResult = df.take(1)(0).getDouble(0)
+      expectedResult = 1.0018754171394622E7
       assertEquals(expectedResult, actualResult, 0.1)
     }
 
diff --git a/sql/common/src/test/scala/org/apache/sedona/sql/functionTestScala.scala b/sql/common/src/test/scala/org/apache/sedona/sql/functionTestScala.scala
index a3336ca1..0e598e6b 100644
--- a/sql/common/src/test/scala/org/apache/sedona/sql/functionTestScala.scala
+++ b/sql/common/src/test/scala/org/apache/sedona/sql/functionTestScala.scala
@@ -1842,8 +1842,8 @@ class functionTestScala extends TestBaseScala with Matchers with GeometrySample
 
   it("Should pass ST_DistanceSphere") {
     val geomTestCases = Map(
-      ("'POINT (51.3168 -0.56)'", "'POINT (55.9533 -3.1883)'") -> "544405.4459192449",
-      ("'LineString (0 0, 0 90)'", "'LineString (0 1, 0 0)'") -> "4953717.340300673"
+      ("'POINT (51.3168 -0.56)'", "'POINT (55.9533 -3.1883)'") -> "543796.9506134904",
+      ("'LineString (0 0, 0 90)'", "'LineString (0 1, 0 0)'") -> "4948180.449055"
     )
     for (((geom1, geom2), expectedResult) <- geomTestCases) {
       val df = sparkSession.sql(s"SELECT ST_DistanceSphere(ST_GeomFromWKT($geom1), ST_GeomFromWKT($geom2)), " +