You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sedona.apache.org by ji...@apache.org on 2021/03/03 19:03:38 UTC

[incubator-sedona] branch master updated: [SEDONA-19] Update JoinParams to not always set useIndex to false (#511)

This is an automated email from the ASF dual-hosted git repository.

jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-sedona.git


The following commit(s) were added to refs/heads/master by this push:
     new efd9915  [SEDONA-19] Update JoinParams to not always set useIndex to false (#511)
efd9915 is described below

commit efd9915832d99d81b68d577fafb4f0a661a1c8dc
Author: Adam Binford <ad...@gmail.com>
AuthorDate: Wed Mar 3 14:03:32 2021 -0500

    [SEDONA-19] Update JoinParams to not always set useIndex to false (#511)
    
    Co-authored-by: Adam Binford <ad...@maxar.com>
---
 .../sedona/core/spatialOperator/JoinQuery.java     |  9 +++----
 .../core/spatialOperator/LineStringJoinTest.java   |  2 +-
 .../sedona/core/spatialOperator/PointJoinTest.java |  2 +-
 .../core/spatialOperator/PolygonJoinTest.java      |  2 +-
 .../core/spatialOperator/RectangleJoinTest.java    |  2 +-
 .../scala/org/apache/sedona/core/scalaTest.scala   |  2 +-
 .../adapters/JoinParamsAdapter.scala               |  4 +--
 python/sedona/core/spatialOperator/join_params.py  |  6 +++--
 .../core/test_avoiding_python_jvm_serde_df.py      | 29 ++++++++++++----------
 .../core/test_avoiding_python_jvm_serde_to_rdd.py  |  2 +-
 python/tests/core/test_rdd.py                      |  2 +-
 .../tests/spatial_operator/test_linestring_join.py |  2 +-
 python/tests/spatial_operator/test_point_join.py   |  2 +-
 python/tests/spatial_operator/test_polygon_join.py |  2 +-
 .../tests/spatial_operator/test_rectangle_join.py  |  2 +-
 python/tests/sql/test_function.py                  |  2 +-
 .../strategy/join/TraitJoinQueryExec.scala         |  2 +-
 17 files changed, 38 insertions(+), 36 deletions(-)

diff --git a/core/src/main/java/org/apache/sedona/core/spatialOperator/JoinQuery.java b/core/src/main/java/org/apache/sedona/core/spatialOperator/JoinQuery.java
index ecdd90e..4cd70be 100644
--- a/core/src/main/java/org/apache/sedona/core/spatialOperator/JoinQuery.java
+++ b/core/src/main/java/org/apache/sedona/core/spatialOperator/JoinQuery.java
@@ -423,15 +423,12 @@ public class JoinQuery
 
         public JoinParams(boolean useIndex, boolean considerBoundaryIntersection)
         {
-            this.useIndex = useIndex;
-            this.considerBoundaryIntersection = considerBoundaryIntersection;
-            this.indexType = IndexType.RTREE;
-            this.joinBuildSide = JoinBuildSide.RIGHT;
+            this(useIndex, considerBoundaryIntersection, IndexType.RTREE, JoinBuildSide.RIGHT);
         }
 
-        public JoinParams(boolean considerBoundaryIntersection, IndexType polygonIndexType, JoinBuildSide joinBuildSide)
+        public JoinParams(boolean useIndex, boolean considerBoundaryIntersection, IndexType polygonIndexType, JoinBuildSide joinBuildSide)
         {
-            this.useIndex = false;
+            this.useIndex = useIndex;
             this.considerBoundaryIntersection = considerBoundaryIntersection;
             this.indexType = polygonIndexType;
             this.joinBuildSide = joinBuildSide;
diff --git a/core/src/test/java/org/apache/sedona/core/spatialOperator/LineStringJoinTest.java b/core/src/test/java/org/apache/sedona/core/spatialOperator/LineStringJoinTest.java
index 46ff4a6..477cedf 100644
--- a/core/src/test/java/org/apache/sedona/core/spatialOperator/LineStringJoinTest.java
+++ b/core/src/test/java/org/apache/sedona/core/spatialOperator/LineStringJoinTest.java
@@ -168,7 +168,7 @@ public class LineStringJoinTest
 
         partitionRdds(queryRDD, spatialRDD);
 
-        JoinQuery.JoinParams joinParams = new JoinQuery.JoinParams(true, indexType, JoinBuildSide.LEFT);
+        JoinQuery.JoinParams joinParams = new JoinQuery.JoinParams(true, true, indexType, JoinBuildSide.LEFT);
         List<Tuple2<Polygon, LineString>> results = JoinQuery.spatialJoin(queryRDD, spatialRDD, joinParams).collect();
 
         sanityCheckFlatJoinResults(results);
diff --git a/core/src/test/java/org/apache/sedona/core/spatialOperator/PointJoinTest.java b/core/src/test/java/org/apache/sedona/core/spatialOperator/PointJoinTest.java
index a4aff87..ea2c10d 100644
--- a/core/src/test/java/org/apache/sedona/core/spatialOperator/PointJoinTest.java
+++ b/core/src/test/java/org/apache/sedona/core/spatialOperator/PointJoinTest.java
@@ -226,7 +226,7 @@ public class PointJoinTest
 
         partitionRdds(queryRDD, spatialRDD);
 
-        JoinQuery.JoinParams joinParams = new JoinQuery.JoinParams(true, indexType, JoinBuildSide.LEFT);
+        JoinQuery.JoinParams joinParams = new JoinQuery.JoinParams(true, true, indexType, JoinBuildSide.LEFT);
         List<Tuple2<Polygon, Point>> results = JoinQuery.spatialJoin(queryRDD, spatialRDD, joinParams).collect();
 
         sanityCheckFlatJoinResults(results);
diff --git a/core/src/test/java/org/apache/sedona/core/spatialOperator/PolygonJoinTest.java b/core/src/test/java/org/apache/sedona/core/spatialOperator/PolygonJoinTest.java
index ece60f6..1b1606d 100644
--- a/core/src/test/java/org/apache/sedona/core/spatialOperator/PolygonJoinTest.java
+++ b/core/src/test/java/org/apache/sedona/core/spatialOperator/PolygonJoinTest.java
@@ -120,7 +120,7 @@ public class PolygonJoinTest
         final PolygonRDD spatialRDD = createPolygonRDD(InputLocation);
         partitionRdds(queryRDD, spatialRDD);
 
-        final JoinQuery.JoinParams joinParams = new JoinQuery.JoinParams(intersects, indexType, JoinBuildSide.LEFT);
+        final JoinQuery.JoinParams joinParams = new JoinQuery.JoinParams(true, intersects, indexType, JoinBuildSide.LEFT);
         final List<Tuple2<Polygon, Polygon>> results = JoinQuery.spatialJoin(queryRDD, spatialRDD, joinParams).collect();
         sanityCheckFlatJoinResults(results);
 
diff --git a/core/src/test/java/org/apache/sedona/core/spatialOperator/RectangleJoinTest.java b/core/src/test/java/org/apache/sedona/core/spatialOperator/RectangleJoinTest.java
index 87ffc2c..0b72723 100644
--- a/core/src/test/java/org/apache/sedona/core/spatialOperator/RectangleJoinTest.java
+++ b/core/src/test/java/org/apache/sedona/core/spatialOperator/RectangleJoinTest.java
@@ -164,7 +164,7 @@ public class RectangleJoinTest
 
         partitionRdds(queryRDD, spatialRDD);
 
-        JoinQuery.JoinParams joinParams = new JoinQuery.JoinParams(true, indexType, JoinBuildSide.LEFT);
+        JoinQuery.JoinParams joinParams = new JoinQuery.JoinParams(true, true, indexType, JoinBuildSide.LEFT);
         List<Tuple2<Polygon, Polygon>> result = JoinQuery.spatialJoin(queryRDD, spatialRDD, joinParams).collect();
 
         sanityCheckFlatJoinResults(result);
diff --git a/core/src/test/scala/org/apache/sedona/core/scalaTest.scala b/core/src/test/scala/org/apache/sedona/core/scalaTest.scala
index e73deaf..3dab72d 100644
--- a/core/src/test/scala/org/apache/sedona/core/scalaTest.scala
+++ b/core/src/test/scala/org/apache/sedona/core/scalaTest.scala
@@ -147,7 +147,7 @@ class scalaTest extends SparkUtil {
     queryWindowRDD.spatialPartitioning(objectRDD.getPartitioner)
 
     for (i <- 1 to eachQueryLoopTimes) {
-      val joinParams = new JoinParams(false, PolygonRDDIndexType, JoinBuildSide.LEFT)
+      val joinParams = new JoinParams(true, false, PolygonRDDIndexType, JoinBuildSide.LEFT)
       val resultSize = JoinQuery.spatialJoin(queryWindowRDD, objectRDD, joinParams).count()
     }
   }
diff --git a/python-adapter/src/main/scala/org.apache.sedona.python.wrapper/adapters/JoinParamsAdapter.scala b/python-adapter/src/main/scala/org.apache.sedona.python.wrapper/adapters/JoinParamsAdapter.scala
index 3785844..5ec1cb0 100644
--- a/python-adapter/src/main/scala/org.apache.sedona.python.wrapper/adapters/JoinParamsAdapter.scala
+++ b/python-adapter/src/main/scala/org.apache.sedona.python.wrapper/adapters/JoinParamsAdapter.scala
@@ -23,9 +23,9 @@ import org.apache.sedona.core.enums.{IndexType, JoinBuildSide}
 import org.apache.sedona.core.spatialOperator.JoinQuery.JoinParams
 
 object JoinParamsAdapter {
-  def createJoinParams(useIndex: Boolean = false, indexType: String, joinBuildSide: String): JoinParams = {
+  def createJoinParams(useIndex: Boolean = true, considerBoundaryIntersection: Boolean = false, indexType: String, joinBuildSide: String): JoinParams = {
     val buildSide = JoinBuildSide.getBuildSide(joinBuildSide)
     val currIndexType = IndexType.getIndexType(indexType)
-    new JoinParams(useIndex, currIndexType, buildSide)
+    new JoinParams(useIndex, considerBoundaryIntersection, currIndexType, buildSide)
   }
 }
\ No newline at end of file
diff --git a/python/sedona/core/spatialOperator/join_params.py b/python/sedona/core/spatialOperator/join_params.py
index 981df65..232e6b2 100644
--- a/python/sedona/core/spatialOperator/join_params.py
+++ b/python/sedona/core/spatialOperator/join_params.py
@@ -25,21 +25,23 @@ from sedona.core.jvm.abstract import JvmObject
 @attr.s
 class JoinParams:
     useIndex = attr.ib(type=bool, default=True)
+    considerBoundaryIntersection = attr.ib(type=bool, default=False)
     indexType = attr.ib(type=str, default=IndexType.RTREE)
     joinBuildSide = attr.ib(type=str, default=JoinBuildSide.LEFT)
 
     def jvm_instance(self, jvm):
-        return JvmJoinParams(jvm, self.useIndex, self.indexType, self.joinBuildSide).jvm_instance
+        return JvmJoinParams(jvm, self.useIndex, self.considerBoundaryIntersection, self.indexType, self.joinBuildSide).jvm_instance
 
 
 @attr.s
 class JvmJoinParams(JvmObject):
     useIndex = attr.ib(type=bool, default=True)
+    considerBoundaryIntersection = attr.ib(type=bool, default=False)
     indexType = attr.ib(type=str, default=IndexType.RTREE)
     joinBuildSide = attr.ib(type=str, default=JoinBuildSide.LEFT)
 
     def _create_jvm_instance(self):
-        return self.jvm_reference(self.useIndex, self.indexType.value, self.joinBuildSide)
+        return self.jvm_reference(self.useIndex, self.considerBoundaryIntersection, self.indexType.value, self.joinBuildSide)
 
     @property
     def jvm_reference(self):
diff --git a/python/tests/core/test_avoiding_python_jvm_serde_df.py b/python/tests/core/test_avoiding_python_jvm_serde_df.py
index 8677718..d86a965 100644
--- a/python/tests/core/test_avoiding_python_jvm_serde_df.py
+++ b/python/tests/core/test_avoiding_python_jvm_serde_df.py
@@ -47,7 +47,7 @@ class TestOmitPythonJvmSerdeToDf(TestBase):
         poi_point_rdd.spatialPartitioning(GridType.QUADTREE)
         areas_polygon_rdd.spatialPartitioning(poi_point_rdd.getPartitioner())
 
-        jvm_sedona_rdd = JoinQueryRaw.spatialJoin(poi_point_rdd, areas_polygon_rdd, JoinParams())
+        jvm_sedona_rdd = JoinQueryRaw.spatialJoin(poi_point_rdd, areas_polygon_rdd, JoinParams(considerBoundaryIntersection=True))
         sedona_df = Adapter.toDf(jvm_sedona_rdd, ["area_id", "area_name"], ["poi_id", "poi_name"], self.spark)
 
         assert sedona_df.count() == 5
@@ -130,18 +130,21 @@ class TestOmitPythonJvmSerdeToDf(TestBase):
 
         right_geometries = self.__row_to_list(right_geometries_raw)
 
-        assert left_geometries == [
-            ['POLYGON ((0 4, -3 3, -8 6, -6 8, -2 9, 0 4))'],
-            ['POLYGON ((2 2, 2 4, 3 5, 7 5, 9 3, 8 1, 4 1, 2 2))'],
-            ['POLYGON ((10 3, 10 6, 14 6, 14 3, 10 3))'],
-            ['POLYGON ((-1 -1, -1 -3, -2 -5, -6 -8, -5 -2, -3 -2, -1 -1))'],
-            ['POLYGON ((-1 -1, -1 -3, -2 -5, -6 -8, -5 -2, -3 -2, -1 -1))']
-        ]
-        assert right_geometries == [['POINT (-3 5)'],
-                                    ['POINT (4 3)'],
-                                    ['POINT (11 5)'],
-                                    ['POINT (-1 -1)'],
-                                    ['POINT (-4 -5)']]
+        # Ignore the ordering of these
+        assert set(geom[0] for geom in left_geometries) == set([
+            'POLYGON ((0 4, -3 3, -8 6, -6 8, -2 9, 0 4))',
+            'POLYGON ((10 3, 10 6, 14 6, 14 3, 10 3))',
+            'POLYGON ((2 2, 2 4, 3 5, 7 5, 9 3, 8 1, 4 1, 2 2))',
+            'POLYGON ((-1 -1, -1 -3, -2 -5, -6 -8, -5 -2, -3 -2, -1 -1))',
+            'POLYGON ((-1 -1, -1 -3, -2 -5, -6 -8, -5 -2, -3 -2, -1 -1))'
+        ])
+        assert set(geom[0] for geom in right_geometries) == set([
+            'POINT (-3 5)',
+            'POINT (11 5)',
+            'POINT (4 3)',
+            'POINT (-1 -1)',
+            'POINT (-4 -5)'
+        ])
 
     def test_range_query_flat_to_df(self):
         poi_point_rdd = WktReader.readToGeometryRDD(self.sc, bank_csv_path, 1, False, False)
diff --git a/python/tests/core/test_avoiding_python_jvm_serde_to_rdd.py b/python/tests/core/test_avoiding_python_jvm_serde_to_rdd.py
index 1a4b304..bfbd1de 100644
--- a/python/tests/core/test_avoiding_python_jvm_serde_to_rdd.py
+++ b/python/tests/core/test_avoiding_python_jvm_serde_to_rdd.py
@@ -44,7 +44,7 @@ class TestOmitPythonJvmSerdeToRDD(TestBase):
         poi_point_rdd.spatialPartitioning(GridType.QUADTREE)
         areas_polygon_rdd.spatialPartitioning(poi_point_rdd.getPartitioner())
 
-        jvm_sedona_rdd = JoinQueryRaw.spatialJoin(poi_point_rdd, areas_polygon_rdd, JoinParams())
+        jvm_sedona_rdd = JoinQueryRaw.spatialJoin(poi_point_rdd, areas_polygon_rdd, JoinParams(considerBoundaryIntersection=True))
         sedona_rdd = jvm_sedona_rdd.to_rdd().collect()
         assert sedona_rdd.__len__() == 5
 
diff --git a/python/tests/core/test_rdd.py b/python/tests/core/test_rdd.py
index 2ccbf32..df51966 100644
--- a/python/tests/core/test_rdd.py
+++ b/python/tests/core/test_rdd.py
@@ -269,7 +269,7 @@ class TestSpatialRDD(TestBase):
         query_window_rdd.spatialPartitioning(object_rdd.getPartitioner())
 
         for i in range(each_query_loop_times):
-            join_params = JoinParams(False, polygon_rdd_index_type, JoinBuildSide.LEFT)
+            join_params = JoinParams(True, False, polygon_rdd_index_type, JoinBuildSide.LEFT)
             resultSize = JoinQuery.spatialJoin(
                 query_window_rdd,
                 object_rdd,
diff --git a/python/tests/spatial_operator/test_linestring_join.py b/python/tests/spatial_operator/test_linestring_join.py
index 1390713..14afa15 100644
--- a/python/tests/spatial_operator/test_linestring_join.py
+++ b/python/tests/spatial_operator/test_linestring_join.py
@@ -78,7 +78,7 @@ class TestRectangleJoin(TestJoinBase):
 
         self.partition_rdds(query_rdd, spatial_rdd, grid_type)
 
-        join_params = JoinParams(True, index_type, JoinBuildSide.LEFT)
+        join_params = JoinParams(True, True, index_type, JoinBuildSide.LEFT)
         result = JoinQuery.spatialJoin(query_rdd, spatial_rdd, join_params).collect()
 
         self.sanity_check_flat_join_results(result)
diff --git a/python/tests/spatial_operator/test_point_join.py b/python/tests/spatial_operator/test_point_join.py
index d0547a2..5da2161 100644
--- a/python/tests/spatial_operator/test_point_join.py
+++ b/python/tests/spatial_operator/test_point_join.py
@@ -152,7 +152,7 @@ class TestRectangleJoin(TestJoinBase):
         spatial_rdd = self.create_point_rdd(input_location, splitter, num_partitions)
 
         self.partition_rdds(query_rdd, spatial_rdd, grid_type)
-        join_params = JoinParams(True, index_type, JoinBuildSide.LEFT)
+        join_params = JoinParams(True, True, index_type, JoinBuildSide.LEFT)
         results = JoinQuery.spatialJoin(query_rdd, spatial_rdd, join_params).collect()
 
         self.sanity_check_flat_join_results(results)
diff --git a/python/tests/spatial_operator/test_polygon_join.py b/python/tests/spatial_operator/test_polygon_join.py
index 5191fc1..7713709 100644
--- a/python/tests/spatial_operator/test_polygon_join.py
+++ b/python/tests/spatial_operator/test_polygon_join.py
@@ -79,7 +79,7 @@ class TestRectangleJoin(TestJoinBase):
 
         self.partition_rdds(query_rdd, spatial_rdd, grid_type)
 
-        join_params = JoinParams(intersects, index_type, JoinBuildSide.LEFT)
+        join_params = JoinParams(True, intersects, index_type, JoinBuildSide.LEFT)
         result = JoinQuery.spatialJoin(query_rdd, spatial_rdd, join_params).collect()
 
         self.sanity_check_flat_join_results(result)
diff --git a/python/tests/spatial_operator/test_rectangle_join.py b/python/tests/spatial_operator/test_rectangle_join.py
index a8ce71d..2c101d8 100644
--- a/python/tests/spatial_operator/test_rectangle_join.py
+++ b/python/tests/spatial_operator/test_rectangle_join.py
@@ -84,7 +84,7 @@ class TestRectangleJoin(TestJoinBase):
 
         self.partition_rdds(query_rdd, spatial_rdd, grid_type)
 
-        join_params = JoinParams(True, index_type, JoinBuildSide.LEFT)
+        join_params = JoinParams(True, True, index_type, JoinBuildSide.LEFT)
         result = JoinQuery.spatialJoin(query_rdd, spatial_rdd, join_params).collect()
 
         self.sanity_check_flat_join_results(result)
diff --git a/python/tests/sql/test_function.py b/python/tests/sql/test_function.py
index 774e4fc..77a7c0b 100644
--- a/python/tests/sql/test_function.py
+++ b/python/tests/sql/test_function.py
@@ -484,7 +484,7 @@ class TestPredicateJoin(TestBase):
         dumped_points = geometry_df.selectExpr("ST_DumpPoints(geom) as geom") \
             .select(explode(col("geom")).alias("geom"))
 
-        assert(dumped_points.count(), 10)
+        assert(dumped_points.count() == 10)
 
         collected_points = [geom_row[0] for geom_row in dumped_points.selectExpr("ST_AsText(geom)").collect()]
         assert(collected_points == expected_points)
diff --git a/sql/src/main/scala/org/apache/spark/sql/sedona_sql/strategy/join/TraitJoinQueryExec.scala b/sql/src/main/scala/org/apache/spark/sql/sedona_sql/strategy/join/TraitJoinQueryExec.scala
index b6fb28b..d00194b 100644
--- a/sql/src/main/scala/org/apache/spark/sql/sedona_sql/strategy/join/TraitJoinQueryExec.scala
+++ b/sql/src/main/scala/org/apache/spark/sql/sedona_sql/strategy/join/TraitJoinQueryExec.scala
@@ -122,7 +122,7 @@ Predicate.create(extraCondition.get, left.output ++ right.output).eval _ // SPAR
     }
 
 
-    val joinParams = new JoinParams(intersects, sedonaConf.getIndexType, sedonaConf.getJoinBuildSide)
+    val joinParams = new JoinParams(sedonaConf.getUseIndex, intersects, sedonaConf.getIndexType, sedonaConf.getJoinBuildSide)
 
     //logInfo(s"leftShape count ${leftShapes.spatialPartitionedRDD.count()}")
     //logInfo(s"rightShape count ${rightShapes.spatialPartitionedRDD.count()}")