You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sedona.apache.org by ji...@apache.org on 2021/03/03 19:03:38 UTC
[incubator-sedona] branch master updated: [SEDONA-19] Update JoinParams to not always set useIndex to false (#511)
This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-sedona.git
The following commit(s) were added to refs/heads/master by this push:
new efd9915 [SEDONA-19] Update JoinParams to not always set useIndex to false (#511)
efd9915 is described below
commit efd9915832d99d81b68d577fafb4f0a661a1c8dc
Author: Adam Binford <ad...@gmail.com>
AuthorDate: Wed Mar 3 14:03:32 2021 -0500
[SEDONA-19] Update JoinParams to not always set useIndex to false (#511)
Co-authored-by: Adam Binford <ad...@maxar.com>
---
.../sedona/core/spatialOperator/JoinQuery.java | 9 +++----
.../core/spatialOperator/LineStringJoinTest.java | 2 +-
.../sedona/core/spatialOperator/PointJoinTest.java | 2 +-
.../core/spatialOperator/PolygonJoinTest.java | 2 +-
.../core/spatialOperator/RectangleJoinTest.java | 2 +-
.../scala/org/apache/sedona/core/scalaTest.scala | 2 +-
.../adapters/JoinParamsAdapter.scala | 4 +--
python/sedona/core/spatialOperator/join_params.py | 6 +++--
.../core/test_avoiding_python_jvm_serde_df.py | 29 ++++++++++++----------
.../core/test_avoiding_python_jvm_serde_to_rdd.py | 2 +-
python/tests/core/test_rdd.py | 2 +-
.../tests/spatial_operator/test_linestring_join.py | 2 +-
python/tests/spatial_operator/test_point_join.py | 2 +-
python/tests/spatial_operator/test_polygon_join.py | 2 +-
.../tests/spatial_operator/test_rectangle_join.py | 2 +-
python/tests/sql/test_function.py | 2 +-
.../strategy/join/TraitJoinQueryExec.scala | 2 +-
17 files changed, 38 insertions(+), 36 deletions(-)
diff --git a/core/src/main/java/org/apache/sedona/core/spatialOperator/JoinQuery.java b/core/src/main/java/org/apache/sedona/core/spatialOperator/JoinQuery.java
index ecdd90e..4cd70be 100644
--- a/core/src/main/java/org/apache/sedona/core/spatialOperator/JoinQuery.java
+++ b/core/src/main/java/org/apache/sedona/core/spatialOperator/JoinQuery.java
@@ -423,15 +423,12 @@ public class JoinQuery
public JoinParams(boolean useIndex, boolean considerBoundaryIntersection)
{
- this.useIndex = useIndex;
- this.considerBoundaryIntersection = considerBoundaryIntersection;
- this.indexType = IndexType.RTREE;
- this.joinBuildSide = JoinBuildSide.RIGHT;
+ this(useIndex, considerBoundaryIntersection, IndexType.RTREE, JoinBuildSide.RIGHT);
}
- public JoinParams(boolean considerBoundaryIntersection, IndexType polygonIndexType, JoinBuildSide joinBuildSide)
+ public JoinParams(boolean useIndex, boolean considerBoundaryIntersection, IndexType polygonIndexType, JoinBuildSide joinBuildSide)
{
- this.useIndex = false;
+ this.useIndex = useIndex;
this.considerBoundaryIntersection = considerBoundaryIntersection;
this.indexType = polygonIndexType;
this.joinBuildSide = joinBuildSide;
diff --git a/core/src/test/java/org/apache/sedona/core/spatialOperator/LineStringJoinTest.java b/core/src/test/java/org/apache/sedona/core/spatialOperator/LineStringJoinTest.java
index 46ff4a6..477cedf 100644
--- a/core/src/test/java/org/apache/sedona/core/spatialOperator/LineStringJoinTest.java
+++ b/core/src/test/java/org/apache/sedona/core/spatialOperator/LineStringJoinTest.java
@@ -168,7 +168,7 @@ public class LineStringJoinTest
partitionRdds(queryRDD, spatialRDD);
- JoinQuery.JoinParams joinParams = new JoinQuery.JoinParams(true, indexType, JoinBuildSide.LEFT);
+ JoinQuery.JoinParams joinParams = new JoinQuery.JoinParams(true, true, indexType, JoinBuildSide.LEFT);
List<Tuple2<Polygon, LineString>> results = JoinQuery.spatialJoin(queryRDD, spatialRDD, joinParams).collect();
sanityCheckFlatJoinResults(results);
diff --git a/core/src/test/java/org/apache/sedona/core/spatialOperator/PointJoinTest.java b/core/src/test/java/org/apache/sedona/core/spatialOperator/PointJoinTest.java
index a4aff87..ea2c10d 100644
--- a/core/src/test/java/org/apache/sedona/core/spatialOperator/PointJoinTest.java
+++ b/core/src/test/java/org/apache/sedona/core/spatialOperator/PointJoinTest.java
@@ -226,7 +226,7 @@ public class PointJoinTest
partitionRdds(queryRDD, spatialRDD);
- JoinQuery.JoinParams joinParams = new JoinQuery.JoinParams(true, indexType, JoinBuildSide.LEFT);
+ JoinQuery.JoinParams joinParams = new JoinQuery.JoinParams(true, true, indexType, JoinBuildSide.LEFT);
List<Tuple2<Polygon, Point>> results = JoinQuery.spatialJoin(queryRDD, spatialRDD, joinParams).collect();
sanityCheckFlatJoinResults(results);
diff --git a/core/src/test/java/org/apache/sedona/core/spatialOperator/PolygonJoinTest.java b/core/src/test/java/org/apache/sedona/core/spatialOperator/PolygonJoinTest.java
index ece60f6..1b1606d 100644
--- a/core/src/test/java/org/apache/sedona/core/spatialOperator/PolygonJoinTest.java
+++ b/core/src/test/java/org/apache/sedona/core/spatialOperator/PolygonJoinTest.java
@@ -120,7 +120,7 @@ public class PolygonJoinTest
final PolygonRDD spatialRDD = createPolygonRDD(InputLocation);
partitionRdds(queryRDD, spatialRDD);
- final JoinQuery.JoinParams joinParams = new JoinQuery.JoinParams(intersects, indexType, JoinBuildSide.LEFT);
+ final JoinQuery.JoinParams joinParams = new JoinQuery.JoinParams(true, intersects, indexType, JoinBuildSide.LEFT);
final List<Tuple2<Polygon, Polygon>> results = JoinQuery.spatialJoin(queryRDD, spatialRDD, joinParams).collect();
sanityCheckFlatJoinResults(results);
diff --git a/core/src/test/java/org/apache/sedona/core/spatialOperator/RectangleJoinTest.java b/core/src/test/java/org/apache/sedona/core/spatialOperator/RectangleJoinTest.java
index 87ffc2c..0b72723 100644
--- a/core/src/test/java/org/apache/sedona/core/spatialOperator/RectangleJoinTest.java
+++ b/core/src/test/java/org/apache/sedona/core/spatialOperator/RectangleJoinTest.java
@@ -164,7 +164,7 @@ public class RectangleJoinTest
partitionRdds(queryRDD, spatialRDD);
- JoinQuery.JoinParams joinParams = new JoinQuery.JoinParams(true, indexType, JoinBuildSide.LEFT);
+ JoinQuery.JoinParams joinParams = new JoinQuery.JoinParams(true, true, indexType, JoinBuildSide.LEFT);
List<Tuple2<Polygon, Polygon>> result = JoinQuery.spatialJoin(queryRDD, spatialRDD, joinParams).collect();
sanityCheckFlatJoinResults(result);
diff --git a/core/src/test/scala/org/apache/sedona/core/scalaTest.scala b/core/src/test/scala/org/apache/sedona/core/scalaTest.scala
index e73deaf..3dab72d 100644
--- a/core/src/test/scala/org/apache/sedona/core/scalaTest.scala
+++ b/core/src/test/scala/org/apache/sedona/core/scalaTest.scala
@@ -147,7 +147,7 @@ class scalaTest extends SparkUtil {
queryWindowRDD.spatialPartitioning(objectRDD.getPartitioner)
for (i <- 1 to eachQueryLoopTimes) {
- val joinParams = new JoinParams(false, PolygonRDDIndexType, JoinBuildSide.LEFT)
+ val joinParams = new JoinParams(true, false, PolygonRDDIndexType, JoinBuildSide.LEFT)
val resultSize = JoinQuery.spatialJoin(queryWindowRDD, objectRDD, joinParams).count()
}
}
diff --git a/python-adapter/src/main/scala/org.apache.sedona.python.wrapper/adapters/JoinParamsAdapter.scala b/python-adapter/src/main/scala/org.apache.sedona.python.wrapper/adapters/JoinParamsAdapter.scala
index 3785844..5ec1cb0 100644
--- a/python-adapter/src/main/scala/org.apache.sedona.python.wrapper/adapters/JoinParamsAdapter.scala
+++ b/python-adapter/src/main/scala/org.apache.sedona.python.wrapper/adapters/JoinParamsAdapter.scala
@@ -23,9 +23,9 @@ import org.apache.sedona.core.enums.{IndexType, JoinBuildSide}
import org.apache.sedona.core.spatialOperator.JoinQuery.JoinParams
object JoinParamsAdapter {
- def createJoinParams(useIndex: Boolean = false, indexType: String, joinBuildSide: String): JoinParams = {
+ def createJoinParams(useIndex: Boolean = true, considerBoundaryIntersection: Boolean = false, indexType: String, joinBuildSide: String): JoinParams = {
val buildSide = JoinBuildSide.getBuildSide(joinBuildSide)
val currIndexType = IndexType.getIndexType(indexType)
- new JoinParams(useIndex, currIndexType, buildSide)
+ new JoinParams(useIndex, considerBoundaryIntersection, currIndexType, buildSide)
}
}
\ No newline at end of file
diff --git a/python/sedona/core/spatialOperator/join_params.py b/python/sedona/core/spatialOperator/join_params.py
index 981df65..232e6b2 100644
--- a/python/sedona/core/spatialOperator/join_params.py
+++ b/python/sedona/core/spatialOperator/join_params.py
@@ -25,21 +25,23 @@ from sedona.core.jvm.abstract import JvmObject
@attr.s
class JoinParams:
useIndex = attr.ib(type=bool, default=True)
+ considerBoundaryIntersection = attr.ib(type=bool, default=False)
indexType = attr.ib(type=str, default=IndexType.RTREE)
joinBuildSide = attr.ib(type=str, default=JoinBuildSide.LEFT)
def jvm_instance(self, jvm):
- return JvmJoinParams(jvm, self.useIndex, self.indexType, self.joinBuildSide).jvm_instance
+ return JvmJoinParams(jvm, self.useIndex, self.considerBoundaryIntersection, self.indexType, self.joinBuildSide).jvm_instance
@attr.s
class JvmJoinParams(JvmObject):
useIndex = attr.ib(type=bool, default=True)
+ considerBoundaryIntersection = attr.ib(type=bool, default=False)
indexType = attr.ib(type=str, default=IndexType.RTREE)
joinBuildSide = attr.ib(type=str, default=JoinBuildSide.LEFT)
def _create_jvm_instance(self):
- return self.jvm_reference(self.useIndex, self.indexType.value, self.joinBuildSide)
+ return self.jvm_reference(self.useIndex, self.considerBoundaryIntersection, self.indexType.value, self.joinBuildSide)
@property
def jvm_reference(self):
diff --git a/python/tests/core/test_avoiding_python_jvm_serde_df.py b/python/tests/core/test_avoiding_python_jvm_serde_df.py
index 8677718..d86a965 100644
--- a/python/tests/core/test_avoiding_python_jvm_serde_df.py
+++ b/python/tests/core/test_avoiding_python_jvm_serde_df.py
@@ -47,7 +47,7 @@ class TestOmitPythonJvmSerdeToDf(TestBase):
poi_point_rdd.spatialPartitioning(GridType.QUADTREE)
areas_polygon_rdd.spatialPartitioning(poi_point_rdd.getPartitioner())
- jvm_sedona_rdd = JoinQueryRaw.spatialJoin(poi_point_rdd, areas_polygon_rdd, JoinParams())
+ jvm_sedona_rdd = JoinQueryRaw.spatialJoin(poi_point_rdd, areas_polygon_rdd, JoinParams(considerBoundaryIntersection=True))
sedona_df = Adapter.toDf(jvm_sedona_rdd, ["area_id", "area_name"], ["poi_id", "poi_name"], self.spark)
assert sedona_df.count() == 5
@@ -130,18 +130,21 @@ class TestOmitPythonJvmSerdeToDf(TestBase):
right_geometries = self.__row_to_list(right_geometries_raw)
- assert left_geometries == [
- ['POLYGON ((0 4, -3 3, -8 6, -6 8, -2 9, 0 4))'],
- ['POLYGON ((2 2, 2 4, 3 5, 7 5, 9 3, 8 1, 4 1, 2 2))'],
- ['POLYGON ((10 3, 10 6, 14 6, 14 3, 10 3))'],
- ['POLYGON ((-1 -1, -1 -3, -2 -5, -6 -8, -5 -2, -3 -2, -1 -1))'],
- ['POLYGON ((-1 -1, -1 -3, -2 -5, -6 -8, -5 -2, -3 -2, -1 -1))']
- ]
- assert right_geometries == [['POINT (-3 5)'],
- ['POINT (4 3)'],
- ['POINT (11 5)'],
- ['POINT (-1 -1)'],
- ['POINT (-4 -5)']]
+ # Ignore the ordering of these
+ assert set(geom[0] for geom in left_geometries) == set([
+ 'POLYGON ((0 4, -3 3, -8 6, -6 8, -2 9, 0 4))',
+ 'POLYGON ((10 3, 10 6, 14 6, 14 3, 10 3))',
+ 'POLYGON ((2 2, 2 4, 3 5, 7 5, 9 3, 8 1, 4 1, 2 2))',
+ 'POLYGON ((-1 -1, -1 -3, -2 -5, -6 -8, -5 -2, -3 -2, -1 -1))',
+ 'POLYGON ((-1 -1, -1 -3, -2 -5, -6 -8, -5 -2, -3 -2, -1 -1))'
+ ])
+ assert set(geom[0] for geom in right_geometries) == set([
+ 'POINT (-3 5)',
+ 'POINT (11 5)',
+ 'POINT (4 3)',
+ 'POINT (-1 -1)',
+ 'POINT (-4 -5)'
+ ])
def test_range_query_flat_to_df(self):
poi_point_rdd = WktReader.readToGeometryRDD(self.sc, bank_csv_path, 1, False, False)
diff --git a/python/tests/core/test_avoiding_python_jvm_serde_to_rdd.py b/python/tests/core/test_avoiding_python_jvm_serde_to_rdd.py
index 1a4b304..bfbd1de 100644
--- a/python/tests/core/test_avoiding_python_jvm_serde_to_rdd.py
+++ b/python/tests/core/test_avoiding_python_jvm_serde_to_rdd.py
@@ -44,7 +44,7 @@ class TestOmitPythonJvmSerdeToRDD(TestBase):
poi_point_rdd.spatialPartitioning(GridType.QUADTREE)
areas_polygon_rdd.spatialPartitioning(poi_point_rdd.getPartitioner())
- jvm_sedona_rdd = JoinQueryRaw.spatialJoin(poi_point_rdd, areas_polygon_rdd, JoinParams())
+ jvm_sedona_rdd = JoinQueryRaw.spatialJoin(poi_point_rdd, areas_polygon_rdd, JoinParams(considerBoundaryIntersection=True))
sedona_rdd = jvm_sedona_rdd.to_rdd().collect()
assert sedona_rdd.__len__() == 5
diff --git a/python/tests/core/test_rdd.py b/python/tests/core/test_rdd.py
index 2ccbf32..df51966 100644
--- a/python/tests/core/test_rdd.py
+++ b/python/tests/core/test_rdd.py
@@ -269,7 +269,7 @@ class TestSpatialRDD(TestBase):
query_window_rdd.spatialPartitioning(object_rdd.getPartitioner())
for i in range(each_query_loop_times):
- join_params = JoinParams(False, polygon_rdd_index_type, JoinBuildSide.LEFT)
+ join_params = JoinParams(True, False, polygon_rdd_index_type, JoinBuildSide.LEFT)
resultSize = JoinQuery.spatialJoin(
query_window_rdd,
object_rdd,
diff --git a/python/tests/spatial_operator/test_linestring_join.py b/python/tests/spatial_operator/test_linestring_join.py
index 1390713..14afa15 100644
--- a/python/tests/spatial_operator/test_linestring_join.py
+++ b/python/tests/spatial_operator/test_linestring_join.py
@@ -78,7 +78,7 @@ class TestRectangleJoin(TestJoinBase):
self.partition_rdds(query_rdd, spatial_rdd, grid_type)
- join_params = JoinParams(True, index_type, JoinBuildSide.LEFT)
+ join_params = JoinParams(True, True, index_type, JoinBuildSide.LEFT)
result = JoinQuery.spatialJoin(query_rdd, spatial_rdd, join_params).collect()
self.sanity_check_flat_join_results(result)
diff --git a/python/tests/spatial_operator/test_point_join.py b/python/tests/spatial_operator/test_point_join.py
index d0547a2..5da2161 100644
--- a/python/tests/spatial_operator/test_point_join.py
+++ b/python/tests/spatial_operator/test_point_join.py
@@ -152,7 +152,7 @@ class TestRectangleJoin(TestJoinBase):
spatial_rdd = self.create_point_rdd(input_location, splitter, num_partitions)
self.partition_rdds(query_rdd, spatial_rdd, grid_type)
- join_params = JoinParams(True, index_type, JoinBuildSide.LEFT)
+ join_params = JoinParams(True, True, index_type, JoinBuildSide.LEFT)
results = JoinQuery.spatialJoin(query_rdd, spatial_rdd, join_params).collect()
self.sanity_check_flat_join_results(results)
diff --git a/python/tests/spatial_operator/test_polygon_join.py b/python/tests/spatial_operator/test_polygon_join.py
index 5191fc1..7713709 100644
--- a/python/tests/spatial_operator/test_polygon_join.py
+++ b/python/tests/spatial_operator/test_polygon_join.py
@@ -79,7 +79,7 @@ class TestRectangleJoin(TestJoinBase):
self.partition_rdds(query_rdd, spatial_rdd, grid_type)
- join_params = JoinParams(intersects, index_type, JoinBuildSide.LEFT)
+ join_params = JoinParams(True, intersects, index_type, JoinBuildSide.LEFT)
result = JoinQuery.spatialJoin(query_rdd, spatial_rdd, join_params).collect()
self.sanity_check_flat_join_results(result)
diff --git a/python/tests/spatial_operator/test_rectangle_join.py b/python/tests/spatial_operator/test_rectangle_join.py
index a8ce71d..2c101d8 100644
--- a/python/tests/spatial_operator/test_rectangle_join.py
+++ b/python/tests/spatial_operator/test_rectangle_join.py
@@ -84,7 +84,7 @@ class TestRectangleJoin(TestJoinBase):
self.partition_rdds(query_rdd, spatial_rdd, grid_type)
- join_params = JoinParams(True, index_type, JoinBuildSide.LEFT)
+ join_params = JoinParams(True, True, index_type, JoinBuildSide.LEFT)
result = JoinQuery.spatialJoin(query_rdd, spatial_rdd, join_params).collect()
self.sanity_check_flat_join_results(result)
diff --git a/python/tests/sql/test_function.py b/python/tests/sql/test_function.py
index 774e4fc..77a7c0b 100644
--- a/python/tests/sql/test_function.py
+++ b/python/tests/sql/test_function.py
@@ -484,7 +484,7 @@ class TestPredicateJoin(TestBase):
dumped_points = geometry_df.selectExpr("ST_DumpPoints(geom) as geom") \
.select(explode(col("geom")).alias("geom"))
- assert(dumped_points.count(), 10)
+ assert(dumped_points.count() == 10)
collected_points = [geom_row[0] for geom_row in dumped_points.selectExpr("ST_AsText(geom)").collect()]
assert(collected_points == expected_points)
diff --git a/sql/src/main/scala/org/apache/spark/sql/sedona_sql/strategy/join/TraitJoinQueryExec.scala b/sql/src/main/scala/org/apache/spark/sql/sedona_sql/strategy/join/TraitJoinQueryExec.scala
index b6fb28b..d00194b 100644
--- a/sql/src/main/scala/org/apache/spark/sql/sedona_sql/strategy/join/TraitJoinQueryExec.scala
+++ b/sql/src/main/scala/org/apache/spark/sql/sedona_sql/strategy/join/TraitJoinQueryExec.scala
@@ -122,7 +122,7 @@ Predicate.create(extraCondition.get, left.output ++ right.output).eval _ // SPAR
}
- val joinParams = new JoinParams(intersects, sedonaConf.getIndexType, sedonaConf.getJoinBuildSide)
+ val joinParams = new JoinParams(sedonaConf.getUseIndex, intersects, sedonaConf.getIndexType, sedonaConf.getJoinBuildSide)
//logInfo(s"leftShape count ${leftShapes.spatialPartitionedRDD.count()}")
//logInfo(s"rightShape count ${rightShapes.spatialPartitionedRDD.count()}")