You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sedona.apache.org by ji...@apache.org on 2024/02/08 02:05:40 UTC
(sedona) branch master updated: [SEDONA-471] Support adding pandas df to SedonaKepler if geometry column is not present (#1233)
This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git
The following commit(s) were added to refs/heads/master by this push:
new 712a284ed [SEDONA-471] Support adding pandas df to SedonaKepler if geometry column is not present (#1233)
712a284ed is described below
commit 712a284edcd06cd74d2fdc66df4b0ec8ab549eee
Author: Nilesh Gajwani <ni...@gmail.com>
AuthorDate: Thu Feb 8 07:35:35 2024 +0530
[SEDONA-471] Support adding pandas df to SedonaKepler if geometry column is not present (#1233)
* Add ST_DWithin
* Add documentation for ST_DWithin
* Remove unwanted code
* removed null check test for ST_DWithin
* Fix EOF lint error
* Add explanation for ST_DWithin
* Remove CRS checking logic in ST_DWithin
* Add optimized join support for ST_DWithin
* Remove test change to resourceFolder
* remove unnecessary cast to double
* Add broadcast join test
* Add example of ST_DWithin in Optimizer.md
* Add useSpheroid version to ST_DWithin | Add optimized join support
* remove accidental resourceFolder change
* Fix mistake in making useSpheroid optional in ST_DWithin
* Fix incorrect test data in test_dataframe_api.py
* fix failing test in test_predicate.py
* Address PR changes | Move ST_DWithin to DistanceJoin
* fix failing test
* Remove randomness from sphere test case generation
* Refactor documentation of ST_DWithin
* revert resourceFolder path
* Handle complex boolean expressions in ST_DWithin
* add a blanket try catch for ST_DWithin to handle complex boolean expressions
* add collect to the python test
* replace head() with count()
* Add null check for geometry column while adding a df to keplergl
* Revert "Add null check for geometry column while adding a df to keplergl"
This reverts commit 5352fb949f8dba96cd6f445c43fc857968a36d2f.
* Add null check for geometry column while adding df to keplergl
* Support adding pandas df to SedonaKepler and SedonaPyDeck if geometry column is not available
---
python/sedona/maps/SedonaKepler.py | 2 +-
python/sedona/maps/SedonaMapUtils.py | 12 +++++++++---
python/sedona/maps/SedonaPyDeck.py | 2 +-
python/tests/maps/test_sedonakepler_visualization.py | 20 +++++++++++++++++++-
4 files changed, 30 insertions(+), 6 deletions(-)
diff --git a/python/sedona/maps/SedonaKepler.py b/python/sedona/maps/SedonaKepler.py
index a8cf7b5f0..113699258 100644
--- a/python/sedona/maps/SedonaKepler.py
+++ b/python/sedona/maps/SedonaKepler.py
@@ -54,5 +54,5 @@ class SedonaKepler:
:param name: [Optional] Name to assign to the dataframe, default name assigned is 'unnamed'
:return: Does not return anything, adds df directly to the given map object
"""
- geo_df = SedonaMapUtils.__convert_to_gdf__(df)
+ geo_df = SedonaMapUtils.__convert_to_gdf_or_pdf__(df)
kepler_map.add_data(geo_df, name=name)
diff --git a/python/sedona/maps/SedonaMapUtils.py b/python/sedona/maps/SedonaMapUtils.py
index d50d7e95e..5ab875d83 100644
--- a/python/sedona/maps/SedonaMapUtils.py
+++ b/python/sedona/maps/SedonaMapUtils.py
@@ -24,14 +24,20 @@ from sedona.sql.types import GeometryType
class SedonaMapUtils:
@classmethod
- def __convert_to_gdf__(cls, df, rename=True, geometry_col=None):
+ def __convert_to_gdf_or_pdf__(cls, df, rename=True, geometry_col=None):
"""
Converts a SedonaDataFrame to a GeoPandasDataFrame and also renames geometry column to a standard name of
- 'geometry' :param df: SedonaDataFrame to convert :param geometry_col: [Optional] :return:
+ 'geometry'
+ However, if no geometry column is found even after traversing schema, returns a Pandas Dataframe
+ :param df: SedonaDataFrame to convert
+ :param geometry_col: [Optional]
+ :return: GeoPandas Dataframe or Pandas Dataframe
"""
if geometry_col is None:
geometry_col = SedonaMapUtils.__get_geometry_col__(df)
pandas_df = df.toPandas()
+ if geometry_col is None: # No geometry column found even after searching schema, return Pandas Dataframe
+ return pandas_df
geo_df = gpd.GeoDataFrame(pandas_df, geometry=geometry_col)
if geometry_col != "geometry" and rename is True:
geo_df.rename_geometry("geometry", inplace=True)
@@ -44,7 +50,7 @@ class SedonaMapUtils:
:param df: SedonaDataFrame to convert
:return: GeoJSON object
"""
- gdf = SedonaMapUtils.__convert_to_gdf__(df)
+ gdf = SedonaMapUtils.__convert_to_gdf_or_pdf__(df)
gjson_str = gdf.to_json()
gjson = json.loads(gjson_str)
return gjson
diff --git a/python/sedona/maps/SedonaPyDeck.py b/python/sedona/maps/SedonaPyDeck.py
index b6062e67b..8270e59fc 100644
--- a/python/sedona/maps/SedonaPyDeck.py
+++ b/python/sedona/maps/SedonaPyDeck.py
@@ -201,7 +201,7 @@ class SedonaPyDeck:
"""
if geometry_col is None:
geometry_col = SedonaMapUtils.__get_geometry_col__(df=df)
- gdf = SedonaMapUtils.__convert_to_gdf__(df, rename=False, geometry_col=geometry_col)
+ gdf = SedonaMapUtils.__convert_to_gdf_or_pdf__(df, rename=False, geometry_col=geometry_col)
if add_coords is True:
SedonaPyDeck._create_coord_column_(gdf=gdf, geometry_col=geometry_col)
return gdf
diff --git a/python/tests/maps/test_sedonakepler_visualization.py b/python/tests/maps/test_sedonakepler_visualization.py
index b636fbf39..fc8ae321a 100644
--- a/python/tests/maps/test_sedonakepler_visualization.py
+++ b/python/tests/maps/test_sedonakepler_visualization.py
@@ -21,6 +21,7 @@ from tests.test_base import TestBase
from tests import mixed_wkt_geometry_input_location
from tests import csv_point_input_location
import geopandas as gpd
+from pyspark.sql.functions import explode, hex
class TestVisualization(TestBase):
@@ -70,6 +71,22 @@ class TestVisualization(TestBase):
assert sedona_kepler_empty_map._repr_html_() == kepler_map._repr_html_()
assert sedona_kepler_empty_map.config == kepler_map.config
+ def test_pandas_df_addition(self):
+ polygon_wkt_df = self.spark.read.format("csv"). \
+ option("delimiter", "\t"). \
+ option("header", "false"). \
+ load(mixed_wkt_geometry_input_location)
+
+ polygon_wkt_df.createOrReplaceTempView("polygontable")
+ polygon_h3_df = self.spark.sql(
+ "select ST_H3CellIDs(ST_GeomFromWKT(polygontable._c0), 3, false) as h3_cellID from polygontable")
+ polygon_exploded_h3 = polygon_h3_df.select(explode(polygon_h3_df.h3_cellID).alias("h3"))
+ polygon_hex_exploded_h3 = polygon_exploded_h3.select(hex(polygon_exploded_h3.h3).alias("hex_h3"))
+ kepler_map = SedonaKepler.create_map(df=polygon_hex_exploded_h3, name="h3")
+
+ # just test if the map creation is successful.
+ assert kepler_map is not None
+
def test_adding_multiple_datasets(self):
config = {'version': 'v1',
'config': {'visState': {'filters': [],
@@ -180,7 +197,8 @@ class TestVisualization(TestBase):
load(csv_point_input_location)
point_csv_df.createOrReplaceTempView("pointtable")
- point_df = self.spark.sql("select ST_Point(cast(pointtable._c0 as Decimal(24,20)), cast(pointtable._c1 as Decimal(24,20))) as arealandmark from pointtable")
+ point_df = self.spark.sql(
+ "select ST_Point(cast(pointtable._c0 as Decimal(24,20)), cast(pointtable._c1 as Decimal(24,20))) as arealandmark from pointtable")
polygon_wkt_df.createOrReplaceTempView("polygontable")
polygon_df = self.spark.sql("select ST_GeomFromWKT(polygontable._c0) as countyshape from polygontable")