You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sedona.apache.org by ji...@apache.org on 2024/02/08 02:05:40 UTC
(sedona) branch master updated: [SEDONA-471] Support adding pandas df to SedonaKepler if geometry column is not present (#1233)

This is an automated email from the ASF dual-hosted git repository.

jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git


The following commit(s) were added to refs/heads/master by this push:
     new 712a284ed [SEDONA-471] Support adding pandas df to SedonaKepler if geometry column is not present (#1233)
712a284ed is described below

commit 712a284edcd06cd74d2fdc66df4b0ec8ab549eee
Author: Nilesh Gajwani <ni...@gmail.com>
AuthorDate: Thu Feb 8 07:35:35 2024 +0530

    [SEDONA-471] Support adding pandas df to SedonaKepler if geometry column is not present (#1233)
    
    * Add ST_DWithin
    
    * Add documentation for ST_DWithin
    
    * Remove unwanted code
    
    * removed null check test for ST_DWithin
    
    * Fix EOF lint error
    
    * Add explanation for ST_DWithin
    
    * Remove CRS checking logic in ST_DWithin
    
    * Add optimized join support for ST_DWithin
    
    * Remove test change to resourceFolder
    
    * remove unnecessary cast to double
    
    * Add broadcast join test
    
    * Add example of ST_DWithin in Optimizer.md
    
    * Add useSpheroid version to ST_DWithin | Add optimized join support
    
    * remove accidental resourceFolder change
    
    * Fix mistake in making useSpheroid optional in ST_DWithin
    
    * Fix incorrect test data in test_dataframe_api.py
    
    * fix failing test in test_predicate.py
    
    * Address PR changes | Move ST_DWithin to DistanceJoin
    
    * fix failing test
    
    * Remove randomness from sphere test case generation
    
    * Refactor documentation of ST_DWithin
    
    * revert resourceFolder path
    
    * Handle complex boolean expressions in ST_DWithin
    
    * add a blanket try catch for ST_DWithin to handle complex boolean expressions
    
    * add collect to the python test
    
    * replace head() with count()
    
    * Add null check for geometry column while adding a df to keplergl
    
    * Revert "Add null check for geometry column while adding a df to keplergl"
    
    This reverts commit 5352fb949f8dba96cd6f445c43fc857968a36d2f.
    
    * Add null check for geometry column while adding df to keplergl
    
    * support adding pandas df to sedonaKepler and SedonaPyDeck if geometry column is not available
---
 python/sedona/maps/SedonaKepler.py                   |  2 +-
 python/sedona/maps/SedonaMapUtils.py                 | 12 +++++++++---
 python/sedona/maps/SedonaPyDeck.py                   |  2 +-
 python/tests/maps/test_sedonakepler_visualization.py | 20 +++++++++++++++++++-
 4 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/python/sedona/maps/SedonaKepler.py b/python/sedona/maps/SedonaKepler.py
index a8cf7b5f0..113699258 100644
--- a/python/sedona/maps/SedonaKepler.py
+++ b/python/sedona/maps/SedonaKepler.py
@@ -54,5 +54,5 @@ class SedonaKepler:
         :param name: [Optional] Name to assign to the dataframe, default name assigned is 'unnamed'
         :return: Does not return anything, adds df directly to the given map object
         """
-        geo_df = SedonaMapUtils.__convert_to_gdf__(df)
+        geo_df = SedonaMapUtils.__convert_to_gdf_or_pdf__(df)
         kepler_map.add_data(geo_df, name=name)
diff --git a/python/sedona/maps/SedonaMapUtils.py b/python/sedona/maps/SedonaMapUtils.py
index d50d7e95e..5ab875d83 100644
--- a/python/sedona/maps/SedonaMapUtils.py
+++ b/python/sedona/maps/SedonaMapUtils.py
@@ -24,14 +24,20 @@ from sedona.sql.types import GeometryType
 class SedonaMapUtils:
 
     @classmethod
-    def __convert_to_gdf__(cls, df, rename=True, geometry_col=None):
+    def __convert_to_gdf_or_pdf__(cls, df, rename=True, geometry_col=None):
         """
         Converts a SedonaDataFrame to a GeoPandasDataFrame and also renames geometry column to a standard name of
-        'geometry' :param df: SedonaDataFrame to convert :param geometry_col: [Optional] :return:
+        'geometry'
+        However, if no geometry column is found even after traversing schema, returns a Pandas Dataframe
+        :param df: SedonaDataFrame to convert
+        :param geometry_col: [Optional]
+        :return: GeoPandas Dataframe or Pandas Dataframe
         """
         if geometry_col is None:
             geometry_col = SedonaMapUtils.__get_geometry_col__(df)
         pandas_df = df.toPandas()
+        if geometry_col is None:  # No geometry column found even after searching schema, return Pandas Dataframe
+            return pandas_df
         geo_df = gpd.GeoDataFrame(pandas_df, geometry=geometry_col)
         if geometry_col != "geometry" and rename is True:
             geo_df.rename_geometry("geometry", inplace=True)
@@ -44,7 +50,7 @@ class SedonaMapUtils:
         :param df: SedonaDataFrame to convert
         :return: GeoJSON object
         """
-        gdf = SedonaMapUtils.__convert_to_gdf__(df)
+        gdf = SedonaMapUtils.__convert_to_gdf_or_pdf__(df)
         gjson_str = gdf.to_json()
         gjson = json.loads(gjson_str)
         return gjson
diff --git a/python/sedona/maps/SedonaPyDeck.py b/python/sedona/maps/SedonaPyDeck.py
index b6062e67b..8270e59fc 100644
--- a/python/sedona/maps/SedonaPyDeck.py
+++ b/python/sedona/maps/SedonaPyDeck.py
@@ -201,7 +201,7 @@ class SedonaPyDeck:
         """
         if geometry_col is None:
             geometry_col = SedonaMapUtils.__get_geometry_col__(df=df)
-        gdf = SedonaMapUtils.__convert_to_gdf__(df, rename=False, geometry_col=geometry_col)
+        gdf = SedonaMapUtils.__convert_to_gdf_or_pdf__(df, rename=False, geometry_col=geometry_col)
         if add_coords is True:
             SedonaPyDeck._create_coord_column_(gdf=gdf, geometry_col=geometry_col)
         return gdf
diff --git a/python/tests/maps/test_sedonakepler_visualization.py b/python/tests/maps/test_sedonakepler_visualization.py
index b636fbf39..fc8ae321a 100644
--- a/python/tests/maps/test_sedonakepler_visualization.py
+++ b/python/tests/maps/test_sedonakepler_visualization.py
@@ -21,6 +21,7 @@ from tests.test_base import TestBase
 from tests import mixed_wkt_geometry_input_location
 from tests import csv_point_input_location
 import geopandas as gpd
+from pyspark.sql.functions import explode, hex
 
 
 class TestVisualization(TestBase):
@@ -70,6 +71,22 @@ class TestVisualization(TestBase):
         assert sedona_kepler_empty_map._repr_html_() == kepler_map._repr_html_()
         assert sedona_kepler_empty_map.config == kepler_map.config
 
+    def test_pandas_df_addition(self):
+        polygon_wkt_df = self.spark.read.format("csv"). \
+            option("delimiter", "\t"). \
+            option("header", "false"). \
+            load(mixed_wkt_geometry_input_location)
+
+        polygon_wkt_df.createOrReplaceTempView("polygontable")
+        polygon_h3_df = self.spark.sql(
+            "select ST_H3CellIDs(ST_GeomFromWKT(polygontable._c0), 3, false) as h3_cellID from polygontable")
+        polygon_exploded_h3 = polygon_h3_df.select(explode(polygon_h3_df.h3_cellID).alias("h3"))
+        polygon_hex_exploded_h3 = polygon_exploded_h3.select(hex(polygon_exploded_h3.h3).alias("hex_h3"))
+        kepler_map = SedonaKepler.create_map(df=polygon_hex_exploded_h3, name="h3")
+
+        # just test if the map creation is successful.
+        assert kepler_map is not None
+
     def test_adding_multiple_datasets(self):
         config = {'version': 'v1',
                   'config': {'visState': {'filters': [],
@@ -180,7 +197,8 @@ class TestVisualization(TestBase):
             load(csv_point_input_location)
 
         point_csv_df.createOrReplaceTempView("pointtable")
-        point_df = self.spark.sql("select ST_Point(cast(pointtable._c0 as Decimal(24,20)), cast(pointtable._c1 as Decimal(24,20))) as arealandmark from pointtable")
+        point_df = self.spark.sql(
+            "select ST_Point(cast(pointtable._c0 as Decimal(24,20)), cast(pointtable._c1 as Decimal(24,20))) as arealandmark from pointtable")
         polygon_wkt_df.createOrReplaceTempView("polygontable")
         polygon_df = self.spark.sql("select ST_GeomFromWKT(polygontable._c0) as countyshape from polygontable")