You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sedona.apache.org by ji...@apache.org on 2020/12/22 19:56:27 UTC
[incubator-sedona] branch master updated: [SEDONA-5] Update Sedona documentation (#498)

This is an automated email from the ASF dual-hosted git repository.

jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-sedona.git


The following commit(s) were added to refs/heads/master by this push:
     new 6e144cc  [SEDONA-5] Update Sedona documentation (#498)
6e144cc is described below

commit 6e144cc159e2e32e07c8811ca6015419e225cb3d
Author: Paweł Kociński <pa...@gmail.com>
AuthorDate: Tue Dec 22 20:56:19 2020 +0100

    [SEDONA-5] Update Sedona documentation (#498)
    
    * [SEDONA-5] Fix issue with confusing adapter methods.
    
    * [SEDONA-5] Update Notebooks for new release.
    
    * [SEDONA-5] Add test coverage lib to the project.
    
    * [SEDONA-5] Add test coverage lib to the project.
    
    * [SEDONA-5] Add documentation update.
    
    * Add notes about adding additional jar files.
---
 docs/tutorial/geospark-core-python.md              | 151 ++++-
 docs/tutorial/geospark-sql-python.md               |  48 +-
 python/ApacheSedonaCore.ipynb                      | 662 ++++++++++++++-------
 python/ApacheSedonaSQL.ipynb                       |  54 +-
 python/Pipfile                                     |   2 +
 python/sedona/core/spatialOperator/__init__.py     |   4 +-
 python/sedona/core/spatialOperator/rdd.py          |  27 -
 python/sedona/utils/adapter.py                     |  11 +-
 .../core/test_avoiding_python_jvm_serde_df.py      |  30 +-
 9 files changed, 676 insertions(+), 313 deletions(-)

diff --git a/docs/tutorial/geospark-core-python.md b/docs/tutorial/geospark-core-python.md
index 8b5c7e6..6941e38 100644
--- a/docs/tutorial/geospark-core-python.md
+++ b/docs/tutorial/geospark-core-python.md
@@ -32,26 +32,32 @@ userData is string representation of other attributes separated by "\t"
 GeoData has one method to get user data.
 <li> getUserData() -> str </li>
 
-## Installing the package
+## Installation
 
-Sedona extends pyspark functions which depend on Python packages and Scala libraries. To see all dependencies
-please look at the dependencies section.
-https://pypi.org/project/pyspark/.
+Apache Sedona extends pyspark functions and depends on the following libraries:
 
-This package needs 1 jar files to work properly:
+- pyspark
 
-- sedona-python-adapter.jar
+- shapely
 
+- attrs
+
+Apache Sedona needs one additional jar file to work properly. It is generated by the python-adapter project within this repository.
+It is published to Maven repositories. It can also be built from source by running
+```bash
+mvn clean install -DskipTests
+```
+
+within main project directory.
 
 ### Installing from PyPi repositories
 
 Please use command below
- 
+
 ```bash
 pip install sedona
 ```
 
-
 ### Installing from source
 
 
@@ -61,7 +67,6 @@ python3 setup.py install
 
 ```
 
-
 ## Apache Sedona Serializers
 Sedona has a suite of well-written geometry and index serializers. Forgetting to enable these serializers will lead to high memory consumption.
 
@@ -70,6 +75,26 @@ conf.set("spark.serializer", KryoSerializer.getName)
 conf.set("spark.kryo.registrator", SedonaKryoRegistrator.getName)
 ```
 
+## Register package 
+Before writing any code with Sedona please use the following code.
+
+```python
+from sedona.register import SedonaRegistrator
+
+SedonaRegistrator.registerAll(spark)
+```
+
+!!!note
+    Before running the command, please copy the jar file produced by the python-adapter project
+    to the $SPARK_HOME/jars directory, or add it while defining the Spark session.
+    ```python
+    spark = SparkSession.\
+        builder.\
+        appName('appName').\
+        config('spark.jars.packages', 'path to jar or coordinates from maven').\
+        getOrCreate()
+    ```
+    
 ## Create a SpatialRDD
 
 ### Create a typed SpatialRDD
@@ -198,6 +223,26 @@ using_index = False
 query_result = RangeQuery.SpatialRangeQuery(spatial_rdd, range_query_window, consider_boundary_intersection, using_index)
 ```
 
+!!!note
+    Please use RangeQueryRaw from the same module
+    if you want to avoid JVM-Python serde while converting to a Spatial DataFrame.
+    It takes the same parameters as RangeQuery but returns a reference to a JVM RDD, which
+    can be converted to a dataframe without Python-JVM serde using Adapter.
+    
+    Example:
+    ```python
+    from sedona.core.geom.envelope import Envelope
+    from sedona.core.spatialOperator import RangeQueryRaw
+    from sedona.utils.adapter import Adapter
+    
+    range_query_window = Envelope(-90.01, -80.01, 30.01, 40.01)
+    consider_boundary_intersection = False  ## Only return geometries fully covered by the window
+    using_index = False
+    query_result = RangeQueryRaw.SpatialRangeQuery(spatial_rdd, range_query_window, consider_boundary_intersection, using_index)
+    gdf = Adapter.toDf(query_result, spark, ["col1", ..., "coln"])
+
+    ```
+
 ### Range query window
 
 Besides the rectangle (Envelope) type range query window, Apache Sedona range query window can be 
@@ -482,6 +527,42 @@ using_index = False
 
 result = JoinQuery.DistanceJoinQueryFlat(spatial_rdd, circle_rdd, using_index, consider_boundary_intersection)
 ```
+
+!!!note
+    Please use JoinQueryRaw from the same module for methods 
+    
+    - spatialJoin
+    
+    - DistanceJoinQueryFlat
+
+    - SpatialJoinQueryFlat
+
+    Use it for better performance while converting to a dataframe with Adapter.
+    That approach avoids the costly serialization between Python
+    and the JVM, and as a result avoids operating on Python objects instead of native geometries.
+    
+    Example:
+    ```python
+    from sedona.core.SpatialRDD import CircleRDD
+    from sedona.core.enums import GridType
+    from sedona.core.spatialOperator import JoinQueryRaw
+    
+    object_rdd.analyze()
+    
+    circle_rdd = CircleRDD(object_rdd, 0.1) ## Create a CircleRDD using the given distance
+    circle_rdd.analyze()
+    
+    circle_rdd.spatialPartitioning(GridType.KDBTREE)
+    spatial_rdd.spatialPartitioning(circle_rdd.getPartitioner())
+    
+    consider_boundary_intersection = False ## Only return geometries fully covered by each query window in queryWindowRDD
+    using_index = False
+    
+    result = JoinQueryRaw.DistanceJoinQueryFlat(spatial_rdd, circle_rdd, using_index, consider_boundary_intersection)
+    
+    gdf = Adapter.toDf(result, ["left_col1", ..., "left_coln"], ["right_col1", ..., "right_coln"], spark)
+    ```
+
 ### Output format
 
 Result for this query is RDD which holds two GeoData objects within list of lists.
@@ -645,4 +726,56 @@ ShapefileReader.readToGeometryRDD(sc, shape_file_location)
 ```
 ```
 <sedona.core.SpatialRDD.spatial_rdd.SpatialRDD at 0x7f8fd2ee0710>
+```
+
+### Tips
+When you use Sedona functions such as
+
+- JoinQuery.spatialJoin
+
+- JoinQuery.DistanceJoinQueryFlat
+
+- JoinQuery.SpatialJoinQueryFlat
+
+- RangeQuery.SpatialRangeQuery
+
+you can get better performance when converting to a dataframe by using
+JoinQueryRaw and RangeQueryRaw from the same module, together with Adapter, to convert
+the result to a Spatial DataFrame.
+
+Example, JoinQueryRaw:
+
+```python
+from sedona.core.SpatialRDD import CircleRDD
+from sedona.core.enums import GridType
+from sedona.core.spatialOperator import JoinQueryRaw
+
+object_rdd.analyze()
+
+circle_rdd = CircleRDD(object_rdd, 0.1) ## Create a CircleRDD using the given distance
+circle_rdd.analyze()
+
+circle_rdd.spatialPartitioning(GridType.KDBTREE)
+spatial_rdd.spatialPartitioning(circle_rdd.getPartitioner())
+
+consider_boundary_intersection = False ## Only return geometries fully covered by each query window in queryWindowRDD
+using_index = False
+
+result = JoinQueryRaw.DistanceJoinQueryFlat(spatial_rdd, circle_rdd, using_index, consider_boundary_intersection)
+
+gdf = Adapter.toDf(result, ["left_col1", ..., "left_coln"], ["right_col1", ..., "right_coln"], spark)
+```
+
+and RangeQueryRaw
+
+```python
+from sedona.core.geom.envelope import Envelope
+from sedona.core.spatialOperator import RangeQueryRaw
+from sedona.utils.adapter import Adapter
+
+range_query_window = Envelope(-90.01, -80.01, 30.01, 40.01)
+consider_boundary_intersection = False  ## Only return geometries fully covered by the window
+using_index = False
+query_result = RangeQueryRaw.SpatialRangeQuery(spatial_rdd, range_query_window, consider_boundary_intersection, using_index)
+gdf = Adapter.toDf(query_result, spark, ["col1", ..., "coln"])
 ```
\ No newline at end of file
diff --git a/docs/tutorial/geospark-sql-python.md b/docs/tutorial/geospark-sql-python.md
index bb12763..479ae4f 100644
--- a/docs/tutorial/geospark-sql-python.md
+++ b/docs/tutorial/geospark-sql-python.md
@@ -2,19 +2,34 @@
 
 ## Introduction
 
-This package allows users to use all SedonaSQL functions and transform it to Python Shapely geometry objects. Also it allows to create Spark DataFrame with Apache Sedona UDT from Shapely geometry objects. Spark DataFrame can be converted to GeoPandas easily, in addition all fiona drivers for shape file are available to load data from files and convert them to Spark DataFrame. Please look at examples.
+This package is an extension to the Apache Spark SQL package. It allows using
+spatial functions on dataframes.
 
+SedonaSQL supports SQL/MM Part3 Spatial SQL Standard. 
+It includes four kinds of SQL operators as follows.
+All these operators can be directly called through:
 
+```python
+spark.sql("YOUR_SQL")
+```
 
 ## Installation
 
-Apache Sedona extends pyspark functions which depends on Python packages and Scala libraries. To see all dependencies
-please look at Dependencies section.
-https://pypi.org/project/pyspark/.
+Apache Sedona extends pyspark functions and depends on the following libraries:
 
-Package needs 1 jar files to work properly:
+- pyspark
+  
+- shapely
+  
+- attrs
 
-- sedona-python-adapter.jar
+Apache Sedona needs one additional jar file to work properly. It is generated by the python-adapter project within this repository.
+It is published to Maven repositories. It can also be built from source by running
+```bash
+mvn clean install -DskipTests
+```
+
+within main project directory.
 
 ### Installing from PyPi repositories
 
@@ -33,6 +48,15 @@ python3 setup.py install
 
 ```
 
+## Register package
+Before writing any code with Sedona please use the following code.
+
+```python
+from sedona.register import SedonaRegistrator
+
+SedonaRegistrator.registerAll(spark)
+```
+
 ## Writing Application
 
 Use KryoSerializer.getName and SedonaKryoRegistrator.getName class properties to reduce memory impact.
@@ -44,14 +68,20 @@ Use KryoSerializer.getName and SedonaKryoRegistrator.getName class properties to
 
 ```
 
-If jars was not uploaded manually please use function `upload_jars()`
-
 To turn on SedonaSQL function inside pyspark code use SedonaRegistrator.registerAll method on existing pyspark.sql.SparkSession instance ex.
 
 `SedonaRegistrator.registerAll(spark)`
 
-After that all the functions from SedonaSQL will be available, moreover using collect or toPandas methods on Spark DataFrame will return Shapely BaseGeometry objects. Based on GeoPandas DataFrame, Pandas DataFrame with shapely objects or Sequence with shapely objects, Spark DataFrame can be created using spark.createDataFrame method. To specify Schema with geometry inside please use `GeometryType()` instance (look at examples section to see that in practice).
+After that all the functions from SedonaSQL are available,
+moreover using collect or toPandas methods on Spark DataFrame 
+returns Shapely BaseGeometry objects. 
 
+Based on GeoPandas DataFrame,
+Pandas DataFrame with shapely objects or Sequence with 
+shapely objects, Spark DataFrame can be created using 
+spark.createDataFrame method. To specify Schema with 
+geometry inside please use `GeometryType()` instance 
+(look at examples section to see that in practice).
 
 
 ### Examples
diff --git a/python/ApacheSedonaCore.ipynb b/python/ApacheSedonaCore.ipynb
index 4ed6288..699831e 100644
--- a/python/ApacheSedonaCore.ipynb
+++ b/python/ApacheSedonaCore.ipynb
@@ -26,7 +26,9 @@
     "from sedona.utils.adapter import Adapter\n",
     "from sedona.core.spatialOperator import KNNQuery\n",
     "from sedona.core.spatialOperator import JoinQuery\n",
+    "from sedona.core.spatialOperator import JoinQueryRaw\n",
     "from sedona.core.spatialOperator import RangeQuery\n",
+    "from sedona.core.spatialOperator import RangeQueryRaw\n",
     "from sedona.core.formatMapper.shapefileParser import ShapefileReader\n",
     "from sedona.core.formatMapper import WkbReader\n",
     "from sedona.core.formatMapper import WktReader\n",
@@ -48,7 +50,7 @@
     "spark = SparkSession.\\\n",
     "    builder.\\\n",
     "    master(\"local[*]\").\\\n",
-    "    appName(\"GeoPySparkCoreExample\").\\\n",
+    "    appName(\"Sedona App\").\\\n",
     "    config(\"spark.serializer\", KryoSerializer.getName).\\\n",
     "    config(\"spark.kryo.registrator\", SedonaKryoRegistrator.getName) .\\\n",
     "    getOrCreate()"
@@ -162,7 +164,7 @@
        "<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" width=\"100.0\" height=\"100.0\" viewBox=\"-176.64696132 26.718666680000002 95.20719264000002 48.162659640000015\" preserveAspectRatio=\"xMinYMin meet\"><g transform=\"matrix(1,0,0,-1,0,101.59999300000001)\"><path fill-rule=\"evenodd\" fill=\"#66cc99\" stroke=\"#555555\" stroke-width=\"1.9041438528000003\" opacity=\"0.6\" d=\"M -173.120769,30.244859 L -173.120769,71.355134 L -84.965961,71.35513 [...]
       ],
       "text/plain": [
-       "<sedona.core.geom.envelope.Envelope at 0x7fe218c3d710>"
+       "Envelope(-173.120769, -84.965961, 30.244859, 71.355134)"
       ]
      },
      "execution_count": 8,
@@ -208,7 +210,7 @@
        "<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" width=\"100.0\" height=\"100.0\" viewBox=\"-176.64696132 26.718666680000002 95.20719264000002 48.162659640000015\" preserveAspectRatio=\"xMinYMin meet\"><g transform=\"matrix(1,0,0,-1,0,101.59999300000001)\"><path fill-rule=\"evenodd\" fill=\"#66cc99\" stroke=\"#555555\" stroke-width=\"1.9041438528000003\" opacity=\"0.6\" d=\"M -173.120769,30.244859 L -173.120769,71.355134 L -84.965961,71.35513 [...]
       ],
       "text/plain": [
-       "<sedona.core.geom.envelope.Envelope at 0x7fe218c8ea50>"
+       "Envelope(-173.120769, -84.965961, 30.244859, 71.355134)"
       ]
      },
      "execution_count": 10,
@@ -245,17 +247,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Getting filed names for PointRDD or other SpatialRDD, it return list with field names\n",
-    "point_rdd.fieldNames"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [
     {
@@ -264,7 +256,7 @@
        "'epsg:4326'"
       ]
      },
-     "execution_count": 13,
+     "execution_count": 12,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -276,7 +268,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [
     {
@@ -285,7 +277,7 @@
        "'epsg:4326'"
       ]
      },
-     "execution_count": 14,
+     "execution_count": 13,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -297,57 +289,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "True"
-      ]
-     },
-     "execution_count": 15,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# Spatial partitioning data\n",
-    "point_rdd.spatialPartitioning(GridType.EQUALGRID)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 16,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "[<sedona.core.geom.envelope.Envelope at 0x7fe2189c3b10>,\n",
-       " <sedona.core.geom.envelope.Envelope at 0x7fe2189c3dd0>,\n",
-       " <sedona.core.geom.envelope.Envelope at 0x7fe2189440d0>,\n",
-       " <sedona.core.geom.envelope.Envelope at 0x7fe218944390>,\n",
-       " <sedona.core.geom.envelope.Envelope at 0x7fe218944650>,\n",
-       " <sedona.core.geom.envelope.Envelope at 0x7fe218944950>,\n",
-       " <sedona.core.geom.envelope.Envelope at 0x7fe218944c10>,\n",
-       " <sedona.core.geom.envelope.Envelope at 0x7fe218944ed0>,\n",
-       " <sedona.core.geom.envelope.Envelope at 0x7fe2189481d0>]"
-      ]
-     },
-     "execution_count": 16,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# Get PointRDD or other SpatialRDD, it returns list of Envelopes \n",
-    "point_rdd.grids"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 14,
    "metadata": {},
    "outputs": [
     {
@@ -356,14 +298,14 @@
        "True"
       ]
      },
-     "execution_count": 17,
+     "execution_count": 14,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
     "# Spatial partitioning data\n",
-    "point_rdd.spatialPartitioning(GridType.VORONOI)"
+    "point_rdd.spatialPartitioning(GridType.KDBTREE)"
    ]
   },
   {
@@ -386,7 +328,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 15,
    "metadata": {},
    "outputs": [
     {
@@ -395,7 +337,7 @@
        "[Geometry: Point userData: testattribute0\ttestattribute1\ttestattribute2]"
       ]
      },
-     "execution_count": 18,
+     "execution_count": 15,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -407,7 +349,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 16,
    "metadata": {},
    "outputs": [
     {
@@ -420,7 +362,7 @@
        " Geometry: Point userData: testattribute0\ttestattribute1\ttestattribute2]"
       ]
      },
-     "execution_count": 19,
+     "execution_count": 16,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -432,7 +374,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 17,
    "metadata": {},
    "outputs": [
     {
@@ -445,7 +387,7 @@
        " 110.97122518072091]"
       ]
      },
-     "execution_count": 20,
+     "execution_count": 17,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -478,7 +420,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 18,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -487,7 +429,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": 19,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -498,7 +440,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": 20,
    "metadata": {},
    "outputs": [
     {
@@ -577,7 +519,7 @@
        "4  POINT (-88.32399 32.95067)  testattribute0  testattribute1  testattribute2"
       ]
      },
-     "execution_count": 23,
+     "execution_count": 20,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -595,7 +537,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Adapter allows you to convert geospatial data types introduced with sedona to other ones"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -603,12 +554,12 @@
     "    toDf(point_rdd, [\"attr1\", \"attr2\", \"attr3\"], spark).\\\n",
     "    createOrReplaceTempView(\"spatial_df\")\n",
     "\n",
-    "spatial_gdf = spark.sql(\"Select attr1, attr2, attr3, st_GeomFromWKT(geometry) as geom from spatial_df\")"
+    "spatial_gdf = spark.sql(\"Select attr1, attr2, attr3, geometry as geom from spatial_df\")"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 25,
+   "execution_count": 23,
    "metadata": {},
    "outputs": [
     {
@@ -635,7 +586,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": 24,
    "metadata": {},
    "outputs": [
     {
@@ -714,7 +665,7 @@
        "4  testattribute0  testattribute1  testattribute2  POINT (-88.32399 32.95067)"
       ]
      },
-     "execution_count": 26,
+     "execution_count": 24,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -732,7 +683,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": 25,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -748,7 +699,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 28,
+   "execution_count": 26,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -757,7 +708,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": 27,
    "metadata": {},
    "outputs": [
     {
@@ -836,7 +787,7 @@
        "4  POINT (-88.32399 32.95067)  testattribute0  testattribute1  testattribute2"
       ]
      },
-     "execution_count": 29,
+     "execution_count": 27,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -866,7 +817,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": 28,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -878,7 +829,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": 29,
    "metadata": {},
    "outputs": [
     {
@@ -887,7 +838,7 @@
        "True"
       ]
      },
-     "execution_count": 31,
+     "execution_count": 29,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -915,7 +866,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 32,
+   "execution_count": 30,
    "metadata": {},
    "outputs": [
     {
@@ -924,7 +875,7 @@
        "True"
       ]
      },
-     "execution_count": 32,
+     "execution_count": 30,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -949,7 +900,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 33,
+   "execution_count": 31,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -997,7 +948,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 34,
+   "execution_count": 32,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1020,16 +971,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 35,
+   "execution_count": 33,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "MapPartitionsRDD[70] at map at FlatPairRddConverter.scala:30"
+       "MapPartitionsRDD[63] at map at FlatPairRddConverter.scala:30"
       ]
      },
-     "execution_count": 35,
+     "execution_count": 33,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1040,7 +991,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 36,
+   "execution_count": 34,
    "metadata": {},
    "outputs": [
     {
@@ -1050,7 +1001,7 @@
        " [Geometry: Polygon userData: , Geometry: Point userData: ]]"
       ]
      },
-     "execution_count": 36,
+     "execution_count": 34,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1061,7 +1012,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 37,
+   "execution_count": 35,
    "metadata": {},
    "outputs": [
     {
@@ -1072,7 +1023,7 @@
        " [Geometry: Polygon userData: , Geometry: Point userData: ]]"
       ]
      },
-     "execution_count": 37,
+     "execution_count": 35,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1083,7 +1034,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 38,
+   "execution_count": 36,
    "metadata": {},
    "outputs": [
     {
@@ -1092,7 +1043,7 @@
        "[0.0, 0.0, 0.0, 0.0, 0.0]"
       ]
      },
-     "execution_count": 38,
+     "execution_count": 36,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1104,20 +1055,20 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 39,
+   "execution_count": 37,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "[0.057069904940998895,\n",
-       " 0.057069904940998895,\n",
-       " 0.057069904940998895,\n",
-       " 0.057069904940998895,\n",
+       "[0.010747596697999453,\n",
+       " 0.010747596697999453,\n",
+       " 0.010747596697999453,\n",
+       " 0.010747596697999453,\n",
        " 0.026651558685001447]"
       ]
      },
-     "execution_count": 39,
+     "execution_count": 37,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1129,7 +1080,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 40,
+   "execution_count": 38,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1138,7 +1089,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 41,
+   "execution_count": 39,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1152,7 +1103,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 42,
+   "execution_count": 40,
    "metadata": {},
    "outputs": [
     {
@@ -1162,11 +1113,11 @@
       "+--------------------+--------------------+\n",
       "|           geom_left|          geom_right|\n",
       "+--------------------+--------------------+\n",
-      "|POLYGON ((-87.285...|POINT (-87.28468 ...|\n",
-      "|POLYGON ((-87.285...|POINT (-87.278485...|\n",
-      "|POLYGON ((-87.285...|POINT (-87.280556...|\n",
-      "|POLYGON ((-87.285...|POINT (-87.28285 ...|\n",
-      "|POLYGON ((-87.229...|POINT (-87.10534 ...|\n",
+      "|POLYGON ((-86.749...|POINT (-86.736302...|\n",
+      "|POLYGON ((-86.749...|POINT (-86.735506...|\n",
+      "|POLYGON ((-86.749...|POINT (-86.68645 ...|\n",
+      "|POLYGON ((-86.749...|POINT (-86.675405...|\n",
+      "|POLYGON ((-87.229...|POINT (-87.105455...|\n",
       "+--------------------+--------------------+\n",
       "only showing top 5 rows\n",
       "\n"
@@ -1181,7 +1132,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 43,
+   "execution_count": 41,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1190,7 +1141,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 44,
+   "execution_count": 42,
    "metadata": {},
    "outputs": [
     {
@@ -1217,7 +1168,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 45,
+   "execution_count": 43,
    "metadata": {},
    "outputs": [
     {
@@ -1227,11 +1178,11 @@
       "+--------------------+-----+--------------------+-----+\n",
       "|              geom_1|attr1|              geom_2|attr2|\n",
       "+--------------------+-----+--------------------+-----+\n",
-      "|POLYGON ((-87.285...|     |POINT (-87.28468 ...|     |\n",
-      "|POLYGON ((-87.285...|     |POINT (-87.278485...|     |\n",
-      "|POLYGON ((-87.285...|     |POINT (-87.280556...|     |\n",
-      "|POLYGON ((-87.285...|     |POINT (-87.28285 ...|     |\n",
-      "|POLYGON ((-87.229...|     |POINT (-87.10534 ...|     |\n",
+      "|POLYGON ((-86.749...|     |POINT (-86.736302...|     |\n",
+      "|POLYGON ((-86.749...|     |POINT (-86.735506...|     |\n",
+      "|POLYGON ((-86.749...|     |POINT (-86.68645 ...|     |\n",
+      "|POLYGON ((-86.749...|     |POINT (-86.675405...|     |\n",
+      "|POLYGON ((-87.229...|     |POINT (-87.105455...|     |\n",
       "+--------------------+-----+--------------------+-----+\n",
       "only showing top 5 rows\n",
       "\n"
@@ -1251,7 +1202,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 46,
+   "execution_count": 44,
    "metadata": {},
    "outputs": [
     {
@@ -1288,7 +1239,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 47,
+   "execution_count": 45,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1298,7 +1249,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 48,
+   "execution_count": 46,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1307,7 +1258,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 49,
+   "execution_count": 47,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1317,7 +1268,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 50,
+   "execution_count": 48,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1329,7 +1280,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 51,
+   "execution_count": 49,
    "metadata": {},
    "outputs": [
     {
@@ -1339,16 +1290,16 @@
       "+--------------------+----------------+\n",
       "|            geometry|number_of_points|\n",
       "+--------------------+----------------+\n",
-      "|POLYGON ((-86.749...|               4|\n",
+      "|POLYGON ((-87.114...|              15|\n",
+      "|POLYGON ((-87.082...|              12|\n",
+      "|POLYGON ((-86.697...|               1|\n",
       "|POLYGON ((-87.285...|              26|\n",
-      "|POLYGON ((-87.092...|               5|\n",
+      "|POLYGON ((-87.105...|              15|\n",
       "|POLYGON ((-86.816...|               6|\n",
-      "|POLYGON ((-86.860...|              12|\n",
-      "|POLYGON ((-87.114...|              15|\n",
       "|POLYGON ((-87.229...|               7|\n",
-      "|POLYGON ((-87.105...|              15|\n",
-      "|POLYGON ((-86.697...|               1|\n",
-      "|POLYGON ((-87.082...|              12|\n",
+      "|POLYGON ((-87.092...|               5|\n",
+      "|POLYGON ((-86.749...|               4|\n",
+      "|POLYGON ((-86.860...|              12|\n",
       "+--------------------+----------------+\n",
       "\n"
      ]
@@ -1381,7 +1332,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 52,
+   "execution_count": 50,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1390,7 +1341,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 53,
+   "execution_count": 51,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1399,7 +1350,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 54,
+   "execution_count": 52,
    "metadata": {},
    "outputs": [
     {
@@ -1412,7 +1363,7 @@
        " Geometry: Point userData: ]"
       ]
      },
-     "execution_count": 54,
+     "execution_count": 52,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1430,7 +1381,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 55,
+   "execution_count": 53,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1444,7 +1395,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 56,
+   "execution_count": 54,
    "metadata": {},
    "outputs": [
     {
@@ -1457,7 +1408,7 @@
        " Geometry: Polygon userData: ]"
       ]
      },
-     "execution_count": 56,
+     "execution_count": 54,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1468,7 +1419,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 57,
+   "execution_count": 55,
    "metadata": {},
    "outputs": [
     {
@@ -1477,7 +1428,7 @@
        "'POLYGON ((-84.031975 34.043824, -84.031975 34.131247, -83.959903 34.131247, -83.959903 34.043824, -84.031975 34.043824))'"
       ]
      },
-     "execution_count": 57,
+     "execution_count": 55,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1507,7 +1458,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 58,
+   "execution_count": 56,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1518,7 +1469,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 59,
+   "execution_count": 57,
    "metadata": {},
    "outputs": [
     {
@@ -1527,7 +1478,7 @@
        "MapPartitionsRDD[126] at map at GeometryRddConverter.scala:30"
       ]
      },
-     "execution_count": 59,
+     "execution_count": 57,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1538,7 +1489,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 60,
+   "execution_count": 58,
    "metadata": {},
    "outputs": [
     {
@@ -1552,7 +1503,7 @@
        " Geometry: LineString userData: ]"
       ]
      },
-     "execution_count": 60,
+     "execution_count": 58,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1563,7 +1514,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 61,
+   "execution_count": 59,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1572,7 +1523,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 62,
+   "execution_count": 60,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1581,7 +1532,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 63,
+   "execution_count": 61,
    "metadata": {},
    "outputs": [
     {
@@ -1630,7 +1581,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 64,
+   "execution_count": 62,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1639,7 +1590,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 63,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1648,16 +1599,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 68,
+   "execution_count": 64,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "<sedona.core.SpatialRDD.spatial_rdd.SpatialRDD at 0x7fe2185f1890>"
+       "<sedona.core.SpatialRDD.spatial_rdd.SpatialRDD at 0x7f2cfdc70150>"
       ]
      },
-     "execution_count": 68,
+     "execution_count": 64,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1668,22 +1619,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 69,
+   "execution_count": 65,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "+--------------------+---+\n",
-      "|            geometry|_c1|\n",
-      "+--------------------+---+\n",
-      "|MULTIPOLYGON (((1...|   |\n",
-      "|MULTIPOLYGON (((-...|   |\n",
-      "|MULTIPOLYGON (((1...|   |\n",
-      "|POLYGON ((118.362...|   |\n",
-      "|MULTIPOLYGON (((-...|   |\n",
-      "+--------------------+---+\n",
+      "+--------------------+\n",
+      "|            geometry|\n",
+      "+--------------------+\n",
+      "|MULTIPOLYGON (((1...|\n",
+      "|MULTIPOLYGON (((-...|\n",
+      "|MULTIPOLYGON (((1...|\n",
+      "|POLYGON ((118.362...|\n",
+      "|MULTIPOLYGON (((-...|\n",
+      "+--------------------+\n",
       "only showing top 5 rows\n",
       "\n"
      ]
@@ -1695,7 +1646,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 70,
+   "execution_count": 66,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1713,7 +1664,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 71,
+   "execution_count": 67,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1722,16 +1673,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 72,
+   "execution_count": 68,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "<sedona.core.SpatialRDD.spatial_rdd.SpatialRDD at 0x7fe21854fdd0>"
+       "<sedona.core.SpatialRDD.spatial_rdd.SpatialRDD at 0x7f2cfda4fa50>"
       ]
      },
-     "execution_count": 72,
+     "execution_count": 68,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1742,7 +1693,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 73,
+   "execution_count": 69,
    "metadata": {},
    "outputs": [
     {
@@ -1769,7 +1720,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 74,
+   "execution_count": 70,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1778,7 +1729,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 75,
+   "execution_count": 71,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1787,16 +1738,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 76,
+   "execution_count": 72,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "<sedona.core.SpatialRDD.spatial_rdd.SpatialRDD at 0x7fe218577990>"
+       "<sedona.core.SpatialRDD.spatial_rdd.SpatialRDD at 0x7f2cfda31310>"
       ]
      },
-     "execution_count": 76,
+     "execution_count": 72,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1807,7 +1758,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 77,
+   "execution_count": 73,
    "metadata": {},
    "outputs": [
     {
@@ -1815,24 +1766,7 @@
      "output_type": "stream",
      "text": [
       "root\n",
-      " |-- geometry: string (nullable = true)\n",
-      " |-- _c1: string (nullable = true)\n",
-      " |-- _c2: string (nullable = true)\n",
-      " |-- _c3: string (nullable = true)\n",
-      " |-- _c4: string (nullable = true)\n",
-      " |-- _c5: string (nullable = true)\n",
-      " |-- _c6: string (nullable = true)\n",
-      " |-- _c7: string (nullable = true)\n",
-      " |-- _c8: string (nullable = true)\n",
-      " |-- _c9: string (nullable = true)\n",
-      " |-- _c10: string (nullable = true)\n",
-      " |-- _c11: string (nullable = true)\n",
-      " |-- _c12: string (nullable = true)\n",
-      " |-- _c13: string (nullable = true)\n",
-      " |-- _c14: string (nullable = true)\n",
-      " |-- _c15: string (nullable = true)\n",
-      " |-- _c16: string (nullable = true)\n",
-      " |-- _c17: string (nullable = true)\n",
+      " |-- geometry: geometry (nullable = true)\n",
       "\n"
      ]
     }
@@ -1843,22 +1777,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 78,
+   "execution_count": 74,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "+--------------------+---+---+--------+-----+---------+----------------+---+---+-----+----+-----+----+----+----------+--------+-----------+------------+\n",
-      "|            geometry|_c1|_c2|     _c3|  _c4|      _c5|             _c6|_c7|_c8|  _c9|_c10| _c11|_c12|_c13|      _c14|    _c15|       _c16|        _c17|\n",
-      "+--------------------+---+---+--------+-----+---------+----------------+---+---+-----+----+-----+----+----+----------+--------+-----------+------------+\n",
-      "|POLYGON ((-97.019...| 31|039|00835841|31039|   Cuming|   Cuming County| 06| H1|G4020|    |     |    |   A|1477895811|10447360|+41.9158651|-096.7885168|\n",
-      "|POLYGON ((-123.43...| 53|069|01513275|53069|Wahkiakum|Wahkiakum County| 06| H1|G4020|    |     |    |   A| 682138871|61658258|+46.2946377|-123.4244583|\n",
-      "|POLYGON ((-104.56...| 35|011|00933054|35011|  De Baca|  De Baca County| 06| H1|G4020|    |     |    |   A|6015539696|29159492|+34.3592729|-104.3686961|\n",
-      "|POLYGON ((-96.910...| 31|109|00835876|31109|Lancaster|Lancaster County| 06| H1|G4020| 339|30700|    |   A|2169240202|22877180|+40.7835474|-096.6886584|\n",
-      "|POLYGON ((-98.273...| 31|129|00835886|31129| Nuckolls| Nuckolls County| 06| H1|G4020|    |     |    |   A|1489645187| 1718484|+40.1764918|-098.0468422|\n",
-      "+--------------------+---+---+--------+-----+---------+----------------+---+---+-----+----+-----+----+----+----------+--------+-----------+------------+\n",
+      "+--------------------+\n",
+      "|            geometry|\n",
+      "+--------------------+\n",
+      "|POLYGON ((-97.019...|\n",
+      "|POLYGON ((-123.43...|\n",
+      "|POLYGON ((-104.56...|\n",
+      "|POLYGON ((-96.910...|\n",
+      "|POLYGON ((-98.273...|\n",
+      "+--------------------+\n",
       "only showing top 5 rows\n",
       "\n"
      ]
@@ -1870,7 +1804,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 79,
+   "execution_count": 75,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1879,7 +1813,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 80,
+   "execution_count": 76,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1888,22 +1822,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 81,
+   "execution_count": 77,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "+--------------------+---+---+--------+-----+---------+----------------+---+---+-----+----+-----+----+----+----------+--------+-----------+------------+\n",
-      "|            geometry|_c1|_c2|     _c3|  _c4|      _c5|             _c6|_c7|_c8|  _c9|_c10| _c11|_c12|_c13|      _c14|    _c15|       _c16|        _c17|\n",
-      "+--------------------+---+---+--------+-----+---------+----------------+---+---+-----+----+-----+----+----+----------+--------+-----------+------------+\n",
-      "|POLYGON ((-97.019...| 31|039|00835841|31039|   Cuming|   Cuming County| 06| H1|G4020|    |     |    |   A|1477895811|10447360|+41.9158651|-096.7885168|\n",
-      "|POLYGON ((-123.43...| 53|069|01513275|53069|Wahkiakum|Wahkiakum County| 06| H1|G4020|    |     |    |   A| 682138871|61658258|+46.2946377|-123.4244583|\n",
-      "|POLYGON ((-104.56...| 35|011|00933054|35011|  De Baca|  De Baca County| 06| H1|G4020|    |     |    |   A|6015539696|29159492|+34.3592729|-104.3686961|\n",
-      "|POLYGON ((-96.910...| 31|109|00835876|31109|Lancaster|Lancaster County| 06| H1|G4020| 339|30700|    |   A|2169240202|22877180|+40.7835474|-096.6886584|\n",
-      "|POLYGON ((-98.273...| 31|129|00835886|31129| Nuckolls| Nuckolls County| 06| H1|G4020|    |     |    |   A|1489645187| 1718484|+40.1764918|-098.0468422|\n",
-      "+--------------------+---+---+--------+-----+---------+----------------+---+---+-----+----+-----+----+----+----------+--------+-----------+------------+\n",
+      "+--------------------+\n",
+      "|            geometry|\n",
+      "+--------------------+\n",
+      "|POLYGON ((-97.019...|\n",
+      "|POLYGON ((-123.43...|\n",
+      "|POLYGON ((-104.56...|\n",
+      "|POLYGON ((-96.910...|\n",
+      "|POLYGON ((-98.273...|\n",
+      "+--------------------+\n",
       "only showing top 5 rows\n",
       "\n"
      ]
@@ -1914,6 +1848,286 @@
    ]
   },
   {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Converting RDD Spatial join result to DF directly, avoiding jvm python serde"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 78,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "point_rdd.spatialPartitioning(GridType.KDBTREE)\n",
+    "rectangle_rdd.spatialPartitioning(point_rdd.getPartitioner())\n",
+    "# building an index\n",
+    "point_rdd.buildIndex(IndexType.RTREE, True)\n",
+    "# Perform Spatial Join Query\n",
+    "result = JoinQueryRaw.SpatialJoinQueryFlat(point_rdd, rectangle_rdd, False, True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 79,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# without passing column names, the result will contain only two geometry columns\n",
+    "geometry_df = Adapter.toDf(result, spark)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 80,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "root\n",
+      " |-- leftgeometry: geometry (nullable = true)\n",
+      " |-- rightgeometry: geometry (nullable = true)\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "geometry_df.printSchema()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 81,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "+--------------------+--------------------+\n",
+      "|        leftgeometry|       rightgeometry|\n",
+      "+--------------------+--------------------+\n",
+      "|POLYGON ((-87.229...|POINT (-87.105455...|\n",
+      "|POLYGON ((-87.229...|POINT (-87.10534 ...|\n",
+      "|POLYGON ((-87.229...|POINT (-87.160372...|\n",
+      "|POLYGON ((-87.229...|POINT (-87.204033...|\n",
+      "|POLYGON ((-87.229...|POINT (-87.204299...|\n",
+      "+--------------------+--------------------+\n",
+      "only showing top 5 rows\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "geometry_df.show(5)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 82,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Row(leftgeometry=<shapely.geometry.polygon.Polygon object at 0x7f2cfda84110>, rightgeometry=<shapely.geometry.point.Point object at 0x7f2cfda84690>)"
+      ]
+     },
+     "execution_count": 82,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "geometry_df.collect()[0]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Passing column names"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 83,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "geometry_df = Adapter.toDf(result, [\"left_user_data\"], [\"right_user_data\"], spark)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 84,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "+--------------------+--------------+--------------------+---------------+\n",
+      "|        leftgeometry|left_user_data|       rightgeometry|right_user_data|\n",
+      "+--------------------+--------------+--------------------+---------------+\n",
+      "|POLYGON ((-87.229...|              |POINT (-87.105455...|           null|\n",
+      "|POLYGON ((-87.229...|              |POINT (-87.10534 ...|           null|\n",
+      "|POLYGON ((-87.229...|              |POINT (-87.160372...|           null|\n",
+      "|POLYGON ((-87.229...|              |POINT (-87.204033...|           null|\n",
+      "|POLYGON ((-87.229...|              |POINT (-87.204299...|           null|\n",
+      "+--------------------+--------------+--------------------+---------------+\n",
+      "only showing top 5 rows\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "geometry_df.show(5)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Converting RDD Spatial range query result to DF directly, avoiding jvm python serde"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 85,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "query_envelope = Envelope(-85.01, -60.01, 34.01, 50.01)\n",
+    "\n",
+    "result_range_query = RangeQueryRaw.SpatialRangeQuery(linestring_rdd, query_envelope, False, False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 86,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# converting to df\n",
+    "gdf = Adapter.toDf(result_range_query, spark)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 87,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "+--------------------+\n",
+      "|            geometry|\n",
+      "+--------------------+\n",
+      "|LINESTRING (-72.1...|\n",
+      "|LINESTRING (-72.4...|\n",
+      "|LINESTRING (-72.4...|\n",
+      "|LINESTRING (-73.4...|\n",
+      "|LINESTRING (-73.6...|\n",
+      "+--------------------+\n",
+      "only showing top 5 rows\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "gdf.show(5)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 88,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "root\n",
+      " |-- geometry: geometry (nullable = true)\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "gdf.printSchema()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 89,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Passing column names\n",
+    "# converting to df\n",
+    "gdf_with_columns = Adapter.toDf(result_range_query, spark, [\"_c1\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 90,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "+--------------------+---+\n",
+      "|            geometry|_c1|\n",
+      "+--------------------+---+\n",
+      "|LINESTRING (-72.1...|   |\n",
+      "|LINESTRING (-72.4...|   |\n",
+      "|LINESTRING (-72.4...|   |\n",
+      "|LINESTRING (-73.4...|   |\n",
+      "|LINESTRING (-73.6...|   |\n",
+      "+--------------------+---+\n",
+      "only showing top 5 rows\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "gdf_with_columns.show(5)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 91,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "root\n",
+      " |-- geometry: geometry (nullable = true)\n",
+      " |-- _c1: string (nullable = true)\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "gdf_with_columns.printSchema()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
diff --git a/python/ApacheSedonaSQL.ipynb b/python/ApacheSedonaSQL.ipynb
index 4601bda..8d68a2c 100644
--- a/python/ApacheSedonaSQL.ipynb
+++ b/python/ApacheSedonaSQL.ipynb
@@ -247,9 +247,9 @@
       "== Physical Plan ==\n",
       "DistanceJoin pointshape1#261: geometry, pointshape2#285: geometry, 2.0, false\n",
       ":- Project [st_point(cast(_c0#255 as decimal(24,20)), cast(_c1#256 as decimal(24,20))) AS pointshape1#261]\n",
-      ":  +- FileScan csv [_c0#255,_c1#256] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[file:/home/pawel/Desktop/forks/GeoSpark/python/data/testpoint.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<_c0:string,_c1:string>\n",
+      ":  +- FileScan csv [_c0#255,_c1#256] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[file:/home/pawel/Desktop/forks/incubator-sedona/python/data/testpoint.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<_c0:string,_c1:string>\n",
       "+- Project [st_point(cast(_c0#279 as decimal(24,20)), cast(_c1#280 as decimal(24,20))) AS pointshape2#285]\n",
-      "   +- FileScan csv [_c0#279,_c1#280] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[file:/home/pawel/Desktop/forks/GeoSpark/python/data/testpoint.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<_c0:string,_c1:string>\n",
+      "   +- FileScan csv [_c0#279,_c1#280] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[file:/home/pawel/Desktop/forks/incubator-sedona/python/data/testpoint.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<_c0:string,_c1:string>\n",
       "\n",
       "\n",
       "+-----------------+-----------------+\n",
@@ -455,15 +455,6 @@
       "+---------+----------+--------------------+\n",
       "|     id_1|      id_2|                geom|\n",
       "+---------+----------+--------------------+\n",
-      "| 29947498|  29947498|POINT (232447.203...|\n",
-      "| 29947498|4165181885|POINT (232447.203...|\n",
-      "| 29947498|5818905324|POINT (232447.203...|\n",
-      "| 29947498|5846858758|POINT (232447.203...|\n",
-      "| 29947499|  29947499|POINT (232208.377...|\n",
-      "| 29947499|  30077461|POINT (232208.377...|\n",
-      "| 29947505|  29947505|POINT (228595.321...|\n",
-      "| 30077461|  29947499|POINT (232185.872...|\n",
-      "| 30077461|  30077461|POINT (232185.872...|\n",
       "|197624402| 197624402|POINT (203703.035...|\n",
       "|197663196| 197663196|POINT (203936.327...|\n",
       "|197953474| 197953474|POINT (203724.746...|\n",
@@ -475,6 +466,15 @@
       "|270306609| 270306609|POINT (203639.141...|\n",
       "|270306746| 270306746|POINT (203694.827...|\n",
       "|270306746|1257728000|POINT (203694.827...|\n",
+      "|270306746|1401424769|POINT (203694.827...|\n",
+      "|275183554| 275183554|POINT (222119.004...|\n",
+      "|275183554| 275566930|POINT (222119.004...|\n",
+      "|275183554|5339602517|POINT (222119.004...|\n",
+      "|275183554|5339602518|POINT (222119.004...|\n",
+      "|275183554|5339602519|POINT (222119.004...|\n",
+      "|275183554|5339602520|POINT (222119.004...|\n",
+      "|275183903| 275183903|POINT (222167.415...|\n",
+      "|275183903|1244226205|POINT (222167.415...|\n",
       "+---------+----------+--------------------+\n",
       "only showing top 20 rows\n",
       "\n"
@@ -580,27 +580,27 @@
        "    </tr>\n",
        "    <tr>\n",
        "      <th>65670</th>\n",
-       "      <td>6797128792</td>\n",
-       "      <td>6797128792</td>\n",
-       "      <td>POINT (245819.115 547966.886)</td>\n",
+       "      <td>6785548357</td>\n",
+       "      <td>2276133152</td>\n",
+       "      <td>POINT (254859.612 569916.156)</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>65671</th>\n",
-       "      <td>6805686891</td>\n",
-       "      <td>904970835</td>\n",
-       "      <td>POINT (219509.560 518667.674)</td>\n",
+       "      <td>6785548357</td>\n",
+       "      <td>6785548357</td>\n",
+       "      <td>POINT (254859.612 569916.156)</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>65672</th>\n",
-       "      <td>6805686891</td>\n",
-       "      <td>6805686891</td>\n",
-       "      <td>POINT (219509.560 518667.674)</td>\n",
+       "      <td>6785548358</td>\n",
+       "      <td>6785548358</td>\n",
+       "      <td>POINT (255246.168 569632.391)</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>65673</th>\n",
-       "      <td>6805686914</td>\n",
-       "      <td>6805686914</td>\n",
-       "      <td>POINT (219600.284 518778.349)</td>\n",
+       "      <td>6794972812</td>\n",
+       "      <td>6794972812</td>\n",
+       "      <td>POINT (246450.694 546941.569)</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>65674</th>\n",
@@ -621,10 +621,10 @@
        "3       262310516   262310516  POINT (203507.731 417345.373)\n",
        "4       262310516  1074233123  POINT (203507.731 417345.373)\n",
        "...           ...         ...                            ...\n",
-       "65670  6797128792  6797128792  POINT (245819.115 547966.886)\n",
-       "65671  6805686891   904970835  POINT (219509.560 518667.674)\n",
-       "65672  6805686891  6805686891  POINT (219509.560 518667.674)\n",
-       "65673  6805686914  6805686914  POINT (219600.284 518778.349)\n",
+       "65670  6785548357  2276133152  POINT (254859.612 569916.156)\n",
+       "65671  6785548357  6785548357  POINT (254859.612 569916.156)\n",
+       "65672  6785548358  6785548358  POINT (255246.168 569632.391)\n",
+       "65673  6794972812  6794972812  POINT (246450.694 546941.569)\n",
        "65674  6817416704  6817416704  POINT (286325.570 557253.517)\n",
        "\n",
        "[65675 rows x 3 columns]"
diff --git a/python/Pipfile b/python/Pipfile
index 308e5d5..8e596d3 100644
--- a/python/Pipfile
+++ b/python/Pipfile
@@ -7,6 +7,8 @@ verify_ssl = true
 pytest="*"
 notebook="==6.0.0"
 jupyter="*"
+mkdocs="*"
+pytest-cov = "*"
 
 [packages]
 pandas="*"
diff --git a/python/sedona/core/spatialOperator/__init__.py b/python/sedona/core/spatialOperator/__init__.py
index 4ab8573..1022e39 100644
--- a/python/sedona/core/spatialOperator/__init__.py
+++ b/python/sedona/core/spatialOperator/__init__.py
@@ -18,7 +18,9 @@
 from .join_query import JoinQuery
 from .range_query import RangeQuery
 from .knn_query import KNNQuery
+from .join_query_raw import JoinQueryRaw
+from .range_query_raw import RangeQueryRaw
 
 __all__ = [
-    "JoinQuery", "RangeQuery", "KNNQuery"
+    "JoinQuery", "RangeQuery", "KNNQuery", "JoinQueryRaw", "RangeQueryRaw"
 ]
diff --git a/python/sedona/core/spatialOperator/rdd.py b/python/sedona/core/spatialOperator/rdd.py
index cf7ba83..61969a1 100644
--- a/python/sedona/core/spatialOperator/rdd.py
+++ b/python/sedona/core/spatialOperator/rdd.py
@@ -15,12 +15,8 @@
 #  specific language governing permissions and limitations
 #  under the License.
 
-from typing import List, Optional
-
 from pyspark import SparkContext, RDD
-from pyspark.sql import SparkSession, DataFrame
 
-from sedona.core.SpatialRDD import SpatialRDD
 from sedona.core.jvm.translate import JvmSedonaPythonConverter
 from sedona.utils.spatial_rdd_parser import SedonaPickler
 
@@ -31,15 +27,6 @@ class SedonaRDD:
         self.jsrdd = jsrdd
         self.sc = sc
 
-    def to_df(self, spark: SparkSession, field_names: List[str] = None) -> DataFrame:
-        from sedona.utils.adapter import Adapter
-        srdd = SpatialRDD(self.sc)
-        srdd.setRawSpatialRDD(self.jsrdd)
-        if field_names:
-            return Adapter.toDf(srdd, field_names, spark)
-        else:
-            return Adapter.toDf(srdd, spark)
-
     def to_rdd(self) -> RDD:
         jvm = self.sc._jvm
         serialized = JvmSedonaPythonConverter(jvm). \
@@ -54,20 +41,6 @@ class SedonaPairRDD:
         self.jsrdd = jsrdd
         self.sc = sc
 
-    def to_df(self, spark: SparkSession,
-              left_field_names: Optional[List] = None,
-              right_field_names: Optional[List] = None) -> DataFrame:
-        from sedona.utils.adapter import Adapter
-        if left_field_names is not None and right_field_names is not None:
-            df = Adapter.toDf(self, left_field_names, right_field_names, spark)
-            return df
-
-        elif left_field_names is None and right_field_names is None:
-            df = Adapter.toDf(self, spark)
-            return df
-        else:
-            raise AttributeError("when passing left_field_names you have also pass right_field_names and reverse")
-
     def to_rdd(self) -> RDD:
         jvm = self.sc._jvm
         serialized = JvmSedonaPythonConverter(jvm). \
diff --git a/python/sedona/utils/adapter.py b/python/sedona/utils/adapter.py
index 260e036..8168efb 100644
--- a/python/sedona/utils/adapter.py
+++ b/python/sedona/utils/adapter.py
@@ -22,7 +22,7 @@ from pyspark.sql import DataFrame, SparkSession
 
 from sedona.core.SpatialRDD.spatial_rdd import SpatialRDD
 from sedona.core.enums.spatial import SpatialType
-from sedona.core.spatialOperator.rdd import SedonaPairRDD
+from sedona.core.spatialOperator.rdd import SedonaPairRDD, SedonaRDD
 from sedona.utils.meta import MultipleMeta
 
 
@@ -160,3 +160,12 @@ class Adapter(metaclass=MultipleMeta):
             rawPairRDD.jsrdd, leftFieldnames, rightFieldNames, sparkSession._jsparkSession)
         df = DataFrame(jdf, sparkSession._wrapped)
         return df
+
+    @classmethod
+    def toDf(cls, spatialRDD: SedonaRDD, spark: SparkSession, fieldNames: List = None) -> DataFrame:
+        srdd = SpatialRDD(spatialRDD.sc)
+        srdd.setRawSpatialRDD(spatialRDD.jsrdd)
+        if fieldNames:
+            return Adapter.toDf(srdd, fieldNames, spark)
+        else:
+            return Adapter.toDf(srdd, spark)
diff --git a/python/tests/core/test_avoiding_python_jvm_serde_df.py b/python/tests/core/test_avoiding_python_jvm_serde_df.py
index 3b78bce..a67de70 100644
--- a/python/tests/core/test_avoiding_python_jvm_serde_df.py
+++ b/python/tests/core/test_avoiding_python_jvm_serde_df.py
@@ -23,6 +23,7 @@ from sedona.core.spatialOperator.join_params import JoinParams
 from sedona.core.spatialOperator.join_query_raw import JoinQueryRaw
 from sedona.core.spatialOperator.range_query_raw import RangeQueryRaw
 from sedona.sql.types import GeometryType
+from sedona.utils.adapter import Adapter
 from tests.test_base import TestBase
 
 import os
@@ -47,9 +48,7 @@ class TestOmitPythonJvmSerdeToDf(TestBase):
         areas_polygon_rdd.spatialPartitioning(poi_point_rdd.getPartitioner())
 
         jvm_sedona_rdd = JoinQueryRaw.spatialJoin(poi_point_rdd, areas_polygon_rdd, JoinParams())
-        sedona_df = jvm_sedona_rdd.to_df(spark=self.spark,
-                                         left_field_names=["area_id", "area_name"],
-                                         right_field_names=["poi_id", "poi_name"])
+        sedona_df = Adapter.toDf(jvm_sedona_rdd, ["area_id", "area_name"], ["poi_id", "poi_name"], self.spark)
 
         assert sedona_df.count() == 5
         assert sedona_df.columns == ["leftgeometry", "area_id", "area_name", "rightgeometry",
@@ -66,11 +65,11 @@ class TestOmitPythonJvmSerdeToDf(TestBase):
         circle_rdd.spatialPartitioning(poi_point_rdd.getPartitioner())
 
         jvm_sedona_rdd = JoinQueryRaw.DistanceJoinQueryFlat(poi_point_rdd, circle_rdd, False, True)
-
-        df_sedona_rdd = jvm_sedona_rdd.to_df(
-            self.spark,
-            left_field_names=["poi_from_id", "poi_from_name"],
-            right_field_names=["poi_to_id", "poi_to_name"]
+        df_sedona_rdd = Adapter.toDf(
+            jvm_sedona_rdd,
+            ["poi_from_id", "poi_from_name"],
+            ["poi_to_id", "poi_to_name"],
+            self.spark
         )
 
         assert df_sedona_rdd.count() == 10
@@ -95,14 +94,15 @@ class TestOmitPythonJvmSerdeToDf(TestBase):
         jvm_sedona_rdd = JoinQueryRaw.SpatialJoinQueryFlat(
             poi_point_rdd, areas_polygon_rdd, False, True)
 
-        pois_within_areas_with_default_column_names = jvm_sedona_rdd.to_df(self.spark)
+        pois_within_areas_with_default_column_names = Adapter.toDf(jvm_sedona_rdd, self.spark)
 
         assert pois_within_areas_with_default_column_names.count() == 5
 
-        pois_within_areas_with_passed_column_names = jvm_sedona_rdd.to_df(
-            spark=self.spark,
-            left_field_names=["area_id", "area_name"],
-            right_field_names=["poi_id", "poi_name"]
+        pois_within_areas_with_passed_column_names = Adapter.toDf(
+            jvm_sedona_rdd,
+            ["area_id", "area_name"],
+            ["poi_id", "poi_name"],
+            self.spark
         )
 
         assert pois_within_areas_with_passed_column_names.count() == 5
@@ -159,7 +159,7 @@ class TestOmitPythonJvmSerdeToDf(TestBase):
 
         assert rdd.collect().__len__() == 4
 
-        df_without_column_names = result.to_df(self.spark)
+        df_without_column_names = Adapter.toDf(result, self.spark)
 
         raw_geometries = self.__row_to_list(
             df_without_column_names.collect()
@@ -171,7 +171,7 @@ class TestOmitPythonJvmSerdeToDf(TestBase):
         assert df_without_column_names.count() == 4
         assert df_without_column_names.schema == StructType([StructField("geometry", GeometryType())])
 
-        df = result.to_df(self.spark, field_names=["poi_id", "poi_name"])
+        df = Adapter.toDf(result, self.spark, ["poi_id", "poi_name"])
 
         assert df.count() == 4
         assert df.columns == ["geometry", "poi_id", "poi_name"]