You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sedona.apache.org by ji...@apache.org on 2022/06/26 06:40:56 UTC
[incubator-sedona] branch master updated: [SEDONA-129] Bump spark to 3.3.0 (#636)
This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-sedona.git
The following commit(s) were added to refs/heads/master by this push:
new 2d0c51f8 [SEDONA-129] Bump spark to 3.3.0 (#636)
2d0c51f8 is described below
commit 2d0c51f8c3d233029d1b9d6bd0401fa8bdca6469
Author: Adam Binford <ad...@gmail.com>
AuthorDate: Sun Jun 26 02:40:50 2022 -0400
[SEDONA-129] Bump spark to 3.3.0 (#636)
---
.github/workflows/java.yml | 15 ++++++---------
.github/workflows/python.yml | 26 ++++++++++++++++++++------
pom.xml | 6 +++---
python/sedona/utils/adapter.py | 18 ++++++++++++++----
python/sedona/utils/spatial_rdd_parser.py | 7 +++++--
5 files changed, 48 insertions(+), 24 deletions(-)
diff --git a/.github/workflows/java.yml b/.github/workflows/java.yml
index ebc5f00f..6d8f7b59 100644
--- a/.github/workflows/java.yml
+++ b/.github/workflows/java.yml
@@ -14,18 +14,15 @@ jobs:
runs-on: ubuntu-18.04
strategy:
matrix:
- spark: [2.4.8, 3.0.3, 3.1.2, 3.2.0]
- scala: [2.11.8, 2.12.15]
- exclude:
- - spark: 3.2.0
- scala: 2.11.8
- - spark: 3.1.2
- scala: 2.11.8
- - spark: 3.0.3
- scala: 2.11.8
+ spark: [2.4.8, 3.0.3, 3.1.2, 3.2.0, 3.3.0]
+ scala: [2.12.15]
include:
+ - spark: 3.3.0
+ scala: 2.13.8
- spark: 3.2.0
scala: 2.13.5
+ - spark: 2.4.8
+ scala: 2.11.8
steps:
- uses: actions/checkout@v2
diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
index b3b09c3c..52d23b78 100644
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -15,24 +15,34 @@ jobs:
strategy:
matrix:
include:
- - spark: 3.2.0
+ - spark: 3.3.0
scala: 2.12.8
python: 3.9
- - spark: 3.2.0
+ hadoop: 3
+ - spark: 3.3.0
scala: 2.12.8
python: 3.8
+ hadoop: 3
+ - spark: 3.3.0
+ scala: 2.12.8
+ python: 3.7
+ hadoop: 3
- spark: 3.2.0
scala: 2.12.8
python: 3.7
+ hadoop: 2.7
- spark: 3.1.2
scala: 2.12.8
python: 3.7
+ hadoop: 2.7
- spark: 3.0.3
scala: 2.12.8
python: 3.7
+ hadoop: 2.7
- spark: 2.4.8
scala: 2.11.8
python: 3.7
+ hadoop: 2.7
steps:
- uses: actions/checkout@v2
@@ -55,10 +65,12 @@ jobs:
run: if [ ${SPARK_VERSION:0:1} == "3" ]; then mvn -q clean install -DskipTests -Dscala=${SCALA_VERSION:0:4} -Dspark=3.0 -Dgeotools ; else mvn -q clean install -DskipTests -Dscala=${SCALA_VERSION:0:4} -Dspark=2.4 -Dgeotools ; fi
- env:
SPARK_VERSION: ${{ matrix.spark }}
- run: wget https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop2.7.tgz
+ HADOOP_VERSION: ${{ matrix.hadoop }}
+ run: wget https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz
- env:
SPARK_VERSION: ${{ matrix.spark }}
- run: tar -xzf spark-${SPARK_VERSION}-bin-hadoop2.7.tgz
+ HADOOP_VERSION: ${{ matrix.hadoop }}
+ run: tar -xzf spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz
- run: sudo apt-get -y install python3-pip python-dev libgeos-dev
- run: sudo pip3 install -U setuptools
- run: sudo pip3 install -U wheel
@@ -70,7 +82,9 @@ jobs:
run: (cd python;pipenv --python ${PYTHON_VERSION};pipenv install pyspark==${SPARK_VERSION};pipenv install --dev;pipenv graph)
- env:
SPARK_VERSION: ${{ matrix.spark }}
- run: find python-adapter/target -name sedona-* -exec cp {} spark-${SPARK_VERSION}-bin-hadoop2.7/jars/ \;
+ HADOOP_VERSION: ${{ matrix.hadoop }}
+ run: find python-adapter/target -name sedona-* -exec cp {} spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}/jars/ \;
- env:
SPARK_VERSION: ${{ matrix.spark }}
- run: (export SPARK_HOME=$PWD/spark-${SPARK_VERSION}-bin-hadoop2.7;export PYTHONPATH=$SPARK_HOME/python;cd python;pipenv run pytest tests)
\ No newline at end of file
+ HADOOP_VERSION: ${{ matrix.hadoop }}
+ run: (export SPARK_HOME=$PWD/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION};export PYTHONPATH=$SPARK_HOME/python;cd python;pipenv run pytest tests)
\ No newline at end of file
diff --git a/pom.xml b/pom.xml
index b798e690..27b89854 100644
--- a/pom.xml
+++ b/pom.xml
@@ -529,10 +529,10 @@
<activeByDefault>true</activeByDefault>
</activation>
<properties>
- <spark.version>3.2.0</spark.version>
+ <spark.version>3.3.0</spark.version>
<spark.compat.version>3.0</spark.compat.version>
<spark.converter.version>spark3</spark.converter.version>
- <jackson.version>2.12.5</jackson.version>
+ <jackson.version>2.13.3</jackson.version>
<maven.deploy.skip>false</maven.deploy.skip>
</properties>
</profile>
@@ -563,7 +563,7 @@
<activeByDefault>false</activeByDefault>
</activation>
<properties>
- <scala.version>2.13.5</scala.version>
+ <scala.version>2.13.8</scala.version>
<scala.compat.version>2.13</scala.compat.version>
<scaladoc.arg>-no-java-comments</scaladoc.arg>
</properties>
diff --git a/python/sedona/utils/adapter.py b/python/sedona/utils/adapter.py
index 8168efb2..fb8b2aca 100644
--- a/python/sedona/utils/adapter.py
+++ b/python/sedona/utils/adapter.py
@@ -31,6 +31,16 @@ class Adapter(metaclass=MultipleMeta):
Class which allow to convert between Spark DataFrame and SpatialRDD and reverse.
"""
+ @staticmethod
+ def _create_dataframe(jdf, sparkSession: SparkSession) -> DataFrame:
+ if hasattr(sparkSession, '_wrapped'):
+ # In Spark < 3.3, use the _wrapped SQLContext
+ return DataFrame(jdf, sparkSession._wrapped)
+ else:
+ # In Spark >= 3.3, use the session directly
+ return DataFrame(jdf, sparkSession)
+
+
@classmethod
def toRdd(cls, dataFrame: DataFrame) -> 'JvmSpatialRDD':
from sedona.core.SpatialRDD.spatial_rdd import JvmSpatialRDD
@@ -92,7 +102,7 @@ class Adapter(metaclass=MultipleMeta):
jdf = jvm.PythonAdapterWrapper.toDf(spatialRDD._srdd, fieldNames, sparkSession._jsparkSession)
- df = DataFrame(jdf, sparkSession._wrapped)
+ df = Adapter._create_dataframe(jdf, sparkSession)
return df
@@ -109,7 +119,7 @@ class Adapter(metaclass=MultipleMeta):
jdf = jvm.Adapter.toDf(spatialRDD._srdd, sparkSession._jsparkSession)
- df = DataFrame(jdf, sparkSession._wrapped)
+ df = Adapter._create_dataframe(jdf, sparkSession)
return df
@@ -150,7 +160,7 @@ class Adapter(metaclass=MultipleMeta):
def toDf(cls, rawPairRDD: SedonaPairRDD, sparkSession: SparkSession):
jvm = sparkSession._jvm
jdf = jvm.Adapter.toDf(rawPairRDD.jsrdd, sparkSession._jsparkSession)
- df = DataFrame(jdf, sparkSession._wrapped)
+ df = Adapter._create_dataframe(jdf, sparkSession)
return df
@classmethod
@@ -158,7 +168,7 @@ class Adapter(metaclass=MultipleMeta):
jvm = sparkSession._jvm
jdf = jvm.PythonAdapterWrapper.toDf(
rawPairRDD.jsrdd, leftFieldnames, rightFieldNames, sparkSession._jsparkSession)
- df = DataFrame(jdf, sparkSession._wrapped)
+ df = Adapter._create_dataframe(jdf, sparkSession)
return df
@classmethod
diff --git a/python/sedona/utils/spatial_rdd_parser.py b/python/sedona/utils/spatial_rdd_parser.py
index f897ecd9..7f963444 100644
--- a/python/sedona/utils/spatial_rdd_parser.py
+++ b/python/sedona/utils/spatial_rdd_parser.py
@@ -22,7 +22,10 @@ from typing import List, Any
import attr
from shapely.geometry.base import BaseGeometry
-from pyspark import PickleSerializer
+try:
+ from pyspark import CPickleSerializer
+except ImportError:
+ from pyspark import PickleSerializer as CPickleSerializer
from shapely.wkb import dumps
from sedona.core.geom.circle import Circle
@@ -177,7 +180,7 @@ PARSERS = {
}
-class SedonaPickler(PickleSerializer):
+class SedonaPickler(CPickleSerializer):
def __init__(self):
super().__init__()