Posted to commits@spark.apache.org by do...@apache.org on 2020/02/20 17:11:30 UTC

[spark] branch branch-3.0 updated: [SPARK-30884][PYSPARK] Upgrade to Py4J 0.10.9

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 30e0e46  [SPARK-30884][PYSPARK] Upgrade to Py4J 0.10.9
30e0e46 is described below

commit 30e0e46f40b88daa7a0711350ea3129ba29fedc4
Author: Dongjoon Hyun <dh...@apple.com>
AuthorDate: Thu Feb 20 09:08:33 2020 -0800

    [SPARK-30884][PYSPARK] Upgrade to Py4J 0.10.9
    
    This PR aims to upgrade Py4J to `0.10.9` for better Python 3.7 support in Apache Spark 3.0.0 (master/branch-3.0). This is not for `branch-2.4`.
    
    - Apache Spark 3.0.0 currently uses `Py4J 0.10.8.1` (released on 2018-10-21), which was the first official release to support Python 3.7.
        - https://www.py4j.org/changelog.html#py4j-0-10-8-and-py4j-0-10-8-1
    - `Py4J 0.10.9` was released on January 25th 2020 with better Python 3.7 support and a `magic_member` bug fix.
        - https://github.com/bartdag/py4j/releases/tag/0.10.9
        - https://www.py4j.org/changelog.html#py4j-0-10-9
    
    No user-facing change.
    
    Tested by passing Jenkins with the existing tests.
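    As a quick sanity check (a minimal sketch, not part of this patch), the Py4J version that actually lands on the Python path can be confirmed from any Python interpreter that has the bundled sources on its PYTHONPATH:
    
        # Py4J exposes its version string in py4j.version; after this change
        # it is expected to print 0.10.9.
        from py4j.version import __version__ as py4j_version
        print(py4j_version)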
    
    Closes #27641 from dongjoon-hyun/SPARK-30884.
    
    Authored-by: Dongjoon Hyun <dh...@apple.com>
    Signed-off-by: Dongjoon Hyun <dh...@apple.com>
    (cherry picked from commit fc4e56a54c15e20baf085e6061d3d83f5ce1185d)
    Signed-off-by: Dongjoon Hyun <dh...@apple.com>
---
 bin/pyspark                                             |   2 +-
 bin/pyspark2.cmd                                        |   2 +-
 core/pom.xml                                            |   2 +-
 .../scala/org/apache/spark/api/python/PythonUtils.scala |   2 +-
 dev/deps/spark-deps-hadoop-2.7-hive-1.2                 |   2 +-
 dev/deps/spark-deps-hadoop-2.7-hive-2.3                 |   2 +-
 dev/deps/spark-deps-hadoop-3.2-hive-2.3                 |   2 +-
 python/README.md                                        |   2 +-
 python/docs/Makefile                                    |   2 +-
 python/docs/make2.bat                                   |   2 +-
 python/lib/py4j-0.10.8.1-src.zip                        | Bin 41255 -> 0 bytes
 python/lib/py4j-0.10.9-src.zip                          | Bin 0 -> 41587 bytes
 python/setup.py                                         |   2 +-
 sbin/spark-config.sh                                    |   2 +-
 14 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/bin/pyspark b/bin/pyspark
index 44891ae..ad4132f 100755
--- a/bin/pyspark
+++ b/bin/pyspark
@@ -50,7 +50,7 @@ export PYSPARK_DRIVER_PYTHON_OPTS
 
 # Add the PySpark classes to the Python path:
 export PYTHONPATH="${SPARK_HOME}/python/:$PYTHONPATH"
-export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.8.1-src.zip:$PYTHONPATH"
+export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.9-src.zip:$PYTHONPATH"
 
 # Load the PySpark shell.py script when ./pyspark is used interactively:
 export OLD_PYTHONSTARTUP="$PYTHONSTARTUP"
diff --git a/bin/pyspark2.cmd b/bin/pyspark2.cmd
index 479fd46..dc34be1 100644
--- a/bin/pyspark2.cmd
+++ b/bin/pyspark2.cmd
@@ -30,7 +30,7 @@ if "x%PYSPARK_DRIVER_PYTHON%"=="x" (
 )
 
 set PYTHONPATH=%SPARK_HOME%\python;%PYTHONPATH%
-set PYTHONPATH=%SPARK_HOME%\python\lib\py4j-0.10.8.1-src.zip;%PYTHONPATH%
+set PYTHONPATH=%SPARK_HOME%\python\lib\py4j-0.10.9-src.zip;%PYTHONPATH%
 
 set OLD_PYTHONSTARTUP=%PYTHONSTARTUP%
 set PYTHONSTARTUP=%SPARK_HOME%\python\pyspark\shell.py
diff --git a/core/pom.xml b/core/pom.xml
index 9d54d21..11faf6b 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -414,7 +414,7 @@
     <dependency>
       <groupId>net.sf.py4j</groupId>
       <artifactId>py4j</artifactId>
-      <version>0.10.8.1</version>
+      <version>0.10.9</version>
     </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>
diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala
index 62d6047..490b487 100644
--- a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala
+++ b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala
@@ -27,7 +27,7 @@ import org.apache.spark.SparkContext
 import org.apache.spark.api.java.{JavaRDD, JavaSparkContext}
 
 private[spark] object PythonUtils {
-  val PY4J_ZIP_NAME = "py4j-0.10.8.1-src.zip"
+  val PY4J_ZIP_NAME = "py4j-0.10.9-src.zip"
 
   /** Get the PYTHONPATH for PySpark, either from SPARK_HOME, if it is set, or from our JAR */
   def sparkPythonPath: String = {
diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-1.2 b/dev/deps/spark-deps-hadoop-2.7-hive-1.2
index 62d5772..113b7d7 100644
--- a/dev/deps/spark-deps-hadoop-2.7-hive-1.2
+++ b/dev/deps/spark-deps-hadoop-2.7-hive-1.2
@@ -174,7 +174,7 @@ parquet-hadoop-bundle/1.6.0//parquet-hadoop-bundle-1.6.0.jar
 parquet-hadoop/1.10.1//parquet-hadoop-1.10.1.jar
 parquet-jackson/1.10.1//parquet-jackson-1.10.1.jar
 protobuf-java/2.5.0//protobuf-java-2.5.0.jar
-py4j/0.10.8.1//py4j-0.10.8.1.jar
+py4j/0.10.9//py4j-0.10.9.jar
 pyrolite/4.30//pyrolite-4.30.jar
 scala-collection-compat_2.12/2.1.1//scala-collection-compat_2.12-2.1.1.jar
 scala-compiler/2.12.10//scala-compiler-2.12.10.jar
diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3
index 4f4d8b1..7b7423a 100644
--- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3
+++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3
@@ -188,7 +188,7 @@ parquet-format/2.4.0//parquet-format-2.4.0.jar
 parquet-hadoop/1.10.1//parquet-hadoop-1.10.1.jar
 parquet-jackson/1.10.1//parquet-jackson-1.10.1.jar
 protobuf-java/2.5.0//protobuf-java-2.5.0.jar
-py4j/0.10.8.1//py4j-0.10.8.1.jar
+py4j/0.10.9//py4j-0.10.9.jar
 pyrolite/4.30//pyrolite-4.30.jar
 scala-collection-compat_2.12/2.1.1//scala-collection-compat_2.12-2.1.1.jar
 scala-compiler/2.12.10//scala-compiler-2.12.10.jar
diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3
index 18e4246..4dddbba 100644
--- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3
+++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3
@@ -203,7 +203,7 @@ parquet-format/2.4.0//parquet-format-2.4.0.jar
 parquet-hadoop/1.10.1//parquet-hadoop-1.10.1.jar
 parquet-jackson/1.10.1//parquet-jackson-1.10.1.jar
 protobuf-java/2.5.0//protobuf-java-2.5.0.jar
-py4j/0.10.8.1//py4j-0.10.8.1.jar
+py4j/0.10.9//py4j-0.10.9.jar
 pyrolite/4.30//pyrolite-4.30.jar
 re2j/1.1//re2j-1.1.jar
 scala-collection-compat_2.12/2.1.1//scala-collection-compat_2.12-2.1.1.jar
diff --git a/python/README.md b/python/README.md
index 430efe5..7e2cd13 100644
--- a/python/README.md
+++ b/python/README.md
@@ -29,4 +29,4 @@ The Python packaging for Spark is not intended to replace all of the other use c
 
 ## Python Requirements
 
-At its core PySpark depends on Py4J (currently version 0.10.8.1), but some additional sub-packages have their own extra requirements for some features (including numpy, pandas, and pyarrow).
+At its core PySpark depends on Py4J, but some additional sub-packages have their own extra requirements for some features (including numpy, pandas, and pyarrow).
diff --git a/python/docs/Makefile b/python/docs/Makefile
index 66d3fc4..b11e358 100644
--- a/python/docs/Makefile
+++ b/python/docs/Makefile
@@ -7,7 +7,7 @@ SPHINXBUILD   ?= sphinx-build
 PAPER         ?=
 BUILDDIR      ?= _build
 
-export PYTHONPATH=$(realpath ..):$(realpath ../lib/py4j-0.10.8.1-src.zip)
+export PYTHONPATH=$(realpath ..):$(realpath ../lib/py4j-0.10.9-src.zip)
 
 # User-friendly check for sphinx-build
 ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
diff --git a/python/docs/make2.bat b/python/docs/make2.bat
index 742df37..2d7d929 100644
--- a/python/docs/make2.bat
+++ b/python/docs/make2.bat
@@ -2,7 +2,7 @@
 
 REM Command file for Sphinx documentation
 
-set PYTHONPATH=..;..\lib\py4j-0.10.8.1-src.zip
+set PYTHONPATH=..;..\lib\py4j-0.10.9-src.zip
 
 
 if "%SPHINXBUILD%" == "" (
diff --git a/python/lib/py4j-0.10.8.1-src.zip b/python/lib/py4j-0.10.8.1-src.zip
deleted file mode 100644
index 1b5dede..0000000
Binary files a/python/lib/py4j-0.10.8.1-src.zip and /dev/null differ
diff --git a/python/lib/py4j-0.10.9-src.zip b/python/lib/py4j-0.10.9-src.zip
new file mode 100644
index 0000000..2c49836
Binary files /dev/null and b/python/lib/py4j-0.10.9-src.zip differ
diff --git a/python/setup.py b/python/setup.py
index 40b49aa..622e607 100755
--- a/python/setup.py
+++ b/python/setup.py
@@ -206,7 +206,7 @@ try:
             'pyspark.examples.src.main.python': ['*.py', '*/*.py']},
         scripts=scripts,
         license='http://www.apache.org/licenses/LICENSE-2.0',
-        install_requires=['py4j==0.10.8.1'],
+        install_requires=['py4j==0.10.9'],
         extras_require={
             'ml': ['numpy>=1.7'],
             'mllib': ['numpy>=1.7'],
diff --git a/sbin/spark-config.sh b/sbin/spark-config.sh
index 0771e2a..b53442e 100755
--- a/sbin/spark-config.sh
+++ b/sbin/spark-config.sh
@@ -28,6 +28,6 @@ export SPARK_CONF_DIR="${SPARK_CONF_DIR:-"${SPARK_HOME}/conf"}"
 # Add the PySpark classes to the PYTHONPATH:
 if [ -z "${PYSPARK_PYTHONPATH_SET}" ]; then
   export PYTHONPATH="${SPARK_HOME}/python:${PYTHONPATH}"
-  export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.8.1-src.zip:${PYTHONPATH}"
+  export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.9-src.zip:${PYTHONPATH}"
   export PYSPARK_PYTHONPATH_SET=1
 fi
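
For reference, a small sketch (not part of the commit) of how to confirm which Py4J source zip a local Spark checkout will put on the PYTHONPATH after this change; `_find_spark_home` is the helper that PySpark's launcher scripts already use to locate SPARK_HOME:

    import glob
    import os

    from pyspark.find_spark_home import _find_spark_home

    # The bin/pyspark and sbin/spark-config.sh changes above prepend exactly one
    # py4j-*-src.zip from $SPARK_HOME/python/lib; after this commit that should
    # be py4j-0.10.9-src.zip.
    spark_home = _find_spark_home()
    print(glob.glob(os.path.join(spark_home, "python", "lib", "py4j-*-src.zip")))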


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org