You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by va...@apache.org on 2018/05/09 18:02:37 UTC
spark git commit: [SPARK-21278][PYSPARK] Upgrade to Py4J 0.10.6
Repository: spark
Updated Branches:
refs/heads/branch-2.1 c4ecc04c6 -> 8177b2148
[SPARK-21278][PYSPARK] Upgrade to Py4J 0.10.6
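This PR upgrades Py4J from 0.10.4 to 0.10.6 in order to fix the following float/double bug, in which a double literal loses precision (compare the BEFORE and AFTER output below).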
Py4J 0.10.5 fixes this (https://github.com/bartdag/py4j/issues/272) and the latest Py4J is 0.10.6.
**BEFORE**
```
>>> df = spark.range(1)
>>> df.select(df['id'] + 17.133574204226083).show()
+--------------------+
|(id + 17.1335742042)|
+--------------------+
| 17.1335742042|
+--------------------+
```
**AFTER**
```
>>> df = spark.range(1)
>>> df.select(df['id'] + 17.133574204226083).show()
+-------------------------+
|(id + 17.133574204226083)|
+-------------------------+
| 17.133574204226083|
+-------------------------+
```
Tested manually.
Author: Dongjoon Hyun <do...@apache.org>
Closes #18546 from dongjoon-hyun/SPARK-21278.
(cherry picked from commit c8d0aba198c0f593c2b6b656c23b3d0fb7ea98a2)
Signed-off-by: Marcelo Vanzin <va...@cloudera.com>
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8177b214
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8177b214
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8177b214
Branch: refs/heads/branch-2.1
Commit: 8177b214899320ea55558cf18666f31e4653a42b
Parents: c4ecc04
Author: Dongjoon Hyun <do...@apache.org>
Authored: Wed Jul 5 16:33:23 2017 -0700
Committer: Marcelo Vanzin <va...@cloudera.com>
Committed: Tue May 8 12:15:34 2018 -0700
----------------------------------------------------------------------
LICENSE | 2 +-
bin/pyspark | 2 +-
bin/pyspark2.cmd | 2 +-
core/pom.xml | 2 +-
.../org/apache/spark/api/python/PythonUtils.scala | 2 +-
dev/deps/spark-deps-hadoop-2.6 | 2 +-
dev/deps/spark-deps-hadoop-2.7 | 2 +-
python/README.md | 2 +-
python/docs/Makefile | 2 +-
python/lib/py4j-0.10.4-src.zip | Bin 74096 -> 0 bytes
python/lib/py4j-0.10.6-src.zip | Bin 0 -> 80352 bytes
python/setup.py | 2 +-
sbin/spark-config.sh | 2 +-
.../org/apache/spark/deploy/yarn/Client.scala | 2 +-
.../spark/deploy/yarn/YarnClusterSuite.scala | 2 +-
15 files changed, 13 insertions(+), 13 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/8177b214/LICENSE
----------------------------------------------------------------------
diff --git a/LICENSE b/LICENSE
index 119ecbe..02aaef2 100644
--- a/LICENSE
+++ b/LICENSE
@@ -263,7 +263,7 @@ The text of each license is also included at licenses/LICENSE-[project].txt.
(New BSD license) Protocol Buffer Java API (org.spark-project.protobuf:protobuf-java:2.4.1-shaded - http://code.google.com/p/protobuf)
(The BSD License) Fortran to Java ARPACK (net.sourceforge.f2j:arpack_combined_all:0.1 - http://f2j.sourceforge.net)
(The BSD License) xmlenc Library (xmlenc:xmlenc:0.52 - http://xmlenc.sourceforge.net)
- (The New BSD License) Py4J (net.sf.py4j:py4j:0.10.4 - http://py4j.sourceforge.net/)
+ (The New BSD License) Py4J (net.sf.py4j:py4j:0.10.6 - http://py4j.sourceforge.net/)
(Two-clause BSD-style license) JUnit-Interface (com.novocode:junit-interface:0.10 - http://github.com/szeiger/junit-interface/)
(BSD licence) sbt and sbt-launch-lib.bash
(BSD 3 Clause) d3.min.js (https://github.com/mbostock/d3/blob/master/LICENSE)
http://git-wip-us.apache.org/repos/asf/spark/blob/8177b214/bin/pyspark
----------------------------------------------------------------------
diff --git a/bin/pyspark b/bin/pyspark
index 98387c2..d3b512e 100755
--- a/bin/pyspark
+++ b/bin/pyspark
@@ -57,7 +57,7 @@ export PYSPARK_PYTHON
# Add the PySpark classes to the Python path:
export PYTHONPATH="${SPARK_HOME}/python/:$PYTHONPATH"
-export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.4-src.zip:$PYTHONPATH"
+export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.6-src.zip:$PYTHONPATH"
# Load the PySpark shell.py script when ./pyspark is used interactively:
export OLD_PYTHONSTARTUP="$PYTHONSTARTUP"
http://git-wip-us.apache.org/repos/asf/spark/blob/8177b214/bin/pyspark2.cmd
----------------------------------------------------------------------
diff --git a/bin/pyspark2.cmd b/bin/pyspark2.cmd
index f211c08..46d4d5c 100644
--- a/bin/pyspark2.cmd
+++ b/bin/pyspark2.cmd
@@ -30,7 +30,7 @@ if "x%PYSPARK_DRIVER_PYTHON%"=="x" (
)
set PYTHONPATH=%SPARK_HOME%\python;%PYTHONPATH%
-set PYTHONPATH=%SPARK_HOME%\python\lib\py4j-0.10.4-src.zip;%PYTHONPATH%
+set PYTHONPATH=%SPARK_HOME%\python\lib\py4j-0.10.6-src.zip;%PYTHONPATH%
set OLD_PYTHONSTARTUP=%PYTHONSTARTUP%
set PYTHONSTARTUP=%SPARK_HOME%\python\pyspark\shell.py
http://git-wip-us.apache.org/repos/asf/spark/blob/8177b214/core/pom.xml
----------------------------------------------------------------------
diff --git a/core/pom.xml b/core/pom.xml
index fbb0eda..79b06dd 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -331,7 +331,7 @@
<dependency>
<groupId>net.sf.py4j</groupId>
<artifactId>py4j</artifactId>
- <version>0.10.4</version>
+ <version>0.10.6</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
http://git-wip-us.apache.org/repos/asf/spark/blob/8177b214/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala
index c4e55b5..92e228a 100644
--- a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala
+++ b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala
@@ -32,7 +32,7 @@ private[spark] object PythonUtils {
val pythonPath = new ArrayBuffer[String]
for (sparkHome <- sys.env.get("SPARK_HOME")) {
pythonPath += Seq(sparkHome, "python", "lib", "pyspark.zip").mkString(File.separator)
- pythonPath += Seq(sparkHome, "python", "lib", "py4j-0.10.4-src.zip").mkString(File.separator)
+ pythonPath += Seq(sparkHome, "python", "lib", "py4j-0.10.6-src.zip").mkString(File.separator)
}
pythonPath ++= SparkContext.jarOfObject(this)
pythonPath.mkString(File.pathSeparator)
http://git-wip-us.apache.org/repos/asf/spark/blob/8177b214/dev/deps/spark-deps-hadoop-2.6
----------------------------------------------------------------------
diff --git a/dev/deps/spark-deps-hadoop-2.6 b/dev/deps/spark-deps-hadoop-2.6
index df9cb01..7e9bf27 100644
--- a/dev/deps/spark-deps-hadoop-2.6
+++ b/dev/deps/spark-deps-hadoop-2.6
@@ -154,7 +154,7 @@ parquet-jackson-1.8.1.jar
pmml-model-1.2.15.jar
pmml-schema-1.2.15.jar
protobuf-java-2.5.0.jar
-py4j-0.10.4.jar
+py4j-0.10.6.jar
pyrolite-4.13.jar
scala-compiler-2.11.8.jar
scala-library-2.11.8.jar
http://git-wip-us.apache.org/repos/asf/spark/blob/8177b214/dev/deps/spark-deps-hadoop-2.7
----------------------------------------------------------------------
diff --git a/dev/deps/spark-deps-hadoop-2.7 b/dev/deps/spark-deps-hadoop-2.7
index 8cee1be..b5a0d26 100644
--- a/dev/deps/spark-deps-hadoop-2.7
+++ b/dev/deps/spark-deps-hadoop-2.7
@@ -155,7 +155,7 @@ parquet-jackson-1.8.1.jar
pmml-model-1.2.15.jar
pmml-schema-1.2.15.jar
protobuf-java-2.5.0.jar
-py4j-0.10.4.jar
+py4j-0.10.6.jar
pyrolite-4.13.jar
scala-compiler-2.11.8.jar
scala-library-2.11.8.jar
http://git-wip-us.apache.org/repos/asf/spark/blob/8177b214/python/README.md
----------------------------------------------------------------------
diff --git a/python/README.md b/python/README.md
index 0a5c801..84ec881 100644
--- a/python/README.md
+++ b/python/README.md
@@ -29,4 +29,4 @@ The Python packaging for Spark is not intended to replace all of the other use c
## Python Requirements
-At its core PySpark depends on Py4J (currently version 0.10.4), but additional sub-packages have their own requirements (including numpy and pandas).
\ No newline at end of file
+At its core PySpark depends on Py4J (currently version 0.10.6), but additional sub-packages have their own requirements (including numpy and pandas).
http://git-wip-us.apache.org/repos/asf/spark/blob/8177b214/python/docs/Makefile
----------------------------------------------------------------------
diff --git a/python/docs/Makefile b/python/docs/Makefile
index 5e4cfb8..09898f2 100644
--- a/python/docs/Makefile
+++ b/python/docs/Makefile
@@ -7,7 +7,7 @@ SPHINXBUILD ?= sphinx-build
PAPER ?=
BUILDDIR ?= _build
-export PYTHONPATH=$(realpath ..):$(realpath ../lib/py4j-0.10.4-src.zip)
+export PYTHONPATH=$(realpath ..):$(realpath ../lib/py4j-0.10.6-src.zip)
# User-friendly check for sphinx-build
ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
http://git-wip-us.apache.org/repos/asf/spark/blob/8177b214/python/lib/py4j-0.10.4-src.zip
----------------------------------------------------------------------
diff --git a/python/lib/py4j-0.10.4-src.zip b/python/lib/py4j-0.10.4-src.zip
deleted file mode 100644
index 8c3829e..0000000
Binary files a/python/lib/py4j-0.10.4-src.zip and /dev/null differ
http://git-wip-us.apache.org/repos/asf/spark/blob/8177b214/python/lib/py4j-0.10.6-src.zip
----------------------------------------------------------------------
diff --git a/python/lib/py4j-0.10.6-src.zip b/python/lib/py4j-0.10.6-src.zip
new file mode 100644
index 0000000..2f8edcc
Binary files /dev/null and b/python/lib/py4j-0.10.6-src.zip differ
http://git-wip-us.apache.org/repos/asf/spark/blob/8177b214/python/setup.py
----------------------------------------------------------------------
diff --git a/python/setup.py b/python/setup.py
index f500354..51f417f 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -194,7 +194,7 @@ try:
'pyspark.examples.src.main.python': ['*.py', '*/*.py']},
scripts=scripts,
license='http://www.apache.org/licenses/LICENSE-2.0',
- install_requires=['py4j==0.10.4'],
+ install_requires=['py4j==0.10.6'],
setup_requires=['pypandoc'],
extras_require={
'ml': ['numpy>=1.7'],
http://git-wip-us.apache.org/repos/asf/spark/blob/8177b214/sbin/spark-config.sh
----------------------------------------------------------------------
diff --git a/sbin/spark-config.sh b/sbin/spark-config.sh
index f2d9e6b..bac154e 100755
--- a/sbin/spark-config.sh
+++ b/sbin/spark-config.sh
@@ -28,6 +28,6 @@ export SPARK_CONF_DIR="${SPARK_CONF_DIR:-"${SPARK_HOME}/conf"}"
# Add the PySpark classes to the PYTHONPATH:
if [ -z "${PYSPARK_PYTHONPATH_SET}" ]; then
export PYTHONPATH="${SPARK_HOME}/python:${PYTHONPATH}"
- export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.4-src.zip:${PYTHONPATH}"
+ export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.6-src.zip:${PYTHONPATH}"
export PYSPARK_PYTHONPATH_SET=1
fi
http://git-wip-us.apache.org/repos/asf/spark/blob/8177b214/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
----------------------------------------------------------------------
diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
index 1ba736b..8260d2f 100644
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
@@ -1197,7 +1197,7 @@ private[spark] class Client(
val pyArchivesFile = new File(pyLibPath, "pyspark.zip")
require(pyArchivesFile.exists(),
s"$pyArchivesFile not found; cannot run pyspark application in YARN mode.")
- val py4jFile = new File(pyLibPath, "py4j-0.10.4-src.zip")
+ val py4jFile = new File(pyLibPath, "py4j-0.10.6-src.zip")
require(py4jFile.exists(),
s"$py4jFile not found; cannot run pyspark application in YARN mode.")
Seq(pyArchivesFile.getAbsolutePath(), py4jFile.getAbsolutePath())
http://git-wip-us.apache.org/repos/asf/spark/blob/8177b214/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala
----------------------------------------------------------------------
diff --git a/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala b/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala
index 99fb58a..4714f5e 100644
--- a/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala
+++ b/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala
@@ -242,7 +242,7 @@ class YarnClusterSuite extends BaseYarnClusterSuite {
// needed locations.
val sparkHome = sys.props("spark.test.home")
val pythonPath = Seq(
- s"$sparkHome/python/lib/py4j-0.10.4-src.zip",
+ s"$sparkHome/python/lib/py4j-0.10.6-src.zip",
s"$sparkHome/python")
val extraEnvVars = Map(
"PYSPARK_ARCHIVES_PATH" -> pythonPath.map("local:" + _).mkString(File.pathSeparator),
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org