You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@zeppelin.apache.org by zj...@apache.org on 2021/03/12 07:22:20 UTC
[zeppelin] branch master updated: [ZEPPELIN-5275] Add missing
java_import's to pyspark bootstrapping
This is an automated email from the ASF dual-hosted git repository.
zjffdu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/zeppelin.git
The following commit(s) were added to refs/heads/master by this push:
new 8528170 [ZEPPELIN-5275] Add missing java_import's to pyspark bootstrapping
8528170 is described below
commit 85281702cb59129fee964b342fc1d475afe67f78
Author: Adam Binford <ad...@maxar.com>
AuthorDate: Wed Mar 3 06:24:12 2021 -0500
[ZEPPELIN-5275] Add missing java_import's to pyspark bootstrapping
### What is this PR for?
The pyspark bootstrap script is missing some java imports that are in the native Spark bootstrapping. (See https://github.com/apache/spark/blob/master/python/pyspark/java_gateway.py#L152). This prevents some of the SQL functions from working correctly, most notably `.explain()`, in pyspark. This adds the imports that are missing from the built-in spark implementation.
### What type of PR is it?
Bug Fix
### What is the Jira issue?
https://issues.apache.org/jira/browse/ZEPPELIN-5275
### How should this be tested?
I added a check to make sure `.explain` works in the pyspark test, but I can't actually get that test to run locally (it just hangs running the simple `sc.range(1,10).sum()`; not sure why).
### Screenshots (if appropriate)
### Questions:
* Do the license files need updating? No
* Are there breaking changes for older versions? No
* Does this need documentation? No
Author: Adam Binford <ad...@maxar.com>
Closes #4067 from Kimahriman/bug/pyspark-java-imports and squashes the following commits:
f2aa04817 [Adam Binford] [ZEPPELIN-5275] Add missing java_import's to PySpark bootstrapping
---
spark/interpreter/src/main/resources/python/zeppelin_ipyspark.py | 3 +++
spark/interpreter/src/main/resources/python/zeppelin_pyspark.py | 3 +++
.../test/java/org/apache/zeppelin/spark/IPySparkInterpreterTest.java | 4 ++++
3 files changed, 10 insertions(+)
diff --git a/spark/interpreter/src/main/resources/python/zeppelin_ipyspark.py b/spark/interpreter/src/main/resources/python/zeppelin_ipyspark.py
index d8ec931..4b0dbaa 100644
--- a/spark/interpreter/src/main/resources/python/zeppelin_ipyspark.py
+++ b/spark/interpreter/src/main/resources/python/zeppelin_ipyspark.py
@@ -36,7 +36,9 @@ java_import(gateway.jvm, "org.apache.spark.SparkEnv")
java_import(gateway.jvm, "org.apache.spark.SparkConf")
java_import(gateway.jvm, "org.apache.spark.api.java.*")
java_import(gateway.jvm, "org.apache.spark.api.python.*")
+java_import(gateway.jvm, "org.apache.spark.ml.python.*")
java_import(gateway.jvm, "org.apache.spark.mllib.api.python.*")
+java_import(gateway.jvm, "org.apache.spark.resource.*")
intp = gateway.entry_point
@@ -46,6 +48,7 @@ if intp.isSpark3():
jsc = intp.getJavaSparkContext()
java_import(gateway.jvm, "org.apache.spark.sql.*")
+java_import(gateway.jvm, "org.apache.spark.sql.api.python.*")
java_import(gateway.jvm, "org.apache.spark.sql.hive.*")
java_import(gateway.jvm, "scala.Tuple2")
diff --git a/spark/interpreter/src/main/resources/python/zeppelin_pyspark.py b/spark/interpreter/src/main/resources/python/zeppelin_pyspark.py
index b710721..2038c14 100644
--- a/spark/interpreter/src/main/resources/python/zeppelin_pyspark.py
+++ b/spark/interpreter/src/main/resources/python/zeppelin_pyspark.py
@@ -34,9 +34,12 @@ java_import(gateway.jvm, "org.apache.spark.SparkEnv")
java_import(gateway.jvm, "org.apache.spark.SparkConf")
java_import(gateway.jvm, "org.apache.spark.api.java.*")
java_import(gateway.jvm, "org.apache.spark.api.python.*")
+java_import(gateway.jvm, "org.apache.spark.ml.python.*")
java_import(gateway.jvm, "org.apache.spark.mllib.api.python.*")
+java_import(gateway.jvm, "org.apache.spark.resource.*")
java_import(gateway.jvm, "org.apache.spark.sql.*")
+java_import(gateway.jvm, "org.apache.spark.sql.api.python.*")
java_import(gateway.jvm, "org.apache.spark.sql.hive.*")
java_import(gateway.jvm, "scala.Tuple2")
diff --git a/spark/interpreter/src/test/java/org/apache/zeppelin/spark/IPySparkInterpreterTest.java b/spark/interpreter/src/test/java/org/apache/zeppelin/spark/IPySparkInterpreterTest.java
index 101b742..0589820 100644
--- a/spark/interpreter/src/test/java/org/apache/zeppelin/spark/IPySparkInterpreterTest.java
+++ b/spark/interpreter/src/test/java/org/apache/zeppelin/spark/IPySparkInterpreterTest.java
@@ -167,6 +167,10 @@ public class IPySparkInterpreterTest extends IPythonInterpreterTest {
"_1 _2\n" +
"1 a\n" +
"2 b", interpreterResultMessages.get(0).getData().trim());
+
+ // spark sql python API bindings
+ result = interpreter.interpret("df.explain()", context);
+ assertEquals(InterpreterResult.Code.SUCCESS, result.code());
}
// cancel
if (interpreter instanceof IPySparkInterpreter) {