You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@zeppelin.apache.org by zj...@apache.org on 2021/03/12 07:22:20 UTC

[zeppelin] branch master updated: [ZEPPELIN-5275] Add missing java_import's to pyspark bootstrapping

This is an automated email from the ASF dual-hosted git repository.

zjffdu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/zeppelin.git


The following commit(s) were added to refs/heads/master by this push:
     new 8528170  [ZEPPELIN-5275] Add missing java_import's to pyspark bootstrapping
8528170 is described below

commit 85281702cb59129fee964b342fc1d475afe67f78
Author: Adam Binford <ad...@maxar.com>
AuthorDate: Wed Mar 3 06:24:12 2021 -0500

    [ZEPPELIN-5275] Add missing java_import's to pyspark bootstrapping
    
    ### What is this PR for?
    The pyspark bootstrap script is missing some java imports that are in the native Spark bootstrapping. (See https://github.com/apache/spark/blob/master/python/pyspark/java_gateway.py#L152). This prevents some of the SQL functions from working correctly, most notably `.explain()`, in pyspark. This adds the imports that are missing from the built-in spark implementation.
    
    ### What type of PR is it?
    Bug Fix
    
    ### What is the Jira issue?
    https://issues.apache.org/jira/browse/ZEPPELIN-5275
    
    ### How should this be tested?
    I added a check to make sure `.explain` works in the pyspark test, but I can't actually get that test to run locally (it just hangs running the simple `sc.range(1,10).sum()`, not sure why).
    
    ### Screenshots (if appropriate)
    
    ### Questions:
    * Do the license files need an update? No
    * Is there breaking changes for older versions? No
    * Does this need documentation? No
    
    Author: Adam Binford <ad...@maxar.com>
    
    Closes #4067 from Kimahriman/bug/pyspark-java-imports and squashes the following commits:
    
    f2aa04817 [Adam Binford] [ZEPPELIN-5275] Add missing java_import's to PySpark bootstrapping
---
 spark/interpreter/src/main/resources/python/zeppelin_ipyspark.py      | 3 +++
 spark/interpreter/src/main/resources/python/zeppelin_pyspark.py       | 3 +++
 .../test/java/org/apache/zeppelin/spark/IPySparkInterpreterTest.java  | 4 ++++
 3 files changed, 10 insertions(+)

diff --git a/spark/interpreter/src/main/resources/python/zeppelin_ipyspark.py b/spark/interpreter/src/main/resources/python/zeppelin_ipyspark.py
index d8ec931..4b0dbaa 100644
--- a/spark/interpreter/src/main/resources/python/zeppelin_ipyspark.py
+++ b/spark/interpreter/src/main/resources/python/zeppelin_ipyspark.py
@@ -36,7 +36,9 @@ java_import(gateway.jvm, "org.apache.spark.SparkEnv")
 java_import(gateway.jvm, "org.apache.spark.SparkConf")
 java_import(gateway.jvm, "org.apache.spark.api.java.*")
 java_import(gateway.jvm, "org.apache.spark.api.python.*")
+java_import(gateway.jvm, "org.apache.spark.ml.python.*")
 java_import(gateway.jvm, "org.apache.spark.mllib.api.python.*")
+java_import(gateway.jvm, "org.apache.spark.resource.*")
 
 intp = gateway.entry_point
 
@@ -46,6 +48,7 @@ if intp.isSpark3():
 jsc = intp.getJavaSparkContext()
 
 java_import(gateway.jvm, "org.apache.spark.sql.*")
+java_import(gateway.jvm, "org.apache.spark.sql.api.python.*")
 java_import(gateway.jvm, "org.apache.spark.sql.hive.*")
 java_import(gateway.jvm, "scala.Tuple2")
 
diff --git a/spark/interpreter/src/main/resources/python/zeppelin_pyspark.py b/spark/interpreter/src/main/resources/python/zeppelin_pyspark.py
index b710721..2038c14 100644
--- a/spark/interpreter/src/main/resources/python/zeppelin_pyspark.py
+++ b/spark/interpreter/src/main/resources/python/zeppelin_pyspark.py
@@ -34,9 +34,12 @@ java_import(gateway.jvm, "org.apache.spark.SparkEnv")
 java_import(gateway.jvm, "org.apache.spark.SparkConf")
 java_import(gateway.jvm, "org.apache.spark.api.java.*")
 java_import(gateway.jvm, "org.apache.spark.api.python.*")
+java_import(gateway.jvm, "org.apache.spark.ml.python.*")
 java_import(gateway.jvm, "org.apache.spark.mllib.api.python.*")
+java_import(gateway.jvm, "org.apache.spark.resource.*")
 
 java_import(gateway.jvm, "org.apache.spark.sql.*")
+java_import(gateway.jvm, "org.apache.spark.sql.api.python.*")
 java_import(gateway.jvm, "org.apache.spark.sql.hive.*")
 
 java_import(gateway.jvm, "scala.Tuple2")
diff --git a/spark/interpreter/src/test/java/org/apache/zeppelin/spark/IPySparkInterpreterTest.java b/spark/interpreter/src/test/java/org/apache/zeppelin/spark/IPySparkInterpreterTest.java
index 101b742..0589820 100644
--- a/spark/interpreter/src/test/java/org/apache/zeppelin/spark/IPySparkInterpreterTest.java
+++ b/spark/interpreter/src/test/java/org/apache/zeppelin/spark/IPySparkInterpreterTest.java
@@ -167,6 +167,10 @@ public class IPySparkInterpreterTest extends IPythonInterpreterTest {
           "_1	_2\n" +
               "1	a\n" +
               "2	b", interpreterResultMessages.get(0).getData().trim());
+
+      // spark sql python API bindings
+      result = interpreter.interpret("df.explain()", context);
+      assertEquals(InterpreterResult.Code.SUCCESS, result.code());
     }
     // cancel
     if (interpreter instanceof IPySparkInterpreter) {