You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@zeppelin.apache.org by zj...@apache.org on 2020/03/30 03:59:17 UTC

[zeppelin] branch branch-0.9 updated: [ZEPPELIN-4673]. Add option to only allow yarn-cluster mode for spark interpreter

This is an automated email from the ASF dual-hosted git repository.

zjffdu pushed a commit to branch branch-0.9
in repository https://gitbox.apache.org/repos/asf/zeppelin.git


The following commit(s) were added to refs/heads/branch-0.9 by this push:
     new 01bc2fc  [ZEPPELIN-4673]. Add option to only allow yarn-cluster mode for spark interpreter
01bc2fc is described below

commit 01bc2fc90116f50b2e9c25db29e0f1a960f772a3
Author: Jeff Zhang <zj...@apache.org>
AuthorDate: Tue Mar 10 23:39:11 2020 +0800

    [ZEPPELIN-4673]. Add option to only allow yarn-cluster mode for spark interpreter
    
    ### What is this PR for?
    Yarn client mode and local mode run the driver on the same machine as the zeppelin server, which is dangerous for production because the server may run out of memory when many spark interpreters are running at the same time. So in this PR, I introduce one configuration, `zeppelin.spark.only_yarn_cluster`, which is turned off by default. When it is turned on, only yarn-cluster mode can be used.
    
    ### What type of PR is it?
    [ Improvement ]
    
    ### Todos
    * [ ] - Task
    
    ### What is the Jira issue?
    * https://issues.apache.org/jira/browse/ZEPPELIN-4673
    
    ### How should this be tested?
    * Manually tested
    
    ### Screenshots (if appropriate)
    
    ### Questions:
    * Does the licenses files need update? No
    * Is there breaking changes for older versions? No
    * Does this needs documentation? No
    
    Author: Jeff Zhang <zj...@apache.org>
    
    Closes #3697 from zjffdu/ZEPPELIN-4673 and squashes the following commits:
    
    d51e423a8 [Jeff Zhang] [ZEPPELIN-4673]. Add option to only allow yarn-cluster mode for spark interpreter
    
    (cherry picked from commit 768bbf9ff4f735c2f3e65085cb8fdc6eabff779f)
    Signed-off-by: Jeff Zhang <zj...@apache.org>
---
 conf/zeppelin-site.xml.template                                    | 6 ++++++
 docs/interpreter/spark.md                                          | 2 ++
 .../main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java  | 7 ++++++-
 .../zeppelin/interpreter/launcher/SparkInterpreterLauncher.java    | 4 ++++
 4 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/conf/zeppelin-site.xml.template b/conf/zeppelin-site.xml.template
index 8ef4edc..6d33433 100755
--- a/conf/zeppelin-site.xml.template
+++ b/conf/zeppelin-site.xml.template
@@ -718,4 +718,10 @@
   Disable it can save lots of memory</description>
 </property>
 
+<property>
+  <name>zeppelin.spark.only_yarn_cluster</name>
+  <value>false</value>
+  <description>Whether to allow only yarn-cluster mode for the spark interpreter</description>
+</property>
+
 </configuration>
diff --git a/docs/interpreter/spark.md b/docs/interpreter/spark.md
index 277efb3..3c07e01 100644
--- a/docs/interpreter/spark.md
+++ b/docs/interpreter/spark.md
@@ -259,6 +259,8 @@ For the further information about Spark & Zeppelin version compatibility, please
 
 > Note that without exporting `SPARK_HOME`, it's running in local mode with included version of Spark. The included version may vary depending on the build profile.
 
+> Yarn client mode and local mode run the driver on the same machine as the Zeppelin server, which is dangerous for production because the server may run out of memory when many Spark interpreters are running at the same time. We therefore suggest allowing only yarn-cluster mode by setting `zeppelin.spark.only_yarn_cluster` to `true` in `zeppelin-site.xml`.
+
 ## SparkContext, SQLContext, SparkSession, ZeppelinContext
 
 SparkContext, SQLContext, SparkSession (for spark 2.x) and ZeppelinContext are automatically created and exposed as variable names `sc`, `sqlContext`, `spark` and `z`, respectively, in Scala, Kotlin, Python and R environments.
diff --git a/zeppelin-interpreter/src/main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java b/zeppelin-interpreter/src/main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java
index dfcf840..cbd894e 100644
--- a/zeppelin-interpreter/src/main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java
+++ b/zeppelin-interpreter/src/main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java
@@ -739,6 +739,10 @@ public class ZeppelinConfiguration extends XMLConfiguration {
     return getRelativeDir(ConfVars.ZEPPELIN_SEARCH_INDEX_PATH);
   }
 
+  public Boolean isOnlyYarnCluster() {  // reads zeppelin.spark.only_yarn_cluster
+    return getBoolean(ConfVars.ZEPPELIN_SPARK_ONLY_YARN_CLUSTER);
+  }
+
   public String getClusterAddress() {
     return getString(ConfVars.ZEPPELIN_CLUSTER_ADDR);
   }
@@ -998,7 +1002,8 @@ public class ZeppelinConfiguration extends XMLConfiguration {
     ZEPPELIN_SEARCH_INDEX_REBUILD("zeppelin.search.index.rebuild", false),
     ZEPPELIN_SEARCH_USE_DISK("zeppelin.search.use.disk", true),
     ZEPPELIN_SEARCH_INDEX_PATH("zeppelin.search.index.path", "/tmp/zeppelin-index"),
-    ZEPPELIN_JOBMANAGER_ENABLE("zeppelin.jobmanager.enable", true);
+    ZEPPELIN_JOBMANAGER_ENABLE("zeppelin.jobmanager.enable", true),
+    ZEPPELIN_SPARK_ONLY_YARN_CLUSTER("zeppelin.spark.only_yarn_cluster", false);
 
     private String varName;
     @SuppressWarnings("rawtypes")
diff --git a/zeppelin-zengine/src/main/java/org/apache/zeppelin/interpreter/launcher/SparkInterpreterLauncher.java b/zeppelin-zengine/src/main/java/org/apache/zeppelin/interpreter/launcher/SparkInterpreterLauncher.java
index 5252eeb..9255c98 100644
--- a/zeppelin-zengine/src/main/java/org/apache/zeppelin/interpreter/launcher/SparkInterpreterLauncher.java
+++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/interpreter/launcher/SparkInterpreterLauncher.java
@@ -76,6 +76,10 @@ public class SparkInterpreterLauncher extends StandardInterpreterLauncher {
     if (isYarnMode() && getDeployMode().equals("cluster")) {
       env.put("ZEPPELIN_SPARK_YARN_CLUSTER", "true");
       sparkProperties.setProperty("spark.yarn.submit.waitAppCompletion", "false");
+    } else if (zConf.isOnlyYarnCluster()){
+      throw new IOException("Only yarn-cluster mode is allowed, please set " +
+              ZeppelinConfiguration.ConfVars.ZEPPELIN_SPARK_ONLY_YARN_CLUSTER.getVarName() +
+              " to false if you want to use other modes.");
     }
 
     StringBuilder sparkConfBuilder = new StringBuilder();