You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@zeppelin.apache.org by jo...@apache.org on 2017/01/12 07:03:25 UTC

zeppelin git commit: [ZEPPELIN-1730, 1587] add spark impersonation through --proxy-user option

Repository: zeppelin
Updated Branches:
  refs/heads/master 434215668 -> 5e0aacf8a


[ZEPPELIN-1730, 1587] add spark impersonation through --proxy-user option

### What is this PR for?
This adds Spark impersonation using the `--proxy-user` option. Note that it also makes it possible to use Spark impersonation without requiring the logged-in user to exist as a system user with configured ssh.

### What type of PR is it?
Improvement

### Todos
* [x] - add `--proxy-user`
* [x] - try on standalone spark 1.6.2
* [x] - try on yarn-client mode spark 2.0.1

### What is the Jira issue?
Directly solves [ZEPPELIN-1730](https://issues.apache.org/jira/browse/ZEPPELIN-1730) and also solves [ZEPPELIN-1587](https://issues.apache.org/jira/browse/ZEPPELIN-1587) according to the discussion in #1566, since using `--proxy-user` in `spark-submit` is the preferable method.

### How should this be tested?
1. switch your spark cluster to `per user` and `isolated` mode
2. set up `user impersonation` flag
3. run some job using that spark interpreter
4. spark context should be created with currently logged in user credentials on behalf of system user

### Screenshots (if appropriate)
standalone
![spark_sc_impersonation](https://cloud.githubusercontent.com/assets/1642088/21639292/24240286-d224-11e6-8099-9bc74a06f0c2.gif)

yarn-client
<img width="997" alt="screen shot 2017-01-04 at 10 00 13 am" src="https://cloud.githubusercontent.com/assets/1642088/21653117/75410fde-d264-11e6-886f-11d8b5dbd29e.png">

### Questions:
* Do the license files need an update? no
* Are there breaking changes for older versions? no
* Does this need documentation? yes

Author: Khalid Huseynov <kh...@gmail.com>

Closes #1840 from khalidhuseynov/feat/spark-proxy-user and squashes the following commits:

e4251de [Khalid Huseynov] update doc with env var
dc61cae [Khalid Huseynov] check for env spark_proxy in interpreter.sh
8b66740 [Khalid Huseynov] add spark_proxy_user to env.sh
892b7e4 [Khalid Huseynov] add note in docs
4c3dba9 [Khalid Huseynov] add --proxy-user option for spark


Project: http://git-wip-us.apache.org/repos/asf/zeppelin/repo
Commit: http://git-wip-us.apache.org/repos/asf/zeppelin/commit/5e0aacf8
Tree: http://git-wip-us.apache.org/repos/asf/zeppelin/tree/5e0aacf8
Diff: http://git-wip-us.apache.org/repos/asf/zeppelin/diff/5e0aacf8

Branch: refs/heads/master
Commit: 5e0aacf8a8f187702452d7cd2ee83b26c56dec90
Parents: 4342156
Author: Khalid Huseynov <kh...@gmail.com>
Authored: Tue Jan 10 20:48:26 2017 -0800
Committer: Jongyoul Lee <jo...@apache.org>
Committed: Thu Jan 12 16:03:14 2017 +0900

----------------------------------------------------------------------
 bin/interpreter.sh               | 19 +++++++++++++------
 conf/zeppelin-env.sh.template    |  3 +++
 docs/manual/userimpersonation.md | 12 +++++++-----
 3 files changed, 23 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/zeppelin/blob/5e0aacf8/bin/interpreter.sh
----------------------------------------------------------------------
diff --git a/bin/interpreter.sh b/bin/interpreter.sh
index 300b18a..0132b42 100755
--- a/bin/interpreter.sh
+++ b/bin/interpreter.sh
@@ -171,7 +171,7 @@ elif [[ "${INTERPRETER_ID}" == "pig" ]]; then
   if [[ -n "${HADOOP_CONF_DIR}" ]] && [[ -d "${HADOOP_CONF_DIR}" ]]; then
     ZEPPELIN_INTP_CLASSPATH+=":${HADOOP_CONF_DIR}"
   fi
-  
+
   # autodetect TEZ_CONF_DIR
   if [[ -n "${TEZ_CONF_DIR}" ]]; then
     ZEPPELIN_INTP_CLASSPATH+=":${TEZ_CONF_DIR}"
@@ -187,19 +187,26 @@ addJarInDirForIntp "${LOCAL_INTERPRETER_REPO}"
 CLASSPATH+=":${ZEPPELIN_INTP_CLASSPATH}"
 
 if [[ ! -z "$ZEPPELIN_IMPERSONATE_USER" ]]; then
-    INTERPRETER_RUN_COMMAND=${ZEPPELIN_IMPERSONATE_RUN_CMD}" '"
-    if [[ -f "${ZEPPELIN_CONF_DIR}/zeppelin-env.sh" ]]; then
-        INTERPRETER_RUN_COMMAND+=" source "${ZEPPELIN_CONF_DIR}'/zeppelin-env.sh;'
+    suid="$(id -u ${ZEPPELIN_IMPERSONATE_USER})"
+    if [[ -n  "${suid}" || -z "${SPARK_SUBMIT}" ]]; then
+       INTERPRETER_RUN_COMMAND=${ZEPPELIN_IMPERSONATE_RUN_CMD}" '"
+       if [[ -f "${ZEPPELIN_CONF_DIR}/zeppelin-env.sh" ]]; then
+           INTERPRETER_RUN_COMMAND+=" source "${ZEPPELIN_CONF_DIR}'/zeppelin-env.sh;'
+       fi
     fi
 fi
 
 if [[ -n "${SPARK_SUBMIT}" ]]; then
-    INTERPRETER_RUN_COMMAND+=' '` echo ${SPARK_SUBMIT} --class ${ZEPPELIN_SERVER} --driver-class-path \"${ZEPPELIN_INTP_CLASSPATH_OVERRIDES}:${CLASSPATH}\" --driver-java-options \"${JAVA_INTP_OPTS}\" ${SPARK_SUBMIT_OPTIONS} ${SPARK_APP_JAR} ${PORT}`
+    if [[ -n "$ZEPPELIN_IMPERSONATE_USER" ]] && [[ "$ZEPPELIN_IMPERSONATE_SPARK_PROXY_USER" != "false" ]];  then
+       INTERPRETER_RUN_COMMAND+=' '` echo ${SPARK_SUBMIT} --class ${ZEPPELIN_SERVER} --driver-class-path \"${ZEPPELIN_INTP_CLASSPATH_OVERRIDES}:${CLASSPATH}\" --driver-java-options \"${JAVA_INTP_OPTS}\" ${SPARK_SUBMIT_OPTIONS} --proxy-user ${ZEPPELIN_IMPERSONATE_USER} ${SPARK_APP_JAR} ${PORT}`
+    else
+       INTERPRETER_RUN_COMMAND+=' '` echo ${SPARK_SUBMIT} --class ${ZEPPELIN_SERVER} --driver-class-path \"${ZEPPELIN_INTP_CLASSPATH_OVERRIDES}:${CLASSPATH}\" --driver-java-options \"${JAVA_INTP_OPTS}\" ${SPARK_SUBMIT_OPTIONS} ${SPARK_APP_JAR} ${PORT}`
+    fi
 else
     INTERPRETER_RUN_COMMAND+=' '` echo ${ZEPPELIN_RUNNER} ${JAVA_INTP_OPTS} ${ZEPPELIN_INTP_MEM} -cp ${ZEPPELIN_INTP_CLASSPATH_OVERRIDES}:${CLASSPATH} ${ZEPPELIN_SERVER} ${PORT} `
 fi
 
-if [[ ! -z "$ZEPPELIN_IMPERSONATE_USER" ]]; then
+if [[ ! -z "$ZEPPELIN_IMPERSONATE_USER" ]] && [[ -n "${suid}" || -z "${SPARK_SUBMIT}" ]]; then
     INTERPRETER_RUN_COMMAND+="'"
 fi
 

http://git-wip-us.apache.org/repos/asf/zeppelin/blob/5e0aacf8/conf/zeppelin-env.sh.template
----------------------------------------------------------------------
diff --git a/conf/zeppelin-env.sh.template b/conf/zeppelin-env.sh.template
index 64db29d..7e777b6 100644
--- a/conf/zeppelin-env.sh.template
+++ b/conf/zeppelin-env.sh.template
@@ -82,4 +82,7 @@
 # export ZEPPELINHUB_API_ADDRESS		# Refers to the address of the ZeppelinHub service in use
 # export ZEPPELINHUB_API_TOKEN			# Refers to the Zeppelin instance token of the user
 # export ZEPPELINHUB_USER_KEY			# Optional, when using Zeppelin with authentication.
+
+#### Zeppelin impersonation configuration
 # export ZEPPELIN_IMPERSONATE_CMD       # Optional, when user want to run interpreter as end web user. eg) 'sudo -H -u ${ZEPPELIN_IMPERSONATE_USER} bash -c '
+# export ZEPPELIN_IMPERSONATE_SPARK_PROXY_USER  #Optional, by default is true; can be set to false if you don't want to use --proxy-user option with Spark interpreter when impersonation enabled

http://git-wip-us.apache.org/repos/asf/zeppelin/blob/5e0aacf8/docs/manual/userimpersonation.md
----------------------------------------------------------------------
diff --git a/docs/manual/userimpersonation.md b/docs/manual/userimpersonation.md
index f0f01b4..6b592e7 100644
--- a/docs/manual/userimpersonation.md
+++ b/docs/manual/userimpersonation.md
@@ -43,10 +43,10 @@ cat ~/.ssh/id_rsa.pub | ssh user1@localhost 'cat >> .ssh/authorized_keys'
 ```
 export ZEPPELIN_IMPERSONATE_CMD='sudo -H -u ${ZEPPELIN_IMPERSONATE_USER} bash -c '
 ```
-  
+
 
  * Start zeppelin server.
- 
+
 <hr>
 <div class="row">
   <div class="col-md-12">
@@ -57,13 +57,13 @@ export ZEPPELIN_IMPERSONATE_CMD='sudo -H -u ${ZEPPELIN_IMPERSONATE_USER} bash -c
       <a data-lightbox="compiler" href="../assets/themes/zeppelin/img/screenshots/user-impersonation.gif">
         <img class="img-responsive" src="../assets/themes/zeppelin/img/screenshots/user-impersonation.gif" />
       </a>
-    
+
   </div>
 </div>
 <hr>
- 
+
  * Go to interpreter setting page, and enable "User Impersonate" in any of the interpreter (in my example its shell interpreter)
- 
+
  * Test with a simple paragraph
 
 ```
@@ -71,3 +71,5 @@ export ZEPPELIN_IMPERSONATE_CMD='sudo -H -u ${ZEPPELIN_IMPERSONATE_USER} bash -c
 whoami
 ```
 
+
+Note that usage of "User Impersonate" option will enable Spark interpreter to use `--proxy-user` option with current user by default. If you want to disable `--proxy-user` option, then refer to `ZEPPELIN_IMPERSONATE_SPARK_PROXY_USER` variable in `conf/zeppelin-env.sh`