Posted to commits@spark.apache.org by tg...@apache.org on 2015/05/29 18:06:45 UTC

spark git commit: [SPARK-7524] [SPARK-7846] add configs for keytab and principal, pass these two configs in different ways in different modes

Repository: spark
Updated Branches:
  refs/heads/master 8db40f671 -> a51b133de


[SPARK-7524] [SPARK-7846] add configs for keytab and principal, pass these two configs in different ways in different modes

* Spark now supports long-running services by renewing delegation tokens for the NameNode, but it accepts the keytab and principal only as command-line parameters in "--k=v" format, which is not very convenient. This patch adds spark.* configs so they can be set in the properties file or as system properties.

*  The --principal and --keytab options are passed to the client, but when the Thrift server or spark-shell is started these two options are also forwarded into the main class (org.apache.spark.sql.hive.thriftserver.HiveThriftServer2 and org.apache.spark.repl.Main).
In those main classes the arguments are processed by third-party libraries, which leads to errors such as "Invalid option: --principal" or "Unrecognised option: --principal".
We should therefore pass these arguments in a different form, namely as system properties.
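
For illustration, a minimal sketch of the intended usage; the principal and keytab path below are made up:

    # conf/spark-defaults.conf (illustrative values)
    spark.yarn.principal  spark/myhost@EXAMPLE.COM
    spark.yarn.keytab     /etc/security/keytabs/spark.keytab

With these set, sbin/start-thriftserver.sh or bin/spark-shell can pick the credentials up from the properties file, so --principal/--keytab never reach the third-party argument parsers.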

Author: WangTaoTheTonic <wa...@huawei.com>

Closes #6051 from WangTaoTheTonic/SPARK-7524 and squashes the following commits:

e65699a [WangTaoTheTonic] change logic to loadEnvironments
ebd9ea0 [WangTaoTheTonic] merge master
ecfe43a [WangTaoTheTonic] pass keytab and principal separately in different modes
33a7f40 [WangTaoTheTonic] expand the use of the current configs
08bb4e8 [WangTaoTheTonic] fix wrong cite
73afa64 [WangTaoTheTonic] add configs for keytab and principal, move originals to internal


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a51b133d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a51b133d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a51b133d

Branch: refs/heads/master
Commit: a51b133de3c65a991ab105b6f020082080121b4c
Parents: 8db40f6
Author: WangTaoTheTonic <wa...@huawei.com>
Authored: Fri May 29 11:06:11 2015 -0500
Committer: Thomas Graves <tg...@thatenemy-lm.champ.corp.yahoo.com>
Committed: Fri May 29 11:06:11 2015 -0500

----------------------------------------------------------------------
 .../scala/org/apache/spark/deploy/SparkSubmit.scala |  8 ++++----
 .../apache/spark/deploy/SparkSubmitArguments.scala  |  2 ++
 docs/running-on-yarn.md                             | 16 ++++++++++++++++
 .../deploy/yarn/AMDelegationTokenRenewer.scala      | 14 ++++++++------
 .../apache/spark/deploy/yarn/ClientArguments.scala  |  6 ++++++
 5 files changed, 36 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/a51b133d/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
index 92bb505..d1b32ea 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
@@ -428,6 +428,8 @@ object SparkSubmit {
       OptionAssigner(args.executorCores, YARN, CLIENT, sysProp = "spark.executor.cores"),
       OptionAssigner(args.files, YARN, CLIENT, sysProp = "spark.yarn.dist.files"),
       OptionAssigner(args.archives, YARN, CLIENT, sysProp = "spark.yarn.dist.archives"),
+      OptionAssigner(args.principal, YARN, CLIENT, sysProp = "spark.yarn.principal"),
+      OptionAssigner(args.keytab, YARN, CLIENT, sysProp = "spark.yarn.keytab"),
 
       // Yarn cluster only
       OptionAssigner(args.name, YARN, CLUSTER, clOption = "--name"),
@@ -440,10 +442,8 @@ object SparkSubmit {
       OptionAssigner(args.files, YARN, CLUSTER, clOption = "--files"),
       OptionAssigner(args.archives, YARN, CLUSTER, clOption = "--archives"),
       OptionAssigner(args.jars, YARN, CLUSTER, clOption = "--addJars"),
-
-      // Yarn client or cluster
-      OptionAssigner(args.principal, YARN, ALL_DEPLOY_MODES, clOption = "--principal"),
-      OptionAssigner(args.keytab, YARN, ALL_DEPLOY_MODES, clOption = "--keytab"),
+      OptionAssigner(args.principal, YARN, CLUSTER, clOption = "--principal"),
+      OptionAssigner(args.keytab, YARN, CLUSTER, clOption = "--keytab"),
 
       // Other options
       OptionAssigner(args.executorCores, STANDALONE, ALL_DEPLOY_MODES,
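
Net effect of the hunk above: in yarn-client mode the values now travel as system properties (spark.yarn.principal / spark.yarn.keytab), while yarn-cluster mode keeps forwarding them to the YARN client as command-line options. A simplified, hypothetical Scala sketch of that dispatch (the real OptionAssigner carries more fields):

    import scala.collection.mutable

    // Hypothetical, stripped-down re-statement of the assigner dispatch.
    case class Assigner(value: String, deployMode: String,
                        clOption: String = null, sysProp: String = null)

    def assign(assigners: Seq[Assigner], mode: String,
               childArgs: mutable.Buffer[String],
               sysProps: mutable.Map[String, String]): Unit = {
      for (a <- assigners if a.value != null && a.deployMode == mode) {
        if (a.clOption != null) { childArgs += a.clOption; childArgs += a.value } // cluster: CLI args
        if (a.sysProp != null) { sysProps(a.sysProp) = a.value }                  // client: sys props
      }
    }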

http://git-wip-us.apache.org/repos/asf/spark/blob/a51b133d/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
index c0e4c77..cc6a7bd 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
@@ -169,6 +169,8 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
     deployMode = Option(deployMode).orElse(env.get("DEPLOY_MODE")).orNull
     numExecutors = Option(numExecutors)
       .getOrElse(sparkProperties.get("spark.executor.instances").orNull)
+    keytab = Option(keytab).orElse(sparkProperties.get("spark.yarn.keytab")).orNull
+    principal = Option(principal).orElse(sparkProperties.get("spark.yarn.principal")).orNull
 
     // Try to set main class from JAR if no --class argument is given
     if (mainClass == null && !isPython && !isR && primaryResource != null) {
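
The Option(...).orElse(...).orNull chain above gives an explicit command-line flag priority over the properties file. A tiny self-contained sketch of that precedence (values made up):

    // CLI value wins; otherwise fall back to the properties file; else stay null.
    val sparkProperties = Map("spark.yarn.keytab" -> "/etc/security/keytabs/spark.keytab")
    var keytab: String = null // not supplied on the command line
    keytab = Option(keytab).orElse(sparkProperties.get("spark.yarn.keytab")).orNull
    assert(keytab == "/etc/security/keytabs/spark.keytab")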

http://git-wip-us.apache.org/repos/asf/spark/blob/a51b133d/docs/running-on-yarn.md
----------------------------------------------------------------------
diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md
index 9d55f43..96cf612 100644
--- a/docs/running-on-yarn.md
+++ b/docs/running-on-yarn.md
@@ -242,6 +242,22 @@ Most of the configs are the same for Spark on YARN as for other deployment modes
   running against earlier versions, this property will be ignored.
   </td>
 </tr>
+<tr>
+  <td><code>spark.yarn.keytab</code></td>
+  <td>(none)</td>
+  <td>
+  The full path to the file that contains the keytab for the principal specified above.
+  This keytab will be copied to the node running the Application Master via the Secure Distributed Cache,
+  for renewing the login tickets and the delegation tokens periodically.
+  </td>
+</tr>
+<tr>
+  <td><code>spark.yarn.principal</code></td>
+  <td>(none)</td>
+  <td>
+  Principal to be used to login to KDC, while running on secure HDFS.
+  </td>
+</tr>
 </table>
 
 # Launching Spark on YARN
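
Like any other Spark setting, the two new configs can also be passed per invocation; the principal and path here are illustrative:

    spark-submit --conf spark.yarn.principal=spark/myhost@EXAMPLE.COM \
                 --conf spark.yarn.keytab=/etc/security/keytabs/spark.keytab \
                 ...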

http://git-wip-us.apache.org/repos/asf/spark/blob/a51b133d/yarn/src/main/scala/org/apache/spark/deploy/yarn/AMDelegationTokenRenewer.scala
----------------------------------------------------------------------
diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/AMDelegationTokenRenewer.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/AMDelegationTokenRenewer.scala
index aaae6f9..77af46c 100644
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/AMDelegationTokenRenewer.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/AMDelegationTokenRenewer.scala
@@ -60,8 +60,11 @@ private[yarn] class AMDelegationTokenRenewer(
 
   private val hadoopUtil = YarnSparkHadoopUtil.get
 
-  private val daysToKeepFiles = sparkConf.getInt("spark.yarn.credentials.file.retention.days", 5)
-  private val numFilesToKeep = sparkConf.getInt("spark.yarn.credentials.file.retention.count", 5)
+  private val credentialsFile = sparkConf.get("spark.yarn.credentials.file")
+  private val daysToKeepFiles =
+    sparkConf.getInt("spark.yarn.credentials.file.retention.days", 5)
+  private val numFilesToKeep =
+    sparkConf.getInt("spark.yarn.credentials.file.retention.count", 5)
 
   /**
    * Schedule a login from the keytab and principal set using the --principal and --keytab
@@ -121,7 +124,7 @@ private[yarn] class AMDelegationTokenRenewer(
     import scala.concurrent.duration._
     try {
       val remoteFs = FileSystem.get(hadoopConf)
-      val credentialsPath = new Path(sparkConf.get("spark.yarn.credentials.file"))
+      val credentialsPath = new Path(credentialsFile)
       val thresholdTime = System.currentTimeMillis() - (daysToKeepFiles days).toMillis
       hadoopUtil.listFilesSorted(
         remoteFs, credentialsPath.getParent,
@@ -160,7 +163,7 @@ private[yarn] class AMDelegationTokenRenewer(
     val keytabLoggedInUGI = UserGroupInformation.loginUserFromKeytabAndReturnUGI(principal, keytab)
     logInfo("Successfully logged into KDC.")
     val tempCreds = keytabLoggedInUGI.getCredentials
-    val credentialsPath = new Path(sparkConf.get("spark.yarn.credentials.file"))
+    val credentialsPath = new Path(credentialsFile)
     val dst = credentialsPath.getParent
     keytabLoggedInUGI.doAs(new PrivilegedExceptionAction[Void] {
       // Get a copy of the credentials
@@ -186,8 +189,7 @@ private[yarn] class AMDelegationTokenRenewer(
     }
     val nextSuffix = lastCredentialsFileSuffix + 1
     val tokenPathStr =
-      sparkConf.get("spark.yarn.credentials.file") +
-        SparkHadoopUtil.SPARK_YARN_CREDS_COUNTER_DELIM + nextSuffix
+      credentialsFile + SparkHadoopUtil.SPARK_YARN_CREDS_COUNTER_DELIM + nextSuffix
     val tokenPath = new Path(tokenPathStr)
     val tempTokenPath = new Path(tokenPathStr + SparkHadoopUtil.SPARK_YARN_CREDS_TEMP_EXTENSION)
     logInfo("Writing out delegation tokens to " + tempTokenPath.toString)

http://git-wip-us.apache.org/repos/asf/spark/blob/a51b133d/yarn/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala
----------------------------------------------------------------------
diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala
index 5653c9f..9c7b1b3 100644
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala
@@ -98,6 +98,12 @@ private[spark] class ClientArguments(args: Array[String], sparkConf: SparkConf)
 
       numExecutors = initialNumExecutors
     }
+    principal = Option(principal)
+      .orElse(sparkConf.getOption("spark.yarn.principal"))
+      .orNull
+    keytab = Option(keytab)
+      .orElse(sparkConf.getOption("spark.yarn.keytab"))
+      .orNull
   }
 
   /**
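
The same fallback appears on the YARN client side, this time reading from SparkConf. A minimal sketch (the principal value is made up):

    import org.apache.spark.SparkConf

    val sparkConf = new SparkConf(false).set("spark.yarn.principal", "spark/myhost@EXAMPLE.COM")
    var principal: String = null // --principal not given
    principal = Option(principal).orElse(sparkConf.getOption("spark.yarn.principal")).orNull
    assert(principal == "spark/myhost@EXAMPLE.COM")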

