You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by tg...@apache.org on 2015/05/29 18:06:45 UTC
spark git commit: [SPARK-7524] [SPARK-7846] add configs for keytab
and principal, pass these two configs with different way in different modes
Repository: spark
Updated Branches:
refs/heads/master 8db40f671 -> a51b133de
[SPARK-7524] [SPARK-7846] add configs for keytab and principal, pass these two configs with different way in different modes
* Spark now supports long-running services by updating tokens for the namenode, but it only accepts these parameters in the "--k=v" format, which is not very convenient. This patch adds spark.* configs that can be set in the properties file or as system properties.
* The --principal and --keytab options are passed to the client, but when we start the thrift server or spark-shell these two are also passed into the Main class (org.apache.spark.sql.hive.thriftserver.HiveThriftServer2 and org.apache.spark.repl.Main).
In these two main classes, the arguments passed in are processed by third-party libraries, which leads to errors such as "Invalid option: --principal" or "Unrecognized option: --principal".
We should pass these command args in different forms, say system properties.
Author: WangTaoTheTonic <wa...@huawei.com>
Closes #6051 from WangTaoTheTonic/SPARK-7524 and squashes the following commits:
e65699a [WangTaoTheTonic] change logic to loadEnvironments
ebd9ea0 [WangTaoTheTonic] merge master
ecfe43a [WangTaoTheTonic] pass keytab and principal seperately in different mode
33a7f40 [WangTaoTheTonic] expand the use of the current configs
08bb4e8 [WangTaoTheTonic] fix wrong cite
73afa64 [WangTaoTheTonic] add configs for keytab and principal, move originals to internal
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a51b133d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a51b133d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a51b133d
Branch: refs/heads/master
Commit: a51b133de3c65a991ab105b6f020082080121b4c
Parents: 8db40f6
Author: WangTaoTheTonic <wa...@huawei.com>
Authored: Fri May 29 11:06:11 2015 -0500
Committer: Thomas Graves <tg...@thatenemy-lm.champ.corp.yahoo.com>
Committed: Fri May 29 11:06:11 2015 -0500
----------------------------------------------------------------------
.../scala/org/apache/spark/deploy/SparkSubmit.scala | 8 ++++----
.../apache/spark/deploy/SparkSubmitArguments.scala | 2 ++
docs/running-on-yarn.md | 16 ++++++++++++++++
.../deploy/yarn/AMDelegationTokenRenewer.scala | 14 ++++++++------
.../apache/spark/deploy/yarn/ClientArguments.scala | 6 ++++++
5 files changed, 36 insertions(+), 10 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/a51b133d/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
index 92bb505..d1b32ea 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
@@ -428,6 +428,8 @@ object SparkSubmit {
OptionAssigner(args.executorCores, YARN, CLIENT, sysProp = "spark.executor.cores"),
OptionAssigner(args.files, YARN, CLIENT, sysProp = "spark.yarn.dist.files"),
OptionAssigner(args.archives, YARN, CLIENT, sysProp = "spark.yarn.dist.archives"),
+ OptionAssigner(args.principal, YARN, CLIENT, sysProp = "spark.yarn.principal"),
+ OptionAssigner(args.keytab, YARN, CLIENT, sysProp = "spark.yarn.keytab"),
// Yarn cluster only
OptionAssigner(args.name, YARN, CLUSTER, clOption = "--name"),
@@ -440,10 +442,8 @@ object SparkSubmit {
OptionAssigner(args.files, YARN, CLUSTER, clOption = "--files"),
OptionAssigner(args.archives, YARN, CLUSTER, clOption = "--archives"),
OptionAssigner(args.jars, YARN, CLUSTER, clOption = "--addJars"),
-
- // Yarn client or cluster
- OptionAssigner(args.principal, YARN, ALL_DEPLOY_MODES, clOption = "--principal"),
- OptionAssigner(args.keytab, YARN, ALL_DEPLOY_MODES, clOption = "--keytab"),
+ OptionAssigner(args.principal, YARN, CLUSTER, clOption = "--principal"),
+ OptionAssigner(args.keytab, YARN, CLUSTER, clOption = "--keytab"),
// Other options
OptionAssigner(args.executorCores, STANDALONE, ALL_DEPLOY_MODES,
http://git-wip-us.apache.org/repos/asf/spark/blob/a51b133d/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
index c0e4c77..cc6a7bd 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
@@ -169,6 +169,8 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
deployMode = Option(deployMode).orElse(env.get("DEPLOY_MODE")).orNull
numExecutors = Option(numExecutors)
.getOrElse(sparkProperties.get("spark.executor.instances").orNull)
+ keytab = Option(keytab).orElse(sparkProperties.get("spark.yarn.keytab")).orNull
+ principal = Option(principal).orElse(sparkProperties.get("spark.yarn.principal")).orNull
// Try to set main class from JAR if no --class argument is given
if (mainClass == null && !isPython && !isR && primaryResource != null) {
http://git-wip-us.apache.org/repos/asf/spark/blob/a51b133d/docs/running-on-yarn.md
----------------------------------------------------------------------
diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md
index 9d55f43..96cf612 100644
--- a/docs/running-on-yarn.md
+++ b/docs/running-on-yarn.md
@@ -242,6 +242,22 @@ Most of the configs are the same for Spark on YARN as for other deployment modes
running against earlier versions, this property will be ignored.
</td>
</tr>
+<tr>
+ <td><code>spark.yarn.keytab</code></td>
+ <td>(none)</td>
+ <td>
+ The full path to the file that contains the keytab for the principal specified by <code>spark.yarn.principal</code>.
+ This keytab will be copied to the node running the Application Master via the Secure Distributed Cache,
+ for renewing the login tickets and the delegation tokens periodically.
+ </td>
+</tr>
+<tr>
+ <td><code>spark.yarn.principal</code></td>
+ <td>(none)</td>
+ <td>
+ Principal to be used to login to KDC, while running on secure HDFS.
+ </td>
+</tr>
</table>
# Launching Spark on YARN
http://git-wip-us.apache.org/repos/asf/spark/blob/a51b133d/yarn/src/main/scala/org/apache/spark/deploy/yarn/AMDelegationTokenRenewer.scala
----------------------------------------------------------------------
diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/AMDelegationTokenRenewer.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/AMDelegationTokenRenewer.scala
index aaae6f9..77af46c 100644
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/AMDelegationTokenRenewer.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/AMDelegationTokenRenewer.scala
@@ -60,8 +60,11 @@ private[yarn] class AMDelegationTokenRenewer(
private val hadoopUtil = YarnSparkHadoopUtil.get
- private val daysToKeepFiles = sparkConf.getInt("spark.yarn.credentials.file.retention.days", 5)
- private val numFilesToKeep = sparkConf.getInt("spark.yarn.credentials.file.retention.count", 5)
+ private val credentialsFile = sparkConf.get("spark.yarn.credentials.file")
+ private val daysToKeepFiles =
+ sparkConf.getInt("spark.yarn.credentials.file.retention.days", 5)
+ private val numFilesToKeep =
+ sparkConf.getInt("spark.yarn.credentials.file.retention.count", 5)
/**
* Schedule a login from the keytab and principal set using the --principal and --keytab
@@ -121,7 +124,7 @@ private[yarn] class AMDelegationTokenRenewer(
import scala.concurrent.duration._
try {
val remoteFs = FileSystem.get(hadoopConf)
- val credentialsPath = new Path(sparkConf.get("spark.yarn.credentials.file"))
+ val credentialsPath = new Path(credentialsFile)
val thresholdTime = System.currentTimeMillis() - (daysToKeepFiles days).toMillis
hadoopUtil.listFilesSorted(
remoteFs, credentialsPath.getParent,
@@ -160,7 +163,7 @@ private[yarn] class AMDelegationTokenRenewer(
val keytabLoggedInUGI = UserGroupInformation.loginUserFromKeytabAndReturnUGI(principal, keytab)
logInfo("Successfully logged into KDC.")
val tempCreds = keytabLoggedInUGI.getCredentials
- val credentialsPath = new Path(sparkConf.get("spark.yarn.credentials.file"))
+ val credentialsPath = new Path(credentialsFile)
val dst = credentialsPath.getParent
keytabLoggedInUGI.doAs(new PrivilegedExceptionAction[Void] {
// Get a copy of the credentials
@@ -186,8 +189,7 @@ private[yarn] class AMDelegationTokenRenewer(
}
val nextSuffix = lastCredentialsFileSuffix + 1
val tokenPathStr =
- sparkConf.get("spark.yarn.credentials.file") +
- SparkHadoopUtil.SPARK_YARN_CREDS_COUNTER_DELIM + nextSuffix
+ credentialsFile + SparkHadoopUtil.SPARK_YARN_CREDS_COUNTER_DELIM + nextSuffix
val tokenPath = new Path(tokenPathStr)
val tempTokenPath = new Path(tokenPathStr + SparkHadoopUtil.SPARK_YARN_CREDS_TEMP_EXTENSION)
logInfo("Writing out delegation tokens to " + tempTokenPath.toString)
http://git-wip-us.apache.org/repos/asf/spark/blob/a51b133d/yarn/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala
----------------------------------------------------------------------
diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala
index 5653c9f..9c7b1b3 100644
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala
@@ -98,6 +98,12 @@ private[spark] class ClientArguments(args: Array[String], sparkConf: SparkConf)
numExecutors = initialNumExecutors
}
+ principal = Option(principal)
+ .orElse(sparkConf.getOption("spark.yarn.principal"))
+ .orNull
+ keytab = Option(keytab)
+ .orElse(sparkConf.getOption("spark.yarn.keytab"))
+ .orNull
}
/**
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org