You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by pw...@apache.org on 2014/04/24 20:16:42 UTC
git commit: Spark 1490 Add kerberos support to the HistoryServer
Repository: spark
Updated Branches:
refs/heads/master 78a49b253 -> bd375094a
Spark 1490 Add kerberos support to the HistoryServer
Here I've added the ability for the History server to log in from a kerberos keytab file so that the history server can be run as a super user and stay up for a long period of time while reading the history files from HDFS.
Author: Thomas Graves <tg...@apache.org>
Closes #513 from tgravescs/SPARK-1490 and squashes the following commits:
e204a99 [Thomas Graves] remove extra logging
5418daa [Thomas Graves] fix typo in config
0076b99 [Thomas Graves] Update docs
4d76545 [Thomas Graves] SPARK-1490 Add kerberos support to the HistoryServer
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/bd375094
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/bd375094
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/bd375094
Branch: refs/heads/master
Commit: bd375094a1480b0ff9c16ab8ddd2dba8731506df
Parents: 78a49b2
Author: Thomas Graves <tg...@apache.org>
Authored: Thu Apr 24 11:15:12 2014 -0700
Committer: Patrick Wendell <pw...@gmail.com>
Committed: Thu Apr 24 11:16:30 2014 -0700
----------------------------------------------------------------------
.../apache/spark/deploy/SparkHadoopUtil.scala | 4 ++++
.../spark/deploy/history/HistoryServer.scala | 16 +++++++++++++
docs/monitoring.md | 24 ++++++++++++++++++++
3 files changed, 44 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/bd375094/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
index 9bdbfb3..498fcc5 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
@@ -75,6 +75,10 @@ class SparkHadoopUtil {
def getSecretKeyFromUserCredentials(key: String): Array[Byte] = { null }
+ def loginUserFromKeytab(principalName: String, keytabFilename: String) {
+ UserGroupInformation.loginUserFromKeytab(principalName, keytabFilename)
+ }
+
}
object SparkHadoopUtil {
http://git-wip-us.apache.org/repos/asf/spark/blob/bd375094/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala
index b8f5623..d7a3246 100644
--- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala
@@ -22,6 +22,7 @@ import scala.collection.mutable
import org.apache.hadoop.fs.{FileStatus, Path}
import org.apache.spark.{Logging, SecurityManager, SparkConf}
+import org.apache.spark.deploy.SparkHadoopUtil
import org.apache.spark.scheduler._
import org.apache.spark.ui.{WebUI, SparkUI}
import org.apache.spark.ui.JettyUtils._
@@ -257,6 +258,7 @@ object HistoryServer {
val STATIC_RESOURCE_DIR = SparkUI.STATIC_RESOURCE_DIR
def main(argStrings: Array[String]) {
+ initSecurity()
val args = new HistoryServerArguments(argStrings)
val securityManager = new SecurityManager(conf)
val server = new HistoryServer(args.logDir, securityManager, conf)
@@ -266,6 +268,20 @@ object HistoryServer {
while(true) { Thread.sleep(Int.MaxValue) }
server.stop()
}
+
+ def initSecurity() {
+ // If we are accessing HDFS and it has security enabled (Kerberos), we have to login
+ // from a keytab file so that we can access HDFS beyond the kerberos ticket expiration.
+ // As long as it is using Hadoop rpc (hdfs://), a relogin will automatically
+ // occur from the keytab.
+ if (conf.getBoolean("spark.history.kerberos.enabled", false)) {
+ // if you have enabled kerberos the following 2 params must be set
+ val principalName = conf.get("spark.history.kerberos.principal")
+ val keytabFilename = conf.get("spark.history.kerberos.keytab")
+ SparkHadoopUtil.get.loginUserFromKeytab(principalName, keytabFilename)
+ }
+ }
+
}
http://git-wip-us.apache.org/repos/asf/spark/blob/bd375094/docs/monitoring.md
----------------------------------------------------------------------
diff --git a/docs/monitoring.md b/docs/monitoring.md
index 144be3d..347a9b1 100644
--- a/docs/monitoring.md
+++ b/docs/monitoring.md
@@ -91,6 +91,30 @@ represents an application's event logs. This creates a web interface at
The port to which the web interface of the history server binds.
</td>
</tr>
+ <tr>
+ <td>spark.history.kerberos.enabled</td>
+ <td>false</td>
+ <td>
+ Indicates whether the history server should use kerberos to log in. This is useful
+ if the history server is accessing HDFS files on a secure Hadoop cluster. If this is
+ true, it uses the configs <code>spark.history.kerberos.principal</code> and
+ <code>spark.history.kerberos.keytab</code>.
+ </td>
+ </tr>
+ <tr>
+ <td>spark.history.kerberos.principal</td>
+ <td>(none)</td>
+ <td>
+ Kerberos principal name for the History Server.
+ </td>
+ </tr>
+ <tr>
+ <td>spark.history.kerberos.keytab</td>
+ <td>(none)</td>
+ <td>
+ Location of the kerberos keytab file for the History Server.
+ </td>
+ </tr>
</table>
Note that in all of these UIs, the tables are sortable by clicking their headers,