You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by va...@apache.org on 2017/04/25 01:19:08 UTC

spark git commit: [SPARK-20239][CORE] Improve HistoryServer's ACL mechanism

Repository: spark
Updated Branches:
  refs/heads/master 8a272ddc9 -> 5280d93e6


[SPARK-20239][CORE] Improve HistoryServer's ACL mechanism

## What changes were proposed in this pull request?

Current SHS (Spark History Server) two different ACLs:

* ACL of base URL, it is controlled by "spark.acls.enabled" or "spark.ui.acls.enabled", and with this enabled, only user configured with "spark.admin.acls" (or group) or "spark.ui.view.acls" (or group), or the user who started SHS could list all the applications, otherwise none of them can be listed. This will also affect REST APIs which listing the summary of all apps and one app.
* Per application ACL. This is controlled by "spark.history.ui.acls.enabled". With this enabled only history admin user and user/group who ran this app can access the details of this app.

With this two ACLs, we may encounter several unexpected behaviors:

1. if base URL's ACL (`spark.acls.enable`) is enabled but user A has no view permission. User "A" cannot see the app list but could still access details of it's own app.
2. if ACLs of base URL (`spark.acls.enable`) is disabled, then user "A" could download any application's event log, even it is not run by user "A".
3. The changes of Live UI's ACL will affect History UI's ACL which share the same conf file.

The unexpected behaviors is mainly because we have two different ACLs, ideally we should have only one to manage all.

So to improve SHS's ACL mechanism, here in this PR proposed to:

1. Disable "spark.acls.enable" and only use "spark.history.ui.acls.enable" for history server.
2. Check permission for event-log download REST API.

With this PR:

1. Admin user could see/download the list of all applications, as well as application details.
2. Normal user could see the list of all applications, but can only download and check the details of applications accessible to him.

## How was this patch tested?

New UTs are added, also verified in real cluster.

CC tgravescs vanzin please help to review, this PR changes the semantics you did previously. Thanks a lot.

Author: jerryshao <ss...@hortonworks.com>

Closes #17582 from jerryshao/SPARK-20239.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5280d93e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5280d93e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5280d93e

Branch: refs/heads/master
Commit: 5280d93e6ecec7327e7fcd3d8d1cb90e01e774fc
Parents: 8a272dd
Author: jerryshao <ss...@hortonworks.com>
Authored: Mon Apr 24 18:18:59 2017 -0700
Committer: Marcelo Vanzin <va...@cloudera.com>
Committed: Mon Apr 24 18:18:59 2017 -0700

----------------------------------------------------------------------
 .../history/ApplicationHistoryProvider.scala      |  4 ++--
 .../spark/deploy/history/HistoryServer.scala      |  8 ++++++++
 .../spark/status/api/v1/ApiRootResource.scala     | 18 +++++++++++++++---
 .../spark/deploy/history/HistoryServerSuite.scala | 14 ++++++++------
 4 files changed, 33 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/5280d93e/core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala
index d7d8280..6d8758a 100644
--- a/core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala
@@ -86,7 +86,7 @@ private[history] abstract class ApplicationHistoryProvider {
    * @return Count of application event logs that are currently under process
    */
   def getEventLogsUnderProcess(): Int = {
-    return 0;
+    0
   }
 
   /**
@@ -95,7 +95,7 @@ private[history] abstract class ApplicationHistoryProvider {
    * @return 0 if this is undefined or unsupported, otherwise the last updated time in millis
    */
   def getLastUpdatedTime(): Long = {
-    return 0;
+    0
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/spark/blob/5280d93e/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala
index 54f39f7..d9c8fda 100644
--- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala
@@ -301,6 +301,14 @@ object HistoryServer extends Logging {
       logDebug(s"Clearing ${SecurityManager.SPARK_AUTH_CONF}")
       config.set(SecurityManager.SPARK_AUTH_CONF, "false")
     }
+
+    if (config.getBoolean("spark.acls.enable", config.getBoolean("spark.ui.acls.enable", false))) {
+      logInfo("Either spark.acls.enable or spark.ui.acls.enable is configured, clearing it and " +
+        "only using spark.history.ui.acl.enable")
+      config.set("spark.acls.enable", "false")
+      config.set("spark.ui.acls.enable", "false")
+    }
+
     new SecurityManager(config)
   }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/5280d93e/core/src/main/scala/org/apache/spark/status/api/v1/ApiRootResource.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/ApiRootResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/ApiRootResource.scala
index 00f918c..f17b637 100644
--- a/core/src/main/scala/org/apache/spark/status/api/v1/ApiRootResource.scala
+++ b/core/src/main/scala/org/apache/spark/status/api/v1/ApiRootResource.scala
@@ -184,14 +184,27 @@ private[v1] class ApiRootResource extends ApiRequestContext {
   @Path("applications/{appId}/logs")
   def getEventLogs(
       @PathParam("appId") appId: String): EventLogDownloadResource = {
-    new EventLogDownloadResource(uiRoot, appId, None)
+    try {
+      // withSparkUI will throw NotFoundException if attemptId exists for this application.
+      // So we need to try again with attempt id "1".
+      withSparkUI(appId, None) { _ =>
+        new EventLogDownloadResource(uiRoot, appId, None)
+      }
+    } catch {
+      case _: NotFoundException =>
+        withSparkUI(appId, Some("1")) { _ =>
+          new EventLogDownloadResource(uiRoot, appId, None)
+        }
+    }
   }
 
   @Path("applications/{appId}/{attemptId}/logs")
   def getEventLogs(
       @PathParam("appId") appId: String,
       @PathParam("attemptId") attemptId: String): EventLogDownloadResource = {
-    new EventLogDownloadResource(uiRoot, appId, Some(attemptId))
+    withSparkUI(appId, Some(attemptId)) { _ =>
+      new EventLogDownloadResource(uiRoot, appId, Some(attemptId))
+    }
   }
 
   @Path("version")
@@ -291,7 +304,6 @@ private[v1] trait ApiRequestContext {
       case None => throw new NotFoundException("no such app: " + appId)
     }
   }
-
 }
 
 private[v1] class ForbiddenException(msg: String) extends WebApplicationException(

http://git-wip-us.apache.org/repos/asf/spark/blob/5280d93e/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala
index 764156c..95acb9a 100644
--- a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala
@@ -565,13 +565,12 @@ class HistoryServerSuite extends SparkFunSuite with BeforeAndAfter with Matchers
     assert(jobcount === getNumJobs("/jobs"))
 
     // no need to retain the test dir now the tests complete
-    logDir.deleteOnExit();
-
+    logDir.deleteOnExit()
   }
 
   test("ui and api authorization checks") {
-    val appId = "app-20161115172038-0000"
-    val owner = "jose"
+    val appId = "local-1430917381535"
+    val owner = "irashid"
     val admin = "root"
     val other = "alice"
 
@@ -590,8 +589,11 @@ class HistoryServerSuite extends SparkFunSuite with BeforeAndAfter with Matchers
 
     val port = server.boundPort
     val testUrls = Seq(
-      s"http://localhost:$port/api/v1/applications/$appId/jobs",
-      s"http://localhost:$port/history/$appId/jobs/")
+      s"http://localhost:$port/api/v1/applications/$appId/1/jobs",
+      s"http://localhost:$port/history/$appId/1/jobs/",
+      s"http://localhost:$port/api/v1/applications/$appId/logs",
+      s"http://localhost:$port/api/v1/applications/$appId/1/logs",
+      s"http://localhost:$port/api/v1/applications/$appId/2/logs")
 
     tests.foreach { case (user, expectedCode) =>
       testUrls.foreach { url =>


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org