Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2020/11/20 22:37:03 UTC

[GitHub] [spark] otterc commented on a change in pull request #30450: [SPARK-33185][YARN][FOLLOW-ON] Leverage RM's RPC API instead of REST to fetch driver log links in yarn.Client

otterc commented on a change in pull request #30450:
URL: https://github.com/apache/spark/pull/30450#discussion_r528002512



##########
File path: resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
##########
@@ -1192,32 +1192,31 @@ private[spark] class Client(
   }
 
   /**
-   * Fetch links to the logs of the driver for the given application ID. This requires hitting the
-   * RM REST API. Returns an empty map if the links could not be fetched. If this feature is
-   * disabled via [[CLIENT_INCLUDE_DRIVER_LOGS_LINK]], an empty map is returned immediately.
+   * Fetch links to the logs of the driver for the given application report. This requires
+   * querying the ResourceManager via RPC. Returns an empty map if the links could not be fetched.
+   * If this feature is disabled via [[CLIENT_INCLUDE_DRIVER_LOGS_LINK]], or if the application
+   * report indicates that the driver container isn't yet running
+   * (states `NEW`, `NEW_SAVING`, `SUBMITTED`, or `ACCEPTED`), an empty map is returned immediately.
    */
-  private def getDriverLogsLink(appId: ApplicationId): IMap[String, String] = {
-    if (!sparkConf.get(CLIENT_INCLUDE_DRIVER_LOGS_LINK)) {
+  private def getDriverLogsLink(appReport: ApplicationReport): IMap[String, String] = {
+    if (!sparkConf.get(CLIENT_INCLUDE_DRIVER_LOGS_LINK)
+        || appReport.getYarnApplicationState == YarnApplicationState.NEW
+        || appReport.getYarnApplicationState == YarnApplicationState.NEW_SAVING
+        || appReport.getYarnApplicationState == YarnApplicationState.SUBMITTED
+        || appReport.getYarnApplicationState == YarnApplicationState.ACCEPTED) {
       return IMap()
     }
     try {
-      val baseRmUrl = WebAppUtils.getRMWebAppURLWithScheme(hadoopConf)
-      val response = ClientBuilder.newClient()
-          .target(baseRmUrl)
-          .path("ws").path("v1").path("cluster").path("apps")
-          .path(appId.toString).path("appattempts")
-          .request(MediaType.APPLICATION_JSON)
-          .get()
-      response.getStatusInfo.getFamily match {
-        case Family.SUCCESSFUL => parseAppAttemptsJsonResponse(response.readEntity(classOf[String]))
-        case _ =>
-          logWarning(s"Unable to fetch app attempts info from $baseRmUrl, got "
-              + s"status code ${response.getStatus}: ${response.getStatusInfo.getReasonPhrase}")
-          IMap()
-      }
+      val amContainerId = yarnClient
+          .getApplicationAttemptReport(appReport.getCurrentApplicationAttemptId)

Review comment:
       Nit: indentation should be 2
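
       For readers without the full diff: the hunk above is cut off mid-chain, so below is a minimal
       sketch of how an RPC-based lookup of the driver (AM) container logs can look with the 2-space
       continuation indent applied. It uses only Hadoop YARN client API calls that exist on
       YarnClient/ApplicationReport/ContainerReport (getApplicationAttemptReport, getAMContainerId,
       getContainerReport, getLogUrl); the object, the method name, and the stdout/stderr suffixes
       are illustrative assumptions, not the PR's literal code.

       import scala.collection.immutable.{Map => IMap}

       import org.apache.hadoop.yarn.api.records.ApplicationReport
       import org.apache.hadoop.yarn.client.api.YarnClient

       // Hypothetical sketch (not the PR's exact code): resolve the AM container of the
       // current attempt over the RM's RPC API, then derive log links from its report.
       object DriverLogLinksSketch {
         def driverLogLinks(yarnClient: YarnClient, appReport: ApplicationReport): IMap[String, String] = {
           // 2-space continuation indent, as requested in the review comment above.
           val amContainerId = yarnClient
             .getApplicationAttemptReport(appReport.getCurrentApplicationAttemptId)
             .getAMContainerId
           val amContainerReport = yarnClient.getContainerReport(amContainerId)
           // getLogUrl points at the NodeManager's log page for the container; the exact
           // per-file suffixes below are assumed for illustration.
           IMap(
             "stdout" -> s"${amContainerReport.getLogUrl}/stdout",
             "stderr" -> s"${amContainerReport.getLogUrl}/stderr")
         }
       }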

##########
File path: resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
##########
@@ -1192,32 +1192,31 @@ private[spark] class Client(
   }
 
   /**
-   * Fetch links to the logs of the driver for the given application ID. This requires hitting the
-   * RM REST API. Returns an empty map if the links could not be fetched. If this feature is
-   * disabled via [[CLIENT_INCLUDE_DRIVER_LOGS_LINK]], an empty map is returned immediately.
+   * Fetch links to the logs of the driver for the given application report. This requires
+   * querying the ResourceManager via RPC. Returns an empty map if the links could not be fetched.
+   * If this feature is disabled via [[CLIENT_INCLUDE_DRIVER_LOGS_LINK]], or if the application
+   * report indicates that the driver container isn't yet running
+   * (states `NEW`, `NEW_SAVING`, `SUBMITTED`, or `ACCEPTED`), an empty map is returned immediately.
    */
-  private def getDriverLogsLink(appId: ApplicationId): IMap[String, String] = {
-    if (!sparkConf.get(CLIENT_INCLUDE_DRIVER_LOGS_LINK)) {
+  private def getDriverLogsLink(appReport: ApplicationReport): IMap[String, String] = {
+    if (!sparkConf.get(CLIENT_INCLUDE_DRIVER_LOGS_LINK)
+        || appReport.getYarnApplicationState == YarnApplicationState.NEW

Review comment:
       Nit: indentation should be 2
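
       As with the previous comment, this nit presumably refers to Spark's 2-space continuation
       indent. Applied to the condition shown in the hunk, the suggested formatting would read as
       follows; this is the same excerpt from the diff with only the whitespace changed, so it still
       relies on the surrounding Client.scala context (sparkConf, CLIENT_INCLUDE_DRIVER_LOGS_LINK).

           if (!sparkConf.get(CLIENT_INCLUDE_DRIVER_LOGS_LINK)
             || appReport.getYarnApplicationState == YarnApplicationState.NEW
             || appReport.getYarnApplicationState == YarnApplicationState.NEW_SAVING
             || appReport.getYarnApplicationState == YarnApplicationState.SUBMITTED
             || appReport.getYarnApplicationState == YarnApplicationState.ACCEPTED) {
             return IMap()
           }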




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org