You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2018/12/20 22:48:49 UTC
[GitHub] vanzin closed pull request #20326: [SPARK-23155][DEPLOY] log.server.url links in SHS

vanzin closed pull request #20326: [SPARK-23155][DEPLOY] log.server.url links in SHS
URL: https://github.com/apache/spark/pull/20326
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

Because this pull request was opened from a fork, GitHub no longer displays
its diff once the pull request is closed, so the diff is reproduced below:

diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/OneApplicationResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/OneApplicationResource.scala
index 974697890dd03..79a81d3c74713 100644
--- a/core/src/main/scala/org/apache/spark/status/api/v1/OneApplicationResource.scala
+++ b/core/src/main/scala/org/apache/spark/status/api/v1/OneApplicationResource.scala
@@ -17,6 +17,7 @@
 package org.apache.spark.status.api.v1
 
 import java.io.OutputStream
+import java.net.URI
 import java.util.{List => JList}
 import java.util.zip.ZipOutputStream
 import javax.ws.rs._
@@ -49,7 +50,7 @@ private[v1] class AbstractApplicationResource extends BaseAppResource {
 
   @GET
   @Path("executors")
-  def executorList(): Seq[ExecutorSummary] = withUI(_.store.executorList(true))
+  def executorList(): Seq[ExecutorSummary] = fetchExecutors(true)
 
   @GET
   @Path("executors/{executorId}/threads")
@@ -76,7 +77,7 @@ private[v1] class AbstractApplicationResource extends BaseAppResource {
 
   @GET
   @Path("allexecutors")
-  def allExecutorList(): Seq[ExecutorSummary] = withUI(_.store.executorList(false))
+  def allExecutorList(): Seq[ExecutorSummary] = fetchExecutors(false)
 
   @Path("stages")
   def stages(): Class[StagesResource] = classOf[StagesResource]
@@ -160,6 +161,62 @@ private[v1] class AbstractApplicationResource extends BaseAppResource {
     classOf[OneApplicationAttemptResource]
   }
 
+  /**
+   * Lists the executors known to the application's status store, pointing their log links at
+   * the YARN log server when the UI exposes one.
+   *
+   * @param activeOnly forwarded unchanged to `store.executorList`
+   * @return the executor summaries; their `executorLogs` are rewritten when
+   *         `ui.yarnLogServerUrl` is defined and returned untouched otherwise
+   */
+  private def fetchExecutors(activeOnly: Boolean): Seq[ExecutorSummary] = withUI { ui =>
+    val executors = ui.store.executorList(activeOnly)
+    ui.yarnLogServerUrl match {
+      case Some(logServerUrl) =>
+        executors.map { executor =>
+          withYarnLogServerLogs(toYarnLogServerUrl(logServerUrl, ui.nmRpcPort))(executor)
+        }
+      case None =>
+        executors
+    }
+  }
+
+  /**
+   * Rewrites a NodeManager container log URL so that it points at the YARN log server.
+   *
+   * The result has the form `logServerUrl/host:nmPort/containerId/containerSuffix`, where
+   * `containerSuffix` is everything in `nmLogUrl` from the first occurrence of `container_`
+   * onwards (so the container id appears twice) and `host` is the host of `nmLogUrl`.
+   *
+   * The input is returned unchanged when it cannot be rewritten: it has no `container_`
+   * segment, the container id is not followed by a `/`, the URL is not a valid URI, or no
+   * host can be extracted from it.
+   *
+   * @param logServerUrl base URL of the log server, used as-is as the prefix of the result
+   * @param nmPort port appended to the host to form the node id
+   * @param nmLogUrl the original log URL to rewrite
+   * @return the rewritten URL, or `nmLogUrl` itself if it cannot be rewritten
+   */
+  private def toYarnLogServerUrl(logServerUrl: String, nmPort: Int)(nmLogUrl: String): String = {
+    val containerSuffixPos = nmLogUrl.indexOf("container_")
+    val containerSuffix =
+      if (containerSuffixPos >= 0) nmLogUrl.substring(containerSuffixPos) else ""
+    val containerEndPos = containerSuffix.indexOf("/")
+    if (containerEndPos < 0) {
+      nmLogUrl
+    } else {
+      // URI.create throws IllegalArgumentException on malformed input, and getHost returns null
+      // when the authority is not a valid server-based one (e.g. a host name containing '_').
+      // Either case would otherwise fail the request or yield a bogus "null:<port>" node id.
+      val host = scala.util.Try(URI.create(nmLogUrl).getHost).toOption.flatMap(Option(_))
+      host.fold(nmLogUrl) { h =>
+        val container = containerSuffix.substring(0, containerEndPos)
+        s"$logServerUrl/$h:$nmPort/$container/$containerSuffix"
+      }
+    }
+  }
+
+  /**
+   * Returns a copy of `info` in which every URL in `executorLogs` has been passed through
+   * `logRewrite`. All other fields are copied verbatim.
+   *
+   * @param logRewrite function applied to each log URL (the map values); keys are kept as-is
+   * @param info the executor summary to copy
+   * @return a new [[ExecutorSummary]] with rewritten log URLs
+   */
+  private def withYarnLogServerLogs(
+      logRewrite: String => String)(
+      info: ExecutorSummary): ExecutorSummary = {
+    // Build a strict map rather than using mapValues: mapValues returns a lazy view that
+    // re-applies logRewrite on every lookup and is not serializable.
+    val rewrittenLogs = info.executorLogs.map { case (name, url) => name -> logRewrite(url) }
+    new ExecutorSummary(
+      id = info.id,
+      hostPort = info.hostPort,
+      isActive = info.isActive,
+      rddBlocks = info.rddBlocks,
+      memoryUsed = info.memoryUsed,
+      diskUsed = info.diskUsed,
+      totalCores = info.totalCores,
+      maxTasks = info.maxTasks,
+      activeTasks = info.activeTasks,
+      failedTasks = info.failedTasks,
+      completedTasks = info.completedTasks,
+      totalTasks = info.totalTasks,
+      totalDuration = info.totalDuration,
+      totalGCTime = info.totalGCTime,
+      totalInputBytes = info.totalInputBytes,
+      totalShuffleRead = info.totalShuffleRead,
+      totalShuffleWrite = info.totalShuffleWrite,
+      isBlacklisted = info.isBlacklisted,
+      maxMemory = info.maxMemory,
+      addTime = info.addTime,
+      removeTime = info.removeTime,
+      removeReason = info.removeReason,
+      executorLogs = rewrittenLogs,
+      memoryMetrics = info.memoryMetrics
+    )
+  }
 }
 
 private[v1] class OneApplicationResource extends AbstractApplicationResource {
diff --git a/core/src/main/scala/org/apache/spark/ui/SparkUI.scala b/core/src/main/scala/org/apache/spark/ui/SparkUI.scala
index b44ac0ea1febc..8acfe7161ff51 100644
--- a/core/src/main/scala/org/apache/spark/ui/SparkUI.scala
+++ b/core/src/main/scala/org/apache/spark/ui/SparkUI.scala
@@ -17,11 +17,12 @@
 
 package org.apache.spark.ui
 
-import java.util.{Date, List => JList, ServiceLoader}
+import java.util.Date
 
-import scala.collection.JavaConverters._
+import org.apache.hadoop.yarn.conf.YarnConfiguration
 
-import org.apache.spark.{JobExecutionStatus, SecurityManager, SparkConf, SparkContext}
+import org.apache.spark.{SecurityManager, SparkConf, SparkContext}
+import org.apache.spark.deploy.SparkHadoopUtil
 import org.apache.spark.internal.Logging
 import org.apache.spark.scheduler._
 import org.apache.spark.status.AppStatusStore
@@ -31,7 +32,6 @@ import org.apache.spark.ui.env.EnvironmentTab
 import org.apache.spark.ui.exec.ExecutorsTab
 import org.apache.spark.ui.jobs.{JobsTab, StagesTab}
 import org.apache.spark.ui.storage.StorageTab
-import org.apache.spark.util.Utils
 
 /**
  * Top level user interface for a Spark application.
@@ -52,6 +52,22 @@ private[spark] class SparkUI private (
 
   val killEnabled = sc.map(_.conf.getBoolean("spark.ui.killEnabled", true)).getOrElse(false)
 
+  // Hadoop configuration derived from the Spark conf; read below for the YARN settings.
+  private val yarnConf = SparkHadoopUtil.get.newConfiguration(conf)
+  // Captures the digits after the last ':' of an address.
+  private val portReg = "^.*:([0-9]+)$".r
+
+  /**
+   * Port taken from the configured NodeManager address, or 0 when the address is unset, does
+   * not end in `:<digits>`, or the digits do not fit in an Int. `collect` and the guarded
+   * conversion are used so that an unexpected address cannot throw while the UI is being
+   * constructed.
+   */
+  private[spark] val nmRpcPort: Int = Option(yarnConf.get(YarnConfiguration.NM_ADDRESS))
+    .collect { case portReg(port) => port }
+    .flatMap(port => scala.util.Try(port.toInt).toOption)
+    .getOrElse(0)
+
+  // A UI created without a SparkContext is treated as a history UI.
+  private val isHistoryUI = sc.isEmpty
+  // Aggregated logs are only used for a history UI with a known NodeManager port and with log
+  // aggregation enabled in the configuration.
+  private val useAggregatedLogs = isHistoryUI && nmRpcPort > 0 &&
+    yarnConf.getBoolean(YarnConfiguration.LOG_AGGREGATION_ENABLED, false)
+
+  /** Configured YARN log server URL, defined only when aggregated logs should be used. */
+  private[spark] val yarnLogServerUrl: Option[String] =
+    if (useAggregatedLogs) {
+      Option(yarnConf.get(YarnConfiguration.YARN_LOG_SERVER_URL))
+    } else {
+      None
+    }
+
+
   var appId: String = _
 
   private var streamingJobProgressListener: Option[SparkListener] = None


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org