You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by tg...@apache.org on 2014/04/07 20:28:21 UTC

git commit: SPARK-1252. On YARN, use container-log4j.properties for executors

Repository: spark
Updated Branches:
  refs/heads/master 83f2a2f14 -> 9dd8b9166


SPARK-1252. On YARN, use container-log4j.properties for executors

container-log4j.properties is a file that YARN provides so that containers can have log4j.properties distinct from that of the NodeManagers.

Logs now go to syslog, and stderr and stdout just have the process's standard err and standard out.

I tested this on pseudo-distributed clusters for both yarn (Hadoop 2.2) and yarn-alpha (Hadoop 0.23.7)/

Author: Sandy Ryza <sa...@cloudera.com>

Closes #148 from sryza/sandy-spark-1252 and squashes the following commits:

c0043b8 [Sandy Ryza] Put log4j.properties file under common
55823da [Sandy Ryza] Add license headers to new files
10934b8 [Sandy Ryza] Add log4j-spark-container.properties and support SPARK_LOG4J_CONF
e74450b [Sandy Ryza] SPARK-1252. On YARN, use container-log4j.properties for executors


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9dd8b916
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9dd8b916
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9dd8b916

Branch: refs/heads/master
Commit: 9dd8b9166225cbcfcab1f39268244c2feb42a658
Parents: 83f2a2f
Author: Sandy Ryza <sa...@cloudera.com>
Authored: Mon Apr 7 13:28:14 2014 -0500
Committer: Thomas Graves <tg...@apache.org>
Committed: Mon Apr 7 13:28:14 2014 -0500

----------------------------------------------------------------------
 .../spark/deploy/yarn/ExecutorRunnable.scala    |  3 ++-
 .../resources/log4j-spark-container.properties  | 24 ++++++++++++++++++++
 .../apache/spark/deploy/yarn/ClientBase.scala   |  8 +++++--
 .../deploy/yarn/ExecutorRunnableUtil.scala      |  7 +++++-
 .../spark/deploy/yarn/YarnSparkHadoopUtil.scala |  7 ++++++
 yarn/pom.xml                                    |  6 +++++
 .../spark/deploy/yarn/ExecutorRunnable.scala    |  3 ++-
 7 files changed, 53 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/9dd8b916/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala
----------------------------------------------------------------------
diff --git a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala
index 981e8b0..3469b7d 100644
--- a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala
+++ b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala
@@ -81,7 +81,8 @@ class ExecutorRunnable(
     credentials.writeTokenStorageToStream(dob)
     ctx.setContainerTokens(ByteBuffer.wrap(dob.getData()))
 
-    val commands = prepareCommand(masterAddress, slaveId, hostname, executorMemory, executorCores)
+    val commands = prepareCommand(masterAddress, slaveId, hostname, executorMemory, executorCores,
+      localResources.contains(ClientBase.LOG4J_PROP))
     logInfo("Setting up executor with commands: " + commands)
     ctx.setCommands(commands)
 

http://git-wip-us.apache.org/repos/asf/spark/blob/9dd8b916/yarn/common/src/main/resources/log4j-spark-container.properties
----------------------------------------------------------------------
diff --git a/yarn/common/src/main/resources/log4j-spark-container.properties b/yarn/common/src/main/resources/log4j-spark-container.properties
new file mode 100644
index 0000000..a1e37a0
--- /dev/null
+++ b/yarn/common/src/main/resources/log4j-spark-container.properties
@@ -0,0 +1,24 @@
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License. See accompanying LICENSE file.
+
+# Set everything to be logged to the console
+log4j.rootCategory=INFO, console
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.console.target=System.err
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
+
+# Settings to quiet third party logs that are too verbose
+log4j.logger.org.eclipse.jetty=WARN
+log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
+log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO

http://git-wip-us.apache.org/repos/asf/spark/blob/9dd8b916/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
----------------------------------------------------------------------
diff --git a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
index bc26790..eb42922 100644
--- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
+++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
@@ -266,11 +266,11 @@ trait ClientBase extends Logging {
       localResources: HashMap[String, LocalResource],
       stagingDir: String): HashMap[String, String] = {
     logInfo("Setting up the launch environment")
-    val log4jConfLocalRes = localResources.getOrElse(ClientBase.LOG4J_PROP, null)
 
     val env = new HashMap[String, String]()
 
-    ClientBase.populateClasspath(yarnConf, sparkConf, log4jConfLocalRes != null, env)
+    ClientBase.populateClasspath(yarnConf, sparkConf, localResources.contains(ClientBase.LOG4J_PROP),
+      env)
     env("SPARK_YARN_MODE") = "true"
     env("SPARK_YARN_STAGING_DIR") = stagingDir
     env("SPARK_USER") = UserGroupInformation.getCurrentUser().getShortUserName()
@@ -344,6 +344,10 @@ trait ClientBase extends Logging {
       JAVA_OPTS += " " + env("SPARK_JAVA_OPTS")
     }
 
+    if (!localResources.contains(ClientBase.LOG4J_PROP)) {
+      JAVA_OPTS += " " + YarnSparkHadoopUtil.getLoggingArgsForContainerCommandLine()
+    }
+
     // Command for the ApplicationMaster
     val commands = List[String](
       Environment.JAVA_HOME.$() + "/bin/java" +

http://git-wip-us.apache.org/repos/asf/spark/blob/9dd8b916/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnableUtil.scala
----------------------------------------------------------------------
diff --git a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnableUtil.scala b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnableUtil.scala
index 2079697..b3696c5 100644
--- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnableUtil.scala
+++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnableUtil.scala
@@ -50,7 +50,8 @@ trait ExecutorRunnableUtil extends Logging {
       slaveId: String,
       hostname: String,
       executorMemory: Int,
-      executorCores: Int) = {
+      executorCores: Int,
+      userSpecifiedLogFile: Boolean) = {
     // Extra options for the JVM
     var JAVA_OPTS = ""
     // Set the JVM memory
@@ -63,6 +64,10 @@ trait ExecutorRunnableUtil extends Logging {
     JAVA_OPTS += " -Djava.io.tmpdir=" +
       new Path(Environment.PWD.$(), YarnConfiguration.DEFAULT_CONTAINER_TEMP_DIR) + " "
 
+    if (!userSpecifiedLogFile) {
+      JAVA_OPTS += " " + YarnSparkHadoopUtil.getLoggingArgsForContainerCommandLine()
+    }
+
     // Commenting it out for now - so that people can refer to the properties if required. Remove
     // it once cpuset version is pushed out.
     // The context is, default gc for server class machines end up using all cores to do gc - hence

http://git-wip-us.apache.org/repos/asf/spark/blob/9dd8b916/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala
----------------------------------------------------------------------
diff --git a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala
index 4c6e1dc..314a755 100644
--- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala
+++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala
@@ -22,6 +22,7 @@ import org.apache.hadoop.mapred.JobConf
 import org.apache.hadoop.security.Credentials
 import org.apache.hadoop.security.UserGroupInformation
 import org.apache.hadoop.yarn.conf.YarnConfiguration
+import org.apache.hadoop.yarn.api.ApplicationConstants
 import org.apache.hadoop.conf.Configuration
 import org.apache.spark.deploy.SparkHadoopUtil
 
@@ -67,3 +68,9 @@ class YarnSparkHadoopUtil extends SparkHadoopUtil {
   }
 
 }
+
+object YarnSparkHadoopUtil {
+  def getLoggingArgsForContainerCommandLine(): String = {
+    "-Dlog4j.configuration=log4j-spark-container.properties"
+  }
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/9dd8b916/yarn/pom.xml
----------------------------------------------------------------------
diff --git a/yarn/pom.xml b/yarn/pom.xml
index 35e3176..3342cb6 100644
--- a/yarn/pom.xml
+++ b/yarn/pom.xml
@@ -167,6 +167,12 @@
 
     <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
     <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
+  
+    <resources>
+      <resource>
+        <directory>../common/src/main/resources</directory>
+      </resource>
+    </resources>
   </build>
 
 </project>

http://git-wip-us.apache.org/repos/asf/spark/blob/9dd8b916/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala
----------------------------------------------------------------------
diff --git a/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala b/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala
index 53c403f..81d9d1b 100644
--- a/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala
+++ b/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala
@@ -78,7 +78,8 @@ class ExecutorRunnable(
     credentials.writeTokenStorageToStream(dob)
     ctx.setTokens(ByteBuffer.wrap(dob.getData()))
 
-    val commands = prepareCommand(masterAddress, slaveId, hostname, executorMemory, executorCores)
+    val commands = prepareCommand(masterAddress, slaveId, hostname, executorMemory, executorCores,
+      localResources.contains(ClientBase.LOG4J_PROP))
 
     logInfo("Setting up executor with commands: " + commands)
     ctx.setCommands(commands)