You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by pw...@apache.org on 2014/01/22 23:16:28 UTC
git commit: Merge pull request #478 from sryza/sandy-spark-1033

Updated Branches:
  refs/heads/branch-0.9 e1dc5bedb -> dd533c9e4


Merge pull request #478 from sryza/sandy-spark-1033

SPARK-1033. Ask for cores in Yarn container requests

Tested on a pseudo-distributed cluster against the Fair Scheduler and observed a worker taking more than a single core.
(cherry picked from commit 576c4a4c502ccca5fcd6b3552dd93cc2f3c50666)

Signed-off-by: Patrick Wendell <pw...@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/dd533c9e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/dd533c9e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/dd533c9e

Branch: refs/heads/branch-0.9
Commit: dd533c9e42a01319ebcbc0b01c3190a25784a2e1
Parents: e1dc5be
Author: Patrick Wendell <pw...@gmail.com>
Authored: Wed Jan 22 14:10:07 2014 -0800
Committer: Patrick Wendell <pw...@gmail.com>
Committed: Wed Jan 22 14:15:58 2014 -0800

----------------------------------------------------------------------
 docs/running-on-yarn.md                                        | 2 +-
 .../org/apache/spark/deploy/yarn/YarnAllocationHandler.scala   | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/dd533c9e/docs/running-on-yarn.md
----------------------------------------------------------------------
diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md
index 3bd6264..5dadd54 100644
--- a/docs/running-on-yarn.md
+++ b/docs/running-on-yarn.md
@@ -133,7 +133,7 @@ See [Building Spark with Maven](building-with-maven.html) for instructions on ho
 
 # Important Notes
 
-- We do not requesting container resources based on the number of cores. Thus the numbers of cores given via command line arguments cannot be guaranteed.
+- Before Hadoop 2.2, YARN does not support cores in container resource requests. Thus, when running against an earlier version, the number of cores given via command line arguments cannot be passed to YARN. Whether core requests are honored in scheduling decisions depends on which scheduler is in use and how it is configured.
 - The local directories used for spark will be the local directories configured for YARN (Hadoop Yarn config yarn.nodemanager.local-dirs). If the user specifies spark.local.dir, it will be ignored.
 - The --files and --archives options support specifying file names with the # similar to Hadoop. For example you can specify: --files localtest.txt#appSees.txt and this will upload the file you have locally named localtest.txt into HDFS but this will be linked to by the name appSees.txt and your application should use the name as appSees.txt to reference it when running on YARN.
 - The --addJars option allows the SparkContext.addJar function to work if you are using it with local files. It does not need to be used if you are using it with HDFS, HTTP, HTTPS, or FTP files.

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/dd533c9e/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala
----------------------------------------------------------------------
diff --git a/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala b/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala
index 738ff98..1ac6112 100644
--- a/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala
+++ b/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala
@@ -532,15 +532,15 @@ private[yarn] class YarnAllocationHandler(
       priority: Int
     ): ArrayBuffer[ContainerRequest] = {
 
-    val memoryResource = Records.newRecord(classOf[Resource])
-    memoryResource.setMemory(workerMemory + YarnAllocationHandler.MEMORY_OVERHEAD)
+    val memoryRequest = workerMemory + YarnAllocationHandler.MEMORY_OVERHEAD
+    val resource = Resource.newInstance(memoryRequest, workerCores)
 
     val prioritySetting = Records.newRecord(classOf[Priority])
     prioritySetting.setPriority(priority)
 
     val requests = new ArrayBuffer[ContainerRequest]()
     for (i <- 0 until numWorkers) {
-      requests += new ContainerRequest(memoryResource, hosts, racks, prioritySetting)
+      requests += new ContainerRequest(resource, hosts, racks, prioritySetting)
     }
     requests
   }