You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ad...@apache.org on 2014/07/03 19:14:43 UTC

git commit: [SPARK-2324] SparkContext should not exit directly when spark.local.dir is a list of multiple paths and one of them has error

Repository: spark
Updated Branches:
  refs/heads/master bc7041a42 -> 3bbeca648


[SPARK-2324] SparkContext should not exit directly when spark.local.dir is a list of multiple paths and one of them has error

spark.local.dir can be configured as a comma-separated list of multiple paths, for example /data1/sparkenv/local,/data2/sparkenv/local. If the disk data2 of the driver node has an error, the application will exit, since DiskBlockManager exits directly in createLocalDirs. If the disk data2 of a worker node has an error, the executor will exit as well.
DiskBlockManager should not exit directly in createLocalDirs if one of the spark.local.dir paths has an error. Since spark.local.dir can list multiple paths, a problem with one of them should not bring down the whole application or executor.
I think DiskBlockManager could ignore the bad directory in createLocalDirs and exit only if no local directory could be created at all.

Author: yantangzhai <ty...@163.com>

Closes #1274 from YanTangZhai/SPARK-2324 and squashes the following commits:

609bf48 [yantangzhai] [SPARK-2324] SparkContext should not exit directly when spark.local.dir is a list of multiple paths and one of them has error
df08673 [yantangzhai] [SPARK-2324] SparkContext should not exit directly when spark.local.dir is a list of multiple paths and one of them has error


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3bbeca64
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3bbeca64
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3bbeca64

Branch: refs/heads/master
Commit: 3bbeca648985b32bdf1eedef779cb2817eb6dfa4
Parents: bc7041a
Author: yantangzhai <ty...@163.com>
Authored: Thu Jul 3 10:14:35 2014 -0700
Committer: Aaron Davidson <aa...@databricks.com>
Committed: Thu Jul 3 10:14:35 2014 -0700

----------------------------------------------------------------------
 .../org/apache/spark/storage/DiskBlockManager.scala | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/3bbeca64/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala b/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala
index 2ec46d4..673fc19 100644
--- a/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala
+++ b/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala
@@ -44,6 +44,10 @@ private[spark] class DiskBlockManager(shuffleManager: ShuffleBlockManager, rootD
    * directory, create multiple subdirectories that we will hash files into, in order to avoid
    * having really large inodes at the top level. */
   private val localDirs: Array[File] = createLocalDirs()
+  if (localDirs.isEmpty) {
+    logError("Failed to create any local dir.")
+    System.exit(ExecutorExitCode.DISK_STORE_FAILED_TO_CREATE_DIR)
+  }
   private val subDirs = Array.fill(localDirs.length)(new Array[File](subDirsPerLocalDir))
   private var shuffleSender : ShuffleSender = null
 
@@ -116,7 +120,7 @@ private[spark] class DiskBlockManager(shuffleManager: ShuffleBlockManager, rootD
   private def createLocalDirs(): Array[File] = {
     logDebug(s"Creating local directories at root dirs '$rootDirs'")
     val dateFormat = new SimpleDateFormat("yyyyMMddHHmmss")
-    rootDirs.split(",").map { rootDir =>
+    rootDirs.split(",").flatMap { rootDir =>
       var foundLocalDir = false
       var localDir: File = null
       var localDirId: String = null
@@ -136,11 +140,13 @@ private[spark] class DiskBlockManager(shuffleManager: ShuffleBlockManager, rootD
         }
       }
       if (!foundLocalDir) {
-        logError(s"Failed $MAX_DIR_CREATION_ATTEMPTS attempts to create local dir in $rootDir")
-        System.exit(ExecutorExitCode.DISK_STORE_FAILED_TO_CREATE_DIR)
+        logError(s"Failed $MAX_DIR_CREATION_ATTEMPTS attempts to create local dir in $rootDir." +
+                  " Ignoring this directory.")
+        None
+      } else {
+        logInfo(s"Created local directory at $localDir")
+        Some(localDir)
       }
-      logInfo(s"Created local directory at $localDir")
-      localDir
     }
   }