Posted to commits@spark.apache.org by sr...@apache.org on 2021/07/07 14:16:53 UTC

[spark] branch master updated: [SPARK-35907][CORE] Instead of File#mkdirs, Files#createDirectories is expected

This is an automated email from the ASF dual-hosted git repository.

srowen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 55373b1  [SPARK-35907][CORE] Instead of File#mkdirs, Files#createDirectories is expected
55373b1 is described below

commit 55373b118f961ee030274a5e3e03348ceb26496b
Author: Shockang <sh...@163.com>
AuthorDate: Wed Jul 7 09:16:13 2021 -0500

    [SPARK-35907][CORE] Instead of File#mkdirs, Files#createDirectories is expected
    
    ### What changes were proposed in this pull request?
    
    The `createDirectory` methods in `org.apache.spark.util.Utils` are modified to use
    `Files#createDirectories` instead of `File#mkdirs`.
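
    For reference, both affected overloads have these shapes (written here as an
    illustrative trait; the real methods live in `org.apache.spark.util.Utils`, and
    the second signature is inferred from the tests below):

    ```scala
    import java.io.File

    // Illustrative shapes only, not the actual Utils object.
    trait DirectoryUtils {
      // Creates `dir`, returning true/false and logging (not throwing) on failure.
      def createDirectory(dir: File): Boolean

      // Creates "<root>/<namePrefix>-<random UUID>" and returns it; with this change,
      // a failure is logged together with the meaningful exception.
      def createDirectory(root: String, namePrefix: String = "spark"): File
    }
    ```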
    
    ### Why are the changes needed?
    
    To fix the ambiguous error handling of the traditional `java.io` API when creating
    directories: `File#mkdirs` returns only a boolean, so a failure carries no
    information about its cause, whereas `Files#createDirectories` throws a meaningful
    exception.

    In addition, the stale comment "This sporadically fails - not sure why" should not
    remain in Spark's source code.
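
    To make the contrast concrete, here is a small sketch (the unwritable path is
    hypothetical; the catch pattern mirrors the patch below):

    ```scala
    import java.io.{File, IOException}
    import java.nio.file.Files

    val dir = new File("/root/no-permission/dir") // assume we cannot write here

    // java.io: only a boolean comes back; the reason for the failure is lost.
    val created = dir.mkdirs() // created == false, but why?

    // java.nio.file: the failure carries its cause, e.g.
    // java.nio.file.AccessDeniedException: /root/no-permission
    try {
      Files.createDirectories(dir.toPath)
    } catch {
      case e @ (_: IOException | _: SecurityException) =>
        println(s"Failed to create directory $dir: $e")
    }
    ```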
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes. The modified methods are called in the following places (see the sketch after
    this list):

    - to create the working directory when the Worker starts;
    - to create the local directories for storing block data when DiskBlockManager is
      instantiated;
    - to create a temporary directory inside a given parent directory, in several
      classes.
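
    A sketch of the call shapes at those sites (`Utils` is `private[spark]`, so this
    compiles only inside Spark's own packages; the paths are hypothetical):

    ```scala
    import java.io.File
    import org.apache.spark.util.Utils

    // Worker / DiskBlockManager style: create a known directory, check the result.
    val ok: Boolean = Utils.createDirectory(new File("/tmp/spark-work"))

    // Temporary-directory style: creates "<root>/<namePrefix>-<UUID>" and returns it.
    val tmp: File = Utils.createDirectory("/tmp", "spark-test")
    ```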
    
    ### How was this patch tested?
    
    Test cases covering as many of the affected scenarios as possible were added to
    `UtilsSuite` (see the diff below).
    
    Authored-by: Shockang <shockang@aliyun.com>
    
    Closes #33101 from Shockang/SPARK-35907.
    
    Authored-by: Shockang <sh...@163.com>
    Signed-off-by: Sean Owen <sr...@gmail.com>
---
 .../main/scala/org/apache/spark/util/Utils.scala   | 18 ++++---
 .../scala/org/apache/spark/util/UtilsSuite.scala   | 62 ++++++++++++++++++++++
 2 files changed, 74 insertions(+), 6 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala
index b1df7bd..1a6a689 100644
--- a/core/src/main/scala/org/apache/spark/util/Utils.scala
+++ b/core/src/main/scala/org/apache/spark/util/Utils.scala
@@ -285,9 +285,11 @@ private[spark] object Utils extends Logging {
    */
   def createDirectory(dir: File): Boolean = {
     try {
-      // This sporadically fails - not sure why ... !dir.exists() && !dir.mkdirs()
-      // So attempting to create and then check if directory was created or not.
-      dir.mkdirs()
+      // SPARK-35907: This check was needed with File.mkdirs() because it could
+      // sporadically fail silently. After switching to Files.createDirectories(),
+      // silent failures should no longer happen, but the check is kept out of caution.
+      // It can be removed once we are sure Files.createDirectories() never fails silently.
+      Files.createDirectories(dir.toPath)
       if ( !dir.exists() || !dir.isDirectory) {
         logError(s"Failed to create directory " + dir)
       }
@@ -315,10 +317,14 @@ private[spark] object Utils extends Logging {
       }
       try {
         dir = new File(root, namePrefix + "-" + UUID.randomUUID.toString)
-        if (dir.exists() || !dir.mkdirs()) {
+        // SPARK-35907: Files.createDirectories() throws a meaningful exception,
+        // rather than failing silently, if the directory cannot be created.
+        Files.createDirectories(dir.toPath)
+      } catch {
+        case e @ (_ : IOException | _ : SecurityException) =>
+          logError(s"Failed to create directory $dir", e)
           dir = null
-        }
-      } catch { case e: SecurityException => dir = null; }
+      }
     }
 
     dir.getCanonicalFile
diff --git a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
index dba7e39..f34b1c0 100644
--- a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
@@ -477,6 +477,68 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties with Logging {
     }
   }
 
+  test("SPARK-35907: createDirectory") {
+    val tmpDir = new File(System.getProperty("java.io.tmpdir"))
+    val testDir = new File(tmpDir, "createDirectory" + System.nanoTime())
+    val testDirPath = testDir.getCanonicalPath
+
+    // 1. Directory created successfully
+    val scenario1 = new File(testDir, "scenario1")
+    assert(Utils.createDirectory(scenario1))
+    assert(scenario1.exists())
+    assert(Utils.createDirectory(testDirPath, "scenario1").exists())
+
+    // 2. Illegal file path
+    val scenario2 = new File(testDir, "scenario2" * 256)
+    assert(!Utils.createDirectory(scenario2))
+    assert(!scenario2.exists())
+    assertThrows[IOException](Utils.createDirectory(testDirPath, "scenario2" * 256))
+
+    // 3. The parent directory is not readable
+    val scenario3 = new File(testDir, "scenario3")
+    assert(testDir.canRead)
+    assert(testDir.setReadable(false))
+    assert(Utils.createDirectory(scenario3))
+    assert(scenario3.exists())
+    assert(Utils.createDirectory(testDirPath, "scenario3").exists())
+    assert(testDir.setReadable(true))
+
+    // 4. The parent directory is not writable
+    val scenario4 = new File(testDir, "scenario4")
+    assert(testDir.canWrite)
+    assert(testDir.setWritable(false))
+    assert(!Utils.createDirectory(scenario4))
+    assert(!scenario4.exists())
+    assertThrows[IOException](Utils.createDirectory(testDirPath, "scenario4"))
+    assert(testDir.setWritable(true))
+
+    // 5. The parent directory is not executable
+    val scenario5 = new File(testDir, "scenario5")
+    assert(testDir.canExecute)
+    assert(testDir.setExecutable(false))
+    assert(!Utils.createDirectory(scenario5))
+    assert(!scenario5.exists())
+    assertThrows[IOException](Utils.createDirectory(testDirPath, "scenario5"))
+    assert(testDir.setExecutable(true))
+
+    // The following 3 scenarios apply only to createDirectory(File)
+    // 6. Symbolic link
+    val scenario6 = java.nio.file.Files.createSymbolicLink(new File(testDir, "scenario6")
+      .toPath, scenario1.toPath).toFile
+    assert(!Utils.createDirectory(scenario6))
+    assert(scenario6.exists())
+
+    // 7. Directory exists
+    assert(scenario1.exists())
+    assert(Utils.createDirectory(scenario1))
+    assert(scenario1.exists())
+
+    // 8. Not a directory
+    val scenario8 = new File(testDir.getCanonicalPath + File.separator + "scenario8")
+    assert(scenario8.createNewFile())
+    assert(!Utils.createDirectory(scenario8))
+  }
+
   test("doesDirectoryContainFilesNewerThan") {
     // create some temporary directories and files
     withTempDir { parent =>

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org