You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2019/10/28 17:52:04 UTC

[GitHub] [spark] squito commented on a change in pull request #25962: [SPARK-29285][Shuffle] Temporary shuffle files should be able to handle disk failures

squito commented on a change in pull request #25962: [SPARK-29285][Shuffle] Temporary shuffle files should be able to handle disk failures
URL: https://github.com/apache/spark/pull/25962#discussion_r339706848
 
 

 ##########
 File path: core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala
 ##########
 @@ -117,20 +119,38 @@ private[spark] class DiskBlockManager(conf: SparkConf, deleteFilesOnStop: Boolea
 
   /** Produces a unique block id and File suitable for storing local intermediate results. */
   def createTempLocalBlock(): (TempLocalBlockId, File) = {
-    var blockId = new TempLocalBlockId(UUID.randomUUID())
-    while (getFile(blockId).exists()) {
-      blockId = new TempLocalBlockId(UUID.randomUUID())
+    var blockId = TempLocalBlockId(UUID.randomUUID())
+    var tempLocalFile = getFile(blockId)
+    var count = 0
+    while (!canCreateFile(tempLocalFile) && count < Utils.MAX_DIR_CREATION_ATTEMPTS) {
+      blockId = TempLocalBlockId(UUID.randomUUID())
+      tempLocalFile = getFile(blockId)
+      count += 1
     }
-    (blockId, getFile(blockId))
+    (blockId, tempLocalFile)
   }
 
   /** Produces a unique block id and File suitable for storing shuffled intermediate results. */
   def createTempShuffleBlock(): (TempShuffleBlockId, File) = {
-    var blockId = new TempShuffleBlockId(UUID.randomUUID())
-    while (getFile(blockId).exists()) {
-      blockId = new TempShuffleBlockId(UUID.randomUUID())
+    var blockId = TempShuffleBlockId(UUID.randomUUID())
+    var tempShuffleFile = getFile(blockId)
+    var count = 0
+    while (!canCreateFile(tempShuffleFile) && count < Utils.MAX_DIR_CREATION_ATTEMPTS) {
+      blockId = TempShuffleBlockId(UUID.randomUUID())
+      tempShuffleFile = getFile(blockId)
+      count += 1
+    }
+    (blockId, tempShuffleFile)
+  }
+
+  private def canCreateFile(file: File): Boolean = {
+    try {
+      file.createNewFile()
 
 Review comment:
   This is a really good question.  It looks safe to me, I don't think any of the writers check for file existence before opening a FileOutputStream etc.  I also checked IndexBlockResolver, which has some checks on pre-existing files -- but those are checks on the final destination files, not the temporary intermediate files.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org