You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by li...@apache.org on 2019/02/26 00:20:27 UTC
[spark] branch master updated: [SPARK-26673][FOLLOWUP][SQL] File
Source V2: check existence of output path before delete it
This is an automated email from the ASF dual-hosted git repository.
lixiao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 4baa2d4 [SPARK-26673][FOLLOWUP][SQL] File Source V2: check existence of output path before delete it
4baa2d4 is described below
commit 4baa2d4449e103b15370d284b0ffdf09b4a9c1b7
Author: Gengliang Wang <ge...@databricks.com>
AuthorDate: Mon Feb 25 16:20:06 2019 -0800
[SPARK-26673][FOLLOWUP][SQL] File Source V2: check existence of output path before delete it
## What changes were proposed in this pull request?
This is a follow-up PR to address the review comment at https://github.com/apache/spark/pull/23601#pullrequestreview-207101115
When Spark writes a DataFrame in "overwrite" mode, it deletes the output path before the actual write. To safely handle the case where the output path doesn't exist, this change follows the V1 code: it checks that the path exists before deleting it, and throws an IOException if the deletion fails.
## How was this patch tested?
Applied https://github.com/apache/spark/pull/23836 and ran the unit tests.
Closes #23889 from gengliangwang/checkFileBeforeOverwrite.
Authored-by: Gengliang Wang <ge...@databricks.com>
Signed-off-by: gatorsmile <ga...@gmail.com>
---
.../apache/spark/sql/execution/datasources/v2/FileWriteBuilder.scala | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileWriteBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileWriteBuilder.scala
index 6a94248..75c9224 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileWriteBuilder.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileWriteBuilder.scala
@@ -16,6 +16,7 @@
*/
package org.apache.spark.sql.execution.datasources.v2
+import java.io.IOException
import java.util.UUID
import scala.collection.JavaConverters._
@@ -83,7 +84,9 @@ abstract class FileWriteBuilder(options: DataSourceOptions)
null
case SaveMode.Overwrite =>
- committer.deleteWithJob(fs, path, true)
+ if (fs.exists(path) && !committer.deleteWithJob(fs, path, true)) {
+ throw new IOException(s"Unable to clear directory $path prior to writing to it")
+ }
committer.setupJob(job)
new FileBatchWrite(job, description, committer)
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org