Posted to commits@spark.apache.org by sa...@apache.org on 2018/02/12 07:46:38 UTC

spark git commit: [SPARK-23390][SQL] Flaky Test Suite: FileBasedDataSourceSuite in Spark 2.3/hadoop 2.7

Repository: spark
Updated Branches:
  refs/heads/master c0c902aed -> 6efd5d117


[SPARK-23390][SQL] Flaky Test Suite: FileBasedDataSourceSuite in Spark 2.3/hadoop 2.7

## What changes were proposed in this pull request?

This test fails only with sbt on Hadoop 2.7. I can't reproduce it locally, but here is my speculation from reading the code:
1. FileSystem.delete doesn't delete the directory entirely; somehow the file can still be opened as a 0-length empty file (just speculation; see the probe sketched after this list).
2. ORC intentionally allows empty files, and the reader fails during reading without closing the file stream.
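
If speculation 1 holds, it can be checked directly against the Hadoop FileSystem API: a correct delete must leave the path unopenable. A minimal probe under that assumption (the helper assertReallyDeleted is hypothetical, not part of this patch):

```scala
import java.io.FileNotFoundException

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path

// Hypothetical probe: delete a single data file, then prove it can no longer
// be opened. If fs.open() hands back a 0-length stream instead of throwing,
// the delete did not fully take effect (speculation 1 above).
def assertReallyDeleted(path: Path, conf: Configuration): Unit = {
  val fs = path.getFileSystem(conf)
  fs.delete(path, false) // non-recursive: `path` is a single file
  try {
    fs.open(path).close()
    throw new AssertionError(s"$path is still readable after delete")
  } catch {
    case _: FileNotFoundException => // expected: the file is really gone
  }
}
```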

This PR improves the test to make sure all data files are deleted and can no longer be opened.
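
Condensed from the diff below (first/second/third stand for the three path strings built in the test), the improved flow defines the lazy DataFrame first, deletes every data file plus the directory, verifies each file is truly gone, and only then triggers the read:

```scala
// Assumes spark.sql.files.ignoreMissingFiles is true for this run.
val df = spark.read.format(format).load(first, second, third) // lazy: no I/O yet

files.foreach(f => fs.delete(f, false)) // delete each data file under "third"
assert(fs.delete(thirdPath, true))      // then remove the directory itself
files.foreach(f => intercept[FileNotFoundException](fs.open(f))) // really gone

checkAnswer(df, Seq(Row("0"), Row("1"))) // the scan must skip the missing files
```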

## How was this patch tested?

N/A

Author: Wenchen Fan <we...@databricks.com>

Closes #20584 from cloud-fan/flaky-test.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6efd5d11
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6efd5d11
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6efd5d11

Branch: refs/heads/master
Commit: 6efd5d117e98074d1b16a5c991fbd38df9aa196e
Parents: c0c902a
Author: Wenchen Fan <we...@databricks.com>
Authored: Sun Feb 11 23:46:23 2018 -0800
Committer: Sameer Agarwal <sa...@apache.org>
Committed: Sun Feb 11 23:46:23 2018 -0800

----------------------------------------------------------------------
 .../apache/spark/sql/FileBasedDataSourceSuite.scala   | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/6efd5d11/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala
index 640d6b1..2e33236 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala
@@ -17,6 +17,8 @@
 
 package org.apache.spark.sql
 
+import java.io.FileNotFoundException
+
 import org.apache.hadoop.fs.Path
 
 import org.apache.spark.SparkException
@@ -102,17 +104,27 @@ class FileBasedDataSourceSuite extends QueryTest with SharedSQLContext {
       def testIgnoreMissingFiles(): Unit = {
         withTempDir { dir =>
           val basePath = dir.getCanonicalPath
+
           Seq("0").toDF("a").write.format(format).save(new Path(basePath, "first").toString)
           Seq("1").toDF("a").write.format(format).save(new Path(basePath, "second").toString)
+
           val thirdPath = new Path(basePath, "third")
+          val fs = thirdPath.getFileSystem(spark.sparkContext.hadoopConfiguration)
           Seq("2").toDF("a").write.format(format).save(thirdPath.toString)
+          val files = fs.listStatus(thirdPath).filter(_.isFile).map(_.getPath)
+
           val df = spark.read.format(format).load(
             new Path(basePath, "first").toString,
             new Path(basePath, "second").toString,
             new Path(basePath, "third").toString)
 
-          val fs = thirdPath.getFileSystem(spark.sparkContext.hadoopConfiguration)
+          // Make sure all data files are deleted and can't be opened.
+          files.foreach(f => fs.delete(f, false))
           assert(fs.delete(thirdPath, true))
+          for (f <- files) {
+            intercept[FileNotFoundException](fs.open(f))
+          }
+
           checkAnswer(df, Seq(Row("0"), Row("1")))
         }
       }

