You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by li...@apache.org on 2018/02/03 08:04:04 UTC
spark git commit: [SPARK-23305][SQL][TEST] Test
`spark.sql.files.ignoreMissingFiles` for all file-based data sources
Repository: spark
Updated Branches:
refs/heads/master 63b49fa2e -> 522e0b186
[SPARK-23305][SQL][TEST] Test `spark.sql.files.ignoreMissingFiles` for all file-based data sources
## What changes were proposed in this pull request?
Like Parquet, all file-based data sources handle `spark.sql.files.ignoreMissingFiles` correctly. We should have test coverage for all of them, both for feature parity and to prevent accidental regressions in the future.
## How was this patch tested?
Passes Jenkins with the newly added test cases.
Author: Dongjoon Hyun <do...@apache.org>
Closes #20479 from dongjoon-hyun/SPARK-23305.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/522e0b18
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/522e0b18
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/522e0b18
Branch: refs/heads/master
Commit: 522e0b1866a0298669c83de5a47ba380dc0b7c84
Parents: 63b49fa
Author: Dongjoon Hyun <do...@apache.org>
Authored: Sat Feb 3 00:04:00 2018 -0800
Committer: gatorsmile <ga...@gmail.com>
Committed: Sat Feb 3 00:04:00 2018 -0800
----------------------------------------------------------------------
.../spark/sql/FileBasedDataSourceSuite.scala | 37 ++++++++++++++++++++
.../datasources/parquet/ParquetQuerySuite.scala | 33 -----------------
2 files changed, 37 insertions(+), 33 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/522e0b18/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala
index c272c99..640d6b1 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala
@@ -17,6 +17,10 @@
package org.apache.spark.sql
+import org.apache.hadoop.fs.Path
+
+import org.apache.spark.SparkException
+import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.SharedSQLContext
class FileBasedDataSourceSuite extends QueryTest with SharedSQLContext {
@@ -92,4 +96,37 @@ class FileBasedDataSourceSuite extends QueryTest with SharedSQLContext {
}
}
}
+
+ allFileBasedDataSources.foreach { format =>
+ testQuietly(s"Enabling/disabling ignoreMissingFiles using $format") {
+ def testIgnoreMissingFiles(): Unit = {
+ withTempDir { dir =>
+ val basePath = dir.getCanonicalPath
+ Seq("0").toDF("a").write.format(format).save(new Path(basePath, "first").toString)
+ Seq("1").toDF("a").write.format(format).save(new Path(basePath, "second").toString)
+ val thirdPath = new Path(basePath, "third")
+ Seq("2").toDF("a").write.format(format).save(thirdPath.toString)
+ val df = spark.read.format(format).load(
+ new Path(basePath, "first").toString,
+ new Path(basePath, "second").toString,
+ new Path(basePath, "third").toString)
+
+ val fs = thirdPath.getFileSystem(spark.sparkContext.hadoopConfiguration)
+ assert(fs.delete(thirdPath, true))
+ checkAnswer(df, Seq(Row("0"), Row("1")))
+ }
+ }
+
+ withSQLConf(SQLConf.IGNORE_MISSING_FILES.key -> "true") {
+ testIgnoreMissingFiles()
+ }
+
+ withSQLConf(SQLConf.IGNORE_MISSING_FILES.key -> "false") {
+ val exception = intercept[SparkException] {
+ testIgnoreMissingFiles()
+ }
+ assert(exception.getMessage().contains("does not exist"))
+ }
+ }
+ }
}
http://git-wip-us.apache.org/repos/asf/spark/blob/522e0b18/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala
index 6ad88ed..55b0f72 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala
@@ -355,39 +355,6 @@ class ParquetQuerySuite extends QueryTest with ParquetTest with SharedSQLContext
}
}
- testQuietly("Enabling/disabling ignoreMissingFiles") {
- def testIgnoreMissingFiles(): Unit = {
- withTempDir { dir =>
- val basePath = dir.getCanonicalPath
- spark.range(1).toDF("a").write.parquet(new Path(basePath, "first").toString)
- spark.range(1, 2).toDF("a").write.parquet(new Path(basePath, "second").toString)
- val thirdPath = new Path(basePath, "third")
- spark.range(2, 3).toDF("a").write.parquet(thirdPath.toString)
- val df = spark.read.parquet(
- new Path(basePath, "first").toString,
- new Path(basePath, "second").toString,
- new Path(basePath, "third").toString)
-
- val fs = thirdPath.getFileSystem(spark.sparkContext.hadoopConfiguration)
- fs.delete(thirdPath, true)
- checkAnswer(
- df,
- Seq(Row(0), Row(1)))
- }
- }
-
- withSQLConf(SQLConf.IGNORE_MISSING_FILES.key -> "true") {
- testIgnoreMissingFiles()
- }
-
- withSQLConf(SQLConf.IGNORE_MISSING_FILES.key -> "false") {
- val exception = intercept[SparkException] {
- testIgnoreMissingFiles()
- }
- assert(exception.getMessage().contains("does not exist"))
- }
- }
-
/**
* this is part of test 'Enabling/disabling ignoreCorruptFiles' but run in a loop
* to increase the chance of failure
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org