You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by li...@apache.org on 2018/02/03 08:04:04 UTC
spark git commit: [SPARK-23305][SQL][TEST] Test `spark.sql.files.ignoreMissingFiles` for all file-based data sources

Repository: spark
Updated Branches:
  refs/heads/master 63b49fa2e -> 522e0b186


[SPARK-23305][SQL][TEST] Test `spark.sql.files.ignoreMissingFiles` for all file-based data sources

## What changes were proposed in this pull request?

Like Parquet, all file-based data sources handle `spark.sql.files.ignoreMissingFiles` correctly. We should have test coverage for this, both for feature parity and to prevent accidental regressions in any of these data sources in the future.

## How was this patch tested?

Pass Jenkins with a newly added test case.

Author: Dongjoon Hyun <do...@apache.org>

Closes #20479 from dongjoon-hyun/SPARK-23305.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/522e0b18
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/522e0b18
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/522e0b18

Branch: refs/heads/master
Commit: 522e0b1866a0298669c83de5a47ba380dc0b7c84
Parents: 63b49fa
Author: Dongjoon Hyun <do...@apache.org>
Authored: Sat Feb 3 00:04:00 2018 -0800
Committer: gatorsmile <ga...@gmail.com>
Committed: Sat Feb 3 00:04:00 2018 -0800

----------------------------------------------------------------------
 .../spark/sql/FileBasedDataSourceSuite.scala    | 37 ++++++++++++++++++++
 .../datasources/parquet/ParquetQuerySuite.scala | 33 -----------------
 2 files changed, 37 insertions(+), 33 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/522e0b18/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala
index c272c99..640d6b1 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala
@@ -17,6 +17,10 @@
 
 package org.apache.spark.sql
 
+import org.apache.hadoop.fs.Path
+
+import org.apache.spark.SparkException
+import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.test.SharedSQLContext
 
 class FileBasedDataSourceSuite extends QueryTest with SharedSQLContext {
@@ -92,4 +96,37 @@ class FileBasedDataSourceSuite extends QueryTest with SharedSQLContext {
       }
     }
   }
+
+  allFileBasedDataSources.foreach { format =>
+    testQuietly(s"Enabling/disabling ignoreMissingFiles using $format") {
+      def testIgnoreMissingFiles(): Unit = {
+        withTempDir { dir =>
+          val basePath = dir.getCanonicalPath
+          Seq("0").toDF("a").write.format(format).save(new Path(basePath, "first").toString)
+          Seq("1").toDF("a").write.format(format).save(new Path(basePath, "second").toString)
+          val thirdPath = new Path(basePath, "third")
+          Seq("2").toDF("a").write.format(format).save(thirdPath.toString)
+          val df = spark.read.format(format).load(
+            new Path(basePath, "first").toString,
+            new Path(basePath, "second").toString,
+            new Path(basePath, "third").toString)
+
+          val fs = thirdPath.getFileSystem(spark.sparkContext.hadoopConfiguration)
+          assert(fs.delete(thirdPath, true))
+          checkAnswer(df, Seq(Row("0"), Row("1")))
+        }
+      }
+
+      withSQLConf(SQLConf.IGNORE_MISSING_FILES.key -> "true") {
+        testIgnoreMissingFiles()
+      }
+
+      withSQLConf(SQLConf.IGNORE_MISSING_FILES.key -> "false") {
+        val exception = intercept[SparkException] {
+          testIgnoreMissingFiles()
+        }
+        assert(exception.getMessage().contains("does not exist"))
+      }
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/522e0b18/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala
index 6ad88ed..55b0f72 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala
@@ -355,39 +355,6 @@ class ParquetQuerySuite extends QueryTest with ParquetTest with SharedSQLContext
     }
   }
 
-  testQuietly("Enabling/disabling ignoreMissingFiles") {
-    def testIgnoreMissingFiles(): Unit = {
-      withTempDir { dir =>
-        val basePath = dir.getCanonicalPath
-        spark.range(1).toDF("a").write.parquet(new Path(basePath, "first").toString)
-        spark.range(1, 2).toDF("a").write.parquet(new Path(basePath, "second").toString)
-        val thirdPath = new Path(basePath, "third")
-        spark.range(2, 3).toDF("a").write.parquet(thirdPath.toString)
-        val df = spark.read.parquet(
-          new Path(basePath, "first").toString,
-          new Path(basePath, "second").toString,
-          new Path(basePath, "third").toString)
-
-        val fs = thirdPath.getFileSystem(spark.sparkContext.hadoopConfiguration)
-        fs.delete(thirdPath, true)
-        checkAnswer(
-          df,
-          Seq(Row(0), Row(1)))
-      }
-    }
-
-    withSQLConf(SQLConf.IGNORE_MISSING_FILES.key -> "true") {
-      testIgnoreMissingFiles()
-    }
-
-    withSQLConf(SQLConf.IGNORE_MISSING_FILES.key -> "false") {
-      val exception = intercept[SparkException] {
-        testIgnoreMissingFiles()
-      }
-      assert(exception.getMessage().contains("does not exist"))
-    }
-  }
-
   /**
    * this is part of test 'Enabling/disabling ignoreCorruptFiles' but run in a loop
    * to increase the chance of failure


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org