You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2021/09/23 16:13:40 UTC

[GitHub] [spark] sarutak commented on a change in pull request #34057: [SPARK-36825][SQL] Read/write dataframes with ANSI intervals from/to parquet files

sarutak commented on a change in pull request #34057:
URL: https://github.com/apache/spark/pull/34057#discussion_r714944703



##########
File path: sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
##########
@@ -1060,6 +1061,25 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession
       }
     }
   }
+
+  test("SPARK-36825: year-month/day-time intervals written and read as INT32/INT64") {

Review comment:
      Should we care about both V1 and V2 Parquet sources?

##########
File path: sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/CommonFileDataSourceSuite.scala
##########
@@ -36,22 +36,25 @@ trait CommonFileDataSourceSuite extends SQLHelper { self: AnyFunSuite =>
   protected def inputDataset: Dataset[_] = spark.createDataset(Seq("abc"))(Encoders.STRING)
 
   test(s"SPARK-36349: disallow saving of ANSI intervals to $dataSourceFormat") {
-    Seq("INTERVAL '1' DAY", "INTERVAL '1' YEAR").foreach { i =>
-      withTempPath { dir =>
-        val errMsg = intercept[AnalysisException] {
-          spark.sql(s"SELECT $i").write.format(dataSourceFormat).save(dir.getAbsolutePath)
-        }.getMessage
-        assert(errMsg.contains("Cannot save interval data type into external storage"))
+    if (!Set("parquet").contains(dataSourceFormat.toLowerCase(Locale.ROOT))) {
+      Seq("INTERVAL '1' DAY", "INTERVAL '1' YEAR").foreach { i =>
+        withTempPath { dir =>
+          val errMsg = intercept[AnalysisException] {
+            spark.sql(s"SELECT $i").write.format(dataSourceFormat).save(dir.getAbsolutePath)
+          }.getMessage
+          assert(errMsg.contains("Cannot save interval data type into external storage"))
+        }
       }
-    }
 
-    // Check all built-in file-based datasources except of libsvm which requires particular schema.
-    if (!Set("libsvm").contains(dataSourceFormat.toLowerCase(Locale.ROOT))) {
-      Seq("INTERVAL DAY TO SECOND", "INTERVAL YEAR TO MONTH").foreach { it =>
-        val errMsg = intercept[AnalysisException] {
-          spark.sql(s"CREATE TABLE t (i $it) USING $dataSourceFormat")
-        }.getMessage
-        assert(errMsg.contains("data source does not support"))
+      // Check all built-in file-based datasources except of libsvm which
+      // requires particular schema.
+      if (!Set("libsvm").contains(dataSourceFormat.toLowerCase(Locale.ROOT))) {
+        Seq("INTERVAL DAY TO SECOND", "INTERVAL YEAR TO MONTH").foreach { it =>
+          val errMsg = intercept[AnalysisException] {
+            spark.sql(s"CREATE TABLE t (i $it) USING $dataSourceFormat")

Review comment:
      Now that reading/writing ANSI interval types from/to Parquet no longer throws `AnalysisException`, should we add tests for `CREATE TABLE USING PARQUET` to ensure it's supported with Parquet?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org