Posted to commits@spark.apache.org by gu...@apache.org on 2021/01/29 13:26:15 UTC

[spark] branch branch-3.0 updated: [SPARK-33163][SQL][TESTS][FOLLOWUP] Fix the test for the parquet metadata key 'org.apache.spark.legacyDateTime'

This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 3a2d143  [SPARK-33163][SQL][TESTS][FOLLOWUP] Fix the test for the parquet metadata key 'org.apache.spark.legacyDateTime'
3a2d143 is described below

commit 3a2d1433aae970765bbcf695d0d36c5ad9fdb507
Author: Max Gekk <ma...@gmail.com>
AuthorDate: Fri Jan 29 22:25:01 2021 +0900

    [SPARK-33163][SQL][TESTS][FOLLOWUP] Fix the test for the parquet metadata key 'org.apache.spark.legacyDateTime'
    
    ### What changes were proposed in this pull request?
    1. Test both the date and the timestamp column types.
    2. Write the timestamp as the `TIMESTAMP_MICROS` logical type.
    3. Change the timestamp value to `'1000-01-01 01:02:03'` to check that an exception is thrown, as shown in the sketch below.
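    
    A minimal sketch of the behavior checked in item 3 (it assumes a running `spark` session and uses the Spark 3.0 conf names referenced in the diff below; `/tmp/spark-33163` is a hypothetical output path):
    ```scala
    // Write timestamps using the TIMESTAMP_MICROS logical type (item 2).
    spark.conf.set("spark.sql.parquet.outputTimestampType", "TIMESTAMP_MICROS")
    // In EXCEPTION rebase mode, Spark refuses to write datetime values that
    // differ between the hybrid Julian+Gregorian and Proleptic Gregorian calendars.
    spark.conf.set("spark.sql.legacy.parquet.datetimeRebaseModeInWrite", "EXCEPTION")
    // The ancient timestamp makes the write fail with a SparkException.
    spark.sql("SELECT timestamp '1000-01-01 01:02:03' AS dt")
      .write.parquet("/tmp/spark-33163")
    ```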
    
    ### Why are the changes needed?
    To improve test coverage.
    
    ### Does this PR introduce _any_ user-facing change?
    No
    
    ### How was this patch tested?
    By running the modified test suite:
    ```
    $ build/sbt "testOnly org.apache.spark.sql.execution.datasources.parquet.ParquetIOSuite"
    ```
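    
    To run only this test case, ScalaTest's substring filter can be appended (a usage sketch, assuming standard sbt-to-ScalaTest argument passing):
    ```
    $ build/sbt "testOnly org.apache.spark.sql.execution.datasources.parquet.ParquetIOSuite -- -z SPARK-33163"
    ```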
    
    Closes #31396 from MaxGekk/parquet-test-metakey-followup.
    
    Authored-by: Max Gekk <ma...@gmail.com>
    Signed-off-by: HyukjinKwon <gu...@apache.org>
    (cherry picked from commit 588ddcdf22fccec2ea3775d17ac3d19cd5328eb5)
    Signed-off-by: HyukjinKwon <gu...@apache.org>
---
 .../datasources/parquet/ParquetIOSuite.scala       | 35 ++++++++++++++--------
 1 file changed, 22 insertions(+), 13 deletions(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
index 0984711..f90a998 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
@@ -51,6 +51,7 @@ import org.apache.spark.sql.execution.datasources.SQLHadoopMapReduceCommitProtoc
 import org.apache.spark.sql.functions._
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.internal.SQLConf.LegacyBehaviorPolicy._
+import org.apache.spark.sql.internal.SQLConf.ParquetOutputTimestampType
 import org.apache.spark.sql.test.SharedSparkSession
 import org.apache.spark.sql.types._
 import org.apache.spark.unsafe.types.UTF8String
@@ -1120,26 +1121,34 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession
   }
 
   test("SPARK-33163: write the metadata key 'org.apache.spark.legacyDateTime'") {
-    def saveTs(dir: java.io.File): Unit = {
-      Seq(Timestamp.valueOf("2020-10-15 01:02:03")).toDF()
-        .repartition(1)
-        .write
-        .parquet(dir.getAbsolutePath)
+    def checkMetadataKey(dir: java.io.File, exists: Boolean): Unit = {
+      Seq("timestamp '1000-01-01 01:02:03'", "date '1000-01-01'").foreach { dt =>
+        withSQLConf(SQLConf.PARQUET_OUTPUT_TIMESTAMP_TYPE.key ->
+          ParquetOutputTimestampType.TIMESTAMP_MICROS.toString) {
+          sql(s"SELECT $dt AS dt")
+            .repartition(1)
+            .write
+            .mode("overwrite")
+            .parquet(dir.getAbsolutePath)
+          val metaData = getMetaData(dir)
+          val expected = if (exists) Some("") else None
+          assert(metaData.get(SPARK_LEGACY_DATETIME) === expected)
+        }
+      }
     }
     withSQLConf(SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_WRITE.key -> LEGACY.toString) {
       withTempPath { dir =>
-        saveTs(dir)
-        assert(getMetaData(dir)(SPARK_LEGACY_DATETIME) === "")
+        checkMetadataKey(dir, exists = true)
       }
     }
-    Seq(CORRECTED, EXCEPTION).foreach { mode =>
-      withSQLConf(SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_WRITE.key -> mode.toString) {
-        withTempPath { dir =>
-          saveTs(dir)
-          assert(getMetaData(dir).get(SPARK_LEGACY_DATETIME).isEmpty)
-        }
+    withSQLConf(SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_WRITE.key -> CORRECTED.toString) {
+      withTempPath { dir =>
+        checkMetadataKey(dir, exists = false)
       }
     }
+    withSQLConf(SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_WRITE.key -> EXCEPTION.toString) {
+      withTempPath { dir => intercept[SparkException] { checkMetadataKey(dir, exists = false) } }
+    }
   }
 }
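
The test relies on `getMetaData(dir)`, a helper from Spark's Parquet test utilities that returns the key/value metadata of the written file's footer. A standalone sketch of the same check via parquet-hadoop's public API (an illustration, not the helper's actual implementation; it assumes parquet-hadoop is on the classpath and `readKeyValueMetadata` is a name made up here):

```scala
import scala.collection.JavaConverters._
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.parquet.hadoop.ParquetFileReader
import org.apache.parquet.hadoop.util.HadoopInputFile

// Read the key/value metadata that Spark writes into a parquet file footer.
def readKeyValueMetadata(file: String): Map[String, String] = {
  val in = HadoopInputFile.fromPath(new Path(file), new Configuration())
  val reader = ParquetFileReader.open(in)
  try reader.getFooter.getFileMetaData.getKeyValueMetaData.asScala.toMap
  finally reader.close()
}

// Under LEGACY rebase mode the key maps to an empty string; otherwise it is absent:
// readKeyValueMetadata("/tmp/out/part-00000.parquet").get("org.apache.spark.legacyDateTime")
```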
 

