You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2021/02/15 01:42:51 UTC

[spark] branch master updated: [SPARK-34434][SQL] Mention DS rebase options in `SparkUpgradeException`

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new aca6db1  [SPARK-34434][SQL] Mention DS rebase options in `SparkUpgradeException`
aca6db1 is described below

commit aca6db1868c7fd7632b490e5f20254989b270e8e
Author: Max Gekk <ma...@gmail.com>
AuthorDate: Sun Feb 14 17:42:15 2021 -0800

    [SPARK-34434][SQL] Mention DS rebase options in `SparkUpgradeException`
    
    ### What changes were proposed in this pull request?
    Mention the DS options introduced by https://github.com/apache/spark/pull/31529 and by https://github.com/apache/spark/pull/31489 in `SparkUpgradeException`.
    
    ### Why are the changes needed?
    To improve user experience with Spark SQL. Before the changes, the error message recommends to set SQL configs but the configs cannot help in some situations (see the PRs for more details).
    
    ### Does this PR introduce _any_ user-facing change?
    Yes. After the changes, the error message is:
    
    _org.apache.spark.SparkUpgradeException: You may get a different result due to the upgrading of Spark 3.0: reading dates before 1582-10-15 or timestamps before 1900-01-01T00:00:00Z from Parquet files can be ambiguous, as the files may be written by Spark 2.x or legacy versions of Hive, which uses a legacy hybrid calendar that is different from Spark 3.0+'s Proleptic Gregorian calendar. See more details in SPARK-31404. You can set the SQL config 'spark.sql.legacy.parquet.datetimeRebase [...]
    
    ### How was this patch tested?
    1. By checking coding style: `./dev/scalastyle`
    2. By running the related test suite:
    ```
    $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *ParquetRebaseDatetimeV1Suite"
    ```
    
    Closes #31562 from MaxGekk/rebase-upgrade-exception.
    
    Authored-by: Max Gekk <ma...@gmail.com>
    Signed-off-by: Dongjoon Hyun <dh...@apple.com>
---
 .../sql/execution/datasources/DataSourceUtils.scala   | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala
index b54747a..a17c317 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala
@@ -30,6 +30,7 @@ import org.apache.spark.sql.{SPARK_LEGACY_DATETIME, SPARK_LEGACY_INT96, SPARK_VE
 import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogUtils}
 import org.apache.spark.sql.catalyst.util.RebaseDateTime
+import org.apache.spark.sql.execution.datasources.parquet.ParquetOptions
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.internal.SQLConf.LegacyBehaviorPolicy
 import org.apache.spark.sql.sources.BaseRelation
@@ -132,19 +133,23 @@ object DataSourceUtils {
   }
 
   def newRebaseExceptionInRead(format: String): SparkUpgradeException = {
-    val config = format match {
-      case "Parquet INT96" => SQLConf.LEGACY_PARQUET_INT96_REBASE_MODE_IN_READ.key
-      case "Parquet" => SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_READ.key
-      case "Avro" => SQLConf.LEGACY_AVRO_REBASE_MODE_IN_READ.key
+    val (config, option) = format match {
+      case "Parquet INT96" =>
+        (SQLConf.LEGACY_PARQUET_INT96_REBASE_MODE_IN_READ.key, ParquetOptions.INT96_REBASE_MODE)
+      case "Parquet" =>
+        (SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_READ.key, ParquetOptions.DATETIME_REBASE_MODE)
+      case "Avro" =>
+        (SQLConf.LEGACY_AVRO_REBASE_MODE_IN_READ.key, "datetimeRebaseMode")
       case _ => throw new IllegalStateException("unrecognized format " + format)
     }
     new SparkUpgradeException("3.0", "reading dates before 1582-10-15 or timestamps before " +
       s"1900-01-01T00:00:00Z from $format files can be ambiguous, as the files may be written by " +
       "Spark 2.x or legacy versions of Hive, which uses a legacy hybrid calendar that is " +
       "different from Spark 3.0+'s Proleptic Gregorian calendar. See more details in " +
-      s"SPARK-31404. You can set $config to 'LEGACY' to rebase the datetime values w.r.t. " +
-      s"the calendar difference during reading. Or set $config to 'CORRECTED' to read the " +
-      "datetime values as it is.", null)
+      s"SPARK-31404. You can set the SQL config '$config' or the datasource option '$option' to " +
+      "'LEGACY' to rebase the datetime values w.r.t. the calendar difference during reading. " +
+      s"To read the datetime values as it is, set the SQL config '$config' or " +
+      s"the datasource option '$option' to 'CORRECTED'.", null)
   }
 
   def newRebaseExceptionInWrite(format: String): SparkUpgradeException = {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org