Posted to commits@spark.apache.org by ma...@apache.org on 2021/07/01 20:26:28 UTC

[spark] branch master updated: [SPARK-35975][SQL] New configuration `spark.sql.timestampType` for the default timestamp type

This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new a643076  [SPARK-35975][SQL] New configuration `spark.sql.timestampType` for the default timestamp type
a643076 is described below

commit a643076d4ef622eac505ebf22c9aa2cc909320ac
Author: Gengliang Wang <ge...@apache.org>
AuthorDate: Thu Jul 1 23:25:18 2021 +0300

    [SPARK-35975][SQL] New configuration `spark.sql.timestampType` for the default timestamp type
    
    ### What changes were proposed in this pull request?
    
    Add a new configuration `spark.sql.timestampType`, which configures the default timestamp type of Spark SQL, including SQL DDL and the Cast clause. Setting the configuration to `TIMESTAMP_NTZ` makes `TIMESTAMP WITHOUT TIME ZONE` the default type, while setting it to `TIMESTAMP_LTZ` makes `TIMESTAMP WITH LOCAL TIME ZONE` the default.
    
    The default value of the new configuration is `TIMESTAMP_LTZ`, which is consistent with previous Spark releases.
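    
    A rough usage sketch (not part of this patch; it assumes a spark-shell style session named `spark`, and that the operation used already supports the NTZ type at this point in development):
    
    ```scala
    // Switch the default timestamp type for the current session.
    spark.conf.set("spark.sql.timestampType", "TIMESTAMP_NTZ")
    // An unqualified TIMESTAMP now resolves to TIMESTAMP WITHOUT TIME ZONE.
    spark.sql("SELECT CAST('2021-07-01 00:00:00' AS TIMESTAMP) AS ts").printSchema()
    
    // Restore the default, TIMESTAMP WITH LOCAL TIME ZONE.
    spark.conf.set("spark.sql.timestampType", "TIMESTAMP_LTZ")
    ```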
    
    ### Why are the changes needed?
    
    A new configuration for switching the default timestamp type to timestamp without time zone.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No, it's a new feature.
    
    ### How was this patch tested?
    
    Unit test
    
    Closes #33176 from gengliangwang/newTsTypeConf.
    
    Authored-by: Gengliang Wang <ge...@apache.org>
    Signed-off-by: Max Gekk <ma...@gmail.com>
---
 .../spark/sql/catalyst/parser/AstBuilder.scala     |  2 +-
 .../org/apache/spark/sql/internal/SQLConf.scala    | 28 ++++++++++++++++++++++
 .../sql/catalyst/parser/DataTypeParserSuite.scala  | 14 ++++++++++-
 3 files changed, 42 insertions(+), 2 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index 224c2d0..361ecc1 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -2502,7 +2502,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg
       case ("float" | "real", Nil) => FloatType
       case ("double", Nil) => DoubleType
       case ("date", Nil) => DateType
-      case ("timestamp", Nil) => TimestampType
+      case ("timestamp", Nil) => SQLConf.get.timestampType
       case ("string", Nil) => StringType
       case ("character" | "char", length :: Nil) => CharType(length.getText.toInt)
       case ("varchar", length :: Nil) => VarcharType(length.getText.toInt)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 30e5a16..3aed3c2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -44,6 +44,7 @@ import org.apache.spark.sql.catalyst.plans.logical.HintErrorHandler
 import org.apache.spark.sql.catalyst.util.DateTimeUtils
 import org.apache.spark.sql.connector.catalog.CatalogManager.SESSION_CATALOG_NAME
 import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors}
+import org.apache.spark.sql.types.{AtomicType, TimestampNTZType, TimestampType}
 import org.apache.spark.unsafe.array.ByteArrayMethods
 import org.apache.spark.util.Utils
 
@@ -2820,6 +2821,24 @@ object SQLConf {
       .booleanConf
       .createWithDefault(true)
 
+  object TimestampTypes extends Enumeration {
+    val TIMESTAMP_NTZ, TIMESTAMP_LTZ = Value
+  }
+
+  val TIMESTAMP_TYPE =
+    buildConf("spark.sql.timestampType")
+      .doc("Configures the default timestamp type of Spark SQL, including SQL DDL and Cast " +
+        s"clause. Setting the configuration as ${TimestampTypes.TIMESTAMP_NTZ.toString} will " +
+        "use TIMESTAMP WITHOUT TIME ZONE as the default type while putting it as " +
+        s"${TimestampTypes.TIMESTAMP_LTZ.toString} will use TIMESTAMP WITH LOCAL TIME ZONE. " +
+        "Before the 3.2.0 release, Spark only supports the TIMESTAMP WITH " +
+        "LOCAL TIME ZONE type.")
+      .version("3.2.0")
+      .stringConf
+      .transform(_.toUpperCase(Locale.ROOT))
+      .checkValues(TimestampTypes.values.map(_.toString))
+      .createWithDefault(TimestampTypes.TIMESTAMP_LTZ.toString)
+
   val DATETIME_JAVA8API_ENABLED = buildConf("spark.sql.datetime.java8API.enabled")
     .doc("If the configuration property is set to true, java.time.Instant and " +
       "java.time.LocalDate classes of Java 8 API are used as external types for " +
@@ -3897,6 +3916,15 @@ class SQLConf extends Serializable with Logging {
 
   def ansiEnabled: Boolean = getConf(ANSI_ENABLED)
 
+  def timestampType: AtomicType = getConf(TIMESTAMP_TYPE) match {
+    case "TIMESTAMP_LTZ" =>
+      // For historical reason, the TimestampType maps to TIMESTAMP WITH LOCAL TIME ZONE
+      TimestampType
+
+    case "TIMESTAMP_NTZ" =>
+      TimestampNTZType
+  }
+
   def nestedSchemaPruningEnabled: Boolean = getConf(NESTED_SCHEMA_PRUNING_ENABLED)
 
   def serializerNestedSchemaPruningEnabled: Boolean =
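
In `SQLConf.scala` above, `transform(_.toUpperCase(Locale.ROOT))` normalizes user input and `checkValues` restricts it to the two `TimestampTypes` names, which is why the `match` in `timestampType` can safely omit a default case. Downstream code is expected to read the resolved Catalyst type rather than the raw conf string; a minimal consumption sketch (the schema below is hypothetical, assuming an active `SQLConf`):

```scala
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types.{LongType, StructField, StructType, TimestampNTZType}

// Build a schema whose timestamp column follows the session default,
// whatever spark.sql.timestampType is currently set to.
val eventSchema = StructType(Seq(
  StructField("id", LongType),
  StructField("ts", SQLConf.get.timestampType)))

// Branch on the resolved type instead of re-parsing the conf string.
val usesNtz = SQLConf.get.timestampType == TimestampNTZType
```
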
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala
index bd8b4cf..a6b78e0 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala
@@ -18,9 +18,12 @@
 package org.apache.spark.sql.catalyst.parser
 
 import org.apache.spark.SparkFunSuite
+import org.apache.spark.sql.catalyst.plans.SQLHelper
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.internal.SQLConf.TimestampTypes
 import org.apache.spark.sql.types._
 
-class DataTypeParserSuite extends SparkFunSuite {
+class DataTypeParserSuite extends SparkFunSuite with SQLHelper {
 
   def parse(sql: String): DataType = CatalystSqlParser.parseDataType(sql)
 
@@ -135,6 +138,15 @@ class DataTypeParserSuite extends SparkFunSuite {
     assert(intercept("unknown(1,2,3)").getMessage.contains("unknown(1,2,3) is not supported"))
   }
 
+  test("Set default timestamp type") {
+    withSQLConf(SQLConf.TIMESTAMP_TYPE.key -> TimestampTypes.TIMESTAMP_NTZ.toString) {
+      assert(parse("timestamp") === TimestampNTZType)
+    }
+    withSQLConf(SQLConf.TIMESTAMP_TYPE.key -> TimestampTypes.TIMESTAMP_LTZ.toString) {
+      assert(parse("timestamp") === TimestampType)
+    }
+  }
+
   // DataType parser accepts certain reserved keywords.
   checkDataType(
     "Struct<TABLE: string, DATE:boolean>",

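For completeness, a sketch of an end-to-end check that stays at the parsing level, so it does not depend on data source support for the NTZ type at the time of this commit (the DDL string is illustrative):

```scala
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types.{StructType, TimestampNTZType}

// StructType.fromDDL goes through the same AstBuilder path patched above,
// so an unqualified TIMESTAMP column follows spark.sql.timestampType.
SQLConf.get.setConfString(SQLConf.TIMESTAMP_TYPE.key, "TIMESTAMP_NTZ")
val schema = StructType.fromDDL("id BIGINT, ts TIMESTAMP")
assert(schema("ts").dataType == TimestampNTZType)
```
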
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org