You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ge...@apache.org on 2023/03/14 04:00:36 UTC
[spark] branch branch-3.4 updated: [SPARK-42777][SQL] Support converting TimestampNTZ catalog stats to plan stats
This is an automated email from the ASF dual-hosted git repository.
gengliang pushed a commit to branch branch-3.4
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.4 by this push:
new a352507a12c [SPARK-42777][SQL] Support converting TimestampNTZ catalog stats to plan stats
a352507a12c is described below
commit a352507a12c256b8d196c142f65291db21c96a5f
Author: Gengliang Wang <ge...@apache.org>
AuthorDate: Mon Mar 13 21:00:02 2023 -0700
[SPARK-42777][SQL] Support converting TimestampNTZ catalog stats to plan stats
### What changes were proposed in this pull request?
When `spark.sql.cbo.planStats.enabled` or `spark.sql.cbo.enabled` is enabled, the logical plan will fetch row counts and column statistics from the catalog.
This PR is to support converting TimestampNTZ catalog stats to plan stats.
### Why are the changes needed?
Implement a missing piece of the TimestampNTZ type.
### Does this PR introduce _any_ user-facing change?
No, TimestampNTZ is not released yet.
### How was this patch tested?
New unit test
Closes #40404 from gengliangwang/fromExternalString.
Authored-by: Gengliang Wang <ge...@apache.org>
Signed-off-by: Gengliang Wang <ge...@apache.org>
(cherry picked from commit c3ac782450583e6073b88d940af60714eb4cdf44)
Signed-off-by: Gengliang Wang <ge...@apache.org>
---
.../spark/sql/catalyst/catalog/interface.scala | 2 ++
.../spark/sql/StatisticsCollectionSuite.scala | 24 ++++++++++++++++++++++
2 files changed, 26 insertions(+)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
index 6f4c4f27efc..08dd2dfd5bc 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
@@ -681,6 +681,8 @@ object CatalogColumnStat extends Logging {
case TimestampType if version == 1 =>
DateTimeUtils.fromJavaTimestamp(java.sql.Timestamp.valueOf(s))
case TimestampType => getTimestampFormatter(isParsing = true).parse(s)
+ case TimestampNTZType =>
+ getTimestampFormatter(isParsing = true, forTimestampNTZ = true).parse(s)
case ByteType => s.toByte
case ShortType => s.toShort
case IntegerType => s.toInt
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala
index 2ab8bb25a8b..e6b74a328e5 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala
@@ -571,6 +571,30 @@ class StatisticsCollectionSuite extends StatisticsCollectionTestBase with Shared
}
}
+ test("SPARK-42777: describe column stats (min, max) for timestamp_ntz column") {
+ val table = "insert_desc_same_time_zone"
+ val tsCol = "timestamp_ntz_typed_col"
+ withTable(table) {
+ val minTimestamp = "make_timestamp_ntz(2022, 1, 1, 0, 0, 1.123456)"
+ val maxTimestamp = "make_timestamp_ntz(2022, 1, 3, 0, 0, 2.987654)"
+ sql(s"CREATE TABLE $table ($tsCol timestamp_ntz) USING parquet")
+ sql(s"INSERT INTO $table VALUES $minTimestamp, $maxTimestamp")
+ sql(s"ANALYZE TABLE $table COMPUTE STATISTICS FOR ALL COLUMNS")
+
+ checkDescTimestampColStats(
+ tableName = table,
+ timestampColumn = tsCol,
+ expectedMinTimestamp = "2022-01-01 00:00:01.123456",
+ expectedMaxTimestamp = "2022-01-03 00:00:02.987654")
+
+ // Converting TimestampNTZ catalog stats to plan stats
+ val columnStat = getCatalogTable(table)
+ .stats.get.colStats(tsCol).toPlanStat(tsCol, TimestampNTZType)
+ assert(columnStat.min.contains(1640995201123456L))
+ assert(columnStat.max.contains(1641168002987654L))
+ }
+ }
+
private def getStatAttrNames(tableName: String): Set[String] = {
val queryStats = spark.table(tableName).queryExecution.optimizedPlan.stats.attributeStats
queryStats.map(_._1.name).toSet
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org