Posted to commits@spark.apache.org by ma...@apache.org on 2021/07/07 09:41:41 UTC

[spark] branch master updated: [SPARK-36017][SQL] Support TimestampNTZType in expression ApproximatePercentile

This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new cc4463e  [SPARK-36017][SQL] Support TimestampNTZType in expression ApproximatePercentile
cc4463e is described below

commit cc4463e818749faaf648ec71699d1e2fd3828c3f
Author: gengjiaan <ge...@360.cn>
AuthorDate: Wed Jul 7 12:41:11 2021 +0300

    [SPARK-36017][SQL] Support TimestampNTZType in expression ApproximatePercentile
    
    ### What changes were proposed in this pull request?
    `ApproximatePercentile` currently supports `TimestampType`, but does not yet support timestamp without time zone.
    This PR adds that support.
    
    ### Why are the changes needed?
    `ApproximatePercentile` needs to support `TimestampNTZType`.
    
    ### Does this PR introduce _any_ user-facing change?
    Yes. `ApproximatePercentile` now accepts `TimestampNTZType`.
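    
    For example, a query like the following (an illustrative sketch, not taken from this patch; the view name `tab` and column name `ts` are hypothetical) now passes type checking:
    
        // Hypothetical usage sketch, e.g. in a spark-shell built from master.
        // Seq[LocalDateTime].toDF yields a TimestampNTZType column, the same
        // pattern the updated test suite below relies on.
        import java.time.LocalDateTime
        val df = Seq(
          LocalDateTime.of(2021, 7, 7, 0, 0, 1),
          LocalDateTime.of(2021, 7, 7, 0, 0, 2),
          LocalDateTime.of(2021, 7, 7, 0, 0, 3)).toDF("ts")
        df.createOrReplaceTempView("tab")
        spark.sql("SELECT percentile_approx(ts, 0.5) FROM tab").show()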
    
    ### How was this patch tested?
    New tests.
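    
    The updated suite can be run locally with Spark's usual sbt invocation (assuming a full checkout), e.g.:
    
        build/sbt "sql/testOnly org.apache.spark.sql.ApproximatePercentileQuerySuite"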
    
    Closes #33241 from beliefer/SPARK-36017.
    
    Authored-by: gengjiaan <ge...@360.cn>
    Signed-off-by: Max Gekk <ma...@gmail.com>
---
 .../expressions/aggregate/ApproximatePercentile.scala      | 10 +++++-----
 .../apache/spark/sql/ApproximatePercentileQuerySuite.scala | 14 +++++++++-----
 2 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/ApproximatePercentile.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/ApproximatePercentile.scala
index 78e64bf..8cce79c 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/ApproximatePercentile.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/ApproximatePercentile.scala
@@ -92,9 +92,9 @@ case class ApproximatePercentile(
   private lazy val accuracy: Long = accuracyExpression.eval().asInstanceOf[Number].longValue
 
   override def inputTypes: Seq[AbstractDataType] = {
-    // Support NumericType, DateType and TimestampType since their internal types are all numeric,
-    // and can be easily cast to double for processing.
-    Seq(TypeCollection(NumericType, DateType, TimestampType),
+    // Support NumericType, DateType, TimestampType and TimestampNTZType since their internal types
+    // are all numeric, and can be easily cast to double for processing.
+    Seq(TypeCollection(NumericType, DateType, TimestampType, TimestampNTZType),
       TypeCollection(DoubleType, ArrayType(DoubleType, containsNull = false)), IntegralType)
   }
 
@@ -139,7 +139,7 @@ case class ApproximatePercentile(
       // Convert the value to a double value
       val doubleValue = child.dataType match {
         case DateType => value.asInstanceOf[Int].toDouble
-        case TimestampType => value.asInstanceOf[Long].toDouble
+        case TimestampType | TimestampNTZType => value.asInstanceOf[Long].toDouble
         case n: NumericType => n.numeric.toDouble(value.asInstanceOf[n.InternalType])
         case other: DataType =>
           throw QueryExecutionErrors.dataTypeUnexpectedError(other)
@@ -158,7 +158,7 @@ case class ApproximatePercentile(
     val doubleResult = buffer.getPercentiles(percentages)
     val result = child.dataType match {
       case DateType => doubleResult.map(_.toInt)
-      case TimestampType => doubleResult.map(_.toLong)
+      case TimestampType | TimestampNTZType => doubleResult.map(_.toLong)
       case ByteType => doubleResult.map(_.toByte)
       case ShortType => doubleResult.map(_.toShort)
       case IntegerType => doubleResult.map(_.toInt)
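
For context: TimestampNTZType shares TimestampType's internal representation, a Long counting microseconds since the epoch, which is why both hunks above reuse the existing Long-to-Double path. A minimal round-trip sketch (illustrative only, not part of the patch; microsToLocalDateTime appears in the test diff below, and localDateTimeToMicros is its counterpart in DateTimeUtils):

    // Sketch: a TIMESTAMP_NTZ value survives the Double round trip used by the
    // percentile digest; microsecond counts for ordinary dates fit well within
    // a Double's 53-bit mantissa, so the conversion back via toLong is exact.
    import java.time.LocalDateTime
    import org.apache.spark.sql.catalyst.util.DateTimeUtils

    val ldt = LocalDateTime.of(2021, 7, 7, 12, 41, 11)
    val micros: Long = DateTimeUtils.localDateTimeToMicros(ldt)
    val asDouble: Double = micros.toDouble   // what update() feeds the digest
    val restored = DateTimeUtils.microsToLocalDateTime(asDouble.toLong)  // generateOutput path
    assert(restored == ldt)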
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ApproximatePercentileQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ApproximatePercentileQuerySuite.scala
index 4991e39..5ff15c9 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/ApproximatePercentileQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ApproximatePercentileQuerySuite.scala
@@ -18,6 +18,7 @@
 package org.apache.spark.sql
 
 import java.sql.{Date, Timestamp}
+import java.time.LocalDateTime
 
 import org.apache.spark.sql.catalyst.expressions.aggregate.ApproximatePercentile
 import org.apache.spark.sql.catalyst.expressions.aggregate.ApproximatePercentile.DEFAULT_PERCENTILE_ACCURACY
@@ -89,23 +90,26 @@ class ApproximatePercentileQuerySuite extends QueryTest with SharedSparkSession
   test("percentile_approx, different column types") {
     withTempView(table) {
       val intSeq = 1 to 1000
-      val data: Seq[(java.math.BigDecimal, Date, Timestamp)] = intSeq.map { i =>
-        (new java.math.BigDecimal(i), DateTimeUtils.toJavaDate(i), DateTimeUtils.toJavaTimestamp(i))
+      val data: Seq[(java.math.BigDecimal, Date, Timestamp, LocalDateTime)] = intSeq.map { i =>
+        (new java.math.BigDecimal(i), DateTimeUtils.toJavaDate(i),
+          DateTimeUtils.toJavaTimestamp(i), DateTimeUtils.microsToLocalDateTime(i))
       }
-      data.toDF("cdecimal", "cdate", "ctimestamp").createOrReplaceTempView(table)
+      data.toDF("cdecimal", "cdate", "ctimestamp", "ctimestampntz").createOrReplaceTempView(table)
       checkAnswer(
         spark.sql(
           s"""SELECT
              |  percentile_approx(cdecimal, array(0.25, 0.5, 0.75D)),
              |  percentile_approx(cdate, array(0.25, 0.5, 0.75D)),
-             |  percentile_approx(ctimestamp, array(0.25, 0.5, 0.75D))
+             |  percentile_approx(ctimestamp, array(0.25, 0.5, 0.75D)),
+             |  percentile_approx(ctimestampntz, array(0.25, 0.5, 0.75D))
              |FROM $table
            """.stripMargin),
         Row(
           Seq("250.000000000000000000", "500.000000000000000000", "750.000000000000000000")
               .map(i => new java.math.BigDecimal(i)),
           Seq(250, 500, 750).map(DateTimeUtils.toJavaDate),
-          Seq(250, 500, 750).map(i => DateTimeUtils.toJavaTimestamp(i.toLong)))
+          Seq(250, 500, 750).map(i => DateTimeUtils.toJavaTimestamp(i.toLong)),
+          Seq(250, 500, 750).map(i => DateTimeUtils.microsToLocalDateTime(i.toLong)))
       )
     }
   }
