You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2021/07/07 09:41:41 UTC
[spark] branch master updated: [SPARK-36017][SQL] Support
TimestampNTZType in expression ApproximatePercentile
This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new cc4463e [SPARK-36017][SQL] Support TimestampNTZType in expression ApproximatePercentile
cc4463e is described below
commit cc4463e818749faaf648ec71699d1e2fd3828c3f
Author: gengjiaan <ge...@360.cn>
AuthorDate: Wed Jul 7 12:41:11 2021 +0300
[SPARK-36017][SQL] Support TimestampNTZType in expression ApproximatePercentile
### What changes were proposed in this pull request?
`ApproximatePercentile` currently supports `TimestampType`, but does not yet support timestamps without time zone (`TimestampNTZType`).
This PR adds that support.
### Why are the changes needed?
`ApproximatePercentile` needs to support `TimestampNTZType`.
### Does this PR introduce _any_ user-facing change?
Yes. `ApproximatePercentile` now accepts `TimestampNTZType`.
### How was this patch tested?
New tests.
Closes #33241 from beliefer/SPARK-36017.
Authored-by: gengjiaan <ge...@360.cn>
Signed-off-by: Max Gekk <ma...@gmail.com>
---
.../expressions/aggregate/ApproximatePercentile.scala | 10 +++++-----
.../apache/spark/sql/ApproximatePercentileQuerySuite.scala | 14 +++++++++-----
2 files changed, 14 insertions(+), 10 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/ApproximatePercentile.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/ApproximatePercentile.scala
index 78e64bf..8cce79c 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/ApproximatePercentile.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/ApproximatePercentile.scala
@@ -92,9 +92,9 @@ case class ApproximatePercentile(
private lazy val accuracy: Long = accuracyExpression.eval().asInstanceOf[Number].longValue
override def inputTypes: Seq[AbstractDataType] = {
- // Support NumericType, DateType and TimestampType since their internal types are all numeric,
- // and can be easily cast to double for processing.
- Seq(TypeCollection(NumericType, DateType, TimestampType),
+ // Support NumericType, DateType, TimestampType and TimestampNTZType since their internal types
+ // are all numeric, and can be easily cast to double for processing.
+ Seq(TypeCollection(NumericType, DateType, TimestampType, TimestampNTZType),
TypeCollection(DoubleType, ArrayType(DoubleType, containsNull = false)), IntegralType)
}
@@ -139,7 +139,7 @@ case class ApproximatePercentile(
// Convert the value to a double value
val doubleValue = child.dataType match {
case DateType => value.asInstanceOf[Int].toDouble
- case TimestampType => value.asInstanceOf[Long].toDouble
+ case TimestampType | TimestampNTZType => value.asInstanceOf[Long].toDouble
case n: NumericType => n.numeric.toDouble(value.asInstanceOf[n.InternalType])
case other: DataType =>
throw QueryExecutionErrors.dataTypeUnexpectedError(other)
@@ -158,7 +158,7 @@ case class ApproximatePercentile(
val doubleResult = buffer.getPercentiles(percentages)
val result = child.dataType match {
case DateType => doubleResult.map(_.toInt)
- case TimestampType => doubleResult.map(_.toLong)
+ case TimestampType | TimestampNTZType => doubleResult.map(_.toLong)
case ByteType => doubleResult.map(_.toByte)
case ShortType => doubleResult.map(_.toShort)
case IntegerType => doubleResult.map(_.toInt)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ApproximatePercentileQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ApproximatePercentileQuerySuite.scala
index 4991e39..5ff15c9 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/ApproximatePercentileQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ApproximatePercentileQuerySuite.scala
@@ -18,6 +18,7 @@
package org.apache.spark.sql
import java.sql.{Date, Timestamp}
+import java.time.LocalDateTime
import org.apache.spark.sql.catalyst.expressions.aggregate.ApproximatePercentile
import org.apache.spark.sql.catalyst.expressions.aggregate.ApproximatePercentile.DEFAULT_PERCENTILE_ACCURACY
@@ -89,23 +90,26 @@ class ApproximatePercentileQuerySuite extends QueryTest with SharedSparkSession
test("percentile_approx, different column types") {
withTempView(table) {
val intSeq = 1 to 1000
- val data: Seq[(java.math.BigDecimal, Date, Timestamp)] = intSeq.map { i =>
- (new java.math.BigDecimal(i), DateTimeUtils.toJavaDate(i), DateTimeUtils.toJavaTimestamp(i))
+ val data: Seq[(java.math.BigDecimal, Date, Timestamp, LocalDateTime)] = intSeq.map { i =>
+ (new java.math.BigDecimal(i), DateTimeUtils.toJavaDate(i),
+ DateTimeUtils.toJavaTimestamp(i), DateTimeUtils.microsToLocalDateTime(i))
}
- data.toDF("cdecimal", "cdate", "ctimestamp").createOrReplaceTempView(table)
+ data.toDF("cdecimal", "cdate", "ctimestamp", "ctimestampntz").createOrReplaceTempView(table)
checkAnswer(
spark.sql(
s"""SELECT
| percentile_approx(cdecimal, array(0.25, 0.5, 0.75D)),
| percentile_approx(cdate, array(0.25, 0.5, 0.75D)),
- | percentile_approx(ctimestamp, array(0.25, 0.5, 0.75D))
+ | percentile_approx(ctimestamp, array(0.25, 0.5, 0.75D)),
+ | percentile_approx(ctimestampntz, array(0.25, 0.5, 0.75D))
|FROM $table
""".stripMargin),
Row(
Seq("250.000000000000000000", "500.000000000000000000", "750.000000000000000000")
.map(i => new java.math.BigDecimal(i)),
Seq(250, 500, 750).map(DateTimeUtils.toJavaDate),
- Seq(250, 500, 750).map(i => DateTimeUtils.toJavaTimestamp(i.toLong)))
+ Seq(250, 500, 750).map(i => DateTimeUtils.toJavaTimestamp(i.toLong)),
+ Seq(250, 500, 750).map(i => DateTimeUtils.microsToLocalDateTime(i.toLong)))
)
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org