Posted to commits@spark.apache.org by hv...@apache.org on 2020/02/18 20:23:58 UTC

[spark] branch branch-3.0 updated: revert SPARK-29663 and SPARK-29688

This is an automated email from the ASF dual-hosted git repository.

hvanhovell pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 9947f3d  revert SPARK-29663 and SPARK-29688
9947f3d is described below

commit 9947f3d6b568685287c9a5f98bf0a3ecfcd1ae88
Author: Wenchen Fan <we...@databricks.com>
AuthorDate: Tue Feb 18 21:19:57 2020 +0100

    revert SPARK-29663 and SPARK-29688
    
    ### What changes were proposed in this pull request?
    
    This PR reverts https://github.com/apache/spark/pull/26325 and https://github.com/apache/spark/pull/26347
    
    ### Why are the changes needed?
    
    When we do sum/avg, we need a type wider than the input type to hold the sum, to reduce the possibility of overflow. For example, we use long to hold the sum of integral inputs, and double to hold the sum of float/double inputs.
    
    However, there is no wider type for intervals. The semantics are also unclear: what happens if the days field overflows but the months field does not? Currently the avg of `1 month` and `2 month` is `1 month 15 days`, which assumes that one month has 30 days, an assumption we should avoid.
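    
    For a hedged illustration (plain Scala, not Spark code; the object name is made up), this is the overflow that the wider sum type guards against for numeric inputs, and which intervals have no equivalent for:
    
    ```scala
    // Minimal sketch: accumulating Int values in an Int buffer wraps around,
    // while a Long buffer (the wider type) holds the exact sum.
    object WideningSketch {
      def main(args: Array[String]): Unit = {
        val values = Seq(Int.MaxValue, Int.MaxValue)
        val narrow = values.foldLeft(0)(_ + _)  // Int arithmetic overflows to -2
        val wide = values.foldLeft(0L)(_ + _)   // Long arithmetic: 4294967294
        println(s"Int buffer:  $narrow")
        println(s"Long buffer: $wide")
      }
    }
    ```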
    
    ### Does this PR introduce any user-facing change?
    
    Yes, it removes two features added in 3.0: support for sum and avg over interval values.
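    
    For illustration, queries like those in the sql-tests removed below now fail at analysis time with the error message restored in ExpressionTypeCheckingSuite. A sketch, assuming a spark-shell built from this branch (`spark` is the shell's predefined SparkSession):
    
    ```scala
    import org.apache.spark.sql.AnalysisException
    
    try {
      spark.sql("SELECT sum(cast(v AS interval)) FROM VALUES ('1 seconds') t(v)").collect()
    } catch {
      case e: AnalysisException =>
        // Expected to mention "function sum requires numeric type".
        println(e.getMessage)
    }
    ```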
    
    ### How was this patch tested?
    
    N/A
    
    Closes #27619 from cloud-fan/revert.
    
    Authored-by: Wenchen Fan <we...@databricks.com>
    Signed-off-by: herman <he...@databricks.com>
    (cherry picked from commit 1b67d546bd96412943de0c7a3b4295cbde887bd2)
    Signed-off-by: herman <he...@databricks.com>
---
 .../catalyst/expressions/aggregate/Average.scala   |  15 +-
 .../sql/catalyst/expressions/aggregate/Sum.scala   |  14 +-
 .../analysis/ExpressionTypeCheckingSuite.scala     |   4 +-
 .../test/resources/sql-tests/inputs/interval.sql   |  64 --------
 .../sql-tests/results/ansi/interval.sql.out        | 176 +--------------------
 .../resources/sql-tests/results/interval.sql.out   | 176 +--------------------
 .../apache/spark/sql/DataFrameAggregateSuite.scala |  13 --
 7 files changed, 17 insertions(+), 445 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala
index 996c548..17f906c 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala
@@ -17,9 +17,10 @@
 
 package org.apache.spark.sql.catalyst.expressions.aggregate
 
-import org.apache.spark.sql.catalyst.analysis.{DecimalPrecision, FunctionRegistry}
+import org.apache.spark.sql.catalyst.analysis.{DecimalPrecision, FunctionRegistry, TypeCheckResult}
 import org.apache.spark.sql.catalyst.dsl.expressions._
 import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.util.TypeUtils
 import org.apache.spark.sql.types._
 
 @ExpressionDescription(
@@ -30,8 +31,6 @@ import org.apache.spark.sql.types._
        2.0
       > SELECT _FUNC_(col) FROM VALUES (1), (2), (NULL) AS tab(col);
        1.5
-      > SELECT _FUNC_(cast(v as interval)) FROM VALUES ('-1 weeks'), ('2 seconds'), (null) t(v);
-       -3 days -11 hours -59 minutes -59 seconds
   """,
   since = "1.0.0")
 case class Average(child: Expression) extends DeclarativeAggregate with ImplicitCastInputTypes {
@@ -40,7 +39,10 @@ case class Average(child: Expression) extends DeclarativeAggregate with Implicit
 
   override def children: Seq[Expression] = child :: Nil
 
-  override def inputTypes: Seq[AbstractDataType] = Seq(TypeCollection.NumericAndInterval)
+  override def inputTypes: Seq[AbstractDataType] = Seq(NumericType)
+
+  override def checkInputDataTypes(): TypeCheckResult =
+    TypeUtils.checkForNumericExpr(child.dataType, "function average")
 
   override def nullable: Boolean = true
 
@@ -50,13 +52,11 @@ case class Average(child: Expression) extends DeclarativeAggregate with Implicit
   private lazy val resultType = child.dataType match {
     case DecimalType.Fixed(p, s) =>
       DecimalType.bounded(p + 4, s + 4)
-    case interval: CalendarIntervalType => interval
     case _ => DoubleType
   }
 
   private lazy val sumDataType = child.dataType match {
     case _ @ DecimalType.Fixed(p, s) => DecimalType.bounded(p + 10, s)
-    case interval: CalendarIntervalType => interval
     case _ => DoubleType
   }
 
@@ -79,9 +79,6 @@ case class Average(child: Expression) extends DeclarativeAggregate with Implicit
   override lazy val evaluateExpression = child.dataType match {
     case _: DecimalType =>
       DecimalPrecision.decimalAndDecimal(sum / count.cast(DecimalType.LongDecimal)).cast(resultType)
-    case CalendarIntervalType =>
-      val newCount = If(EqualTo(count, Literal(0L)), Literal(null, LongType), count)
-      DivideInterval(sum.cast(resultType), newCount.cast(DoubleType))
     case _ =>
       sum.cast(resultType) / count.cast(resultType)
   }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala
index 87f1a4f..8bfd889 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala
@@ -17,12 +17,13 @@
 
 package org.apache.spark.sql.catalyst.expressions.aggregate
 
+import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
 import org.apache.spark.sql.catalyst.dsl.expressions._
 import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.util.TypeUtils
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
 
-// scalastyle:off line.size.limit
 @ExpressionDescription(
   usage = "_FUNC_(expr) - Returns the sum calculated from values of a group.",
   examples = """
@@ -33,11 +34,8 @@ import org.apache.spark.sql.types._
        25
       > SELECT _FUNC_(col) FROM VALUES (NULL), (NULL) AS tab(col);
        NULL
-      > SELECT _FUNC_(cast(col as interval)) FROM VALUES ('1 seconds'), ('2 seconds'), (null) tab(col);
-       3 seconds
   """,
   since = "1.0.0")
-// scalastyle:on line.size.limit
 case class Sum(child: Expression) extends DeclarativeAggregate with ImplicitCastInputTypes {
 
   override def children: Seq[Expression] = child :: Nil
@@ -47,12 +45,14 @@ case class Sum(child: Expression) extends DeclarativeAggregate with ImplicitCast
   // Return data type.
   override def dataType: DataType = resultType
 
-  override def inputTypes: Seq[AbstractDataType] = Seq(TypeCollection.NumericAndInterval)
+  override def inputTypes: Seq[AbstractDataType] = Seq(NumericType)
+
+  override def checkInputDataTypes(): TypeCheckResult =
+    TypeUtils.checkForNumericExpr(child.dataType, "function sum")
 
   private lazy val resultType = child.dataType match {
     case DecimalType.Fixed(precision, scale) =>
       DecimalType.bounded(precision + 10, scale)
-    case _: CalendarIntervalType => CalendarIntervalType
     case _: IntegralType => LongType
     case _ => DoubleType
   }
@@ -61,7 +61,7 @@ case class Sum(child: Expression) extends DeclarativeAggregate with ImplicitCast
 
   private lazy val sum = AttributeReference("sum", sumDataType)()
 
-  private lazy val zero = Literal.default(resultType)
+  private lazy val zero = Literal.default(sumDataType)
 
   override lazy val aggBufferAttributes = sum :: Nil
 
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala
index 86a1f1f..46634c9 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala
@@ -158,8 +158,8 @@ class ExpressionTypeCheckingSuite extends SparkFunSuite {
 
     assertError(Min(Symbol("mapField")), "min does not support ordering on type")
     assertError(Max(Symbol("mapField")), "max does not support ordering on type")
-    assertError(Sum(Symbol("booleanField")), "requires (numeric or interval) type")
-    assertError(Average(Symbol("booleanField")), "requires (numeric or interval) type")
+    assertError(Sum(Symbol("booleanField")), "function sum requires numeric type")
+    assertError(Average(Symbol("booleanField")), "function average requires numeric type")
   }
 
   test("check types for others") {
diff --git a/sql/core/src/test/resources/sql-tests/inputs/interval.sql b/sql/core/src/test/resources/sql-tests/inputs/interval.sql
index a4e621e..facd632 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/interval.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/interval.sql
@@ -84,70 +84,6 @@ select interval (-30) day;
 select interval (a + 1) day;
 select interval 30 day day day;
 
--- sum interval values
--- null
-select sum(cast(null as interval));
-
--- empty set
-select sum(cast(v as interval)) from VALUES ('1 seconds') t(v) where 1=0;
-
--- basic interval sum
-select sum(cast(v as interval)) from VALUES ('1 seconds'), ('2 seconds'), (null) t(v);
-select sum(cast(v as interval)) from VALUES ('-1 seconds'), ('2 seconds'), (null) t(v);
-select sum(cast(v as interval)) from VALUES ('-1 seconds'), ('-2 seconds'), (null) t(v);
-select sum(cast(v as interval)) from VALUES ('-1 weeks'), ('2 seconds'), (null) t(v);
-
--- group by
-select
-    i,
-    sum(cast(v as interval))
-from VALUES (1, '-1 weeks'), (2, '2 seconds'), (3, null), (1, '5 days') t(i, v)
-group by i;
-
--- having
-select
-    sum(cast(v as interval)) as sv
-from VALUES (1, '-1 weeks'), (2, '2 seconds'), (3, null), (1, '5 days') t(i, v)
-having sv is not null;
-
--- window
-SELECT
-    i,
-    sum(cast(v as interval)) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)
-FROM VALUES(1, '1 seconds'), (1, '2 seconds'), (2, NULL), (2, NULL) t(i,v);
-
--- average with interval type
--- null
-select avg(cast(v as interval)) from VALUES (null) t(v);
-
--- empty set
-select avg(cast(v as interval)) from VALUES ('1 seconds'), ('2 seconds'), (null) t(v) where 1=0;
-
--- basic interval avg
-select avg(cast(v as interval)) from VALUES ('1 seconds'), ('2 seconds'), (null) t(v);
-select avg(cast(v as interval)) from VALUES ('-1 seconds'), ('2 seconds'), (null) t(v);
-select avg(cast(v as interval)) from VALUES ('-1 seconds'), ('-2 seconds'), (null) t(v);
-select avg(cast(v as interval)) from VALUES ('-1 weeks'), ('2 seconds'), (null) t(v);
-
--- group by
-select
-    i,
-    avg(cast(v as interval))
-from VALUES (1, '-1 weeks'), (2, '2 seconds'), (3, null), (1, '5 days') t(i, v)
-group by i;
-
--- having
-select
-    avg(cast(v as interval)) as sv
-from VALUES (1, '-1 weeks'), (2, '2 seconds'), (3, null), (1, '5 days') t(i, v)
-having sv is not null;
-
--- window
-SELECT
-    i,
-    avg(cast(v as interval)) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)
-FROM VALUES (1,'1 seconds'), (1,'2 seconds'), (2,NULL), (2,NULL) t(i,v);
-
 -- Interval year-month arithmetic
 
 create temporary view interval_arithmetic as
diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
index 7fdb4c5..4b46540 100644
--- a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 99
+-- Number of queries: 81
 
 
 -- !query
@@ -632,180 +632,6 @@ select interval 30 day day day
 
 
 -- !query
-select sum(cast(null as interval))
--- !query schema
-struct<sum(CAST(NULL AS INTERVAL)):interval>
--- !query output
-NULL
-
-
--- !query
-select sum(cast(v as interval)) from VALUES ('1 seconds') t(v) where 1=0
--- !query schema
-struct<sum(CAST(v AS INTERVAL)):interval>
--- !query output
-NULL
-
-
--- !query
-select sum(cast(v as interval)) from VALUES ('1 seconds'), ('2 seconds'), (null) t(v)
--- !query schema
-struct<sum(CAST(v AS INTERVAL)):interval>
--- !query output
-3 seconds
-
-
--- !query
-select sum(cast(v as interval)) from VALUES ('-1 seconds'), ('2 seconds'), (null) t(v)
--- !query schema
-struct<sum(CAST(v AS INTERVAL)):interval>
--- !query output
-1 seconds
-
-
--- !query
-select sum(cast(v as interval)) from VALUES ('-1 seconds'), ('-2 seconds'), (null) t(v)
--- !query schema
-struct<sum(CAST(v AS INTERVAL)):interval>
--- !query output
--3 seconds
-
-
--- !query
-select sum(cast(v as interval)) from VALUES ('-1 weeks'), ('2 seconds'), (null) t(v)
--- !query schema
-struct<sum(CAST(v AS INTERVAL)):interval>
--- !query output
--7 days 2 seconds
-
-
--- !query
-select
-    i,
-    sum(cast(v as interval))
-from VALUES (1, '-1 weeks'), (2, '2 seconds'), (3, null), (1, '5 days') t(i, v)
-group by i
--- !query schema
-struct<i:int,sum(CAST(v AS INTERVAL)):interval>
--- !query output
-1	-2 days
-2	2 seconds
-3	NULL
-
-
--- !query
-select
-    sum(cast(v as interval)) as sv
-from VALUES (1, '-1 weeks'), (2, '2 seconds'), (3, null), (1, '5 days') t(i, v)
-having sv is not null
--- !query schema
-struct<sv:interval>
--- !query output
--2 days 2 seconds
-
-
--- !query
-SELECT
-    i,
-    sum(cast(v as interval)) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)
-FROM VALUES(1, '1 seconds'), (1, '2 seconds'), (2, NULL), (2, NULL) t(i,v)
--- !query schema
-struct<i:int,sum(CAST(v AS INTERVAL)) OVER (ORDER BY i ASC NULLS FIRST ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING):interval>
--- !query output
-1	2 seconds
-1	3 seconds
-2	NULL
-2	NULL
-
-
--- !query
-select avg(cast(v as interval)) from VALUES (null) t(v)
--- !query schema
-struct<avg(CAST(v AS INTERVAL)):interval>
--- !query output
-NULL
-
-
--- !query
-select avg(cast(v as interval)) from VALUES ('1 seconds'), ('2 seconds'), (null) t(v) where 1=0
--- !query schema
-struct<avg(CAST(v AS INTERVAL)):interval>
--- !query output
-NULL
-
-
--- !query
-select avg(cast(v as interval)) from VALUES ('1 seconds'), ('2 seconds'), (null) t(v)
--- !query schema
-struct<avg(CAST(v AS INTERVAL)):interval>
--- !query output
-1.5 seconds
-
-
--- !query
-select avg(cast(v as interval)) from VALUES ('-1 seconds'), ('2 seconds'), (null) t(v)
--- !query schema
-struct<avg(CAST(v AS INTERVAL)):interval>
--- !query output
-0.5 seconds
-
-
--- !query
-select avg(cast(v as interval)) from VALUES ('-1 seconds'), ('-2 seconds'), (null) t(v)
--- !query schema
-struct<avg(CAST(v AS INTERVAL)):interval>
--- !query output
--1.5 seconds
-
-
--- !query
-select avg(cast(v as interval)) from VALUES ('-1 weeks'), ('2 seconds'), (null) t(v)
--- !query schema
-struct<avg(CAST(v AS INTERVAL)):interval>
--- !query output
--3 days -11 hours -59 minutes -59 seconds
-
-
--- !query
-select
-    i,
-    avg(cast(v as interval))
-from VALUES (1, '-1 weeks'), (2, '2 seconds'), (3, null), (1, '5 days') t(i, v)
-group by i
--- !query schema
-struct<i:int,avg(CAST(v AS INTERVAL)):interval>
--- !query output
-1	-1 days
-2	2 seconds
-3	NULL
-
-
--- !query
-select
-    avg(cast(v as interval)) as sv
-from VALUES (1, '-1 weeks'), (2, '2 seconds'), (3, null), (1, '5 days') t(i, v)
-having sv is not null
--- !query schema
-struct<sv:interval>
--- !query output
--15 hours -59 minutes -59.333333 seconds
-
-
--- !query
-SELECT
-    i,
-    avg(cast(v as interval)) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)
-FROM VALUES (1,'1 seconds'), (1,'2 seconds'), (2,NULL), (2,NULL) t(i,v)
--- !query schema
-struct<i:int,avg(CAST(v AS INTERVAL)) OVER (ORDER BY i ASC NULLS FIRST ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING):interval>
--- !query output
-1	1.5 seconds
-1	2 seconds
-2	NULL
-2	NULL
-
-
--- !query
 create temporary view interval_arithmetic as
   select CAST(dateval AS date), CAST(tsval AS timestamp) from values
     ('2012-01-01', '2012-01-01')
diff --git a/sql/core/src/test/resources/sql-tests/results/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/interval.sql.out
index 3c4b430..0509594 100644
--- a/sql/core/src/test/resources/sql-tests/results/interval.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/interval.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 99
+-- Number of queries: 81
 
 
 -- !query
@@ -616,180 +616,6 @@ select interval 30 day day day
 
 
 -- !query
-select sum(cast(null as interval))
--- !query schema
-struct<sum(CAST(NULL AS INTERVAL)):interval>
--- !query output
-NULL
-
-
--- !query
-select sum(cast(v as interval)) from VALUES ('1 seconds') t(v) where 1=0
--- !query schema
-struct<sum(CAST(v AS INTERVAL)):interval>
--- !query output
-NULL
-
-
--- !query
-select sum(cast(v as interval)) from VALUES ('1 seconds'), ('2 seconds'), (null) t(v)
--- !query schema
-struct<sum(CAST(v AS INTERVAL)):interval>
--- !query output
-3 seconds
-
-
--- !query
-select sum(cast(v as interval)) from VALUES ('-1 seconds'), ('2 seconds'), (null) t(v)
--- !query schema
-struct<sum(CAST(v AS INTERVAL)):interval>
--- !query output
-1 seconds
-
-
--- !query
-select sum(cast(v as interval)) from VALUES ('-1 seconds'), ('-2 seconds'), (null) t(v)
--- !query schema
-struct<sum(CAST(v AS INTERVAL)):interval>
--- !query output
--3 seconds
-
-
--- !query
-select sum(cast(v as interval)) from VALUES ('-1 weeks'), ('2 seconds'), (null) t(v)
--- !query schema
-struct<sum(CAST(v AS INTERVAL)):interval>
--- !query output
--7 days 2 seconds
-
-
--- !query
-select
-    i,
-    sum(cast(v as interval))
-from VALUES (1, '-1 weeks'), (2, '2 seconds'), (3, null), (1, '5 days') t(i, v)
-group by i
--- !query schema
-struct<i:int,sum(CAST(v AS INTERVAL)):interval>
--- !query output
-1	-2 days
-2	2 seconds
-3	NULL
-
-
--- !query
-select
-    sum(cast(v as interval)) as sv
-from VALUES (1, '-1 weeks'), (2, '2 seconds'), (3, null), (1, '5 days') t(i, v)
-having sv is not null
--- !query schema
-struct<sv:interval>
--- !query output
--2 days 2 seconds
-
-
--- !query
-SELECT
-    i,
-    sum(cast(v as interval)) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)
-FROM VALUES(1, '1 seconds'), (1, '2 seconds'), (2, NULL), (2, NULL) t(i,v)
--- !query schema
-struct<i:int,sum(CAST(v AS INTERVAL)) OVER (ORDER BY i ASC NULLS FIRST ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING):interval>
--- !query output
-1	2 seconds
-1	3 seconds
-2	NULL
-2	NULL
-
-
--- !query
-select avg(cast(v as interval)) from VALUES (null) t(v)
--- !query schema
-struct<avg(CAST(v AS INTERVAL)):interval>
--- !query output
-NULL
-
-
--- !query
-select avg(cast(v as interval)) from VALUES ('1 seconds'), ('2 seconds'), (null) t(v) where 1=0
--- !query schema
-struct<avg(CAST(v AS INTERVAL)):interval>
--- !query output
-NULL
-
-
--- !query
-select avg(cast(v as interval)) from VALUES ('1 seconds'), ('2 seconds'), (null) t(v)
--- !query schema
-struct<avg(CAST(v AS INTERVAL)):interval>
--- !query output
-1.5 seconds
-
-
--- !query
-select avg(cast(v as interval)) from VALUES ('-1 seconds'), ('2 seconds'), (null) t(v)
--- !query schema
-struct<avg(CAST(v AS INTERVAL)):interval>
--- !query output
-0.5 seconds
-
-
--- !query
-select avg(cast(v as interval)) from VALUES ('-1 seconds'), ('-2 seconds'), (null) t(v)
--- !query schema
-struct<avg(CAST(v AS INTERVAL)):interval>
--- !query output
--1.5 seconds
-
-
--- !query
-select avg(cast(v as interval)) from VALUES ('-1 weeks'), ('2 seconds'), (null) t(v)
--- !query schema
-struct<avg(CAST(v AS INTERVAL)):interval>
--- !query output
--3 days -11 hours -59 minutes -59 seconds
-
-
--- !query
-select
-    i,
-    avg(cast(v as interval))
-from VALUES (1, '-1 weeks'), (2, '2 seconds'), (3, null), (1, '5 days') t(i, v)
-group by i
--- !query schema
-struct<i:int,avg(CAST(v AS INTERVAL)):interval>
--- !query output
-1	-1 days
-2	2 seconds
-3	NULL
-
-
--- !query
-select
-    avg(cast(v as interval)) as sv
-from VALUES (1, '-1 weeks'), (2, '2 seconds'), (3, null), (1, '5 days') t(i, v)
-having sv is not null
--- !query schema
-struct<sv:interval>
--- !query output
--15 hours -59 minutes -59.333333 seconds
-
-
--- !query
-SELECT
-    i,
-    avg(cast(v as interval)) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)
-FROM VALUES (1,'1 seconds'), (1,'2 seconds'), (2,NULL), (2,NULL) t(i,v)
--- !query schema
-struct<i:int,avg(CAST(v AS INTERVAL)) OVER (ORDER BY i ASC NULLS FIRST ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING):interval>
--- !query output
-1	1.5 seconds
-1	2 seconds
-2	NULL
-2	NULL
-
-
--- !query
 create temporary view interval_arithmetic as
   select CAST(dateval AS date), CAST(tsval AS timestamp) from values
     ('2012-01-01', '2012-01-01')
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
index d7df75f..288f3da 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
@@ -957,17 +957,4 @@ class DataFrameAggregateSuite extends QueryTest
       assert(error.message.contains("function count_if requires boolean type"))
     }
   }
-
-  test("calendar interval agg support hash aggregate") {
-    val df1 = Seq((1, "1 day"), (2, "2 day"), (3, "3 day"), (3, null)).toDF("a", "b")
-    val df2 = df1.select(avg($"b" cast CalendarIntervalType))
-    checkAnswer(df2, Row(new CalendarInterval(0, 2, 0)) :: Nil)
-    assert(find(df2.queryExecution.executedPlan)(_.isInstanceOf[HashAggregateExec]).isDefined)
-    val df3 = df1.groupBy($"a").agg(avg($"b" cast CalendarIntervalType))
-    checkAnswer(df3,
-      Row(1, new CalendarInterval(0, 1, 0)) ::
-        Row(2, new CalendarInterval(0, 2, 0)) ::
-        Row(3, new CalendarInterval(0, 3, 0)) :: Nil)
-    assert(find(df3.queryExecution.executedPlan)(_.isInstanceOf[HashAggregateExec]).isDefined)
-  }
 }

