You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by yh...@apache.org on 2016/02/02 09:49:07 UTC
spark git commit: [SPARK-13087][SQL] Fix group by function for sort based aggregation
Repository: spark
Updated Branches:
refs/heads/master b8666fd0e -> 22ba21348
[SPARK-13087][SQL] Fix group by function for sort based aggregation
It is not valid to call `toAttribute` on a `NamedExpression` unless we know for sure that the child produced that `NamedExpression`. The previous code worked fine when the grouping expressions were simple, but when a grouping expression was a derived value (for example, the result of a function call such as `floor(a)`), it blew up at execution time.
Author: Michael Armbrust <mi...@databricks.com>
Closes #11013 from marmbrus/groupByFunction-master.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/22ba2134
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/22ba2134
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/22ba2134
Branch: refs/heads/master
Commit: 22ba21348b28d8b1909ccde6fe17fb9e68531e5a
Parents: b8666fd
Author: Michael Armbrust <mi...@databricks.com>
Authored: Tue Feb 2 16:48:59 2016 +0800
Committer: Yin Huai <yh...@databricks.com>
Committed: Tue Feb 2 16:48:59 2016 +0800
----------------------------------------------------------------------
.../org/apache/spark/sql/execution/aggregate/utils.scala | 5 ++---
.../spark/sql/hive/execution/AggregationQuerySuite.scala | 8 ++++++++
2 files changed, 10 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/22ba2134/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/utils.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/utils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/utils.scala
index 83379ae..1e113cc 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/utils.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/utils.scala
@@ -33,15 +33,14 @@ object Utils {
resultExpressions: Seq[NamedExpression],
child: SparkPlan): Seq[SparkPlan] = {
- val groupingAttributes = groupingExpressions.map(_.toAttribute)
val completeAggregateExpressions = aggregateExpressions.map(_.copy(mode = Complete))
val completeAggregateAttributes = completeAggregateExpressions.map {
expr => aggregateFunctionToAttribute(expr.aggregateFunction, expr.isDistinct)
}
SortBasedAggregate(
- requiredChildDistributionExpressions = Some(groupingAttributes),
- groupingExpressions = groupingAttributes,
+ requiredChildDistributionExpressions = Some(groupingExpressions),
+ groupingExpressions = groupingExpressions,
aggregateExpressions = completeAggregateExpressions,
aggregateAttributes = completeAggregateAttributes,
initialInputBufferOffset = 0,
http://git-wip-us.apache.org/repos/asf/spark/blob/22ba2134/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala
index 3e4cf3f..7a9ed1e 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala
@@ -193,6 +193,14 @@ abstract class AggregationQuerySuite extends QueryTest with SQLTestUtils with Te
sqlContext.dropTempTable("emptyTable")
}
+ test("group by function") {
+ Seq((1, 2)).toDF("a", "b").registerTempTable("data")
+
+ checkAnswer(
+ sql("SELECT floor(a) AS a, collect_set(b) FROM data GROUP BY floor(a) ORDER BY a"),
+ Row(1, Array(2)) :: Nil)
+ }
+
test("empty table") {
// If there is no GROUP BY clause and the table is empty, we will generate a single row.
checkAnswer(
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org