You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by li...@apache.org on 2017/08/05 05:55:09 UTC
spark git commit: [SPARK-21580][SQL] Integers in aggregation
expressions are wrongly taken as group-by ordinal
Repository: spark
Updated Branches:
refs/heads/master 6cbd18c9d -> 894d5a453
[SPARK-21580][SQL] Integers in aggregation expressions are wrongly taken as group-by ordinal
## What changes were proposed in this pull request?
create temporary view data as select * from values
(1, 1),
(1, 2),
(2, 1),
(2, 2),
(3, 1),
(3, 2)
as data(a, b);
`select 3, 4, sum(b) from data group by 1, 2;`
`select 3 as c, 4 as d, sum(b) from data group by c, d;`
When running either of these two queries, the following exception occurs:
`Error in query: GROUP BY position 4 is not in select list (valid range is [1, 3]); line 1 pos 10`
The cause of this failure:
If an aggregate expression is an integer literal, then after the group-by ordinal is
replaced with that aggregate expression, the resulting group expression is still wrongly treated as an ordinal.
The solution:
This bug is due to re-entrance of an analyzed plan. We can solve it by using `resolveOperators` in `SubstituteUnresolvedOrdinals`.
## How was this patch tested?
Added unit test case
Author: liuxian <li...@zte.com.cn>
Closes #18779 from 10110346/groupby.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/894d5a45
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/894d5a45
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/894d5a45
Branch: refs/heads/master
Commit: 894d5a453a3f47525408ee8c91b3b594daa43ccb
Parents: 6cbd18c
Author: liuxian <li...@zte.com.cn>
Authored: Fri Aug 4 22:55:06 2017 -0700
Committer: gatorsmile <ga...@gmail.com>
Committed: Fri Aug 4 22:55:06 2017 -0700
----------------------------------------------------------------------
.../analysis/SubstituteUnresolvedOrdinals.scala | 2 +-
.../resources/sql-tests/inputs/group-by-ordinal.sql | 2 +-
.../apache/spark/sql/DataFrameAggregateSuite.scala | 16 ++++++++++++++++
.../scala/org/apache/spark/sql/DataFrameSuite.scala | 6 ++++++
4 files changed, 24 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/894d5a45/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinals.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinals.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinals.scala
index 256b187..860d20f 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinals.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinals.scala
@@ -33,7 +33,7 @@ class SubstituteUnresolvedOrdinals(conf: SQLConf) extends Rule[LogicalPlan] {
case _ => false
}
- def apply(plan: LogicalPlan): LogicalPlan = plan transform {
+ def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators {
case s: Sort if conf.orderByOrdinal && s.order.exists(o => isIntLiteral(o.child)) =>
val newOrders = s.order.map {
case order @ SortOrder(ordinal @ Literal(index: Int, IntegerType), _, _, _) =>
http://git-wip-us.apache.org/repos/asf/spark/blob/894d5a45/sql/core/src/test/resources/sql-tests/inputs/group-by-ordinal.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/inputs/group-by-ordinal.sql b/sql/core/src/test/resources/sql-tests/inputs/group-by-ordinal.sql
index 6566338..928f766 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/group-by-ordinal.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/group-by-ordinal.sql
@@ -52,7 +52,7 @@ select count(a), a from (select 1 as a) tmp group by 2 having a > 0;
-- mixed cases: group-by ordinals and aliases
select a, a AS k, count(b) from data group by k, 1;
--- turn of group by ordinal
+-- turn off group by ordinal
set spark.sql.groupByOrdinal=false;
-- can now group by negative literal
http://git-wip-us.apache.org/repos/asf/spark/blob/894d5a45/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
index d802557..69ea62e 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
@@ -557,4 +557,20 @@ class DataFrameAggregateSuite extends QueryTest with SharedSQLContext {
}
assert(e.message.contains("aggregate functions are not allowed in GROUP BY"))
}
+
+ test("SPARK-21580 ints in aggregation expressions are taken as group-by ordinal.") {
+ checkAnswer(
+ testData2.groupBy(lit(3), lit(4)).agg(lit(6), lit(7), sum("b")),
+ Seq(Row(3, 4, 6, 7, 9)))
+ checkAnswer(
+ testData2.groupBy(lit(3), lit(4)).agg(lit(6), 'b, sum("b")),
+ Seq(Row(3, 4, 6, 1, 3), Row(3, 4, 6, 2, 6)))
+
+ checkAnswer(
+ spark.sql("SELECT 3, 4, SUM(b) FROM testData2 GROUP BY 1, 2"),
+ Seq(Row(3, 4, 9)))
+ checkAnswer(
+ spark.sql("SELECT 3 AS c, 4 AS d, SUM(b) FROM testData2 GROUP BY c, d"),
+ Seq(Row(3, 4, 9)))
+ }
}
http://git-wip-us.apache.org/repos/asf/spark/blob/894d5a45/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
index b41ff3f..5eb34e5 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -2023,4 +2023,10 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
assert(df1.join(df2, $"t1.i" === $"t2.i").cache().count() == 1)
}
}
+
+ test("order-by ordinal.") {
+ checkAnswer(
+ testData2.select(lit(7), 'a, 'b).orderBy(lit(1), lit(2), lit(3)),
+ Seq(Row(7, 1, 1), Row(7, 1, 2), Row(7, 2, 1), Row(7, 2, 2), Row(7, 3, 1), Row(7, 3, 2)))
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org