You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by li...@apache.org on 2017/09/13 17:06:56 UTC
spark git commit: [SPARK-21980][SQL] References in grouping functions
should be indexed with semanticEquals
Repository: spark
Updated Branches:
refs/heads/master b6ef1f57b -> 21c4450fb
[SPARK-21980][SQL] References in grouping functions should be indexed with semanticEquals
## What changes were proposed in this pull request?
https://issues.apache.org/jira/browse/SPARK-21980
This PR fixes an issue in the ResolveGroupingAnalytics rule, which indexes the column references in grouping functions without taking the case-sensitivity configuration into account.
The problem can be reproduced by:
`val df = spark.createDataFrame(Seq((1, 1), (2, 1), (2, 2))).toDF("a", "b")
df.cube("a").agg(grouping("A")).show()`
## How was this patch tested?
A unit test was added to DataFrameAggregateSuite.
Author: donnyzone <we...@gmail.com>
Closes #19202 from DonnyZone/ResolveGroupingAnalytics.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/21c4450f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/21c4450f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/21c4450f
Branch: refs/heads/master
Commit: 21c4450fb24635fab6481a3756fefa9c6f6d6235
Parents: b6ef1f5
Author: donnyzone <we...@gmail.com>
Authored: Wed Sep 13 10:06:53 2017 -0700
Committer: gatorsmile <ga...@gmail.com>
Committed: Wed Sep 13 10:06:53 2017 -0700
----------------------------------------------------------------------
.../spark/sql/catalyst/analysis/Analyzer.scala | 2 +-
.../apache/spark/sql/DataFrameAggregateSuite.scala | 16 ++++++++++++++++
2 files changed, 17 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/21c4450f/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 1e934d0..0880bd6 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -314,7 +314,7 @@ class Analyzer(
s"grouping columns (${groupByExprs.mkString(",")})")
}
case e @ Grouping(col: Expression) =>
- val idx = groupByExprs.indexOf(col)
+ val idx = groupByExprs.indexWhere(_.semanticEquals(col))
if (idx >= 0) {
Alias(Cast(BitwiseAnd(ShiftRight(gid, Literal(groupByExprs.length - 1 - idx)),
Literal(1)), ByteType), toPrettySQL(e))()
http://git-wip-us.apache.org/repos/asf/spark/blob/21c4450f/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
index affe971..8549eac 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
@@ -190,6 +190,22 @@ class DataFrameAggregateSuite extends QueryTest with SharedSQLContext {
)
}
+ test("SPARK-21980: References in grouping functions should be indexed with semanticEquals") {
+ checkAnswer(
+ courseSales.cube("course", "year")
+ .agg(grouping("CouRse"), grouping("year")),
+ Row("Java", 2012, 0, 0) ::
+ Row("Java", 2013, 0, 0) ::
+ Row("Java", null, 0, 1) ::
+ Row("dotNET", 2012, 0, 0) ::
+ Row("dotNET", 2013, 0, 0) ::
+ Row("dotNET", null, 0, 1) ::
+ Row(null, 2012, 1, 0) ::
+ Row(null, 2013, 1, 0) ::
+ Row(null, null, 1, 1) :: Nil
+ )
+ }
+
test("rollup overlapping columns") {
checkAnswer(
testData2.rollup($"a" + $"b" as "foo", $"b" as "bar").agg(sum($"a" - $"b") as "foo"),
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org