You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by li...@apache.org on 2017/01/12 17:47:01 UTC
spark git commit: [SPARK-17237][SQL] Remove backticks in a pivot
result schema
Repository: spark
Updated Branches:
refs/heads/master 2bc4d4e28 -> 5585ed93b
[SPARK-17237][SQL] Remove backticks in a pivot result schema
## What changes were proposed in this pull request?
Pivoting adds backticks (e.g. 3_count(\`c\`)) to column names and, in some cases,
this causes analysis exceptions such as:
```
scala> val df = Seq((2, 3, 4), (3, 4, 5)).toDF("a", "x", "y")
scala> df.groupBy("a").pivot("x").agg(count("y"), avg("y")).na.fill(0)
org.apache.spark.sql.AnalysisException: syntax error in attribute name: `3_count(`y`)`;
at org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute$.e$1(unresolved.scala:134)
at org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute$.parseAttributeName(unresolved.scala:144)
...
```
So, this PR proposes to remove these backticks from column names.
## How was this patch tested?
Added a test in `DataFrameAggregateSuite`.
Author: Takeshi YAMAMURO <li...@gmail.com>
Closes #14812 from maropu/SPARK-17237.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5585ed93
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5585ed93
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5585ed93
Branch: refs/heads/master
Commit: 5585ed93b09bc05cdd7a731650eca50d43d7159b
Parents: 2bc4d4e
Author: Takeshi YAMAMURO <li...@gmail.com>
Authored: Thu Jan 12 09:46:53 2017 -0800
Committer: gatorsmile <ga...@gmail.com>
Committed: Thu Jan 12 09:46:53 2017 -0800
----------------------------------------------------------------------
.../org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 2 +-
.../scala/org/apache/spark/sql/DataFrameAggregateSuite.scala | 8 ++++++++
.../scala/org/apache/spark/sql/DataFramePivotSuite.scala | 2 +-
3 files changed, 10 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/5585ed93/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 3c58832..1957df8 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -477,7 +477,7 @@ class Analyzer(
} else {
val suffix = aggregate match {
case n: NamedExpression => n.name
- case _ => aggregate.sql
+ case _ => toPrettySQL(aggregate)
}
value + "_" + suffix
}
http://git-wip-us.apache.org/repos/asf/spark/blob/5585ed93/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
index 7853b22fe..e707912 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
@@ -530,4 +530,12 @@ class DataFrameAggregateSuite extends QueryTest with SharedSQLContext {
limit2Df.groupBy("id").count().select($"id"),
limit2Df.select($"id"))
}
+
+ test("SPARK-17237 remove backticks in a pivot result schema") {
+ val df = Seq((2, 3, 4), (3, 4, 5)).toDF("a", "x", "y")
+ checkAnswer(
+ df.groupBy("a").pivot("x").agg(count("y"), avg("y")).na.fill(0),
+ Seq(Row(3, 0, 0.0, 1, 5.0), Row(2, 1, 4.0, 0, 0.0))
+ )
+ }
}
http://git-wip-us.apache.org/repos/asf/spark/blob/5585ed93/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala
index a8d854c..51ffe34 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala
@@ -200,7 +200,7 @@ class DataFramePivotSuite extends QueryTest with SharedSQLContext{
test("pivot preserves aliases if given") {
assertResult(
- Array("year", "dotNET_foo", "dotNET_avg(`earnings`)", "Java_foo", "Java_avg(`earnings`)")
+ Array("year", "dotNET_foo", "dotNET_avg(earnings)", "Java_foo", "Java_avg(earnings)")
)(
courseSales.groupBy($"year")
.pivot("course", Seq("dotNET", "Java"))
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org