You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by hv...@apache.org on 2018/07/20 22:44:06 UTC
spark git commit: [SPARK-24488][SQL] Fix issue when generator is
aliased multiple times
Repository: spark
Updated Branches:
refs/heads/master f765bb782 -> 597bdeff2
[SPARK-24488][SQL] Fix issue when generator is aliased multiple times
## What changes were proposed in this pull request?
Currently, the Analyzer throws an exception if you try to nest a generator. However, it special-cases generators "nested" in an alias and allows that. If you try to alias a generator twice, the special case does not catch it, so an exception is thrown.
This PR trims the unnecessary, non-top-level aliases, so that the generator is allowed.
## How was this patch tested?
New tests in AnalysisSuite.
Author: Brandon Krieger <bk...@palantir.com>
Closes #21508 from bkrieger/bk/SPARK-24488.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/597bdeff
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/597bdeff
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/597bdeff
Branch: refs/heads/master
Commit: 597bdeff2d07d690287526ab0e722f80749014d2
Parents: f765bb7
Author: Brandon Krieger <bk...@palantir.com>
Authored: Sat Jul 21 00:44:00 2018 +0200
Committer: Herman van Hovell <hv...@databricks.com>
Committed: Sat Jul 21 00:44:00 2018 +0200
----------------------------------------------------------------------
.../spark/sql/catalyst/analysis/Analyzer.scala | 53 +++++++++++---------
.../sql/catalyst/analysis/AnalysisSuite.scala | 8 +++
2 files changed, 38 insertions(+), 23 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/597bdeff/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 957c468..866396c 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -1660,11 +1660,13 @@ class Analyzer(
expr.find(_.isInstanceOf[Generator]).isDefined
}
- private def hasNestedGenerator(expr: NamedExpression): Boolean = expr match {
- case UnresolvedAlias(_: Generator, _) => false
- case Alias(_: Generator, _) => false
- case MultiAlias(_: Generator, _) => false
- case other => hasGenerator(other)
+ private def hasNestedGenerator(expr: NamedExpression): Boolean = {
+ CleanupAliases.trimNonTopLevelAliases(expr) match {
+ case UnresolvedAlias(_: Generator, _) => false
+ case Alias(_: Generator, _) => false
+ case MultiAlias(_: Generator, _) => false
+ case other => hasGenerator(other)
+ }
}
private def trimAlias(expr: NamedExpression): Expression = expr match {
@@ -1705,24 +1707,26 @@ class Analyzer(
// Holds the resolved generator, if one exists in the project list.
var resolvedGenerator: Generate = null
- val newProjectList = projectList.flatMap {
- case AliasedGenerator(generator, names, outer) if generator.childrenResolved =>
- // It's a sanity check, this should not happen as the previous case will throw
- // exception earlier.
- assert(resolvedGenerator == null, "More than one generator found in SELECT.")
-
- resolvedGenerator =
- Generate(
- generator,
- unrequiredChildIndex = Nil,
- outer = outer,
- qualifier = None,
- generatorOutput = ResolveGenerate.makeGeneratorOutput(generator, names),
- child)
-
- resolvedGenerator.generatorOutput
- case other => other :: Nil
- }
+ val newProjectList = projectList
+ .map(CleanupAliases.trimNonTopLevelAliases(_).asInstanceOf[NamedExpression])
+ .flatMap {
+ case AliasedGenerator(generator, names, outer) if generator.childrenResolved =>
+ // It's a sanity check, this should not happen as the previous case will throw
+ // exception earlier.
+ assert(resolvedGenerator == null, "More than one generator found in SELECT.")
+
+ resolvedGenerator =
+ Generate(
+ generator,
+ unrequiredChildIndex = Nil,
+ outer = outer,
+ qualifier = None,
+ generatorOutput = ResolveGenerate.makeGeneratorOutput(generator, names),
+ child)
+
+ resolvedGenerator.generatorOutput
+ case other => other :: Nil
+ }
if (resolvedGenerator != null) {
Project(newProjectList, resolvedGenerator)
@@ -2433,6 +2437,7 @@ object CleanupAliases extends Rule[LogicalPlan] {
private def trimAliases(e: Expression): Expression = {
e.transformDown {
case Alias(child, _) => child
+ case MultiAlias(child, _) => child
}
}
@@ -2442,6 +2447,8 @@ object CleanupAliases extends Rule[LogicalPlan] {
exprId = a.exprId,
qualifier = a.qualifier,
explicitMetadata = Some(a.metadata))
+ case a: MultiAlias =>
+ a.copy(child = trimAliases(a.child))
case other => trimAliases(other)
}
http://git-wip-us.apache.org/repos/asf/spark/blob/597bdeff/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
index bbcdf6c..9e0db8d 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
@@ -575,4 +575,12 @@ class AnalysisSuite extends AnalysisTest with Matchers {
assertAnalysisSuccess(
Project(Seq(UnresolvedAttribute("temp0.a"), UnresolvedAttribute("temp1.a")), join))
}
+
+ test("SPARK-24488 Generator with multiple aliases") {
+ assertAnalysisSuccess(
+ listRelation.select(Explode('list).as("first_alias").as("second_alias")))
+ assertAnalysisSuccess(
+ listRelation.select(MultiAlias(MultiAlias(
+ PosExplode('list), Seq("first_pos", "first_val")), Seq("second_pos", "second_val"))))
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org