You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2016/03/18 03:16:54 UTC
spark git commit: [SPARK-13976][SQL] do not remove sub-queries added
by user when generate SQL
Repository: spark
Updated Branches:
refs/heads/master 453455c47 -> 6037ed0a1
[SPARK-13976][SQL] do not remove sub-queries added by user when generate SQL
## What changes were proposed in this pull request?
We haven't figured out the correct logic for adding sub-queries yet, so we should not remove all sub-queries before generating SQL. This PR changes the logic to only remove the sub-queries directly above a table relation.
An example of this bug, given the original SQL: `SELECT a FROM (SELECT a FROM tbl) t WHERE a = 1`
Before this PR, we would generate:
```
SELECT attr_1 AS a FROM
SELECT attr_1 FROM (
SELECT a AS attr_1 FROM tbl
) AS sub_q0
WHERE attr_1 = 1
```
The outer sub-query (the parentheses and the `AS t` alias) is missing, so this SQL string is illegal.
After this PR, we will generate:
```
SELECT attr_1 AS a FROM (
SELECT attr_1 FROM (
SELECT a AS attr_1 FROM tbl
) AS sub_q0
WHERE attr_1 = 1
) AS t
```
TODO: in the long term, we should find a way to add sub-queries correctly, so that arbitrary logical plans can be converted to SQL strings.
## How was this patch tested?
`LogicalPlanToSQLSuite`
Author: Wenchen Fan <we...@databricks.com>
Closes #11786 from cloud-fan/bug-fix.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6037ed0a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6037ed0a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6037ed0a
Branch: refs/heads/master
Commit: 6037ed0a1d7ecbb77140ddf4d0192a1dc60316bb
Parents: 453455c
Author: Wenchen Fan <we...@databricks.com>
Authored: Fri Mar 18 10:16:48 2016 +0800
Committer: Wenchen Fan <we...@databricks.com>
Committed: Fri Mar 18 10:16:48 2016 +0800
----------------------------------------------------------------------
.../scala/org/apache/spark/sql/hive/SQLBuilder.scala | 12 ++++++++++--
.../apache/spark/sql/hive/LogicalPlanToSQLSuite.scala | 4 ++++
2 files changed, 14 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/6037ed0a/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala
index 9fa8474..da05905 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala
@@ -390,8 +390,6 @@ class SQLBuilder(logicalPlan: LogicalPlan, sqlContext: SQLContext) extends Loggi
// `Aggregate`s.
CollapseProject),
Batch("Recover Scoping Info", Once,
- // Remove all sub queries, as we will insert new ones when it's necessary.
- EliminateSubqueryAliases,
// A logical plan is allowed to have same-name outputs with different qualifiers(e.g. the
// `Join` operator). However, this kind of plan can't be put under a sub query as we will
// erase and assign a new qualifier to all outputs and make it impossible to distinguish
@@ -400,6 +398,10 @@ class SQLBuilder(logicalPlan: LogicalPlan, sqlContext: SQLContext) extends Loggi
// qualifiers, as attributes have unique names now and we don't need qualifiers to resolve
// ambiguity.
NormalizedAttribute,
+ // Our analyzer will add one or more sub-queries above table relation, this rule removes
+ // these sub-queries so that next rule can combine adjacent table relation and sample to
+ // SQLTable.
+ RemoveSubqueriesAboveSQLTable,
// Finds the table relations and wrap them with `SQLTable`s. If there are any `Sample`
// operators on top of a table relation, merge the sample information into `SQLTable` of
// that table relation, as we can only convert table sample to standard SQL string.
@@ -418,6 +420,12 @@ class SQLBuilder(logicalPlan: LogicalPlan, sqlContext: SQLContext) extends Loggi
}
}
+ object RemoveSubqueriesAboveSQLTable extends Rule[LogicalPlan] {
+ override def apply(plan: LogicalPlan): LogicalPlan = plan transformUp {
+ case SubqueryAlias(_, t @ ExtractSQLTable(_)) => t
+ }
+ }
+
object ResolveSQLTable extends Rule[LogicalPlan] {
override def apply(plan: LogicalPlan): LogicalPlan = plan.transformDown {
case Sample(lowerBound, upperBound, _, _, ExtractSQLTable(table)) =>
http://git-wip-us.apache.org/repos/asf/spark/blob/6037ed0a/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala
index f3cb6f8..f86eba6 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala
@@ -737,4 +737,8 @@ class LogicalPlanToSQLSuite extends SQLBuilderTest with SQLTestUtils {
|LIMIT 5
""".stripMargin)
}
+
+ test("filter after subquery") {
+ checkHiveQl("SELECT a FROM (SELECT key + 1 AS a FROM parquet_t1) t WHERE a > 5")
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org