You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2016/03/18 03:16:54 UTC
spark git commit: [SPARK-13976][SQL] do not remove sub-queries added by user when generate SQL

Repository: spark
Updated Branches:
  refs/heads/master 453455c47 -> 6037ed0a1


[SPARK-13976][SQL] do not remove sub-queries added by user when generate SQL

## What changes were proposed in this pull request?

We haven't figured out the corrected logical to add sub-queries yet, so we should not clear all sub-queries before generate SQL. This PR changed the logic to only remove sub-queries above table relation.

an example for this bug, original SQL: `SELECT a FROM (SELECT a FROM tbl) t WHERE a = 1`
before this PR, we will generate:
```
SELECT attr_1 AS a FROM
  SELECT attr_1 FROM (
    SELECT a AS attr_1 FROM tbl
  ) AS sub_q0
  WHERE attr_1 = 1
```
We missed a sub-query and this SQL string is illegal.

After this PR, we will generate:
```
SELECT attr_1 AS a FROM (
  SELECT attr_1 FROM (
    SELECT a AS attr_1 FROM tbl
  ) AS sub_q0
  WHERE attr_1 = 1
) AS t
```

TODO: for long term, we should find a way to add sub-queries correctly, so that arbitrary logical plans can be converted to SQL string.

## How was this patch tested?

`LogicalPlanToSQLSuite`

Author: Wenchen Fan <we...@databricks.com>

Closes #11786 from cloud-fan/bug-fix.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6037ed0a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6037ed0a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6037ed0a

Branch: refs/heads/master
Commit: 6037ed0a1d7ecbb77140ddf4d0192a1dc60316bb
Parents: 453455c
Author: Wenchen Fan <we...@databricks.com>
Authored: Fri Mar 18 10:16:48 2016 +0800
Committer: Wenchen Fan <we...@databricks.com>
Committed: Fri Mar 18 10:16:48 2016 +0800

----------------------------------------------------------------------
 .../scala/org/apache/spark/sql/hive/SQLBuilder.scala    | 12 ++++++++++--
 .../apache/spark/sql/hive/LogicalPlanToSQLSuite.scala   |  4 ++++
 2 files changed, 14 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/6037ed0a/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala
index 9fa8474..da05905 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala
@@ -390,8 +390,6 @@ class SQLBuilder(logicalPlan: LogicalPlan, sqlContext: SQLContext) extends Loggi
         // `Aggregate`s.
         CollapseProject),
       Batch("Recover Scoping Info", Once,
-        // Remove all sub queries, as we will insert new ones when it's necessary.
-        EliminateSubqueryAliases,
         // A logical plan is allowed to have same-name outputs with different qualifiers(e.g. the
         // `Join` operator). However, this kind of plan can't be put under a sub query as we will
         // erase and assign a new qualifier to all outputs and make it impossible to distinguish
@@ -400,6 +398,10 @@ class SQLBuilder(logicalPlan: LogicalPlan, sqlContext: SQLContext) extends Loggi
         // qualifiers, as attributes have unique names now and we don't need qualifiers to resolve
         // ambiguity.
         NormalizedAttribute,
+        // Our analyzer will add one or more sub-queries above table relation, this rule removes
+        // these sub-queries so that next rule can combine adjacent table relation and sample to
+        // SQLTable.
+        RemoveSubqueriesAboveSQLTable,
         // Finds the table relations and wrap them with `SQLTable`s.  If there are any `Sample`
         // operators on top of a table relation, merge the sample information into `SQLTable` of
         // that table relation, as we can only convert table sample to standard SQL string.
@@ -418,6 +420,12 @@ class SQLBuilder(logicalPlan: LogicalPlan, sqlContext: SQLContext) extends Loggi
       }
     }
 
+    object RemoveSubqueriesAboveSQLTable extends Rule[LogicalPlan] {
+      override def apply(plan: LogicalPlan): LogicalPlan = plan transformUp {
+        case SubqueryAlias(_, t @ ExtractSQLTable(_)) => t
+      }
+    }
+
     object ResolveSQLTable extends Rule[LogicalPlan] {
       override def apply(plan: LogicalPlan): LogicalPlan = plan.transformDown {
         case Sample(lowerBound, upperBound, _, _, ExtractSQLTable(table)) =>

http://git-wip-us.apache.org/repos/asf/spark/blob/6037ed0a/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala
index f3cb6f8..f86eba6 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala
@@ -737,4 +737,8 @@ class LogicalPlanToSQLSuite extends SQLBuilderTest with SQLTestUtils {
         |LIMIT 5
       """.stripMargin)
   }
+
+  test("filter after subquery") {
+    checkHiveQl("SELECT a FROM (SELECT key + 1 AS a FROM parquet_t1) t WHERE a > 5")
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org