You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by hv...@apache.org on 2016/02/11 08:42:01 UTC
spark git commit: [SPARK-13235][SQL] Removed an Extra Distinct from
the Plan when Using Union in SQL
Repository: spark
Updated Branches:
refs/heads/master 1842c55d8 -> e88bff127
[SPARK-13235][SQL] Removed an Extra Distinct from the Plan when Using Union in SQL
Currently, the parser adds two `Distinct` operators to the plan when `Union` or `Union Distinct` is used in SQL. This PR removes the extra `Distinct` from the plan.
For example, before the fix, the following query has a plan with two `Distinct` operators:
```scala
sql("select * from t0 union select * from t0").explain(true)
```
```
== Parsed Logical Plan ==
'Project [unresolvedalias(*,None)]
+- 'Subquery u_2
+- 'Distinct
+- 'Project [unresolvedalias(*,None)]
+- 'Subquery u_1
+- 'Distinct
+- 'Union
:- 'Project [unresolvedalias(*,None)]
: +- 'UnresolvedRelation `t0`, None
+- 'Project [unresolvedalias(*,None)]
+- 'UnresolvedRelation `t0`, None
== Analyzed Logical Plan ==
id: bigint
Project [id#16L]
+- Subquery u_2
+- Distinct
+- Project [id#16L]
+- Subquery u_1
+- Distinct
+- Union
:- Project [id#16L]
: +- Subquery t0
: +- Relation[id#16L] ParquetRelation
+- Project [id#16L]
+- Subquery t0
+- Relation[id#16L] ParquetRelation
== Optimized Logical Plan ==
Aggregate [id#16L], [id#16L]
+- Aggregate [id#16L], [id#16L]
+- Union
:- Project [id#16L]
: +- Relation[id#16L] ParquetRelation
+- Project [id#16L]
+- Relation[id#16L] ParquetRelation
```
After the fix, the plan no longer contains the extra `Distinct`:
```
== Parsed Logical Plan ==
'Project [unresolvedalias(*,None)]
+- 'Subquery u_1
+- 'Distinct
+- 'Union
:- 'Project [unresolvedalias(*,None)]
: +- 'UnresolvedRelation `t0`, None
+- 'Project [unresolvedalias(*,None)]
+- 'UnresolvedRelation `t0`, None
== Analyzed Logical Plan ==
id: bigint
Project [id#17L]
+- Subquery u_1
+- Distinct
+- Union
:- Project [id#16L]
: +- Subquery t0
: +- Relation[id#16L] ParquetRelation
+- Project [id#16L]
+- Subquery t0
+- Relation[id#16L] ParquetRelation
== Optimized Logical Plan ==
Aggregate [id#17L], [id#17L]
+- Union
:- Project [id#16L]
: +- Relation[id#16L] ParquetRelation
+- Project [id#16L]
+- Relation[id#16L] ParquetRelation
```
Author: gatorsmile <ga...@gmail.com>
Closes #11120 from gatorsmile/unionDistinct.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e88bff12
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e88bff12
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e88bff12
Branch: refs/heads/master
Commit: e88bff12795a6134e2e7204996b603e948380e18
Parents: 1842c55
Author: gatorsmile <ga...@gmail.com>
Authored: Thu Feb 11 08:40:27 2016 +0100
Committer: Herman van Hovell <hv...@questtec.nl>
Committed: Thu Feb 11 08:40:27 2016 +0100
----------------------------------------------------------------------
.../spark/sql/catalyst/parser/SparkSqlParser.g | 28 +----------------
.../spark/sql/catalyst/CatalystQlSuite.scala | 33 ++++++++++++++++++--
2 files changed, 32 insertions(+), 29 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/e88bff12/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlParser.g
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlParser.g b/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlParser.g
index 9f2a5eb..24483cc 100644
--- a/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlParser.g
+++ b/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlParser.g
@@ -2370,34 +2370,8 @@ setOpSelectStatement[CommonTree t, boolean topLevel]
u=setOperator LPAREN b=simpleSelectStatement RPAREN
|
u=setOperator b=simpleSelectStatement)
- -> {$setOpSelectStatement.tree != null && $u.tree.getType()==SparkSqlParser.TOK_UNIONDISTINCT}?
- ^(TOK_QUERY
- ^(TOK_FROM
- ^(TOK_SUBQUERY
- ^($u {$setOpSelectStatement.tree} $b)
- {adaptor.create(Identifier, generateUnionAlias())}
- )
- )
- ^(TOK_INSERT
- ^(TOK_DESTINATION ^(TOK_DIR TOK_TMP_FILE))
- ^(TOK_SELECTDI ^(TOK_SELEXPR TOK_ALLCOLREF))
- )
- )
- -> {$setOpSelectStatement.tree != null && $u.tree.getType()!=SparkSqlParser.TOK_UNIONDISTINCT}?
+ -> {$setOpSelectStatement.tree != null}?
^($u {$setOpSelectStatement.tree} $b)
- -> {$setOpSelectStatement.tree == null && $u.tree.getType()==SparkSqlParser.TOK_UNIONDISTINCT}?
- ^(TOK_QUERY
- ^(TOK_FROM
- ^(TOK_SUBQUERY
- ^($u {$t} $b)
- {adaptor.create(Identifier, generateUnionAlias())}
- )
- )
- ^(TOK_INSERT
- ^(TOK_DESTINATION ^(TOK_DIR TOK_TMP_FILE))
- ^(TOK_SELECTDI ^(TOK_SELEXPR TOK_ALLCOLREF))
- )
- )
-> ^($u {$t} $b)
)+
o=orderByClause?
http://git-wip-us.apache.org/repos/asf/spark/blob/e88bff12/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystQlSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystQlSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystQlSuite.scala
index 682b77d..8d7d6b5 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystQlSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystQlSuite.scala
@@ -18,10 +18,10 @@
package org.apache.spark.sql.catalyst
import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.analysis.{UnresolvedAlias, UnresolvedAttribute, UnresolvedFunction}
+import org.apache.spark.sql.catalyst.analysis._
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans.PlanTest
-import org.apache.spark.sql.catalyst.plans.logical.{OneRowRelation, Project}
+import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.unsafe.types.CalendarInterval
class CatalystQlSuite extends PlanTest {
@@ -45,6 +45,35 @@ class CatalystQlSuite extends PlanTest {
comparePlans(parsed, expected)
}
+ test("test Union Distinct operator") {
+ val parsed1 = parser.parsePlan("SELECT * FROM t0 UNION SELECT * FROM t1")
+ val parsed2 = parser.parsePlan("SELECT * FROM t0 UNION DISTINCT SELECT * FROM t1")
+ val expected =
+ Project(UnresolvedAlias(UnresolvedStar(None)) :: Nil,
+ Subquery("u_1",
+ Distinct(
+ Union(
+ Project(UnresolvedAlias(UnresolvedStar(None)) :: Nil,
+ UnresolvedRelation(TableIdentifier("t0"), None)),
+ Project(UnresolvedAlias(UnresolvedStar(None)) :: Nil,
+ UnresolvedRelation(TableIdentifier("t1"), None))))))
+ comparePlans(parsed1, expected)
+ comparePlans(parsed2, expected)
+ }
+
+ test("test Union All operator") {
+ val parsed = parser.parsePlan("SELECT * FROM t0 UNION ALL SELECT * FROM t1")
+ val expected =
+ Project(UnresolvedAlias(UnresolvedStar(None)) :: Nil,
+ Subquery("u_1",
+ Union(
+ Project(UnresolvedAlias(UnresolvedStar(None)) :: Nil,
+ UnresolvedRelation(TableIdentifier("t0"), None)),
+ Project(UnresolvedAlias(UnresolvedStar(None)) :: Nil,
+ UnresolvedRelation(TableIdentifier("t1"), None)))))
+ comparePlans(parsed, expected)
+ }
+
test("support hive interval literal") {
def checkInterval(sql: String, result: CalendarInterval): Unit = {
val parsed = parser.parsePlan(sql)
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org