You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by rx...@apache.org on 2016/01/07 08:46:16 UTC
spark git commit: [SPARK-12542][SQL] support except/intersect in HiveQl
Repository: spark
Updated Branches:
refs/heads/master 6a1c864ab -> fd1dcfaf2
[SPARK-12542][SQL] support except/intersect in HiveQl
Parse SQL queries that use EXCEPT/INTERSECT in the FROM clause for HiveQL.
Author: Davies Liu <da...@databricks.com>
Closes #10622 from davies/intersect.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fd1dcfaf
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fd1dcfaf
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fd1dcfaf
Branch: refs/heads/master
Commit: fd1dcfaf2608c2cc3a439ed3ca044ae655982306
Parents: 6a1c864
Author: Davies Liu <da...@databricks.com>
Authored: Wed Jan 6 23:46:12 2016 -0800
Committer: Reynold Xin <rx...@databricks.com>
Committed: Wed Jan 6 23:46:12 2016 -0800
----------------------------------------------------------------------
.../spark/sql/catalyst/parser/SparkSqlLexer.g | 1 +
.../spark/sql/catalyst/parser/SparkSqlParser.g | 12 +++++---
.../apache/spark/sql/catalyst/CatalystQl.scala | 7 ++++-
.../spark/sql/catalyst/CatalystQlSuite.scala | 32 ++++++++++++++++++++
.../sql/hive/execution/HiveQuerySuite.scala | 18 +++++++++++
5 files changed, 65 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/fd1dcfaf/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlLexer.g
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlLexer.g b/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlLexer.g
index e01e710..44a63fb 100644
--- a/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlLexer.g
+++ b/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlLexer.g
@@ -103,6 +103,7 @@ KW_CLUSTER: 'CLUSTER';
KW_DISTRIBUTE: 'DISTRIBUTE';
KW_SORT: 'SORT';
KW_UNION: 'UNION';
+KW_EXCEPT: 'EXCEPT';
KW_LOAD: 'LOAD';
KW_EXPORT: 'EXPORT';
KW_IMPORT: 'IMPORT';
http://git-wip-us.apache.org/repos/asf/spark/blob/fd1dcfaf/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlParser.g
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlParser.g b/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlParser.g
index 4afce30..cf8a565 100644
--- a/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlParser.g
+++ b/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SparkSqlParser.g
@@ -88,6 +88,8 @@ TOK_DISTRIBUTEBY;
TOK_SORTBY;
TOK_UNIONALL;
TOK_UNIONDISTINCT;
+TOK_EXCEPT;
+TOK_INTERSECT;
TOK_JOIN;
TOK_LEFTOUTERJOIN;
TOK_RIGHTOUTERJOIN;
@@ -2122,6 +2124,8 @@ setOperator
@after { popMsg(state); }
: KW_UNION KW_ALL -> ^(TOK_UNIONALL)
| KW_UNION KW_DISTINCT? -> ^(TOK_UNIONDISTINCT)
+ | KW_EXCEPT -> ^(TOK_EXCEPT)
+ | KW_INTERSECT -> ^(TOK_INTERSECT)
;
queryStatementExpression[boolean topLevel]
@@ -2242,7 +2246,7 @@ setOpSelectStatement[CommonTree t, boolean topLevel]
^(TOK_QUERY
^(TOK_FROM
^(TOK_SUBQUERY
- ^(TOK_UNIONALL {$setOpSelectStatement.tree} $b)
+ ^($u {$setOpSelectStatement.tree} $b)
{adaptor.create(Identifier, generateUnionAlias())}
)
)
@@ -2252,12 +2256,12 @@ setOpSelectStatement[CommonTree t, boolean topLevel]
)
)
-> {$setOpSelectStatement.tree != null && $u.tree.getType()!=SparkSqlParser.TOK_UNIONDISTINCT}?
- ^(TOK_UNIONALL {$setOpSelectStatement.tree} $b)
+ ^($u {$setOpSelectStatement.tree} $b)
-> {$setOpSelectStatement.tree == null && $u.tree.getType()==SparkSqlParser.TOK_UNIONDISTINCT}?
^(TOK_QUERY
^(TOK_FROM
^(TOK_SUBQUERY
- ^(TOK_UNIONALL {$t} $b)
+ ^($u {$t} $b)
{adaptor.create(Identifier, generateUnionAlias())}
)
)
@@ -2266,7 +2270,7 @@ setOpSelectStatement[CommonTree t, boolean topLevel]
^(TOK_SELECTDI ^(TOK_SELEXPR TOK_ALLCOLREF))
)
)
- -> ^(TOK_UNIONALL {$t} $b)
+ -> ^($u {$t} $b)
)+
o=orderByClause?
c=clusterByClause?
http://git-wip-us.apache.org/repos/asf/spark/blob/fd1dcfaf/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystQl.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystQl.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystQl.scala
index 42bdf25..1eda4a9 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystQl.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystQl.scala
@@ -399,9 +399,14 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C
// return With plan if there is CTE
cteRelations.map(With(query, _)).getOrElse(query)
- // HIVE-9039 renamed TOK_UNION => TOK_UNIONALL while adding TOK_UNIONDISTINCT
case Token("TOK_UNIONALL", left :: right :: Nil) =>
Union(nodeToPlan(left), nodeToPlan(right))
+ case Token("TOK_UNIONDISTINCT", left :: right :: Nil) =>
+ Distinct(Union(nodeToPlan(left), nodeToPlan(right)))
+ case Token("TOK_EXCEPT", left :: right :: Nil) =>
+ Except(nodeToPlan(left), nodeToPlan(right))
+ case Token("TOK_INTERSECT", left :: right :: Nil) =>
+ Intersect(nodeToPlan(left), nodeToPlan(right))
case _ =>
noParseRule("Plan", node)
http://git-wip-us.apache.org/repos/asf/spark/blob/fd1dcfaf/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystQlSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystQlSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystQlSuite.scala
new file mode 100644
index 0000000..0fee97f
--- /dev/null
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystQlSuite.scala
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst
+
+import org.apache.spark.sql.catalyst.plans.PlanTest
+
+class CatalystQlSuite extends PlanTest {
+
+ test("parse union/except/intersect") {
+ val paresr = new CatalystQl()
+ paresr.createPlan("select * from t1 union all select * from t2")
+ paresr.createPlan("select * from t1 union distinct select * from t2")
+ paresr.createPlan("select * from t1 union select * from t2")
+ paresr.createPlan("select * from t1 except select * from t2")
+ paresr.createPlan("select * from t1 intersect select * from t2")
+ }
+}
http://git-wip-us.apache.org/repos/asf/spark/blob/fd1dcfaf/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index 98e22c2..fa99289 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -787,6 +787,24 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter {
assert(sql("select key from src having key > 490").collect().size < 100)
}
+ test("union/except/intersect") {
+ assertResult(Array(Row(1), Row(1))) {
+ sql("select 1 as a union all select 1 as a").collect()
+ }
+ assertResult(Array(Row(1))) {
+ sql("select 1 as a union distinct select 1 as a").collect()
+ }
+ assertResult(Array(Row(1))) {
+ sql("select 1 as a union select 1 as a").collect()
+ }
+ assertResult(Array()) {
+ sql("select 1 as a except select 1 as a").collect()
+ }
+ assertResult(Array(Row(1))) {
+ sql("select 1 as a intersect select 1 as a").collect()
+ }
+ }
+
test("SPARK-5383 alias for udfs with multi output columns") {
assert(
sql("select stack(2, key, value, key, value) as (a, b) from src limit 5")
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org