You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by rx...@apache.org on 2016/01/06 09:40:17 UTC

spark git commit: [SPARK-12578][SQL] Distinct should not be silently ignored when used in an aggregate function with OVER clause

Repository: spark
Updated Branches:
  refs/heads/master d1fea4136 -> b2467b381


[SPARK-12578][SQL] Distinct should not be silently ignored when used in an aggregate function with OVER clause

JIRA: https://issues.apache.org/jira/browse/SPARK-12578

This is a small update to the Hive parser. We should keep the DISTINCT keyword when it is used in an aggregate function with an OVER clause, so that CheckAnalysis will detect it and throw an exception later instead of the keyword being silently ignored.

Author: Liang-Chi Hsieh <vi...@gmail.com>

Closes #10557 from viirya/keep-distinct-hivesql.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b2467b38
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b2467b38
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b2467b38

Branch: refs/heads/master
Commit: b2467b381096804b862990d9ecda554f67e07ee1
Parents: d1fea41
Author: Liang-Chi Hsieh <vi...@gmail.com>
Authored: Wed Jan 6 00:40:14 2016 -0800
Committer: Reynold Xin <rx...@databricks.com>
Committed: Wed Jan 6 00:40:14 2016 -0800

----------------------------------------------------------------------
 .../apache/spark/sql/parser/IdentifiersParser.g |  2 +-
 .../sql/hive/execution/SQLQuerySuite.scala      | 21 ++++++++++++++++++++
 2 files changed, 22 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/b2467b38/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/IdentifiersParser.g
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/IdentifiersParser.g b/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/IdentifiersParser.g
index 5c3d7ef..9f1e168 100644
--- a/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/IdentifiersParser.g
+++ b/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/IdentifiersParser.g
@@ -195,7 +195,7 @@ function
     RPAREN (KW_OVER ws=window_specification)?
            -> {$star != null}? ^(TOK_FUNCTIONSTAR functionName $ws?)
            -> {$dist == null}? ^(TOK_FUNCTION functionName (selectExpression+)? $ws?)
-                            -> ^(TOK_FUNCTIONDI functionName (selectExpression+)?)
+                            -> ^(TOK_FUNCTIONDI functionName (selectExpression+)? $ws?)
     ;
 
 functionName

http://git-wip-us.apache.org/repos/asf/spark/blob/b2467b38/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
index bf65325..593fac2 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
@@ -915,6 +915,27 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
       ).map(i => Row(i._1, i._2, i._3, i._4)))
   }
 
+  test("window function: distinct should not be silently ignored") {
+    val data = Seq(
+      WindowData(1, "a", 5),
+      WindowData(2, "a", 6),
+      WindowData(3, "b", 7),
+      WindowData(4, "b", 8),
+      WindowData(5, "c", 9),
+      WindowData(6, "c", 10)
+    )
+    sparkContext.parallelize(data).toDF().registerTempTable("windowData")
+
+    val e = intercept[AnalysisException] {
+      sql(
+        """
+          |select month, area, product, sum(distinct product + 1) over (partition by 1 order by 2)
+          |from windowData
+        """.stripMargin)
+    }
+    assert(e.getMessage.contains("Distinct window functions are not supported"))
+  }
+
   test("window function: expressions in arguments of a window functions") {
     val data = Seq(
       WindowData(1, "a", 5),


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org