You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2017/06/19 07:51:29 UTC
spark git commit: [SPARK-21132][SQL] DISTINCT modifier of function
arguments should not be silently ignored
Repository: spark
Updated Branches:
refs/heads/master ea542d29b -> 9413b84b5
[SPARK-21132][SQL] DISTINCT modifier of function arguments should not be silently ignored
### What changes were proposed in this pull request?
We should not silently ignore `DISTINCT` when it is not supported in the function arguments. This PR is to block these cases and issue the error messages.
### How was this patch tested?
Added test cases for both regular functions and window functions
Author: Xiao Li <ga...@gmail.com>
Closes #18340 from gatorsmile/firstCount.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9413b84b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9413b84b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9413b84b
Branch: refs/heads/master
Commit: 9413b84b5a99e264816c61f72905b392c2f9cd35
Parents: ea542d2
Author: Xiao Li <ga...@gmail.com>
Authored: Mon Jun 19 15:51:21 2017 +0800
Committer: Wenchen Fan <we...@databricks.com>
Committed: Mon Jun 19 15:51:21 2017 +0800
----------------------------------------------------------------------
.../spark/sql/catalyst/analysis/Analyzer.scala | 14 ++++++++++++--
.../sql/catalyst/analysis/AnalysisErrorSuite.scala | 15 +++++++++++++--
.../spark/sql/catalyst/analysis/AnalysisTest.scala | 8 ++++++--
3 files changed, 31 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/9413b84b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 196b4a9..647fc0b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -1206,11 +1206,21 @@ class Analyzer(
// AggregateWindowFunctions are AggregateFunctions that can only be evaluated within
// the context of a Window clause. They do not need to be wrapped in an
// AggregateExpression.
- case wf: AggregateWindowFunction => wf
+ case wf: AggregateWindowFunction =>
+ if (isDistinct) {
+ failAnalysis(s"${wf.prettyName} does not support the modifier DISTINCT")
+ } else {
+ wf
+ }
// We get an aggregate function, we need to wrap it in an AggregateExpression.
case agg: AggregateFunction => AggregateExpression(agg, Complete, isDistinct)
// This function is not an aggregate function, just return the resolved one.
- case other => other
+ case other =>
+ if (isDistinct) {
+ failAnalysis(s"${other.prettyName} does not support the modifier DISTINCT")
+ } else {
+ other
+ }
}
}
}
http://git-wip-us.apache.org/repos/asf/spark/blob/9413b84b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
index d2ebca5..5050318 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
@@ -24,7 +24,8 @@ import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, Complete, Count, Max}
-import org.apache.spark.sql.catalyst.plans.{Cross, Inner, LeftOuter, RightOuter}
+import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
+import org.apache.spark.sql.catalyst.plans.{Cross, LeftOuter, RightOuter}
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData, MapData}
import org.apache.spark.sql.types._
@@ -152,7 +153,7 @@ class AnalysisErrorSuite extends AnalysisTest {
"not supported within a window function" :: Nil)
errorTest(
- "distinct window function",
+ "distinct aggregate function in window",
testRelation2.select(
WindowExpression(
AggregateExpression(Count(UnresolvedAttribute("b")), Complete, isDistinct = true),
@@ -163,6 +164,16 @@ class AnalysisErrorSuite extends AnalysisTest {
"Distinct window functions are not supported" :: Nil)
errorTest(
+ "distinct function",
+ CatalystSqlParser.parsePlan("SELECT hex(DISTINCT a) FROM TaBlE"),
+ "hex does not support the modifier DISTINCT" :: Nil)
+
+ errorTest(
+ "distinct window function",
+ CatalystSqlParser.parsePlan("SELECT percent_rank(DISTINCT a) over () FROM TaBlE"),
+ "percent_rank does not support the modifier DISTINCT" :: Nil)
+
+ errorTest(
"nested aggregate functions",
testRelation.groupBy('a)(
AggregateExpression(
http://git-wip-us.apache.org/repos/asf/spark/blob/9413b84b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala
index afc7ce4..edfa8c4 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala
@@ -17,10 +17,11 @@
package org.apache.spark.sql.catalyst.analysis
+import java.net.URI
import java.util.Locale
import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog}
+import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, InMemoryCatalog, SessionCatalog}
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.internal.SQLConf
@@ -32,7 +33,10 @@ trait AnalysisTest extends PlanTest {
private def makeAnalyzer(caseSensitive: Boolean): Analyzer = {
val conf = new SQLConf().copy(SQLConf.CASE_SENSITIVE -> caseSensitive)
- val catalog = new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, conf)
+ val catalog = new SessionCatalog(new InMemoryCatalog, FunctionRegistry.builtin, conf)
+ catalog.createDatabase(
+ CatalogDatabase("default", "", new URI("loc"), Map.empty),
+ ignoreIfExists = false)
catalog.createTempView("TaBlE", TestRelations.testRelation, overrideIfExists = true)
catalog.createTempView("TaBlE2", TestRelations.testRelation2, overrideIfExists = true)
catalog.createTempView("TaBlE3", TestRelations.testRelation3, overrideIfExists = true)
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org