You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2018/09/27 13:19:35 UTC
spark git commit: [SPARK-24341][SQL][FOLLOWUP] remove duplicated
error checking
Repository: spark
Updated Branches:
refs/heads/master f856fe483 -> a1adde540
[SPARK-24341][SQL][FOLLOWUP] remove duplicated error checking
## What changes were proposed in this pull request?
There are two places where we check for a problematic `InSubquery`: the rule `ResolveSubquery` and `InSubquery.checkInputDataTypes`. We should unify them.
## How was this patch tested?
existing tests
Closes #22563 from cloud-fan/followup.
Authored-by: Wenchen Fan <we...@databricks.com>
Signed-off-by: Wenchen Fan <we...@databricks.com>
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a1adde54
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a1adde54
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a1adde54
Branch: refs/heads/master
Commit: a1adde54086469b45950946d9143d17daab01f18
Parents: f856fe4
Author: Wenchen Fan <we...@databricks.com>
Authored: Thu Sep 27 21:19:25 2018 +0800
Committer: Wenchen Fan <we...@databricks.com>
Committed: Thu Sep 27 21:19:25 2018 +0800
----------------------------------------------------------------------
.../spark/sql/catalyst/analysis/Analyzer.scala | 16 +-----
.../sql/catalyst/expressions/predicates.scala | 60 ++++++++++----------
.../sql-tests/results/datetime.sql.out | 5 +-
.../results/higher-order-functions.sql.out | 1 +
.../subquery/in-subquery/in-basic.sql.out | 10 ++--
.../negative-cases/subq-input-typecheck.sql.out | 20 +++----
6 files changed, 49 insertions(+), 63 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/a1adde54/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index e3b1712..7034dfd 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -1436,21 +1436,7 @@ class Analyzer(
val expr = resolveSubQuery(l, plans)((plan, exprs) => {
ListQuery(plan, exprs, exprId, plan.output)
})
- val subqueryOutput = expr.plan.output
- val resolvedIn = InSubquery(values, expr.asInstanceOf[ListQuery])
- if (values.length != subqueryOutput.length) {
- throw new AnalysisException(
- s"""Cannot analyze ${resolvedIn.sql}.
- |The number of columns in the left hand side of an IN subquery does not match the
- |number of columns in the output of subquery.
- |#columns in left hand side: ${values.length}
- |#columns in right hand side: ${subqueryOutput.length}
- |Left side columns:
- |[${values.map(_.sql).mkString(", ")}]
- |Right side columns:
- |[${subqueryOutput.map(_.sql).mkString(", ")}]""".stripMargin)
- }
- resolvedIn
+ InSubquery(values, expr.asInstanceOf[ListQuery])
}
}
http://git-wip-us.apache.org/repos/asf/spark/blob/a1adde54/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala
index 149bd79..2125340 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala
@@ -144,7 +144,7 @@ case class Not(child: Expression)
case class InSubquery(values: Seq[Expression], query: ListQuery)
extends Predicate with Unevaluable {
- @transient lazy val value: Expression = if (values.length > 1) {
+ @transient private lazy val value: Expression = if (values.length > 1) {
CreateNamedStruct(values.zipWithIndex.flatMap {
case (v: NamedExpression, _) => Seq(Literal(v.name), v)
case (v, idx) => Seq(Literal(s"_$idx"), v)
@@ -155,37 +155,35 @@ case class InSubquery(values: Seq[Expression], query: ListQuery)
override def checkInputDataTypes(): TypeCheckResult = {
- val mismatchOpt = !DataType.equalsStructurally(query.dataType, value.dataType,
- ignoreNullability = true)
- if (mismatchOpt) {
- if (values.length != query.childOutputs.length) {
- TypeCheckResult.TypeCheckFailure(
- s"""
- |The number of columns in the left hand side of an IN subquery does not match the
- |number of columns in the output of subquery.
- |#columns in left hand side: ${values.length}.
- |#columns in right hand side: ${query.childOutputs.length}.
- |Left side columns:
- |[${values.map(_.sql).mkString(", ")}].
- |Right side columns:
- |[${query.childOutputs.map(_.sql).mkString(", ")}].""".stripMargin)
- } else {
- val mismatchedColumns = values.zip(query.childOutputs).flatMap {
- case (l, r) if l.dataType != r.dataType =>
- Seq(s"(${l.sql}:${l.dataType.catalogString}, ${r.sql}:${r.dataType.catalogString})")
- case _ => None
- }
- TypeCheckResult.TypeCheckFailure(
- s"""
- |The data type of one or more elements in the left hand side of an IN subquery
- |is not compatible with the data type of the output of the subquery
- |Mismatched columns:
- |[${mismatchedColumns.mkString(", ")}]
- |Left side:
- |[${values.map(_.dataType.catalogString).mkString(", ")}].
- |Right side:
- |[${query.childOutputs.map(_.dataType.catalogString).mkString(", ")}].""".stripMargin)
+ if (values.length != query.childOutputs.length) {
+ TypeCheckResult.TypeCheckFailure(
+ s"""
+ |The number of columns in the left hand side of an IN subquery does not match the
+ |number of columns in the output of subquery.
+ |#columns in left hand side: ${values.length}.
+ |#columns in right hand side: ${query.childOutputs.length}.
+ |Left side columns:
+ |[${values.map(_.sql).mkString(", ")}].
+ |Right side columns:
+ |[${query.childOutputs.map(_.sql).mkString(", ")}].""".stripMargin)
+ } else if (!DataType.equalsStructurally(
+ query.dataType, value.dataType, ignoreNullability = true)) {
+
+ val mismatchedColumns = values.zip(query.childOutputs).flatMap {
+ case (l, r) if l.dataType != r.dataType =>
+ Seq(s"(${l.sql}:${l.dataType.catalogString}, ${r.sql}:${r.dataType.catalogString})")
+ case _ => None
}
+ TypeCheckResult.TypeCheckFailure(
+ s"""
+ |The data type of one or more elements in the left hand side of an IN subquery
+ |is not compatible with the data type of the output of the subquery
+ |Mismatched columns:
+ |[${mismatchedColumns.mkString(", ")}]
+ |Left side:
+ |[${values.map(_.dataType.catalogString).mkString(", ")}].
+ |Right side:
+ |[${query.childOutputs.map(_.dataType.catalogString).mkString(", ")}].""".stripMargin)
} else {
TypeUtils.checkForOrderingExpr(value.dataType, s"function $prettyName")
}
http://git-wip-us.apache.org/repos/asf/spark/blob/a1adde54/sql/core/src/test/resources/sql-tests/results/datetime.sql.out
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out
index 4e1cfa6..63aa004 100644
--- a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out
@@ -82,9 +82,10 @@ struct<a:int,b:int>
1 2
2 3
+
-- !query 9
select weekday('2007-02-03'), weekday('2009-07-30'), weekday('2017-05-27'), weekday(null), weekday('1582-10-15 13:10:15')
--- !query 3 schema
+-- !query 9 schema
struct<weekday(CAST(2007-02-03 AS DATE)):int,weekday(CAST(2009-07-30 AS DATE)):int,weekday(CAST(2017-05-27 AS DATE)):int,weekday(CAST(NULL AS DATE)):int,weekday(CAST(1582-10-15 13:10:15 AS DATE)):int>
--- !query 3 output
+-- !query 9 output
5 3 5 NULL 4
http://git-wip-us.apache.org/repos/asf/spark/blob/a1adde54/sql/core/src/test/resources/sql-tests/results/higher-order-functions.sql.out
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/results/higher-order-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/higher-order-functions.sql.out
index 32d20d1..1b7c6f4 100644
--- a/sql/core/src/test/resources/sql-tests/results/higher-order-functions.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/higher-order-functions.sql.out
@@ -201,6 +201,7 @@ struct<>
-- !query 20 output
+
-- !query 21
select transform_keys(ys, (k, v) -> k) as v from nested
-- !query 21 schema
http://git-wip-us.apache.org/repos/asf/spark/blob/a1adde54/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-basic.sql.out
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-basic.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-basic.sql.out
index 088db55..686fe49 100644
--- a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-basic.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-basic.sql.out
@@ -41,15 +41,15 @@ select 1 from tab_a where (a1, b1) not in (select (a2, b2) from tab_b)
struct<>
-- !query 4 output
org.apache.spark.sql.AnalysisException
-Cannot analyze (named_struct('a1', tab_a.`a1`, 'b1', tab_a.`b1`) IN (listquery())).
+cannot resolve '(named_struct('a1', tab_a.`a1`, 'b1', tab_a.`b1`) IN (listquery()))' due to data type mismatch:
The number of columns in the left hand side of an IN subquery does not match the
number of columns in the output of subquery.
-#columns in left hand side: 2
-#columns in right hand side: 1
+#columns in left hand side: 2.
+#columns in right hand side: 1.
Left side columns:
-[tab_a.`a1`, tab_a.`b1`]
+[tab_a.`a1`, tab_a.`b1`].
Right side columns:
-[`named_struct(a2, a2, b2, b2)`];
+[`named_struct(a2, a2, b2, b2)`].;
-- !query 5
http://git-wip-us.apache.org/repos/asf/spark/blob/a1adde54/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/subq-input-typecheck.sql.out
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/subq-input-typecheck.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/subq-input-typecheck.sql.out
index c52e570..dcd3005 100644
--- a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/subq-input-typecheck.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/subq-input-typecheck.sql.out
@@ -92,15 +92,15 @@ t1a IN (SELECT t2a, t2b
struct<>
-- !query 7 output
org.apache.spark.sql.AnalysisException
-Cannot analyze (t1.`t1a` IN (listquery(t1.`t1a`))).
+cannot resolve '(t1.`t1a` IN (listquery(t1.`t1a`)))' due to data type mismatch:
The number of columns in the left hand side of an IN subquery does not match the
number of columns in the output of subquery.
-#columns in left hand side: 1
-#columns in right hand side: 2
+#columns in left hand side: 1.
+#columns in right hand side: 2.
Left side columns:
-[t1.`t1a`]
+[t1.`t1a`].
Right side columns:
-[t2.`t2a`, t2.`t2b`];
+[t2.`t2a`, t2.`t2b`].;
-- !query 8
@@ -113,15 +113,15 @@ WHERE
struct<>
-- !query 8 output
org.apache.spark.sql.AnalysisException
-Cannot analyze (named_struct('t1a', t1.`t1a`, 't1b', t1.`t1b`) IN (listquery(t1.`t1a`))).
+cannot resolve '(named_struct('t1a', t1.`t1a`, 't1b', t1.`t1b`) IN (listquery(t1.`t1a`)))' due to data type mismatch:
The number of columns in the left hand side of an IN subquery does not match the
number of columns in the output of subquery.
-#columns in left hand side: 2
-#columns in right hand side: 1
+#columns in left hand side: 2.
+#columns in right hand side: 1.
Left side columns:
-[t1.`t1a`, t1.`t1b`]
+[t1.`t1a`, t1.`t1b`].
Right side columns:
-[t2.`t2a`];
+[t2.`t2a`].;
-- !query 9
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org