Posted to commits@spark.apache.org by we...@apache.org on 2018/09/27 13:19:35 UTC

spark git commit: [SPARK-24341][SQL][FOLLOWUP] remove duplicated error checking

Repository: spark
Updated Branches:
  refs/heads/master f856fe483 -> a1adde540


[SPARK-24341][SQL][FOLLOWUP] remove duplicated error checking

## What changes were proposed in this pull request?

There are two places where we check for a problematic `InSubquery`: the rule `ResolveSubquery` and `InSubquery.checkInputDataTypes`. We should unify them.
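
For illustration, this is the kind of query the check rejects (the query and the post-patch error text are taken from the `in-basic.sql.out` changes in this diff; the temporary-view definitions and a running `spark` session are assumed):

```scala
// Hypothetical setup; in-basic.sql.out only shows the failing query and error.
spark.sql("CREATE TEMPORARY VIEW tab_a AS SELECT 1 AS a1, 2 AS b1")
spark.sql("CREATE TEMPORARY VIEW tab_b AS SELECT 1 AS a2, 2 AS b2")

// Two columns on the left hand side, one (struct) column from the subquery:
spark.sql("select 1 from tab_a where (a1, b1) not in (select (a2, b2) from tab_b)")
// org.apache.spark.sql.AnalysisException:
// cannot resolve '(named_struct('a1', tab_a.`a1`, 'b1', tab_a.`b1`) IN (listquery()))'
// due to data type mismatch:
// The number of columns in the left hand side of an IN subquery does not match the
// number of columns in the output of subquery. ...
```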

## How was this patch tested?

Existing tests.

Closes #22563 from cloud-fan/followup.

Authored-by: Wenchen Fan <we...@databricks.com>
Signed-off-by: Wenchen Fan <we...@databricks.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a1adde54
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a1adde54
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a1adde54

Branch: refs/heads/master
Commit: a1adde54086469b45950946d9143d17daab01f18
Parents: f856fe4
Author: Wenchen Fan <we...@databricks.com>
Authored: Thu Sep 27 21:19:25 2018 +0800
Committer: Wenchen Fan <we...@databricks.com>
Committed: Thu Sep 27 21:19:25 2018 +0800

----------------------------------------------------------------------
 .../spark/sql/catalyst/analysis/Analyzer.scala  | 16 +-----
 .../sql/catalyst/expressions/predicates.scala   | 60 ++++++++++----------
 .../sql-tests/results/datetime.sql.out          |  5 +-
 .../results/higher-order-functions.sql.out      |  1 +
 .../subquery/in-subquery/in-basic.sql.out       | 10 ++--
 .../negative-cases/subq-input-typecheck.sql.out | 20 +++----
 6 files changed, 49 insertions(+), 63 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/a1adde54/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index e3b1712..7034dfd 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -1436,21 +1436,7 @@ class Analyzer(
           val expr = resolveSubQuery(l, plans)((plan, exprs) => {
             ListQuery(plan, exprs, exprId, plan.output)
           })
-          val subqueryOutput = expr.plan.output
-          val resolvedIn = InSubquery(values, expr.asInstanceOf[ListQuery])
-          if (values.length != subqueryOutput.length) {
-            throw new AnalysisException(
-              s"""Cannot analyze ${resolvedIn.sql}.
-                 |The number of columns in the left hand side of an IN subquery does not match the
-                 |number of columns in the output of subquery.
-                 |#columns in left hand side: ${values.length}
-                 |#columns in right hand side: ${subqueryOutput.length}
-                 |Left side columns:
-                 |[${values.map(_.sql).mkString(", ")}]
-                 |Right side columns:
-                 |[${subqueryOutput.map(_.sql).mkString(", ")}]""".stripMargin)
-          }
-          resolvedIn
+          InSubquery(values, expr.asInstanceOf[ListQuery])
       }
     }
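
With the duplicated check removed, `ResolveSubquery` now just builds the `InSubquery`, and the arity error surfaces through the generic type-check path instead. Roughly (a paraphrase of Spark's `CheckAnalysis`, not part of this diff), any expression whose input types fail to check is reported like this:

```scala
// Paraphrased sketch of the CheckAnalysis behavior that now reports this error;
// it is why the .sql.out files below switch from "Cannot analyze ..." to
// "cannot resolve ... due to data type mismatch: ...".
case e: Expression if e.checkInputDataTypes().isFailure =>
  e.checkInputDataTypes() match {
    case TypeCheckResult.TypeCheckFailure(message) =>
      e.failAnalysis(s"cannot resolve '${e.sql}' due to data type mismatch: $message")
  }
```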
 

http://git-wip-us.apache.org/repos/asf/spark/blob/a1adde54/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala
index 149bd79..2125340 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala
@@ -144,7 +144,7 @@ case class Not(child: Expression)
 case class InSubquery(values: Seq[Expression], query: ListQuery)
   extends Predicate with Unevaluable {
 
-  @transient lazy val value: Expression = if (values.length > 1) {
+  @transient private lazy val value: Expression = if (values.length > 1) {
     CreateNamedStruct(values.zipWithIndex.flatMap {
       case (v: NamedExpression, _) => Seq(Literal(v.name), v)
       case (v, idx) => Seq(Literal(s"_$idx"), v)
@@ -155,37 +155,35 @@ case class InSubquery(values: Seq[Expression], query: ListQuery)
 
 
   override def checkInputDataTypes(): TypeCheckResult = {
-    val mismatchOpt = !DataType.equalsStructurally(query.dataType, value.dataType,
-      ignoreNullability = true)
-    if (mismatchOpt) {
-      if (values.length != query.childOutputs.length) {
-        TypeCheckResult.TypeCheckFailure(
-          s"""
-             |The number of columns in the left hand side of an IN subquery does not match the
-             |number of columns in the output of subquery.
-             |#columns in left hand side: ${values.length}.
-             |#columns in right hand side: ${query.childOutputs.length}.
-             |Left side columns:
-             |[${values.map(_.sql).mkString(", ")}].
-             |Right side columns:
-             |[${query.childOutputs.map(_.sql).mkString(", ")}].""".stripMargin)
-      } else {
-        val mismatchedColumns = values.zip(query.childOutputs).flatMap {
-          case (l, r) if l.dataType != r.dataType =>
-            Seq(s"(${l.sql}:${l.dataType.catalogString}, ${r.sql}:${r.dataType.catalogString})")
-          case _ => None
-        }
-        TypeCheckResult.TypeCheckFailure(
-          s"""
-             |The data type of one or more elements in the left hand side of an IN subquery
-             |is not compatible with the data type of the output of the subquery
-             |Mismatched columns:
-             |[${mismatchedColumns.mkString(", ")}]
-             |Left side:
-             |[${values.map(_.dataType.catalogString).mkString(", ")}].
-             |Right side:
-             |[${query.childOutputs.map(_.dataType.catalogString).mkString(", ")}].""".stripMargin)
+    if (values.length != query.childOutputs.length) {
+      TypeCheckResult.TypeCheckFailure(
+        s"""
+           |The number of columns in the left hand side of an IN subquery does not match the
+           |number of columns in the output of subquery.
+           |#columns in left hand side: ${values.length}.
+           |#columns in right hand side: ${query.childOutputs.length}.
+           |Left side columns:
+           |[${values.map(_.sql).mkString(", ")}].
+           |Right side columns:
+           |[${query.childOutputs.map(_.sql).mkString(", ")}].""".stripMargin)
+    } else if (!DataType.equalsStructurally(
+      query.dataType, value.dataType, ignoreNullability = true)) {
+
+      val mismatchedColumns = values.zip(query.childOutputs).flatMap {
+        case (l, r) if l.dataType != r.dataType =>
+          Seq(s"(${l.sql}:${l.dataType.catalogString}, ${r.sql}:${r.dataType.catalogString})")
+        case _ => None
       }
+      TypeCheckResult.TypeCheckFailure(
+        s"""
+           |The data type of one or more elements in the left hand side of an IN subquery
+           |is not compatible with the data type of the output of the subquery
+           |Mismatched columns:
+           |[${mismatchedColumns.mkString(", ")}]
+           |Left side:
+           |[${values.map(_.dataType.catalogString).mkString(", ")}].
+           |Right side:
+           |[${query.childOutputs.map(_.dataType.catalogString).mkString(", ")}].""".stripMargin)
     } else {
       TypeUtils.checkForOrderingExpr(value.dataType, s"function $prettyName")
     }
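
The unified check compares arity first, then falls back to `DataType.equalsStructurally` for the type comparison. A small sketch of that method's semantics (the struct values here are illustrative; the method lives on the `DataType` companion object in `org.apache.spark.sql.types`, and its visibility may be limited to Spark's own sql packages):

```scala
import org.apache.spark.sql.types._

// Field names differ but the types line up positionally: structurally equal.
val lhs = new StructType().add("_0", IntegerType).add("_1", StringType)
val rhs = new StructType().add("a", IntegerType).add("b", StringType)
DataType.equalsStructurally(lhs, rhs, ignoreNullability = true)   // true

// A positional type mismatch (StringType vs. LongType): not equal, which
// lands in the second TypeCheckFailure branch above.
val bad = new StructType().add("a", IntegerType).add("b", LongType)
DataType.equalsStructurally(lhs, bad, ignoreNullability = true)   // false
```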

http://git-wip-us.apache.org/repos/asf/spark/blob/a1adde54/sql/core/src/test/resources/sql-tests/results/datetime.sql.out
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out
index 4e1cfa6..63aa004 100644
--- a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out
@@ -82,9 +82,10 @@ struct<a:int,b:int>
 1	2
 2	3
 
+
 -- !query 9
 select weekday('2007-02-03'), weekday('2009-07-30'), weekday('2017-05-27'), weekday(null), weekday('1582-10-15 13:10:15')
--- !query 3 schema
+-- !query 9 schema
 struct<weekday(CAST(2007-02-03 AS DATE)):int,weekday(CAST(2009-07-30 AS DATE)):int,weekday(CAST(2017-05-27 AS DATE)):int,weekday(CAST(NULL AS DATE)):int,weekday(CAST(1582-10-15 13:10:15 AS DATE)):int>
--- !query 3 output
+-- !query 9 output
 5	3	5	NULL	4

http://git-wip-us.apache.org/repos/asf/spark/blob/a1adde54/sql/core/src/test/resources/sql-tests/results/higher-order-functions.sql.out
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/results/higher-order-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/higher-order-functions.sql.out
index 32d20d1..1b7c6f4 100644
--- a/sql/core/src/test/resources/sql-tests/results/higher-order-functions.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/higher-order-functions.sql.out
@@ -201,6 +201,7 @@ struct<>
 -- !query 20 output
 
 
+
 -- !query 21
 select transform_keys(ys, (k, v) -> k) as v from nested
 -- !query 21 schema

http://git-wip-us.apache.org/repos/asf/spark/blob/a1adde54/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-basic.sql.out
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-basic.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-basic.sql.out
index 088db55..686fe49 100644
--- a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-basic.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-basic.sql.out
@@ -41,15 +41,15 @@ select 1 from tab_a where (a1, b1) not in (select (a2, b2) from tab_b)
 struct<>
 -- !query 4 output
 org.apache.spark.sql.AnalysisException
-Cannot analyze (named_struct('a1', tab_a.`a1`, 'b1', tab_a.`b1`) IN (listquery())).
+cannot resolve '(named_struct('a1', tab_a.`a1`, 'b1', tab_a.`b1`) IN (listquery()))' due to data type mismatch: 
 The number of columns in the left hand side of an IN subquery does not match the
 number of columns in the output of subquery.
-#columns in left hand side: 2
-#columns in right hand side: 1
+#columns in left hand side: 2.
+#columns in right hand side: 1.
 Left side columns:
-[tab_a.`a1`, tab_a.`b1`]
+[tab_a.`a1`, tab_a.`b1`].
 Right side columns:
-[`named_struct(a2, a2, b2, b2)`];
+[`named_struct(a2, a2, b2, b2)`].;
 
 
 -- !query 5

http://git-wip-us.apache.org/repos/asf/spark/blob/a1adde54/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/subq-input-typecheck.sql.out
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/subq-input-typecheck.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/subq-input-typecheck.sql.out
index c52e570..dcd3005 100644
--- a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/subq-input-typecheck.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/subq-input-typecheck.sql.out
@@ -92,15 +92,15 @@ t1a IN (SELECT t2a, t2b
 struct<>
 -- !query 7 output
 org.apache.spark.sql.AnalysisException
-Cannot analyze (t1.`t1a` IN (listquery(t1.`t1a`))).
+cannot resolve '(t1.`t1a` IN (listquery(t1.`t1a`)))' due to data type mismatch: 
 The number of columns in the left hand side of an IN subquery does not match the
 number of columns in the output of subquery.
-#columns in left hand side: 1
-#columns in right hand side: 2
+#columns in left hand side: 1.
+#columns in right hand side: 2.
 Left side columns:
-[t1.`t1a`]
+[t1.`t1a`].
 Right side columns:
-[t2.`t2a`, t2.`t2b`];
+[t2.`t2a`, t2.`t2b`].;
 
 
 -- !query 8
@@ -113,15 +113,15 @@ WHERE
 struct<>
 -- !query 8 output
 org.apache.spark.sql.AnalysisException
-Cannot analyze (named_struct('t1a', t1.`t1a`, 't1b', t1.`t1b`) IN (listquery(t1.`t1a`))).
+cannot resolve '(named_struct('t1a', t1.`t1a`, 't1b', t1.`t1b`) IN (listquery(t1.`t1a`)))' due to data type mismatch: 
 The number of columns in the left hand side of an IN subquery does not match the
 number of columns in the output of subquery.
-#columns in left hand side: 2
-#columns in right hand side: 1
+#columns in left hand side: 2.
+#columns in right hand side: 1.
 Left side columns:
-[t1.`t1a`, t1.`t1b`]
+[t1.`t1a`, t1.`t1b`].
 Right side columns:
-[t2.`t2a`];
+[t2.`t2a`].;
 
 
 -- !query 9

