You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2022/06/02 10:06:57 UTC

[spark] branch master updated: [SPARK-39354][SQL] Ensure show `Table or view not found` even if there are `dataTypeMismatchError` related to `Filter` at the same time

This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 89fdb8a6fb6 [SPARK-39354][SQL] Ensure show `Table or view not found` even if there are `dataTypeMismatchError` related to `Filter` at the same time
89fdb8a6fb6 is described below

commit 89fdb8a6fb6a669c458891b3abeba236e64b1e89
Author: yangjie01 <ya...@baidu.com>
AuthorDate: Thu Jun 2 13:06:14 2022 +0300

    [SPARK-39354][SQL] Ensure show `Table or view not found` even if there are `dataTypeMismatchError` related to `Filter` at the same time
    
    ### What changes were proposed in this pull request?
    After SPARK-38118, `dataTypeMismatchError` related to `Filter` will be checked and thrown in `RemoveTempResolvedColumn`; this causes a compatibility issue with the exception message presentation.
    
    For example, the following case:
    
    ```
    spark.sql("create table t1(user_id int, auct_end_dt date) using parquet;")
    spark.sql("select * from t1 join t2 on t1.user_id = t2.user_id where t1.auct_end_dt >= Date_sub('2020-12-27', 90)").show
    ```
    
    The expected message is
    
    ```
    Table or view not found: t2
    ```
    
    But the actual message is
    ```
    org.apache.spark.sql.AnalysisException: cannot resolve 'date_sub('2020-12-27', 90)' due to data type mismatch: argument 1 requires date type, however, ''2020-12-27'' is of string type.; line 1 pos 76
    ```
    
    For forward compatibility, this PR changes to only record `DATA_TYPE_MISMATCH_ERROR_MESSAGE` in the `RemoveTempResolvedColumn` check process, and moves the `failAnalysis` call to `CheckAnalysis#checkAnalysis`.
    
    ### Why are the changes needed?
    Fix analysis exception message compatibility.
    
    ### Does this PR introduce _any_ user-facing change?
    No
    
    ### How was this patch tested?
    Pass GitHub Actions and add a new test case.
    
    Closes #36746 from LuciferYang/SPARK-39354.
    
    Authored-by: yangjie01 <ya...@baidu.com>
    Signed-off-by: Max Gekk <ma...@gmail.com>
---
 .../apache/spark/sql/catalyst/analysis/Analyzer.scala   |  7 ++-----
 .../spark/sql/catalyst/analysis/CheckAnalysis.scala     | 17 ++++++++++++++++-
 .../spark/sql/catalyst/analysis/AnalysisSuite.scala     | 16 ++++++++++++++--
 3 files changed, 32 insertions(+), 8 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index b13dede2acc..3017fc10dfd 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -28,7 +28,7 @@ import scala.util.{Failure, Random, Success, Try}
 
 import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst._
-import org.apache.spark.sql.catalyst.analysis.SimpleAnalyzer.{extraHintForAnsiTypeCoercionExpression, DATA_TYPE_MISMATCH_ERROR}
+import org.apache.spark.sql.catalyst.analysis.SimpleAnalyzer.DATA_TYPE_MISMATCH_ERROR_MESSAGE
 import org.apache.spark.sql.catalyst.catalog._
 import org.apache.spark.sql.catalyst.encoders.OuterScopes
 import org.apache.spark.sql.catalyst.expressions.{Expression, FrameLessOffsetWindowFunction, _}
@@ -4361,10 +4361,7 @@ object RemoveTempResolvedColumn extends Rule[LogicalPlan] {
           case e: Expression if e.childrenResolved && e.checkInputDataTypes().isFailure =>
             e.checkInputDataTypes() match {
               case TypeCheckResult.TypeCheckFailure(message) =>
-                e.setTagValue(DATA_TYPE_MISMATCH_ERROR, true)
-                e.failAnalysis(
-                  s"cannot resolve '${e.sql}' due to data type mismatch: $message" +
-                    extraHintForAnsiTypeCoercionExpression(plan))
+                e.setTagValue(DATA_TYPE_MISMATCH_ERROR_MESSAGE, message)
             }
           case _ =>
         })
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
index 95b0226f00d..ed2e9ba2b6b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
@@ -50,6 +50,8 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog {
 
   val DATA_TYPE_MISMATCH_ERROR = TreeNodeTag[Boolean]("dataTypeMismatchError")
 
+  val DATA_TYPE_MISMATCH_ERROR_MESSAGE = TreeNodeTag[String]("dataTypeMismatchError")
+
   protected def failAnalysis(msg: String): Nothing = {
     throw new AnalysisException(msg)
   }
@@ -174,7 +176,20 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog {
             }
         }
 
-        getAllExpressions(operator).foreach(_.foreachUp {
+        val expressions = getAllExpressions(operator)
+
+        expressions.foreach(_.foreachUp {
+          case e: Expression =>
+            e.getTagValue(DATA_TYPE_MISMATCH_ERROR_MESSAGE) match {
+              case Some(message) =>
+                e.failAnalysis(s"cannot resolve '${e.sql}' due to data type mismatch: $message" +
+                  extraHintForAnsiTypeCoercionExpression(operator))
+              case _ =>
+            }
+          case _ =>
+        })
+
+        expressions.foreach(_.foreachUp {
           case a: Attribute if !a.resolved =>
             val missingCol = a.sql
             val candidates = operator.inputSet.toSeq.map(_.qualifiedName)
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
index 84f9c6c5e76..a6e952fd865 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
@@ -1170,13 +1170,25 @@ class AnalysisSuite extends AnalysisTest with Matchers {
            |WITH t as (SELECT true c, false d)
            |SELECT (t.c AND t.d) c
            |FROM t
-           |GROUP BY t.c
+           |GROUP BY t.c, t.d
            |HAVING ${func}(c) > 0d""".stripMargin),
-        Seq(s"cannot resolve '$func(t.c)' due to data type mismatch"),
+        Seq(s"cannot resolve '$func(c)' due to data type mismatch"),
         false)
     }
   }
 
+  test("SPARK-39354: should be `Table or view not found`") {
+    assertAnalysisError(parsePlan(
+      s"""
+         |WITH t1 as (SELECT 1 user_id, CAST("2022-06-02" AS DATE) dt)
+         |SELECT *
+         |FROM t1
+         |JOIN t2 ON t1.user_id = t2.user_id
+         |WHERE t1.dt >= DATE_SUB('2020-12-27', 90)""".stripMargin),
+      Seq(s"Table or view not found: t2"),
+      false)
+  }
+
   test("SPARK-39144: nested subquery expressions deduplicate relations should be done bottom up") {
     val innerRelation = SubqueryAlias("src1", testRelation)
     val outerRelation = SubqueryAlias("src2", testRelation)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org