You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2022/06/02 10:06:57 UTC
[spark] branch master updated: [SPARK-39354][SQL] Ensure show `Table or view not found` even if there are `dataTypeMismatchError` related to `Filter` at the same time
This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 89fdb8a6fb6 [SPARK-39354][SQL] Ensure show `Table or view not found` even if there are `dataTypeMismatchError` related to `Filter` at the same time
89fdb8a6fb6 is described below
commit 89fdb8a6fb6a669c458891b3abeba236e64b1e89
Author: yangjie01 <ya...@baidu.com>
AuthorDate: Thu Jun 2 13:06:14 2022 +0300
[SPARK-39354][SQL] Ensure show `Table or view not found` even if there are `dataTypeMismatchError` related to `Filter` at the same time
### What changes were proposed in this pull request?
After SPARK-38118, a `dataTypeMismatchError` related to `Filter` will be checked and thrown in `RemoveTempResolvedColumn`; this causes a compatibility issue with the exception message presentation.
For example, the following case:
```
spark.sql("create table t1(user_id int, auct_end_dt date) using parquet;")
spark.sql("select * from t1 join t2 on t1.user_id = t2.user_id where t1.auct_end_dt >= Date_sub('2020-12-27', 90)").show
```
The expected message is
```
Table or view not found: t2
```
But the actual message is
```
org.apache.spark.sql.AnalysisException: cannot resolve 'date_sub('2020-12-27', 90)' due to data type mismatch: argument 1 requires date type, however, ''2020-12-27'' is of string type.; line 1 pos 76
```
For forward compatibility, this PR changes `RemoveTempResolvedColumn` to only record `DATA_TYPE_MISMATCH_ERROR_MESSAGE` during its check process, and moves the `failAnalysis` call to `CheckAnalysis#checkAnalysis`
### Why are the changes needed?
Fix analysis exception message compatibility.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Passes GitHub Actions and adds a new test case
Closes #36746 from LuciferYang/SPARK-39354.
Authored-by: yangjie01 <ya...@baidu.com>
Signed-off-by: Max Gekk <ma...@gmail.com>
---
.../apache/spark/sql/catalyst/analysis/Analyzer.scala | 7 ++-----
.../spark/sql/catalyst/analysis/CheckAnalysis.scala | 17 ++++++++++++++++-
.../spark/sql/catalyst/analysis/AnalysisSuite.scala | 16 ++++++++++++++--
3 files changed, 32 insertions(+), 8 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index b13dede2acc..3017fc10dfd 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -28,7 +28,7 @@ import scala.util.{Failure, Random, Success, Try}
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst._
-import org.apache.spark.sql.catalyst.analysis.SimpleAnalyzer.{extraHintForAnsiTypeCoercionExpression, DATA_TYPE_MISMATCH_ERROR}
+import org.apache.spark.sql.catalyst.analysis.SimpleAnalyzer.DATA_TYPE_MISMATCH_ERROR_MESSAGE
import org.apache.spark.sql.catalyst.catalog._
import org.apache.spark.sql.catalyst.encoders.OuterScopes
import org.apache.spark.sql.catalyst.expressions.{Expression, FrameLessOffsetWindowFunction, _}
@@ -4361,10 +4361,7 @@ object RemoveTempResolvedColumn extends Rule[LogicalPlan] {
case e: Expression if e.childrenResolved && e.checkInputDataTypes().isFailure =>
e.checkInputDataTypes() match {
case TypeCheckResult.TypeCheckFailure(message) =>
- e.setTagValue(DATA_TYPE_MISMATCH_ERROR, true)
- e.failAnalysis(
- s"cannot resolve '${e.sql}' due to data type mismatch: $message" +
- extraHintForAnsiTypeCoercionExpression(plan))
+ e.setTagValue(DATA_TYPE_MISMATCH_ERROR_MESSAGE, message)
}
case _ =>
})
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
index 95b0226f00d..ed2e9ba2b6b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
@@ -50,6 +50,8 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog {
val DATA_TYPE_MISMATCH_ERROR = TreeNodeTag[Boolean]("dataTypeMismatchError")
+ val DATA_TYPE_MISMATCH_ERROR_MESSAGE = TreeNodeTag[String]("dataTypeMismatchError")
+
protected def failAnalysis(msg: String): Nothing = {
throw new AnalysisException(msg)
}
@@ -174,7 +176,20 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog {
}
}
- getAllExpressions(operator).foreach(_.foreachUp {
+ val expressions = getAllExpressions(operator)
+
+ expressions.foreach(_.foreachUp {
+ case e: Expression =>
+ e.getTagValue(DATA_TYPE_MISMATCH_ERROR_MESSAGE) match {
+ case Some(message) =>
+ e.failAnalysis(s"cannot resolve '${e.sql}' due to data type mismatch: $message" +
+ extraHintForAnsiTypeCoercionExpression(operator))
+ case _ =>
+ }
+ case _ =>
+ })
+
+ expressions.foreach(_.foreachUp {
case a: Attribute if !a.resolved =>
val missingCol = a.sql
val candidates = operator.inputSet.toSeq.map(_.qualifiedName)
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
index 84f9c6c5e76..a6e952fd865 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
@@ -1170,13 +1170,25 @@ class AnalysisSuite extends AnalysisTest with Matchers {
|WITH t as (SELECT true c, false d)
|SELECT (t.c AND t.d) c
|FROM t
- |GROUP BY t.c
+ |GROUP BY t.c, t.d
|HAVING ${func}(c) > 0d""".stripMargin),
- Seq(s"cannot resolve '$func(t.c)' due to data type mismatch"),
+ Seq(s"cannot resolve '$func(c)' due to data type mismatch"),
false)
}
}
+ test("SPARK-39354: should be `Table or view not found`") {
+ assertAnalysisError(parsePlan(
+ s"""
+ |WITH t1 as (SELECT 1 user_id, CAST("2022-06-02" AS DATE) dt)
+ |SELECT *
+ |FROM t1
+ |JOIN t2 ON t1.user_id = t2.user_id
+ |WHERE t1.dt >= DATE_SUB('2020-12-27', 90)""".stripMargin),
+ Seq(s"Table or view not found: t2"),
+ false)
+ }
+
test("SPARK-39144: nested subquery expressions deduplicate relations should be done bottom up") {
val innerRelation = SubqueryAlias("src1", testRelation)
val outerRelation = SubqueryAlias("src2", testRelation)
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org