Posted to commits@spark.apache.org by do...@apache.org on 2023/02/28 09:01:07 UTC

[spark] branch master updated: [SPARK-42608][SQL] Use full inner field names in resolution errors

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 74410ca2f13 [SPARK-42608][SQL] Use full inner field names in resolution errors
74410ca2f13 is described below

commit 74410ca2f1318177e558f1e719e0cac0f0196807
Author: aokolnychyi <ao...@apple.com>
AuthorDate: Tue Feb 28 01:00:24 2023 -0800

    [SPARK-42608][SQL] Use full inner field names in resolution errors
    
    ### What changes were proposed in this pull request?
    
    This PR makes `TableOutputResolver` use full names for inner fields in resolution errors.
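    
    Below is a minimal, self-contained sketch of the idea, not Spark's actual code: the recursive field check now carries the path of struct names traversed so far (the new `colPath: Seq[String]` parameter in the diff) and joins it into a dotted, quoted name for error messages. `ColPathSketch` and its helpers are illustrative names only.
    
    ```scala
    // Illustrative only: shows how carrying a Seq[String] column path and quoting it
    // yields full inner-field names such as 'b.x' in error messages.
    object ColPathSketch {
      // Simplified stand-in for a quote-if-needed helper: back-quote a part only
      // when it contains characters outside the simple identifier set.
      private def quoteIfNeeded(part: String): String =
        if (part.matches("[a-zA-Z0-9_]+")) part else "`" + part.replace("`", "``") + "`"
    
      // Join the accumulated path into the name shown to the user.
      private def quoted(colPath: Seq[String]): String =
        colPath.map(quoteIfNeeded).mkString(".")
    
      def main(args: Array[String]): Unit = {
        // Resolving inner field `x` of struct column `b`: the path grows as the
        // resolver recurses into the struct.
        val colPath = Seq("b") :+ "x"
        println(s"Cannot write nullable values to non-null column '${quoted(colPath)}'")
        // prints: Cannot write nullable values to non-null column 'b.x'
      }
    }
    ```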
    
    ### Why are the changes needed?
    
    These changes are needed to avoid ambiguous errors: when multiple struct columns contain inner fields with the same name, reporting only the bare field name does not identify which column failed to resolve.
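    
    As a hedged illustration (the schemas mirror the test added in this patch; the object name is made up and spark-catalyst is assumed to be on the classpath), both `b` and `c` contain an inner field named `x`, so an error mentioning only `'x'` cannot tell the reader which struct column was at fault:
    
    ```scala
    import org.apache.spark.sql.types._
    
    object AmbiguousInnerFieldExample {
      def main(args: Array[String]): Unit = {
        // Target table schema: two struct columns that both contain a field `x`.
        val table = StructType(Seq(
          StructField("a", IntegerType),
          StructField("b", StructType(Seq(
            StructField("x", IntegerType, nullable = false),
            StructField("y", IntegerType)))),
          StructField("c", StructType(Seq(
            StructField("x", IntegerType),
            StructField("y", IntegerType))))))
    
        // Writing a nullable value into b.x used to fail with
        //   Cannot write nullable values to non-null column 'x'
        // which could refer to either b.x or c.x; it now reports 'b.x'.
        println(table.treeString)
      }
    }
    ```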
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    This PR adds a test to `V2WriteAnalysisSuite` verifying that resolution errors report full inner field names.
    
    Closes #40202 from aokolnychyi/spark-42608.
    
    Authored-by: aokolnychyi <ao...@apple.com>
    Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
 .../sql/catalyst/analysis/TableOutputResolver.scala     | 13 +++++++------
 .../sql/catalyst/analysis/V2WriteAnalysisSuite.scala    | 17 +++++++++++++++++
 2 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TableOutputResolver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TableOutputResolver.scala
index 6014536d872..61d24964d60 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TableOutputResolver.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TableOutputResolver.scala
@@ -51,7 +51,7 @@ object TableOutputResolver {
 
       query.output.zip(expected).flatMap {
         case (queryExpr, tableAttr) =>
-          checkField(tableAttr, queryExpr, byName, conf, err => errors += err)
+          checkField(tableAttr, queryExpr, byName, conf, err => errors += err, Seq(tableAttr.name))
       }
     }
 
@@ -105,7 +105,7 @@ object TableOutputResolver {
             resolveMapType(
               matchedCol, matchedType, expectedType, expectedName, conf, addError, newColPath)
           case _ =>
-            checkField(expectedCol, matchedCol, byName = true, conf, addError)
+            checkField(expectedCol, matchedCol, byName = true, conf, addError, newColPath)
         }
       }
     }
@@ -251,7 +251,8 @@ object TableOutputResolver {
       queryExpr: NamedExpression,
       byName: Boolean,
       conf: SQLConf,
-      addError: String => Unit): Option[NamedExpression] = {
+      addError: String => Unit,
+      colPath: Seq[String]): Option[NamedExpression] = {
 
     val storeAssignmentPolicy = conf.storeAssignmentPolicy
     lazy val outputField = if (tableAttr.dataType.sameType(queryExpr.dataType) &&
@@ -264,7 +265,7 @@ object TableOutputResolver {
           val cast = Cast(queryExpr, tableAttr.dataType, Option(conf.sessionLocalTimeZone),
             ansiEnabled = true)
           cast.setTagValue(Cast.BY_TABLE_INSERTION, ())
-          checkCastOverflowInTableInsert(cast, tableAttr.name)
+          checkCastOverflowInTableInsert(cast, colPath.quoted)
         case StoreAssignmentPolicy.LEGACY =>
           Cast(queryExpr, tableAttr.dataType, Option(conf.sessionLocalTimeZone),
             ansiEnabled = false)
@@ -289,10 +290,10 @@ object TableOutputResolver {
       case StoreAssignmentPolicy.STRICT | StoreAssignmentPolicy.ANSI =>
         // run the type check first to ensure type errors are present
         val canWrite = DataType.canWrite(
-          queryExpr.dataType, tableAttr.dataType, byName, conf.resolver, tableAttr.name,
+          queryExpr.dataType, tableAttr.dataType, byName, conf.resolver, colPath.quoted,
           storeAssignmentPolicy, addError)
         if (queryExpr.nullable && !tableAttr.nullable) {
-          addError(s"Cannot write nullable values to non-null column '${tableAttr.name}'")
+          addError(s"Cannot write nullable values to non-null column '${colPath.quoted}'")
           None
 
         } else if (!canWrite) {
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/V2WriteAnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/V2WriteAnalysisSuite.scala
index 9f9df10b398..69cd838cfb2 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/V2WriteAnalysisSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/V2WriteAnalysisSuite.scala
@@ -743,4 +743,21 @@ abstract class V2WriteAnalysisSuiteBase extends AnalysisTest {
     val writePlan = byName(table, query).analyze
     assert(writePlan.children.head.schema == table.schema)
   }
+
+  test("SPARK-42608: use full column names for inner fields in resolution errors") {
+    val table = TestRelation(Seq(
+      $"a".int,
+      $"b".struct($"x".int.notNull, $"y".int),
+      $"c".struct($"x".int, $"y".int)))
+    val query = TestRelation(Seq(
+      $"b".struct($"y".int, $"x".byte),
+      $"c".struct($"y".int, $"x".byte),
+      $"a".int))
+
+    val parsedPlan = byName(table, query)
+
+    assertAnalysisError(parsedPlan, Seq(
+      "Cannot write incompatible data to table", "'table-name'",
+      "Cannot write nullable values to non-null column 'b.x'"))
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org