You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by li...@apache.org on 2017/10/25 06:02:14 UTC
spark git commit: [SPARK-13947][SQL] The error message from using an invalid column reference is not clear

Repository: spark
Updated Branches:
  refs/heads/master 524abb996 -> 427359f07


[SPARK-13947][SQL] The error message from using an invalid column reference is not clear

## What changes were proposed in this pull request?

Rewrote the error message for clarity. Added extra information for the case of an attribute name collision, hinting that the user should double-check whether they are referencing two different tables.

## How was this patch tested?

No functional changes; only the final error message has changed. It has been tested manually against the situation described in the JIRA ticket. The automated tests in the repository pass.

This PR is my original work, and I license this work to the Spark project.

Author: Ruben Berenguel Montoro <ru...@mostlymaths.net>
Author: Ruben Berenguel Montoro <ru...@dreamattic.com>
Author: Ruben Berenguel <ru...@mostlymaths.net>

Closes #17100 from rberenguel/SPARK-13947-error-message.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/427359f0
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/427359f0
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/427359f0

Branch: refs/heads/master
Commit: 427359f077ad469d78c97972d021535f30a1e418
Parents: 524abb9
Author: Ruben Berenguel Montoro <ru...@mostlymaths.net>
Authored: Tue Oct 24 23:02:11 2017 -0700
Committer: gatorsmile <ga...@gmail.com>
Committed: Tue Oct 24 23:02:11 2017 -0700

----------------------------------------------------------------------
 .../sql/catalyst/analysis/CheckAnalysis.scala   | 19 +++++++++++++---
 .../catalyst/analysis/AnalysisErrorSuite.scala  | 23 ++++++++++++++------
 .../negative-cases/invalid-correlation.sql.out  |  2 +-
 3 files changed, 33 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/427359f0/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
index d9906bb..b5e8bdd 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
@@ -272,10 +272,23 @@ trait CheckAnalysis extends PredicateHelper {
           case o if o.children.nonEmpty && o.missingInput.nonEmpty =>
             val missingAttributes = o.missingInput.mkString(",")
             val input = o.inputSet.mkString(",")
+            val msgForMissingAttributes = s"Resolved attribute(s) $missingAttributes missing " +
+              s"from $input in operator ${operator.simpleString}."
 
-            failAnalysis(
-              s"resolved attribute(s) $missingAttributes missing from $input " +
-                s"in operator ${operator.simpleString}")
+            val resolver = plan.conf.resolver
+            val attrsWithSameName = o.missingInput.filter { missing =>
+              o.inputSet.exists(input => resolver(missing.name, input.name))
+            }
+
+            val msg = if (attrsWithSameName.nonEmpty) {
+              val sameNames = attrsWithSameName.map(_.name).mkString(",")
+              s"$msgForMissingAttributes Attribute(s) with the same name appear in the " +
+                s"operation: $sameNames. Please check if the right attribute(s) are used."
+            } else {
+              msgForMissingAttributes
+            }
+
+            failAnalysis(msg)
 
           case p @ Project(exprs, _) if containsMultipleGenerators(exprs) =>
             failAnalysis(

http://git-wip-us.apache.org/repos/asf/spark/blob/427359f0/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
index 884e113..5d2f8e7 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
@@ -408,16 +408,25 @@ class AnalysisErrorSuite extends AnalysisTest {
     // CheckAnalysis should throw AnalysisException when Aggregate contains missing attribute(s)
     // Since we manually construct the logical plan at here and Sum only accept
     // LongType, DoubleType, and DecimalType. We use LongType as the type of a.
-    val plan =
-      Aggregate(
-        Nil,
-        Alias(sum(AttributeReference("a", LongType)(exprId = ExprId(1))), "b")() :: Nil,
-        LocalRelation(
-          AttributeReference("a", LongType)(exprId = ExprId(2))))
+    val attrA = AttributeReference("a", LongType)(exprId = ExprId(1))
+    val otherA = AttributeReference("a", LongType)(exprId = ExprId(2))
+    val attrC = AttributeReference("c", LongType)(exprId = ExprId(3))
+    val aliases = Alias(sum(attrA), "b")() :: Alias(sum(attrC), "d")() :: Nil
+    val plan = Aggregate(
+      Nil,
+      aliases,
+      LocalRelation(otherA))
 
     assert(plan.resolved)
 
-    assertAnalysisError(plan, "resolved attribute(s) a#1L missing from a#2L" :: Nil)
+    val resolved = s"${attrA.toString},${attrC.toString}"
+
+    val errorMsg = s"Resolved attribute(s) $resolved missing from ${otherA.toString} " +
+                     s"in operator !Aggregate [${aliases.mkString(", ")}]. " +
+                     s"Attribute(s) with the same name appear in the operation: a. " +
+                     "Please check if the right attribute(s) are used."
+
+    assertAnalysisError(plan, errorMsg :: Nil)
   }
 
   test("error test for self-join") {

http://git-wip-us.apache.org/repos/asf/spark/blob/427359f0/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out
index e4b1a2d..2586f26 100644
--- a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out
@@ -63,7 +63,7 @@ WHERE  t1a IN (SELECT   min(t2a)
 struct<>
 -- !query 4 output
 org.apache.spark.sql.AnalysisException
-resolved attribute(s) t2b#x missing from min(t2a)#x,t2c#x in operator !Filter t2c#x IN (list#x [t2b#x]);
+Resolved attribute(s) t2b#x missing from min(t2a)#x,t2c#x in operator !Filter t2c#x IN (list#x [t2b#x]).;
 
 
 -- !query 5


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org