You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2022/11/19 17:31:54 UTC
[spark] branch master updated: [SPARK-41172][SQL] Migrate the ambiguous ref error to an error class
This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 62f8ce40ddb [SPARK-41172][SQL] Migrate the ambiguous ref error to an error class
62f8ce40ddb is described below
commit 62f8ce40ddbf76ce86fd5e51cc73c67d66e12f48
Author: panbingkun <pb...@gmail.com>
AuthorDate: Sat Nov 19 20:31:38 2022 +0300
[SPARK-41172][SQL] Migrate the ambiguous ref error to an error class
### What changes were proposed in this pull request?
This PR migrates the ambiguous reference error to the new error class `AMBIGUOUS_REFERENCE`.
### Why are the changes needed?
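The change improves the error framework: the ambiguous reference error is now raised through `QueryCompilationErrors.ambiguousReferenceError` with an error class and structured message parameters instead of a free-form `AnalysisException` message.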
### Does this PR introduce _any_ user-facing change?
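No API change. Note that the text of the error message changes: the reference name and the candidate names are now quoted as SQL identifiers, and the candidates are sorted and listed in brackets.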
### How was this patch tested?
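Existing tests and golden files were updated to check the `AMBIGUOUS_REFERENCE` error class and its parameters; the change passes GitHub Actions (GA).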
Closes #38721 from panbingkun/SPARK-41172.
Authored-by: panbingkun <pb...@gmail.com>
Signed-off-by: Max Gekk <ma...@gmail.com>
---
core/src/main/resources/error/error-classes.json | 5 +
.../spark/sql/catalyst/expressions/package.scala | 5 +-
.../spark/sql/errors/QueryCompilationErrors.scala | 9 ++
.../sql/catalyst/analysis/AnalysisSuite.scala | 5 +-
.../catalyst/analysis/ResolveSubquerySuite.scala | 4 +-
.../expressions/AttributeResolutionSuite.scala | 30 +++--
.../results/columnresolution-negative.sql.out | 135 +++++++++++++++++++--
.../sql-tests/results/postgreSQL/join.sql.out | 30 ++++-
.../results/postgreSQL/select_implicit.sql.out | 45 ++++++-
.../results/udf/postgreSQL/udf-join.sql.out | 30 ++++-
.../udf/postgreSQL/udf-select_implicit.sql.out | 45 ++++++-
.../spark/sql/DataFrameNaFunctionsSuite.scala | 42 +++++--
.../org/apache/spark/sql/DataFrameStatSuite.scala | 52 ++++++--
.../execution/command/PlanResolutionSuite.scala | 22 ++--
.../execution/datasources/orc/OrcFilterSuite.scala | 20 ++-
15 files changed, 406 insertions(+), 73 deletions(-)
diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json
index fe340c517a2..4da9d2f9fbc 100644
--- a/core/src/main/resources/error/error-classes.json
+++ b/core/src/main/resources/error/error-classes.json
@@ -5,6 +5,11 @@
],
"sqlState" : "42000"
},
+ "AMBIGUOUS_REFERENCE" : {
+ "message" : [
+ "Reference <name> is ambiguous, could be: <referenceNames>."
+ ]
+ },
"ARITHMETIC_OVERFLOW" : {
"message" : [
"<message>.<alternative> If necessary set <config> to \"false\" to bypass this error."
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
index 7913f396120..ededac3d917 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
@@ -21,9 +21,9 @@ import java.util.Locale
import com.google.common.collect.Maps
-import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.analysis.{Resolver, UnresolvedAttribute}
import org.apache.spark.sql.catalyst.util.MetadataColumnHelper
+import org.apache.spark.sql.errors.QueryCompilationErrors
import org.apache.spark.sql.types.{StructField, StructType}
/**
@@ -368,8 +368,7 @@ package object expressions {
case ambiguousReferences =>
// More than one match.
- val referenceNames = ambiguousReferences.map(_.qualifiedName).mkString(", ")
- throw new AnalysisException(s"Reference '$name' is ambiguous, could be: $referenceNames.")
+ throw QueryCompilationErrors.ambiguousReferenceError(name, ambiguousReferences)
}
}
}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
index 22b4cfdb3c6..cbdbb6adc11 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
@@ -1834,6 +1834,15 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase {
"n" -> numMatches.toString))
}
+ def ambiguousReferenceError(name: String, ambiguousReferences: Seq[Attribute]): Throwable = {
+ new AnalysisException(
+ errorClass = "AMBIGUOUS_REFERENCE",
+ messageParameters = Map(
+ "name" -> toSQLId(name),
+ "referenceNames" ->
+ ambiguousReferences.map(ar => toSQLId(ar.qualifiedName)).sorted.mkString("[", ", ", "]")))
+ }
+
def cannotUseIntervalTypeInTableSchemaError(): Throwable = {
new AnalysisException(
errorClass = "_LEGACY_ERROR_TEMP_1183",
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
index 2d6b3afb749..8b303ec3bb1 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
@@ -681,7 +681,7 @@ class AnalysisSuite extends AnalysisTest with Matchers {
test("SPARK-34741: Avoid ambiguous reference in MergeIntoTable") {
val cond = $"a" > 1
- assertAnalysisError(
+ assertAnalysisErrorClass(
MergeIntoTable(
testRelation,
testRelation,
@@ -690,7 +690,8 @@ class AnalysisSuite extends AnalysisTest with Matchers {
Nil,
Nil
),
- "Reference 'a' is ambiguous" :: Nil)
+ "AMBIGUOUS_REFERENCE",
+ Map("name" -> "`a`", "referenceNames" -> "[`a`, `a`]"))
}
test("SPARK-24488 Generator with multiple aliases") {
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveSubquerySuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveSubquerySuite.scala
index f4e5cf91188..577f663d8b1 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveSubquerySuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveSubquerySuite.scala
@@ -81,7 +81,9 @@ class ResolveSubquerySuite extends AnalysisTest {
test("lateral join with ambiguous join conditions") {
val plan = lateralJoin(t1, t0.select($"b"), condition = Some($"b" === 1))
- assertAnalysisError(plan, "Reference 'b' is ambiguous, could be: b, b." :: Nil)
+ assertAnalysisErrorClass(plan,
+ "AMBIGUOUS_REFERENCE", Map("name" -> "`b`", "referenceNames" -> "[`b`, `b`]")
+ )
}
test("prefer resolving lateral subquery attributes from the inner query") {
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/AttributeResolutionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/AttributeResolutionSuite.scala
index a3885ac77f3..71fa60b0c03 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/AttributeResolutionSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/AttributeResolutionSuite.scala
@@ -65,11 +65,16 @@ class AttributeResolutionSuite extends SparkFunSuite {
AttributeReference("a", IntegerType)(qualifier = Seq("ns1", "t1")),
AttributeReference("a", IntegerType)(qualifier = Seq("ns1", "ns2", "t2")))
- val ex = intercept[AnalysisException] {
- attrs.resolve(Seq("a"), resolver)
- }
- assert(ex.getMessage.contains(
- "Reference 'a' is ambiguous, could be: ns1.t1.a, ns1.ns2.t2.a."))
+ checkError(
+ exception = intercept[AnalysisException] {
+ attrs.resolve(Seq("a"), resolver)
+ },
+ errorClass = "AMBIGUOUS_REFERENCE",
+ parameters = Map(
+ "name" -> "`a`",
+ "referenceNames" -> "[`ns1`.`ns2`.`t2`.`a`, `ns1`.`t1`.`a`]"
+ )
+ )
}
test("attribute resolution ambiguity at the qualifier level") {
@@ -77,11 +82,16 @@ class AttributeResolutionSuite extends SparkFunSuite {
AttributeReference("a", IntegerType)(qualifier = Seq("ns1", "t")),
AttributeReference("a", IntegerType)(qualifier = Seq("ns2", "ns1", "t")))
- val ex = intercept[AnalysisException] {
- attrs.resolve(Seq("ns1", "t", "a"), resolver)
- }
- assert(ex.getMessage.contains(
- "Reference 'ns1.t.a' is ambiguous, could be: ns1.t.a, ns2.ns1.t.a."))
+ checkError(
+ exception = intercept[AnalysisException] {
+ attrs.resolve(Seq("ns1", "t", "a"), resolver)
+ },
+ errorClass = "AMBIGUOUS_REFERENCE",
+ parameters = Map(
+ "name" -> "`ns1`.`t`.`a`",
+ "referenceNames" -> "[`ns1`.`t`.`a`, `ns2`.`ns1`.`t`.`a`]"
+ )
+ )
}
test("attribute resolution with nested fields") {
diff --git a/sql/core/src/test/resources/sql-tests/results/columnresolution-negative.sql.out b/sql/core/src/test/resources/sql-tests/results/columnresolution-negative.sql.out
index 03a3d75aa95..aafd9140385 100644
--- a/sql/core/src/test/resources/sql-tests/results/columnresolution-negative.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/columnresolution-negative.sql.out
@@ -69,7 +69,20 @@ SELECT i1 FROM t1, mydb1.t1
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
-Reference 'i1' is ambiguous, could be: spark_catalog.mydb1.t1.i1, spark_catalog.mydb1.t1.i1.; line 1 pos 7
+{
+ "errorClass" : "AMBIGUOUS_REFERENCE",
+ "messageParameters" : {
+ "name" : "`i1`",
+ "referenceNames" : "[`spark_catalog`.`mydb1`.`t1`.`i1`, `spark_catalog`.`mydb1`.`t1`.`i1`]"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 8,
+ "stopIndex" : 9,
+ "fragment" : "i1"
+ } ]
+}
-- !query
@@ -78,7 +91,20 @@ SELECT t1.i1 FROM t1, mydb1.t1
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
-Reference 't1.i1' is ambiguous, could be: spark_catalog.mydb1.t1.i1, spark_catalog.mydb1.t1.i1.; line 1 pos 7
+{
+ "errorClass" : "AMBIGUOUS_REFERENCE",
+ "messageParameters" : {
+ "name" : "`t1`.`i1`",
+ "referenceNames" : "[`spark_catalog`.`mydb1`.`t1`.`i1`, `spark_catalog`.`mydb1`.`t1`.`i1`]"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 8,
+ "stopIndex" : 12,
+ "fragment" : "t1.i1"
+ } ]
+}
-- !query
@@ -87,7 +113,20 @@ SELECT mydb1.t1.i1 FROM t1, mydb1.t1
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
-Reference 'mydb1.t1.i1' is ambiguous, could be: spark_catalog.mydb1.t1.i1, spark_catalog.mydb1.t1.i1.; line 1 pos 7
+{
+ "errorClass" : "AMBIGUOUS_REFERENCE",
+ "messageParameters" : {
+ "name" : "`mydb1`.`t1`.`i1`",
+ "referenceNames" : "[`spark_catalog`.`mydb1`.`t1`.`i1`, `spark_catalog`.`mydb1`.`t1`.`i1`]"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 8,
+ "stopIndex" : 18,
+ "fragment" : "mydb1.t1.i1"
+ } ]
+}
-- !query
@@ -96,7 +135,20 @@ SELECT i1 FROM t1, mydb2.t1
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
-Reference 'i1' is ambiguous, could be: spark_catalog.mydb1.t1.i1, spark_catalog.mydb2.t1.i1.; line 1 pos 7
+{
+ "errorClass" : "AMBIGUOUS_REFERENCE",
+ "messageParameters" : {
+ "name" : "`i1`",
+ "referenceNames" : "[`spark_catalog`.`mydb1`.`t1`.`i1`, `spark_catalog`.`mydb2`.`t1`.`i1`]"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 8,
+ "stopIndex" : 9,
+ "fragment" : "i1"
+ } ]
+}
-- !query
@@ -105,7 +157,20 @@ SELECT t1.i1 FROM t1, mydb2.t1
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
-Reference 't1.i1' is ambiguous, could be: spark_catalog.mydb1.t1.i1, spark_catalog.mydb2.t1.i1.; line 1 pos 7
+{
+ "errorClass" : "AMBIGUOUS_REFERENCE",
+ "messageParameters" : {
+ "name" : "`t1`.`i1`",
+ "referenceNames" : "[`spark_catalog`.`mydb1`.`t1`.`i1`, `spark_catalog`.`mydb2`.`t1`.`i1`]"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 8,
+ "stopIndex" : 12,
+ "fragment" : "t1.i1"
+ } ]
+}
-- !query
@@ -122,7 +187,20 @@ SELECT i1 FROM t1, mydb1.t1
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
-Reference 'i1' is ambiguous, could be: spark_catalog.mydb2.t1.i1, spark_catalog.mydb1.t1.i1.; line 1 pos 7
+{
+ "errorClass" : "AMBIGUOUS_REFERENCE",
+ "messageParameters" : {
+ "name" : "`i1`",
+ "referenceNames" : "[`spark_catalog`.`mydb1`.`t1`.`i1`, `spark_catalog`.`mydb2`.`t1`.`i1`]"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 8,
+ "stopIndex" : 9,
+ "fragment" : "i1"
+ } ]
+}
-- !query
@@ -131,7 +209,20 @@ SELECT t1.i1 FROM t1, mydb1.t1
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
-Reference 't1.i1' is ambiguous, could be: spark_catalog.mydb2.t1.i1, spark_catalog.mydb1.t1.i1.; line 1 pos 7
+{
+ "errorClass" : "AMBIGUOUS_REFERENCE",
+ "messageParameters" : {
+ "name" : "`t1`.`i1`",
+ "referenceNames" : "[`spark_catalog`.`mydb1`.`t1`.`i1`, `spark_catalog`.`mydb2`.`t1`.`i1`]"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 8,
+ "stopIndex" : 12,
+ "fragment" : "t1.i1"
+ } ]
+}
-- !query
@@ -140,7 +231,20 @@ SELECT i1 FROM t1, mydb2.t1
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
-Reference 'i1' is ambiguous, could be: spark_catalog.mydb2.t1.i1, spark_catalog.mydb2.t1.i1.; line 1 pos 7
+{
+ "errorClass" : "AMBIGUOUS_REFERENCE",
+ "messageParameters" : {
+ "name" : "`i1`",
+ "referenceNames" : "[`spark_catalog`.`mydb2`.`t1`.`i1`, `spark_catalog`.`mydb2`.`t1`.`i1`]"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 8,
+ "stopIndex" : 9,
+ "fragment" : "i1"
+ } ]
+}
-- !query
@@ -149,7 +253,20 @@ SELECT t1.i1 FROM t1, mydb2.t1
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
-Reference 't1.i1' is ambiguous, could be: spark_catalog.mydb2.t1.i1, spark_catalog.mydb2.t1.i1.; line 1 pos 7
+{
+ "errorClass" : "AMBIGUOUS_REFERENCE",
+ "messageParameters" : {
+ "name" : "`t1`.`i1`",
+ "referenceNames" : "[`spark_catalog`.`mydb2`.`t1`.`i1`, `spark_catalog`.`mydb2`.`t1`.`i1`]"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 8,
+ "stopIndex" : 12,
+ "fragment" : "t1.i1"
+ } ]
+}
-- !query
diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/join.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/join.sql.out
index 04c1d47af92..6746efd0809 100644
--- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/join.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/join.sql.out
@@ -546,7 +546,20 @@ SELECT '' AS `xxx`, i, k, t
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
-Reference 'i' is ambiguous, could be: spark_catalog.default.j1_tbl.i, spark_catalog.default.j2_tbl.i.; line 1 pos 20
+{
+ "errorClass" : "AMBIGUOUS_REFERENCE",
+ "messageParameters" : {
+ "name" : "`i`",
+ "referenceNames" : "[`spark_catalog`.`default`.`j1_tbl`.`i`, `spark_catalog`.`default`.`j2_tbl`.`i`]"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 21,
+ "stopIndex" : 21,
+ "fragment" : "i"
+ } ]
+}
-- !query
@@ -3235,7 +3248,20 @@ select * from
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
-Reference 'f1' is ambiguous, could be: j.f1, j.f1.; line 2 pos 63
+{
+ "errorClass" : "AMBIGUOUS_REFERENCE",
+ "messageParameters" : {
+ "name" : "`f1`",
+ "referenceNames" : "[`j`.`f1`, `j`.`f1`]"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 78,
+ "stopIndex" : 79,
+ "fragment" : "f1"
+ } ]
+}
-- !query
diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_implicit.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_implicit.sql.out
index 0b53a9024ef..1e216298d51 100755
--- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_implicit.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_implicit.sql.out
@@ -239,7 +239,20 @@ SELECT count(*) FROM test_missing_target x, test_missing_target y
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
-Reference 'b' is ambiguous, could be: x.b, y.b.; line 3 pos 10
+{
+ "errorClass" : "AMBIGUOUS_REFERENCE",
+ "messageParameters" : {
+ "name" : "`b`",
+ "referenceNames" : "[`x`.`b`, `y`.`b`]"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 94,
+ "stopIndex" : 94,
+ "fragment" : "b"
+ } ]
+}
-- !query
@@ -429,7 +442,20 @@ SELECT count(x.a) FROM test_missing_target x, test_missing_target y
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
-Reference 'b' is ambiguous, could be: x.b, y.b.; line 3 pos 10
+{
+ "errorClass" : "AMBIGUOUS_REFERENCE",
+ "messageParameters" : {
+ "name" : "`b`",
+ "referenceNames" : "[`x`.`b`, `y`.`b`]"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 96,
+ "stopIndex" : 96,
+ "fragment" : "b"
+ } ]
+}
-- !query
@@ -453,7 +479,20 @@ SELECT count(b) FROM test_missing_target x, test_missing_target y
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
-Reference 'b' is ambiguous, could be: x.b, y.b.; line 1 pos 13
+{
+ "errorClass" : "AMBIGUOUS_REFERENCE",
+ "messageParameters" : {
+ "name" : "`b`",
+ "referenceNames" : "[`x`.`b`, `y`.`b`]"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 14,
+ "stopIndex" : 14,
+ "fragment" : "b"
+ } ]
+}
-- !query
diff --git a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-join.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-join.sql.out
index 363e5d0b117..c30321c76db 100644
--- a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-join.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-join.sql.out
@@ -546,7 +546,20 @@ SELECT udf('') AS `xxx`, udf(i) AS i, udf(k), udf(t) AS t
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
-Reference 'i' is ambiguous, could be: spark_catalog.default.j1_tbl.i, spark_catalog.default.j2_tbl.i.; line 1 pos 29
+{
+ "errorClass" : "AMBIGUOUS_REFERENCE",
+ "messageParameters" : {
+ "name" : "`i`",
+ "referenceNames" : "[`spark_catalog`.`default`.`j1_tbl`.`i`, `spark_catalog`.`default`.`j2_tbl`.`i`]"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 30,
+ "stopIndex" : 30,
+ "fragment" : "i"
+ } ]
+}
-- !query
@@ -3263,7 +3276,20 @@ select * from
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
-Reference 'f1' is ambiguous, could be: j.f1, j.f1.; line 2 pos 72
+{
+ "errorClass" : "AMBIGUOUS_REFERENCE",
+ "messageParameters" : {
+ "name" : "`f1`",
+ "referenceNames" : "[`j`.`f1`, `j`.`f1`]"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 87,
+ "stopIndex" : 88,
+ "fragment" : "f1"
+ } ]
+}
-- !query
diff --git a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_implicit.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_implicit.sql.out
index 412013d9527..283d5a48ba4 100755
--- a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_implicit.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_implicit.sql.out
@@ -242,7 +242,20 @@ SELECT udf(count(*)) FROM test_missing_target x, test_missing_target y
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
-Reference 'b' is ambiguous, could be: x.b, y.b.; line 3 pos 14
+{
+ "errorClass" : "AMBIGUOUS_REFERENCE",
+ "messageParameters" : {
+ "name" : "`b`",
+ "referenceNames" : "[`x`.`b`, `y`.`b`]"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 113,
+ "stopIndex" : 113,
+ "fragment" : "b"
+ } ]
+}
-- !query
@@ -432,7 +445,20 @@ SELECT udf(count(udf(x.a))) FROM test_missing_target x, test_missing_target y
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
-Reference 'b' is ambiguous, could be: x.b, y.b.; line 3 pos 14
+{
+ "errorClass" : "AMBIGUOUS_REFERENCE",
+ "messageParameters" : {
+ "name" : "`b`",
+ "referenceNames" : "[`x`.`b`, `y`.`b`]"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 120,
+ "stopIndex" : 120,
+ "fragment" : "b"
+ } ]
+}
-- !query
@@ -457,7 +483,20 @@ SELECT udf(count(udf(b))) FROM test_missing_target x, test_missing_target y
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
-Reference 'b' is ambiguous, could be: x.b, y.b.; line 1 pos 21
+{
+ "errorClass" : "AMBIGUOUS_REFERENCE",
+ "messageParameters" : {
+ "name" : "`b`",
+ "referenceNames" : "[`x`.`b`, `y`.`b`]"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 22,
+ "stopIndex" : 22,
+ "fragment" : "b"
+ } ]
+}
-- !query
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala
index 8dbc57c0429..b83a8850fbe 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala
@@ -279,10 +279,16 @@ class DataFrameNaFunctionsSuite extends QueryTest with SharedSparkSession {
val (df1, df2) = createDFsWithSameFieldsName()
val joined_df = df1.join(df2, Seq("f1"), joinType = "left_outer")
- val message = intercept[AnalysisException] {
- joined_df.na.fill("", cols = Seq("f2"))
- }.getMessage
- assert(message.contains("Reference 'f2' is ambiguous"))
+ checkError(
+ exception = intercept[AnalysisException] {
+ joined_df.na.fill("", cols = Seq("f2"))
+ },
+ errorClass = "AMBIGUOUS_REFERENCE",
+ parameters = Map(
+ "name" -> "`f2`",
+ "referenceNames" -> "[`f2`, `f2`]"
+ )
+ )
}
test("fill with col(*)") {
@@ -397,10 +403,16 @@ class DataFrameNaFunctionsSuite extends QueryTest with SharedSparkSession {
val df = left.join(right, Seq("col1"))
// If column names are specified, the following fails due to ambiguity.
- val exception = intercept[AnalysisException] {
- df.na.fill("hello", Seq("col2"))
- }
- assert(exception.getMessage.contains("Reference 'col2' is ambiguous"))
+ checkError(
+ exception = intercept[AnalysisException] {
+ df.na.fill("hello", Seq("col2"))
+ },
+ errorClass = "AMBIGUOUS_REFERENCE",
+ parameters = Map(
+ "name" -> "`col2`",
+ "referenceNames" -> "[`col2`, `col2`]"
+ )
+ )
// If column names are not specified, fill() is applied to all the eligible columns.
checkAnswer(
@@ -414,10 +426,16 @@ class DataFrameNaFunctionsSuite extends QueryTest with SharedSparkSession {
val df = left.join(right, Seq("col1"))
// If column names are specified, the following fails due to ambiguity.
- val exception = intercept[AnalysisException] {
- df.na.drop("any", Seq("col2"))
- }
- assert(exception.getMessage.contains("Reference 'col2' is ambiguous"))
+ checkError(
+ exception = intercept[AnalysisException] {
+ df.na.drop("any", Seq("col2"))
+ },
+ errorClass = "AMBIGUOUS_REFERENCE",
+ parameters = Map(
+ "name" -> "`col2`",
+ "referenceNames" -> "[`col2`, `col2`]"
+ )
+ )
// If column names are not specified, drop() is applied to all the eligible rows.
checkAnswer(
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala
index ceb1a75e83d..47ff942e5ca 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala
@@ -138,18 +138,46 @@ class DataFrameStatSuite extends QueryTest with SharedSparkSession {
assert(dfx.stat.freqItems(Array("table1.num", "table2.num")).collect()(0).length == 2)
// this should throw "Reference 'num' is ambiguous"
- intercept[AnalysisException] {
- dfx.stat.freqItems(Array("num"))
- }
- intercept[AnalysisException] {
- dfx.stat.approxQuantile("num", Array(0.1), 0.0)
- }
- intercept[AnalysisException] {
- dfx.stat.cov("num", "num")
- }
- intercept[AnalysisException] {
- dfx.stat.corr("num", "num")
- }
+ checkError(
+ exception = intercept[AnalysisException] {
+ dfx.stat.freqItems(Array("num"))
+ },
+ errorClass = "AMBIGUOUS_REFERENCE",
+ parameters = Map(
+ "name" -> "`num`",
+ "referenceNames" -> "[`table1`.`num`, `table2`.`num`]"
+ )
+ )
+ checkError(
+ exception = intercept[AnalysisException] {
+ dfx.stat.approxQuantile("num", Array(0.1), 0.0)
+ },
+ errorClass = "AMBIGUOUS_REFERENCE",
+ parameters = Map(
+ "name" -> "`num`",
+ "referenceNames" -> "[`table1`.`num`, `table2`.`num`]"
+ )
+ )
+ checkError(
+ exception = intercept[AnalysisException] {
+ dfx.stat.cov("num", "num")
+ },
+ errorClass = "AMBIGUOUS_REFERENCE",
+ parameters = Map(
+ "name" -> "`num`",
+ "referenceNames" -> "[`table1`.`num`, `table2`.`num`]"
+ )
+ )
+ checkError(
+ exception = intercept[AnalysisException] {
+ dfx.stat.corr("num", "num")
+ },
+ errorClass = "AMBIGUOUS_REFERENCE",
+ parameters = Map(
+ "name" -> "`num`",
+ "referenceNames" -> "[`table1`.`num`, `table2`.`num`]"
+ )
+ )
}
test("SPARK-40933 test cov & corr with null values and empty dataset") {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala
index 80f258c4659..3202ef728e5 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala
@@ -1929,10 +1929,14 @@ class PlanResolutionSuite extends AnalysisTest {
// no aliases
Seq(("v2Table", "v2Table1"), ("testcat.tab", "testcat.tab1")).foreach { pair =>
+ def referenceNames(target: String, column: String): String = target match {
+ case "v2Table" => s"[`spark_catalog`.`default`.`v2Table1`.`$column`, " +
+ s"`spark_catalog`.`default`.`v2Table`.`$column`]"
+ case "testcat.tab" => s"[`testcat`.`tab1`.`$column`, `testcat`.`tab`.`$column`]"
+ }
val target = pair._1
val source = pair._2
-
val sql1 =
s"""
|MERGE INTO $target
@@ -1986,8 +1990,8 @@ class PlanResolutionSuite extends AnalysisTest {
// resolve column `i` as it's ambiguous.
checkError(
exception = intercept[AnalysisException](parseAndResolve(sql2)),
- errorClass = null,
- parameters = Map.empty,
+ errorClass = "AMBIGUOUS_REFERENCE",
+ parameters = Map("name" -> "`i`", "referenceNames" -> referenceNames(target, "i")),
context = ExpectedContext(
fragment = "i",
start = 22 + target.length + source.length,
@@ -2002,8 +2006,8 @@ class PlanResolutionSuite extends AnalysisTest {
// resolve column `s` as it's ambiguous.
checkError(
exception = intercept[AnalysisException](parseAndResolve(sql3)),
- errorClass = null,
- parameters = Map.empty,
+ errorClass = "AMBIGUOUS_REFERENCE",
+ parameters = Map("name" -> "`s`", "referenceNames" -> referenceNames(target, "s")),
context = ExpectedContext(
fragment = "s",
start = 46 + target.length + source.length,
@@ -2018,8 +2022,8 @@ class PlanResolutionSuite extends AnalysisTest {
// resolve column `s` as it's ambiguous.
checkError(
exception = intercept[AnalysisException](parseAndResolve(sql4)),
- errorClass = null,
- parameters = Map.empty,
+ errorClass = "AMBIGUOUS_REFERENCE",
+ parameters = Map("name" -> "`s`", "referenceNames" -> referenceNames(target, "s")),
context = ExpectedContext(
fragment = "s",
start = 46 + target.length + source.length,
@@ -2034,8 +2038,8 @@ class PlanResolutionSuite extends AnalysisTest {
// resolve column `s` as it's ambiguous.
checkError(
exception = intercept[AnalysisException](parseAndResolve(sql5)),
- errorClass = null,
- parameters = Map.empty,
+ errorClass = "AMBIGUOUS_REFERENCE",
+ parameters = Map("name" -> "`s`", "referenceNames" -> referenceNames(target, "s")),
context = ExpectedContext(
fragment = "s",
start = 61 + target.length + source.length,
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala
index aa0051a54af..a7f9da84c1f 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala
@@ -674,11 +674,21 @@ class OrcFilterSuite extends OrcTest with SharedSparkSession {
// Exception thrown for ambiguous case.
withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") {
- val e = intercept[AnalysisException] {
- sql(s"select a from $tableName where a < 0").collect()
- }
- assert(e.getMessage.contains(
- "Reference 'a' is ambiguous"))
+ checkError(
+ exception = intercept[AnalysisException] {
+ sql(s"select a from $tableName where a < 0").collect()
+ },
+ errorClass = "AMBIGUOUS_REFERENCE",
+ parameters = Map(
+ "name" -> "`a`",
+ "referenceNames" -> ("[`spark_catalog`.`default`.`spark_32622`.`a`, " +
+ "`spark_catalog`.`default`.`spark_32622`.`a`]")),
+ context = ExpectedContext(
+ fragment = "a",
+ start = 32,
+ stop = 32
+ )
+ )
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org