You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2020/06/02 12:10:24 UTC
[spark] branch branch-3.0 updated: [SPARK-31834][SQL] Improve error
message for incompatible data types
This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new b1782af [SPARK-31834][SQL] Improve error message for incompatible data types
b1782af is described below
commit b1782af007de94aacf4a9ec2bc93930575a1adea
Author: lipzhu <li...@ebay.com>
AuthorDate: Tue Jun 2 21:07:10 2020 +0900
[SPARK-31834][SQL] Improve error message for incompatible data types
### What changes were proposed in this pull request?
We should use dataType.catalogString to unify the data type mismatch messages.
Before:
```sql
spark-sql> create table SPARK_31834(a int) using parquet;
spark-sql> insert into SPARK_31834 select '1';
Error in query: Cannot write incompatible data to table '`default`.`spark_31834`':
- Cannot safely cast 'a': StringType to IntegerType;
```
After:
```sql
spark-sql> create table SPARK_31834(a int) using parquet;
spark-sql> insert into SPARK_31834 select '1';
Error in query: Cannot write incompatible data to table '`default`.`spark_31834`':
- Cannot safely cast 'a': string to int;
```
### How was this patch tested?
Unit tests (UT).
Closes #28654 from lipzhu/SPARK-31834.
Authored-by: lipzhu <li...@ebay.com>
Signed-off-by: HyukjinKwon <gu...@apache.org>
(cherry picked from commit d79a8a88b15645a29fabb245b6db3b2179d0f3c0)
Signed-off-by: HyukjinKwon <gu...@apache.org>
---
docs/sql-ref-ansi-compliance.md | 2 +-
.../scala/org/apache/spark/sql/types/DataType.scala | 7 ++++---
.../catalyst/analysis/DataSourceV2AnalysisSuite.scala | 10 +++++-----
.../sql/types/DataTypeWriteCompatibilitySuite.scala | 18 +++++++++---------
.../sql-tests/inputs/postgreSQL/window_part1.sql | 2 +-
.../sql-tests/inputs/postgreSQL/window_part3.sql | 2 +-
.../org/apache/spark/sql/sources/InsertSuite.scala | 16 ++++++++--------
.../spark/sql/test/DataFrameReaderWriterSuite.scala | 10 +++++-----
.../apache/spark/sql/hive/client/VersionsSuite.scala | 2 +-
9 files changed, 35 insertions(+), 34 deletions(-)
diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md
index b62834e..eab194c 100644
--- a/docs/sql-ref-ansi-compliance.md
+++ b/docs/sql-ref-ansi-compliance.md
@@ -95,7 +95,7 @@ CREATE TABLE t (v INT);
-- `spark.sql.storeAssignmentPolicy=ANSI`
INSERT INTO t VALUES ('1');
org.apache.spark.sql.AnalysisException: Cannot write incompatible data to table '`default`.`t`':
-- Cannot safely cast 'v': StringType to IntegerType;
+- Cannot safely cast 'v': string to int;
-- `spark.sql.storeAssignmentPolicy=LEGACY` (This is a legacy behaviour until Spark 2.x)
INSERT INTO t VALUES ('1');
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala
index 7449a28..fe8d7ef 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala
@@ -457,7 +457,7 @@ object DataType {
case (w: AtomicType, r: AtomicType) if storeAssignmentPolicy == STRICT =>
if (!Cast.canUpCast(w, r)) {
- addError(s"Cannot safely cast '$context': $w to $r")
+ addError(s"Cannot safely cast '$context': ${w.catalogString} to ${r.catalogString}")
false
} else {
true
@@ -467,7 +467,7 @@ object DataType {
case (w: AtomicType, r: AtomicType) if storeAssignmentPolicy == ANSI =>
if (!Cast.canANSIStoreAssign(w, r)) {
- addError(s"Cannot safely cast '$context': $w to $r")
+ addError(s"Cannot safely cast '$context': ${w.catalogString} to ${r.catalogString}")
false
} else {
true
@@ -477,7 +477,8 @@ object DataType {
true
case (w, r) =>
- addError(s"Cannot write '$context': $w is incompatible with $r")
+ addError(s"Cannot write '$context': " +
+ s"${w.catalogString} is incompatible with ${r.catalogString}")
false
}
}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DataSourceV2AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DataSourceV2AnalysisSuite.scala
index c01dea9..e466d55 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DataSourceV2AnalysisSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DataSourceV2AnalysisSuite.scala
@@ -21,7 +21,7 @@ import java.net.URI
import java.util.Locale
import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, InMemoryCatalog, SessionCatalog}
-import org.apache.spark.sql.catalyst.expressions.{Alias, AnsiCast, AttributeReference, Cast, Expression, LessThanOrEqual, Literal}
+import org.apache.spark.sql.catalyst.expressions.{Alias, AnsiCast, AttributeReference, Cast, LessThanOrEqual, Literal}
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.internal.SQLConf.StoreAssignmentPolicy
@@ -143,7 +143,7 @@ abstract class DataSourceV2StrictAnalysisSuite extends DataSourceV2AnalysisBaseS
assertNotResolved(parsedPlan)
assertAnalysisError(parsedPlan, Seq(
"Cannot write", "'table-name'",
- "Cannot safely cast", "'x'", "'y'", "DoubleType to FloatType"))
+ "Cannot safely cast", "'x'", "'y'", "double to float"))
}
test("byName: multiple field errors are reported") {
@@ -160,7 +160,7 @@ abstract class DataSourceV2StrictAnalysisSuite extends DataSourceV2AnalysisBaseS
assertNotResolved(parsedPlan)
assertAnalysisError(parsedPlan, Seq(
"Cannot write incompatible data to table", "'table-name'",
- "Cannot safely cast", "'x'", "DoubleType to FloatType",
+ "Cannot safely cast", "'x'", "double to float",
"Cannot write nullable values to non-null column", "'x'",
"Cannot find data for output column", "'y'"))
}
@@ -176,7 +176,7 @@ abstract class DataSourceV2StrictAnalysisSuite extends DataSourceV2AnalysisBaseS
assertNotResolved(parsedPlan)
assertAnalysisError(parsedPlan, Seq(
"Cannot write", "'table-name'",
- "Cannot safely cast", "'x'", "'y'", "DoubleType to FloatType"))
+ "Cannot safely cast", "'x'", "'y'", "double to float"))
}
test("byPosition: multiple field errors are reported") {
@@ -194,7 +194,7 @@ abstract class DataSourceV2StrictAnalysisSuite extends DataSourceV2AnalysisBaseS
assertAnalysisError(parsedPlan, Seq(
"Cannot write incompatible data to table", "'table-name'",
"Cannot write nullable values to non-null column", "'x'",
- "Cannot safely cast", "'x'", "DoubleType to FloatType"))
+ "Cannot safely cast", "'x'", "double to float"))
}
}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeWriteCompatibilitySuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeWriteCompatibilitySuite.scala
index c47332f..1a262d6 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeWriteCompatibilitySuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeWriteCompatibilitySuite.scala
@@ -80,7 +80,7 @@ class StrictDataTypeWriteCompatibilitySuite extends DataTypeWriteCompatibilityBa
test("Check NullType is incompatible with all other types") {
allNonNullTypes.foreach { t =>
assertSingleError(NullType, t, "nulls", s"Should not allow writing None to type $t") { err =>
- assert(err.contains(s"incompatible with $t"))
+ assert(err.contains(s"incompatible with ${t.catalogString}"))
}
}
}
@@ -145,12 +145,12 @@ class ANSIDataTypeWriteCompatibilitySuite extends DataTypeWriteCompatibilityBase
test("Conversions between timestamp and long are not allowed") {
assertSingleError(LongType, TimestampType, "longToTimestamp",
"Should not allow long to timestamp") { err =>
- assert(err.contains("Cannot safely cast 'longToTimestamp': LongType to TimestampType"))
+ assert(err.contains("Cannot safely cast 'longToTimestamp': bigint to timestamp"))
}
assertSingleError(TimestampType, LongType, "timestampToLong",
"Should not allow timestamp to long") { err =>
- assert(err.contains("Cannot safely cast 'timestampToLong': TimestampType to LongType"))
+ assert(err.contains("Cannot safely cast 'timestampToLong': timestamp to bigint"))
}
}
@@ -209,8 +209,8 @@ abstract class DataTypeWriteCompatibilityBaseSuite extends SparkFunSuite {
s"Should not allow writing $w to $r because cast is not safe") { err =>
assert(err.contains("'t'"), "Should include the field name context")
assert(err.contains("Cannot safely cast"), "Should identify unsafe cast")
- assert(err.contains(s"$w"), "Should include write type")
- assert(err.contains(s"$r"), "Should include read type")
+ assert(err.contains(s"${w.catalogString}"), "Should include write type")
+ assert(err.contains(s"${r.catalogString}"), "Should include read type")
}
}
}
@@ -413,7 +413,7 @@ abstract class DataTypeWriteCompatibilityBaseSuite extends SparkFunSuite {
assertNumErrors(writeType, readType, "top", "Should catch 14 errors", 14) { errs =>
assert(errs(0).contains("'top.a.element'"), "Should identify bad type")
assert(errs(0).contains("Cannot safely cast"))
- assert(errs(0).contains("StringType to DoubleType"))
+ assert(errs(0).contains("string to double"))
assert(errs(1).contains("'top.a'"), "Should identify bad type")
assert(errs(1).contains("Cannot write nullable elements to array of non-nulls"))
@@ -430,11 +430,11 @@ abstract class DataTypeWriteCompatibilityBaseSuite extends SparkFunSuite {
assert(errs(5).contains("'top.m.key'"), "Should identify bad type")
assert(errs(5).contains("Cannot safely cast"))
- assert(errs(5).contains("StringType to LongType"))
+ assert(errs(5).contains("string to bigint"))
assert(errs(6).contains("'top.m.value'"), "Should identify bad type")
assert(errs(6).contains("Cannot safely cast"))
- assert(errs(6).contains("BooleanType to FloatType"))
+ assert(errs(6).contains("boolean to float"))
assert(errs(7).contains("'top.m'"), "Should identify bad type")
assert(errs(7).contains("Cannot write nullable values to map of non-nulls"))
@@ -452,7 +452,7 @@ abstract class DataTypeWriteCompatibilityBaseSuite extends SparkFunSuite {
assert(errs(11).contains("'top.x'"), "Should identify bad type")
assert(errs(11).contains("Cannot safely cast"))
- assert(errs(11).contains("StringType to IntegerType"))
+ assert(errs(11).contains("string to int"))
assert(errs(12).contains("'top'"), "Should identify bad type")
assert(errs(12).contains("expected 'x', found 'y'"), "Should detect name mismatch")
diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part1.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part1.sql
index 087d7a5..6e95aca 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part1.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part1.sql
@@ -146,7 +146,7 @@ SELECT count(*) OVER (PARTITION BY four) FROM (SELECT * FROM tenk1 WHERE FALSE)s
-- mixture of agg/wfunc in the same window
-- SELECT sum(salary) OVER w, rank() OVER w FROM empsalary WINDOW w AS (PARTITION BY depname ORDER BY salary DESC);
--- Cannot safely cast 'enroll_date': StringType to DateType;
+-- Cannot safely cast 'enroll_date': string to date;
-- SELECT empno, depname, salary, bonus, depadj, MIN(bonus) OVER (ORDER BY empno), MAX(depadj) OVER () FROM(
-- SELECT *,
-- CASE WHEN enroll_date < '2008-01-01' THEN 2008 - extract(year FROM enroll_date) END * 500 AS bonus,
diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part3.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part3.sql
index cd3b74b..f4b8454 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part3.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part3.sql
@@ -42,7 +42,7 @@ create table datetimes (
f_timestamp timestamp
) using parquet;
--- Spark cannot safely cast StringType to TimestampType
+-- Spark cannot safely cast string to timestamp
-- [SPARK-29636] Spark can't parse '11:00 BST' or '2000-10-19 10:23:54+01' signatures to timestamp
insert into datetimes values
(1, timestamp '11:00', cast ('11:00 BST' as timestamp), cast ('1 year' as timestamp), cast ('2000-10-19 10:23:54+01' as timestamp), timestamp '2000-10-19 10:23:54'),
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
index bb762d2..e56ecd1 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
@@ -622,12 +622,12 @@ class InsertSuite extends DataSourceTest with SharedSparkSession {
var msg = intercept[AnalysisException] {
sql("insert into t select 1L, 2")
}.getMessage
- assert(msg.contains("Cannot safely cast 'i': LongType to IntegerType"))
+ assert(msg.contains("Cannot safely cast 'i': bigint to int"))
msg = intercept[AnalysisException] {
sql("insert into t select 1, 2.0")
}.getMessage
- assert(msg.contains("Cannot safely cast 'd': DecimalType(2,1) to DoubleType"))
+ assert(msg.contains("Cannot safely cast 'd': decimal(2,1) to double"))
msg = intercept[AnalysisException] {
sql("insert into t select 1, 2.0D, 3")
@@ -659,18 +659,18 @@ class InsertSuite extends DataSourceTest with SharedSparkSession {
var msg = intercept[AnalysisException] {
sql("insert into t values('a', 'b')")
}.getMessage
- assert(msg.contains("Cannot safely cast 'i': StringType to IntegerType") &&
- msg.contains("Cannot safely cast 'd': StringType to DoubleType"))
+ assert(msg.contains("Cannot safely cast 'i': string to int") &&
+ msg.contains("Cannot safely cast 'd': string to double"))
msg = intercept[AnalysisException] {
sql("insert into t values(now(), now())")
}.getMessage
- assert(msg.contains("Cannot safely cast 'i': TimestampType to IntegerType") &&
- msg.contains("Cannot safely cast 'd': TimestampType to DoubleType"))
+ assert(msg.contains("Cannot safely cast 'i': timestamp to int") &&
+ msg.contains("Cannot safely cast 'd': timestamp to double"))
msg = intercept[AnalysisException] {
sql("insert into t values(true, false)")
}.getMessage
- assert(msg.contains("Cannot safely cast 'i': BooleanType to IntegerType") &&
- msg.contains("Cannot safely cast 'd': BooleanType to DoubleType"))
+ assert(msg.contains("Cannot safely cast 'i': boolean to int") &&
+ msg.contains("Cannot safely cast 'd': boolean to double"))
}
}
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala
index 9747840..fe0a843 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala
@@ -333,7 +333,7 @@ class DataFrameReaderWriterSuite extends QueryTest with SharedSparkSession with
var msg = intercept[AnalysisException] {
Seq((1L, 2.0)).toDF("i", "d").write.mode("append").saveAsTable("t")
}.getMessage
- assert(msg.contains("Cannot safely cast 'i': LongType to IntegerType"))
+ assert(msg.contains("Cannot safely cast 'i': bigint to int"))
// Insert into table successfully.
Seq((1, 2.0)).toDF("i", "d").write.mode("append").saveAsTable("t")
@@ -354,14 +354,14 @@ class DataFrameReaderWriterSuite extends QueryTest with SharedSparkSession with
var msg = intercept[AnalysisException] {
Seq(("a", "b")).toDF("i", "d").write.mode("append").saveAsTable("t")
}.getMessage
- assert(msg.contains("Cannot safely cast 'i': StringType to IntegerType") &&
- msg.contains("Cannot safely cast 'd': StringType to DoubleType"))
+ assert(msg.contains("Cannot safely cast 'i': string to int") &&
+ msg.contains("Cannot safely cast 'd': string to double"))
msg = intercept[AnalysisException] {
Seq((true, false)).toDF("i", "d").write.mode("append").saveAsTable("t")
}.getMessage
- assert(msg.contains("Cannot safely cast 'i': BooleanType to IntegerType") &&
- msg.contains("Cannot safely cast 'd': BooleanType to DoubleType"))
+ assert(msg.contains("Cannot safely cast 'i': boolean to int") &&
+ msg.contains("Cannot safely cast 'd': boolean to double"))
}
}
}
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala
index d1dd136..8642a5f 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala
@@ -982,7 +982,7 @@ class VersionsSuite extends SparkFunSuite with Logging {
""".stripMargin
)
- val errorMsg = "Cannot safely cast 'f0': DecimalType(2,1) to BinaryType"
+ val errorMsg = "Cannot safely cast 'f0': decimal(2,1) to binary"
if (isPartitioned) {
val insertStmt = s"INSERT OVERWRITE TABLE $tableName partition (ds='a') SELECT 1.3"
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org