Posted to commits@spark.apache.org by gu...@apache.org on 2020/06/02 12:10:24 UTC

[spark] branch branch-3.0 updated: [SPARK-31834][SQL] Improve error message for incompatible data types

This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new b1782af  [SPARK-31834][SQL] Improve error message for incompatible data types
b1782af is described below

commit b1782af007de94aacf4a9ec2bc93930575a1adea
Author: lipzhu <li...@ebay.com>
AuthorDate: Tue Jun 2 21:07:10 2020 +0900

    [SPARK-31834][SQL] Improve error message for incompatible data types
    
    ### What changes were proposed in this pull request?
    We should use dataType.catalogString to unify the data type mismatch messages.
    Before:
    ```sql
    spark-sql> create table SPARK_31834(a int) using parquet;
    spark-sql> insert into SPARK_31834 select '1';
    Error in query: Cannot write incompatible data to table '`default`.`spark_31834`':
    - Cannot safely cast 'a': StringType to IntegerType;
    ```
    
    After:
    ```sql
    spark-sql> create table SPARK_31834(a int) using parquet;
    spark-sql> insert into SPARK_31834 select '1';
    Error in query: Cannot write incompatible data to table '`default`.`spark_31834`':
    - Cannot safely cast 'a': string to int;
    ```
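
    The fix relies on the two renderings every DataType carries. A minimal
    sketch of the difference (a hypothetical REPL check; the printed values
    are assumptions based on Spark 3.x behaviour, not output captured here):

    ```scala
    import org.apache.spark.sql.types._

    // Each DataType has a Scala-style toString and a SQL-facing catalogString.
    val types: Seq[DataType] = Seq(StringType, IntegerType, LongType, DecimalType(2, 1))
    types.foreach { t =>
      // Prints e.g. "StringType -> string" and "DecimalType(2,1) -> decimal(2,1)"
      println(s"${t.toString} -> ${t.catalogString}")
    }
    ```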
    
    ### How was this patch tested?
    Unit tests.
    
    Closes #28654 from lipzhu/SPARK-31834.
    
    Authored-by: lipzhu <li...@ebay.com>
    Signed-off-by: HyukjinKwon <gu...@apache.org>
    (cherry picked from commit d79a8a88b15645a29fabb245b6db3b2179d0f3c0)
    Signed-off-by: HyukjinKwon <gu...@apache.org>
---
 docs/sql-ref-ansi-compliance.md                        |  2 +-
 .../scala/org/apache/spark/sql/types/DataType.scala    |  7 ++++---
 .../catalyst/analysis/DataSourceV2AnalysisSuite.scala  | 10 +++++-----
 .../sql/types/DataTypeWriteCompatibilitySuite.scala    | 18 +++++++++---------
 .../sql-tests/inputs/postgreSQL/window_part1.sql       |  2 +-
 .../sql-tests/inputs/postgreSQL/window_part3.sql       |  2 +-
 .../org/apache/spark/sql/sources/InsertSuite.scala     | 16 ++++++++--------
 .../spark/sql/test/DataFrameReaderWriterSuite.scala    | 10 +++++-----
 .../apache/spark/sql/hive/client/VersionsSuite.scala   |  2 +-
 9 files changed, 35 insertions(+), 34 deletions(-)

diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md
index b62834e..eab194c 100644
--- a/docs/sql-ref-ansi-compliance.md
+++ b/docs/sql-ref-ansi-compliance.md
@@ -95,7 +95,7 @@ CREATE TABLE t (v INT);
 -- `spark.sql.storeAssignmentPolicy=ANSI`
 INSERT INTO t VALUES ('1');
 org.apache.spark.sql.AnalysisException: Cannot write incompatible data to table '`default`.`t`':
-- Cannot safely cast 'v': StringType to IntegerType;
+- Cannot safely cast 'v': string to int;
 
 -- `spark.sql.storeAssignmentPolicy=LEGACY` (This is a legacy behaviour until Spark 2.x)
 INSERT INTO t VALUES ('1');
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala
index 7449a28..fe8d7ef 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala
@@ -457,7 +457,7 @@ object DataType {
 
       case (w: AtomicType, r: AtomicType) if storeAssignmentPolicy == STRICT =>
         if (!Cast.canUpCast(w, r)) {
-          addError(s"Cannot safely cast '$context': $w to $r")
+          addError(s"Cannot safely cast '$context': ${w.catalogString} to ${r.catalogString}")
           false
         } else {
           true
@@ -467,7 +467,7 @@ object DataType {
 
       case (w: AtomicType, r: AtomicType) if storeAssignmentPolicy == ANSI =>
         if (!Cast.canANSIStoreAssign(w, r)) {
-          addError(s"Cannot safely cast '$context': $w to $r")
+          addError(s"Cannot safely cast '$context': ${w.catalogString} to ${r.catalogString}")
           false
         } else {
           true
@@ -477,7 +477,8 @@ object DataType {
         true
 
       case (w, r) =>
-        addError(s"Cannot write '$context': $w is incompatible with $r")
+        addError(s"Cannot write '$context': " +
+          s"${w.catalogString} is incompatible with ${r.catalogString}")
         false
     }
   }
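
Both rewritten branches above sit behind catalyst's cast checks, which is
where the STRICT and ANSI store-assignment policies diverge. A hedged sketch
of that split (Cast.canUpCast and Cast.canANSIStoreAssign are internal
catalyst APIs, and the boolean results are assumptions about Spark 3.0
behaviour, shown only to illustrate why both branches needed the same
message fix):

```scala
import org.apache.spark.sql.catalyst.expressions.Cast
import org.apache.spark.sql.types._

// STRICT accepts only lossless up-casts, so narrowing long -> int fails.
Cast.canUpCast(LongType, IntegerType)            // false: may overflow
// ANSI tolerates numeric narrowing (overflow is checked at runtime)...
Cast.canANSIStoreAssign(LongType, IntegerType)   // true
// ...but still rejects string -> int, yielding the message in this patch.
Cast.canANSIStoreAssign(StringType, IntegerType) // false
```

Whichever branch returns false now reports catalogString names, so a SQL
user sees "string to int" instead of "StringType to IntegerType".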
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DataSourceV2AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DataSourceV2AnalysisSuite.scala
index c01dea9..e466d55 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DataSourceV2AnalysisSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DataSourceV2AnalysisSuite.scala
@@ -21,7 +21,7 @@ import java.net.URI
 import java.util.Locale
 
 import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, InMemoryCatalog, SessionCatalog}
-import org.apache.spark.sql.catalyst.expressions.{Alias, AnsiCast, AttributeReference, Cast, Expression, LessThanOrEqual, Literal}
+import org.apache.spark.sql.catalyst.expressions.{Alias, AnsiCast, AttributeReference, Cast, LessThanOrEqual, Literal}
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.internal.SQLConf.StoreAssignmentPolicy
@@ -143,7 +143,7 @@ abstract class DataSourceV2StrictAnalysisSuite extends DataSourceV2AnalysisBaseS
     assertNotResolved(parsedPlan)
     assertAnalysisError(parsedPlan, Seq(
       "Cannot write", "'table-name'",
-      "Cannot safely cast", "'x'", "'y'", "DoubleType to FloatType"))
+      "Cannot safely cast", "'x'", "'y'", "double to float"))
   }
 
   test("byName: multiple field errors are reported") {
@@ -160,7 +160,7 @@ abstract class DataSourceV2StrictAnalysisSuite extends DataSourceV2AnalysisBaseS
     assertNotResolved(parsedPlan)
     assertAnalysisError(parsedPlan, Seq(
       "Cannot write incompatible data to table", "'table-name'",
-      "Cannot safely cast", "'x'", "DoubleType to FloatType",
+      "Cannot safely cast", "'x'", "double to float",
       "Cannot write nullable values to non-null column", "'x'",
       "Cannot find data for output column", "'y'"))
   }
@@ -176,7 +176,7 @@ abstract class DataSourceV2StrictAnalysisSuite extends DataSourceV2AnalysisBaseS
     assertNotResolved(parsedPlan)
     assertAnalysisError(parsedPlan, Seq(
       "Cannot write", "'table-name'",
-      "Cannot safely cast", "'x'", "'y'", "DoubleType to FloatType"))
+      "Cannot safely cast", "'x'", "'y'", "double to float"))
   }
 
   test("byPosition: multiple field errors are reported") {
@@ -194,7 +194,7 @@ abstract class DataSourceV2StrictAnalysisSuite extends DataSourceV2AnalysisBaseS
     assertAnalysisError(parsedPlan, Seq(
       "Cannot write incompatible data to table", "'table-name'",
       "Cannot write nullable values to non-null column", "'x'",
-      "Cannot safely cast", "'x'", "DoubleType to FloatType"))
+      "Cannot safely cast", "'x'", "double to float"))
   }
 }
 
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeWriteCompatibilitySuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeWriteCompatibilitySuite.scala
index c47332f..1a262d6 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeWriteCompatibilitySuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeWriteCompatibilitySuite.scala
@@ -80,7 +80,7 @@ class StrictDataTypeWriteCompatibilitySuite extends DataTypeWriteCompatibilityBa
   test("Check NullType is incompatible with all other types") {
     allNonNullTypes.foreach { t =>
       assertSingleError(NullType, t, "nulls", s"Should not allow writing None to type $t") { err =>
-        assert(err.contains(s"incompatible with $t"))
+        assert(err.contains(s"incompatible with ${t.catalogString}"))
       }
     }
   }
@@ -145,12 +145,12 @@ class ANSIDataTypeWriteCompatibilitySuite extends DataTypeWriteCompatibilityBase
   test("Conversions between timestamp and long are not allowed") {
     assertSingleError(LongType, TimestampType, "longToTimestamp",
       "Should not allow long to timestamp") { err =>
-      assert(err.contains("Cannot safely cast 'longToTimestamp': LongType to TimestampType"))
+      assert(err.contains("Cannot safely cast 'longToTimestamp': bigint to timestamp"))
     }
 
     assertSingleError(TimestampType, LongType, "timestampToLong",
       "Should not allow timestamp to long") { err =>
-      assert(err.contains("Cannot safely cast 'timestampToLong': TimestampType to LongType"))
+      assert(err.contains("Cannot safely cast 'timestampToLong': timestamp to bigint"))
     }
   }
 
@@ -209,8 +209,8 @@ abstract class DataTypeWriteCompatibilityBaseSuite extends SparkFunSuite {
             s"Should not allow writing $w to $r because cast is not safe") { err =>
             assert(err.contains("'t'"), "Should include the field name context")
             assert(err.contains("Cannot safely cast"), "Should identify unsafe cast")
-            assert(err.contains(s"$w"), "Should include write type")
-            assert(err.contains(s"$r"), "Should include read type")
+            assert(err.contains(s"${w.catalogString}"), "Should include write type")
+            assert(err.contains(s"${r.catalogString}"), "Should include read type")
           }
         }
       }
@@ -413,7 +413,7 @@ abstract class DataTypeWriteCompatibilityBaseSuite extends SparkFunSuite {
     assertNumErrors(writeType, readType, "top", "Should catch 14 errors", 14) { errs =>
       assert(errs(0).contains("'top.a.element'"), "Should identify bad type")
       assert(errs(0).contains("Cannot safely cast"))
-      assert(errs(0).contains("StringType to DoubleType"))
+      assert(errs(0).contains("string to double"))
 
       assert(errs(1).contains("'top.a'"), "Should identify bad type")
       assert(errs(1).contains("Cannot write nullable elements to array of non-nulls"))
@@ -430,11 +430,11 @@ abstract class DataTypeWriteCompatibilityBaseSuite extends SparkFunSuite {
 
       assert(errs(5).contains("'top.m.key'"), "Should identify bad type")
       assert(errs(5).contains("Cannot safely cast"))
-      assert(errs(5).contains("StringType to LongType"))
+      assert(errs(5).contains("string to bigint"))
 
       assert(errs(6).contains("'top.m.value'"), "Should identify bad type")
       assert(errs(6).contains("Cannot safely cast"))
-      assert(errs(6).contains("BooleanType to FloatType"))
+      assert(errs(6).contains("boolean to float"))
 
       assert(errs(7).contains("'top.m'"), "Should identify bad type")
       assert(errs(7).contains("Cannot write nullable values to map of non-nulls"))
@@ -452,7 +452,7 @@ abstract class DataTypeWriteCompatibilityBaseSuite extends SparkFunSuite {
 
       assert(errs(11).contains("'top.x'"), "Should identify bad type")
       assert(errs(11).contains("Cannot safely cast"))
-      assert(errs(11).contains("StringType to IntegerType"))
+      assert(errs(11).contains("string to int"))
 
       assert(errs(12).contains("'top'"), "Should identify bad type")
       assert(errs(12).contains("expected 'x', found 'y'"), "Should detect name mismatch")
diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part1.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part1.sql
index 087d7a5..6e95aca 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part1.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part1.sql
@@ -146,7 +146,7 @@ SELECT count(*) OVER (PARTITION BY four) FROM (SELECT * FROM tenk1 WHERE FALSE)s
 -- mixture of agg/wfunc in the same window
 -- SELECT sum(salary) OVER w, rank() OVER w FROM empsalary WINDOW w AS (PARTITION BY depname ORDER BY salary DESC);
 
--- Cannot safely cast 'enroll_date': StringType to DateType;
+-- Cannot safely cast 'enroll_date': string to date;
 -- SELECT empno, depname, salary, bonus, depadj, MIN(bonus) OVER (ORDER BY empno), MAX(depadj) OVER () FROM(
 -- SELECT *,
 --   CASE WHEN enroll_date < '2008-01-01' THEN 2008 - extract(year FROM enroll_date) END * 500 AS bonus,
diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part3.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part3.sql
index cd3b74b..f4b8454 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part3.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part3.sql
@@ -42,7 +42,7 @@ create table datetimes (
     f_timestamp timestamp
 ) using parquet;
 
--- Spark cannot safely cast StringType to TimestampType
+-- Spark cannot safely cast string to timestamp
 -- [SPARK-29636] Spark can't parse '11:00 BST' or '2000-10-19 10:23:54+01' signatures to timestamp
 insert into datetimes values
 (1, timestamp '11:00', cast ('11:00 BST' as timestamp), cast ('1 year' as timestamp), cast ('2000-10-19 10:23:54+01' as timestamp), timestamp '2000-10-19 10:23:54'),
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
index bb762d2..e56ecd1 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
@@ -622,12 +622,12 @@ class InsertSuite extends DataSourceTest with SharedSparkSession {
         var msg = intercept[AnalysisException] {
           sql("insert into t select 1L, 2")
         }.getMessage
-        assert(msg.contains("Cannot safely cast 'i': LongType to IntegerType"))
+        assert(msg.contains("Cannot safely cast 'i': bigint to int"))
 
         msg = intercept[AnalysisException] {
           sql("insert into t select 1, 2.0")
         }.getMessage
-        assert(msg.contains("Cannot safely cast 'd': DecimalType(2,1) to DoubleType"))
+        assert(msg.contains("Cannot safely cast 'd': decimal(2,1) to double"))
 
         msg = intercept[AnalysisException] {
           sql("insert into t select 1, 2.0D, 3")
@@ -659,18 +659,18 @@ class InsertSuite extends DataSourceTest with SharedSparkSession {
         var msg = intercept[AnalysisException] {
           sql("insert into t values('a', 'b')")
         }.getMessage
-        assert(msg.contains("Cannot safely cast 'i': StringType to IntegerType") &&
-          msg.contains("Cannot safely cast 'd': StringType to DoubleType"))
+        assert(msg.contains("Cannot safely cast 'i': string to int") &&
+          msg.contains("Cannot safely cast 'd': string to double"))
         msg = intercept[AnalysisException] {
           sql("insert into t values(now(), now())")
         }.getMessage
-        assert(msg.contains("Cannot safely cast 'i': TimestampType to IntegerType") &&
-          msg.contains("Cannot safely cast 'd': TimestampType to DoubleType"))
+        assert(msg.contains("Cannot safely cast 'i': timestamp to int") &&
+          msg.contains("Cannot safely cast 'd': timestamp to double"))
         msg = intercept[AnalysisException] {
           sql("insert into t values(true, false)")
         }.getMessage
-        assert(msg.contains("Cannot safely cast 'i': BooleanType to IntegerType") &&
-          msg.contains("Cannot safely cast 'd': BooleanType to DoubleType"))
+        assert(msg.contains("Cannot safely cast 'i': boolean to int") &&
+          msg.contains("Cannot safely cast 'd': boolean to double"))
       }
     }
   }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala
index 9747840..fe0a843 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala
@@ -333,7 +333,7 @@ class DataFrameReaderWriterSuite extends QueryTest with SharedSparkSession with
         var msg = intercept[AnalysisException] {
           Seq((1L, 2.0)).toDF("i", "d").write.mode("append").saveAsTable("t")
         }.getMessage
-        assert(msg.contains("Cannot safely cast 'i': LongType to IntegerType"))
+        assert(msg.contains("Cannot safely cast 'i': bigint to int"))
 
         // Insert into table successfully.
         Seq((1, 2.0)).toDF("i", "d").write.mode("append").saveAsTable("t")
@@ -354,14 +354,14 @@ class DataFrameReaderWriterSuite extends QueryTest with SharedSparkSession with
         var msg = intercept[AnalysisException] {
           Seq(("a", "b")).toDF("i", "d").write.mode("append").saveAsTable("t")
         }.getMessage
-        assert(msg.contains("Cannot safely cast 'i': StringType to IntegerType") &&
-          msg.contains("Cannot safely cast 'd': StringType to DoubleType"))
+        assert(msg.contains("Cannot safely cast 'i': string to int") &&
+          msg.contains("Cannot safely cast 'd': string to double"))
 
         msg = intercept[AnalysisException] {
           Seq((true, false)).toDF("i", "d").write.mode("append").saveAsTable("t")
         }.getMessage
-        assert(msg.contains("Cannot safely cast 'i': BooleanType to IntegerType") &&
-          msg.contains("Cannot safely cast 'd': BooleanType to DoubleType"))
+        assert(msg.contains("Cannot safely cast 'i': boolean to int") &&
+          msg.contains("Cannot safely cast 'd': boolean to double"))
       }
     }
   }
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala
index d1dd136..8642a5f 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala
@@ -982,7 +982,7 @@ class VersionsSuite extends SparkFunSuite with Logging {
            """.stripMargin
           )
 
-          val errorMsg = "Cannot safely cast 'f0': DecimalType(2,1) to BinaryType"
+          val errorMsg = "Cannot safely cast 'f0': decimal(2,1) to binary"
 
           if (isPartitioned) {
             val insertStmt = s"INSERT OVERWRITE TABLE $tableName partition (ds='a') SELECT 1.3"
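
The user-visible effect of the test changes above can be reproduced end to
end from a spark-shell. A hedged sketch (assumes a Spark 3.0 session where
spark.sql.storeAssignmentPolicy defaults to ANSI; the table name t mirrors
the docs example and is otherwise illustrative):

```scala
import spark.implicits._

spark.sql("CREATE TABLE t (v INT) USING parquet")
// Analysis now fails with:
//   Cannot write incompatible data to table '`default`.`t`':
//   - Cannot safely cast 'v': string to int;
Seq("1").toDF("v").write.mode("append").saveAsTable("t")
```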

