You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2020/07/01 06:37:03 UTC

[spark] branch branch-3.0 updated: [SPARK-32131][SQL] Fix AnalysisException messages at UNION/EXCEPT/MINUS operations

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new f344253  [SPARK-32131][SQL] Fix AnalysisException messages at UNION/EXCEPT/MINUS operations
f344253 is described below

commit f34425371b99463b24588b944dc7cca7dc15e584
Author: GuoPhilipse <46...@users.noreply.github.com>
AuthorDate: Tue Jun 30 23:33:50 2020 -0700

    [SPARK-32131][SQL] Fix AnalysisException messages at UNION/EXCEPT/MINUS operations
    
    ### What changes were proposed in this pull request?
    Fix incorrect exception messages produced when Union and set operations fail analysis.
    
    ### Why are the changes needed?
    Union and set operations can only be performed on tables with compatible column types, but when a table has more than two columns, the exception message will report the wrong column index.
    
    Steps to reproduce:
    
    ```
    drop table if exists test1;
    drop table if exists test2;
    drop table if exists test3;
    create table if not exists test1(id int, age int, name timestamp);
    create table if not exists test2(id int, age timestamp, name timestamp);
    create table if not exists test3(id int, age int, name int);
    insert into test1 select 1,2,'2020-01-01 01:01:01';
    insert into test2 select 1,'2020-01-01 01:01:01','2020-01-01 01:01:01';
    insert into test3 select 1,3,4;
    ```
    
    Query1:
    ```sql
    select * from test1 except select * from test2;
    ```
    Result1:
    ```
    Error: org.apache.spark.sql.AnalysisException: Except can only be performed on tables with the compatible column types. timestamp <> int at the second column of the second table;; 'Except false :- Project [id#620, age#621, name#622] : +- SubqueryAlias `default`.`test1` : +- HiveTableRelation `default`.`test1`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [id#620, age#621, name#622] +- Project [id#623, age#624, name#625] +- SubqueryAlias `default`.`test2` +- HiveTableRelation `d [...]
    ```
    
    Query2:
    
    ```sql
    select * from test1 except select * from test3;
    ```
    
    Result2:
    
    ```
    Error: org.apache.spark.sql.AnalysisException: Except can only be performed on tables with the compatible column types
     int <> timestamp at the 2th column of the second table;
    ```
    
    The above query1 produces the right exception message.
    The above query2 produces wrong error information; it should be changed to the following:
    
    ```
    Error: org.apache.spark.sql.AnalysisException: Except can only be performed on tables with the compatible column types.
    int <> timestamp at the third column of the second table
    ```
    
    ### Does this PR introduce _any_ user-facing change?
    NO
    
    ### How was this patch tested?
    unit test
    
    Closes #28951 from GuoPhilipse/32131-correct-error-messages.
    
    Lead-authored-by: GuoPhilipse <46...@users.noreply.github.com>
    Co-authored-by: GuoPhilipse <gu...@126.com>
    Signed-off-by: Dongjoon Hyun <do...@apache.org>
    (cherry picked from commit 02f3b80d3a277e0c19a66c28d935fa41da7b3307)
    Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
 .../sql/catalyst/analysis/CheckAnalysis.scala      |  3 +-
 .../sql/catalyst/analysis/AnalysisSuite.scala      | 53 ++++++++++++++++++++++
 2 files changed, 55 insertions(+), 1 deletion(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
index 066dc6d..e5b8ae7 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
@@ -341,7 +341,8 @@ trait CheckAnalysis extends PredicateHelper {
             def ordinalNumber(i: Int): String = i match {
               case 0 => "first"
               case 1 => "second"
-              case i => s"${i}th"
+              case 2 => "third"
+              case i => s"${i + 1}th"
             }
             val ref = dataTypes(operator.children.head)
             operator.children.tail.zipWithIndex.foreach { case (child, ti) =>
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
index 02472e1..8db2036 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
@@ -826,4 +826,57 @@ class AnalysisSuite extends AnalysisTest with Matchers {
       }
     }
   }
+
+  test("SPARK-32131: Fix wrong column index when we have more than two columns" +
+    " during union and set operations" ) {
+    val firstTable = LocalRelation(
+      AttributeReference("a", StringType)(),
+      AttributeReference("b", DoubleType)(),
+      AttributeReference("c", IntegerType)(),
+      AttributeReference("d", FloatType)())
+
+    val secondTable = LocalRelation(
+      AttributeReference("a", StringType)(),
+      AttributeReference("b", TimestampType)(),
+      AttributeReference("c", IntegerType)(),
+      AttributeReference("d", FloatType)())
+
+    val thirdTable = LocalRelation(
+      AttributeReference("a", StringType)(),
+      AttributeReference("b", DoubleType)(),
+      AttributeReference("c", TimestampType)(),
+      AttributeReference("d", FloatType)())
+
+    val fourthTable = LocalRelation(
+      AttributeReference("a", StringType)(),
+      AttributeReference("b", DoubleType)(),
+      AttributeReference("c", IntegerType)(),
+      AttributeReference("d", TimestampType)())
+
+    val r1 = Union(firstTable, secondTable)
+    val r2 = Union(firstTable, thirdTable)
+    val r3 = Union(firstTable, fourthTable)
+    val r4 = Except(firstTable, secondTable, isAll = false)
+    val r5 = Intersect(firstTable, secondTable, isAll = false)
+
+    assertAnalysisError(r1,
+      Seq("Union can only be performed on tables with the compatible column types. " +
+        "timestamp <> double at the second column of the second table"))
+
+    assertAnalysisError(r2,
+      Seq("Union can only be performed on tables with the compatible column types. " +
+        "timestamp <> int at the third column of the second table"))
+
+    assertAnalysisError(r3,
+      Seq("Union can only be performed on tables with the compatible column types. " +
+        "timestamp <> float at the 4th column of the second table"))
+
+    assertAnalysisError(r4,
+      Seq("Except can only be performed on tables with the compatible column types. " +
+        "timestamp <> double at the second column of the second table"))
+
+    assertAnalysisError(r5,
+      Seq("Intersect can only be performed on tables with the compatible column types. " +
+        "timestamp <> double at the second column of the second table"))
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org