Posted to commits@spark.apache.org by we...@apache.org on 2020/12/24 07:41:26 UTC

[spark] branch branch-3.1 updated: [SPARK-33895][SQL] Char and Varchar fail in MetaOperation of ThriftServer

This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new 8da8e60  [SPARK-33895][SQL] Char and Varchar fail in MetaOperation of ThriftServer
8da8e60 is described below

commit 8da8e602ea8814104ab310ece657a129f62df192
Author: Kent Yao <ya...@hotmail.com>
AuthorDate: Thu Dec 24 07:40:38 2020 +0000

    [SPARK-33895][SQL] Char and Varchar fail in MetaOperation of ThriftServer
    
    ### What changes were proposed in this pull request?
    
    ```
    Caused by: java.lang.IllegalArgumentException: Unrecognized type name: CHAR(10)
    	at org.apache.spark.sql.hive.thriftserver.SparkGetColumnsOperation.toJavaSQLType(SparkGetColumnsOperation.scala:187)
    	at org.apache.spark.sql.hive.thriftserver.SparkGetColumnsOperation.$anonfun$addToRowSet$1(SparkGetColumnsOperation.scala:203)
    	at scala.collection.immutable.List.foreach(List.scala:392)
    	at org.apache.spark.sql.hive.thriftserver.SparkGetColumnsOperation.addToRowSet(SparkGetColumnsOperation.scala:195)
    	at org.apache.spark.sql.hive.thriftserver.SparkGetColumnsOperation.$anonfun$runInternal$4(SparkGetColumnsOperation.scala:99)
    	at org.apache.spark.sql.hive.thriftserver.SparkGetColumnsOperation.$anonfun$runInternal$4$adapted(SparkGetColumnsOperation.scala:98)
    	at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
    	at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
    	at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
    ```
    
    Metadata operations target the raw table schema, where char/varchar are not replaced with string, so these types need to be handled there as well.
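
    For illustration, a minimal reproduction sketch (the connection URL, database, and table name are illustrative, not taken from this patch): any JDBC client that calls `DatabaseMetaData.getColumns` on a table with char/varchar columns is served by `SparkGetColumnsOperation`, whose `toJavaSQLType` used to throw for `CHAR(10)`.
    
    ```scala
    // Hypothetical reproduction against a local Thrift server; the URL,
    // database, and table name are placeholders.
    import java.sql.DriverManager
    
    val conn = DriverManager.getConnection("jdbc:hive2://localhost:10000")
    try {
      val stmt = conn.createStatement()
      stmt.execute("CREATE TABLE repro (c CHAR(10), v VARCHAR(20)) USING parquet")
      // getColumns is backed by SparkGetColumnsOperation, which walks the
      // raw table schema and maps each column through toJavaSQLType.
      val columns = conn.getMetaData.getColumns(null, "default", "repro", "%")
      while (columns.next()) {
        // With the fix: c -> java.sql.Types.CHAR, v -> java.sql.Types.VARCHAR
        println(columns.getString("COLUMN_NAME") + " -> " + columns.getInt("DATA_TYPE"))
      }
    } finally {
      conn.close()
    }
    ```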
    
    ### Why are the changes needed?
    
    Bug fix; see the stack trace above.

    ### Does this PR introduce _any_ user-facing change?
    
    No.

    ### How was this patch tested?
    
    New tests; also verified locally:
    
    ![image](https://user-images.githubusercontent.com/8326978/103069196-cdfcc480-45f9-11eb-9c6a-d4c42123c6e3.png)
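
    In essence, the new assertions boil down to checks like the following (a sketch continuing the reproduction above, not the verbatim suite code; column sizes follow the `getColumnSize` change in this patch):
    
    ```scala
    // Sketch of the expected metadata after the fix (names illustrative):
    // char(n) reports java.sql.Types.CHAR with COLUMN_SIZE == n, while
    // varchar follows StringType and reports COLUMN_SIZE == 0.
    val rs = conn.getMetaData.getColumns(null, "default", "repro", "c")
    assert(rs.next())
    assert(rs.getInt("DATA_TYPE") == java.sql.Types.CHAR)
    assert(rs.getInt("COLUMN_SIZE") == 10) // CHAR(10)
    ```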
    
    Closes #30914 from yaooqinn/SPARK-33895.
    
    Authored-by: Kent Yao <ya...@hotmail.com>
    Signed-off-by: Wenchen Fan <we...@databricks.com>
    (cherry picked from commit d7dc42d5f6bbe861c7e4ac1bb49e0830af5e19f4)
    Signed-off-by: Wenchen Fan <we...@databricks.com>
---
 .../spark/sql/hive/thriftserver/SparkGetColumnsOperation.scala |  3 +++
 .../sql/hive/thriftserver/SparkGetTypeInfoOperation.scala      |  2 +-
 .../sql/hive/thriftserver/SparkMetadataOperationSuite.scala    | 10 +++++++---
 3 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetColumnsOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetColumnsOperation.scala
index 66e6cf8..1f9c05c 100644
--- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetColumnsOperation.scala
+++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetColumnsOperation.scala
@@ -133,6 +133,7 @@ private[hive] class SparkGetColumnsOperation(
     case dt @ (BooleanType | _: NumericType | DateType | TimestampType |
                CalendarIntervalType | NullType) =>
       Some(dt.defaultSize)
+    case CharType(n) => Some(n)
     case StructType(fields) =>
       val sizeArr = fields.map(f => getColumnSize(f.dataType))
       if (sizeArr.contains(None)) {
@@ -176,6 +177,8 @@ private[hive] class SparkGetColumnsOperation(
     case DoubleType => java.sql.Types.DOUBLE
     case _: DecimalType => java.sql.Types.DECIMAL
     case StringType => java.sql.Types.VARCHAR
+    case VarcharType(_) => java.sql.Types.VARCHAR
+    case CharType(_) => java.sql.Types.CHAR
     case BinaryType => java.sql.Types.BINARY
     case DateType => java.sql.Types.DATE
     case TimestampType => java.sql.Types.TIMESTAMP
diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTypeInfoOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTypeInfoOperation.scala
index 26b5f8a..bd6feea 100644
--- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTypeInfoOperation.scala
+++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTypeInfoOperation.scala
@@ -99,6 +99,6 @@ private[hive] object SparkGetTypeInfoUtil {
       TINYINT_TYPE, SMALLINT_TYPE, INT_TYPE, BIGINT_TYPE,
       FLOAT_TYPE, DOUBLE_TYPE, DECIMAL_TYPE,
       DATE_TYPE, TIMESTAMP_TYPE,
-      ARRAY_TYPE, MAP_TYPE, STRUCT_TYPE)
+      ARRAY_TYPE, MAP_TYPE, STRUCT_TYPE, CHAR_TYPE, VARCHAR_TYPE)
   }
 }
diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala
index bb74482..897ea00 100644
--- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala
+++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala
@@ -283,6 +283,8 @@ class SparkMetadataOperationSuite extends HiveThriftServer2TestBase {
       .add("c14", "timestamp", nullable = false, "14")
       .add("c15", "struct<X: bigint,Y: double>", nullable = true, "15")
       .add("c16", "binary", nullable = false, "16")
+      .add("c17", "char(255)", nullable = true, "17")
+      .add("c18", "varchar(1024)", nullable = false, "18")
 
     val ddl =
       s"""
@@ -299,7 +301,8 @@ class SparkMetadataOperationSuite extends HiveThriftServer2TestBase {
 
       import java.sql.Types._
       val expectedJavaTypes = Seq(BOOLEAN, TINYINT, SMALLINT, INTEGER, BIGINT, FLOAT, DOUBLE,
-        DECIMAL, DECIMAL, VARCHAR, ARRAY, ARRAY, JAVA_OBJECT, DATE, TIMESTAMP, STRUCT, BINARY)
+        DECIMAL, DECIMAL, VARCHAR, ARRAY, ARRAY, JAVA_OBJECT, DATE, TIMESTAMP, STRUCT, BINARY,
+        CHAR, VARCHAR)
 
       var pos = 0
 
@@ -313,7 +316,8 @@ class SparkMetadataOperationSuite extends HiveThriftServer2TestBase {
 
         val colSize = rowSet.getInt("COLUMN_SIZE")
         schema(pos).dataType match {
-          case StringType | BinaryType | _: ArrayType | _: MapType => assert(colSize === 0)
+          case StringType | BinaryType | _: ArrayType | _: MapType | _: VarcharType =>
+            assert(colSize === 0)
           case o => assert(colSize === o.defaultSize)
         }
 
@@ -342,7 +346,7 @@ class SparkMetadataOperationSuite extends HiveThriftServer2TestBase {
         pos += 1
       }
 
-      assert(pos === 17, "all columns should have been verified")
+      assert(pos === 19, "all columns should have been verified")
     }
   }
 

