You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ya...@apache.org on 2020/06/05 22:38:37 UTC

[spark] branch branch-3.0 updated: [SPARK-31904][SQL] Fix case sensitive problem of char and varchar partition columns

This is an automated email from the ASF dual-hosted git repository.

yamamuro pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new fa608b9  [SPARK-31904][SQL] Fix case sensitive problem of char and varchar partition columns
fa608b9 is described below

commit fa608b949b854d716904f4e43a4a10c71742b3c6
Author: LantaoJin <ji...@gmail.com>
AuthorDate: Sat Jun 6 07:35:25 2020 +0900

    [SPARK-31904][SQL] Fix case sensitive problem of char and varchar partition columns
    
    ### What changes were proposed in this pull request?
    ```sql
    CREATE TABLE t1(a STRING, B VARCHAR(10), C CHAR(10)) STORED AS parquet;
    CREATE TABLE t2 USING parquet PARTITIONED BY (b, c) AS SELECT * FROM t1;
    SELECT * FROM t2 WHERE b = 'A';
    ```
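    The above SQL throws a MetaException (stack trace below). The partition columns
    are declared as `B VARCHAR(10)` and `C CHAR(10)`, but the query refers to them as
    `b` and `c`. The check in `HiveShim` that excludes varchar/char partition keys from
    the metastore partition filter compared column names with an exact,
    case-sensitive match (`varcharKeys.contains(attr.name)`), so it could miss them.
    This PR compares the names with the session resolver (`SQLConf.get.resolver`)
    instead.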
    
    > Caused by: java.lang.reflect.InvocationTargetException
    	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    	at java.lang.reflect.Method.invoke(Method.java:498)
    	at org.apache.spark.sql.hive.client.Shim_v0_13.getPartitionsByFilter(HiveShim.scala:810)
    	... 114 more
    Caused by: MetaException(message:Filtering is supported only on partition keys of type string, or integral types)
    	at org.apache.hadoop.hive.metastore.parser.ExpressionTree$FilterBuilder.setError(ExpressionTree.java:184)
    	at org.apache.hadoop.hive.metastore.parser.ExpressionTree$LeafNode.getJdoFilterPushdownParam(ExpressionTree.java:439)
    	at org.apache.hadoop.hive.metastore.parser.ExpressionTree$LeafNode.generateJDOFilterOverPartitions(ExpressionTree.java:356)
    	at org.apache.hadoop.hive.metastore.parser.ExpressionTree$LeafNode.generateJDOFilter(ExpressionTree.java:278)
    	at org.apache.hadoop.hive.metastore.parser.ExpressionTree.generateJDOFilterFragment(ExpressionTree.java:583)
    	at org.apache.hadoop.hive.metastore.ObjectStore.makeQueryFilterString(ObjectStore.java:3315)
    	at org.apache.hadoop.hive.metastore.ObjectStore.getPartitionsViaOrmFilter(ObjectStore.java:2768)
    	at org.apache.hadoop.hive.metastore.ObjectStore.access$500(ObjectStore.java:182)
    	at org.apache.hadoop.hive.metastore.ObjectStore$7.getJdoResult(ObjectStore.java:3248)
    	at org.apache.hadoop.hive.metastore.ObjectStore$7.getJdoResult(ObjectStore.java:3232)
    	at org.apache.hadoop.hive.metastore.ObjectStore$GetHelper.run(ObjectStore.java:2974)
    	at org.apache.hadoop.hive.metastore.ObjectStore.getPartitionsByFilterInternal(ObjectStore.java:3250)
    	at org.apache.hadoop.hive.metastore.ObjectStore.getPartitionsByFilter(ObjectStore.java:2906)
    	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    	at java.lang.reflect.Method.invoke(Method.java:498)
    	at org.apache.hadoop.hive.metastore.RawStoreProxy.invoke(RawStoreProxy.java:101)
    	at com.sun.proxy.$Proxy25.getPartitionsByFilter(Unknown Source)
    	at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.get_partitions_by_filter(HiveMetaStore.java:5093)
    	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    	at java.lang.reflect.Method.invoke(Method.java:498)
    	at org.apache.hadoop.hive.metastore.RetryingHMSHandler.invokeInternal(RetryingHMSHandler.java:148)
    	at org.apache.hadoop.hive.metastore.RetryingHMSHandler.invoke(RetryingHMSHandler.java:107)
    	at com.sun.proxy.$Proxy26.get_partitions_by_filter(Unknown Source)
    	at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.listPartitionsByFilter(HiveMetaStoreClient.java:1232)
    	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    	at java.lang.reflect.Method.invoke(Method.java:498)
    	at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.invoke(RetryingMetaStoreClient.java:173)
    	at com.sun.proxy.$Proxy27.listPartitionsByFilter(Unknown Source)
    	at org.apache.hadoop.hive.ql.metadata.Hive.getPartitionsByFilter(Hive.java:2679)
    	... 119 more
    
    ### Does this PR introduce _any_ user-facing change?
    No
    
    ### How was this patch tested?
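    Added a unit test to `HiveDDLSuite` that creates the tables above and checks that
    filtering on the `b` and `c` partition columns completes without an exception.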
    
    Closes #28724 from LantaoJin/SPARK-31904.
    
    Authored-by: LantaoJin <ji...@gmail.com>
    Signed-off-by: Takeshi Yamamuro <ya...@apache.org>
    (cherry picked from commit 5079831106ba22f81a26b4a8104a253422fa1b6a)
    Signed-off-by: Takeshi Yamamuro <ya...@apache.org>
---
 .../main/scala/org/apache/spark/sql/hive/client/HiveShim.scala |  3 ++-
 .../org/apache/spark/sql/hive/execution/HiveDDLSuite.scala     | 10 ++++++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala
index 2b80660..8df43b7 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala
@@ -708,7 +708,8 @@ private[client] class Shim_v0_13 extends Shim_v0_12 {
         .map(col => col.getName).toSet
 
       def unapply(attr: Attribute): Option[String] = {
-        if (varcharKeys.contains(attr.name)) {
+        val resolver = SQLConf.get.resolver
+        if (varcharKeys.exists(c => resolver(c, attr.name))) {
           None
         } else if (attr.dataType.isInstanceOf[IntegralType] || attr.dataType == StringType) {
           Some(attr.name)
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
index e8548fd..e8cf4ad 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -2720,4 +2720,14 @@ class HiveDDLSuite
       checkAnswer(sql("SHOW PARTITIONS ta_part"), Row("ts=10") :: Nil)
     }
   }
+
+  test("SPARK-31904: Fix case sensitive problem of char and varchar partition columns") {
+    withTable("t1", "t2") {
+      sql("CREATE TABLE t1(a STRING, B VARCHAR(10), C CHAR(10)) STORED AS parquet")
+      sql("CREATE TABLE t2 USING parquet PARTITIONED BY (b, c) AS SELECT * FROM t1")
+      // make sure there is no exception
+      assert(sql("SELECT * FROM t2 WHERE b = 'A'").collect().isEmpty)
+      assert(sql("SELECT * FROM t2 WHERE c = 'A'").collect().isEmpty)
+    }
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org