You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2022/04/18 15:11:52 UTC

[spark] branch master updated: [SPARK-37643][SQL] when charVarcharAsString is true, for char datatype predicate query should skip rpadding rule

This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new c1ea8b446d0 [SPARK-37643][SQL] when charVarcharAsString is true, for char datatype predicate query should skip rpadding rule
c1ea8b446d0 is described below

commit c1ea8b446d00dd0123a0fad93a3e143933419a76
Author: fhygh <28...@qq.com>
AuthorDate: Mon Apr 18 23:11:32 2022 +0800

    [SPARK-37643][SQL] when charVarcharAsString is true, for char datatype predicate query should skip rpadding rule
    
    ### What changes were proposed in this pull request?
    After the ApplyCharTypePadding rule was added, when the column used in a query predicate has the char data type and the value's length is less than the defined length, the value is right-padded, and the query then returns an incorrect result.
    
    ### Why are the changes needed?
    To fix incorrect query results when the predicate column's data type is char: in this case, when charVarcharAsString is true, we should skip the rpadding rule.
    
    ### Does this PR introduce _any_ user-facing change?
    Yes. Before this fix, anyone querying with a predicate on a char-type column had to be careful about setting charVarcharAsString to true.
    
    ### How was this patch tested?
    Added a new unit test.
    
    Closes #36187 from fhygh/charpredicatequery.
    
    Authored-by: fhygh <28...@qq.com>
    Signed-off-by: Wenchen Fan <we...@databricks.com>
---
 .../org/apache/spark/sql/catalyst/analysis/Analyzer.scala   |  3 +++
 .../scala/org/apache/spark/sql/CharVarcharTestSuite.scala   | 13 +++++++++++++
 2 files changed, 16 insertions(+)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index bf0142bb059..d00818ba1ea 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -4202,6 +4202,9 @@ object ApplyCharTypePadding extends Rule[LogicalPlan] {
   }
 
   override def apply(plan: LogicalPlan): LogicalPlan = {
+    if (SQLConf.get.charVarcharAsString) {
+      return plan
+    }
     plan.resolveOperatorsUpWithPruning(_.containsAnyPattern(BINARY_COMPARISON, IN)) {
       case operator => operator.transformExpressionsUpWithPruning(
         _.containsAnyPattern(BINARY_COMPARISON, IN)) {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala
index 6ade7a7c99e..978e3f8d36d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala
@@ -100,6 +100,19 @@ trait CharVarcharTestSuite extends QueryTest with SQLTestUtils {
     }
   }
 
+  test("char type values should not be padded when charVarcharAsString is true") {
+    withSQLConf(SQLConf.LEGACY_CHAR_VARCHAR_AS_STRING.key -> "true") {
+      withTable("t") {
+        sql(s"CREATE TABLE t(a STRING, b CHAR(5), c CHAR(5)) USING $format partitioned by (c)")
+        sql("INSERT INTO t VALUES ('abc', 'abc', 'abc')")
+        checkAnswer(sql("SELECT b FROM t WHERE b='abc'"), Row("abc"))
+        checkAnswer(sql("SELECT b FROM t WHERE b in ('abc')"), Row("abc"))
+        checkAnswer(sql("SELECT c FROM t WHERE c='abc'"), Row("abc"))
+        checkAnswer(sql("SELECT c FROM t WHERE c in ('abc')"), Row("abc"))
+      }
+    }
+  }
+
   test("varchar type values length check and trim: partitioned columns") {
     (0 to 5).foreach { n =>
       // SPARK-34192: we need to create a a new table for each round of test because of


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org