You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by mi...@apache.org on 2023/09/12 16:21:19 UTC

[impala] 02/02: IMPALA-10086: Implicit cast comparing char and varchar

This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 9f05cf79fa385ee6a5245ec7cb9ec1b9302c543d
Author: Michael Smith <mi...@cloudera.com>
AuthorDate: Fri Aug 25 15:46:00 2023 -0700

    IMPALA-10086: Implicit cast comparing char and varchar
    
    Until IMPALA-7368, Impala allowed comparing char and varchar slots as in
    
      select * from functional.chars_tiny where cs = vc;
    
    IMPALA-7368 added DATE type support, and as part of that changed
    function call resolution to use a fit function based on the number of
    arguments that match the call types. Previously the comparison above
    would take the first matching function, which happened to be equality
    between STRING and STRING; CHAR and VARCHAR can both be implicitly cast
    to STRING, so this function worked. With the new function resolution,
    the best fit is equality between VARCHAR and VARCHAR, however implicit
    casting to VARCHAR(*) from CHAR wasn't allowed.
    
    The behavior before IMPALA-7368 was somewhat accidental; it depended on
    the order that builtin EQ functions are added via
    BinaryPredicate.initBuiltins -> Type.getSupportedTypes. Supported types
    happened to be ordered with STRING preceding VARCHAR and CHAR. The fit
    function makes sense and changing its behavior may have other
    consequences; it also makes sense that CHAR should be castable to
    VARCHAR.
    
    This change allows implicit cast between matching types. Functionally it
    only changes how we handle char/varchar comparison with wildcard
    char/varchar, because decimals are handled before checking for matching
    types and other type matching is the same as equals. It now allows
    casting to a compatible type when it is a char or varchar and the target
    type is a wildcard version of the same.
    
    Does not attempt to address differences from CHAR padding (IMPALA-1652).
    
    Testing:
    - Adds tests covering cast comparison and other implicit conversions.
    - Passed exhaustive test run.
    
    Change-Id: Ib89d0a391bc8f2152ecd9151c8872a01ba19c436
    Reviewed-on: http://gerrit.cloudera.org:8080/20425
    Reviewed-by: Peter Rozsa <pr...@cloudera.com>
    Reviewed-by: Daniel Becker <da...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../main/java/org/apache/impala/analysis/Expr.java | 16 +++---
 .../apache/impala/analysis/AnalyzeExprsTest.java   |  6 +++
 tests/query_test/test_cast_with_format.py          | 58 ++++++++++++++++++++++
 3 files changed, 71 insertions(+), 9 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/analysis/Expr.java b/fe/src/main/java/org/apache/impala/analysis/Expr.java
index 954430ad4..c5e3f02de 100644
--- a/fe/src/main/java/org/apache/impala/analysis/Expr.java
+++ b/fe/src/main/java/org/apache/impala/analysis/Expr.java
@@ -1569,15 +1569,13 @@ abstract public class Expr extends TreeNode<Expr> implements ParseNode, Cloneabl
     // If the targetType is NULL_TYPE then ignore the cast because NULL_TYPE
     // is compatible with all types and no cast is necessary.
     if (targetType.isNull()) return this;
-    if (!targetType.isDecimal()) {
-      // requested cast must be to assignment-compatible type
-      // (which implies no loss of precision)
-      if (!targetType.equals(type)) {
-        throw new SqlCastException(
-          "targetType=" + targetType + " type=" + type);
-      }
-    }
-    return uncheckedCastTo(targetType, compatibility);
+    // If decimal, cast to the target type.
+    if (targetType.isDecimal()) return uncheckedCastTo(targetType, compatibility);
+    // If they match, cast to the type both values can be assigned to (the definition of
+    // getAssignmentCompatibleType), which implies no loss of precision. Note that
+    // getAssignmentCompatibleType always returns a "real" (not wildcard) type.
+    if (type.matchesType(targetType)) return uncheckedCastTo(type, compatibility);
+    throw new SqlCastException("targetType=" + targetType + " type=" + type);
   }
 
   public final Expr castTo(Type targetType) throws AnalysisException {
diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeExprsTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzeExprsTest.java
index 8d74d380b..fd9417ff2 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeExprsTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeExprsTest.java
@@ -495,6 +495,12 @@ public class AnalyzeExprsTest extends AnalyzerTest {
     AnalyzesOk("select cast(cast('helloworld' as VARCHAR(3)) as string)");
     AnalyzesOk("select cast(cast('3.0' as VARCHAR(5)) as float)");
     AnalyzesOk("select NULL = cast('123' as CHAR(3))");
+    AnalyzesOk("select * from functional.chars_tiny where cs = vc");
+    AnalyzesOk("select * from functional.chars_tiny where vc = cs");
+    AnalyzesOk("insert into functional.chars_tiny(vc) VALUES " +
+        "(cast('aaabbb' as varchar(6))), (cast('cccddd' as char(6)))");
+    AnalyzesOk("insert into functional.chars_tiny(vc) VALUES " +
+        "(cast('aaabbb' as varchar(32))), (cast('cccddd' as char(32)))");
     AnalysisError("select now() = cast('hi' as CHAR(3))",
         "operands of type TIMESTAMP and CHAR(3) are not comparable: " +
         "now() = CAST('hi' AS CHAR(3))");
diff --git a/tests/query_test/test_cast_with_format.py b/tests/query_test/test_cast_with_format.py
index 8d49c7d32..80eee2642 100644
--- a/tests/query_test/test_cast_with_format.py
+++ b/tests/query_test/test_cast_with_format.py
@@ -2185,3 +2185,61 @@ class TestCastWithFormat(ImpalaTestSuite):
     err = self.execute_query_expect_failure(self.client,
         r'''select cast(date"2001-03-03" as string format '"text"FXYYYY-MM-DD')''')
     assert "FX modifier should be at the beginning of the format string." in str(err)
+
+  def test_varchar_cast(self, unique_database):
+    table = "{0}.test_varchar_casts".format(unique_database)
+    self.execute_query("create table {0} (c char(6), v varchar(6))".format(table))
+    self.execute_query("insert into {0} values (cast('test' as char(6)), "
+        "cast('test' as varchar(6))), (cast('tester' as char(6)), "
+        "cast('tester' as varchar(6)))".format(table))
+
+    # Compare char to varchar
+    select_star = "select * from " + table
+    assert ['test  \ttest', 'tester\ttester'] == self.execute_query(
+        select_star + " where c = cast(v as char(6))").data
+    assert ['tester\ttester'] == self.execute_query(
+        select_star + " where v = cast(c as varchar(6))").data
+    assert ['tester\ttester'] == self.execute_query(
+        select_star + " where v = cast(c as varchar)").data
+    # Newly supported cases in IMPALA-10086
+    assert ['tester\ttester'] == self.execute_query(
+        select_star + " where c = v").data
+    assert ['tester\ttester'] == self.execute_query(
+        select_star + " where v = c").data
+
+    # Compare char to literal
+    select_c = "select c from " + table
+    assert [] == self.execute_query(select_c + " where c = 'test'").data
+    assert ['test  '] == self.execute_query(
+        select_c + " where c = 'test  '").data
+    assert ['tester'] == self.execute_query(
+        select_c + " where c = 'tester'").data
+    assert ['test  '] == self.execute_query(
+        select_c + " where c = cast('test' as char(6))").data
+    assert ['tester'] == self.execute_query(
+        select_c + " where c = cast('tester' as char(6))").data
+    # Newly supported cases in IMPALA-10086
+    assert [] == self.execute_query(
+        select_c + " where c = cast('test' as varchar(6))").data
+    assert ['test  '] == self.execute_query(
+        select_c + " where c = cast('test  ' as varchar(6))").data
+    assert ['tester'] == self.execute_query(
+        select_c + " where c = cast('tester' as varchar(6))").data
+
+    # Compare varchar to literal
+    select_v = "select v from " + table
+    assert ['test'] == self.execute_query(
+        select_v + " where v = 'test'").data
+    assert ['tester'] == self.execute_query(
+        select_v + " where v = 'tester'").data
+    assert ['test'] == self.execute_query(
+        select_v + " where v = cast('test' as varchar(6))").data
+    assert ['tester'] == self.execute_query(
+        select_v + " where v = cast('tester' as varchar(6))").data
+    # Newly supported cases in IMPALA-10086
+    assert [] == self.execute_query(
+        select_v + " where v = cast('test' as char(6))").data
+    assert ['test'] == self.execute_query(
+        select_v + " where v = cast('test' as char(4))").data
+    assert ['tester'] == self.execute_query(
+        select_v + " where v = cast('tester' as char(6))").data