You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by mi...@apache.org on 2023/09/12 16:21:17 UTC

[impala] branch master updated (900f9f057 -> 9f05cf79f)

This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


    from 900f9f057 IMPALA-12411: Fix data race in expr-test teardown
     new a0cdb7b59 IMPALA-12231: Bump GBN to get HMS thrift API changes
     new 9f05cf79f IMPALA-10086: Implicit cast comparing char and varchar

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 bin/impala-config.sh                               | 22 ++++----
 fe/pom.xml                                         |  8 +++
 .../main/java/org/apache/impala/analysis/Expr.java | 16 +++---
 .../java/org/apache/impala/catalog/HdfsTable.java  |  3 +-
 .../apache/impala/analysis/AnalyzeExprsTest.java   |  6 +++
 java/shaded-deps/hive-exec/pom.xml                 |  4 ++
 java/test-hive-udfs/pom.xml                        |  4 ++
 tests/custom_cluster/test_events_custom_configs.py | 51 +++++++++++++++++++
 tests/custom_cluster/test_metastore_service.py     |  4 +-
 tests/query_test/test_cast_with_format.py          | 58 ++++++++++++++++++++++
 10 files changed, 152 insertions(+), 24 deletions(-)


[impala] 01/02: IMPALA-12231: Bump GBN to get HMS thrift API changes

Posted by mi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit a0cdb7b5943d03e806388f304b73328f141372ba
Author: Sai Hemanth Gantasala <sa...@cloudera.com>
AuthorDate: Tue Jun 20 20:15:41 2023 -0700

    IMPALA-12231: Bump GBN to get HMS thrift API changes
    
    We need two Hive changes, HIVE-27319 and HIVE-27337, for catalogd to work
    with the latest HMS server; they fix IMPALA-11768 and IMPALA-11939
    respectively.
    
    Bump CDP_BUILD_NUMBER (GBN) to 44206393
    Bump various CDP version numbers to be based on 7.2.18.0-273
    
    TESTING: Exhaustive tests ran clean
    Added a couple of tests for IMPALA-11939 and IMPALA-11768
    
    Change-Id: I117873b628aed3e24280f9fcd79643f918c8d5f3
    Reviewed-on: http://gerrit.cloudera.org:8080/20420
    Reviewed-by: Quanlong Huang <hu...@gmail.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 bin/impala-config.sh                               | 22 +++++-----
 fe/pom.xml                                         |  8 ++++
 .../java/org/apache/impala/catalog/HdfsTable.java  |  3 +-
 java/shaded-deps/hive-exec/pom.xml                 |  4 ++
 java/test-hive-udfs/pom.xml                        |  4 ++
 tests/custom_cluster/test_events_custom_configs.py | 51 ++++++++++++++++++++++
 tests/custom_cluster/test_metastore_service.py     |  4 +-
 7 files changed, 81 insertions(+), 15 deletions(-)

diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index 02a994135..13c2f872c 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -220,19 +220,19 @@ fi
 : ${IMPALA_TOOLCHAIN_HOST:=native-toolchain.s3.amazonaws.com}
 export IMPALA_TOOLCHAIN_HOST
 
-export CDP_BUILD_NUMBER=40643771
+export CDP_BUILD_NUMBER=44206393
 export CDP_MAVEN_REPOSITORY=\
 "https://${IMPALA_TOOLCHAIN_HOST}/build/cdp_components/${CDP_BUILD_NUMBER}/maven"
-export CDP_AVRO_JAVA_VERSION=1.8.2.7.2.18.0-41
-export CDP_HADOOP_VERSION=3.1.1.7.2.18.0-41
-export CDP_HBASE_VERSION=2.4.6.7.2.18.0-41
-export CDP_HIVE_VERSION=3.1.3000.7.2.18.0-41
-export CDP_ICEBERG_VERSION=1.1.0.7.2.18.0-41
-export CDP_KNOX_VERSION=1.3.0.7.2.18.0-41
-export CDP_OZONE_VERSION=1.3.0.7.2.18.0-41
-export CDP_PARQUET_VERSION=1.10.99.7.2.18.0-41
-export CDP_RANGER_VERSION=2.3.0.7.2.18.0-41
-export CDP_TEZ_VERSION=0.9.1.7.2.18.0-41
+export CDP_AVRO_JAVA_VERSION=1.8.2.7.2.18.0-273
+export CDP_HADOOP_VERSION=3.1.1.7.2.18.0-273
+export CDP_HBASE_VERSION=2.4.17.7.2.18.0-273
+export CDP_HIVE_VERSION=3.1.3000.7.2.18.0-273
+export CDP_ICEBERG_VERSION=1.1.0.7.2.18.0-273
+export CDP_KNOX_VERSION=1.3.0.7.2.18.0-273
+export CDP_OZONE_VERSION=1.3.0.7.2.18.0-273
+export CDP_PARQUET_VERSION=1.10.99.7.2.18.0-273
+export CDP_RANGER_VERSION=2.4.0.7.2.18.0-273
+export CDP_TEZ_VERSION=0.9.1.7.2.18.0-273
 
 # Ref: https://infra.apache.org/release-download-pages.html#closer
 : ${APACHE_MIRROR:="https://www.apache.org/dyn/closer.cgi"}
diff --git a/fe/pom.xml b/fe/pom.xml
index b817f3fb9..feb544ed4 100644
--- a/fe/pom.xml
+++ b/fe/pom.xml
@@ -491,6 +491,10 @@ under the License.
           <groupId>com.sun.jersey</groupId>
           <artifactId>jersey-server</artifactId>
         </exclusion>
+        <exclusion>
+          <groupId>com.cloudera</groupId>
+          <artifactId>*</artifactId>
+        </exclusion>
       </exclusions>
     </dependency>
     <dependency>
@@ -525,6 +529,10 @@ under the License.
           <groupId>org.apache.hadoop</groupId>
           <artifactId>*</artifactId>
         </exclusion>
+        <exclusion>
+          <groupId>com.cloudera</groupId>
+          <artifactId>*</artifactId>
+        </exclusion>
       </exclusions>
     </dependency>
 
diff --git a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java
index 70ede2d5e..c57671a43 100644
--- a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java
+++ b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java
@@ -45,6 +45,7 @@ import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.IMetaStoreClient;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.ForeignKeysRequest;
+import org.apache.hadoop.hive.metastore.api.InvalidObjectException;
 import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
 import org.apache.hadoop.hive.metastore.api.Partition;
 import org.apache.hadoop.hive.metastore.api.PrimaryKeysRequest;
@@ -2816,7 +2817,7 @@ public class HdfsTable extends Table implements FeFsTable {
       }
       reloadPartitions(client, hmsPartToHdfsPart, fileMetadataLoadOpts);
       return hmsPartToHdfsPart.size();
-    } catch (NoSuchObjectException e) {
+    } catch (NoSuchObjectException | InvalidObjectException e) {
       // HMS throws a NoSuchObjectException if the table does not exist
       // in HMS anymore. In case the partitions don't exist in HMS it does not include
       // them in the result of getPartitionsByNames.
diff --git a/java/shaded-deps/hive-exec/pom.xml b/java/shaded-deps/hive-exec/pom.xml
index 2ae440552..18cec153d 100644
--- a/java/shaded-deps/hive-exec/pom.xml
+++ b/java/shaded-deps/hive-exec/pom.xml
@@ -48,6 +48,10 @@ the same dependencies
           <groupId>org.apache.atlas</groupId>
           <artifactId>*</artifactId>
         </exclusion>
+        <exclusion>
+          <groupId>com.cloudera</groupId>
+          <artifactId>*</artifactId>
+        </exclusion>
       </exclusions>
     </dependency>
   </dependencies>
diff --git a/java/test-hive-udfs/pom.xml b/java/test-hive-udfs/pom.xml
index be43607c5..6af53c8ce 100644
--- a/java/test-hive-udfs/pom.xml
+++ b/java/test-hive-udfs/pom.xml
@@ -55,6 +55,10 @@ under the License.
           <groupId>org.apache.atlas</groupId>
           <artifactId>*</artifactId>
         </exclusion>
+        <exclusion>
+          <groupId>com.cloudera</groupId>
+          <artifactId>*</artifactId>
+        </exclusion>
       </exclusions>
     </dependency>
     <dependency>
diff --git a/tests/custom_cluster/test_events_custom_configs.py b/tests/custom_cluster/test_events_custom_configs.py
index 5adb1ea1a..fd69d8c91 100644
--- a/tests/custom_cluster/test_events_custom_configs.py
+++ b/tests/custom_cluster/test_events_custom_configs.py
@@ -22,6 +22,7 @@ import pytest
 
 from hive_metastore.ttypes import FireEventRequest
 from hive_metastore.ttypes import FireEventRequestData
+from hive_metastore.ttypes import InsertEventRequestData
 from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
 from tests.common.impala_test_suite import ImpalaTestSuite
 from tests.common.skip import SkipIfFS
@@ -557,6 +558,56 @@ class TestEventProcessingCustomConfigs(CustomClusterTestSuite):
     # 24 partitions inserted and hence we must refresh 24 partitions once.
     assert int(partitions_refreshed_after_hive) == int(partitions_refreshed_insert) + 24
 
+  @CustomClusterTestSuite.with_args(catalogd_args="--hms_event_polling_interval_s=5")
+  def test_event_processor_failure_extra_space(self, unique_database):
+    """This test verifies that impala event processor is in active state after
+    processing a couple of previously erroneous events"""
+    test_table = "extra_space_table"
+    # IMPALA-11939 -- create table event in HMS contains extra spaces in the db/table
+    self.run_stmt_in_hive("create table ` {}`.`{} ` (i1 int) partitioned by (year int)"
+      .format(unique_database, test_table))
+    self.run_stmt_in_hive("alter table ` {}`.`{} ` add columns (i2 int)"
+      .format(unique_database, test_table))
+    EventProcessorUtils.wait_for_event_processing(self)
+    assert EventProcessorUtils.get_event_processor_status() == "ACTIVE"
+
+  @CustomClusterTestSuite.with_args(catalogd_args="--hms_event_polling_interval_s=10")
+  def test_event_processor_dropped_partition(self, unique_database):
+    """This test verifies that impala event processor is in active state after
+    processing partitioned insert events of a dropped table"""
+    # IMPALA-11768 -- Insert partition events should be ignored
+    # if the table is dropped
+    test_table = "partitioned_table"
+
+    def is_event_processor_active(is_insert):
+      self.run_stmt_in_hive("create table {}.{} (i1 int) partitioned by (year int)"
+        .format(unique_database, test_table))
+      EventProcessorUtils.wait_for_event_processing(self)
+      self.client.execute("refresh {}.{}".format(unique_database, test_table))
+      self.run_stmt_in_hive(
+        "insert into {}.{} partition(year=2023) values (4),(5),(6)"
+        .format(unique_database, test_table))
+      data = FireEventRequestData()
+      if is_insert:
+        insert_data = InsertEventRequestData()
+        insert_data.filesAdded = "/warehouse/mytable/b1"
+        insert_data.replace = False
+        data.insertData = insert_data
+      else:
+        data.refreshEvent = True
+      req = FireEventRequest(True, data)
+      req.dbName = unique_database
+      req.tableName = test_table
+      req.partitionVals = ["2023"]
+      self.hive_client.fire_listener_event(req)
+      self.run_stmt_in_hive(
+        "drop table {}.{}".format(unique_database, test_table))
+      EventProcessorUtils.wait_for_event_processing(self)
+      assert EventProcessorUtils.get_event_processor_status() == "ACTIVE"
+
+    is_event_processor_active(True)
+    is_event_processor_active(False)
+
   @CustomClusterTestSuite.with_args(catalogd_args="--hms_event_polling_interval_s=1")
   def test_iceberg_self_events(self, unique_database):
     """This test checks that Impala doesn't refresh Iceberg tables on self events."""
diff --git a/tests/custom_cluster/test_metastore_service.py b/tests/custom_cluster/test_metastore_service.py
index d0a696c2b..fca6f3c4c 100644
--- a/tests/custom_cluster/test_metastore_service.py
+++ b/tests/custom_cluster/test_metastore_service.py
@@ -1201,10 +1201,8 @@ class TestMetastoreService(CustomClusterTestSuite):
       get_parts_req.tbl_name = "table-does-not-exist"
       get_parts_req.names = []
       if expect_fallback:
-        # TODO HMS actually throws an InvalidObjectException but the HMS API signature
-        # doesn't declare it in the signature.
         self.__get_parts_by_names_expect_exception(catalog_hms_client, get_parts_req,
-                                                   "Internal error")
+                                                   "InvalidObjectException")
       else:
         self.__get_parts_by_names_expect_exception(catalog_hms_client, get_parts_req,
           "Table {0}.table-does-not-exist not found".format(


[impala] 02/02: IMPALA-10086: Implicit cast comparing char and varchar

Posted by mi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 9f05cf79fa385ee6a5245ec7cb9ec1b9302c543d
Author: Michael Smith <mi...@cloudera.com>
AuthorDate: Fri Aug 25 15:46:00 2023 -0700

    IMPALA-10086: Implicit cast comparing char and varchar
    
    Until IMPALA-7368, Impala allowed comparing char and varchar slots as in
    
      select * from functional.chars_tiny where cs = vc;
    
    IMPALA-7368 added DATE type support, and as part of that changed
    function call resolution to use a fit function based on the number of
    arguments that match the call types. Previously the comparison above
    would take the first matching function, which happened to be equality
    between STRING and STRING; CHAR and VARCHAR can both be implicitly cast
    to STRING, so this function worked. With the new function resolution,
    the best fit is equality between VARCHAR and VARCHAR, however implicit
    casting to VARCHAR(*) from CHAR wasn't allowed.
    
    The behavior before IMPALA-7368 was somewhat accidental; it depended on
    the order that builtin EQ functions are added via
    BinaryPredicate.initBuiltins -> Type.getSupportedTypes. Supported types
    happened to be ordered with STRING preceding VARCHAR and CHAR. The fit
    function makes sense and changing its behavior may have other
    consequences; it also makes sense that CHAR should be castable to
    VARCHAR.
    
    This change allows implicit cast between matching types. Functionally it
    only changes how we handle char/varchar comparison with wildcard
    char/varchar, because decimals are handled before checking for matching
    types and other type matching is the same as equals. It now allows
    casting to a compatible type when it is a char or varchar and the target
    type is a wildcard version of the same.
    
    Does not attempt to address differences from CHAR padding (IMPALA-1652).
    
    Testing:
    - Adds tests covering cast comparison and other implicit conversions.
    - Passed exhaustive test run.
    
    Change-Id: Ib89d0a391bc8f2152ecd9151c8872a01ba19c436
    Reviewed-on: http://gerrit.cloudera.org:8080/20425
    Reviewed-by: Peter Rozsa <pr...@cloudera.com>
    Reviewed-by: Daniel Becker <da...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../main/java/org/apache/impala/analysis/Expr.java | 16 +++---
 .../apache/impala/analysis/AnalyzeExprsTest.java   |  6 +++
 tests/query_test/test_cast_with_format.py          | 58 ++++++++++++++++++++++
 3 files changed, 71 insertions(+), 9 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/analysis/Expr.java b/fe/src/main/java/org/apache/impala/analysis/Expr.java
index 954430ad4..c5e3f02de 100644
--- a/fe/src/main/java/org/apache/impala/analysis/Expr.java
+++ b/fe/src/main/java/org/apache/impala/analysis/Expr.java
@@ -1569,15 +1569,13 @@ abstract public class Expr extends TreeNode<Expr> implements ParseNode, Cloneabl
     // If the targetType is NULL_TYPE then ignore the cast because NULL_TYPE
     // is compatible with all types and no cast is necessary.
     if (targetType.isNull()) return this;
-    if (!targetType.isDecimal()) {
-      // requested cast must be to assignment-compatible type
-      // (which implies no loss of precision)
-      if (!targetType.equals(type)) {
-        throw new SqlCastException(
-          "targetType=" + targetType + " type=" + type);
-      }
-    }
-    return uncheckedCastTo(targetType, compatibility);
+    // If decimal, cast to the target type.
+    if (targetType.isDecimal()) return uncheckedCastTo(targetType, compatibility);
+    // If they match, cast to the type both values can be assigned to (the definition of
+    // getAssignmentCompatibleType), which implies no loss of precision. Note that
+    // getAssignmentCompatibleType always returns a "real" (not wildcard) type.
+    if (type.matchesType(targetType)) return uncheckedCastTo(type, compatibility);
+    throw new SqlCastException("targetType=" + targetType + " type=" + type);
   }
 
   public final Expr castTo(Type targetType) throws AnalysisException {
diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeExprsTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzeExprsTest.java
index 8d74d380b..fd9417ff2 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeExprsTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeExprsTest.java
@@ -495,6 +495,12 @@ public class AnalyzeExprsTest extends AnalyzerTest {
     AnalyzesOk("select cast(cast('helloworld' as VARCHAR(3)) as string)");
     AnalyzesOk("select cast(cast('3.0' as VARCHAR(5)) as float)");
     AnalyzesOk("select NULL = cast('123' as CHAR(3))");
+    AnalyzesOk("select * from functional.chars_tiny where cs = vc");
+    AnalyzesOk("select * from functional.chars_tiny where vc = cs");
+    AnalyzesOk("insert into functional.chars_tiny(vc) VALUES " +
+        "(cast('aaabbb' as varchar(6))), (cast('cccddd' as char(6)))");
+    AnalyzesOk("insert into functional.chars_tiny(vc) VALUES " +
+        "(cast('aaabbb' as varchar(32))), (cast('cccddd' as char(32)))");
     AnalysisError("select now() = cast('hi' as CHAR(3))",
         "operands of type TIMESTAMP and CHAR(3) are not comparable: " +
         "now() = CAST('hi' AS CHAR(3))");
diff --git a/tests/query_test/test_cast_with_format.py b/tests/query_test/test_cast_with_format.py
index 8d49c7d32..80eee2642 100644
--- a/tests/query_test/test_cast_with_format.py
+++ b/tests/query_test/test_cast_with_format.py
@@ -2185,3 +2185,61 @@ class TestCastWithFormat(ImpalaTestSuite):
     err = self.execute_query_expect_failure(self.client,
         r'''select cast(date"2001-03-03" as string format '"text"FXYYYY-MM-DD')''')
     assert "FX modifier should be at the beginning of the format string." in str(err)
+
+  def test_varchar_cast(self, unique_database):
+    table = "{0}.test_varchar_casts".format(unique_database)
+    self.execute_query("create table {0} (c char(6), v varchar(6))".format(table))
+    self.execute_query("insert into {0} values (cast('test' as char(6)), "
+        "cast('test' as varchar(6))), (cast('tester' as char(6)), "
+        "cast('tester' as varchar(6)))".format(table))
+
+    # Compare char to varchar
+    select_star = "select * from " + table
+    assert ['test  \ttest', 'tester\ttester'] == self.execute_query(
+        select_star + " where c = cast(v as char(6))").data
+    assert ['tester\ttester'] == self.execute_query(
+        select_star + " where v = cast(c as varchar(6))").data
+    assert ['tester\ttester'] == self.execute_query(
+        select_star + " where v = cast(c as varchar)").data
+    # Newly supported cases in IMPALA-10086
+    assert ['tester\ttester'] == self.execute_query(
+        select_star + " where c = v").data
+    assert ['tester\ttester'] == self.execute_query(
+        select_star + " where v = c").data
+
+    # Compare char to literal
+    select_c = "select c from " + table
+    assert [] == self.execute_query(select_c + " where c = 'test'").data
+    assert ['test  '] == self.execute_query(
+        select_c + " where c = 'test  '").data
+    assert ['tester'] == self.execute_query(
+        select_c + " where c = 'tester'").data
+    assert ['test  '] == self.execute_query(
+        select_c + " where c = cast('test' as char(6))").data
+    assert ['tester'] == self.execute_query(
+        select_c + " where c = cast('tester' as char(6))").data
+    # Newly supported cases in IMPALA-10086
+    assert [] == self.execute_query(
+        select_c + " where c = cast('test' as varchar(6))").data
+    assert ['test  '] == self.execute_query(
+        select_c + " where c = cast('test  ' as varchar(6))").data
+    assert ['tester'] == self.execute_query(
+        select_c + " where c = cast('tester' as varchar(6))").data
+
+    # Compare varchar to literal
+    select_v = "select v from " + table
+    assert ['test'] == self.execute_query(
+        select_v + " where v = 'test'").data
+    assert ['tester'] == self.execute_query(
+        select_v + " where v = 'tester'").data
+    assert ['test'] == self.execute_query(
+        select_v + " where v = cast('test' as varchar(6))").data
+    assert ['tester'] == self.execute_query(
+        select_v + " where v = cast('tester' as varchar(6))").data
+    # Newly supported cases in IMPALA-10086
+    assert [] == self.execute_query(
+        select_v + " where v = cast('test' as char(6))").data
+    assert ['test'] == self.execute_query(
+        select_v + " where v = cast('test' as char(4))").data
+    assert ['tester'] == self.execute_query(
+        select_v + " where v = cast('tester' as char(6))").data