You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by mi...@apache.org on 2023/09/12 16:21:18 UTC

[impala] 01/02: IMPALA-12231: Bump GBN to get HMS thrift API changes

This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit a0cdb7b5943d03e806388f304b73328f141372ba
Author: Sai Hemanth Gantasala <sa...@cloudera.com>
AuthorDate: Tue Jun 20 20:15:41 2023 -0700

    IMPALA-12231: Bump GBN to get HMS thrift API changes
    
    We need two Hive changes, HIVE-27319 and HIVE-27337, for catalogD to
    work with the latest HMS server; they fix IMPALA-11768 and IMPALA-11939
    respectively.
    
    Bump CDP_BUILD_NUMBER (GBN) to 44206393
    Bump various CDP version numbers to be based on 7.2.18.0-273
    
    TESTING: Exhaustive tests ran clean
    Added a couple of tests for IMPALA-11939 and IMPALA-11768
    
    Change-Id: I117873b628aed3e24280f9fcd79643f918c8d5f3
    Reviewed-on: http://gerrit.cloudera.org:8080/20420
    Reviewed-by: Quanlong Huang <hu...@gmail.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 bin/impala-config.sh                               | 22 +++++-----
 fe/pom.xml                                         |  8 ++++
 .../java/org/apache/impala/catalog/HdfsTable.java  |  3 +-
 java/shaded-deps/hive-exec/pom.xml                 |  4 ++
 java/test-hive-udfs/pom.xml                        |  4 ++
 tests/custom_cluster/test_events_custom_configs.py | 51 ++++++++++++++++++++++
 tests/custom_cluster/test_metastore_service.py     |  4 +-
 7 files changed, 81 insertions(+), 15 deletions(-)

diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index 02a994135..13c2f872c 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -220,19 +220,19 @@ fi
 : ${IMPALA_TOOLCHAIN_HOST:=native-toolchain.s3.amazonaws.com}
 export IMPALA_TOOLCHAIN_HOST
 
-export CDP_BUILD_NUMBER=40643771
+export CDP_BUILD_NUMBER=44206393
 export CDP_MAVEN_REPOSITORY=\
 "https://${IMPALA_TOOLCHAIN_HOST}/build/cdp_components/${CDP_BUILD_NUMBER}/maven"
-export CDP_AVRO_JAVA_VERSION=1.8.2.7.2.18.0-41
-export CDP_HADOOP_VERSION=3.1.1.7.2.18.0-41
-export CDP_HBASE_VERSION=2.4.6.7.2.18.0-41
-export CDP_HIVE_VERSION=3.1.3000.7.2.18.0-41
-export CDP_ICEBERG_VERSION=1.1.0.7.2.18.0-41
-export CDP_KNOX_VERSION=1.3.0.7.2.18.0-41
-export CDP_OZONE_VERSION=1.3.0.7.2.18.0-41
-export CDP_PARQUET_VERSION=1.10.99.7.2.18.0-41
-export CDP_RANGER_VERSION=2.3.0.7.2.18.0-41
-export CDP_TEZ_VERSION=0.9.1.7.2.18.0-41
+export CDP_AVRO_JAVA_VERSION=1.8.2.7.2.18.0-273
+export CDP_HADOOP_VERSION=3.1.1.7.2.18.0-273
+export CDP_HBASE_VERSION=2.4.17.7.2.18.0-273
+export CDP_HIVE_VERSION=3.1.3000.7.2.18.0-273
+export CDP_ICEBERG_VERSION=1.1.0.7.2.18.0-273
+export CDP_KNOX_VERSION=1.3.0.7.2.18.0-273
+export CDP_OZONE_VERSION=1.3.0.7.2.18.0-273
+export CDP_PARQUET_VERSION=1.10.99.7.2.18.0-273
+export CDP_RANGER_VERSION=2.4.0.7.2.18.0-273
+export CDP_TEZ_VERSION=0.9.1.7.2.18.0-273
 
 # Ref: https://infra.apache.org/release-download-pages.html#closer
 : ${APACHE_MIRROR:="https://www.apache.org/dyn/closer.cgi"}
diff --git a/fe/pom.xml b/fe/pom.xml
index b817f3fb9..feb544ed4 100644
--- a/fe/pom.xml
+++ b/fe/pom.xml
@@ -491,6 +491,10 @@ under the License.
           <groupId>com.sun.jersey</groupId>
           <artifactId>jersey-server</artifactId>
         </exclusion>
+        <exclusion>
+          <groupId>com.cloudera</groupId>
+          <artifactId>*</artifactId>
+        </exclusion>
       </exclusions>
     </dependency>
     <dependency>
@@ -525,6 +529,10 @@ under the License.
           <groupId>org.apache.hadoop</groupId>
           <artifactId>*</artifactId>
         </exclusion>
+        <exclusion>
+          <groupId>com.cloudera</groupId>
+          <artifactId>*</artifactId>
+        </exclusion>
       </exclusions>
     </dependency>
 
diff --git a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java
index 70ede2d5e..c57671a43 100644
--- a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java
+++ b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java
@@ -45,6 +45,7 @@ import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.IMetaStoreClient;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.ForeignKeysRequest;
+import org.apache.hadoop.hive.metastore.api.InvalidObjectException;
 import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
 import org.apache.hadoop.hive.metastore.api.Partition;
 import org.apache.hadoop.hive.metastore.api.PrimaryKeysRequest;
@@ -2816,7 +2817,7 @@ public class HdfsTable extends Table implements FeFsTable {
       }
       reloadPartitions(client, hmsPartToHdfsPart, fileMetadataLoadOpts);
       return hmsPartToHdfsPart.size();
-    } catch (NoSuchObjectException e) {
+    } catch (NoSuchObjectException | InvalidObjectException e) {
       // HMS throws a NoSuchObjectException if the table does not exist
       // in HMS anymore. In case the partitions don't exist in HMS it does not include
       // them in the result of getPartitionsByNames.
diff --git a/java/shaded-deps/hive-exec/pom.xml b/java/shaded-deps/hive-exec/pom.xml
index 2ae440552..18cec153d 100644
--- a/java/shaded-deps/hive-exec/pom.xml
+++ b/java/shaded-deps/hive-exec/pom.xml
@@ -48,6 +48,10 @@ the same dependencies
           <groupId>org.apache.atlas</groupId>
           <artifactId>*</artifactId>
         </exclusion>
+        <exclusion>
+          <groupId>com.cloudera</groupId>
+          <artifactId>*</artifactId>
+        </exclusion>
       </exclusions>
     </dependency>
   </dependencies>
diff --git a/java/test-hive-udfs/pom.xml b/java/test-hive-udfs/pom.xml
index be43607c5..6af53c8ce 100644
--- a/java/test-hive-udfs/pom.xml
+++ b/java/test-hive-udfs/pom.xml
@@ -55,6 +55,10 @@ under the License.
           <groupId>org.apache.atlas</groupId>
           <artifactId>*</artifactId>
         </exclusion>
+        <exclusion>
+          <groupId>com.cloudera</groupId>
+          <artifactId>*</artifactId>
+        </exclusion>
       </exclusions>
     </dependency>
     <dependency>
diff --git a/tests/custom_cluster/test_events_custom_configs.py b/tests/custom_cluster/test_events_custom_configs.py
index 5adb1ea1a..fd69d8c91 100644
--- a/tests/custom_cluster/test_events_custom_configs.py
+++ b/tests/custom_cluster/test_events_custom_configs.py
@@ -22,6 +22,7 @@ import pytest
 
 from hive_metastore.ttypes import FireEventRequest
 from hive_metastore.ttypes import FireEventRequestData
+from hive_metastore.ttypes import InsertEventRequestData
 from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
 from tests.common.impala_test_suite import ImpalaTestSuite
 from tests.common.skip import SkipIfFS
@@ -557,6 +558,56 @@ class TestEventProcessingCustomConfigs(CustomClusterTestSuite):
     # 24 partitions inserted and hence we must refresh 24 partitions once.
     assert int(partitions_refreshed_after_hive) == int(partitions_refreshed_insert) + 24
 
+  @CustomClusterTestSuite.with_args(catalogd_args="--hms_event_polling_interval_s=5")
+  def test_event_processor_failure_extra_space(self, unique_database):
+    """This test verifies that impala event processor is in active state after
+    processing a couple of previously erroneous events"""
+    test_table = "extra_space_table"
+    # IMPALA-11939 -- create table event in HMS contains extra spaces in the db/table
+    self.run_stmt_in_hive("create table ` {}`.`{} ` (i1 int) partitioned by (year int)"
+      .format(unique_database, test_table))
+    self.run_stmt_in_hive("alter table ` {}`.`{} ` add columns (i2 int)"
+      .format(unique_database, test_table))
+    EventProcessorUtils.wait_for_event_processing(self)
+    assert EventProcessorUtils.get_event_processor_status() == "ACTIVE"
+
+  @CustomClusterTestSuite.with_args(catalogd_args="--hms_event_polling_interval_s=10")
+  def test_event_processor_dropped_partition(self, unique_database):
+    """This test verifies that impala event processor is in active state after
+    processing partitioned insert events of a dropped table"""
+    # IMPALA-11768 -- Insert partition events should be ignored
+    # if the table is dropped
+    test_table = "partitioned_table"
+
+    def is_event_processor_active(is_insert):
+      self.run_stmt_in_hive("create table {}.{} (i1 int) partitioned by (year int)"
+        .format(unique_database, test_table))
+      EventProcessorUtils.wait_for_event_processing(self)
+      self.client.execute("refresh {}.{}".format(unique_database, test_table))
+      self.run_stmt_in_hive(
+        "insert into {}.{} partition(year=2023) values (4),(5),(6)"
+        .format(unique_database, test_table))
+      data = FireEventRequestData()
+      if is_insert:
+        insert_data = InsertEventRequestData()
+        insert_data.filesAdded = "/warehouse/mytable/b1"
+        insert_data.replace = False
+        data.insertData = insert_data
+      else:
+        data.refreshEvent = True
+      req = FireEventRequest(True, data)
+      req.dbName = unique_database
+      req.tableName = test_table
+      req.partitionVals = ["2023"]
+      self.hive_client.fire_listener_event(req)
+      self.run_stmt_in_hive(
+        "drop table {}.{}".format(unique_database, test_table))
+      EventProcessorUtils.wait_for_event_processing(self)
+      assert EventProcessorUtils.get_event_processor_status() == "ACTIVE"
+
+    is_event_processor_active(True)
+    is_event_processor_active(False)
+
   @CustomClusterTestSuite.with_args(catalogd_args="--hms_event_polling_interval_s=1")
   def test_iceberg_self_events(self, unique_database):
     """This test checks that Impala doesn't refresh Iceberg tables on self events."""
diff --git a/tests/custom_cluster/test_metastore_service.py b/tests/custom_cluster/test_metastore_service.py
index d0a696c2b..fca6f3c4c 100644
--- a/tests/custom_cluster/test_metastore_service.py
+++ b/tests/custom_cluster/test_metastore_service.py
@@ -1201,10 +1201,8 @@ class TestMetastoreService(CustomClusterTestSuite):
       get_parts_req.tbl_name = "table-does-not-exist"
       get_parts_req.names = []
       if expect_fallback:
-        # TODO HMS actually throws an InvalidObjectException but the HMS API signature
-        # doesn't declare it in the signature.
         self.__get_parts_by_names_expect_exception(catalog_hms_client, get_parts_req,
-                                                   "Internal error")
+                                                   "InvalidObjectException")
       else:
         self.__get_parts_by_names_expect_exception(catalog_hms_client, get_parts_req,
           "Table {0}.table-does-not-exist not found".format(