You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2020/07/30 17:48:04 UTC

[impala] 01/02: IMPALA-10024: isBlackListedDb() should do a case-insensitive check

This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit a10ad2cad99a8a1b84b4faa23e28f2bad592db43
Author: Vihang Karajgaonkar <vi...@apache.org>
AuthorDate: Wed Jul 29 11:24:03 2020 -0700

    IMPALA-10024: isBlackListedDb() should do a case-insensitive check
    
    The util method CatalogServiceCatalog#isBlacklistedDb() expects the
    input dbName to be in lower case, which is error-prone.
    Specifically, this can cause issues when a Metastore event has a
    dbName in a different case than the one configured in
    --blacklisted_dbs. In such cases the EventsProcessor does not ignore
    the event and can go into an error state.
    
    The fix modifies the isBlacklistedDb method to do a case-insensitive
    comparison. The isBlacklistedTable method is not affected by this
    issue since TableName has a built-in mechanism to ignore the case.
    
    Testing Done:
    1. Modified test_event_processing.py such that the generated event
    has a different case than what is configured in --blacklisted_dbs.
    The updated test works after the patch.
    2. Ran existing tests for events processor.
    
    Change-Id: I3898a46b4236413b2e328cecbb2f4364082a5e41
    Reviewed-on: http://gerrit.cloudera.org:8080/16254
    Reviewed-by: Tim Armstrong <ta...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../impala/catalog/CatalogServiceCatalog.java      |  4 +++-
 .../impala/catalog/events/MetastoreEvents.java     |  4 ++--
 tests/custom_cluster/test_event_processing.py      | 24 +++++++++++++++++++++-
 3 files changed, 28 insertions(+), 4 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/catalog/CatalogServiceCatalog.java b/fe/src/main/java/org/apache/impala/catalog/CatalogServiceCatalog.java
index 893375b..c223247 100644
--- a/fe/src/main/java/org/apache/impala/catalog/CatalogServiceCatalog.java
+++ b/fe/src/main/java/org/apache/impala/catalog/CatalogServiceCatalog.java
@@ -356,13 +356,15 @@ public class CatalogServiceCatalog extends Catalog {
    * Check whether the database is in blacklist
    */
   public boolean isBlacklistedDb(String dbName) {
-    return blacklistedDbs_.contains(dbName);
+    Preconditions.checkNotNull(dbName);
+    return blacklistedDbs_.contains(dbName.toLowerCase());
   }
 
   /**
    * Check whether the table is in blacklist
    */
   public boolean isBlacklistedTable(TableName table) {
+    Preconditions.checkNotNull(table);
     return blacklistedTables_.contains(table);
   }
 
diff --git a/fe/src/main/java/org/apache/impala/catalog/events/MetastoreEvents.java b/fe/src/main/java/org/apache/impala/catalog/events/MetastoreEvents.java
index e64578b..66f7d96 100644
--- a/fe/src/main/java/org/apache/impala/catalog/events/MetastoreEvents.java
+++ b/fe/src/main/java/org/apache/impala/catalog/events/MetastoreEvents.java
@@ -213,8 +213,8 @@ public class MetastoreEvents {
         String eventDb = currentEvent.getDbName();
         String eventTbl = currentEvent.getTableName();
         // if the event is on blacklisted db or table we should filter it out
-        if (catalog_.isBlacklistedDb(eventDb) || (eventTbl != null && catalog_
-            .isBlacklistedTable(eventDb, eventTbl))) {
+        if ((eventDb != null && catalog_.isBlacklistedDb(eventDb)) || (eventTbl != null
+            && catalog_.isBlacklistedTable(eventDb, eventTbl))) {
           String blacklistedObject = eventTbl != null ? new TableName(eventDb,
               eventTbl).toString() : eventDb;
           LOG.info(currentEvent.debugString("Filtering out this event since it is on a "
diff --git a/tests/custom_cluster/test_event_processing.py b/tests/custom_cluster/test_event_processing.py
index cc381ba..d88e388 100644
--- a/tests/custom_cluster/test_event_processing.py
+++ b/tests/custom_cluster/test_event_processing.py
@@ -52,8 +52,19 @@ class TestEventProcessing(CustomClusterTestSuite):
     """
     try:
       event_id_before = EventProcessorUtils.get_last_synced_event_id()
-      self.run_stmt_in_hive("create database testBlackListedDb")
+      # create a blacklisted database from hive and make sure event is ignored
+      self.run_stmt_in_hive("create database TESTblackListedDb")
+      # wait until all the events generated above are processed
+      EventProcessorUtils.wait_for_event_processing(self)
+      assert EventProcessorUtils.get_event_processor_status() == "ACTIVE"
+      assert EventProcessorUtils.get_last_synced_event_id() > event_id_before
+      # make sure that the blacklisted db is ignored
+      assert "TESTblackListedDb".lower() not in self.all_db_names()
+
+      event_id_before = EventProcessorUtils.get_last_synced_event_id()
       self.run_stmt_in_hive("create table testBlackListedDb.testtbl (id int)")
+      # create a table on the blacklisted database with a different case
+      self.run_stmt_in_hive("create table TESTBLACKlISTEDDb.t2 (id int)")
       self.run_stmt_in_hive(
         "create table functional_parquet.testBlackListedTbl (id int, val string)"
         " partitioned by (part int) stored as parquet")
@@ -63,6 +74,17 @@ class TestEventProcessing(CustomClusterTestSuite):
       EventProcessorUtils.wait_for_event_processing(self)
       assert EventProcessorUtils.get_event_processor_status() == "ACTIVE"
       assert EventProcessorUtils.get_last_synced_event_id() > event_id_before
+      # make sure that the black listed table is not created
+      table_names = self.client.execute("show tables in functional_parquet").get_data()
+      assert "testBlackListedTbl".lower() not in table_names
+
+      event_id_before = EventProcessorUtils.get_last_synced_event_id()
+      # generate a table level event with a different case
+      self.run_stmt_in_hive("drop table functional_parquet.TESTBlackListedTbl")
+      # wait until all the events generated above are processed
+      EventProcessorUtils.wait_for_event_processing(self)
+      assert EventProcessorUtils.get_event_processor_status() == "ACTIVE"
+      assert EventProcessorUtils.get_last_synced_event_id() > event_id_before
     finally:
       self.run_stmt_in_hive("drop database testBlackListedDb cascade")
       self.run_stmt_in_hive("drop table functional_parquet.testBlackListedTbl")