You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2019/04/16 20:24:54 UTC

[impala] 03/05: IMPALA-8338 : Check CREATION_TIME while processing DROP_DATABASE events.

This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 55881609f1c21eb7a59c383de60171ffa8943db8
Author: Bharath Krishna <bh...@cloudera.com>
AuthorDate: Thu Apr 11 15:54:23 2019 -0700

    IMPALA-8338 : Check CREATION_TIME while processing DROP_DATABASE events.
    
    Process the drop database event only if the CREATION_TIME of the
    catalog's database object is lesser than or equal to that of the
    database object present in the notification event. If the
    CREATION_TIME in the notification event object is lesser than
    the catalog's DB object, it means that the Database object
    present in the catalog is the latest and we can skip the event
    instead.
    
    Testing :
     - Added unit tests in MetastoreEventsProcessorTest.
     - Enabled testCreateDropCreateDatabaseFromImpala as
       we now have CREATION_TIME in the notification events.
    
    Change-Id: I8fd3685cf7261e4953f4f884850489a47c5bbd6c
    Reviewed-on: http://gerrit.cloudera.org:8080/12938
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../impala/catalog/CatalogServiceCatalog.java      | 33 ++++++++++++
 .../impala/catalog/events/MetastoreEvents.java     | 60 +++++++++++++++-------
 .../events/MetastoreEventsProcessorTest.java       | 59 +++++++++++++++++++--
 3 files changed, 130 insertions(+), 22 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/catalog/CatalogServiceCatalog.java b/fe/src/main/java/org/apache/impala/catalog/CatalogServiceCatalog.java
index 99689e4..f3690ad 100644
--- a/fe/src/main/java/org/apache/impala/catalog/CatalogServiceCatalog.java
+++ b/fe/src/main/java/org/apache/impala/catalog/CatalogServiceCatalog.java
@@ -1528,6 +1528,39 @@ public class CatalogServiceCatalog extends Catalog {
   }
 
   /**
+   * @param msDb Metastore Database used to remove Db from Catalog
+   * @param dbFound Set to true if Database is found in Catalog
+   * @param dbMatched Set to true if Database is found in Catalog and it's CREATION_TIME
+   * is equal to the metastore DB
+   * @return the DB object removed. Return null if DB does not exist or was not removed
+   * because CREATION_TIME does not match.
+   */
+  public Db removeDbIfExists(org.apache.hadoop.hive.metastore.api.Database msDb,
+      Reference<Boolean> dbFound, Reference<Boolean> dbMatched) {
+    dbFound.setRef(false);
+    dbMatched.setRef(false);
+    versionLock_.writeLock().lock();
+    try {
+      String dbName = msDb.getName();
+      Db catalogDb = getDb(dbName);
+      if (catalogDb == null) return null;
+
+      dbFound.setRef(true);
+      // Remove the DB only if the CREATION_TIME matches with the metastore DB from event.
+      if (msDb.getCreateTime() == catalogDb.getMetaStoreDb().getCreateTime()) {
+        Db removedDb = removeDb(dbName);
+        if (removedDb != null) {
+          dbMatched.setRef(true);
+          return removedDb;
+        }
+      }
+      return null;
+    } finally {
+      versionLock_.writeLock().unlock();
+    }
+  }
+
+  /**
    * Helper function to clean up the state associated with a removed database. It creates
    * the entries in the delete log for 'db' as well as for its tables and functions
    * (if any).
diff --git a/fe/src/main/java/org/apache/impala/catalog/events/MetastoreEvents.java b/fe/src/main/java/org/apache/impala/catalog/events/MetastoreEvents.java
index 41f70e4..a876c61 100644
--- a/fe/src/main/java/org/apache/impala/catalog/events/MetastoreEvents.java
+++ b/fe/src/main/java/org/apache/impala/catalog/events/MetastoreEvents.java
@@ -36,6 +36,7 @@ import org.apache.hadoop.hive.metastore.messaging.CreateTableMessage;
 import org.apache.hadoop.hive.metastore.messaging.DropPartitionMessage;
 import org.apache.hadoop.hive.metastore.messaging.json.JSONAlterTableMessage;
 import org.apache.hadoop.hive.metastore.messaging.json.JSONCreateDatabaseMessage;
+import org.apache.hadoop.hive.metastore.messaging.json.JSONDropDatabaseMessage;
 import org.apache.hadoop.hive.metastore.messaging.json.JSONDropTableMessage;
 import org.apache.impala.analysis.TableName;
 import org.apache.impala.catalog.CatalogException;
@@ -204,6 +205,8 @@ public class MetastoreEvents {
       }
       LOG.info(String.format("Total number of events received: %d Total number of events "
           + "filtered out: %d", sizeBefore, numFilteredEvents));
+      metrics_.getCounter(MetastoreEventsProcessor.EVENTS_SKIPPED_METRIC)
+              .inc(numFilteredEvents);
       return metastoreEvents;
     }
   }
@@ -824,10 +827,7 @@ public class MetastoreEvents {
 
     /**
      * Processes the create database event by adding the Db object from the event if it
-     * does not exist in the catalog already. TODO we should compare the creationTime of
-     * the Database in catalog with the Database in the event to make sure we are ignoring
-     * only catalog has the latest Database object. This will be added after HIVE-21077 is
-     * fixed and available
+     * does not exist in the catalog already.
      */
     @Override
     public void process() {
@@ -865,33 +865,55 @@ public class MetastoreEvents {
    */
   public static class DropDatabaseEvent extends MetastoreDatabaseEvent {
 
+    // Metastore database object as parsed from NotificationEvent message
+    private final Database droppedDatabase_;
+
     /**
      * Prevent instantiation from outside should use MetastoreEventFactory instead
      */
     private DropDatabaseEvent(
-        CatalogServiceCatalog catalog, Metrics metrics, NotificationEvent event) {
+        CatalogServiceCatalog catalog, Metrics metrics, NotificationEvent event)
+        throws MetastoreNotificationException {
       super(catalog, metrics, event);
+      Preconditions.checkArgument(MetastoreEventType.DROP_DATABASE.equals(eventType_));
+      JSONDropDatabaseMessage dropDatabaseMessage =
+          (JSONDropDatabaseMessage) MetastoreEventsProcessor.getMessageFactory()
+              .getDeserializer().getDropDatabaseMessage(event.getMessage());
+      try {
+        droppedDatabase_ =
+            Preconditions.checkNotNull(dropDatabaseMessage.getDatabaseObject());
+      } catch (Exception e) {
+        throw new MetastoreNotificationException(debugString(
+            "Database object is null in the event. "
+                + "This could be a metastore configuration problem. "
+                + "Check if %s is set to true in metastore configuration",
+            MetastoreEventsProcessor.HMS_ADD_THRIFT_OBJECTS_IN_EVENTS_CONFIG_KEY), e);
+      }
     }
 
     /**
-     * Process the drop database event. Currently, this handler removes the db object from
-     * catalog. TODO Once we have HIVE-21077 we should compare creationTime to make sure
-     * that catalog's Db matches with the database object in the event
+     * Process the drop database event. This handler removes the db object from catalog
+     * only if the CREATION_TIME of the catalog's database object is lesser than or equal
+     * to that of the database object present in the notification event. If the
+     * CREATION_TIME of the catalog's DB object is greater than that of the notification
+     * event's DB object, it means that the Database object present in the catalog is a
+     * later version and we can skip the event. (For instance, when user does a create db,
+     * drop db and create db again with the same dbName.)
      */
     @Override
     public void process() {
-      // TODO this does not currently handle the case where the was a new instance
-      // of database with the same name created in catalog after this database instance
-      // was removed. For instance, user does a CREATE db, drop db and create db again
-      // with the same dbName. In this case, the drop database event will remove the
-      // database instance which is created after this create event. We should add a
-      // check to compare the creation time of the database with the creation time in
-      // the event to make sure we are removing the right databases object. Unfortunately,
-      // database do not have creation time currently. This would be fixed in HIVE-21077
-      Db removedDb = catalog_.removeDb(dbName_);
-      // if database did not exist in the cache there was nothing to do
+      Reference<Boolean> dbFound = new Reference<>();
+      Reference<Boolean> dbMatched = new Reference<>();
+      Db removedDb = catalog_.removeDbIfExists(droppedDatabase_, dbFound, dbMatched);
       if (removedDb != null) {
-        infoLog("Successfully removed database {}", dbName_);
+        infoLog("Removed Database {} ", dbName_);
+      } else if (!dbFound.getRef()) {
+        debugLog("Database {} was not removed since it " +
+            "did not exist in catalog.", dbName_);
+      } else if (!dbMatched.getRef()) {
+        infoLog(debugString("Database %s was not removed from catalog since "
+            + "the creation time of the Database did not match", dbName_));
+        metrics_.getCounter(MetastoreEventsProcessor.EVENTS_SKIPPED_METRIC).inc();
       }
     }
   }
diff --git a/fe/src/test/java/org/apache/impala/catalog/events/MetastoreEventsProcessorTest.java b/fe/src/test/java/org/apache/impala/catalog/events/MetastoreEventsProcessorTest.java
index 92ac7a4..7faa725 100644
--- a/fe/src/test/java/org/apache/impala/catalog/events/MetastoreEventsProcessorTest.java
+++ b/fe/src/test/java/org/apache/impala/catalog/events/MetastoreEventsProcessorTest.java
@@ -17,6 +17,7 @@
 
 package org.apache.impala.catalog.events;
 
+import static java.lang.Thread.sleep;
 import static org.apache.impala.catalog.events.MetastoreEvents.MetastoreEventType.ALTER_TABLE;
 import static org.apache.impala.catalog.events.MetastoreEvents.MetastoreEventType.CREATE_DATABASE;
 import static org.apache.impala.catalog.events.MetastoreEvents.MetastoreEventType.CREATE_TABLE;
@@ -268,6 +269,60 @@ public class MetastoreEventsProcessorTest {
     }
   }
 
+  /**
+   * DROP_DATABASE uses CREATION_TIME to filter events that try to drop an earlier version
+   * of DB, hence this test is to verify that the sequence of operations CREATE_DB,
+   * DROP_DB, CREATE_DB from Hive will produce expected result in Impala's Catalog.
+   */
+  @Test
+  public void testCreateDropCreateDatabase() throws TException {
+    createDatabase(TEST_DB_NAME, null);
+    dropDatabaseCascade(TEST_DB_NAME);
+    createDatabase(TEST_DB_NAME, null);
+    eventsProcessor_.processEvents();
+    // Check that the database exists in Catalog
+    assertNotNull(catalog_.getDb(TEST_DB_NAME));
+  }
+
+  /**
+   * Test to verify that DROP_DATABASE event is processed such that it removes the DB from
+   * Catalog only if the CREATION_TIME of the Catalog's DB object is less than or equal to
+   * that in the event.
+   */
+  @Test
+  public void testDropDatabaseCreationTime()
+      throws ImpalaException, InterruptedException {
+    long filteredCount = eventsProcessor_.getMetrics()
+        .getCounter(MetastoreEventsProcessor.EVENTS_SKIPPED_METRIC).getCount();
+    createDatabaseFromImpala(TEST_DB_NAME, "Test DB for CREATION_TIME");
+    // now drop the database with cascade option
+    dropDatabaseCascadeFromImpala(TEST_DB_NAME);
+
+    // Adding sleep here to make sure that the CREATION_TIME is not same
+    // as the previous CREATE_DB operation, so as to trigger the filtering logic
+    // based on CREATION_TIME in DROP_DB event processing. This is currently a
+    // limitation : the DROP_DB event filtering expects that while processing events,
+    // the CREATION_TIME of two DB's with same name won't have the same
+    // creation timestamp.
+    sleep(2000);
+    // Create database again with same name
+    createDatabaseFromImpala(TEST_DB_NAME, "Test DB for CREATION_TIME");
+    eventsProcessor_.processEvents();
+
+    // Here, we expect the events CREATE_DB, DROP_DB, CREATE_DB for the
+    // same Database name. Hence, the DROP_DB event should not be processed,
+    // as the CREATION_TIME of the catalog's Database object should be greater
+    // than that in the DROP_DB notification event. Two events are filtered here,
+    // 1 : first CREATE_DATABASE as it is followed by another create of the same name.
+    // 2 : DROP_DATABASE as it is trying to drop a database which is again created.
+    assertEquals(2, eventsProcessor_.getMetrics()
+        .getCounter(MetastoreEventsProcessor.EVENTS_SKIPPED_METRIC)
+        .getCount() - filteredCount);
+
+    // Teardown step - Drop the created DB
+    dropDatabaseCascadeFromImpala(TEST_DB_NAME);
+  }
+
   @Ignore("Disabled until we fix Hive bug to deserialize alter_database event messages")
   @Test
   public void testAlterDatabaseEvents() throws TException, ImpalaException {
@@ -779,10 +834,8 @@ public class MetastoreEventsProcessorTest {
   /**
    * Similar to create,drop,create sequence table as in
    * <code>testCreateDropCreateTableFromImpala</code> but operates on Database instead
-   * of Table. Makes sure that the database creationTime is checked before processing
-   * create and drop database events
+   * of Table.
    */
-  @Ignore("Ignored since database createTime is unavailable until we have HIVE-21077")
   @Test
   public void testCreateDropCreateDatabaseFromImpala() throws ImpalaException {
     assertEquals(EventProcessorStatus.ACTIVE, eventsProcessor_.getStatus());