You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by bo...@apache.org on 2019/08/29 12:44:09 UTC

[impala] 02/07: IMPALA-8579: Ignore trivial alter table/partition events.

This is an automated email from the ASF dual-hosted git repository.

boroknagyz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 08ecb6d28cd7794a654f66fe41bb8ae727bc1a3e
Author: Anurag Mantripragada <an...@gmail.com>
AuthorDate: Wed Aug 7 19:09:53 2019 -0700

    IMPALA-8579: Ignore trivial alter table/partition events.
    
    Hive generates certain trivial alter events for eg: change only
    "transient_lastDdlTime". This is seen in the alter events
    accompanying an INSERT event. MetastoreEventProcessor should ignore
    such events as these trivial properties are not used by Impala
    and they cause unnecessary invalidates/refreshes. This change will
    also potentially reduce the likelihood of IMPALA-8877 as back to back
    invalidates after INSERT operations are avoided.
    
    Testing:
    Added tests respectively to testPartitionEvents() and
    testAlterTableEvent() in MetastoreEventProcessorTest class.
    
    Change-Id: I01a59d5170accc014f76f14eb526d96ddcf61f76
    Reviewed-on: http://gerrit.cloudera.org:8080/14145
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../impala/catalog/events/MetastoreEvents.java     | 91 ++++++++++++++++++++++
 .../events/MetastoreEventsProcessorTest.java       | 59 ++++++++++++++
 2 files changed, 150 insertions(+)

diff --git a/fe/src/main/java/org/apache/impala/catalog/events/MetastoreEvents.java b/fe/src/main/java/org/apache/impala/catalog/events/MetastoreEvents.java
index 51d9ac2..6f23bf8 100644
--- a/fe/src/main/java/org/apache/impala/catalog/events/MetastoreEvents.java
+++ b/fe/src/main/java/org/apache/impala/catalog/events/MetastoreEvents.java
@@ -19,6 +19,7 @@ package org.apache.impala.catalog.events;
 
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
 import com.google.common.collect.Lists;
 
 import java.util.ArrayList;
@@ -857,6 +858,7 @@ public class MetastoreEvents {
    * MetastoreEvent for ALTER_TABLE event type
    */
   public static class AlterTableEvent extends TableInvalidatingEvent {
+    protected org.apache.hadoop.hive.metastore.api.Table tableBefore_;
     // the table object after alter operation, as parsed from the NotificationEvent
     protected org.apache.hadoop.hive.metastore.api.Table tableAfter_;
     // true if this alter event was due to a rename operation
@@ -882,6 +884,7 @@ public class MetastoreEvents {
       try {
         msTbl_ = Preconditions.checkNotNull(alterTableMessage.getTableObjBefore());
         tableAfter_ = Preconditions.checkNotNull(alterTableMessage.getTableObjAfter());
+        tableBefore_ = Preconditions.checkNotNull(alterTableMessage.getTableObjBefore());
       } catch (Exception e) {
         throw new MetastoreNotificationException(
             debugString("Unable to parse the alter table message"), e);
@@ -935,6 +938,13 @@ public class MetastoreEvents {
             + "update is already present in catalog.");
         return;
       }
+      // Ignore the event if this is a trivial event. See javadoc for
+      // canBeSkipped() for examples.
+      if (canBeSkipped()) {
+        infoLog("Not processing this event as it only modifies some table parameters "
+            + "which can be ignored.");
+        return;
+      }
       // in case of table level alters from external systems it is better to do a full
       // invalidate  eg. this could be due to as simple as adding a new parameter or a
       // full blown adding  or changing column type
@@ -1004,6 +1014,20 @@ public class MetastoreEvents {
       return false;
     }
 
+    @Override
+    protected boolean canBeSkipped() {
+      // Certain alter events just modify some parameters such as
+      // "transient_lastDdlTime" in Hive. For eg: the alter table event generated
+      // along with insert events. Check if the alter table event is such a trivial
+      // event by setting those parameters equal before and after the event and
+      // comparing the objects.
+
+      // Avoid modifying the object from event.
+      org.apache.hadoop.hive.metastore.api.Table tblAfter = tableAfter_.deepCopy();
+      setTrivialParameters(tableBefore_.getParameters(), tblAfter.getParameters());
+      return tblAfter.equals(tableBefore_);
+    }
+
     private String qualify(TTableName tTableName) {
       return new TableName(tTableName.db_name, tTableName.table_name).toString();
     }
@@ -1315,6 +1339,13 @@ public class MetastoreEvents {
         return;
       }
 
+      // Skip if it's only a change in parameters by Hive, which can be ignored.
+      if (canBeSkipped()) {
+        infoLog("Not processing this event as it only modifies some "
+            + "parameters which can be ignored.");
+        return;
+      }
+
       if (invalidateCatalogTable()) {
         infoLog("Table {} is invalidated", getFullyQualifiedTblName());
       } else {
@@ -1328,6 +1359,36 @@ public class MetastoreEvents {
       if (params == null) return defaultVal;
       return params.getOrDefault(key, defaultVal);
     }
+
+    /**
+     * Returns a list of parameters that are set by Hive for tables/partitions that can be
+     * ignored to determine if the alter table/partition event is a trivial one.
+     */
+    static final List<String> parametersToIgnore = new ImmutableList.Builder<String>()
+        .add("transient_lastDdlTime")
+        .add("totalSize")
+        .add("numFilesErasureCoded")
+        .add("numFiles")
+        .build();
+
+    /**
+     * Util method that sets the parameters that can be ignored equal before and after
+     * event.
+     */
+     static void setTrivialParameters(Map<String, String> parametersBefore,
+        Map<String, String> parametersAfter) {
+      for (String parameter: parametersToIgnore) {
+        parametersAfter.put(parameter,
+            parametersBefore.get(parameter));
+      }
+    }
+
+    /**
+     * Hive generates certain trivial alter events for eg: change only
+     * "transient_lastDdlTime". This method returns true if the alter partition event is
+     * such a trivial event.
+     */
+    protected abstract boolean canBeSkipped();
   }
 
   public static class AddPartitionEvent extends TableInvalidatingEvent {
@@ -1436,6 +1497,9 @@ public class MetastoreEvents {
             e.getMessage());
       }
     }
+
+    @Override
+    protected boolean canBeSkipped() { return false; }
   }
 
   public static class AlterPartitionEvent extends TableInvalidatingEvent {
@@ -1474,6 +1538,15 @@ public class MetastoreEvents {
         infoLog("Not processing the event as it is a self-event");
         return;
       }
+
+      // Ignore the event if this is a trivial event. See javadoc for
+      // isTrivialAlterPartitionEvent() for examples.
+      if (canBeSkipped()) {
+        infoLog("Not processing this event as it only modifies some partition "
+            + "parameters which can be ignored.");
+        return;
+      }
+
       // Reload the whole table if it's a transactional table.
       if (AcidUtils.isTransactionalTable(msTbl_.getParameters())) {
         reloadTableFromCatalog("ALTER_PARTITION");
@@ -1511,6 +1584,21 @@ public class MetastoreEvents {
     }
 
     @Override
+    protected boolean canBeSkipped() {
+      // Certain alter events just modify some parameters such as
+      // "transient_lastDdlTime" in Hive. For eg: the alter table event generated
+      // along with insert events. Check if the alter table event is such a trivial
+      // event by setting those parameters equal before and after the event and
+      // comparing the objects.
+
+      // Avoid modifying the object from event.
+      Partition afterPartition = partitionAfter_.deepCopy();
+      setTrivialParameters(partitionBefore_.getParameters(),
+          afterPartition.getParameters());
+      return afterPartition.equals(partitionBefore_);
+    }
+
+    @Override
     protected void initSelfEventIdentifiersFromEvent() {
       versionNumberFromEvent_ = Long.parseLong(getStringProperty(
           partitionAfter_.getParameters(),
@@ -1611,6 +1699,9 @@ public class MetastoreEvents {
     protected void initSelfEventIdentifiersFromEvent() {
       // no-op
     }
+
+    @Override
+    protected boolean canBeSkipped() { return false; }
   }
 
   /**
diff --git a/fe/src/test/java/org/apache/impala/catalog/events/MetastoreEventsProcessorTest.java b/fe/src/test/java/org/apache/impala/catalog/events/MetastoreEventsProcessorTest.java
index 4c2d2f8..f9d712e 100644
--- a/fe/src/test/java/org/apache/impala/catalog/events/MetastoreEventsProcessorTest.java
+++ b/fe/src/test/java/org/apache/impala/catalog/events/MetastoreEventsProcessorTest.java
@@ -78,6 +78,7 @@ import org.apache.impala.catalog.events.MetastoreEvents.AlterTableEvent;
 import org.apache.impala.catalog.events.MetastoreEvents.MetastoreEvent;
 import org.apache.impala.catalog.events.MetastoreEvents.MetastoreEventPropertyKey;
 import org.apache.impala.catalog.events.MetastoreEvents.MetastoreEventType;
+import org.apache.impala.catalog.events.MetastoreEvents.TableInvalidatingEvent;
 import org.apache.impala.catalog.events.MetastoreEventsProcessor.EventProcessorStatus;
 import org.apache.impala.common.FileSystemUtil;
 import org.apache.impala.common.ImpalaException;
@@ -596,6 +597,23 @@ public class MetastoreEventsProcessorTest {
     FeFsPartition singlePartition =
         Iterables.getOnlyElement(parts);
     assertTrue(newLocation.equals(singlePartition.getLocation()));
+
+    // Test if trivial partition inserts are skipped. Verify that changing parameters
+    // from TableInvalidatingEvent.parametersToIgnore doesn't trigger a refresh.
+    List partitionValue = Arrays.asList("4");
+    alterPartitionsTrivial(testTblName, partitionValue );
+    eventsProcessor_.processEvents();
+
+    Collection<? extends FeFsPartition> partsAfterTrivialAlter =
+        FeCatalogUtils.loadAllPartitions((HdfsTable)
+            catalog_.getTable(TEST_DB_NAME, testTblName));
+    FeFsPartition singlePartitionAfterTrivialAlter =
+        Iterables.getOnlyElement(partsAfterTrivialAlter);
+    for (String parameter : TableInvalidatingEvent.parametersToIgnore) {
+      assertEquals("Unexpected parameter value after trivial alter partition "
+          + "event", singlePartition.getParameters().get(parameter),
+          singlePartitionAfterTrivialAlter.getParameters().get(parameter));
+    }
   }
 
   /**
@@ -814,6 +832,14 @@ public class MetastoreEventsProcessorTest {
         .getCounter(MetastoreEventsProcessor.NUMBER_OF_TABLE_INVALIDATES).getCount();
     assertEquals("Unexpected number of table invalidates",
         numberOfInvalidatesBefore + 4, numberOfInvalidatesAfter);
+    // Check if trivial alters are ignored.
+    loadTable(testTblName);
+    alterTableChangeTrivialProperties(testTblName);
+    // The above alter should not cause an invalidate.
+    long numberOfInvalidatesAfterTrivialAlter = eventsProcessor_.getMetrics()
+        .getCounter(MetastoreEventsProcessor.NUMBER_OF_TABLE_INVALIDATES).getCount();
+    assertEquals("Unexpected number of table invalidates after trivial alter",
+        numberOfInvalidatesBefore + 4, numberOfInvalidatesAfterTrivialAlter);
   }
 
   /**
@@ -2519,6 +2545,22 @@ public class MetastoreEventsProcessorTest {
     }
   }
 
+  /**
+   * Alters trivial table properties which must be ignored by the event processor
+   */
+  private void alterTableChangeTrivialProperties(String tblName)
+      throws TException {
+    try (MetaStoreClient msClient = catalog_.getMetaStoreClient()) {
+      org.apache.hadoop.hive.metastore.api.Table msTable =
+          msClient.getHiveClient().getTable(TEST_DB_NAME, tblName);
+      for (String parameter : TableInvalidatingEvent.parametersToIgnore) {
+        msTable.getParameters().put(parameter, "1234567");
+      }
+      msClient.getHiveClient().alter_table_with_environmentContext(
+          TEST_DB_NAME, tblName, msTable, null);
+    }
+  }
+
   private void alterTableAddCol(
       String tblName, String colName, String colType, String comment) throws TException {
     try (MetaStoreClient msClient = catalog_.getMetaStoreClient()) {
@@ -2601,6 +2643,23 @@ public class MetastoreEventsProcessorTest {
     }
   }
 
+  /**
+   * Alters trivial partition properties which must be ignored by the event processor
+   */
+  private void alterPartitionsTrivial(String tblName, List<String> partVal)
+      throws TException {
+    List<Partition> partitions = new ArrayList<>();
+    try (MetaStoreClient metaStoreClient = catalog_.getMetaStoreClient()) {
+      Partition partition = metaStoreClient.getHiveClient().getPartition(TEST_DB_NAME,
+          tblName, partVal);
+      for (String parameter : TableInvalidatingEvent.parametersToIgnore) {
+        partition.getParameters().put(parameter, "12334567");
+        partitions.add(partition);
+      }
+      metaStoreClient.getHiveClient().alter_partitions(TEST_DB_NAME, tblName, partitions);
+    }
+  }
+
   private void addPartitions(String dbName, String tblName,
       List<List<String>> partitionValues)
       throws TException {