You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by bo...@apache.org on 2019/08/29 12:44:09 UTC
[impala] 02/07: IMPALA-8579: Ignore trivial alter table/partition
events.
This is an automated email from the ASF dual-hosted git repository.
boroknagyz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 08ecb6d28cd7794a654f66fe41bb8ae727bc1a3e
Author: Anurag Mantripragada <an...@gmail.com>
AuthorDate: Wed Aug 7 19:09:53 2019 -0700
IMPALA-8579: Ignore trivial alter table/partition events.
Hive generates certain trivial alter events for eg: change only
"transient_lastDdlTime". This is seen in the alter events
accompanying an INSERT event. MetastoreEventProcessor should ignore
such events as these trivial properties are not used by Impala
and they cause unnecessary invalidates/refreshes. This change will
also potentially reduce the likelihood of IMPALA-8877 as back to back
invalidates after INSERT operations are avoided.
Testing:
Added tests respectively to testPartitionEvents() and
testAlterTableEvent() in MetastoreEventProcessorTest class.
Change-Id: I01a59d5170accc014f76f14eb526d96ddcf61f76
Reviewed-on: http://gerrit.cloudera.org:8080/14145
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
.../impala/catalog/events/MetastoreEvents.java | 91 ++++++++++++++++++++++
.../events/MetastoreEventsProcessorTest.java | 59 ++++++++++++++
2 files changed, 150 insertions(+)
diff --git a/fe/src/main/java/org/apache/impala/catalog/events/MetastoreEvents.java b/fe/src/main/java/org/apache/impala/catalog/events/MetastoreEvents.java
index 51d9ac2..6f23bf8 100644
--- a/fe/src/main/java/org/apache/impala/catalog/events/MetastoreEvents.java
+++ b/fe/src/main/java/org/apache/impala/catalog/events/MetastoreEvents.java
@@ -19,6 +19,7 @@ package org.apache.impala.catalog.events;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import java.util.ArrayList;
@@ -857,6 +858,7 @@ public class MetastoreEvents {
* MetastoreEvent for ALTER_TABLE event type
*/
public static class AlterTableEvent extends TableInvalidatingEvent {
+ protected org.apache.hadoop.hive.metastore.api.Table tableBefore_;
// the table object after alter operation, as parsed from the NotificationEvent
protected org.apache.hadoop.hive.metastore.api.Table tableAfter_;
// true if this alter event was due to a rename operation
@@ -882,6 +884,7 @@ public class MetastoreEvents {
try {
msTbl_ = Preconditions.checkNotNull(alterTableMessage.getTableObjBefore());
tableAfter_ = Preconditions.checkNotNull(alterTableMessage.getTableObjAfter());
+ tableBefore_ = Preconditions.checkNotNull(alterTableMessage.getTableObjBefore());
} catch (Exception e) {
throw new MetastoreNotificationException(
debugString("Unable to parse the alter table message"), e);
@@ -935,6 +938,13 @@ public class MetastoreEvents {
+ "update is already present in catalog.");
return;
}
+ // Ignore the event if this is a trivial event. See javadoc for
+ // canBeSkipped() for examples.
+ if (canBeSkipped()) {
+ infoLog("Not processing this event as it only modifies some table parameters "
+ + "which can be ignored.");
+ return;
+ }
// in case of table level alters from external systems it is better to do a full
// invalidate eg. this could be due to as simple as adding a new parameter or a
// full blown adding or changing column type
@@ -1004,6 +1014,20 @@ public class MetastoreEvents {
return false;
}
+ @Override
+ protected boolean canBeSkipped() {
+ // Certain alter events just modify some parameters such as
+ // "transient_lastDdlTime" in Hive. For eg: the alter table event generated
+ // along with insert events. Check if the alter table event is such a trivial
+ // event by setting those parameters equal before and after the event and
+ // comparing the objects.
+
+ // Avoid modifying the object from event.
+ org.apache.hadoop.hive.metastore.api.Table tblAfter = tableAfter_.deepCopy();
+ setTrivialParameters(tableBefore_.getParameters(), tblAfter.getParameters());
+ return tblAfter.equals(tableBefore_);
+ }
+
private String qualify(TTableName tTableName) {
return new TableName(tTableName.db_name, tTableName.table_name).toString();
}
@@ -1315,6 +1339,13 @@ public class MetastoreEvents {
return;
}
+ // Skip if it's only a change in parameters by Hive, which can be ignored.
+ if (canBeSkipped()) {
+ infoLog("Not processing this event as it only modifies some "
+ + "parameters which can be ignored.");
+ return;
+ }
+
if (invalidateCatalogTable()) {
infoLog("Table {} is invalidated", getFullyQualifiedTblName());
} else {
@@ -1328,6 +1359,36 @@ public class MetastoreEvents {
if (params == null) return defaultVal;
return params.getOrDefault(key, defaultVal);
}
+
+ /**
+ * Returns a list of parameters that are set by Hive for tables/partitions that can be
+ * ignored to determine if the alter table/partition event is a trivial one.
+ */
+ static final List<String> parametersToIgnore = new ImmutableList.Builder<String>()
+ .add("transient_lastDdlTime")
+ .add("totalSize")
+ .add("numFilesErasureCoded")
+ .add("numFiles")
+ .build();
+
+ /**
+ * Util method that sets the parameters that can be ignored equal before and after
+ * event.
+ */
+ static void setTrivialParameters(Map<String, String> parametersBefore,
+ Map<String, String> parametersAfter) {
+ for (String parameter: parametersToIgnore) {
+ parametersAfter.put(parameter,
+ parametersBefore.get(parameter));
+ }
+ }
+
+ /**
+ * Hive generates certain trivial alter events for eg: change only
+ * "transient_lastDdlTime". This method returns true if the alter partition event is
+ * such a trivial event.
+ */
+ protected abstract boolean canBeSkipped();
}
public static class AddPartitionEvent extends TableInvalidatingEvent {
@@ -1436,6 +1497,9 @@ public class MetastoreEvents {
e.getMessage());
}
}
+
+ @Override
+ protected boolean canBeSkipped() { return false; }
}
public static class AlterPartitionEvent extends TableInvalidatingEvent {
@@ -1474,6 +1538,15 @@ public class MetastoreEvents {
infoLog("Not processing the event as it is a self-event");
return;
}
+
+ // Ignore the event if this is a trivial event. See javadoc for
+ // isTrivialAlterPartitionEvent() for examples.
+ if (canBeSkipped()) {
+ infoLog("Not processing this event as it only modifies some partition "
+ + "parameters which can be ignored.");
+ return;
+ }
+
// Reload the whole table if it's a transactional table.
if (AcidUtils.isTransactionalTable(msTbl_.getParameters())) {
reloadTableFromCatalog("ALTER_PARTITION");
@@ -1511,6 +1584,21 @@ public class MetastoreEvents {
}
@Override
+ protected boolean canBeSkipped() {
+ // Certain alter events just modify some parameters such as
+ // "transient_lastDdlTime" in Hive. For eg: the alter table event generated
+ // along with insert events. Check if the alter table event is such a trivial
+ // event by setting those parameters equal before and after the event and
+ // comparing the objects.
+
+ // Avoid modifying the object from event.
+ Partition afterPartition = partitionAfter_.deepCopy();
+ setTrivialParameters(partitionBefore_.getParameters(),
+ afterPartition.getParameters());
+ return afterPartition.equals(partitionBefore_);
+ }
+
+ @Override
protected void initSelfEventIdentifiersFromEvent() {
versionNumberFromEvent_ = Long.parseLong(getStringProperty(
partitionAfter_.getParameters(),
@@ -1611,6 +1699,9 @@ public class MetastoreEvents {
protected void initSelfEventIdentifiersFromEvent() {
// no-op
}
+
+ @Override
+ protected boolean canBeSkipped() { return false; }
}
/**
diff --git a/fe/src/test/java/org/apache/impala/catalog/events/MetastoreEventsProcessorTest.java b/fe/src/test/java/org/apache/impala/catalog/events/MetastoreEventsProcessorTest.java
index 4c2d2f8..f9d712e 100644
--- a/fe/src/test/java/org/apache/impala/catalog/events/MetastoreEventsProcessorTest.java
+++ b/fe/src/test/java/org/apache/impala/catalog/events/MetastoreEventsProcessorTest.java
@@ -78,6 +78,7 @@ import org.apache.impala.catalog.events.MetastoreEvents.AlterTableEvent;
import org.apache.impala.catalog.events.MetastoreEvents.MetastoreEvent;
import org.apache.impala.catalog.events.MetastoreEvents.MetastoreEventPropertyKey;
import org.apache.impala.catalog.events.MetastoreEvents.MetastoreEventType;
+import org.apache.impala.catalog.events.MetastoreEvents.TableInvalidatingEvent;
import org.apache.impala.catalog.events.MetastoreEventsProcessor.EventProcessorStatus;
import org.apache.impala.common.FileSystemUtil;
import org.apache.impala.common.ImpalaException;
@@ -596,6 +597,23 @@ public class MetastoreEventsProcessorTest {
FeFsPartition singlePartition =
Iterables.getOnlyElement(parts);
assertTrue(newLocation.equals(singlePartition.getLocation()));
+
+ // Test if trivial partition inserts are skipped. Verify that changing parameters
+ // from TableInvalidatingEvent.parametersToIgnore doesn't trigger a refresh.
+ List partitionValue = Arrays.asList("4");
+ alterPartitionsTrivial(testTblName, partitionValue );
+ eventsProcessor_.processEvents();
+
+ Collection<? extends FeFsPartition> partsAfterTrivialAlter =
+ FeCatalogUtils.loadAllPartitions((HdfsTable)
+ catalog_.getTable(TEST_DB_NAME, testTblName));
+ FeFsPartition singlePartitionAfterTrivialAlter =
+ Iterables.getOnlyElement(partsAfterTrivialAlter);
+ for (String parameter : TableInvalidatingEvent.parametersToIgnore) {
+ assertEquals("Unexpected parameter value after trivial alter partition "
+ + "event", singlePartition.getParameters().get(parameter),
+ singlePartitionAfterTrivialAlter.getParameters().get(parameter));
+ }
}
/**
@@ -814,6 +832,14 @@ public class MetastoreEventsProcessorTest {
.getCounter(MetastoreEventsProcessor.NUMBER_OF_TABLE_INVALIDATES).getCount();
assertEquals("Unexpected number of table invalidates",
numberOfInvalidatesBefore + 4, numberOfInvalidatesAfter);
+ // Check if trivial alters are ignored.
+ loadTable(testTblName);
+ alterTableChangeTrivialProperties(testTblName);
+ // The above alter should not cause an invalidate.
+ long numberOfInvalidatesAfterTrivialAlter = eventsProcessor_.getMetrics()
+ .getCounter(MetastoreEventsProcessor.NUMBER_OF_TABLE_INVALIDATES).getCount();
+ assertEquals("Unexpected number of table invalidates after trivial alter",
+ numberOfInvalidatesBefore + 4, numberOfInvalidatesAfterTrivialAlter);
}
/**
@@ -2519,6 +2545,22 @@ public class MetastoreEventsProcessorTest {
}
}
+ /**
+ * Alters trivial table properties which must be ignored by the event processor
+ */
+ private void alterTableChangeTrivialProperties(String tblName)
+ throws TException {
+ try (MetaStoreClient msClient = catalog_.getMetaStoreClient()) {
+ org.apache.hadoop.hive.metastore.api.Table msTable =
+ msClient.getHiveClient().getTable(TEST_DB_NAME, tblName);
+ for (String parameter : TableInvalidatingEvent.parametersToIgnore) {
+ msTable.getParameters().put(parameter, "1234567");
+ }
+ msClient.getHiveClient().alter_table_with_environmentContext(
+ TEST_DB_NAME, tblName, msTable, null);
+ }
+ }
+
private void alterTableAddCol(
String tblName, String colName, String colType, String comment) throws TException {
try (MetaStoreClient msClient = catalog_.getMetaStoreClient()) {
@@ -2601,6 +2643,23 @@ public class MetastoreEventsProcessorTest {
}
}
+ /**
+ * Alters trivial partition properties which must be ignored by the event processor
+ */
+ private void alterPartitionsTrivial(String tblName, List<String> partVal)
+ throws TException {
+ List<Partition> partitions = new ArrayList<>();
+ try (MetaStoreClient metaStoreClient = catalog_.getMetaStoreClient()) {
+ Partition partition = metaStoreClient.getHiveClient().getPartition(TEST_DB_NAME,
+ tblName, partVal);
+ for (String parameter : TableInvalidatingEvent.parametersToIgnore) {
+ partition.getParameters().put(parameter, "12334567");
+ partitions.add(partition);
+ }
+ metaStoreClient.getHiveClient().alter_partitions(TEST_DB_NAME, tblName, partitions);
+ }
+ }
+
private void addPartitions(String dbName, String tblName,
List<List<String>> partitionValues)
throws TException {