You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by as...@apache.org on 2018/10/04 17:18:37 UTC
hive git commit: HIVE-20545: Exclude large-sized parameters from
serialization of Table and Partition thrift objects in HMS notifications
(Bharath Krishna, reviewed by Andrew Sherman)
Repository: hive
Updated Branches:
refs/heads/master d0ed25e3b -> 857259ed0
HIVE-20545: Exclude large-sized parameters from serialization of Table and Partition thrift objects in HMS notifications (Bharath Krishna, reviewed by Andrew Sherman)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/857259ed
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/857259ed
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/857259ed
Branch: refs/heads/master
Commit: 857259ed08aaf31e198bdeb25540ec16ef6cc3e6
Parents: d0ed25e
Author: Bharath Krishna <bh...@cloudera.com>
Authored: Thu Oct 4 09:36:01 2018 -0700
Committer: Andrew Sherman <as...@apache.org>
Committed: Thu Oct 4 10:01:05 2018 -0700
----------------------------------------------------------------------
.../hive/metastore/conf/MetastoreConf.java | 28 ++++++
.../hive/metastore/utils/MetaStoreUtils.java | 42 +++++++++
.../metastore/messaging/MessageFactory.java | 13 +--
.../messaging/json/JSONMessageFactory.java | 31 +++++++
.../utils/TestMetaStoreServerUtils.java | 89 +++++++++++++++++++-
5 files changed, 194 insertions(+), 9 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/857259ed/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
index 946f644..7b01678 100644
--- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
+++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
@@ -516,6 +516,12 @@ public class MetastoreConf {
"hive.metastore.event.message.factory",
"org.apache.hadoop.hive.metastore.messaging.json.JSONMessageFactory",
"Factory class for making encoding and decoding messages in the events generated."),
+ EVENT_NOTIFICATION_PARAMETERS_EXCLUDE_PATTERNS("metastore.notification.parameters.exclude.patterns",
+ "hive.metastore.notification.parameters.exclude.patterns", "",
+ "List of comma-separated regexes that are used to reduced the size of HMS Notification messages."
+ + " The regexes are matched against each key of parameters map in Table or Partition object"
+ + "present in HMS Notification. Any key-value pair whose key is matched with any regex will"
+ +" be removed from Parameters map during Serialization of Table/Partition object."),
EVENT_DB_LISTENER_TTL("metastore.event.db.listener.timetolive",
"hive.metastore.event.db.listener.timetolive", 86400, TimeUnit.SECONDS,
"time after which events will be removed from the database listener queue"),
@@ -1410,6 +1416,28 @@ public class MetastoreConf {
}
/**
+ * Get values from comma-separated config, to an array after extracting individual values.
+ * @param conf Configuration to retrieve it from
+ * @param var variable to retrieve
+ * @return Array of String, containing each value from the comma-separated config,
+ * or default value if value not in config file
+ */
+ public static String[] getTrimmedStringsVar(Configuration conf, ConfVars var) {
+ assert var.defaultVal.getClass() == String.class;
+ String[] result = conf.getTrimmedStrings(var.varname, (String[]) null);
+ if (result != null) {
+ return result;
+ }
+ if (var.hiveName != null) {
+ result = conf.getTrimmedStrings(var.hiveName, (String[]) null);
+ if (result != null) {
+ return result;
+ }
+ }
+ return org.apache.hadoop.util.StringUtils.getTrimmedStrings((String) var.getDefaultVal());
+ }
+
+ /**
* Set the variable as a boolean
* @param conf configuration file to set it in
* @param var variable to set
http://git-wip-us.apache.org/repos/asf/hive/blob/857259ed/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java
index a92f34b..720ec71 100644
--- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java
+++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java
@@ -56,8 +56,13 @@ import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.TimeZone;
+import java.util.function.Predicate;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
+import java.util.stream.Collectors;
+
+import static java.util.regex.Pattern.compile;
public class MetaStoreUtils {
/** A fixed date format to be used for hive partition column values. */
@@ -905,4 +910,41 @@ public class MetaStoreUtils {
}
return TableType.VIRTUAL_VIEW.toString().equals(table.getTableType());
}
+
+ /**
+ * filters a given map with predicate provided. All entries of map whose key matches with
+ * predicate will be removed. Expects map to be modifiable and does the operation on actual map,
+ * so does not return a copy of filtered map.
+ * @param map A map of String key-value pairs
+ * @param predicate Predicate with pattern to filter the map
+ */
+ public static <T> void filterMapKeys(Map<String, T> map, Predicate<String> predicate) {
+ if (map == null) {
+ return;
+ }
+ map.entrySet().removeIf(entry -> predicate.test(entry.getKey()));
+ }
+
+ /**
+ * filters a given map with list of predicates. All entries of map whose key matches with any
+ * predicate will be removed. Expects map to be modifiable and does the operation on actual map,
+ * so does not return a copy of filtered map.
+ * @param map A map of String key-value pairs
+ * @param predicates List of predicates with patterns to filter the map
+ */
+ public static <T> void filterMapkeys(Map<String, T> map, List<Predicate<String>> predicates) {
+ if (map == null) {
+ return;
+ }
+ filterMapKeys(map, predicates.stream().reduce(Predicate::or).orElse(x -> false));
+ }
+
+ /**
+ * Compile a list of regex patterns and collect them as Predicates.
+ * @param patterns List of regex patterns to be compiled
+ * @return a List of Predicate created by compiling the regex patterns
+ */
+ public static List<Predicate<String>> compilePatternsToPredicates(List<String> patterns) {
+ return patterns.stream().map(pattern -> compile(pattern).asPredicate()).collect(Collectors.toList());
+ }
}
http://git-wip-us.apache.org/repos/asf/hive/blob/857259ed/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/messaging/MessageFactory.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/messaging/MessageFactory.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/messaging/MessageFactory.java
index 7ff168f..58c6891 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/messaging/MessageFactory.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/messaging/MessageFactory.java
@@ -92,22 +92,23 @@ public abstract class MessageFactory {
protected static final String MS_SERVICE_PRINCIPAL =
MetastoreConf.getVar(conf, ConfVars.KERBEROS_PRINCIPAL, "");
+
/**
* Getter for MessageFactory instance.
*/
public static MessageFactory getInstance() {
if (instance == null) {
- instance =
- getInstance(MetastoreConf.getVar(conf, ConfVars.EVENT_MESSAGE_FACTORY));
+ instance = getInstance(MetastoreConf.getVar(conf, ConfVars.EVENT_MESSAGE_FACTORY));
}
return instance;
}
private static MessageFactory getInstance(String className) {
try {
- return JavaUtils.newInstance(JavaUtils.getClass(className, MessageFactory.class));
- }
- catch (MetaException e) {
+ MessageFactory factory = JavaUtils.newInstance(JavaUtils.getClass(className, MessageFactory.class));
+ factory.init();
+ return factory;
+ } catch (MetaException e) {
throw new IllegalStateException("Could not construct MessageFactory implementation: ", e);
}
}
@@ -133,6 +134,8 @@ public abstract class MessageFactory {
// itself for discoverability? Might be worth pursuing.
}
+ public void init() throws MetaException {}
+
public abstract MessageDeserializer getDeserializer();
/**
http://git-wip-us.apache.org/repos/asf/hive/blob/857259ed/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/messaging/json/JSONMessageFactory.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/messaging/json/JSONMessageFactory.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/messaging/json/JSONMessageFactory.java
index 2668b05..6aa079d 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/messaging/json/JSONMessageFactory.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/messaging/json/JSONMessageFactory.java
@@ -19,16 +19,20 @@
package org.apache.hadoop.hive.metastore.messaging.json;
+import java.util.Arrays;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
+import java.util.function.Predicate;
+import java.util.regex.PatternSyntaxException;
import javax.annotation.Nullable;
import org.apache.hadoop.hive.metastore.api.Catalog;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.Function;
+import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.NotificationEvent;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.SQLForeignKey;
@@ -37,6 +41,7 @@ import org.apache.hadoop.hive.metastore.api.SQLPrimaryKey;
import org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.metastore.api.TxnToWriteId;
+import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
import org.apache.hadoop.hive.metastore.events.AcidWriteEvent;
import org.apache.hadoop.hive.metastore.messaging.AbortTxnMessage;
import org.apache.hadoop.hive.metastore.messaging.AddForeignKeyMessage;
@@ -66,6 +71,7 @@ import org.apache.hadoop.hive.metastore.messaging.MessageFactory;
import org.apache.hadoop.hive.metastore.messaging.OpenTxnMessage;
import org.apache.hadoop.hive.metastore.messaging.AcidWriteMessage;
import org.apache.hadoop.hive.metastore.messaging.PartitionFiles;
+import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
import org.apache.thrift.TBase;
import org.apache.thrift.TDeserializer;
import org.apache.thrift.TException;
@@ -83,6 +89,8 @@ import com.google.common.collect.Iterables;
import com.google.common.collect.Iterators;
import com.google.common.collect.Lists;
+import static org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.filterMapkeys;
+
/**
* The JSON implementation of the MessageFactory. Constructs JSON implementations of each
* message-type.
@@ -93,6 +101,23 @@ public class JSONMessageFactory extends MessageFactory {
private static JSONMessageDeserializer deserializer = new JSONMessageDeserializer();
+ private static List<Predicate<String>> paramsFilter;
+
+ @Override
+ public void init() throws MetaException {
+ super.init();
+
+ List<String> excludePatterns = Arrays.asList(MetastoreConf
+ .getTrimmedStringsVar(conf, MetastoreConf.ConfVars.EVENT_NOTIFICATION_PARAMETERS_EXCLUDE_PATTERNS));
+ try {
+ paramsFilter = MetaStoreUtils.compilePatternsToPredicates(excludePatterns);
+ } catch (PatternSyntaxException e) {
+ LOG.error("Regex pattern compilation failed. Verify that "
+ + "metastore.notification.parameters.exclude.patterns has valid patterns.");
+ throw new MetaException("Regex pattern compilation failed. " + e.getMessage());
+ }
+ }
+
@Override
public MessageDeserializer getDeserializer() {
return deserializer;
@@ -295,11 +320,17 @@ public class JSONMessageFactory extends MessageFactory {
}
static String createTableObjJson(Table tableObj) throws TException {
+ //Note: The parameters of the Table object will be removed in the filter if it matches
+ // any pattern provided through EVENT_NOTIFICATION_PARAMETERS_EXCLUDE_PATTERNS
+ filterMapkeys(tableObj.getParameters(), paramsFilter);
TSerializer serializer = new TSerializer(new TJSONProtocol.Factory());
return serializer.toString(tableObj, "UTF-8");
}
static String createPartitionObjJson(Partition partitionObj) throws TException {
+ //Note: The parameters of the Partition object will be removed in the filter if it matches
+ // any pattern provided through EVENT_NOTIFICATION_PARAMETERS_EXCLUDE_PATTERNS
+ filterMapkeys(partitionObj.getParameters(), paramsFilter);
TSerializer serializer = new TSerializer(new TJSONProtocol.Factory());
return serializer.toString(partitionObj, "UTF-8");
}
http://git-wip-us.apache.org/repos/asf/hive/blob/857259ed/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/utils/TestMetaStoreServerUtils.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/utils/TestMetaStoreServerUtils.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/utils/TestMetaStoreServerUtils.java
index 30de1c4..b05cb54 100644
--- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/utils/TestMetaStoreServerUtils.java
+++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/utils/TestMetaStoreServerUtils.java
@@ -19,6 +19,7 @@
package org.apache.hadoop.hive.metastore.utils;
import com.google.common.collect.ImmutableMap;
+import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
@@ -30,30 +31,31 @@ import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.metastore.api.Partition;
-import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.client.builder.DatabaseBuilder;
-import org.apache.hadoop.hive.metastore.client.builder.PartitionBuilder;
import org.apache.hadoop.hive.metastore.client.builder.TableBuilder;
+import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
import org.apache.thrift.TException;
import org.junit.Assert;
import org.junit.Test;
import org.junit.experimental.categories.Category;
-import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import java.util.function.Predicate;
+import java.util.stream.Collectors;
+import static java.util.regex.Pattern.compile;
import static org.apache.hadoop.hive.common.StatsSetupConst.COLUMN_STATS_ACCURATE;
import static org.apache.hadoop.hive.common.StatsSetupConst.FAST_STATS;
import static org.apache.hadoop.hive.common.StatsSetupConst.NUM_FILES;
import static org.apache.hadoop.hive.common.StatsSetupConst.NUM_ERASURE_CODED_FILES;
import static org.apache.hadoop.hive.common.StatsSetupConst.STATS_GENERATED;
import static org.apache.hadoop.hive.common.StatsSetupConst.TOTAL_SIZE;
+import static org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.filterMapkeys;
import static org.hamcrest.core.Is.is;
-import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertThat;
import static org.junit.Assert.assertTrue;
@@ -379,5 +381,84 @@ public class TestMetaStoreServerUtils {
new Path(pathString), false, false, isErasureCoded);
}
+ @Test
+ public void testFilterMapWithPredicates() {
+ Map<String, String> testMap = getTestParamMap();
+
+ List<String> excludePatterns = Arrays.asList("lastDdl", "num");
+ testMapFilter(testMap, excludePatterns);
+ assertFalse(testMap.containsKey("transient_lastDdlTime"));
+ assertFalse(testMap.containsKey("numFiles"));
+ assertFalse(testMap.containsKey("numFilesErasureCoded"));
+ assertFalse(testMap.containsKey("numRows"));
+
+ Map<String, String> expectedMap = new HashMap<String, String>() {{
+ put("totalSize", "1024");
+ put("rawDataSize", "3243234");
+ put("COLUMN_STATS_ACCURATE", "{\"BASIC_STATS\":\"true\"");
+ put("COLUMN_STATS_ACCURATED", "dummy");
+ put("bucketing_version", "2");
+ put("testBucketing_version", "2");
+ }};
+
+ assertThat(expectedMap, is(testMap));
+
+ testMap = getTestParamMap();
+ excludePatterns = Arrays.asList("^bucket", "ACCURATE$");
+ testMapFilter(testMap, excludePatterns);
+
+ expectedMap = new HashMap<String, String>() {{
+ put("totalSize", "1024");
+ put("numRows", "10");
+ put("rawDataSize", "3243234");
+ put("COLUMN_STATS_ACCURATED", "dummy");
+ put("numFiles", "2");
+ put("transient_lastDdlTime", "1537487124");
+ put("testBucketing_version", "2");
+ put("numFilesErasureCoded", "0");
+ }};
+
+ assertThat(expectedMap, is(testMap));
+
+ // test that if the config is not set in MetastoreConf, it does not filter any parameter
+ Configuration testConf = MetastoreConf.newMetastoreConf();
+ testMap = getTestParamMap();
+ excludePatterns = Arrays.asList(MetastoreConf
+ .getTrimmedStringsVar(testConf, MetastoreConf.ConfVars.EVENT_NOTIFICATION_PARAMETERS_EXCLUDE_PATTERNS));
+
+ testMapFilter(testMap, excludePatterns);
+ assertThat(getTestParamMap(), is(testMap));
+
+
+ // test that if the config is set to empty String in MetastoreConf, it does not filter any parameter
+ testConf.setStrings(MetastoreConf.ConfVars.EVENT_NOTIFICATION_PARAMETERS_EXCLUDE_PATTERNS.getVarname(), "");
+ testMap = getTestParamMap();
+ excludePatterns = Arrays.asList(MetastoreConf
+ .getTrimmedStringsVar(testConf, MetastoreConf.ConfVars.EVENT_NOTIFICATION_PARAMETERS_EXCLUDE_PATTERNS));
+
+ testMapFilter(testMap, excludePatterns);
+ assertThat(getTestParamMap(), is(testMap));
+ }
+
+ private void testMapFilter(Map<String, String> testMap, List<String> patterns) {
+ List<Predicate<String>> paramsFilter =
+ patterns.stream().map(pattern -> compile(pattern).asPredicate()).collect(Collectors.toList());
+ filterMapkeys(testMap, paramsFilter);
+ }
+
+ private Map<String, String> getTestParamMap() {
+ return new HashMap<String, String>() {{
+ put("totalSize", "1024");
+ put("numRows", "10");
+ put("rawDataSize", "3243234");
+ put("COLUMN_STATS_ACCURATE", "{\"BASIC_STATS\":\"true\"");
+ put("COLUMN_STATS_ACCURATED", "dummy");
+ put("numFiles", "2");
+ put("transient_lastDdlTime", "1537487124");
+ put("bucketing_version", "2");
+ put("testBucketing_version", "2");
+ put("numFilesErasureCoded", "0");
+ }};
+ }
}