You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by as...@apache.org on 2018/10/04 17:18:37 UTC

hive git commit: HIVE-20545: Exclude large-sized parameters from serialization of Table and Partition thrift objects in HMS notifications (Bharath Krishna, reviewed by Andrew Sherman)

Repository: hive
Updated Branches:
  refs/heads/master d0ed25e3b -> 857259ed0


HIVE-20545: Exclude large-sized parameters from serialization of Table and Partition thrift objects in HMS notifications (Bharath Krishna, reviewed by Andrew Sherman)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/857259ed
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/857259ed
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/857259ed

Branch: refs/heads/master
Commit: 857259ed08aaf31e198bdeb25540ec16ef6cc3e6
Parents: d0ed25e
Author: Bharath Krishna <bh...@cloudera.com>
Authored: Thu Oct 4 09:36:01 2018 -0700
Committer: Andrew Sherman <as...@apache.org>
Committed: Thu Oct 4 10:01:05 2018 -0700

----------------------------------------------------------------------
 .../hive/metastore/conf/MetastoreConf.java      | 28 ++++++
 .../hive/metastore/utils/MetaStoreUtils.java    | 42 +++++++++
 .../metastore/messaging/MessageFactory.java     | 13 +--
 .../messaging/json/JSONMessageFactory.java      | 31 +++++++
 .../utils/TestMetaStoreServerUtils.java         | 89 +++++++++++++++++++-
 5 files changed, 194 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/857259ed/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
index 946f644..7b01678 100644
--- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
+++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
@@ -516,6 +516,12 @@ public class MetastoreConf {
         "hive.metastore.event.message.factory",
         "org.apache.hadoop.hive.metastore.messaging.json.JSONMessageFactory",
         "Factory class for making encoding and decoding messages in the events generated."),
+    EVENT_NOTIFICATION_PARAMETERS_EXCLUDE_PATTERNS("metastore.notification.parameters.exclude.patterns",
+        "hive.metastore.notification.parameters.exclude.patterns", "",
+        "List of comma-separated regexes that are used to reduced the size of HMS Notification messages."
+            + " The regexes are matched against each key of parameters map in Table or Partition object"
+            + "present in HMS Notification. Any key-value pair whose key is matched with any regex will"
+            +" be removed from Parameters map during Serialization of Table/Partition object."),
     EVENT_DB_LISTENER_TTL("metastore.event.db.listener.timetolive",
         "hive.metastore.event.db.listener.timetolive", 86400, TimeUnit.SECONDS,
         "time after which events will be removed from the database listener queue"),
@@ -1410,6 +1416,28 @@ public class MetastoreConf {
   }
 
   /**
+   * Get values from comma-separated config, to an array after extracting individual values.
+   * @param conf Configuration to retrieve it from
+   * @param var variable to retrieve
+   * @return Array of String, containing each value from the comma-separated config,
+   *  or default value if value not in config file
+   */
+  public static String[] getTrimmedStringsVar(Configuration conf, ConfVars var) {
+    assert var.defaultVal.getClass() == String.class;
+    String[] result = conf.getTrimmedStrings(var.varname, (String[]) null);
+    if (result != null) {
+      return result;
+    }
+    if (var.hiveName != null) {
+      result = conf.getTrimmedStrings(var.hiveName, (String[]) null);
+      if (result != null) {
+        return result;
+      }
+    }
+    return org.apache.hadoop.util.StringUtils.getTrimmedStrings((String) var.getDefaultVal());
+  }
+
+  /**
    * Set the variable as a boolean
    * @param conf configuration file to set it in
    * @param var variable to set

http://git-wip-us.apache.org/repos/asf/hive/blob/857259ed/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java
index a92f34b..720ec71 100644
--- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java
+++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java
@@ -56,8 +56,13 @@ import java.util.List;
 import java.util.Map;
 import java.util.Properties;
 import java.util.TimeZone;
+import java.util.function.Predicate;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
+import java.util.stream.Collectors;
+
+import static java.util.regex.Pattern.compile;
 
 public class MetaStoreUtils {
   /** A fixed date format to be used for hive partition column values. */
@@ -905,4 +910,41 @@ public class MetaStoreUtils {
     }
     return TableType.VIRTUAL_VIEW.toString().equals(table.getTableType());
   }
+
+  /**
+   * filters a given map with predicate provided. All entries of map whose key matches with
+   * predicate will be removed. Expects map to be modifiable and does the operation on actual map,
+   * so does not return a copy of filtered map.
+   * @param map A map of String key-value pairs
+   * @param predicate Predicate with pattern to filter the map
+   */
+  public static <T> void filterMapKeys(Map<String, T> map, Predicate<String> predicate) {
+    if (map == null) {
+      return;
+    }
+    map.entrySet().removeIf(entry -> predicate.test(entry.getKey()));
+  }
+
+  /**
+   * filters a given map with list of predicates. All entries of map whose key matches with any
+   * predicate will be removed. Expects map to be modifiable and does the operation on actual map,
+   * so does not return a copy of filtered map.
+   * @param map A map of String key-value pairs
+   * @param predicates List of predicates with patterns to filter the map
+   */
+  public static <T> void filterMapkeys(Map<String, T> map, List<Predicate<String>> predicates) {
+    if (map == null) {
+      return;
+    }
+    filterMapKeys(map, predicates.stream().reduce(Predicate::or).orElse(x -> false));
+  }
+
+  /**
+   * Compile a list of regex patterns and collect them as Predicates.
+   * @param patterns List of regex patterns to be compiled
+   * @return a List of Predicate created by compiling the regex patterns
+   */
+  public static List<Predicate<String>> compilePatternsToPredicates(List<String> patterns) {
+    return patterns.stream().map(pattern -> compile(pattern).asPredicate()).collect(Collectors.toList());
+  }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/857259ed/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/messaging/MessageFactory.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/messaging/MessageFactory.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/messaging/MessageFactory.java
index 7ff168f..58c6891 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/messaging/MessageFactory.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/messaging/MessageFactory.java
@@ -92,22 +92,23 @@ public abstract class MessageFactory {
   protected static final String MS_SERVICE_PRINCIPAL =
       MetastoreConf.getVar(conf, ConfVars.KERBEROS_PRINCIPAL, "");
 
+
   /**
    * Getter for MessageFactory instance.
    */
   public static MessageFactory getInstance() {
     if (instance == null) {
-      instance =
-          getInstance(MetastoreConf.getVar(conf, ConfVars.EVENT_MESSAGE_FACTORY));
+      instance = getInstance(MetastoreConf.getVar(conf, ConfVars.EVENT_MESSAGE_FACTORY));
     }
     return instance;
   }
 
   private static MessageFactory getInstance(String className) {
     try {
-      return JavaUtils.newInstance(JavaUtils.getClass(className, MessageFactory.class));
-    }
-    catch (MetaException e) {
+      MessageFactory factory = JavaUtils.newInstance(JavaUtils.getClass(className, MessageFactory.class));
+      factory.init();
+      return factory;
+    } catch (MetaException e) {
       throw new IllegalStateException("Could not construct MessageFactory implementation: ", e);
     }
   }
@@ -133,6 +134,8 @@ public abstract class MessageFactory {
     // itself for discoverability? Might be worth pursuing.
   }
 
+  public void init() throws MetaException {}
+
   public abstract MessageDeserializer getDeserializer();
 
   /**

http://git-wip-us.apache.org/repos/asf/hive/blob/857259ed/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/messaging/json/JSONMessageFactory.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/messaging/json/JSONMessageFactory.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/messaging/json/JSONMessageFactory.java
index 2668b05..6aa079d 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/messaging/json/JSONMessageFactory.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/messaging/json/JSONMessageFactory.java
@@ -19,16 +19,20 @@
 
 package org.apache.hadoop.hive.metastore.messaging.json;
 
+import java.util.Arrays;
 import java.util.Iterator;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.function.Predicate;
+import java.util.regex.PatternSyntaxException;
 
 import javax.annotation.Nullable;
 
 import org.apache.hadoop.hive.metastore.api.Catalog;
 import org.apache.hadoop.hive.metastore.api.Database;
 import org.apache.hadoop.hive.metastore.api.Function;
+import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.metastore.api.NotificationEvent;
 import org.apache.hadoop.hive.metastore.api.Partition;
 import org.apache.hadoop.hive.metastore.api.SQLForeignKey;
@@ -37,6 +41,7 @@ import org.apache.hadoop.hive.metastore.api.SQLPrimaryKey;
 import org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint;
 import org.apache.hadoop.hive.metastore.api.Table;
 import org.apache.hadoop.hive.metastore.api.TxnToWriteId;
+import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
 import org.apache.hadoop.hive.metastore.events.AcidWriteEvent;
 import org.apache.hadoop.hive.metastore.messaging.AbortTxnMessage;
 import org.apache.hadoop.hive.metastore.messaging.AddForeignKeyMessage;
@@ -66,6 +71,7 @@ import org.apache.hadoop.hive.metastore.messaging.MessageFactory;
 import org.apache.hadoop.hive.metastore.messaging.OpenTxnMessage;
 import org.apache.hadoop.hive.metastore.messaging.AcidWriteMessage;
 import org.apache.hadoop.hive.metastore.messaging.PartitionFiles;
+import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
 import org.apache.thrift.TBase;
 import org.apache.thrift.TDeserializer;
 import org.apache.thrift.TException;
@@ -83,6 +89,8 @@ import com.google.common.collect.Iterables;
 import com.google.common.collect.Iterators;
 import com.google.common.collect.Lists;
 
+import static org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.filterMapkeys;
+
 /**
  * The JSON implementation of the MessageFactory. Constructs JSON implementations of each
  * message-type.
@@ -93,6 +101,23 @@ public class JSONMessageFactory extends MessageFactory {
 
   private static JSONMessageDeserializer deserializer = new JSONMessageDeserializer();
 
+  private static List<Predicate<String>> paramsFilter;
+
+  @Override
+  public void init() throws MetaException {
+    super.init();
+
+    List<String> excludePatterns = Arrays.asList(MetastoreConf
+        .getTrimmedStringsVar(conf, MetastoreConf.ConfVars.EVENT_NOTIFICATION_PARAMETERS_EXCLUDE_PATTERNS));
+    try {
+      paramsFilter = MetaStoreUtils.compilePatternsToPredicates(excludePatterns);
+    } catch (PatternSyntaxException e) {
+      LOG.error("Regex pattern compilation failed. Verify that "
+          + "metastore.notification.parameters.exclude.patterns has valid patterns.");
+      throw new MetaException("Regex pattern compilation failed. " + e.getMessage());
+    }
+  }
+
   @Override
   public MessageDeserializer getDeserializer() {
     return deserializer;
@@ -295,11 +320,17 @@ public class JSONMessageFactory extends MessageFactory {
   }
 
   static String createTableObjJson(Table tableObj) throws TException {
+    //Note: The parameters of the Table object will be removed in the filter if it matches
+    // any pattern provided through EVENT_NOTIFICATION_PARAMETERS_EXCLUDE_PATTERNS
+    filterMapkeys(tableObj.getParameters(), paramsFilter);
     TSerializer serializer = new TSerializer(new TJSONProtocol.Factory());
     return serializer.toString(tableObj, "UTF-8");
   }
 
   static String createPartitionObjJson(Partition partitionObj) throws TException {
+    //Note: The parameters of the Partition object will be removed in the filter if it matches
+    // any pattern provided through EVENT_NOTIFICATION_PARAMETERS_EXCLUDE_PATTERNS
+    filterMapkeys(partitionObj.getParameters(), paramsFilter);
     TSerializer serializer = new TSerializer(new TJSONProtocol.Factory());
     return serializer.toString(partitionObj, "UTF-8");
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/857259ed/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/utils/TestMetaStoreServerUtils.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/utils/TestMetaStoreServerUtils.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/utils/TestMetaStoreServerUtils.java
index 30de1c4..b05cb54 100644
--- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/utils/TestMetaStoreServerUtils.java
+++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/utils/TestMetaStoreServerUtils.java
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.metastore.utils;
 
 import com.google.common.collect.ImmutableMap;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.permission.FsPermission;
@@ -30,30 +31,31 @@ import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.Table;
 import org.apache.hadoop.hive.metastore.api.Partition;
-import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.metastore.client.builder.DatabaseBuilder;
-import org.apache.hadoop.hive.metastore.client.builder.PartitionBuilder;
 import org.apache.hadoop.hive.metastore.client.builder.TableBuilder;
+import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
 import org.apache.thrift.TException;
 import org.junit.Assert;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 
-import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.function.Predicate;
+import java.util.stream.Collectors;
 
+import static java.util.regex.Pattern.compile;
 import static org.apache.hadoop.hive.common.StatsSetupConst.COLUMN_STATS_ACCURATE;
 import static org.apache.hadoop.hive.common.StatsSetupConst.FAST_STATS;
 import static org.apache.hadoop.hive.common.StatsSetupConst.NUM_FILES;
 import static org.apache.hadoop.hive.common.StatsSetupConst.NUM_ERASURE_CODED_FILES;
 import static org.apache.hadoop.hive.common.StatsSetupConst.STATS_GENERATED;
 import static org.apache.hadoop.hive.common.StatsSetupConst.TOTAL_SIZE;
+import static org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.filterMapkeys;
 import static org.hamcrest.core.Is.is;
-import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertThat;
 import static org.junit.Assert.assertTrue;
@@ -379,5 +381,84 @@ public class TestMetaStoreServerUtils {
         new Path(pathString), false, false, isErasureCoded);
   }
 
+  @Test
+  public void testFilterMapWithPredicates() {
+    Map<String, String> testMap = getTestParamMap();
+
+    List<String> excludePatterns = Arrays.asList("lastDdl", "num");
+    testMapFilter(testMap, excludePatterns);
+    assertFalse(testMap.containsKey("transient_lastDdlTime"));
+    assertFalse(testMap.containsKey("numFiles"));
+    assertFalse(testMap.containsKey("numFilesErasureCoded"));
+    assertFalse(testMap.containsKey("numRows"));
+
+    Map<String, String> expectedMap = new HashMap<String, String>() {{
+        put("totalSize", "1024");
+        put("rawDataSize", "3243234");
+        put("COLUMN_STATS_ACCURATE", "{\"BASIC_STATS\":\"true\"");
+        put("COLUMN_STATS_ACCURATED", "dummy");
+        put("bucketing_version", "2");
+        put("testBucketing_version", "2");
+      }};
+
+    assertThat(expectedMap, is(testMap));
+
+    testMap = getTestParamMap();
+    excludePatterns = Arrays.asList("^bucket", "ACCURATE$");
+    testMapFilter(testMap, excludePatterns);
+
+    expectedMap = new HashMap<String, String>() {{
+        put("totalSize", "1024");
+        put("numRows", "10");
+        put("rawDataSize", "3243234");
+        put("COLUMN_STATS_ACCURATED", "dummy");
+        put("numFiles", "2");
+        put("transient_lastDdlTime", "1537487124");
+        put("testBucketing_version", "2");
+        put("numFilesErasureCoded", "0");
+      }};
+
+    assertThat(expectedMap, is(testMap));
+
+    // test that if the config is not set in MetastoreConf, it does not filter any parameter
+    Configuration testConf = MetastoreConf.newMetastoreConf();
+    testMap = getTestParamMap();
+    excludePatterns = Arrays.asList(MetastoreConf
+        .getTrimmedStringsVar(testConf, MetastoreConf.ConfVars.EVENT_NOTIFICATION_PARAMETERS_EXCLUDE_PATTERNS));
+
+    testMapFilter(testMap, excludePatterns);
+    assertThat(getTestParamMap(), is(testMap));
+
+
+    // test that if the config is set to empty String in MetastoreConf, it does not filter any parameter
+    testConf.setStrings(MetastoreConf.ConfVars.EVENT_NOTIFICATION_PARAMETERS_EXCLUDE_PATTERNS.getVarname(), "");
+    testMap = getTestParamMap();
+    excludePatterns = Arrays.asList(MetastoreConf
+        .getTrimmedStringsVar(testConf, MetastoreConf.ConfVars.EVENT_NOTIFICATION_PARAMETERS_EXCLUDE_PATTERNS));
+
+    testMapFilter(testMap, excludePatterns);
+    assertThat(getTestParamMap(), is(testMap));
+  }
+
+  private void testMapFilter(Map<String, String> testMap, List<String> patterns) {
+    List<Predicate<String>> paramsFilter =
+        patterns.stream().map(pattern -> compile(pattern).asPredicate()).collect(Collectors.toList());
+    filterMapkeys(testMap, paramsFilter);
+  }
+
+  private Map<String, String> getTestParamMap() {
+    return new HashMap<String, String>() {{
+        put("totalSize", "1024");
+        put("numRows", "10");
+        put("rawDataSize", "3243234");
+        put("COLUMN_STATS_ACCURATE", "{\"BASIC_STATS\":\"true\"");
+        put("COLUMN_STATS_ACCURATED", "dummy");
+        put("numFiles", "2");
+        put("transient_lastDdlTime", "1537487124");
+        put("bucketing_version", "2");
+        put("testBucketing_version", "2");
+        put("numFilesErasureCoded", "0");
+      }};
+  }
 }