You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by vg...@apache.org on 2018/03/19 17:54:33 UTC

[4/4] hive git commit: HIVE-18264: CachedStore: Store cached partitions/col stats within the table cache and make prewarm non-blocking (Vaibhav Gumashta reviewed by Daniel Dai, Alexander Kolbasov)

HIVE-18264: CachedStore: Store cached partitions/col stats within the table cache and make prewarm non-blocking (Vaibhav Gumashta reviewed by Daniel Dai, Alexander Kolbasov)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/26c0ab6a
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/26c0ab6a
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/26c0ab6a

Branch: refs/heads/master
Commit: 26c0ab6adb48755ef2f5cff2ec9c4b0e9a431821
Parents: 79e8869
Author: Vaibhav Gumashta <vg...@hortonworks.com>
Authored: Mon Mar 19 10:47:37 2018 -0700
Committer: Vaibhav Gumashta <vg...@hortonworks.com>
Committed: Mon Mar 19 10:47:37 2018 -0700

----------------------------------------------------------------------
 .../listener/DummyRawStoreFailEvent.java        |    9 +-
 .../apache/hive/service/server/HiveServer2.java |    6 +-
 .../hadoop/hive/metastore/HiveMetaStore.java    |    4 -
 .../hadoop/hive/metastore/ObjectStore.java      |   30 -
 .../apache/hadoop/hive/metastore/RawStore.java  |   11 -
 .../hadoop/hive/metastore/cache/CacheUtils.java |   85 +-
 .../hive/metastore/cache/CachedStore.java       | 1552 +++++------------
 .../hive/metastore/cache/SharedCache.java       | 1588 +++++++++++++-----
 .../hive/metastore/utils/MetaStoreUtils.java    |   11 +-
 .../DummyRawStoreControlledCommit.java          |    7 -
 .../DummyRawStoreForJdoConnection.java          |    7 -
 .../hive/metastore/cache/TestCachedStore.java   |  546 +++---
 .../src/test/resources/log4j2.properties        |   74 +-
 13 files changed, 2043 insertions(+), 1887 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/26c0ab6a/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java
----------------------------------------------------------------------
diff --git a/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java b/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java
index 6144b61..e2244a1 100644
--- a/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java
+++ b/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java
@@ -976,7 +976,7 @@ public class DummyRawStoreFailEvent implements RawStore, Configurable {
   public List<WMResourcePlan> getAllResourcePlans() throws MetaException {
     return objectStore.getAllResourcePlans();
   }
- 
+
   @Override
   public WMFullResourcePlan alterResourcePlan(String name, WMNullableResourcePlan resourcePlan,
       boolean canActivateDisabled, boolean canDeactivate, boolean isReplace)
@@ -1069,13 +1069,6 @@ public class DummyRawStoreFailEvent implements RawStore, Configurable {
     objectStore.dropWMTriggerToPoolMapping(resourcePlanName, triggerName, poolPath);
   }
 
-  @Override
-  public List<ColStatsObjWithSourceInfo> getPartitionColStatsForDatabase(String dbName)
-      throws MetaException, NoSuchObjectException {
-    // TODO Auto-generated method stub
-    return null;
-  }
-
   public void createISchema(ISchema schema) throws AlreadyExistsException, MetaException,
       NoSuchObjectException {
     objectStore.createISchema(schema);

http://git-wip-us.apache.org/repos/asf/hive/blob/26c0ab6a/service/src/java/org/apache/hive/service/server/HiveServer2.java
----------------------------------------------------------------------
diff --git a/service/src/java/org/apache/hive/service/server/HiveServer2.java b/service/src/java/org/apache/hive/service/server/HiveServer2.java
index 5b792ac..bb92c44 100644
--- a/service/src/java/org/apache/hive/service/server/HiveServer2.java
+++ b/service/src/java/org/apache/hive/service/server/HiveServer2.java
@@ -64,7 +64,6 @@ import org.apache.hadoop.hive.llap.registry.impl.LlapRegistryService;
 import org.apache.hadoop.hive.metastore.api.WMFullResourcePlan;
 import org.apache.hadoop.hive.metastore.api.WMPool;
 import org.apache.hadoop.hive.metastore.api.WMResourcePlan;
-import org.apache.hadoop.hive.metastore.cache.CachedStore;
 import org.apache.hadoop.hive.ql.cache.results.QueryResultsCache;
 import org.apache.hadoop.hive.ql.exec.spark.session.SparkSessionManagerImpl;
 import org.apache.hadoop.hive.ql.exec.tez.TezSessionPoolManager;
@@ -163,9 +162,6 @@ public class HiveServer2 extends CompositeService {
       LOG.warn("Could not initiate the HiveServer2 Metrics system.  Metrics may not be reported.", t);
     }
 
-    // Initialize cachedstore with background prewarm. The prewarm will only start if configured.
-    CachedStore.initSharedCacheAsync(hiveConf);
-
     cliService = new CLIService(this);
     addService(cliService);
     final HiveServer2 hiveServer2 = this;
@@ -570,7 +566,7 @@ public class HiveServer2 extends CompositeService {
 
   private void removeServerInstanceFromZooKeeper() throws Exception {
     setDeregisteredWithZooKeeper(true);
-    
+
     if (znode != null) {
       znode.close();
     }

http://git-wip-us.apache.org/repos/asf/hive/blob/26c0ab6a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
index 66353e7..5285570 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
@@ -73,7 +73,6 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.common.StatsSetupConst;
 import org.apache.hadoop.hive.metastore.api.*;
 import org.apache.hadoop.hive.metastore.events.AddForeignKeyEvent;
-import org.apache.hadoop.hive.metastore.cache.CachedStore;
 import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
 import org.apache.hadoop.hive.metastore.conf.MetastoreConf.ConfVars;
 import org.apache.hadoop.hive.metastore.events.AddNotNullConstraintEvent;
@@ -7962,9 +7961,6 @@ public class HiveMetaStore extends ThriftHiveMetastore {
         ThreadPool.shutdown();
       }, 10);
 
-      // This will only initialize the cache if configured.
-      CachedStore.initSharedCacheAsync(conf);
-
       //Start Metrics for Standalone (Remote) Mode
       if (MetastoreConf.getBoolVar(conf, ConfVars.METRICS_ENABLED)) {
         try {

http://git-wip-us.apache.org/repos/asf/hive/blob/26c0ab6a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
index 1f75105..88d88ed 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
@@ -201,7 +201,6 @@ import org.apache.hadoop.hive.metastore.tools.SQLGenerator;
 import org.apache.hadoop.hive.metastore.utils.FileUtils;
 import org.apache.hadoop.hive.metastore.utils.JavaUtils;
 import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
-import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.ColStatsObjWithSourceInfo;
 import org.apache.hadoop.hive.metastore.utils.ObjectPair;
 import org.apache.thrift.TException;
 import org.datanucleus.AbstractNucleusContext;
@@ -7906,35 +7905,6 @@ public class ObjectStore implements RawStore, Configurable {
   }
 
   @Override
-  public List<ColStatsObjWithSourceInfo> getPartitionColStatsForDatabase(String dbName)
-      throws MetaException, NoSuchObjectException {
-    final boolean enableBitVector =
-        MetastoreConf.getBoolVar(getConf(), ConfVars.STATS_FETCH_BITVECTOR);
-    return new GetHelper<List<ColStatsObjWithSourceInfo>>(dbName, null, true, false) {
-      @Override
-      protected List<ColStatsObjWithSourceInfo> getSqlResult(
-          GetHelper<List<ColStatsObjWithSourceInfo>> ctx) throws MetaException {
-        return directSql.getColStatsForAllTablePartitions(dbName, enableBitVector);
-      }
-
-      @Override
-      protected List<ColStatsObjWithSourceInfo> getJdoResult(
-          GetHelper<List<ColStatsObjWithSourceInfo>> ctx)
-          throws MetaException, NoSuchObjectException {
-        // This is fast path for query optimizations, if we can find this info
-        // quickly using directSql, do it. No point in failing back to slow path
-        // here.
-        throw new MetaException("Jdo path is not implemented for getPartitionColStatsForDatabase.");
-      }
-
-      @Override
-      protected String describeResult() {
-        return null;
-      }
-    }.run(true);
-  }
-
-  @Override
   public void flushCache() {
     // NOP as there's no caching
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/26c0ab6a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java
index b079f8b..ad4af1a 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java
@@ -596,17 +596,6 @@ public interface RawStore extends Configurable {
     List<String> partNames, List<String> colNames) throws MetaException, NoSuchObjectException;
 
   /**
-   * Get column stats for all partitions of all tables in the database
-   *
-   * @param dbName
-   * @return List of column stats objects for all partitions of all tables in the database
-   * @throws MetaException
-   * @throws NoSuchObjectException
-   */
-  List<ColStatsObjWithSourceInfo> getPartitionColStatsForDatabase(String dbName)
-      throws MetaException, NoSuchObjectException;
-
-  /**
    * Get the next notification event.
    * @param rqst Request containing information on the last processed notification.
    * @return list of notifications, sorted by eventId

http://git-wip-us.apache.org/repos/asf/hive/blob/26c0ab6a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/cache/CacheUtils.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/cache/CacheUtils.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/cache/CacheUtils.java
index f0f650d..97d8af6 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/cache/CacheUtils.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/cache/CacheUtils.java
@@ -17,78 +17,57 @@
  */
 package org.apache.hadoop.hive.metastore.cache;
 
-import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
 import java.util.regex.Pattern;
 
-import org.apache.commons.collections.CollectionUtils;
 import org.apache.hadoop.hive.metastore.api.Partition;
 import org.apache.hadoop.hive.metastore.api.SkewedInfo;
 import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
 import org.apache.hadoop.hive.metastore.api.Table;
-import org.apache.hadoop.hive.metastore.cache.CachedStore.PartitionWrapper;
-import org.apache.hadoop.hive.metastore.cache.CachedStore.TableWrapper;
+import org.apache.hadoop.hive.metastore.cache.SharedCache.PartitionWrapper;
+import org.apache.hadoop.hive.metastore.cache.SharedCache.TableWrapper;
 import org.apache.hadoop.hive.metastore.utils.StringUtils;
 
 public class CacheUtils {
   private static final String delimit = "\u0001";
 
-  public static String buildKey(String dbName) {
-    return dbName;
-  }
-
-  public static String buildKeyWithDelimit(String dbName) {
-    return buildKey(dbName) + delimit;
-  }
-
-  public static String buildKey(String dbName, String tableName) {
+  /**
+   * Builds a key for the table cache, which is the concatenation of the database name and the
+   * table name separated by a delimiter.
+   *
+   * @param dbName database name
+   * @param tableName table name
+   * @return cache key for the table cache
+   */
+  public static String buildTableCacheKey(String dbName, String tableName) {
     return dbName + delimit + tableName;
   }
 
-  public static String buildKeyWithDelimit(String dbName, String tableName) {
-    return buildKey(dbName, tableName) + delimit;
-  }
-
-  public static String buildKey(String dbName, String tableName, List<String> partVals) {
-    String key = buildKey(dbName, tableName);
-    if (CollectionUtils.isNotEmpty(partVals)) {
-      key += delimit;
-      key += String.join(delimit, partVals);
+  /**
+   * Builds a key for the partition cache, which is the concatenation of the partition values,
+   * each value separated by a delimiter.
+   *
+   * @param partVals list of partition values
+   * @return cache key for the partition cache; empty string if partVals is null or empty
+   */
+  public static String buildPartitionCacheKey(List<String> partVals) {
+    if (partVals == null || partVals.isEmpty()) {
+      return "";
     }
-    return key;
-  }
-
-  public static String buildKeyWithDelimit(String dbName, String tableName, List<String> partVals) {
-    return buildKey(dbName, tableName, partVals) + delimit;
-  }
-
-  public static String buildKey(String dbName, String tableName, List<String> partVals, String colName) {
-    String key = buildKey(dbName, tableName, partVals);
-    return key + delimit + colName;
-  }
-
-  public static String buildKey(String dbName, String tableName, String colName) {
-    String key = buildKey(dbName, tableName);
-    return key + delimit + colName;
-  }
-
-  public static String[] splitTableColStats(String key) {
-    return key.split(delimit);
-  }
-
-  public static Object[] splitPartitionColStats(String key) {
-    Object[] result = new Object[4];
-    String[] comps = key.split(delimit);
-    result[0] = comps[0];
-    result[1] = comps[1];
-    result[2] = Arrays.asList((Arrays.copyOfRange(comps, 2, comps.length - 1)));
-    result[3] = comps[comps.length-1];
-    return result;
+    return String.join(delimit, partVals);
   }
 
-  public static Object[] splitAggrColStats(String key) {
-    return key.split(delimit);
+  /**
+   * Builds a key for the partition column stats cache, which is the concatenation of the
+   * partition values and the column name, each separated by a delimiter.
+   *
+   * @param partVals list of partition values
+   * @param colName column name
+   * @return cache key for the partition column stats cache
+   */
+  public static String buildPartitonColStatsCacheKey(List<String> partVals, String colName) {
+    return buildPartitionCacheKey(partVals) + delimit + colName;
   }
 
   static Table assemble(TableWrapper wrapper, SharedCache sharedCache) {