You are viewing a plain text version of this content. The canonical link for it was a hyperlink that is not preserved in this plain-text copy.
Posted to commits@hive.apache.org by ha...@apache.org on 2020/05/27 05:34:19 UTC

[hive] branch master updated: HIVE-23487 : Optimise PartitionManagementTask (Rajesh Balamohan via Ashutosh Chauhan)

This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 65e947e  HIVE-23487 : Optimise PartitionManagementTask (Rajesh Balamohan via Ashutosh Chauhan)
65e947e is described below

commit 65e947eec05304ed5b9441a374c240cd9a53d30a
Author: Rajesh Balamohan <rb...@apache.org>
AuthorDate: Tue May 26 22:33:33 2020 -0700

    HIVE-23487 : Optimise PartitionManagementTask (Rajesh Balamohan via Ashutosh Chauhan)
    
    Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
---
 .../hadoop/hive/ql/ddl/misc/msck/MsckOperation.java      |  2 +-
 .../hive/ql/exec/TestMsckCreatePartitionsInBatches.java  |  2 +-
 .../hive/ql/exec/TestMsckDropPartitionsInBatches.java    |  2 +-
 .../main/java/org/apache/hadoop/hive/metastore/Msck.java | 16 ++++++++++------
 .../hadoop/hive/metastore/PartitionManagementTask.java   |  7 ++++---
 5 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/misc/msck/MsckOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/misc/msck/MsckOperation.java
index c05d699..3ffc58f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/misc/msck/MsckOperation.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/misc/msck/MsckOperation.java
@@ -52,7 +52,7 @@ public class MsckOperation extends DDLOperation<MsckDesc> {
   public int execute() throws HiveException, IOException, TException {
     try {
       Msck msck = new Msck(false, false);
-      msck.init(context.getDb().getConf());
+      msck.init(Msck.getMsckConf(context.getDb().getConf()));
 
       String[] names = Utilities.getDbTableName(desc.getTableName());
 
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestMsckCreatePartitionsInBatches.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestMsckCreatePartitionsInBatches.java
index 7821f40..869249a 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestMsckCreatePartitionsInBatches.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestMsckCreatePartitionsInBatches.java
@@ -83,7 +83,7 @@ public class TestMsckCreatePartitionsInBatches {
       throw new HiveException(e);
     }
     msck = new Msck( false, false);
-    msck.init(hiveConf);
+    msck.init(Msck.getMsckConf(hiveConf));
   }
 
   @Before
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestMsckDropPartitionsInBatches.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestMsckDropPartitionsInBatches.java
index 8be3112..e7318bf 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestMsckDropPartitionsInBatches.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestMsckDropPartitionsInBatches.java
@@ -80,7 +80,7 @@ public class TestMsckDropPartitionsInBatches {
     SessionState.start(hiveConf);
     db = new HiveMetaStoreClient(hiveConf);
     msck = new Msck( false, false);
-    msck.init(hiveConf);
+    msck.init(Msck.getMsckConf(hiveConf));
   }
 
   @Before
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/Msck.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/Msck.java
index f4e109d..59a96e8 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/Msck.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/Msck.java
@@ -84,15 +84,19 @@ public class Msck {
 
   public void init(Configuration conf) throws MetaException {
     if (msc == null) {
-      // the only reason we are using new conf here is to override EXPRESSION_PROXY_CLASS
-      Configuration metastoreConf = MetastoreConf.newMetastoreConf(new Configuration(conf));
-      metastoreConf.set(MetastoreConf.ConfVars.EXPRESSION_PROXY_CLASS.getVarname(),
-        MsckPartitionExpressionProxy.class.getCanonicalName());
-      setConf(metastoreConf);
-      this.msc = new HiveMetaStoreClient(metastoreConf);
+      setConf(conf);
+      this.msc = new HiveMetaStoreClient(conf);
     }
   }
 
+  public static Configuration getMsckConf(Configuration conf) {
+    // the only reason we are using new conf here is to override EXPRESSION_PROXY_CLASS
+    Configuration metastoreConf = MetastoreConf.newMetastoreConf(new Configuration(conf));
+    metastoreConf.set(MetastoreConf.ConfVars.EXPRESSION_PROXY_CLASS.getVarname(),
+        MsckPartitionExpressionProxy.class.getCanonicalName());
+    return metastoreConf;
+  }
+
   /**
    * MetastoreCheck, see if the data in the metastore matches what is on the
    * dfs. Current version checks for tables and partitions that are either
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/PartitionManagementTask.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/PartitionManagementTask.java
index e4488f4..612ac87 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/PartitionManagementTask.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/PartitionManagementTask.java
@@ -146,7 +146,8 @@ public class PartitionManagementTask implements MetastoreTaskThread {
             new ThreadFactoryBuilder().setDaemon(true).setNameFormat("PartitionDiscoveryTask-%d").build());
         CountDownLatch countDownLatch = new CountDownLatch(candidateTables.size());
         LOG.info("Found {} candidate tables for partition discovery", candidateTables.size());
-        setupMsckConf();
+        setupMsckPathInvalidation();
+        Configuration msckConf = Msck.getMsckConf(conf);
         for (Table table : candidateTables) {
           qualifiedTableName = Warehouse.getCatalogQualifiedTableName(table);
           long retentionSeconds = getRetentionPeriodInSeconds(table);
@@ -155,7 +156,7 @@ public class PartitionManagementTask implements MetastoreTaskThread {
           // this always runs in 'sync' mode where partitions can be added and dropped
           MsckInfo msckInfo = new MsckInfo(table.getCatName(), table.getDbName(), table.getTableName(),
             null, null, true, true, true, retentionSeconds);
-          executorService.submit(new MsckThread(msckInfo, conf, qualifiedTableName, countDownLatch));
+          executorService.submit(new MsckThread(msckInfo, msckConf, qualifiedTableName, countDownLatch));
         }
         countDownLatch.await();
         executorService.shutdownNow();
@@ -196,7 +197,7 @@ public class PartitionManagementTask implements MetastoreTaskThread {
     return retentionSeconds;
   }
 
-  private void setupMsckConf() {
+  private void setupMsckPathInvalidation() {
     // if invalid partition directory appears, we just skip and move on. We don't want partition management to throw
     // when invalid path is encountered as these are background threads. We just want to skip and move on. Users will
     // have to fix the invalid paths via external means.