You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2020/05/27 05:34:19 UTC
[hive] branch master updated: HIVE-23487 : Optimise PartitionManagementTask (Rajesh Balamohan via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository.
hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 65e947e HIVE-23487 : Optimise PartitionManagementTask (Rajesh Balamohan via Ashutosh Chauhan)
65e947e is described below
commit 65e947eec05304ed5b9441a374c240cd9a53d30a
Author: Rajesh Balamohan <rb...@apache.org>
AuthorDate: Tue May 26 22:33:33 2020 -0700
HIVE-23487 : Optimise PartitionManagementTask (Rajesh Balamohan via Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
---
.../hadoop/hive/ql/ddl/misc/msck/MsckOperation.java | 2 +-
.../hive/ql/exec/TestMsckCreatePartitionsInBatches.java | 2 +-
.../hive/ql/exec/TestMsckDropPartitionsInBatches.java | 2 +-
.../main/java/org/apache/hadoop/hive/metastore/Msck.java | 16 ++++++++++------
.../hadoop/hive/metastore/PartitionManagementTask.java | 7 ++++---
5 files changed, 17 insertions(+), 12 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/misc/msck/MsckOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/misc/msck/MsckOperation.java
index c05d699..3ffc58f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/misc/msck/MsckOperation.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/misc/msck/MsckOperation.java
@@ -52,7 +52,7 @@ public class MsckOperation extends DDLOperation<MsckDesc> {
public int execute() throws HiveException, IOException, TException {
try {
Msck msck = new Msck(false, false);
- msck.init(context.getDb().getConf());
+ msck.init(Msck.getMsckConf(context.getDb().getConf()));
String[] names = Utilities.getDbTableName(desc.getTableName());
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestMsckCreatePartitionsInBatches.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestMsckCreatePartitionsInBatches.java
index 7821f40..869249a 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestMsckCreatePartitionsInBatches.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestMsckCreatePartitionsInBatches.java
@@ -83,7 +83,7 @@ public class TestMsckCreatePartitionsInBatches {
throw new HiveException(e);
}
msck = new Msck( false, false);
- msck.init(hiveConf);
+ msck.init(Msck.getMsckConf(hiveConf));
}
@Before
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestMsckDropPartitionsInBatches.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestMsckDropPartitionsInBatches.java
index 8be3112..e7318bf 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestMsckDropPartitionsInBatches.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestMsckDropPartitionsInBatches.java
@@ -80,7 +80,7 @@ public class TestMsckDropPartitionsInBatches {
SessionState.start(hiveConf);
db = new HiveMetaStoreClient(hiveConf);
msck = new Msck( false, false);
- msck.init(hiveConf);
+ msck.init(Msck.getMsckConf(hiveConf));
}
@Before
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/Msck.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/Msck.java
index f4e109d..59a96e8 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/Msck.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/Msck.java
@@ -84,15 +84,19 @@ public class Msck {
public void init(Configuration conf) throws MetaException {
if (msc == null) {
- // the only reason we are using new conf here is to override EXPRESSION_PROXY_CLASS
- Configuration metastoreConf = MetastoreConf.newMetastoreConf(new Configuration(conf));
- metastoreConf.set(MetastoreConf.ConfVars.EXPRESSION_PROXY_CLASS.getVarname(),
- MsckPartitionExpressionProxy.class.getCanonicalName());
- setConf(metastoreConf);
- this.msc = new HiveMetaStoreClient(metastoreConf);
+ setConf(conf);
+ this.msc = new HiveMetaStoreClient(conf);
}
}
+ public static Configuration getMsckConf(Configuration conf) {
+ // the only reason we are using new conf here is to override EXPRESSION_PROXY_CLASS
+ Configuration metastoreConf = MetastoreConf.newMetastoreConf(new Configuration(conf));
+ metastoreConf.set(MetastoreConf.ConfVars.EXPRESSION_PROXY_CLASS.getVarname(),
+ MsckPartitionExpressionProxy.class.getCanonicalName());
+ return metastoreConf;
+ }
+
/**
* MetastoreCheck, see if the data in the metastore matches what is on the
* dfs. Current version checks for tables and partitions that are either
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/PartitionManagementTask.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/PartitionManagementTask.java
index e4488f4..612ac87 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/PartitionManagementTask.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/PartitionManagementTask.java
@@ -146,7 +146,8 @@ public class PartitionManagementTask implements MetastoreTaskThread {
new ThreadFactoryBuilder().setDaemon(true).setNameFormat("PartitionDiscoveryTask-%d").build());
CountDownLatch countDownLatch = new CountDownLatch(candidateTables.size());
LOG.info("Found {} candidate tables for partition discovery", candidateTables.size());
- setupMsckConf();
+ setupMsckPathInvalidation();
+ Configuration msckConf = Msck.getMsckConf(conf);
for (Table table : candidateTables) {
qualifiedTableName = Warehouse.getCatalogQualifiedTableName(table);
long retentionSeconds = getRetentionPeriodInSeconds(table);
@@ -155,7 +156,7 @@ public class PartitionManagementTask implements MetastoreTaskThread {
// this always runs in 'sync' mode where partitions can be added and dropped
MsckInfo msckInfo = new MsckInfo(table.getCatName(), table.getDbName(), table.getTableName(),
null, null, true, true, true, retentionSeconds);
- executorService.submit(new MsckThread(msckInfo, conf, qualifiedTableName, countDownLatch));
+ executorService.submit(new MsckThread(msckInfo, msckConf, qualifiedTableName, countDownLatch));
}
countDownLatch.await();
executorService.shutdownNow();
@@ -196,7 +197,7 @@ public class PartitionManagementTask implements MetastoreTaskThread {
return retentionSeconds;
}
- private void setupMsckConf() {
+ private void setupMsckPathInvalidation() {
// if invalid partition directory appears, we just skip and move on. We don't want partition management to throw
// when invalid path is encountered as these are background threads. We just want to skip and move on. Users will
// have to fix the invalid paths via external means.