You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by sa...@apache.org on 2019/02/12 04:59:07 UTC

[hive] branch master updated: HIVE-21206: Bootstrap replication is slow as it opens lot of metastore connections (Sankar Hariappan, reviewed by Mahesh Kumar Behera)

This is an automated email from the ASF dual-hosted git repository.

sankarh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 8e3c5fc  HIVE-21206: Bootstrap replication is slow as it opens lot of metastore connections (Sankar Hariappan, reviewed by Mahesh Kumar Behera)
8e3c5fc is described below

commit 8e3c5fcbf16b2688ca435bf7361da3a3d4b1a699
Author: Sankar Hariappan <sa...@apache.org>
AuthorDate: Tue Feb 12 10:28:35 2019 +0530

    HIVE-21206: Bootstrap replication is slow as it opens lot of metastore connections (Sankar Hariappan, reviewed by Mahesh Kumar Behera)
    
    Signed-off-by: Sankar Hariappan <sa...@apache.org>
---
 ql/src/java/org/apache/hadoop/hive/ql/exec/ReplCopyTask.java | 5 +----
 ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java         | 4 +++-
 ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java   | 9 +++++++++
 3 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ReplCopyTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ReplCopyTask.java
index 822051c..179f291 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ReplCopyTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ReplCopyTask.java
@@ -23,7 +23,6 @@ import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.ql.ErrorMsg;
 import org.apache.hadoop.hive.ql.exec.repl.util.ReplUtils;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
-import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.parse.EximUtil;
 import org.apache.hadoop.hive.ql.parse.ReplicationSpec;
 import org.apache.hadoop.hive.ql.plan.CopyWork;
@@ -69,8 +68,6 @@ public class ReplCopyTask extends Task<ReplCopyWork> implements Serializable {
     Path toPath = null;
 
     try {
-      Hive hiveDb = getHive();
-
       // Note: CopyWork supports copying multiple files, but ReplCopyWork doesn't.
       //       Not clear of ReplCopyWork should inherit from CopyWork.
       if (work.getFromPaths().length > 1 || work.getToPaths().length > 1) {
@@ -168,7 +165,7 @@ public class ReplCopyTask extends Task<ReplCopyWork> implements Serializable {
       // its a replace (insert overwrite ) operation.
       if (work.getDeleteDestIfExist() && dstFs.exists(toPath)) {
         LOG.debug(" path " + toPath + " is cleaned before renaming");
-        hiveDb.cleanUpOneDirectoryForReplace(toPath, dstFs, HIDDEN_FILES_PATH_FILTER, conf, work.getNeedRecycle(),
+        getHive().cleanUpOneDirectoryForReplace(toPath, dstFs, HIDDEN_FILES_PATH_FILTER, conf, work.getNeedRecycle(),
                                                           work.getIsAutoPurge());
       }
 
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java
index 11ef62c..b39771f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java
@@ -184,7 +184,9 @@ public abstract class Task<T extends Serializable> implements Serializable, Node
 
   protected Hive getHive() {
     try {
-      return Hive.getWithFastCheck(conf);
+      // Hive.getWithFastCheck shouldn't be used here: it always re-opens the metastore connection.
+      // The conf object in the HMS client is always different from the one used here.
+      return Hive.get(conf);
     } catch (HiveException e) {
       LOG.error(StringUtils.stringifyException(e));
       throw new RuntimeException(e);
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java
index 33c25ed..6a2e0ca 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java
@@ -64,6 +64,7 @@ import org.apache.hadoop.hive.ql.io.AcidOutputFormat;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.apache.hadoop.hive.ql.io.BucketCodec;
 import org.apache.hadoop.hive.ql.lockmgr.TestDbTxnManager2;
+import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
 import org.apache.hadoop.hive.ql.session.SessionState;
@@ -358,6 +359,8 @@ public class TestTxnCommands extends TxnCommandsBaseForTests {
       throws Exception, MetaException, TException, NoSuchObjectException {
     hiveConf.setBoolean("hive.stats.autogather", true);
     hiveConf.setBoolean("hive.stats.column.autogather", true);
+    // Close the thread-local Hive object so that the configuration change is reflected in HMS.
+    Hive.closeCurrent();
     runStatementOnDriver("drop table if exists " + tableName);
     runStatementOnDriver(String.format("create table %s (a int) stored as orc " +
         "TBLPROPERTIES ('transactional'='true', 'transactional_properties'='insert_only')",
@@ -433,6 +436,8 @@ public class TestTxnCommands extends TxnCommandsBaseForTests {
     String tableName = "mm_table";
     hiveConf.setBoolean("hive.stats.autogather", true);
     hiveConf.setBoolean("hive.stats.column.autogather", true);
+    // Close the thread-local Hive object so that the configuration change is reflected in HMS.
+    Hive.closeCurrent();
     runStatementOnDriver("drop table if exists " + tableName);
     runStatementOnDriver(String.format("create table %s (a int) stored as orc " +
         "TBLPROPERTIES ('transactional'='true', 'transactional_properties'='insert_only')",
@@ -459,6 +464,8 @@ public class TestTxnCommands extends TxnCommandsBaseForTests {
     Assert.assertEquals(1, stats.size());
     msClient.close();
     hiveConf.setBoolean(MetastoreConf.ConfVars.HIVE_TXN_STATS_ENABLED.getVarname(), false);
+    // Close the thread-local Hive object so that the configuration change is reflected in HMS.
+    Hive.closeCurrent();
     // Running the query with stats disabled will cause stats in metastore itself to become invalid.
     runStatementOnDriver(String.format("insert into %s (a) values (1)", tableName));
     hiveConf.setBoolean(MetastoreConf.ConfVars.HIVE_TXN_STATS_ENABLED.getVarname(), true);
@@ -1224,6 +1231,8 @@ public class TestTxnCommands extends TxnCommandsBaseForTests {
   @Test
   public void testVersioning() throws Exception {
     hiveConf.set(MetastoreConf.ConfVars.CREATE_TABLES_AS_ACID.getVarname(), "true");
+    // Close the thread-local Hive object so that the configuration change is reflected in HMS.
+    Hive.closeCurrent();
     runStatementOnDriver("drop table if exists T");
     runStatementOnDriver("create table T (a int, b int) stored as orc");
     int[][] data = {{1, 2}};