Posted to commits@hive.apache.org by tc...@apache.org on 2023/01/19 05:16:35 UTC

[hive] branch master updated: HIVE-26598: Fix unsetting of db params for optimized bootstrap when repl dump initiates data copy (Rakshith Chandraiah, reviewed by Teddy Choi)

This is an automated email from the ASF dual-hosted git repository.

tchoi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new d480aa575f0 HIVE-26598: Fix unsetting of db params for optimized bootstrap when repl dump initiates data copy (Rakshith Chandraiah, reviewed by Teddy Choi)
d480aa575f0 is described below

commit d480aa575f09e815bd169c4e2cff0f337eea6371
Author: Rakshith C <56...@users.noreply.github.com>
AuthorDate: Thu Jan 19 10:46:28 2023 +0530

    HIVE-26598: Fix unsetting of db params for optimized bootstrap when repl dump initiates data copy (Rakshith Chandraiah, reviewed by Teddy Choi)
    
    Co-authored-by: rakshithc <ra...@cloudera.com>
---
 .../parse/TestReplicationOptimisedBootstrap.java   | 57 ++++++++++++++++++++++
 .../hadoop/hive/ql/exec/repl/ReplDumpTask.java     |  6 +--
 .../hadoop/hive/ql/exec/repl/ReplDumpWork.java     |  9 ++++
 3 files changed, 68 insertions(+), 4 deletions(-)
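
Why the flag moved: ReplDumpTask previously tracked the post-dump cleanup decision in a plain instance field (unsetDbPropertiesForOptimisedBootstrap). When the dump itself initiates the data copy (REPL_RUN_DATA_COPY_TASKS_ON_TARGET=false), the dump can finish in a later task execution, and a field set on the earlier instance is lost, so the target database's replication parameters were never unset. Storing the flag on ReplDumpWork, which is serialized with the plan and shared across executions, preserves it. A minimal sketch of the pattern (class names here are illustrative, not from the patch):

    // Illustrative sketch: state that must outlive one Task execution belongs on
    // the serializable Work object, not on the Task instance itself.
    class SketchWork implements java.io.Serializable {
      private boolean secondDumpAfterFailover;   // survives task re-instantiation

      boolean isSecondDumpAfterFailover() { return secondDumpAfterFailover; }
      void setSecondDumpAfterFailover(boolean b) { secondDumpAfterFailover = b; }
    }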

diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java
index 165b4d438e2..396abd24b47 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java
@@ -55,6 +55,7 @@ import java.util.Map;
 
 import static org.apache.hadoop.hdfs.protocol.HdfsConstants.QUOTA_DONT_SET;
 import static org.apache.hadoop.hdfs.protocol.HdfsConstants.QUOTA_RESET;
+import static org.apache.hadoop.hive.common.repl.ReplConst.REPL_ENABLE_BACKGROUND_THREAD;
 import static org.apache.hadoop.hive.common.repl.ReplConst.REPL_TARGET_DB_PROPERTY;
 import static org.apache.hadoop.hive.common.repl.ReplConst.TARGET_OF_REPLICATION;
 import static org.apache.hadoop.hive.metastore.ReplChangeManager.SOURCE_OF_REPLICATION;
@@ -1092,4 +1093,60 @@ public class TestReplicationOptimisedBootstrap extends BaseReplicationScenariosA
     }
     return txnHandler.getOpenTxns(txnListExcludingReplCreated).getOpen_txns();
   }
+
+  @Test
+  public void testDbParametersAfterOptimizedBootstrap() throws Throwable {
+    List<String> withClause = Arrays.asList(
+            String.format("'%s'='%s'", HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET.varname, "false"),
+            String.format("'%s'='%s'", HiveConf.ConfVars.HIVE_REPL_FAILOVER_START.varname, "true")
+    );
+
+    // bootstrap
+    primary.run("use " + primaryDbName)
+            .run("create table t1 (id int) clustered by(id) into 3 buckets stored as orc " +
+                    "tblproperties (\"transactional\"=\"true\")")
+            .run("insert into table t1 values (1),(2)")
+            .dump(primaryDbName, withClause);
+    replica.load(replicatedDbName, primaryDbName, withClause);
+
+    // incremental
+    primary.run("use " + primaryDbName)
+            .run("insert into table t1 values (3)")
+            .dump(primaryDbName, withClause);
+    replica.load(replicatedDbName, primaryDbName, withClause);
+
+    // make some changes on primary
+    primary.run("use " + primaryDbName)
+            .run("insert into table t1 values (4)");
+
+    withClause = Arrays.asList(
+            String.format("'%s'='%s'", HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET.varname, "false")
+    );
+    // 1st cycle of optimized bootstrap
+    replica.dump(replicatedDbName, withClause);
+    primary.load(primaryDbName, replicatedDbName, withClause);
+
+    String[] dbParams = new String[]{
+            TARGET_OF_REPLICATION,
+            CURR_STATE_ID_SOURCE.toString(),
+            CURR_STATE_ID_TARGET.toString(),
+            REPL_TARGET_DB_PROPERTY,
+            REPL_ENABLE_BACKGROUND_THREAD
+    };
+    //verify if all db parameters are set
+    for (String paramKey : dbParams) {
+      assertTrue(replica.getDatabase(replicatedDbName).getParameters().containsKey(paramKey));
+    }
+
+    // 2nd cycle of optimized bootstrap
+    replica.dump(replicatedDbName, withClause);
+    primary.load(primaryDbName, replicatedDbName, withClause);
+
+    for (String paramKey : dbParams) {
+      assertFalse(replica.getDatabase(replicatedDbName).getParameters().containsKey(paramKey));
+    }
+    // ensure optimized bootstrap was successful.
+    primary.run(String.format("select * from %s.t1", primaryDbName))
+            .verifyResults(new String[]{"1", "2", "3"});
+  }
 }
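
The test above walks the full round trip: bootstrap and incremental dump/load from primary to replica, then two reverse (optimized bootstrap) dump/load cycles from replica back to primary, asserting that the replication markers are present on the replica after the first cycle and removed after the second. The same check can be made directly against the metastore; a minimal sketch, assuming a connected IMetaStoreClient and reusing the ReplConst keys the test imports:

    // Minimal sketch (not from the patch): fail if any replication marker is
    // still set on the target database after the second cycle.
    static void assertMarkersUnset(org.apache.hadoop.hive.metastore.IMetaStoreClient client,
                                   String dbName) throws Exception {
      java.util.Map<String, String> params = client.getDatabase(dbName).getParameters();
      for (String key : new String[] {TARGET_OF_REPLICATION, REPL_TARGET_DB_PROPERTY,
          REPL_ENABLE_BACKGROUND_THREAD}) {
        if (params.containsKey(key)) {
          throw new IllegalStateException("replication marker still set: " + key);
        }
      }
    }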
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpTask.java
index e7c329a5f25..02815334fc5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpTask.java
@@ -162,7 +162,6 @@ public class ReplDumpTask extends Task<ReplDumpWork> implements Serializable {
   private Set<String> tablesForBootstrap = new HashSet<>();
   private List<TxnType> excludedTxns = Arrays.asList(TxnType.READ_ONLY, TxnType.REPL_CREATED);
   private boolean createEventMarker = false;
-  private boolean unsetDbPropertiesForOptimisedBootstrap;
 
   public enum ConstraintFileType {COMMON("common", "c_"), FOREIGNKEY("fk", "f_");
     private final String name;
@@ -264,8 +263,7 @@ public class ReplDumpTask extends Task<ReplDumpWork> implements Serializable {
 
               assert isTableDiffDirectoryPresent;
 
-              // Set boolean to determine the db properties need to sorted once dump is complete
-              unsetDbPropertiesForOptimisedBootstrap = true;
+              work.setSecondDumpAfterFailover(true);
 
               long fromEventId = Long.parseLong(getEventIdFromFile(previousValidHiveDumpPath.getParent(), conf)[1]);
               LOG.info("Starting optimised bootstrap from event id {} for database {}", fromEventId,
@@ -474,7 +472,7 @@ public class ReplDumpTask extends Task<ReplDumpWork> implements Serializable {
                     + ReplAck.DUMP_ACKNOWLEDGEMENT);
 
     // Check if we need to unset database properties after successful optimised bootstrap.
-    if (unsetDbPropertiesForOptimisedBootstrap) {
+    if (work.isSecondDumpAfterFailover()) {
       Hive hiveDb = getHive();
       Database database = hiveDb.getDatabase(work.dbNameOrPattern);
       LinkedHashMap<String, String> dbParams = new LinkedHashMap<>(database.getParameters());
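
The hunk context cuts off just after the parameters are copied into dbParams; the removal itself lies outside the diff context. For orientation, a sketch of how the cleanup plausibly continues (an assumption based on the surrounding code, not quoted from the patch):

    // Assumed continuation (sketch): drop the replication-only markers and write
    // the database back through the metastore.
    dbParams.remove(TARGET_OF_REPLICATION);
    dbParams.remove(REPL_TARGET_DB_PROPERTY);
    dbParams.remove(ReplConst.REPL_ENABLE_BACKGROUND_THREAD);
    database.setParameters(dbParams);
    hiveDb.alterDatabase(work.dbNameOrPattern, database);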
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpWork.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpWork.java
index 4ac1bf51db3..65d9c17a675 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpWork.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpWork.java
@@ -73,6 +73,7 @@ public class ReplDumpWork implements Serializable {
   private ReplLogger replLogger;
   private FailoverMetaData fmd;
   private boolean firstDumpAfterFailover;
+  private boolean secondDumpAfterFailover;
 
   public static void injectNextDumpDirForTest(String dumpDir) {
     injectNextDumpDirForTest(dumpDir, false);
@@ -355,4 +356,12 @@ public class ReplDumpWork implements Serializable {
   public void setReplLogger(ReplLogger replLogger) {
     this.replLogger = replLogger;
   }
+
+  public boolean isSecondDumpAfterFailover() {
+    return secondDumpAfterFailover;
+  }
+
+  public void setSecondDumpAfterFailover(boolean secondDumpAfterFailover) {
+    this.secondDumpAfterFailover = secondDumpAfterFailover;
+  }
 }