You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by tc...@apache.org on 2023/01/19 05:16:35 UTC
[hive] branch master updated: HIVE-26598: Fix unsetting of db params for optimized bootstrap when repl dump initiates data copy (Rakshith Chandraiah, reviewed by Teddy Choi)
This is an automated email from the ASF dual-hosted git repository.
tchoi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new d480aa575f0 HIVE-26598: Fix unsetting of db params for optimized bootstrap when repl dump initiates data copy (Rakshith Chandraiah, reviewed by Teddy Choi)
d480aa575f0 is described below
commit d480aa575f09e815bd169c4e2cff0f337eea6371
Author: Rakshith C <56...@users.noreply.github.com>
AuthorDate: Thu Jan 19 10:46:28 2023 +0530
HIVE-26598: Fix unsetting of db params for optimized bootstrap when repl dump initiates data copy (Rakshith Chandraiah, reviewed by Teddy Choi)
Co-authored-by: rakshithc <ra...@cloudera.com>
---
.../parse/TestReplicationOptimisedBootstrap.java | 57 ++++++++++++++++++++++
.../hadoop/hive/ql/exec/repl/ReplDumpTask.java | 6 +--
.../hadoop/hive/ql/exec/repl/ReplDumpWork.java | 9 ++++
3 files changed, 68 insertions(+), 4 deletions(-)
diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java
index 165b4d438e2..396abd24b47 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java
@@ -55,6 +55,7 @@ import java.util.Map;
import static org.apache.hadoop.hdfs.protocol.HdfsConstants.QUOTA_DONT_SET;
import static org.apache.hadoop.hdfs.protocol.HdfsConstants.QUOTA_RESET;
+import static org.apache.hadoop.hive.common.repl.ReplConst.REPL_ENABLE_BACKGROUND_THREAD;
import static org.apache.hadoop.hive.common.repl.ReplConst.REPL_TARGET_DB_PROPERTY;
import static org.apache.hadoop.hive.common.repl.ReplConst.TARGET_OF_REPLICATION;
import static org.apache.hadoop.hive.metastore.ReplChangeManager.SOURCE_OF_REPLICATION;
@@ -1092,4 +1093,60 @@ public class TestReplicationOptimisedBootstrap extends BaseReplicationScenariosA
}
return txnHandler.getOpenTxns(txnListExcludingReplCreated).getOpen_txns();
}
+ /** Checks that replica db params are set after the 1st optimized-bootstrap cycle and unset by the 2nd. */
+ @Test
+ public void testDbParametersAfterOptimizedBootstrap() throws Throwable {
+ List<String> withClause = Arrays.asList(
+ String.format("'%s'='%s'", HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET.varname, "false"),
+ String.format("'%s'='%s'", HiveConf.ConfVars.HIVE_REPL_FAILOVER_START.varname, "true")
+ );
+
+ // bootstrap: dump on primary (failover-start flag set in withClause), load on replica
+ primary.run("use " + primaryDbName)
+ .run("create table t1 (id int) clustered by(id) into 3 buckets stored as orc " +
+ "tblproperties (\"transactional\"=\"true\")")
+ .run("insert into table t1 values (1),(2)")
+ .dump(primaryDbName, withClause);
+ replica.load(replicatedDbName, primaryDbName, withClause);
+
+ // incremental: one more insert replicated from primary to replica
+ primary.run("use " + primaryDbName)
+ .run("insert into table t1 values (3)")
+ .dump(primaryDbName, withClause);
+ replica.load(replicatedDbName, primaryDbName, withClause);
+
+ // make a change on primary that is never dumped to the replica
+ primary.run("use " + primaryDbName)
+ .run("insert into table t1 values (4)");
+
+ withClause = Arrays.asList(
+ String.format("'%s'='%s'", HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET.varname, "false")
+ );
+ // 1st cycle of optimized bootstrap: direction reversed, replica dumps and primary loads
+ replica.dump(replicatedDbName, withClause);
+ primary.load(primaryDbName, replicatedDbName, withClause);
+
+ String[] dbParams = new String[]{
+ TARGET_OF_REPLICATION,
+ CURR_STATE_ID_SOURCE.toString(),
+ CURR_STATE_ID_TARGET.toString(),
+ REPL_TARGET_DB_PROPERTY,
+ REPL_ENABLE_BACKGROUND_THREAD
+ };
+ // verify that all db parameters are set on the replica after the 1st cycle
+ for (String paramKey : dbParams) {
+ assertTrue(replica.getDatabase(replicatedDbName).getParameters().containsKey(paramKey));
+ }
+
+ // 2nd cycle of optimized bootstrap
+ replica.dump(replicatedDbName, withClause);
+ primary.load(primaryDbName, replicatedDbName, withClause);
+ // after the 2nd cycle none of these parameters may remain on the replica
+ for (String paramKey : dbParams) {
+ assertFalse(replica.getDatabase(replicatedDbName).getParameters().containsKey(paramKey));
+ }
+ // ensure optimized bootstrap was successful: primary holds rows 1,2,3 and not the undumped 4
+ primary.run(String.format("select * from %s.t1", primaryDbName))
+ .verifyResults(new String[]{"1", "2", "3"});
+ }
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpTask.java
index e7c329a5f25..02815334fc5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpTask.java
@@ -162,7 +162,6 @@ public class ReplDumpTask extends Task<ReplDumpWork> implements Serializable {
private Set<String> tablesForBootstrap = new HashSet<>();
private List<TxnType> excludedTxns = Arrays.asList(TxnType.READ_ONLY, TxnType.REPL_CREATED);
private boolean createEventMarker = false;
- private boolean unsetDbPropertiesForOptimisedBootstrap;
public enum ConstraintFileType {COMMON("common", "c_"), FOREIGNKEY("fk", "f_");
private final String name;
@@ -264,8 +263,7 @@ public class ReplDumpTask extends Task<ReplDumpWork> implements Serializable {
assert isTableDiffDirectoryPresent;
- // Set boolean to determine the db properties need to sorted once dump is complete
- unsetDbPropertiesForOptimisedBootstrap = true;
+ work.setSecondDumpAfterFailover(true);
long fromEventId = Long.parseLong(getEventIdFromFile(previousValidHiveDumpPath.getParent(), conf)[1]);
LOG.info("Starting optimised bootstrap from event id {} for database {}", fromEventId,
@@ -474,7 +472,7 @@ public class ReplDumpTask extends Task<ReplDumpWork> implements Serializable {
+ ReplAck.DUMP_ACKNOWLEDGEMENT);
// Check if we need to unset database properties after successful optimised bootstrap.
- if (unsetDbPropertiesForOptimisedBootstrap) {
+ if (work.isSecondDumpAfterFailover()) {
Hive hiveDb = getHive();
Database database = hiveDb.getDatabase(work.dbNameOrPattern);
LinkedHashMap<String, String> dbParams = new LinkedHashMap<>(database.getParameters());
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpWork.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpWork.java
index 4ac1bf51db3..65d9c17a675 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpWork.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpWork.java
@@ -73,6 +73,7 @@ public class ReplDumpWork implements Serializable {
private ReplLogger replLogger;
private FailoverMetaData fmd;
private boolean firstDumpAfterFailover;
+ private boolean secondDumpAfterFailover;
public static void injectNextDumpDirForTest(String dumpDir) {
injectNextDumpDirForTest(dumpDir, false);
@@ -355,4 +356,12 @@ public class ReplDumpWork implements Serializable {
public void setReplLogger(ReplLogger replLogger) {
this.replLogger = replLogger;
}
+ /** Returns the flag ReplDumpTask checks after a dump to decide whether to unset db properties. */
+ public boolean isSecondDumpAfterFailover() {
+ return secondDumpAfterFailover;
+ }
+ /** Sets the flag; ReplDumpTask passes true when it starts an optimised bootstrap dump. */
+ public void setSecondDumpAfterFailover(boolean secondDumpAfterFailover) {
+ this.secondDumpAfterFailover = secondDumpAfterFailover;
+ }
}