You are viewing a plain text version of this content. The canonical link for it is here.
Posted to gitbox@hive.apache.org by GitBox <gi...@apache.org> on 2021/02/15 09:57:26 UTC

[GitHub] [hive] aasha commented on a change in pull request #1942: HIVE-24733: Handle replication when db location and managed location …

aasha commented on a change in pull request #1942:
URL: https://github.com/apache/hive/pull/1942#discussion_r575957081



##########
File path: itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java
##########
@@ -485,6 +485,7 @@ private Task getReplLoadRootTask(String sourceDb, String replicadb, boolean isIn
       metricCollector = new BootstrapLoadMetricCollector(replicadb, tuple.dumpLocation, 0,
         confTemp);
     }
+    confTemp.setBoolVar(HiveConf.ConfVars.REPL_RETAIN_CUSTOM_LOCATIONS_FOR_DB_ON_TARGET, false);

Review comment:
       Add a comment explaining why this is set to false.

##########
File path: common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
##########
@@ -554,6 +554,8 @@ private static void populateLlapDaemonVarsSet(Set<String> llapDaemonVarsSetLocal
     REPL_RETAIN_PREV_DUMP_DIR_COUNT("hive.repl.retain.prev.dump.dir.count", 3,
             "Indicates maximium number of latest previously used dump-directories which would be retained when " +
              "hive.repl.retain.prev.dump.dir is set to true"),
+    REPL_RETAIN_CUSTOM_LOCATIONS_FOR_DB_ON_TARGET("hive.repl.retain.custom.db.locations.on.target", true,
+            "Indicates if source database has custom warehouse locations, whether that should be retained on target as well"),

Review comment:
       managed table location?

##########
File path: ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplLoadTask.java
##########
@@ -244,6 +245,7 @@ a database ( directory )
           scope.database = true;
         }
         dbTracker.debugLog("database");
+        dbEventFound = true;

Review comment:
       Should this be set to true only if scope.database = true? What is tracked using that?

##########
File path: ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/LoadDatabase.java
##########
@@ -162,6 +184,10 @@ private boolean isDbEmpty(String dbName) throws HiveException {
     Map<String, String> parameters = new HashMap<>(dbObj.getParameters());
     parameters.remove(ReplicationSpec.KEY.CURR_STATE_ID.toString());
 
+    parameters.remove(ReplUtils.REPL_IS_CUSTOM_DB_LOC);

Review comment:
       Why are these params needed?

##########
File path: itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExclusiveReplica.java
##########
@@ -522,6 +524,194 @@ public void externalTableReplicationDropDatabase() throws Throwable {
     verifyTableDataExists(replica, dbDataLocReplica, tableName, true);
   }
 
+  @Test
+  public void testCustomWarehouseLocations() throws Throwable {
+    List<String> withClauseOptions = getStagingLocationConfig(primary.repldDir, false);
+    String dbWhManagedLoc = new Path(primary.warehouseRoot.getParent(), "customManagedLoc").toUri().getPath();
+    String dbWhExternalLoc = new Path(primary.externalTableWarehouseRoot.getParent(),
+            "customExternalLoc").toUri().getPath();
+    String srcDb = "srcDb";
+    WarehouseInstance.Tuple tuple = primary
+            .run("create database " + srcDb + " LOCATION '" + dbWhExternalLoc + "' MANAGEDLOCATION '" + dbWhManagedLoc
+                    + "' WITH DBPROPERTIES ( '" + SOURCE_OF_REPLICATION + "' = '1,2,3')")
+            .run("use " + srcDb)
+            .run("create table t1 (id int)")
+            .run("insert into table t1 values (500)")
+            .run("create external table t2 (id int)")
+            .run("insert into table t2 values (1000)")
+            .run("create table tp1 (id int) partitioned by (p int)")
+            .run("insert into tp1 partition(p=1) values(10)")
+            .run("insert into tp1 partition(p=2) values(20)")
+            .dump(srcDb, withClauseOptions);
+
+    replica.load(replicatedDbName, srcDb, withClauseOptions)
+            .run("use " + replicatedDbName)
+            .run("show tables like 't1'")
+            .verifyResult("t1")
+            .run("select id from t1")
+            .verifyResult("500")
+            .run("show tables like 't2'")
+            .verifyResult("t2")
+            .run("select id from t2")
+            .verifyResult("1000")
+            .run("show tables like 'tp1'")
+            .verifyResult("tp1")
+            .run("select id from tp1")
+            .verifyResults(new String[]{"10", "20"});
+    verifyCustomDBLocations(dbWhManagedLoc, dbWhExternalLoc, true);
+    primary.run("use " + srcDb)
+            .run("insert into table t1 values (1000)")
+            .run("insert into table t2 values (2000)")
+            .run("insert into tp1 partition(p=1) values(30)")
+            .run("insert into tp1 partition(p=2) values(40)")
+            .dump(srcDb, withClauseOptions);
+    replica.load(replicatedDbName, srcDb, withClauseOptions)
+            .run("use " + replicatedDbName)
+            .run("show tables like 't1'")
+            .verifyResult("t1")
+            .run("select id from t1")
+            .verifyResults(new String[]{"500", "1000"})
+            .run("show tables like 't2'")
+            .verifyResult("t2")
+            .run("select id from t2")
+            .verifyResults(new String[]{"1000", "2000"})
+            .run("show tables like 'tp1'")
+            .verifyResult("tp1")
+            .run("select id from tp1")
+            .verifyResults(new String[]{"10", "20", "30", "40"});
+    primary.run("use " + srcDb)
+            .run("insert into table t1 values (2000)")
+            .run("insert into table t2 values (3000)")
+            .run("create table t3 (id int)")
+            .run("insert into table t3 values (3000)")
+            .run("create external table t4 (id int)")
+            .run("insert into table t4 values (4000)")
+            .run("insert into tp1 partition(p=1) values(50)")
+            .run("insert into tp1 partition(p=2) values(60)")
+            .run("create table tp2 (id int) partitioned by (p int)")
+            .run("insert into tp2 partition(p=1) values(100)")
+            .run("insert into tp2 partition(p=2) values(200)")
+            .dump(srcDb, withClauseOptions);
+    replica.load(replicatedDbName, srcDb, withClauseOptions)
+            .run("use " + replicatedDbName)
+            .run("show tables like 't1'")
+            .verifyResult("t1")
+            .run("select id from t1")
+            .verifyResults(new String[]{"500", "1000", "2000"})
+            .run("show tables like 't2'")
+            .verifyResult("t2")
+            .run("select id from t2")
+            .verifyResults(new String[]{"1000", "2000", "3000"})
+            .run("show tables like 't3'")
+            .verifyResult("t3")
+            .run("select id from t3")
+            .verifyResults(new String[]{"3000"})
+            .run("show tables like 't4'")
+            .verifyResult("t4")
+            .run("select id from t4")
+            .verifyResults(new String[]{"4000"})
+            .run("select id from tp1")
+            .verifyResults(new String[]{"10", "20", "30", "40", "50", "60"})
+            .run("show tables like 'tp1'")
+            .verifyResult("tp1")
+            .run("select id from tp2")
+            .verifyResults(new String[]{"100", "200"});
+  }
+
+  @Test
+  public void testCustomWarehouseLocationsConf() throws Throwable {
+    List<String> withClauseOptions = getStagingLocationConfig(primary.repldDir, false);
+    String dbWhManagedLoc = new Path(primary.warehouseRoot.getParent(), "customManagedLoc1").toUri().getPath();
+    String dbWhExternalLoc = new Path(primary.externalTableWarehouseRoot.getParent(),
+            "customExternalLoc1").toUri().getPath();
+    String srcDb = "srcDbConf";
+    WarehouseInstance.Tuple tuple = primary
+            .run("create database " + srcDb + " LOCATION '" + dbWhExternalLoc + "' MANAGEDLOCATION '" + dbWhManagedLoc
+                    + "' WITH DBPROPERTIES ( '" + SOURCE_OF_REPLICATION + "' = '1,2,3')")
+            .run("use " + srcDb)
+            .run("create table t1 (id int)")
+            .run("insert into table t1 values (500)")
+            .run("create external table t2 (id int)")
+            .run("insert into table t2 values (1000)")
+            .dump(srcDb, withClauseOptions);
+
+    withClauseOptions.add("'" + HiveConf.ConfVars.REPL_RETAIN_CUSTOM_LOCATIONS_FOR_DB_ON_TARGET.varname + "'='false'");
+    replica.load(replicatedDbName, srcDb, withClauseOptions)
+            .run("use " + replicatedDbName)
+            .run("show tables like 't1'")
+            .verifyResult("t1")
+            .run("select id from t1")
+            .verifyResult("500")
+            .run("show tables like 't2'")
+            .verifyResult("t2")
+            .run("select id from t2")
+            .verifyResult("1000");
+    verifyDefaultDBLocations(dbWhManagedLoc, dbWhExternalLoc);
+    primary.run("use " + srcDb)
+            .run("insert into table t1 values (1000)")
+            .run("insert into table t2 values (2000)")
+            .dump(srcDb, withClauseOptions);
+    replica.load(replicatedDbName, srcDb, withClauseOptions)
+            .run("use " + replicatedDbName)
+            .run("show tables like 't1'")
+            .verifyResult("t1")
+            .run("select id from t1")
+            .verifyResults(new String[]{"500", "1000"})
+            .run("show tables like 't2'")
+            .verifyResult("t2")
+            .run("select id from t2")
+            .verifyResults(new String[]{"1000", "2000"});
+    primary.run("use " + srcDb)
+            .run("insert into table t1 values (2000)")
+            .run("insert into table t2 values (3000)")
+            .run("create table t3 (id int)")
+            .run("insert into table t3 values (3000)")
+            .run("create external table t4 (id int)")
+            .run("insert into table t4 values (4000)")
+            .dump(srcDb, withClauseOptions);
+    replica.load(replicatedDbName, srcDb, withClauseOptions)
+            .run("use " + replicatedDbName)
+            .run("show tables like 't1'")
+            .verifyResult("t1")
+            .run("select id from t1")
+            .verifyResults(new String[]{"500", "1000", "2000"})
+            .run("show tables like 't2'")
+            .verifyResult("t2")
+            .run("select id from t2")
+            .verifyResults(new String[]{"1000", "2000", "3000"})
+            .run("show tables like 't3'")
+            .verifyResult("t3")
+            .run("select id from t3")
+            .verifyResults(new String[]{"3000"})
+            .run("show tables like 't4'")
+            .verifyResult("t4")
+            .run("select id from t4")
+            .verifyResults(new String[]{"4000"});
+  }
+
+  private void verifyCustomDBLocations(String managedCustLocOnSrc, String externalCustLocOnSrc, boolean replaceCustPath)
+          throws Exception {
+    if (replaceCustPath ) {
+      Database replDatabase  = replica.getDatabase(replicatedDbName);
+      String managedCustLocOnTgt = new Path(replDatabase.getManagedLocationUri()).toUri().getPath();
+      Assert.assertEquals(managedCustLocOnSrc,  managedCustLocOnTgt);
+      Assert.assertNotEquals(managedCustLocOnTgt,  replica.warehouseRoot.toUri().getPath());
+      String externalCustLocOnTgt = new Path(replDatabase.getLocationUri()).toUri().getPath();
+      Assert.assertEquals(externalCustLocOnSrc,  externalCustLocOnTgt);
+      Assert.assertNotEquals(externalCustLocOnTgt,  new Path(replica.externalTableWarehouseRoot,
+              replicatedDbName.toLowerCase()  + ".db").toUri().getPath());
+    }
+  }
+
+  private void verifyDefaultDBLocations(String managedCustLocOnSrc, String externalCustLocOnSrc) throws Exception {

Review comment:
       This can reuse verifyCustomDBLocations, called with replaceCustPath set to false, instead of a separate method.

##########
File path: ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/events/filesystem/FSDatabaseEvent.java
##########
@@ -24,6 +24,7 @@
 import org.apache.hadoop.hive.metastore.api.Database;
 import org.apache.hadoop.hive.ql.exec.repl.bootstrap.events.DatabaseEvent;
 import org.apache.hadoop.hive.ql.parse.EximUtil;
+import org.apache.hadoop.hive.ql.parse.ReplicationSpec;

Review comment:
       unused import

##########
File path: itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExclusiveReplica.java
##########
@@ -522,6 +524,194 @@ public void externalTableReplicationDropDatabase() throws Throwable {
     verifyTableDataExists(replica, dbDataLocReplica, tableName, true);
   }
 
+  @Test
+  public void testCustomWarehouseLocations() throws Throwable {
+    List<String> withClauseOptions = getStagingLocationConfig(primary.repldDir, false);
+    String dbWhManagedLoc = new Path(primary.warehouseRoot.getParent(), "customManagedLoc").toUri().getPath();
+    String dbWhExternalLoc = new Path(primary.externalTableWarehouseRoot.getParent(),
+            "customExternalLoc").toUri().getPath();
+    String srcDb = "srcDb";
+    WarehouseInstance.Tuple tuple = primary
+            .run("create database " + srcDb + " LOCATION '" + dbWhExternalLoc + "' MANAGEDLOCATION '" + dbWhManagedLoc
+                    + "' WITH DBPROPERTIES ( '" + SOURCE_OF_REPLICATION + "' = '1,2,3')")
+            .run("use " + srcDb)
+            .run("create table t1 (id int)")
+            .run("insert into table t1 values (500)")
+            .run("create external table t2 (id int)")
+            .run("insert into table t2 values (1000)")
+            .run("create table tp1 (id int) partitioned by (p int)")
+            .run("insert into tp1 partition(p=1) values(10)")
+            .run("insert into tp1 partition(p=2) values(20)")
+            .dump(srcDb, withClauseOptions);
+
+    replica.load(replicatedDbName, srcDb, withClauseOptions)
+            .run("use " + replicatedDbName)
+            .run("show tables like 't1'")
+            .verifyResult("t1")
+            .run("select id from t1")
+            .verifyResult("500")
+            .run("show tables like 't2'")
+            .verifyResult("t2")
+            .run("select id from t2")
+            .verifyResult("1000")
+            .run("show tables like 'tp1'")
+            .verifyResult("tp1")
+            .run("select id from tp1")
+            .verifyResults(new String[]{"10", "20"});
+    verifyCustomDBLocations(dbWhManagedLoc, dbWhExternalLoc, true);
+    primary.run("use " + srcDb)
+            .run("insert into table t1 values (1000)")
+            .run("insert into table t2 values (2000)")
+            .run("insert into tp1 partition(p=1) values(30)")
+            .run("insert into tp1 partition(p=2) values(40)")
+            .dump(srcDb, withClauseOptions);
+    replica.load(replicatedDbName, srcDb, withClauseOptions)
+            .run("use " + replicatedDbName)
+            .run("show tables like 't1'")
+            .verifyResult("t1")
+            .run("select id from t1")
+            .verifyResults(new String[]{"500", "1000"})
+            .run("show tables like 't2'")
+            .verifyResult("t2")
+            .run("select id from t2")
+            .verifyResults(new String[]{"1000", "2000"})
+            .run("show tables like 'tp1'")
+            .verifyResult("tp1")
+            .run("select id from tp1")
+            .verifyResults(new String[]{"10", "20", "30", "40"});
+    primary.run("use " + srcDb)
+            .run("insert into table t1 values (2000)")
+            .run("insert into table t2 values (3000)")
+            .run("create table t3 (id int)")
+            .run("insert into table t3 values (3000)")
+            .run("create external table t4 (id int)")
+            .run("insert into table t4 values (4000)")
+            .run("insert into tp1 partition(p=1) values(50)")
+            .run("insert into tp1 partition(p=2) values(60)")
+            .run("create table tp2 (id int) partitioned by (p int)")
+            .run("insert into tp2 partition(p=1) values(100)")
+            .run("insert into tp2 partition(p=2) values(200)")
+            .dump(srcDb, withClauseOptions);
+    replica.load(replicatedDbName, srcDb, withClauseOptions)
+            .run("use " + replicatedDbName)
+            .run("show tables like 't1'")
+            .verifyResult("t1")
+            .run("select id from t1")
+            .verifyResults(new String[]{"500", "1000", "2000"})
+            .run("show tables like 't2'")
+            .verifyResult("t2")
+            .run("select id from t2")
+            .verifyResults(new String[]{"1000", "2000", "3000"})
+            .run("show tables like 't3'")
+            .verifyResult("t3")
+            .run("select id from t3")
+            .verifyResults(new String[]{"3000"})
+            .run("show tables like 't4'")
+            .verifyResult("t4")
+            .run("select id from t4")
+            .verifyResults(new String[]{"4000"})
+            .run("select id from tp1")
+            .verifyResults(new String[]{"10", "20", "30", "40", "50", "60"})
+            .run("show tables like 'tp1'")
+            .verifyResult("tp1")
+            .run("select id from tp2")
+            .verifyResults(new String[]{"100", "200"});
+  }
+
+  @Test
+  public void testCustomWarehouseLocationsConf() throws Throwable {
+    List<String> withClauseOptions = getStagingLocationConfig(primary.repldDir, false);
+    String dbWhManagedLoc = new Path(primary.warehouseRoot.getParent(), "customManagedLoc1").toUri().getPath();
+    String dbWhExternalLoc = new Path(primary.externalTableWarehouseRoot.getParent(),
+            "customExternalLoc1").toUri().getPath();
+    String srcDb = "srcDbConf";
+    WarehouseInstance.Tuple tuple = primary
+            .run("create database " + srcDb + " LOCATION '" + dbWhExternalLoc + "' MANAGEDLOCATION '" + dbWhManagedLoc
+                    + "' WITH DBPROPERTIES ( '" + SOURCE_OF_REPLICATION + "' = '1,2,3')")
+            .run("use " + srcDb)
+            .run("create table t1 (id int)")
+            .run("insert into table t1 values (500)")
+            .run("create external table t2 (id int)")
+            .run("insert into table t2 values (1000)")
+            .dump(srcDb, withClauseOptions);
+
+    withClauseOptions.add("'" + HiveConf.ConfVars.REPL_RETAIN_CUSTOM_LOCATIONS_FOR_DB_ON_TARGET.varname + "'='false'");
+    replica.load(replicatedDbName, srcDb, withClauseOptions)
+            .run("use " + replicatedDbName)
+            .run("show tables like 't1'")
+            .verifyResult("t1")
+            .run("select id from t1")
+            .verifyResult("500")
+            .run("show tables like 't2'")
+            .verifyResult("t2")
+            .run("select id from t2")
+            .verifyResult("1000");
+    verifyDefaultDBLocations(dbWhManagedLoc, dbWhExternalLoc);

Review comment:
       Compare the table locations as well.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: gitbox-unsubscribe@hive.apache.org
For additional commands, e-mail: gitbox-help@hive.apache.org