You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by sz...@apache.org on 2020/02/20 16:33:15 UTC
[hive] branch master updated: HIVE-22831: Add option in
HiveStrictManagedMigration to also move tables converted to external living
in old WH (Adam Szita, reviewed by Peter Vary)
This is an automated email from the ASF dual-hosted git repository.
szita pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new faaf2c3 HIVE-22831: Add option in HiveStrictManagedMigration to also move tables converted to external living in old WH (Adam Szita, reviewed by Peter Vary)
faaf2c3 is described below
commit faaf2c3fda527cbebf0ed22ab95691ddb9ad8588
Author: Adam Szita <sz...@cloudera.com>
AuthorDate: Thu Feb 6 17:10:39 2020 +0100
HIVE-22831: Add option in HiveStrictManagedMigration to also move tables converted to external living in old WH (Adam Szita, reviewed by Peter Vary)
---
.../hive/ql/util/HiveStrictManagedMigration.java | 251 ++++++++++++++-------
.../ql/util/TestHiveStrictManagedMigration.java | 202 ++++++++++++++++-
2 files changed, 368 insertions(+), 85 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/util/HiveStrictManagedMigration.java b/ql/src/java/org/apache/hadoop/hive/ql/util/HiveStrictManagedMigration.java
index 19fb750..e2e5983 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/util/HiveStrictManagedMigration.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/util/HiveStrictManagedMigration.java
@@ -73,6 +73,10 @@ import org.slf4j.LoggerFactory;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Lists;
+import static java.util.stream.Collectors.toList;
+import static org.apache.hadoop.hive.metastore.TableType.EXTERNAL_TABLE;
+import static org.apache.hadoop.hive.metastore.TableType.MANAGED_TABLE;
+
public class HiveStrictManagedMigration {
private static final Logger LOG = LoggerFactory.getLogger(HiveStrictManagedMigration.class);
@@ -91,9 +95,10 @@ public class HiveStrictManagedMigration {
final String oldWarehouseRoot;
final TableMigrationOption migrationOption;
final Properties confProps;
- final boolean shouldModifyManagedTableLocation;
+ boolean shouldModifyManagedTableLocation;
final boolean shouldModifyManagedTableOwner;
final boolean shouldModifyManagedTablePermissions;
+ boolean shouldMoveExternal;
final boolean dryRun;
final TableType tableType;
final int tablePoolSize;
@@ -106,6 +111,7 @@ public class HiveStrictManagedMigration {
boolean shouldModifyManagedTableLocation,
boolean shouldModifyManagedTableOwner,
boolean shouldModifyManagedTablePermissions,
+ boolean shouldMoveExternal,
boolean dryRun,
TableType tableType,
int tablePoolSize) {
@@ -118,24 +124,18 @@ public class HiveStrictManagedMigration {
this.shouldModifyManagedTableLocation = shouldModifyManagedTableLocation;
this.shouldModifyManagedTableOwner = shouldModifyManagedTableOwner;
this.shouldModifyManagedTablePermissions = shouldModifyManagedTablePermissions;
+ this.shouldMoveExternal = shouldMoveExternal;
this.dryRun = dryRun;
this.tableType = tableType;
this.tablePoolSize = tablePoolSize;
}
- public RunOptions setShouldModifyManagedTableLocation(boolean shouldModifyManagedTableLocation) {
- return new RunOptions(
- this.dbRegex,
- this.tableRegex,
- this.oldWarehouseRoot,
- this.migrationOption,
- this.confProps,
- shouldModifyManagedTableLocation,
- this.shouldModifyManagedTableOwner,
- this.shouldModifyManagedTablePermissions,
- this.dryRun,
- this.tableType,
- this.tablePoolSize);
+ public void setShouldModifyManagedTableLocation(boolean shouldModifyManagedTableLocation) {
+ this.shouldModifyManagedTableLocation = shouldModifyManagedTableLocation;
+ }
+
+ public void setShouldMoveExternal(boolean shouldMoveExternal) {
+ this.shouldMoveExternal = shouldMoveExternal;
}
@Override
@@ -149,6 +149,7 @@ public class HiveStrictManagedMigration {
", shouldModifyManagedTableLocation=" + shouldModifyManagedTableLocation +
", shouldModifyManagedTableOwner=" + shouldModifyManagedTableOwner +
", shouldModifyManagedTablePermissions=" + shouldModifyManagedTablePermissions +
+ ", shouldMoveExternal=" + shouldMoveExternal +
", dryRun=" + dryRun +
", tableType=" + tableType +
", tablePoolSize=" + tablePoolSize +
@@ -172,16 +173,22 @@ public class HiveStrictManagedMigration {
private static class WarehouseRootCheckResult {
final boolean shouldModifyManagedTableLocation;
- final Path curWhRootPath;
+ final boolean shouldMoveExternal;
+ final Path targetPath;
final HadoopShims.HdfsEncryptionShim encryptionShim;
+ final HadoopShims.HdfsErasureCodingShim ecShim;
WarehouseRootCheckResult(
boolean shouldModifyManagedTableLocation,
+ boolean shouldMoveExternal,
Path curWhRootPath,
- HadoopShims.HdfsEncryptionShim encryptionShim) {
+ HadoopShims.HdfsEncryptionShim encryptionShim,
+ HadoopShims.HdfsErasureCodingShim ecShim) {
this.shouldModifyManagedTableLocation = shouldModifyManagedTableLocation;
- this.curWhRootPath = curWhRootPath;
+ this.shouldMoveExternal = shouldMoveExternal;
+ this.targetPath = curWhRootPath;
this.encryptionShim = encryptionShim;
+ this.ecShim = ecShim;
}
}
@@ -208,8 +215,10 @@ public class HiveStrictManagedMigration {
try {
HiveConf conf = hiveConf == null ? new HiveConf() : hiveConf;
WarehouseRootCheckResult warehouseRootCheckResult = checkOldWarehouseRoot(runOptions, conf);
- runOptions = runOptions.setShouldModifyManagedTableLocation(
- warehouseRootCheckResult.shouldModifyManagedTableLocation);
+ runOptions.setShouldModifyManagedTableLocation(
+ warehouseRootCheckResult.shouldModifyManagedTableLocation);
+ runOptions.setShouldMoveExternal(
+ warehouseRootCheckResult.shouldMoveExternal);
boolean createExternalDirsForDbs = checkExternalWarehouseDir(conf);
OwnerPermsOptions ownerPermsOptions = checkOwnerPermsOptions(runOptions, conf);
@@ -296,6 +305,12 @@ public class HiveStrictManagedMigration {
.create());
result.addOption(OptionBuilder
+ .withLongOpt("shouldMoveExternal")
+ .withDescription("Whether tables living in the old warehouse path should have their data moved to the" +
+ " default external location. Applicable only if migrationOption = external")
+ .create());
+
+ result.addOption(OptionBuilder
.withLongOpt("help")
.withDescription("print help message")
.create('h'));
@@ -347,6 +362,16 @@ public class HiveStrictManagedMigration {
shouldModifyManagedTablePermissions = true;
}
String oldWarehouseRoot = cli.getOptionValue("oldWarehouseRoot");
+ boolean shouldMoveExternal = cli.hasOption("shouldMoveExternal");
+ if (shouldMoveExternal && !migrationOption.equals(TableMigrationOption.EXTERNAL)) {
+ throw new IllegalArgumentException("Please select external as migration option, it is required for " +
+ "shouldMoveExternal option.");
+ }
+ if (shouldModifyManagedTableLocation && shouldMoveExternal) {
+ throw new IllegalArgumentException("Options shouldModifyManagedTableLocation and " +
+ "shouldMoveExternal cannot be used at the same time. Migration with move option on " +
+ " managed tables either ends up with them remaining managed or converted to external, but can't be both.");
+ }
boolean dryRun = cli.hasOption("dryRun");
String tableTypeText = cli.getOptionValue("tableType");
@@ -374,6 +399,7 @@ public class HiveStrictManagedMigration {
shouldModifyManagedTableLocation,
shouldModifyManagedTableOwner,
shouldModifyManagedTablePermissions,
+ shouldMoveExternal,
dryRun,
tableTypeText == null ? null : TableType.valueOf(tableTypeText),
tablePoolSize);
@@ -392,10 +418,11 @@ public class HiveStrictManagedMigration {
}
private final HiveConf conf;
- private RunOptions runOptions;
+ private final RunOptions runOptions;
private final boolean createExternalDirsForDbs;
- private final Path curWhRootPath;
+ private final Path targetPath;
private final HadoopShims.HdfsEncryptionShim encryptionShim;
+ private final HadoopShims.HdfsErasureCodingShim ecShim;
private final String ownerName;
private final String groupName;
private final FsPermission dirPerms;
@@ -418,8 +445,9 @@ public class HiveStrictManagedMigration {
this.groupName = ownerPermsOptions.groupName;
this.dirPerms = ownerPermsOptions.dirPerms;
this.filePerms = ownerPermsOptions.filePerms;
- this.curWhRootPath = warehouseRootCheckResult.curWhRootPath;
+ this.targetPath = warehouseRootCheckResult.targetPath;
this.encryptionShim = warehouseRootCheckResult.encryptionShim;
+ this.ecShim = warehouseRootCheckResult.ecShim;
// Make sure all --hiveconf settings get added to the HiveConf.
// This allows utility-specific settings (such as strict.managed.tables.migration.owner)
@@ -449,7 +477,7 @@ public class HiveStrictManagedMigration {
throw new RuntimeException(e);
}
});
- if (runOptions.shouldModifyManagedTableLocation) {
+ if (runOptions.shouldModifyManagedTableLocation || runOptions.shouldMoveExternal) {
Configuration oldConf = new Configuration(conf);
HiveConf.setVar(oldConf, HiveConf.ConfVars.METASTOREWAREHOUSE, runOptions.oldWarehouseRoot);
@@ -500,43 +528,68 @@ public class HiveStrictManagedMigration {
static WarehouseRootCheckResult checkOldWarehouseRoot(RunOptions runOptions, HiveConf conf) throws IOException {
boolean shouldModifyManagedTableLocation = runOptions.shouldModifyManagedTableLocation;
- Path curWhRootPath = null;
+ boolean shouldMoveExternal = runOptions.shouldMoveExternal;
+ Path targetPath = null;
HadoopShims.HdfsEncryptionShim encryptionShim = null;
+ HadoopShims.HdfsErasureCodingShim ecShim = null;
+
+ if (shouldMoveExternal && !checkExternalWarehouseDir(conf)) {
+ LOG.info("External warehouse path not specified/empty. Disabling shouldMoveExternal");
+ shouldMoveExternal = false;
+ }
- if (runOptions.shouldModifyManagedTableLocation) {
+ if (shouldModifyManagedTableLocation || shouldMoveExternal) {
if (runOptions.oldWarehouseRoot == null) {
- LOG.info("oldWarehouseRoot is not specified. Disabling shouldModifyManagedTableLocation");
+ LOG.info("oldWarehouseRoot is not specified. Disabling shouldModifyManagedTableLocation and " +
+ "shouldMoveExternal");
shouldModifyManagedTableLocation = false;
+ shouldMoveExternal = false;
} else {
- String curWarehouseRoot = HiveConf.getVar(conf, HiveConf.ConfVars.METASTOREWAREHOUSE);
- if (arePathsEqual(conf, runOptions.oldWarehouseRoot, curWarehouseRoot)) {
- LOG.info("oldWarehouseRoot is the same as the current warehouse root {}."
- + " Disabling shouldModifyManagedTableLocation",
+ String currentPathString = shouldModifyManagedTableLocation ?
+ HiveConf.getVar(conf, HiveConf.ConfVars.METASTOREWAREHOUSE) :
+ HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_METASTORE_WAREHOUSE_EXTERNAL);
+ if (arePathsEqual(conf, runOptions.oldWarehouseRoot, currentPathString)) {
+ LOG.info("oldWarehouseRoot is the same as the target path {}."
+ + " Disabling shouldModifyManagedTableLocation and shouldMoveExternal",
runOptions.oldWarehouseRoot);
shouldModifyManagedTableLocation = false;
+ shouldMoveExternal = false;
} else {
Path oldWhRootPath = new Path(runOptions.oldWarehouseRoot);
- curWhRootPath = new Path(curWarehouseRoot);
+ targetPath = new Path(currentPathString);
FileSystem oldWhRootFs = oldWhRootPath.getFileSystem(conf);
- FileSystem curWhRootFs = curWhRootPath.getFileSystem(conf);
+ FileSystem curWhRootFs = targetPath.getFileSystem(conf);
oldWhRootPath = oldWhRootFs.makeQualified(oldWhRootPath);
- curWhRootPath = curWhRootFs.makeQualified(curWhRootPath);
+ targetPath = curWhRootFs.makeQualified(targetPath);
if (!FileUtils.equalsFileSystem(oldWhRootFs, curWhRootFs)) {
- LOG.info("oldWarehouseRoot {} has a different FS than the current warehouse root {}."
- + " Disabling shouldModifyManagedTableLocation",
- runOptions.oldWarehouseRoot, curWarehouseRoot);
+ LOG.info("oldWarehouseRoot {} has a different FS than the target path {}."
+ + " Disabling shouldModifyManagedTableLocation and shouldMoveExternal",
+ runOptions.oldWarehouseRoot, currentPathString);
shouldModifyManagedTableLocation = false;
+ shouldMoveExternal = false;
} else {
if (!isHdfs(oldWhRootFs)) {
- LOG.info("Warehouse is using non-HDFS FileSystem {}. Disabling shouldModifyManagedTableLocation",
- oldWhRootFs.getUri());
+ LOG.info("Warehouse is using non-HDFS FileSystem {}. Disabling shouldModifyManagedTableLocation and" +
+ "shouldMoveExternal", oldWhRootFs.getUri());
shouldModifyManagedTableLocation = false;
+ shouldMoveExternal = false;
} else {
encryptionShim = ShimLoader.getHadoopShims().createHdfsEncryptionShim(oldWhRootFs, conf);
- if (!hasEquivalentEncryption(encryptionShim, oldWhRootPath, curWhRootPath)) {
- LOG.info("oldWarehouseRoot {} and current warehouse root {} have different encryption zones." +
- " Disabling shouldModifyManagedTableLocation", oldWhRootPath, curWhRootPath);
+ if (!hasEquivalentEncryption(encryptionShim, oldWhRootPath, targetPath)) {
+ LOG.info("oldWarehouseRoot {} and target path {} have different encryption zones." +
+ " Disabling shouldModifyManagedTableLocation and shouldMoveExternal",
+ oldWhRootPath, targetPath);
shouldModifyManagedTableLocation = false;
+ shouldMoveExternal = false;
+ } else {
+ ecShim = ShimLoader.getHadoopShims().createHdfsErasureCodingShim(oldWhRootFs, conf);
+ if (!hasEquivalentErasureCodingPolicy(ecShim, oldWhRootPath, targetPath)) {
+ LOG.info("oldWarehouseRoot {} and target path {} have different erasure coding policies." +
+ " Disabling shouldModifyManagedTableLocation and shouldMoveExternal",
+ oldWhRootPath, targetPath);
+ shouldModifyManagedTableLocation = false;
+ shouldMoveExternal = false;
+ }
}
}
}
@@ -544,7 +597,8 @@ public class HiveStrictManagedMigration {
}
}
- return new WarehouseRootCheckResult(shouldModifyManagedTableLocation, curWhRootPath, encryptionShim);
+ return new WarehouseRootCheckResult(shouldModifyManagedTableLocation, shouldMoveExternal,
+ targetPath, encryptionShim, ecShim);
}
static OwnerPermsOptions checkOwnerPermsOptions(RunOptions runOptions, HiveConf conf) {
@@ -581,9 +635,14 @@ public class HiveStrictManagedMigration {
LOG.info("Processing database {}", dbName);
Database dbObj = hms.get().getDatabase(dbName);
- boolean modifyDefaultManagedLocation = shouldModifyDatabaseLocation(dbObj);
- if (modifyDefaultManagedLocation) {
- Path newDefaultDbLocation = wh.get().getDefaultDatabasePath(dbName);
+ if (createExternalDirsForDbs) {
+ createExternalDbDir(dbObj);
+ }
+
+ boolean modifyLocation = shouldModifyDatabaseLocation(dbObj);
+
+ if (modifyLocation) {
+ Path newDefaultDbLocation = getDefaultDbPathManagedOrExternal(dbName);
LOG.info("Changing location of database {} to {}", dbName, newDefaultDbLocation);
if (!runOptions.dryRun) {
@@ -595,10 +654,6 @@ public class HiveStrictManagedMigration {
}
}
- if (createExternalDirsForDbs) {
- createExternalDbDir(dbObj);
- }
-
List<String> tableNames;
if (runOptions.tableType == null) {
tableNames = hms.get().getTables(dbName, runOptions.tableRegex);
@@ -609,20 +664,20 @@ public class HiveStrictManagedMigration {
}
boolean errorsInThisDb = !tablePool.submit(() -> tableNames.parallelStream()
- .map(tableName -> processTable(dbObj, tableName, modifyDefaultManagedLocation))
+ .map(tableName -> processTable(dbObj, tableName, modifyLocation))
.reduce(true, (aBoolean, aBoolean2) -> aBoolean && aBoolean2)).get();
if (errorsInThisDb) {
failuresEncountered.set(true);
}
// Finally update the DB location. This would prevent subsequent runs of the migration from processing this DB.
- if (modifyDefaultManagedLocation) {
+ if (modifyLocation) {
if (errorsInThisDb) {
LOG.error("Not updating database location for {} since an error was encountered. " +
"The migration must be run again for this database.", dbObj.getName());
} else {
if (!runOptions.dryRun) {
- Path newDefaultDbLocation = wh.get().getDefaultDatabasePath(dbName);
+ Path newDefaultDbLocation = getDefaultDbPathManagedOrExternal(dbName);
// dbObj after this call would have the new DB location.
// Keep that in mind if anything below this requires the old DB path.
hiveUpdater.get().updateDbLocation(dbObj, newDefaultDbLocation);
@@ -638,6 +693,12 @@ public class HiveStrictManagedMigration {
}
}
+ private Path getDefaultDbPathManagedOrExternal(String dbName) throws MetaException {
+ return runOptions.shouldMoveExternal ?
+ wh.get().getDefaultExternalDatabasePath(dbName) :
+ wh.get().getDefaultDatabasePath(dbName);
+ }
+
public static boolean migrateTable(Table tableObj, TableType tableType, TableMigrationOption migrationOption,
boolean dryRun, HiveUpdater hiveUpdater, IMetaStoreClient hms, Configuration conf)
throws HiveException, IOException, TException {
@@ -664,7 +725,7 @@ public class HiveStrictManagedMigration {
return false;
}
- boolean processTable(Database dbObj, String tableName, boolean modifyDefaultManagedLocation) {
+ boolean processTable(Database dbObj, String tableName, boolean modifyLocation) {
try {
String dbName = dbObj.getName();
LOG.debug("Processing table {}", getQualifiedName(dbName, tableName));
@@ -686,19 +747,24 @@ public class HiveStrictManagedMigration {
return true;
}
- if (TableType.valueOf(tableObj.getTableType()) == TableType.MANAGED_TABLE) {
- Path tablePath = new Path(tableObj.getSd().getLocation());
- if (modifyDefaultManagedLocation && shouldModifyTableLocation(dbObj, tableObj)) {
- Path newTablePath = wh.get().getDnsPath(
- new Path(wh.get().getDefaultDatabasePath(dbName),
- MetaStoreUtils.encodeTableName(tableName.toLowerCase())));
- moveTableData(dbObj, tableObj, newTablePath);
- if (!runOptions.dryRun) {
- // File ownership/permission checks should be done on the new table path.
- tablePath = newTablePath;
- }
+ Path tablePath = new Path(tableObj.getSd().getLocation());
+
+ boolean shouldMoveTable = modifyLocation && (
+ (MANAGED_TABLE.name().equals(tableObj.getTableType()) && runOptions.shouldModifyManagedTableLocation) ||
+ (EXTERNAL_TABLE.name().equals(tableObj.getTableType()) && runOptions.shouldMoveExternal));
+
+ if (shouldMoveTable && shouldModifyTableLocation(dbObj, tableObj)) {
+ Path newTablePath = wh.get().getDnsPath(
+ new Path(getDefaultDbPathManagedOrExternal(dbName),
+ MetaStoreUtils.encodeTableName(tableName.toLowerCase())));
+ moveTableData(dbObj, tableObj, newTablePath);
+ if (!runOptions.dryRun) {
+ // File ownership/permission checks should be done on the new table path.
+ tablePath = newTablePath;
}
+ }
+ if (MANAGED_TABLE.equals(tableType)) {
if (runOptions.shouldModifyManagedTableOwner || runOptions.shouldModifyManagedTablePermissions) {
FileSystem fs = tablePath.getFileSystem(conf);
if (isHdfs(fs)) {
@@ -717,17 +783,22 @@ public class HiveStrictManagedMigration {
boolean shouldModifyDatabaseLocation(Database dbObj) throws IOException, MetaException {
String dbName = dbObj.getName();
- if (runOptions.shouldModifyManagedTableLocation) {
+ if (runOptions.shouldModifyManagedTableLocation || runOptions.shouldMoveExternal) {
// Check if the database location is in the default location based on the old warehouse root.
// If so then change the database location to the default based on the current warehouse root.
String dbLocation = dbObj.getLocationUri();
Path oldDefaultDbLocation = oldWh.get().getDefaultDatabasePath(dbName);
if (arePathsEqual(conf, dbLocation, oldDefaultDbLocation.toString())) {
- if (hasEquivalentEncryption(encryptionShim, oldDefaultDbLocation, curWhRootPath)) {
- return true;
+ if (hasEquivalentEncryption(encryptionShim, oldDefaultDbLocation, targetPath)) {
+ if (hasEquivalentErasureCodingPolicy(ecShim, oldDefaultDbLocation, targetPath)) {
+ return true;
+ } else {
+ LOG.info("{} and {} have different EC policies. Will not change database location for {}",
+ oldDefaultDbLocation, targetPath, dbName);
+ }
} else {
LOG.info("{} and {} are on different encryption zones. Will not change database location for {}",
- oldDefaultDbLocation, curWhRootPath, dbName);
+ oldDefaultDbLocation, targetPath, dbName);
}
}
}
@@ -742,28 +813,30 @@ public class HiveStrictManagedMigration {
String tableLocation = tableObj.getSd().getLocation();
Path oldDefaultTableLocation = oldWh.get().getDefaultTablePath(dbObj, tableObj.getTableName());
if (arePathsEqual(conf, tableLocation, oldDefaultTableLocation.toString())) {
- if (hasEquivalentEncryption(encryptionShim, oldDefaultTableLocation, curWhRootPath)) {
- return true;
+ if (hasEquivalentEncryption(encryptionShim, oldDefaultTableLocation, targetPath)) {
+ if (hasEquivalentErasureCodingPolicy(ecShim, oldDefaultTableLocation, targetPath)) {
+ return true;
+ } else {
+ LOG.info("{} and {} have different EC policies. Will not change table location for {}",
+ oldDefaultTableLocation, targetPath, getQualifiedName(tableObj));
+ }
} else {
LOG.info("{} and {} are on different encryption zones. Will not change table location for {}",
- oldDefaultTableLocation, curWhRootPath, getQualifiedName(tableObj));
+ oldDefaultTableLocation, targetPath, getQualifiedName(tableObj));
}
}
return false;
}
- boolean shouldModifyPartitionLocation(Database dbObj, Table tableObj, Partition partObj, Map<String, String> partSpec)
- throws IOException, MetaException {
- String tableName = tableObj.getTableName();
+ boolean shouldModifyPartitionLocation(Database dbObj, Table tableObj, Partition partObj,
+ Map<String, String> partSpec) throws IOException, MetaException {
String partLocation = partObj.getSd().getLocation();
- Path oldDefaultPartLocation = oldWh.get().getDefaultPartitionPath(dbObj, tableObj, partSpec);
+ Path oldDefaultPartLocation = runOptions.shouldMoveExternal ?
+ oldWh.get().getPartitionPath(dbObj, tableObj, partSpec.values().stream().collect(toList())):
+ oldWh.get().getDefaultPartitionPath(dbObj, tableObj, partSpec);
if (arePathsEqual(conf, partLocation, oldDefaultPartLocation.toString())) {
- if (hasEquivalentEncryption(encryptionShim, oldDefaultPartLocation, curWhRootPath)) {
- return true;
- } else {
- LOG.info("{} and {} are on different encryption zones. Will not change partition location",
- oldDefaultPartLocation, curWhRootPath);
- }
+ // No need to check encryption zone and EC policy. Data was moved already along with the whole table.
+ return true;
}
return false;
}
@@ -946,7 +1019,7 @@ public class HiveStrictManagedMigration {
}
LOG.info("Converting {} to external table ...", getQualifiedName(tableObj));
if (!dryRun) {
- tableObj.setTableType(TableType.EXTERNAL_TABLE.toString());
+ tableObj.setTableType(EXTERNAL_TABLE.toString());
hiveUpdater.updateTableProperties(tableObj, convertToExternalTableProps);
}
return true;
@@ -1540,6 +1613,20 @@ public class HiveStrictManagedMigration {
return true;
}
+ static boolean hasEquivalentErasureCodingPolicy(HadoopShims.HdfsErasureCodingShim ecShim,
+ Path path1, Path path2) throws IOException {
+ HadoopShims.HdfsFileErasureCodingPolicy policy1 = ecShim.getErasureCodingPolicy(path1);
+ HadoopShims.HdfsFileErasureCodingPolicy policy2 = ecShim.getErasureCodingPolicy(path2);
+ if (policy1 != null) {
+ return policy1.equals(policy2);
+ } else {
+ if (policy2 == null) {
+ return true;
+ }
+ return false;
+ }
+ }
+
/**
* can set it from tests to test when config needs something other than default values.
*/
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/util/TestHiveStrictManagedMigration.java b/ql/src/test/org/apache/hadoop/hive/ql/util/TestHiveStrictManagedMigration.java
index 057135b..16d4772 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/util/TestHiveStrictManagedMigration.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/util/TestHiveStrictManagedMigration.java
@@ -17,16 +17,30 @@
*/
package org.apache.hadoop.hive.ql.util;
+import static java.util.stream.Collectors.toSet;
import static org.apache.hadoop.hive.ql.TxnCommandsBaseForTests.Table.ACIDTBL;
import static org.apache.hadoop.hive.ql.TxnCommandsBaseForTests.Table.ACIDTBLPART;
import static org.apache.hadoop.hive.ql.TxnCommandsBaseForTests.Table.NONACIDNONBUCKET;
import static org.apache.hadoop.hive.ql.TxnCommandsBaseForTests.Table.NONACIDORCTBL;
import static org.apache.hadoop.hive.ql.TxnCommandsBaseForTests.Table.NONACIDORCTBL2;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.ql.TxnCommandsBaseForTests;
+import org.apache.hadoop.hive.ql.metadata.Hive;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.Partition;
+
import org.junit.Assert;
import org.junit.Test;
@@ -34,6 +48,7 @@ public class TestHiveStrictManagedMigration extends TxnCommandsBaseForTests {
private static final String TEST_DATA_DIR = new File(System.getProperty("java.io.tmpdir") +
File.separator + TestHiveStrictManagedMigration.class.getCanonicalName() + "-" + System.currentTimeMillis()
).getPath().replaceAll("\\\\", "/");
+ private static final String EXTERNAL_TABLE_LOCATION = new File(TEST_DATA_DIR, "tmp").getPath();
@Test
public void testUpgrade() throws Exception {
@@ -57,9 +72,7 @@ public class TestHiveStrictManagedMigration extends TxnCommandsBaseForTests {
File newWarehouseDir = new File(getTestDataDir(), "newWarehouse");
newConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, newWarehouseDir.getAbsolutePath());
newConf.set("strict.managed.tables.migration.owner", System.getProperty("user.name"));
- HiveStrictManagedMigration.hiveConf = newConf;
- HiveStrictManagedMigration.scheme = "file";
- HiveStrictManagedMigration.main(args);
+ runMigrationTool(newConf, args);
Assert.assertTrue(newWarehouseDir.exists());
Assert.assertTrue(new File(newWarehouseDir, ACIDTBL.toString().toLowerCase()).exists());
@@ -69,10 +82,193 @@ public class TestHiveStrictManagedMigration extends TxnCommandsBaseForTests {
Assert.assertTrue(new File(newWarehouseDir, NONACIDORCTBL2.toString().toLowerCase()).exists());
Assert.assertTrue(new File(new File(newWarehouseDir, "test.db"), "tacid").exists());
Assert.assertTrue(new File(oldWarehouse, "texternal").exists());
+
+ // Tear down
+ runStatementOnDriver("drop database test cascade");
+ Database defaultDb = Hive.get().getDatabase("default");
+ defaultDb.setLocationUri(oldWarehouse);
+ Hive.get().alterDatabase("default", defaultDb);
+ System.setProperty("hive.strict.managed.tables", "false");
+ }
+
+ /**
+ * Tests shouldMoveExternal option on all possible scenarios of the following dimensions:
+ * - managed or external table type?
+ * - location in (old) warehouse or truly external location?
+ * - is partitioned?
+ * - is partition location default (under table directory) or custom external path?
+ * - default or custom database?
+ * @throws Exception
+ */
+ @Test
+ public void testExternalMove() throws Exception {
+ setupExternalTableTest();
+ String oldWarehouse = getWarehouseDir();
+ String[] args = {"-m", "external", "--shouldMoveExternal", "--tableRegex", "man.*|ext.*|custm.*|custe.*",
+ "--oldWarehouseRoot", oldWarehouse};
+ HiveConf newConf = new HiveConf(hiveConf);
+ File newManagedWarehouseDir = new File(getTestDataDir(), "newManaged");
+ File newExtWarehouseDir = new File(getTestDataDir(), "newExternal");
+ newConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, newManagedWarehouseDir.getAbsolutePath());
+ newConf.set(HiveConf.ConfVars.HIVE_METASTORE_WAREHOUSE_EXTERNAL.varname, newExtWarehouseDir.getAbsolutePath());
+ runMigrationTool(newConf, args);
+ Assert.assertTrue(newExtWarehouseDir.exists());
+ assertExternalTableLocations(newExtWarehouseDir, new File(EXTERNAL_TABLE_LOCATION));
+ assertSDLocationCorrect();
+ }
+
+ @Test(expected = IllegalArgumentException.class)
+ public void testExternalMoveFailsForIncorrectOptions() throws Throwable {
+ try {
+ String[] args = {"-m", "automatic", "--shouldMoveExternal"};
+ runMigrationTool(new HiveConf(hiveConf), args);
+ } catch (Exception e) {
+ // Exceptions are re-packaged by the migration tool...
+ throw e.getCause();
+ }
}
@Override
protected String getTestDataDir() {
return TEST_DATA_DIR;
}
+
+
+ private static void runMigrationTool(HiveConf hiveConf, String[] args) throws Exception {
+ HiveStrictManagedMigration.hiveConf = hiveConf;
+ HiveStrictManagedMigration.scheme = "file";
+ HiveStrictManagedMigration.main(args);
+ }
+
+ private void setupExternalTableTest() throws Exception {
+ runStatementOnDriver("drop table if exists manwhnone");
+ runStatementOnDriver("drop table if exists manoutnone");
+ runStatementOnDriver("drop table if exists manwhwh");
+ runStatementOnDriver("drop table if exists manwhout");
+ runStatementOnDriver("drop table if exists manwhmixed");
+ runStatementOnDriver("drop table if exists manoutout");
+ runStatementOnDriver("drop table if exists extwhnone");
+ runStatementOnDriver("drop table if exists extoutnone");
+ runStatementOnDriver("drop table if exists extwhwh");
+ runStatementOnDriver("drop table if exists extwhout");
+ runStatementOnDriver("drop table if exists extwhmixed");
+ runStatementOnDriver("drop table if exists extoutout");
+ runStatementOnDriver("drop table if exists custdb.custmanwhwh");
+ runStatementOnDriver("drop table if exists custdb.custextwhwh");
+ runStatementOnDriver("create table manwhnone (a string)");
+ runStatementOnDriver("create table manoutnone (a string) location '" + EXTERNAL_TABLE_LOCATION
+ + "/manoutnone'");
+ runStatementOnDriver("create table manwhwh (a string) partitioned by (p string)");
+ runStatementOnDriver("alter table manwhwh add partition (p='p1')");
+ runStatementOnDriver("alter table manwhwh add partition (p='p2')");
+ runStatementOnDriver("create table manwhout (a string) partitioned by (p string)");
+ runStatementOnDriver("alter table manwhout add partition (p='p1') location '" + EXTERNAL_TABLE_LOCATION
+ + "/manwhoutp1'");
+ runStatementOnDriver("alter table manwhout add partition (p='p2') location '" + EXTERNAL_TABLE_LOCATION
+ + "/manwhoutp2'");
+ runStatementOnDriver("create table manwhmixed (a string) partitioned by (p string)");
+ runStatementOnDriver("alter table manwhmixed add partition (p='p1') location '" + EXTERNAL_TABLE_LOCATION
+ + "/manwhmixedp1'");
+ runStatementOnDriver("alter table manwhmixed add partition (p='p2')");
+ runStatementOnDriver("create table manoutout (a string) partitioned by (p string) location '" +
+ EXTERNAL_TABLE_LOCATION + "/manoutout'");
+ runStatementOnDriver("alter table manoutout add partition (p='p1')");
+ runStatementOnDriver("alter table manoutout add partition (p='p2')");
+ runStatementOnDriver("create external table extwhnone (a string)");
+ runStatementOnDriver("create external table extoutnone (a string) location '" + EXTERNAL_TABLE_LOCATION
+ + "/extoutnone'");
+ runStatementOnDriver("create external table extwhwh (a string) partitioned by (p string)");
+ runStatementOnDriver("alter table extwhwh add partition (p='p1')");
+ runStatementOnDriver("alter table extwhwh add partition (p='p2')");
+ runStatementOnDriver("create external table extwhout (a string) partitioned by (p string)");
+ runStatementOnDriver("alter table extwhout add partition (p='p1') location '" + EXTERNAL_TABLE_LOCATION
+ + "/extwhoutp1'");
+ runStatementOnDriver("alter table extwhout add partition (p='p2') location '" + EXTERNAL_TABLE_LOCATION
+ + "/extwhoutp2'");
+ runStatementOnDriver("create external table extwhmixed (a string) partitioned by (p string)");
+ runStatementOnDriver("alter table extwhmixed add partition (p='p1') location '" + EXTERNAL_TABLE_LOCATION
+ + "/extwhmixedp1'");
+ runStatementOnDriver("alter table extwhmixed add partition (p='p2')");
+ runStatementOnDriver("create external table extoutout (a string) partitioned by (p string) location '"
+ + EXTERNAL_TABLE_LOCATION + "/extoutout'");
+ runStatementOnDriver("alter table extoutout add partition (p='p1')");
+ runStatementOnDriver("alter table extoutout add partition (p='p2')");
+ runStatementOnDriver("drop database if exists custdb");
+ runStatementOnDriver("create database custdb");
+ runStatementOnDriver("create table custdb.custmanwhwh (a string) partitioned by (p string)");
+ runStatementOnDriver("alter table custdb.custmanwhwh add partition (p='p1')");
+ runStatementOnDriver("alter table custdb.custmanwhwh add partition (p='p2')");
+ runStatementOnDriver("create external table custdb.custextwhwh (a string) partitioned by (p string)");
+ runStatementOnDriver("alter table custdb.custextwhwh add partition (p='p1')");
+ runStatementOnDriver("alter table custdb.custextwhwh add partition (p='p2')");
+ }
+
+ private static void assertExternalTableLocations(File exteralWarehouseDir, File externalNonWhDir)
+ throws IOException {
+ Set<String> actualDirs = Files.find(Paths.get(exteralWarehouseDir.toURI()), Integer.MAX_VALUE, (p, a)->true)
+ .map(p->p.toString().replaceAll(exteralWarehouseDir.getAbsolutePath(), ""))
+ .filter(s->!s.isEmpty()).collect(toSet());
+ Set<String> expectedDirs = new HashSet<>();
+ expectedDirs.add("/extwhwh");
+ expectedDirs.add("/extwhwh/p=p2");
+ expectedDirs.add("/extwhwh/p=p1");
+ expectedDirs.add("/extwhmixed");
+ expectedDirs.add("/extwhmixed/p=p2");
+ expectedDirs.add("/manwhwh");
+ expectedDirs.add("/manwhwh/p=p2");
+ expectedDirs.add("/manwhwh/p=p1");
+ expectedDirs.add("/custdb.db");
+ expectedDirs.add("/custdb.db/custmanwhwh");
+ expectedDirs.add("/custdb.db/custmanwhwh/p=p2");
+ expectedDirs.add("/custdb.db/custmanwhwh/p=p1");
+ expectedDirs.add("/custdb.db/custextwhwh");
+ expectedDirs.add("/custdb.db/custextwhwh/p=p2");
+ expectedDirs.add("/custdb.db/custextwhwh/p=p1");
+ expectedDirs.add("/manwhout");
+ expectedDirs.add("/manwhnone");
+ expectedDirs.add("/manwhmixed");
+ expectedDirs.add("/manwhmixed/p=p2");
+ expectedDirs.add("/extwhnone");
+ expectedDirs.add("/extwhout");
+ assertEquals("Unexpected external warehouse directory structure in " + exteralWarehouseDir, expectedDirs,
+ actualDirs);
+
+ actualDirs = Files.find(Paths.get(externalNonWhDir.toURI()), Integer.MAX_VALUE, (p, a)->true)
+ .map(p->p.toString().replaceAll(externalNonWhDir.getAbsolutePath(), ""))
+ .filter(s->!s.isEmpty()).collect(toSet());
+ expectedDirs.clear();
+ expectedDirs.add("/manoutout");
+ expectedDirs.add("/extoutout/p=p2");
+ expectedDirs.add("/extoutout/p=p1");
+ expectedDirs.add("/extwhoutp2");
+ expectedDirs.add("/extwhoutp1");
+ expectedDirs.add("/manwhmixedp1");
+ expectedDirs.add("/manwhoutp1");
+ expectedDirs.add("/manoutout/p=p1");
+ expectedDirs.add("/manoutout/p=p2");
+ expectedDirs.add("/manwhoutp2");
+ expectedDirs.add("/extoutnone");
+ expectedDirs.add("/manoutnone");
+ expectedDirs.add("/extoutout");
+ expectedDirs.add("/extwhmixedp1");
+ assertEquals("Unexpected external (non-warehouse) directory structure in " + externalNonWhDir, expectedDirs,
+ actualDirs);
+ }
+
+ private static void assertSDLocationCorrect() throws HiveException {
+ org.apache.hadoop.hive.ql.metadata.Table table = Hive.get().getTable("manwhwh");
+ List<Partition> partitions = Hive.get().getPartitions(table);
+ assertTrue(partitions.get(0).getLocation().contains("/newExternal/manwhwh/p=p1"));
+ assertTrue(partitions.get(1).getLocation().contains("/newExternal/manwhwh/p=p2"));
+
+ table = Hive.get().getTable("manwhout");
+ partitions = Hive.get().getPartitions(table);
+ assertTrue(partitions.get(0).getLocation().contains("/tmp/manwhoutp1"));
+ assertTrue(partitions.get(1).getLocation().contains("/tmp/manwhoutp2"));
+
+ table = Hive.get().getTable("manwhmixed");
+ partitions = Hive.get().getPartitions(table);
+ assertTrue(partitions.get(0).getLocation().contains("/tmp/manwhmixedp1"));
+ assertTrue(partitions.get(1).getLocation().contains("/newExternal/manwhmixed/p=p2"));
+ }
}