You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2018/06/05 16:35:08 UTC
hive git commit: HIVE-19768 : Utility to convert tables to conform to
Hive strict managed tables mode (Jason Dere via Ashutosh Chauhan)
Repository: hive
Updated Branches:
refs/heads/master 1b7f62b05 -> 96c65a395
HIVE-19768 : Utility to convert tables to conform to Hive strict managed tables mode (Jason Dere via Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/96c65a39
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/96c65a39
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/96c65a39
Branch: refs/heads/master
Commit: 96c65a395a814136cde3ab95598132d2558507d1
Parents: 1b7f62b
Author: Jason Dere <jd...@hortonworks.com>
Authored: Tue Jun 5 09:34:40 2018 -0700
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Tue Jun 5 09:34:40 2018 -0700
----------------------------------------------------------------------
bin/ext/strictmanagedmigration.sh | 27 +
.../org/apache/hadoop/hive/conf/HiveConf.java | 4 +
.../ql/util/HiveStrictManagedMigration.java | 932 +++++++++++++++++++
.../hive/ql/util/HiveStrictManagedUtils.java | 115 +++
4 files changed, 1078 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/96c65a39/bin/ext/strictmanagedmigration.sh
----------------------------------------------------------------------
diff --git a/bin/ext/strictmanagedmigration.sh b/bin/ext/strictmanagedmigration.sh
new file mode 100644
index 0000000..a24c321
--- /dev/null
+++ b/bin/ext/strictmanagedmigration.sh
@@ -0,0 +1,27 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Name of this service; it is appended (with a trailing space separator) to SERVICE_LIST below.
+THISSERVICE=strictmanagedmigration
+export SERVICE_LIST="${SERVICE_LIST}${THISSERVICE} "
+
+# Runs the HiveStrictManagedMigration class, forwarding all arguments to it.
+# HIVE_OPTS is cleared before calling execHiveCmd (defined outside this script).
+strictmanagedmigration () {
+ CLASS=org.apache.hadoop.hive.ql.util.HiveStrictManagedMigration
+ HIVE_OPTS=''
+ execHiveCmd $CLASS "$@"
+}
+
+# Shows the tool's usage by invoking it with --help.
+strictmanagedmigration_help () {
+ strictmanagedmigration "--help"
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/96c65a39/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 56d2de0..9004894 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2838,6 +2838,10 @@ public class HiveConf extends Configuration {
" on the assumption that data changes by external applications may have negative effects" +
" on these operations."),
+ HIVE_STRICT_MANAGED_TABLES("hive.strict.managed.tables", false,
+ "Whether strict managed tables mode is enabled. With this mode enabled, " +
+ "only transactional tables (both full and insert-only) are allowed to be created as managed tables"),
+
HIVE_ERROR_ON_EMPTY_PARTITION("hive.error.on.empty.partition", false,
"Whether to throw an exception if dynamic partition insert generates empty results."),
http://git-wip-us.apache.org/repos/asf/hive/blob/96c65a39/ql/src/java/org/apache/hadoop/hive/ql/util/HiveStrictManagedMigration.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/util/HiveStrictManagedMigration.java b/ql/src/java/org/apache/hadoop/hive/ql/util/HiveStrictManagedMigration.java
new file mode 100644
index 0000000..a2861c5
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/util/HiveStrictManagedMigration.java
@@ -0,0 +1,932 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.util;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.GnuParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.OptionBuilder;
+import org.apache.commons.cli.Options;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.hive.common.FileUtils;
+import org.apache.hadoop.hive.common.LogUtils;
+import org.apache.hadoop.hive.common.cli.CommonCliOptions;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.Database;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.metastore.api.Partition;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
+import org.apache.hadoop.hive.metastore.TableType;
+import org.apache.hadoop.hive.metastore.TransactionalValidationListener;
+import org.apache.hadoop.hive.metastore.Warehouse;
+import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
+import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
+import org.apache.hadoop.hive.ql.DriverFactory;
+import org.apache.hadoop.hive.ql.IDriver;
+import org.apache.hadoop.hive.ql.io.AcidUtils;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.parse.HiveParser.switchDatabaseStatement_return;
+import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
+import org.apache.hadoop.hive.ql.session.SessionState;
+
+import org.apache.thrift.TException;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class HiveStrictManagedMigration {
+
+ private static final Logger LOG = LoggerFactory.getLogger(HiveStrictManagedMigration.class);
+
+ /** How each table matched by the tool should be handled. */
+ enum TableMigrationOption {
+ NONE, // Do nothing
+ VALIDATE, // No migration, just validate that the tables conform to strict managed tables mode
+ AUTOMATIC, // Automatically determine if the table should be managed or external
+ EXTERNAL, // Migrate tables to external tables
+ MANAGED // Migrate tables as managed transactional tables
+ }
+
+ /** Holder for the settings of one migration run, as parsed from the command line. */
+ static class RunOptions {
+ String dbRegex; // regular expression selecting the databases to process
+ String tableRegex; // regular expression selecting the tables to process
+ String oldWarehouseRoot; // previous warehouse root; may be null if not specified
+ TableMigrationOption migrationOption; // what to do with each matched table
+ boolean shouldModifyManagedTableLocation; // move managed table data to the current warehouse
+ boolean shouldModifyManagedTableOwner; // change owner/group of managed table directories
+ boolean shouldModifyManagedTablePermissions; // change permissions of managed table directories
+ boolean dryRun; // only log the actions, do not execute them
+
+ /** Creates the options holder; each argument is stored in the field of the same name. */
+ public RunOptions(String dbRegex,
+ String tableRegex,
+ String oldWarehouseRoot,
+ TableMigrationOption migrationOption,
+ boolean shouldModifyManagedTableLocation,
+ boolean shouldModifyManagedTableOwner,
+ boolean shouldModifyManagedTablePermissions,
+ boolean dryRun) {
+ super();
+ this.dbRegex = dbRegex;
+ this.tableRegex = tableRegex;
+ this.oldWarehouseRoot = oldWarehouseRoot;
+ this.migrationOption = migrationOption;
+ this.shouldModifyManagedTableLocation = shouldModifyManagedTableLocation;
+ this.shouldModifyManagedTableOwner = shouldModifyManagedTableOwner;
+ this.shouldModifyManagedTablePermissions = shouldModifyManagedTablePermissions;
+ this.dryRun = dryRun;
+ }
+ }
+
+ /**
+ * Command-line entry point for the migration tool.
+ * Parses the options (printing usage and returning if -h/--help is given), runs the
+ * migration, and finally calls System.exit() with 0 on success or -1 if the run failed.
+ *
+ * @param args command-line arguments, parsed against createOptions()
+ * @throws Exception if the command-line options cannot be processed
+ */
+ public static void main(String[] args) throws Exception {
+ RunOptions runOptions;
+
+ try {
+ Options opts = createOptions();
+ CommandLine cli = new GnuParser().parse(opts, args);
+
+ if (cli.hasOption('h')) {
+ HelpFormatter formatter = new HelpFormatter();
+ formatter.printHelp(HiveStrictManagedMigration.class.getName(), opts);
+ return;
+ }
+
+ runOptions = createRunOptions(cli);
+ } catch (Exception err) {
+ throw new Exception("Error processing options", err);
+ }
+
+ int rc = 0;
+ HiveStrictManagedMigration migration = null;
+ try {
+ migration = new HiveStrictManagedMigration(runOptions);
+ migration.run();
+ } catch (Exception err) {
+ // Failures of the run itself are logged and reported through the exit code, not rethrown.
+ LOG.error("Failed with error", err);
+ rc = -1;
+ } finally {
+ if (migration != null) {
+ migration.cleanup();
+ }
+ }
+
+ // TODO: Something is preventing the process from terminating after main(), adding exit() as hacky solution.
+ System.exit(rc);
+ }
+
+ /**
+ * Defines the command-line options understood by this tool.
+ * Short aliases exist for dbRegex (-d), tableRegex (-t), migrationOption (-m) and help (-h);
+ * all other options are long-form only.
+ *
+ * @return the option definitions used for parsing and for the usage message
+ */
+ static Options createOptions() {
+ Options result = new Options();
+
+ // -hiveconf x=y
+ result.addOption(OptionBuilder
+ .withValueSeparator()
+ .hasArgs(2)
+ .withArgName("property=value")
+ .withLongOpt("hiveconf")
+ .withDescription("Use value for given property")
+ .create());
+
+ result.addOption(OptionBuilder
+ .withLongOpt("dryRun")
+ .withDescription("Show what migration actions would be taken without actually running commands")
+ .create());
+
+ result.addOption(OptionBuilder
+ .withLongOpt("dbRegex")
+ .withDescription("Regular expression to match database names on which this tool will be run")
+ .hasArg()
+ .create('d'));
+
+ result.addOption(OptionBuilder
+ .withLongOpt("tableRegex")
+ .withDescription("Regular expression to match table names on which this tool will be run")
+ .hasArg()
+ .create('t'));
+
+ result.addOption(OptionBuilder
+ .withLongOpt("oldWarehouseRoot")
+ .withDescription("Location of the previous warehouse root")
+ .hasArg()
+ .create());
+
+ result.addOption(OptionBuilder
+ .withLongOpt("migrationOption")
+ .withDescription("Table migration option (automatic|external|managed|validate|none)")
+ .hasArg()
+ .create('m'));
+
+ result.addOption(OptionBuilder
+ .withLongOpt("shouldModifyManagedTableLocation")
+ .withDescription("Whether managed tables should have their data moved from the old warehouse path to the current warehouse path")
+ .create());
+
+ result.addOption(OptionBuilder
+ .withLongOpt("shouldModifyManagedTableOwner")
+ .withDescription("Whether managed tables should have their directory owners changed to the hive user")
+ .create());
+
+ result.addOption(OptionBuilder
+ .withLongOpt("shouldModifyManagedTablePermissions")
+ .withDescription("Whether managed tables should have their directory permissions changed to conform to strict managed tables mode")
+ .create());
+
+ // Convenience switch: turns on all three shouldModifyManagedTable* options at once.
+ result.addOption(OptionBuilder
+ .withLongOpt("modifyManagedTables")
+ .withDescription("This setting enables the shouldModifyManagedTableLocation, shouldModifyManagedTableOwner, shouldModifyManagedTablePermissions options")
+ .create());
+
+ result.addOption(OptionBuilder
+ .withLongOpt("help")
+ .withDescription("print help message")
+ .create('h'));
+
+ return result;
+ }
+
+ /**
+  * Builds the {@link RunOptions} for this run from the parsed command line.
+  * Every {@code --hiveconf key=value} pair is applied as a JVM system property, except
+  * {@code hive.root.logger}, which is passed to {@code CommonCliOptions.splitAndSetLogger};
+  * Hive log4j is initialized afterwards.
+  *
+  * @param cli parsed command line, based on the options from {@link #createOptions()}
+  * @return the run options; dbRegex/tableRegex default to ".*" and migrationOption to NONE
+  * @throws Exception if option processing fails, e.g. for an unknown migrationOption value
+  */
+ static RunOptions createRunOptions(CommandLine cli) throws Exception {
+   // Process --hiveconf
+   // Get hiveconf param values and set the System property values
+   Properties confProps = cli.getOptionProperties("hiveconf");
+   for (String propKey : confProps.stringPropertyNames()) {
+     LOG.info("Setting {}={}", propKey, confProps.getProperty(propKey));
+     if (propKey.equalsIgnoreCase("hive.root.logger")) {
+       // TODO: logging currently goes to hive.log
+       CommonCliOptions.splitAndSetLogger(propKey, confProps);
+     } else {
+       System.setProperty(propKey, confProps.getProperty(propKey));
+     }
+   }
+
+   LogUtils.initHiveLog4j();
+
+   String dbRegex = cli.getOptionValue("dbRegex", ".*");
+   String tableRegex = cli.getOptionValue("tableRegex", ".*");
+   // Upper-case with a fixed locale: under e.g. a Turkish default locale "automatic" would
+   // become "AUTOMATİC" and valueOf() would reject a perfectly valid option value.
+   TableMigrationOption migrationOption = TableMigrationOption.valueOf(
+       cli.getOptionValue("migrationOption", "none").toUpperCase(java.util.Locale.ROOT));
+   boolean shouldModifyManagedTableLocation = cli.hasOption("shouldModifyManagedTableLocation");
+   boolean shouldModifyManagedTableOwner = cli.hasOption("shouldModifyManagedTableOwner");
+   boolean shouldModifyManagedTablePermissions = cli.hasOption("shouldModifyManagedTablePermissions");
+   if (cli.hasOption("modifyManagedTables")) {
+     // Umbrella switch that enables all three modifications.
+     shouldModifyManagedTableLocation = true;
+     shouldModifyManagedTableOwner = true;
+     shouldModifyManagedTablePermissions = true;
+   }
+   String oldWarehouseRoot = cli.getOptionValue("oldWarehouseRoot");
+   boolean dryRun = cli.hasOption("dryRun");
+
+   return new RunOptions(
+       dbRegex,
+       tableRegex,
+       oldWarehouseRoot,
+       migrationOption,
+       shouldModifyManagedTableLocation,
+       shouldModifyManagedTableOwner,
+       shouldModifyManagedTablePermissions,
+       dryRun);
+ }
+
+ private RunOptions runOptions;
+ private Configuration conf;
+ private HiveMetaStoreClient hms;
+ private boolean failedValidationChecks;
+ private Warehouse wh;
+ private Warehouse oldWh;
+ private String ownerName;
+ private String groupName;
+ private FsPermission dirPerms;
+ private FsPermission filePerms;
+
+ /**
+ * Creates a migration instance for the given options, using a new metastore configuration.
+ * @param runOptions options controlling this run
+ */
+ HiveStrictManagedMigration(RunOptions runOptions) {
+ this.runOptions = runOptions;
+ this.conf = MetastoreConf.newMetastoreConf();
+ }
+
+ /**
+  * Runs the migration: validates the warehouse/ownership options, then processes every
+  * database whose name matches {@code runOptions.dbRegex}. The metastore client is closed
+  * when processing ends, whether or not it succeeded.
+  *
+  * @throws Exception if metastore access or a migration step fails, or (as a HiveException)
+  *         if one or more tables failed the strict managed table validation checks
+  */
+ void run() throws Exception {
+   checkOldWarehouseRoot();
+   checkOwnerPermsOptions();
+
+   // Compile the database filter once instead of re-parsing the regex for every database name.
+   // This also rejects an invalid regex before a metastore connection is opened.
+   java.util.regex.Pattern dbPattern = java.util.regex.Pattern.compile(runOptions.dbRegex);
+
+   hms = new HiveMetaStoreClient(conf);//MetaException
+   try {
+     List<String> databases = hms.getAllDatabases();//TException
+     LOG.info("Found {} databases", databases.size());
+     for (String dbName : databases) {
+       if (dbPattern.matcher(dbName).matches()) {
+         processDatabase(dbName);
+       }
+     }
+     LOG.info("Done processing databases.");
+   } finally {
+     hms.close();
+   }
+
+   // Validation failures are collected while processing and reported once at the end.
+   if (failedValidationChecks) {
+     throw new HiveException("One or more tables failed validation checks for strict managed table mode.");
+   }
+ }
+
+ /**
+ * Decides whether managed table locations can actually be moved.
+ * shouldModifyManagedTableLocation is switched off when oldWarehouseRoot is not specified,
+ * equals the current warehouse root, is on a different FileSystem than the current root,
+ * or is not on HDFS. If the option is still enabled afterwards, Warehouse objects for the
+ * current root (wh) and the old root (oldWh) are created.
+ *
+ * @throws IOException if a path's FileSystem cannot be resolved
+ * @throws MetaException if a Warehouse cannot be created
+ */
+ void checkOldWarehouseRoot() throws IOException, MetaException {
+ if (runOptions.shouldModifyManagedTableLocation) {
+ if (runOptions.oldWarehouseRoot == null) {
+ LOG.info("oldWarehouseRoot is not specified. Disabling shouldModifyManagedTableLocation");
+ runOptions.shouldModifyManagedTableLocation = false;
+ } else {
+ String curWarehouseRoot = HiveConf.getVar(conf, HiveConf.ConfVars.METASTOREWAREHOUSE);
+ if (arePathsEqual(conf, runOptions.oldWarehouseRoot, curWarehouseRoot)) {
+ LOG.info("oldWarehouseRoot is the same as the current warehouse root {}."
+ + " Disabling shouldModifyManagedTableLocation",
+ runOptions.oldWarehouseRoot);
+ runOptions.shouldModifyManagedTableLocation = false;
+ } else {
+ FileSystem oldWhRootFs = new Path(runOptions.oldWarehouseRoot).getFileSystem(conf);
+ FileSystem curWhRootFs = new Path(curWarehouseRoot).getFileSystem(conf);
+ if (!FileUtils.equalsFileSystem(oldWhRootFs, curWhRootFs)) {
+ LOG.info("oldWarehouseRoot {} has a different FS than the current warehouse root {}."
+ + " Disabling shouldModifyManagedTableLocation",
+ runOptions.oldWarehouseRoot, curWarehouseRoot);
+ runOptions.shouldModifyManagedTableLocation = false;
+ } else {
+ if (!isHdfs(oldWhRootFs)) {
+ LOG.info("Warehouse is using non-HDFS FileSystem {}. Disabling shouldModifyManagedTableLocation",
+ oldWhRootFs.getUri());
+ runOptions.shouldModifyManagedTableLocation = false;
+ }
+ }
+ }
+ }
+ }
+
+ if (runOptions.shouldModifyManagedTableLocation) {
+ wh = new Warehouse(conf);
+ // The old Warehouse uses a copy of the configuration with the warehouse root overridden.
+ Configuration oldWhConf = new Configuration(conf);
+ HiveConf.setVar(oldWhConf, HiveConf.ConfVars.METASTOREWAREHOUSE, runOptions.oldWarehouseRoot);
+ oldWh = new Warehouse(oldWhConf);
+ }
+ }
+
+ /**
+  * Reads the target owner, group and permissions from the configuration.
+  * Owner ("strict.managed.tables.migration.owner", default "hive") and group
+  * ("strict.managed.tables.migration.group", no default) are only read when
+  * shouldModifyManagedTableOwner is set. Directory and file permissions are only read when
+  * shouldModifyManagedTablePermissions is set; fields that are not read stay null.
+  */
+ void checkOwnerPermsOptions() {
+   if (runOptions.shouldModifyManagedTableOwner) {
+     ownerName = conf.get("strict.managed.tables.migration.owner", "hive");
+     groupName = conf.get("strict.managed.tables.migration.group", null);
+   }
+   if (runOptions.shouldModifyManagedTablePermissions) {
+     String dirPermsString = conf.get("strict.managed.tables.migration.dir.permissions", "1700");
+     if (dirPermsString != null) {
+       dirPerms = new FsPermission(dirPermsString);
+     }
+     // File permissions have their own key; reading the directory key here would make any
+     // configured directory permissions apply to files as well.
+     String filePermsString = conf.get("strict.managed.tables.migration.file.permissions", "700");
+     if (filePermsString != null) {
+       filePerms = new FsPermission(filePermsString);
+     }
+   }
+ }
+
+ /**
+ * Processes one database: optionally relocates the database to the default location under
+ * the current warehouse root, then processes every table matching runOptions.tableRegex.
+ *
+ * @param dbName name of the database to process
+ * @throws IOException on filesystem errors
+ * @throws HiveException if a Hive command fails
+ * @throws MetaException on metastore errors
+ * @throws TException on metastore communication errors
+ */
+ void processDatabase(String dbName) throws IOException, HiveException, MetaException, TException {
+ LOG.info("Processing database {}", dbName);
+ Database dbObj = hms.getDatabase(dbName);
+
+ boolean modifyDefaultManagedLocation = shouldModifyDatabaseLocation(dbObj);
+ if (modifyDefaultManagedLocation) {
+ Path newDefaultDbLocation = wh.getDefaultDatabasePath(dbName);
+
+ LOG.info("Changing location of database {} to {}", dbName, newDefaultDbLocation);
+ if (!runOptions.dryRun) {
+ // Create the new database directory before pointing the database at it.
+ FileSystem fs = newDefaultDbLocation.getFileSystem(conf);
+ FileUtils.mkdir(fs, newDefaultDbLocation, conf);
+ // Set appropriate owner/perms of the DB dir only, no need to recurse
+ checkAndSetFileOwnerPermissions(fs, newDefaultDbLocation,
+ ownerName, groupName, dirPerms, null, runOptions.dryRun, false);
+
+ String command = String.format("ALTER DATABASE %s SET LOCATION '%s'", dbName, newDefaultDbLocation);
+ runHiveCommand(command);
+ }
+ }
+
+ List<String> tableNames = hms.getTables(dbName, runOptions.tableRegex);
+ for (String tableName : tableNames) {
+ // If we did not change the DB location, there is no need to move the table directories.
+ processTable(dbObj, tableName, modifyDefaultManagedLocation);
+ }
+ }
+
+ /**
+ * Processes one table: applies the selected migration option (resolved per table when the
+ * option is AUTOMATIC), then, if the table is a managed table afterwards, optionally moves
+ * its data and fixes directory ownership/permissions.
+ *
+ * @param dbObj database containing the table
+ * @param tableName name of the table to process
+ * @param modifyDefaultManagedLocation whether the database location was changed, in which
+ * case table directories under the old default location are moved as well
+ * @throws HiveException if a Hive command fails
+ * @throws IOException on filesystem errors
+ * @throws TException on metastore errors
+ */
+ void processTable(Database dbObj, String tableName, boolean modifyDefaultManagedLocation)
+ throws HiveException, IOException, TException {
+ String dbName = dbObj.getName();
+ LOG.debug("Processing table {}", getQualifiedName(dbName, tableName));
+
+ Table tableObj = hms.getTable(dbName, tableName);
+ TableType tableType = TableType.valueOf(tableObj.getTableType());
+ boolean tableMigrated;
+
+ TableMigrationOption migrationOption = runOptions.migrationOption;
+ if (migrationOption == TableMigrationOption.AUTOMATIC) {
+ migrationOption = determineMigrationTypeAutomatically(tableObj, tableType);
+ }
+
+ switch (migrationOption) {
+ case EXTERNAL:
+ tableMigrated = migrateToExternalTable(tableObj, tableType);
+ if (tableMigrated) {
+ // Track the new type locally; tableObj itself is not re-read from the metastore.
+ tableType = TableType.EXTERNAL_TABLE;
+ }
+ break;
+ case MANAGED:
+ tableMigrated = migrateToManagedTable(tableObj, tableType);
+ if (tableMigrated) {
+ tableType = TableType.MANAGED_TABLE;
+ }
+ break;
+ case NONE:
+ break;
+ case VALIDATE:
+ // Check that the table is valid under strict managed tables mode.
+ String reason = HiveStrictManagedUtils.validateStrictManagedTable(conf, tableObj);
+ if (reason != null) {
+ LOG.warn(reason);
+ failedValidationChecks = true;
+ }
+ break;
+ default:
+ throw new IllegalArgumentException("Unexpected table migration option " + runOptions.migrationOption);
+ }
+
+ if (tableType == TableType.MANAGED_TABLE) {
+ Path tablePath = new Path(tableObj.getSd().getLocation());
+ if (modifyDefaultManagedLocation && shouldModifyTableLocation(dbObj, tableObj)) {
+ Path newTablePath = wh.getDnsPath(
+ new Path(wh.getDefaultDatabasePath(dbName),
+ MetaStoreUtils.encodeTableName(tableName.toLowerCase())));
+ moveTableData(dbObj, tableObj, newTablePath);
+ if (!runOptions.dryRun) {
+ // File ownership/permission checks should be done on the new table path.
+ tablePath = newTablePath;
+ }
+ }
+
+ if (runOptions.shouldModifyManagedTableOwner || runOptions.shouldModifyManagedTablePermissions) {
+ FileSystem fs = tablePath.getFileSystem(conf);
+ if (isHdfs(fs)) {
+ // TODO: what about partitions not in the default location?
+ checkAndSetFileOwnerPermissions(fs, tablePath,
+ ownerName, groupName, dirPerms, filePerms, runOptions.dryRun, true);
+ }
+ }
+ }
+ }
+
+ /**
+  * Tells whether the database should be relocated to the current warehouse root.
+  * That is the case only when location changes are enabled and the database currently sits
+  * at its default location under the old warehouse root.
+  *
+  * @param dbObj database to check
+  * @return true if the database location should be changed
+  * @throws IOException if a path cannot be qualified
+  * @throws MetaException if the old default database path cannot be determined
+  */
+ boolean shouldModifyDatabaseLocation(Database dbObj) throws IOException, MetaException {
+   final String name = dbObj.getName();
+   if (!runOptions.shouldModifyManagedTableLocation) {
+     return false;
+   }
+   // Compare the current location with the default location under the old warehouse root.
+   final String currentLocation = dbObj.getLocationUri();
+   final String previousDefault = oldWh.getDefaultDatabasePath(name).toString();
+   return arePathsEqual(conf, currentLocation, previousDefault);
+ }
+
+ /**
+  * Tells whether a managed table sits at its default location under the old warehouse root,
+  * in which case its location (and data directory) should move to the default location under
+  * the current warehouse root. Only managed tables are expected here.
+  *
+  * @param dbObj database containing the table
+  * @param tableObj table to check
+  * @return true if the table location should be changed
+  * @throws IOException if a path cannot be qualified
+  * @throws MetaException if the old default table path cannot be determined
+  */
+ boolean shouldModifyTableLocation(Database dbObj, Table tableObj) throws IOException, MetaException {
+   final String currentLocation = tableObj.getSd().getLocation();
+   final String previousDefault = oldWh.getDefaultTablePath(dbObj, tableObj.getTableName()).toString();
+   return arePathsEqual(conf, currentLocation, previousDefault);
+ }
+
+ /**
+  * Tells whether a partition sits at its default location under the old warehouse root.
+  *
+  * @param dbObj database containing the table
+  * @param tableObj table owning the partition
+  * @param partObj partition to check
+  * @param partSpec partition column name to value mapping for the partition
+  * @return true if the partition location equals the old default partition location
+  * @throws IOException if a path cannot be qualified
+  * @throws MetaException if the old default partition path cannot be determined
+  */
+ boolean shouldModifyPartitionLocation(Database dbObj, Table tableObj, Partition partObj, Map<String, String> partSpec)
+     throws IOException, MetaException {
+   final String owningTable = tableObj.getTableName();
+   final String currentLocation = partObj.getSd().getLocation();
+   final Path previousDefault = oldWh.getDefaultPartitionPath(dbObj, owningTable, partSpec);
+   return arePathsEqual(conf, currentLocation, previousDefault.toString());
+ }
+
+ /**
+  * Moves a table's data directory to {@code newTablePath} and repoints the table, plus every
+  * partition that sat at its default location under the old warehouse root, at the new
+  * location. In dry-run mode only the intended move is logged; nothing is renamed or altered.
+  *
+  * @param dbObj database containing the table
+  * @param tableObj table whose data should be moved
+  * @param newTablePath destination directory for the table data
+  * @throws HiveException if the directory rename fails or a Hive command returns an error
+  * @throws IOException on filesystem errors
+  * @throws TException on metastore errors
+  */
+ void moveTableData(Database dbObj, Table tableObj, Path newTablePath) throws HiveException, IOException, TException {
+   String dbName = tableObj.getDbName();
+   String tableName = tableObj.getTableName();
+   String qualifiedName = getQualifiedName(tableObj);
+
+   Path oldTablePath = new Path(tableObj.getSd().getLocation());
+
+   LOG.info("Moving location of {} from {} to {}", qualifiedName, oldTablePath, newTablePath);
+   if (!runOptions.dryRun) {
+     FileSystem fs = newTablePath.getFileSystem(conf);
+     if (!fs.rename(oldTablePath, newTablePath)) {
+       String msg = String.format("Unable to move data directory for table %s from %s to %s",
+           qualifiedName, oldTablePath, newTablePath);
+       throw new HiveException(msg);
+     }
+     // Only update the metastore once the data has actually been moved.
+     runHiveCommand(String.format("ALTER TABLE %s SET LOCATION '%s'", qualifiedName, newTablePath));
+   }
+   if (isPartitionedTable(tableObj)) {
+     List<String> partNames = hms.listPartitionNames(dbName, tableName, Short.MAX_VALUE);
+     // TODO: Fetch partitions in batches?
+     // TODO: Threadpool to process partitions?
+     for (String partName : partNames) {
+       Partition partObj = hms.getPartition(dbName, tableName, partName);
+       Map<String, String> partSpec =
+           Warehouse.makeSpecFromValues(tableObj.getPartitionKeys(), partObj.getValues());
+       if (shouldModifyPartitionLocation(dbObj, tableObj, partObj, partSpec)) {
+         // Table directory (which includes the partition directory) has already been moved,
+         // just update the partition location in the metastore.
+         if (!runOptions.dryRun) {
+           Path newPartPath = wh.getPartitionPath(newTablePath, partSpec);
+           // The statement must name the table, and the spec must be a column='value' list;
+           // the metastore partition name (k1=v1/k2=v2) is not valid inside PARTITION (...).
+           String command = String.format("ALTER TABLE %s PARTITION (%s) SET LOCATION '%s'",
+               qualifiedName, toPartitionSpecClause(partSpec), newPartPath.toString());
+           runHiveCommand(command);
+         }
+       }
+     }
+   }
+ }
+
+ /** Renders a partition spec as the comma-separated `column`='value' list used inside PARTITION (...). */
+ private static String toPartitionSpecClause(Map<String, String> partSpec) {
+   StringBuilder clause = new StringBuilder();
+   for (Map.Entry<String, String> entry : partSpec.entrySet()) {
+     if (clause.length() > 0) {
+       clause.append(", ");
+     }
+     // Escape backslashes and single quotes so the value stays one string literal.
+     String value = String.valueOf(entry.getValue()).replace("\\", "\\\\").replace("'", "\\'");
+     clause.append('`').append(entry.getKey()).append("`='").append(value).append('\'');
+   }
+   return clause.toString();
+ }
+
+ /**
+  * Picks the migration option for a table when the tool runs in AUTOMATIC mode.
+  * Managed tables stay managed if they are transactional or if no reason is found to make
+  * them external; otherwise they become external. External tables, and any other table type
+  * such as views, are left alone.
+  *
+  * @param tableObj table being examined
+  * @param tableType type of the table
+  * @return MANAGED, EXTERNAL or NONE
+  * @throws IOException on filesystem errors while examining the table
+  * @throws MetaException on metastore errors
+  * @throws TException on metastore communication errors
+  */
+ TableMigrationOption determineMigrationTypeAutomatically(Table tableObj, TableType tableType)
+     throws IOException, MetaException, TException {
+   switch (tableType) {
+   case MANAGED_TABLE: {
+     // Always keep transactional tables as managed tables.
+     if (AcidUtils.isTransactionalTable(tableObj)) {
+       return TableMigrationOption.MANAGED;
+     }
+     String externalReason = shouldTableBeExternal(tableObj);
+     if (externalReason == null) {
+       return TableMigrationOption.MANAGED;
+     }
+     LOG.debug("Converting {} to external table. {}", getQualifiedName(tableObj), externalReason);
+     return TableMigrationOption.EXTERNAL;
+   }
+   case EXTERNAL_TABLE:
+     LOG.debug(String.format("Table %s is already an external table, not processing.",
+         getQualifiedName(tableObj)));
+     return TableMigrationOption.NONE;
+   default: // VIEW/MATERIALIZED_VIEW
+     LOG.debug(String.format("Ignoring table %s because it has table type %s",
+         getQualifiedName(tableObj), tableType));
+     return TableMigrationOption.NONE;
+   }
+ }
+
+ /**
+ * Converts a non-transactional managed table to an external table by setting the
+ * EXTERNAL and external.table.purge table properties. Transactional tables, tables that
+ * are already external and other table types are skipped with a debug message.
+ *
+ * @param tableObj table to convert
+ * @param tableType current type of the table
+ * @return true if the table was converted (or would be, in dry-run mode), false if skipped
+ * @throws HiveException if the ALTER TABLE command fails
+ */
+ boolean migrateToExternalTable(Table tableObj, TableType tableType) throws HiveException {
+ String msg;
+ switch (tableType) {
+ case MANAGED_TABLE:
+ if (AcidUtils.isTransactionalTable(tableObj)) {
+ msg = createExternalConversionExcuse(tableObj,
+ "Table is a transactional table");
+ LOG.debug(msg);
+ return false;
+ }
+ LOG.info("Converting {} to external table ...", getQualifiedName(tableObj));
+ if (!runOptions.dryRun) {
+ String command = String.format(
+ "ALTER TABLE %s SET TBLPROPERTIES ('EXTERNAL'='TRUE', 'external.table.purge'='true')",
+ getQualifiedName(tableObj));
+ runHiveCommand(command);
+ }
+ return true;
+ case EXTERNAL_TABLE:
+ msg = createExternalConversionExcuse(tableObj,
+ "Table is already an external table");
+ LOG.debug(msg);
+ break;
+ default: // VIEW/MATERIALIZED_VIEW
+ msg = createExternalConversionExcuse(tableObj,
+ "Table type " + tableType + " cannot be converted");
+ LOG.debug(msg);
+ break;
+ }
+ return false;
+ }
+
+ /**
+  * Converts a managed or external table to a managed transactional table where possible.
+  * Tables for which {@code TransactionalValidationListener.conformToAcid} holds become full
+  * transactional tables; all others become insert-only transactional tables. Non-native
+  * tables, Avro tables with an external schema url, list bucketed tables, tables that are
+  * already full ACID, and other table types such as views are skipped with a debug message.
+  *
+  * @param tableObj table to convert
+  * @param tableType current type of the table
+  * @return true if the table was converted (or would be, in dry-run mode), false if skipped
+  * @throws HiveException if the ALTER TABLE command fails
+  * @throws MetaException on metastore errors while examining the table
+  */
+ boolean migrateToManagedTable(Table tableObj, TableType tableType) throws HiveException, MetaException {
+
+   String externalFalse = "";
+   switch (tableType) {
+   case EXTERNAL_TABLE:
+     // An external table must also drop its EXTERNAL flag when it becomes managed.
+     externalFalse = "'EXTERNAL'='FALSE', ";
+     // fall through
+   case MANAGED_TABLE:
+     if (MetaStoreUtils.isNonNativeTable(tableObj)) {
+       String msg = createManagedConversionExcuse(tableObj,
+           "Table is a non-native (StorageHandler) table");
+       LOG.debug(msg);
+       return false;
+     }
+     if (HiveStrictManagedUtils.isAvroTableWithExternalSchema(tableObj)) {
+       String msg = createManagedConversionExcuse(tableObj,
+           "Table is an Avro table with an external schema url");
+       LOG.debug(msg);
+       return false;
+     }
+     // List bucketed table cannot be converted to transactional
+     if (HiveStrictManagedUtils.isListBucketedTable(tableObj)) {
+       String msg = createManagedConversionExcuse(tableObj,
+           "Table is a list bucketed table");
+       LOG.debug(msg);
+       return false;
+     }
+     // If table is already transactional, no migration needed.
+     // NOTE(review): only full ACID tables are skipped here; confirm whether insert-only
+     // transactional tables are meant to be converted again.
+     if (AcidUtils.isFullAcidTable(tableObj)) {
+       String msg = createManagedConversionExcuse(tableObj,
+           "Table is already a transactional table");
+       LOG.debug(msg);
+       return false;
+     }
+
+     // ORC files can be converted to full acid transactional tables
+     // Other formats can be converted to insert-only transactional tables
+     if (TransactionalValidationListener.conformToAcid(tableObj)) {
+       // TODO: option to allow converting ORC file to insert-only transactional?
+       LOG.info("Converting {} to full transactional table", getQualifiedName(tableObj));
+       if (!runOptions.dryRun) {
+         // Include the EXTERNAL=FALSE prefix here as well, so that a table that started out
+         // external does not stay external while being marked transactional.
+         String command = String.format(
+             "ALTER TABLE %s SET TBLPROPERTIES (%s'transactional'='true')",
+             getQualifiedName(tableObj), externalFalse);
+         runHiveCommand(command);
+       }
+       return true;
+     } else {
+       LOG.info("Converting {} to insert-only transactional table", getQualifiedName(tableObj));
+       if (!runOptions.dryRun) {
+         String command = String.format(
+             "ALTER TABLE %s SET TBLPROPERTIES (%s'transactional'='true', 'transactional_properties'='insert_only')",
+             getQualifiedName(tableObj), externalFalse);
+         runHiveCommand(command);
+       }
+       return true;
+     }
+   default: // VIEW/MATERIALIZED_VIEW
+     String msg = createManagedConversionExcuse(tableObj,
+         "Table type " + tableType + " cannot be converted");
+     LOG.debug(msg);
+     return false;
+   }
+ }
+
+ /**
+ * Determines whether a managed table should be converted to an external table.
+ *
+ * @param tableObj table to examine
+ * @return a human-readable reason if the table should be external, or null if it can stay managed
+ * @throws IOException on filesystem errors
+ * @throws MetaException on metastore errors
+ * @throws TException on metastore communication errors
+ */
+ String shouldTableBeExternal(Table tableObj) throws IOException, MetaException, TException {
+ if (MetaStoreUtils.isNonNativeTable(tableObj)) {
+ return "Table is a non-native (StorageHandler) table";
+ }
+ if (HiveStrictManagedUtils.isAvroTableWithExternalSchema(tableObj)) {
+ return "Table is an Avro table with an external schema url";
+ }
+ // List bucketed table cannot be converted to transactional
+ if (HiveStrictManagedUtils.isListBucketedTable(tableObj)) {
+ return "Table is a list bucketed table";
+ }
+ // If any table/partition directory is not owned by hive,
+ // then assume table is using storage-based auth - set external.
+ // Transactional tables should still remain transactional,
+ // but we should have already checked for that before this point.
+ if (shouldTablePathBeExternal(tableObj, ownerName)) {
+ return String.format("One or more table directories not owned by %s, or non-HDFS path", ownerName);
+ }
+
+ return null;
+ }
+
+ /**
+  * Checks the table's storage locations to decide whether the table should be external.
+  * For an unpartitioned table the table directory is checked; for a partitioned table every
+  * partition directory is checked until one qualifies. Any non-HDFS location makes the table
+  * external; HDFS locations are judged by {@code checkDirectoryOwnership} against
+  * {@code userName}.
+  *
+  * @param tableObj table to examine
+  * @param userName owner to pass to the directory ownership check
+  * @return true if the table should be converted to an external table
+  * @throws IOException on filesystem errors
+  * @throws MetaException on metastore errors
+  * @throws TException on metastore communication errors
+  */
+ boolean shouldTablePathBeExternal(Table tableObj, String userName) throws IOException, MetaException, TException {
+   boolean shouldBeExternal = false;
+   String dbName = tableObj.getDbName();
+   String tableName = tableObj.getTableName();
+
+   if (!isPartitionedTable(tableObj)) {
+     // Check the table directory.
+     Path tablePath = new Path(tableObj.getSd().getLocation());
+     FileSystem fs = tablePath.getFileSystem(conf);
+     if (isHdfs(fs)) {
+       // Use the caller-supplied user name rather than the ownerName field.
+       shouldBeExternal = checkDirectoryOwnership(fs, tablePath, userName, true);
+     } else {
+       // Set non-hdfs tables to external, unless transactional (should have been checked before this).
+       shouldBeExternal = true;
+     }
+   } else {
+     // Check ownership for all partitions
+     List<String> partNames = hms.listPartitionNames(dbName, tableName, Short.MAX_VALUE);
+     for (String partName : partNames) {
+       Partition partObj = hms.getPartition(dbName, tableName, partName);
+       Path partPath = new Path(partObj.getSd().getLocation());
+       FileSystem fs = partPath.getFileSystem(conf);
+       if (isHdfs(fs)) {
+         shouldBeExternal = checkDirectoryOwnership(fs, partPath, userName, true);
+       } else {
+         shouldBeExternal = true;
+       }
+       // One qualifying partition is enough; stop scanning.
+       if (shouldBeExternal) {
+         break;
+       }
+     }
+   }
+
+   return shouldBeExternal;
+ }
+
+ /**
+  * Executes a HiveQL command through the driver, creating the driver on first use.
+  *
+  * @param command the statement to run
+  * @throws HiveException if the command returns a non-zero response code
+  */
+ void runHiveCommand(String command) throws HiveException {
+   LOG.info("Running command: {}", command);
+
+   // The driver is created lazily so that runs issuing no commands never start a session.
+   if (driver == null) {
+     driver = new MyDriver(conf);
+   }
+
+   CommandProcessorResponse response = driver.driver.run(command);
+   if (response.getResponseCode() == 0) {
+     return;
+   }
+   throw new HiveException("Query returned non-zero code: " + response.getResponseCode()
+       + ", cause: " + response.getErrorMessage());
+ }
+
+ /** Closes the Hive driver if one was created; errors while closing are logged, not thrown. */
+ void cleanup() {
+   if (driver == null) {
+     return;
+   }
+   runAndLogErrors(() -> driver.close());
+   driver = null;
+ }
+
+ /** Wraps the Hive driver used to run commands, together with the session started for it. */
+ static class MyDriver {
+ IDriver driver;
+
+ /**
+ * Starts a SessionState and creates a driver from a HiveConf based on the given configuration.
+ * @param conf configuration to base the HiveConf on
+ */
+ MyDriver(Configuration conf) {
+ HiveConf hiveConf = new HiveConf(conf, this.getClass());
+ // TODO: Clean up SessionState/Driver/TezSession on exit
+ SessionState.start(hiveConf);
+ driver = DriverFactory.newDriver(hiveConf);
+ }
+
+ /** Closes and destroys the driver and closes the current session; each step logs its own errors. */
+ void close() {
+ if (driver != null) {
+ runAndLogErrors(() -> driver.close());
+ runAndLogErrors(() -> driver.destroy());
+ driver = null;
+ runAndLogErrors(() -> SessionState.get().close());
+ }
+ }
+ }
+
+ MyDriver driver;
+
+ /** A Runnable-like action that is allowed to throw a checked exception. */
+ interface ThrowableRunnable {
+ void run() throws Exception;
+ }
+
+ /**
+ * Runs the action and logs any Exception it throws instead of propagating it.
+ * @param r action to run
+ */
+ static void runAndLogErrors(ThrowableRunnable r) {
+ try {
+ r.run();
+ } catch (Exception err) {
+ LOG.error("Error encountered", err);
+ }
+ }
+
+ /**
+  * Builds the message explaining why a table cannot be converted to an external table.
+  *
+  * @param tableObj table that was not converted
+  * @param reason explanation appended to the message
+  * @return the formatted message
+  */
+ static String createExternalConversionExcuse(Table tableObj, String reason) {
+   final String template = "Table %s cannot be converted to an external table in "
+       + "strict managed table mode for the following reason: %s";
+   final String qualifiedTable = getQualifiedName(tableObj);
+   return String.format(template, qualifiedTable, reason);
+ }
+
+ static String createManagedConversionExcuse(Table tableObj, String reason) {
+ return String.format("Table %s cannot be converted to a managed table in "
+ + "strict managed table mode for the following reason: %s",
+ getQualifiedName(tableObj), reason);
+ }
+
+  /**
+   * Tells whether the table declares at least one partition key.
+   *
+   * @param tableObj table whose partition keys are inspected
+   * @return true only when the partition key list is non-null and non-empty
+   */
+  static boolean isPartitionedTable(Table tableObj) {
+    List<FieldSchema> partKeys = tableObj.getPartitionKeys();
+    // Both conditions must hold. Joining them with || reported every table that has a
+    // (possibly empty) key list as partitioned, and threw NullPointerException on a null list.
+    return partKeys != null && !partKeys.isEmpty();
+  }
+
+ static boolean isHdfs(FileSystem fs) {
+ return fs.getScheme().equals("hdfs");
+ }
+
+ static String getQualifiedName(Table tableObj) {
+ return getQualifiedName(tableObj.getDbName(), tableObj.getTableName());
+ }
+
+  /**
+   * Joins a database name and a table name into a single name, wrapping each part in
+   * backticks and separating them with a dot. Embedded backticks are not escaped.
+   *
+   * @param dbName database name
+   * @param tableName table name
+   * @return the two names, each in backticks, joined by a dot
+   */
+  static String getQualifiedName(String dbName, String tableName) {
+    return "`" + dbName + "`.`" + tableName + "`";
+  }
+
+ static boolean arePathsEqual(Configuration conf, String path1, String path2) throws IOException {
+ String qualified1 = getQualifiedPath(conf, new Path(path1));
+ String qualified2 = getQualifiedPath(conf, new Path(path2));
+ return qualified1.equals(qualified2);
+ }
+
+ static String getQualifiedPath(Configuration conf, Path path) throws IOException {
+ FileSystem fs;
+ if (path == null) {
+ return null;
+ }
+
+ fs = path.getFileSystem(conf);
+ return fs.makeQualified(path).toString();
+ }
+
+  /**
+   * Path-based entry point: fetches the status of {@code path} and hands it to the
+   * {@code FileStatus}-based overload, which compares the owner, group and permissions with
+   * the requested values and updates them when they differ.
+   * @param fs File system that holds the path
+   * @param path File or directory to check
+   * @param userName Owner of the file to compare/set. Null to skip this check.
+   * @param groupName Group of the file to compare/set. Null to skip this check.
+   * @param dirPerms Permissions to compare/set, if the file is a directory. Null to skip this check.
+   * @param filePerms Permissions to compare/set, if the file is a file. Null to skip this check.
+   * @param dryRun Dry run - check but do not actually set
+   * @param recurse Whether to recursively check/set the contents of a directory
+   * @throws IOException if the file status cannot be read or an update fails
+   */
+  static void checkAndSetFileOwnerPermissions(FileSystem fs, Path path,
+      String userName, String groupName,
+      FsPermission dirPerms, FsPermission filePerms,
+      boolean dryRun, boolean recurse) throws IOException {
+    checkAndSetFileOwnerPermissions(
+        fs, fs.getFileStatus(path), userName, groupName, dirPerms, filePerms, dryRun, recurse);
+  }
+
+ /**
+ * Recursively check the file owner and permissions, setting them to the passed in values
+ * if the owner/perms of the file do not match.
+ * @param fs
+ * @param fStatus
+ * @param userName Owner of the file to compare/set. Null to skip this check.
+ * @param groupName Group of the file to compare/set. Null to skip this check.
+ * @param dirPerms Permissions to compare/set, if the file is a directory. Null to skip this check.
+ * @param filePerms Permissions to compare/set, if the file is a file. Null to skip this check.
+ * @param dryRun Dry run - check but do not actually set
+ * @param recurse Whether to recursively check/set the contents of a directory
+ * @throws IOException
+ */
+ static void checkAndSetFileOwnerPermissions(FileSystem fs, FileStatus fStatus,
+ String userName, String groupName,
+ FsPermission dirPerms, FsPermission filePerms,
+ boolean dryRun, boolean recurse) throws IOException {
+ Path path = fStatus.getPath();
+ boolean setOwner = false;
+ if (userName != null && !userName.equals(fStatus.getOwner())) {
+ setOwner = true;
+ } else if (groupName != null && !groupName.equals(fStatus.getGroup())) {
+ setOwner = true;
+ }
+
+ boolean isDir = fStatus.isDirectory();
+ boolean setPerms = false;
+ FsPermission perms = filePerms;
+ if (isDir) {
+ perms = dirPerms;
+ }
+ if (perms != null && !perms.equals(fStatus.getPermission())) {
+ setPerms = true;
+ }
+
+ if (setOwner) {
+ LOG.debug("Setting owner/group of {} to {}/{}", path, userName, groupName);
+ if (!dryRun) {
+ fs.setOwner(path, userName, groupName);
+ }
+ }
+ if (setPerms) {
+ LOG.debug("Setting perms of {} to {}", path, perms);
+ if (!dryRun) {
+ fs.setPermission(path, perms);
+ }
+ }
+
+ if (isDir && recurse) {
+ for (FileStatus subFile : fs.listStatus(path)) {
+ // TODO: Use threadpool for more concurrency?
+ // TODO: check/set all files, or only directories
+ checkAndSetFileOwnerPermissions(fs, subFile, userName, groupName, dirPerms, filePerms, dryRun, recurse);
+ }
+ }
+ }
+
+  /**
+   * Path-based entry point: fetches the status of {@code path} and delegates to the
+   * {@code FileStatus}-based overload.
+   *
+   * @param fs file system that holds the path
+   * @param path file or directory to check
+   * @param userName expected owner; passed through to the overload
+   * @param recurse whether sub-directories are checked as well
+   * @return the result of the {@code FileStatus}-based check
+   * @throws IOException if the file status cannot be read
+   */
+  static boolean checkDirectoryOwnership(FileSystem fs,
+      Path path,
+      String userName,
+      boolean recurse) throws IOException {
+    return checkDirectoryOwnership(fs, fs.getFileStatus(path), userName, recurse);
+  }
+
+  /**
+   * Checks that a directory (and, when recursing, every directory beneath it) is owned by
+   * the given user. Non-directory entries are not checked and count as passing.
+   *
+   * @param fs file system used to list directory contents
+   * @param fStatus status of the entry to check
+   * @param userName expected owner; null skips the owner comparison
+   * @param recurse whether to descend into the directory's children
+   * @return false as soon as a directory with a different owner is found, true otherwise
+   * @throws IOException if a directory cannot be listed
+   */
+  static boolean checkDirectoryOwnership(FileSystem fs,
+      FileStatus fStatus,
+      String userName,
+      boolean recurse) throws IOException {
+    // Ignore non-directory files
+    if (!fStatus.isDirectory()) {
+      return true;
+    }
+
+    if (userName != null && !userName.equals(fStatus.getOwner())) {
+      return false;
+    }
+    if (!recurse) {
+      return true;
+    }
+
+    for (FileStatus child : fs.listStatus(fStatus.getPath())) {
+      if (!checkDirectoryOwnership(fs, child, userName, recurse)) {
+        return false;
+      }
+    }
+    return true;
+  }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/96c65a39/ql/src/java/org/apache/hadoop/hive/ql/util/HiveStrictManagedUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/util/HiveStrictManagedUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/util/HiveStrictManagedUtils.java
new file mode 100644
index 0000000..d9536eb
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/util/HiveStrictManagedUtils.java
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.util;
+
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.TableType;
+import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
+import org.apache.hadoop.hive.ql.io.AcidUtils;
+import org.apache.hadoop.hive.ql.io.NullRowsInputFormat;
+import org.apache.hadoop.hive.ql.io.OneNullRowInputFormat;
+import org.apache.hadoop.hive.ql.io.ZeroRowsInputFormat;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.serde2.avro.AvroSerDe;
+import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils.AvroTableProperties;
+
+/**
+ * Static helpers that check a table against the rules applied when
+ * {@code HiveConf.ConfVars.HIVE_STRICT_MANAGED_TABLES} is enabled.
+ */
+public class HiveStrictManagedUtils {
+
+  /** Input formats whose managed tables are accepted even when they are not transactional. */
+  private final static Set<String> EXEMPT_INPUTFORMATS =
+      new HashSet<String>(Arrays.asList(NullRowsInputFormat.class.getName(),
+          OneNullRowInputFormat.class.getName(), ZeroRowsInputFormat.class.getName()));
+
+
+  /**
+   * Validates the table and throws if it violates the strict managed table rules.
+   * @param conf configuration holding the strict managed tables setting
+   * @param table table to validate
+   * @throws HiveException carrying the validation message when the table is invalid
+   */
+  public static void validateStrictManagedTable(Configuration conf, Table table)
+      throws HiveException {
+    String reason = validateStrictManagedTable(conf, table.getTTable());
+    if (reason != null) {
+      throw new HiveException(reason);
+    }
+  }
+
+  /**
+   * Checks if the table is valid based on the rules for strict managed tables.
+   * No checks are made when strict managed tables mode is disabled in {@code conf}.
+   * @param conf configuration holding the strict managed tables setting
+   * @param table table to validate
+   * @return Null if the table is valid, otherwise a string message indicating why the table is invalid.
+   */
+  public static String validateStrictManagedTable(Configuration conf,
+      org.apache.hadoop.hive.metastore.api.Table table) {
+    if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STRICT_MANAGED_TABLES)) {
+      if (table.isTemporary()) {
+        // temp tables exempted from checks.
+        return null;
+      }
+
+      TableType tableType = TableType.valueOf(table.getTableType());
+      if (tableType == TableType.MANAGED_TABLE) {
+        if (!AcidUtils.isTransactionalTable(table)) {
+          String inputFormat = null;
+          if (table.getSd() != null) {
+            inputFormat = table.getSd().getInputFormat();
+          }
+          if (!EXEMPT_INPUTFORMATS.contains(inputFormat)) {
+            return createValidationError(table, "Table is marked as a managed table but is not transactional.");
+          }
+        }
+        if (MetaStoreUtils.isNonNativeTable(table)) {
+          return createValidationError(table, "Table is marked as a managed table but is non-native.");
+        }
+        if (isAvroTableWithExternalSchema(table)) {
+          return createValidationError(table, "Managed Avro table has externally defined schema.");
+        }
+      }
+    }
+
+    // Table is valid
+    return null;
+  }
+
+  /**
+   * Tells whether the table uses the Avro SerDe and has a non-empty schema URL table property.
+   * A table without a storage descriptor, SerDe info, serialization lib or parameters is
+   * reported as not matching instead of causing a NullPointerException.
+   * @param table table to inspect
+   * @return true if the table is an Avro table with a non-empty schema URL property
+   */
+  public static boolean isAvroTableWithExternalSchema(org.apache.hadoop.hive.metastore.api.Table table) {
+    // validateStrictManagedTable() already tolerates a missing storage descriptor, so this
+    // helper must not dereference one blindly either.
+    if (table.getSd() == null || table.getSd().getSerdeInfo() == null) {
+      return false;
+    }
+    String serializationLib = table.getSd().getSerdeInfo().getSerializationLib();
+    // Constant-first equals() also covers a null serialization lib.
+    if (!AvroSerDe.class.getName().equals(serializationLib) || table.getParameters() == null) {
+      return false;
+    }
+    String schemaUrl = table.getParameters().get(AvroTableProperties.SCHEMA_URL.getPropName());
+    return schemaUrl != null && !schemaUrl.isEmpty();
+  }
+
+  /**
+   * Tells whether the table's storage descriptor is flagged as stored in sub-directories.
+   * @param table table to inspect
+   * @return the storage descriptor's flag, or false when the table has no storage descriptor
+   */
+  public static boolean isListBucketedTable(org.apache.hadoop.hive.metastore.api.Table table) {
+    return table.getSd() != null && table.getSd().isStoredAsSubDirectories();
+  }
+
+  /** Formats a validation failure message that names the table as {@code db.table}. */
+  private static String createValidationError(org.apache.hadoop.hive.metastore.api.Table table, String message) {
+    StringBuilder sb = new StringBuilder();
+    sb.append("Table ");
+    sb.append(table.getDbName());
+    sb.append(".");
+    sb.append(table.getTableName());
+    sb.append(" failed strict managed table checks due to the following reason: ");
+    sb.append(message);
+    return sb.toString();
+  }
+}