Posted to commits@hive.apache.org by ha...@apache.org on 2018/06/05 16:35:08 UTC

hive git commit: HIVE-19768 : Utility to convert tables to conform to Hive strict managed tables mode (Jason Dere via Ashutosh Chauhan)

Repository: hive
Updated Branches:
  refs/heads/master 1b7f62b05 -> 96c65a395


HIVE-19768 : Utility to convert tables to conform to Hive strict managed tables mode (Jason Dere via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan <ha...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/96c65a39
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/96c65a39
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/96c65a39

Branch: refs/heads/master
Commit: 96c65a395a814136cde3ab95598132d2558507d1
Parents: 1b7f62b
Author: Jason Dere <jd...@hortonworks.com>
Authored: Tue Jun 5 09:34:40 2018 -0700
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Tue Jun 5 09:34:40 2018 -0700

----------------------------------------------------------------------
 bin/ext/strictmanagedmigration.sh               |  27 +
 .../org/apache/hadoop/hive/conf/HiveConf.java   |   4 +
 .../ql/util/HiveStrictManagedMigration.java     | 932 +++++++++++++++++++
 .../hive/ql/util/HiveStrictManagedUtils.java    | 115 +++
 4 files changed, 1078 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/96c65a39/bin/ext/strictmanagedmigration.sh
----------------------------------------------------------------------
diff --git a/bin/ext/strictmanagedmigration.sh b/bin/ext/strictmanagedmigration.sh
new file mode 100644
index 0000000..a24c321
--- /dev/null
+++ b/bin/ext/strictmanagedmigration.sh
@@ -0,0 +1,27 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+THISSERVICE=strictmanagedmigration
+export SERVICE_LIST="${SERVICE_LIST}${THISSERVICE} "
+
+strictmanagedmigration () {
+  CLASS=org.apache.hadoop.hive.ql.util.HiveStrictManagedMigration
+  HIVE_OPTS=''
+  execHiveCmd $CLASS "$@"
+}
+
+strictmanagedmigration_help () {
+  strictmanagedmigration "--help"
+}
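
The wrapper above registers "strictmanagedmigration" in Hive's service list, so it
should be reachable through the standard service dispatcher. A minimal sketch of a
dry run, assuming a configured Hive client (option names are taken from
HiveStrictManagedMigration below; the database name is illustrative):

  # Report what would change for every table in the "sales" database,
  # without running any ALTER statements or moving any data.
  hive --service strictmanagedmigration \
      --hiveconf hive.root.logger=INFO,console \
      -d sales \
      -m automatic \
      --dryRun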

http://git-wip-us.apache.org/repos/asf/hive/blob/96c65a39/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 56d2de0..9004894 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2838,6 +2838,10 @@ public class HiveConf extends Configuration {
         " on the assumption that data changes by external applications may have negative effects" +
         " on these operations."),
 
+    HIVE_STRICT_MANAGED_TABLES("hive.strict.managed.tables", false,
+        "Whether strict managed tables mode is enabled. With this mode enabled, " +
+        "only transactional tables (both full and insert-only) are allowed to be created as managed tables"),
+
     HIVE_ERROR_ON_EMPTY_PARTITION("hive.error.on.empty.partition", false,
         "Whether to throw an exception if dynamic partition insert generates empty results."),
 

http://git-wip-us.apache.org/repos/asf/hive/blob/96c65a39/ql/src/java/org/apache/hadoop/hive/ql/util/HiveStrictManagedMigration.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/util/HiveStrictManagedMigration.java b/ql/src/java/org/apache/hadoop/hive/ql/util/HiveStrictManagedMigration.java
new file mode 100644
index 0000000..a2861c5
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/util/HiveStrictManagedMigration.java
@@ -0,0 +1,932 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.util;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.stream.Collectors;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.GnuParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.OptionBuilder;
+import org.apache.commons.cli.Options;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.hive.common.FileUtils;
+import org.apache.hadoop.hive.common.LogUtils;
+import org.apache.hadoop.hive.common.cli.CommonCliOptions;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.Database;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.metastore.api.Partition;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
+import org.apache.hadoop.hive.metastore.TableType;
+import org.apache.hadoop.hive.metastore.TransactionalValidationListener;
+import org.apache.hadoop.hive.metastore.Warehouse;
+import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
+import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
+import org.apache.hadoop.hive.ql.DriverFactory;
+import org.apache.hadoop.hive.ql.IDriver;
+import org.apache.hadoop.hive.ql.io.AcidUtils;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
+import org.apache.hadoop.hive.ql.session.SessionState;
+
+import org.apache.thrift.TException;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class HiveStrictManagedMigration {
+
+  private static final Logger LOG = LoggerFactory.getLogger(HiveStrictManagedMigration.class);
+
+  enum TableMigrationOption {
+    NONE,      // Do nothing
+    VALIDATE,  // No migration, just validate that the tables conform to strict managed tables mode
+    AUTOMATIC, // Automatically determine if the table should be managed or external
+    EXTERNAL,  // Migrate tables to external tables
+    MANAGED    // Migrate tables as managed transactional tables
+  }
+
+  static class RunOptions {
+    String dbRegex;
+    String tableRegex;
+    String oldWarehouseRoot;
+    TableMigrationOption migrationOption;
+    boolean shouldModifyManagedTableLocation;
+    boolean shouldModifyManagedTableOwner;
+    boolean shouldModifyManagedTablePermissions;
+    boolean dryRun;
+
+    public RunOptions(String dbRegex,
+        String tableRegex,
+        String oldWarehouseRoot,
+        TableMigrationOption migrationOption,
+        boolean shouldModifyManagedTableLocation,
+        boolean shouldModifyManagedTableOwner,
+        boolean shouldModifyManagedTablePermissions,
+        boolean dryRun) {
+      super();
+      this.dbRegex = dbRegex;
+      this.tableRegex = tableRegex;
+      this.oldWarehouseRoot = oldWarehouseRoot;
+      this.migrationOption = migrationOption;
+      this.shouldModifyManagedTableLocation = shouldModifyManagedTableLocation;
+      this.shouldModifyManagedTableOwner = shouldModifyManagedTableOwner;
+      this.shouldModifyManagedTablePermissions = shouldModifyManagedTablePermissions;
+      this.dryRun = dryRun;
+    }
+  }
+
+  public static void main(String[] args) throws Exception {
+    RunOptions runOptions;
+
+    try {
+      Options opts = createOptions();
+      CommandLine cli = new GnuParser().parse(opts, args);
+
+      if (cli.hasOption('h')) {
+        HelpFormatter formatter = new HelpFormatter();
+        formatter.printHelp(HiveStrictManagedMigration.class.getName(), opts);
+        return;
+      }
+
+      runOptions = createRunOptions(cli);
+    } catch (Exception err) {
+      throw new Exception("Error processing options", err);
+    }
+
+    int rc = 0;
+    HiveStrictManagedMigration migration = null;
+    try {
+      migration = new HiveStrictManagedMigration(runOptions);
+      migration.run();
+    } catch (Exception err) {
+      LOG.error("Failed with error", err);
+      rc = -1;
+    } finally {
+      if (migration != null) {
+        migration.cleanup();
+      }
+    }
+
+    // TODO: Something is preventing the process from terminating after main(), adding exit() as hacky solution.
+    System.exit(rc);
+  }
+
+  static Options createOptions() {
+    Options result = new Options();
+
+    // -hiveconf x=y
+    result.addOption(OptionBuilder
+        .withValueSeparator()
+        .hasArgs(2)
+        .withArgName("property=value")
+        .withLongOpt("hiveconf")
+        .withDescription("Use value for given property")
+        .create());
+
+    result.addOption(OptionBuilder
+        .withLongOpt("dryRun")
+        .withDescription("Show what migration actions would be taken without actually running commands")
+        .create());
+
+    result.addOption(OptionBuilder
+        .withLongOpt("dbRegex")
+        .withDescription("Regular expression to match database names on which this tool will be run")
+        .hasArg()
+        .create('d'));
+
+    result.addOption(OptionBuilder
+        .withLongOpt("tableRegex")
+        .withDescription("Regular expression to match table names on which this tool will be run")
+        .hasArg()
+        .create('t'));
+
+    result.addOption(OptionBuilder
+        .withLongOpt("oldWarehouseRoot")
+        .withDescription("Location of the previous warehouse root")
+        .hasArg()
+        .create());
+
+    result.addOption(OptionBuilder
+        .withLongOpt("migrationOption")
+        .withDescription("Table migration option (automatic|external|managed|validate|none)")
+        .hasArg()
+        .create('m'));
+
+    result.addOption(OptionBuilder
+        .withLongOpt("shouldModifyManagedTableLocation")
+        .withDescription("Whether managed tables should have their data moved from the old warehouse path to the current warehouse path")
+        .create());
+
+    result.addOption(OptionBuilder
+        .withLongOpt("shouldModifyManagedTableOwner")
+        .withDescription("Whether managed tables should have their directory owners changed to the hive user")
+        .create());
+
+    result.addOption(OptionBuilder
+        .withLongOpt("shouldModifyManagedTablePermissions")
+        .withDescription("Whether managed tables should have their directory permissions changed to conform to strict managed tables mode")
+        .create());
+
+    result.addOption(OptionBuilder
+        .withLongOpt("modifyManagedTables")
+        .withDescription("This setting enables the shouldModifyManagedTableLocation, shouldModifyManagedTableOwner, shouldModifyManagedTablePermissions options")
+        .create());
+
+    result.addOption(OptionBuilder
+        .withLongOpt("help")
+        .withDescription("print help message")
+        .create('h'));
+
+    return result;
+  }
+
+  static RunOptions createRunOptions(CommandLine cli) throws Exception {
+    // Process --hiveconf
+    // Get hiveconf param values and set the System property values
+    Properties confProps = cli.getOptionProperties("hiveconf");
+    for (String propKey : confProps.stringPropertyNames()) {
+      LOG.info("Setting {}={}", propKey, confProps.getProperty(propKey));
+      if (propKey.equalsIgnoreCase("hive.root.logger")) {
+        // TODO: logging currently goes to hive.log
+        CommonCliOptions.splitAndSetLogger(propKey, confProps);
+      } else {
+        System.setProperty(propKey, confProps.getProperty(propKey));
+      }
+    }
+
+    LogUtils.initHiveLog4j();
+
+    String dbRegex = cli.getOptionValue("dbRegex", ".*");
+    String tableRegex = cli.getOptionValue("tableRegex", ".*");
+    TableMigrationOption migrationOption =
+        TableMigrationOption.valueOf(cli.getOptionValue("migrationOption", "none").toUpperCase());
+    boolean shouldModifyManagedTableLocation = cli.hasOption("shouldModifyManagedTableLocation");
+    boolean shouldModifyManagedTableOwner = cli.hasOption("shouldModifyManagedTableOwner");
+    boolean shouldModifyManagedTablePermissions = cli.hasOption("shouldModifyManagedTablePermissions");
+    if (cli.hasOption("modifyManagedTables")) {
+      shouldModifyManagedTableLocation = true;
+      shouldModifyManagedTableOwner = true;
+      shouldModifyManagedTablePermissions = true;
+    }
+    String oldWarehouseRoot = cli.getOptionValue("oldWarehouseRoot");
+    boolean dryRun = cli.hasOption("dryRun");
+    
+    RunOptions runOpts = new RunOptions(
+        dbRegex,
+        tableRegex,
+        oldWarehouseRoot,
+        migrationOption,
+        shouldModifyManagedTableLocation,
+        shouldModifyManagedTableOwner,
+        shouldModifyManagedTablePermissions,
+        dryRun);
+    return runOpts;
+  }
+
+  private RunOptions runOptions;
+  private Configuration conf;
+  private HiveMetaStoreClient hms;
+  private boolean failedValidationChecks;
+  private Warehouse wh;
+  private Warehouse oldWh;
+  private String ownerName;
+  private String groupName;
+  private FsPermission dirPerms;
+  private FsPermission filePerms;
+
+  HiveStrictManagedMigration(RunOptions runOptions) {
+    this.runOptions = runOptions;
+    this.conf = MetastoreConf.newMetastoreConf();
+  }
+
+  void run() throws Exception {
+    checkOldWarehouseRoot();
+    checkOwnerPermsOptions();
+
+    hms = new HiveMetaStoreClient(conf); // throws MetaException
+    try {
+      List<String> databases = hms.getAllDatabases(); // throws TException
+      LOG.info("Found {} databases", databases.size());
+      for (String dbName : databases) {
+        if (dbName.matches(runOptions.dbRegex)) {
+          processDatabase(dbName);
+        }
+      }
+      LOG.info("Done processing databases.");
+    } finally {
+      hms.close();
+    }
+
+    if (failedValidationChecks) {
+      throw new HiveException("One or more tables failed validation checks for strict managed table mode.");
+    }
+  }
+
+  void checkOldWarehouseRoot() throws IOException, MetaException {
+    if (runOptions.shouldModifyManagedTableLocation) {
+      if (runOptions.oldWarehouseRoot == null) {
+        LOG.info("oldWarehouseRoot is not specified. Disabling shouldModifyManagedTableLocation");
+        runOptions.shouldModifyManagedTableLocation = false;
+      } else {
+        String curWarehouseRoot = HiveConf.getVar(conf, HiveConf.ConfVars.METASTOREWAREHOUSE);
+        if (arePathsEqual(conf, runOptions.oldWarehouseRoot, curWarehouseRoot)) {
+          LOG.info("oldWarehouseRoot is the same as the current warehouse root {}."
+              + " Disabling shouldModifyManagedTableLocation",
+              runOptions.oldWarehouseRoot);
+          runOptions.shouldModifyManagedTableLocation = false;
+        } else {
+          FileSystem oldWhRootFs = new Path(runOptions.oldWarehouseRoot).getFileSystem(conf);
+          FileSystem curWhRootFs = new Path(curWarehouseRoot).getFileSystem(conf);
+          if (!FileUtils.equalsFileSystem(oldWhRootFs, curWhRootFs)) {
+            LOG.info("oldWarehouseRoot {} has a different FS than the current warehouse root {}."
+                + " Disabling shouldModifyManagedTableLocation",
+                runOptions.oldWarehouseRoot, curWarehouseRoot);
+            runOptions.shouldModifyManagedTableLocation = false;
+          } else {
+            if (!isHdfs(oldWhRootFs)) {
+              LOG.info("Warehouse is using non-HDFS FileSystem {}. Disabling shouldModifyManagedTableLocation",
+                  oldWhRootFs.getUri());
+              runOptions.shouldModifyManagedTableLocation = false;
+            }
+          }
+        }
+      }
+    }
+
+    if (runOptions.shouldModifyManagedTableLocation) {
+      wh = new Warehouse(conf);
+      Configuration oldWhConf = new Configuration(conf);
+      HiveConf.setVar(oldWhConf, HiveConf.ConfVars.METASTOREWAREHOUSE, runOptions.oldWarehouseRoot);
+      oldWh = new Warehouse(oldWhConf);
+    }
+  }
+
+  void checkOwnerPermsOptions() {
+    if (runOptions.shouldModifyManagedTableOwner) {
+      ownerName = conf.get("strict.managed.tables.migration.owner", "hive");
+      groupName = conf.get("strict.managed.tables.migration.group", null);
+    }
+    if (runOptions.shouldModifyManagedTablePermissions) {
+      String dirPermsString = conf.get("strict.managed.tables.migration.dir.permissions", "1700");
+      if (dirPermsString != null) {
+        dirPerms = new FsPermission(dirPermsString);
+      }
+      String filePermsString = conf.get("strict.managed.tables.migration.file.permissions", "700");
+      if (filePermsString != null) {
+        filePerms = new FsPermission(filePermsString);
+      }
+    }
+  }
+
+  void processDatabase(String dbName) throws IOException, HiveException, MetaException, TException {
+    LOG.info("Processing database {}", dbName);
+    Database dbObj = hms.getDatabase(dbName);
+
+    boolean modifyDefaultManagedLocation = shouldModifyDatabaseLocation(dbObj);
+    if (modifyDefaultManagedLocation) {
+      Path newDefaultDbLocation = wh.getDefaultDatabasePath(dbName);
+
+      LOG.info("Changing location of database {} to {}", dbName, newDefaultDbLocation);
+      if (!runOptions.dryRun) {
+        FileSystem fs = newDefaultDbLocation.getFileSystem(conf);
+        FileUtils.mkdir(fs, newDefaultDbLocation, conf);
+        // Set appropriate owner/perms of the DB dir only, no need to recurse
+        checkAndSetFileOwnerPermissions(fs, newDefaultDbLocation,
+            ownerName, groupName, dirPerms, null, runOptions.dryRun, false);
+
+        String command = String.format("ALTER DATABASE %s SET LOCATION '%s'", dbName, newDefaultDbLocation);
+        runHiveCommand(command);
+      }
+    }
+
+    List<String> tableNames = hms.getTables(dbName, runOptions.tableRegex);
+    for (String tableName : tableNames) {
+      // If we did not change the DB location, there is no need to move the table directories.
+      processTable(dbObj, tableName, modifyDefaultManagedLocation);
+    }
+  }
+
+  void processTable(Database dbObj, String tableName, boolean modifyDefaultManagedLocation)
+      throws HiveException, IOException, TException {
+    String dbName = dbObj.getName();
+    LOG.debug("Processing table {}", getQualifiedName(dbName, tableName));
+
+    Table tableObj = hms.getTable(dbName, tableName);
+    TableType tableType = TableType.valueOf(tableObj.getTableType());
+    boolean tableMigrated;
+
+    TableMigrationOption migrationOption = runOptions.migrationOption;
+    if (migrationOption == TableMigrationOption.AUTOMATIC) {
+      migrationOption = determineMigrationTypeAutomatically(tableObj, tableType);
+    }
+
+    switch (migrationOption) {
+    case EXTERNAL:
+      tableMigrated = migrateToExternalTable(tableObj, tableType);
+      if (tableMigrated) {
+        tableType = TableType.EXTERNAL_TABLE;
+      }
+      break;
+    case MANAGED:
+      tableMigrated = migrateToManagedTable(tableObj, tableType);
+      if (tableMigrated) {
+        tableType = TableType.MANAGED_TABLE;
+      }
+      break;
+    case NONE:
+      break;
+    case VALIDATE:
+      // Check that the table is valid under strict managed tables mode.
+      String reason = HiveStrictManagedUtils.validateStrictManagedTable(conf, tableObj);
+      if (reason != null) {
+        LOG.warn(reason);
+        failedValidationChecks = true;
+      }
+      break;
+    default:
+      throw new IllegalArgumentException("Unexpected table migration option " + runOptions.migrationOption);
+    }
+
+    if (tableType == TableType.MANAGED_TABLE) {
+      Path tablePath = new Path(tableObj.getSd().getLocation());
+      if (modifyDefaultManagedLocation && shouldModifyTableLocation(dbObj, tableObj)) {
+        Path newTablePath = wh.getDnsPath(
+            new Path(wh.getDefaultDatabasePath(dbName),
+                MetaStoreUtils.encodeTableName(tableName.toLowerCase())));
+        moveTableData(dbObj, tableObj, newTablePath);
+        if (!runOptions.dryRun) {
+          // File ownership/permission checks should be done on the new table path.
+          tablePath = newTablePath;
+        }
+      }
+
+      if (runOptions.shouldModifyManagedTableOwner || runOptions.shouldModifyManagedTablePermissions) {
+        FileSystem fs = tablePath.getFileSystem(conf);
+        if (isHdfs(fs)) {
+          // TODO: what about partitions not in the default location?
+          checkAndSetFileOwnerPermissions(fs, tablePath,
+              ownerName, groupName, dirPerms, filePerms, runOptions.dryRun, true);
+        }
+      }
+    }
+  }
+
+  boolean shouldModifyDatabaseLocation(Database dbObj) throws IOException, MetaException {
+    String dbName = dbObj.getName();
+    if (runOptions.shouldModifyManagedTableLocation) {
+      // Check if the database location is in the default location based on the old warehouse root.
+      // If so then change the database location to the default based on the current warehouse root.
+      String dbLocation = dbObj.getLocationUri();
+      Path oldDefaultDbLocation = oldWh.getDefaultDatabasePath(dbName);
+      if (arePathsEqual(conf, dbLocation, oldDefaultDbLocation.toString())) {
+        return true;
+      }
+    } 
+    return false;
+  }
+
+  boolean shouldModifyTableLocation(Database dbObj, Table tableObj) throws IOException, MetaException {
+    // Should only be managed tables passed in here.
+    // Check if table is in the default table location based on the old warehouse root.
+    // If so then change the table location to the default based on the current warehouse root.
+    // The existing table directory will also be moved to the new default database directory.
+    String tableLocation = tableObj.getSd().getLocation();
+    Path oldDefaultTableLocation = oldWh.getDefaultTablePath(dbObj, tableObj.getTableName());
+    if (arePathsEqual(conf, tableLocation, oldDefaultTableLocation.toString())) {
+      return true;
+    }
+    return false;
+  }
+
+  boolean shouldModifyPartitionLocation(Database dbObj, Table tableObj, Partition partObj, Map<String, String> partSpec)
+      throws IOException, MetaException {
+    String tableName = tableObj.getTableName();
+    String partLocation = partObj.getSd().getLocation();
+    Path oldDefaultPartLocation = oldWh.getDefaultPartitionPath(dbObj, tableName, partSpec);
+    return arePathsEqual(conf, partLocation, oldDefaultPartLocation.toString());
+  }
+
+  void moveTableData(Database dbObj, Table tableObj, Path newTablePath) throws HiveException, IOException, TException {
+    String dbName = tableObj.getDbName();
+    String tableName = tableObj.getTableName();
+    
+    Path oldTablePath = new Path(tableObj.getSd().getLocation());
+
+    LOG.info("Moving location of {} from {} to {}", getQualifiedName(tableObj), oldTablePath, newTablePath);
+    if (!runOptions.dryRun) {
+      FileSystem fs = newTablePath.getFileSystem(conf);
+      boolean movedData = fs.rename(oldTablePath, newTablePath);
+      if (!movedData) {
+        String msg = String.format("Unable to move data directory for table %s from %s to %s",
+            getQualifiedName(tableObj), oldTablePath, newTablePath);
+        throw new HiveException(msg);
+      }
+    }
+    if (!runOptions.dryRun) {
+      String command = String.format("ALTER TABLE %s SET LOCATION '%s'",
+          getQualifiedName(tableObj), newTablePath);
+      runHiveCommand(command);
+    }
+    if (isPartitionedTable(tableObj)) {
+      List<String> partNames = hms.listPartitionNames(dbName, tableName, Short.MAX_VALUE);
+      // TODO: Fetch partitions in batches?
+      // TODO: Threadpool to process partitions?
+      for (String partName : partNames) {
+        Partition partObj = hms.getPartition(dbName, tableName, partName);
+        Map<String, String> partSpec =
+            Warehouse.makeSpecFromValues(tableObj.getPartitionKeys(), partObj.getValues());
+        if (shouldModifyPartitionLocation(dbObj, tableObj, partObj, partSpec)) {
+          // Table directory (which includes the partition directory) has already been moved,
+          // just update the partition location in the metastore.
+          if (!runOptions.dryRun) {
+            Path newPartPath = wh.getPartitionPath(newTablePath, partSpec);
+            // Build a proper partition spec clause (key='value', ...); the raw
+            // partition name is not valid ALTER TABLE syntax.
+            String partClause = partSpec.entrySet().stream()
+                .map(entry -> String.format("%s='%s'", entry.getKey(), entry.getValue()))
+                .collect(Collectors.joining(", "));
+            String command = String.format("ALTER TABLE %s PARTITION (%s) SET LOCATION '%s'",
+                getQualifiedName(tableObj), partClause, newPartPath.toString());
+            runHiveCommand(command);
+          }
+        }
+      }
+    }
+  }
+
+  TableMigrationOption determineMigrationTypeAutomatically(Table tableObj, TableType tableType)
+      throws IOException, MetaException, TException {
+    TableMigrationOption result = TableMigrationOption.NONE;
+    String msg;
+    switch (tableType) {
+    case MANAGED_TABLE:
+      if (AcidUtils.isTransactionalTable(tableObj)) {
+        // Always keep transactional tables as managed tables.
+        result = TableMigrationOption.MANAGED;
+      } else {
+        String reason = shouldTableBeExternal(tableObj);
+        if (reason != null) {
+          LOG.debug("Converting {} to external table. {}", getQualifiedName(tableObj), reason);
+          result = TableMigrationOption.EXTERNAL;
+        } else {
+          result = TableMigrationOption.MANAGED;
+        }
+      }
+      break;
+    case EXTERNAL_TABLE:
+      msg = String.format("Table %s is already an external table, not processing.",
+          getQualifiedName(tableObj));
+      LOG.debug(msg);
+      result = TableMigrationOption.NONE;
+      break;
+    default: // VIEW/MATERIALIZED_VIEW
+      msg = String.format("Ignoring table %s because it has table type %s",
+          getQualifiedName(tableObj), tableType);
+      LOG.debug(msg);
+      result = TableMigrationOption.NONE;
+      break;
+    }
+
+    return result;
+  }
+
+  boolean migrateToExternalTable(Table tableObj, TableType tableType) throws HiveException {
+    String msg;
+    switch (tableType) {
+    case MANAGED_TABLE:
+      if (AcidUtils.isTransactionalTable(tableObj)) {
+        msg = createExternalConversionExcuse(tableObj,
+            "Table is a transactional table");
+        LOG.debug(msg);
+        return false;
+      }
+      LOG.info("Converting {} to external table ...", getQualifiedName(tableObj));
+      if (!runOptions.dryRun) {
+        String command = String.format(
+            "ALTER TABLE %s SET TBLPROPERTIES ('EXTERNAL'='TRUE', 'external.table.purge'='true')",
+            getQualifiedName(tableObj));
+        runHiveCommand(command);
+      }
+      return true;
+    case EXTERNAL_TABLE:
+      msg = createExternalConversionExcuse(tableObj,
+          "Table is already an external table");
+      LOG.debug(msg);
+      break;
+    default: // VIEW/MATERIALIZED_VIEW
+      msg = createExternalConversionExcuse(tableObj,
+          "Table type " + tableType + " cannot be converted");
+      LOG.debug(msg);
+      break;
+    }
+    return false;
+  }
+
+  boolean migrateToManagedTable(Table tableObj, TableType tableType) throws HiveException, MetaException {
+
+    String externalFalse = "";
+    switch (tableType) {
+    case EXTERNAL_TABLE:
+      externalFalse = "'EXTERNAL'='FALSE', ";
+      // fall through
+    case MANAGED_TABLE:
+      if (MetaStoreUtils.isNonNativeTable(tableObj)) {
+        String msg = createManagedConversionExcuse(tableObj,
+            "Table is a non-native (StorageHandler) table");
+        LOG.debug(msg);
+        return false;
+      }
+      if (HiveStrictManagedUtils.isAvroTableWithExternalSchema(tableObj)) {
+        String msg = createManagedConversionExcuse(tableObj,
+            "Table is an Avro table with an external schema url");
+        LOG.debug(msg);
+        return false;
+      }
+      // List bucketed table cannot be converted to transactional
+      if (HiveStrictManagedUtils.isListBucketedTable(tableObj)) {
+        String msg = createManagedConversionExcuse(tableObj,
+            "Table is a list bucketed table");
+        LOG.debug(msg);
+        return false;
+      }
+      // If table is already transactional, no migration needed.
+      if (AcidUtils.isFullAcidTable(tableObj)) {
+        String msg = createManagedConversionExcuse(tableObj,
+            "Table is already a transactional table");
+        LOG.debug(msg);
+        return false;
+      }
+
+      // ORC files can be converted to full acid transactional tables
+      // Other formats can be converted to insert-only transactional tables
+      if (TransactionalValidationListener.conformToAcid(tableObj)) {
+        // TODO: option to allow converting ORC file to insert-only transactional?
+        LOG.info("Converting {} to full transactional table", getQualifiedName(tableObj));
+        if (!runOptions.dryRun) {
+          // Include 'EXTERNAL'='FALSE' when converting an external table,
+          // mirroring the insert-only branch below.
+          String command = String.format(
+              "ALTER TABLE %s SET TBLPROPERTIES (%s'transactional'='true')",
+              getQualifiedName(tableObj), externalFalse);
+          runHiveCommand(command);
+        }
+        return true;
+      } else {
+        LOG.info("Converting {} to insert-only transactional table", getQualifiedName(tableObj));
+        if (!runOptions.dryRun) {
+          String command = String.format(
+              "ALTER TABLE %s SET TBLPROPERTIES (%s'transactional'='true', 'transactional_properties'='insert_only')",
+              getQualifiedName(tableObj), externalFalse);
+          runHiveCommand(command);
+        }
+        return true;
+      }
+    default: // VIEW/MATERIALIZED_VIEW
+      String msg = createManagedConversionExcuse(tableObj,
+          "Table type " + tableType + " cannot be converted");
+      LOG.debug(msg);
+      return false;
+    }
+  }
+
+  String shouldTableBeExternal(Table tableObj) throws IOException, MetaException, TException {
+    if (MetaStoreUtils.isNonNativeTable(tableObj)) {
+      return "Table is a non-native (StorageHandler) table";
+    }
+    if (HiveStrictManagedUtils.isAvroTableWithExternalSchema(tableObj)) {
+      return "Table is an Avro table with an external schema url";
+    }
+    // List bucketed table cannot be converted to transactional
+    if (HiveStrictManagedUtils.isListBucketedTable(tableObj)) {
+      return "Table is a list bucketed table";
+    }
+    // If any table/partition directory is not owned by hive,
+    // then assume table is using storage-based auth - set external.
+    // Transactional tables should still remain transactional,
+    // but we should have already checked for that before this point.
+    if (shouldTablePathBeExternal(tableObj, ownerName)) {
+      return String.format("One or more table directories not owned by %s, or non-HDFS path", ownerName);
+    }
+
+    return null;
+  }
+
+  boolean shouldTablePathBeExternal(Table tableObj, String userName) throws IOException, MetaException, TException {
+    boolean shouldBeExternal = false;
+    String dbName = tableObj.getDbName();
+    String tableName = tableObj.getTableName();
+
+    if (!isPartitionedTable(tableObj)) {
+      // Check the table directory.
+      Path tablePath = new Path(tableObj.getSd().getLocation());
+      FileSystem fs = tablePath.getFileSystem(conf);
+      if (isHdfs(fs)) {
+        shouldBeExternal = checkDirectoryOwnership(fs, tablePath, ownerName, true);
+      } else {
+        // Set non-hdfs tables to external, unless transactional (should have been checked before this).
+        shouldBeExternal = true;
+      }
+    } else {
+      // Check ownership for all partitions
+      List<String> partNames = hms.listPartitionNames(dbName, tableName, Short.MAX_VALUE);
+      for (String partName : partNames) {
+        Partition partObj = hms.getPartition(dbName, tableName, partName);
+        Path partPath = new Path(partObj.getSd().getLocation());
+        FileSystem fs = partPath.getFileSystem(conf);
+        if (isHdfs(fs)) {
+          shouldBeExternal = checkDirectoryOwnership(fs, partPath, ownerName, true);
+        } else {
+          shouldBeExternal = true;
+        }
+        if (shouldBeExternal) {
+          break;
+        }
+      }
+    }
+
+    return shouldBeExternal;
+  }
+
+  void runHiveCommand(String command) throws HiveException {
+    LOG.info("Running command: {}", command);
+
+    if (driver == null) {
+      driver = new MyDriver(conf);
+    }
+  
+    CommandProcessorResponse cpr = driver.driver.run(command);
+    if (cpr.getResponseCode() != 0) {
+      String msg = "Query returned non-zero code: " + cpr.getResponseCode()
+          + ", cause: " + cpr.getErrorMessage();
+      throw new HiveException(msg);
+    }
+  }
+
+  void cleanup() {
+    if (driver != null) {
+      runAndLogErrors(() -> driver.close());
+      driver = null;
+    }
+  }
+
+  static class MyDriver {
+    IDriver driver;
+
+    MyDriver(Configuration conf) {
+      HiveConf hiveConf = new HiveConf(conf, this.getClass());
+      // TODO: Clean up SessionState/Driver/TezSession on exit
+      SessionState.start(hiveConf);
+      driver = DriverFactory.newDriver(hiveConf);
+    }
+
+    void close() {
+      if (driver != null) {
+        runAndLogErrors(() -> driver.close());
+        runAndLogErrors(() -> driver.destroy());
+        driver = null;
+        runAndLogErrors(() -> SessionState.get().close());
+      }
+    }
+  }
+
+  MyDriver driver;
+
+  interface ThrowableRunnable {
+    void run() throws Exception;
+  }
+
+  static void runAndLogErrors(ThrowableRunnable r) {
+    try {
+      r.run();
+    } catch (Exception err) {
+      LOG.error("Error encountered", err);
+    }
+  }
+
+  static String createExternalConversionExcuse(Table tableObj, String reason) {
+    return String.format("Table %s cannot be converted to an external table in "
+        + "strict managed table mode for the following reason: %s",
+        getQualifiedName(tableObj), reason);
+  }
+
+  static String createManagedConversionExcuse(Table tableObj, String reason) {
+    return String.format("Table %s cannot be converted to a managed table in "
+        + "strict managed table mode for the following reason: %s",
+        getQualifiedName(tableObj), reason);
+  }
+
+  static boolean isPartitionedTable(Table tableObj) {
+    // Use && rather than ||: with ||, a null partition key list would
+    // throw a NullPointerException on the size() call.
+    List<FieldSchema> partKeys = tableObj.getPartitionKeys();
+    return partKeys != null && partKeys.size() > 0;
+  }
+
+  static boolean isHdfs(FileSystem fs) {
+    return fs.getScheme().equals("hdfs");
+  }
+
+  static String getQualifiedName(Table tableObj) {
+    return getQualifiedName(tableObj.getDbName(), tableObj.getTableName());
+  }
+
+  static String getQualifiedName(String dbName, String tableName) {
+    StringBuilder sb = new StringBuilder();
+    sb.append('`');
+    sb.append(dbName);
+    sb.append("`.`");
+    sb.append(tableName);
+    sb.append('`');
+    return sb.toString();
+  }
+
+  static boolean arePathsEqual(Configuration conf, String path1, String path2) throws IOException {
+    String qualified1 = getQualifiedPath(conf, new Path(path1));
+    String qualified2 = getQualifiedPath(conf, new Path(path2));
+    return qualified1.equals(qualified2);
+  }
+
+  static String getQualifiedPath(Configuration conf, Path path) throws IOException {
+    FileSystem fs;
+    if (path == null) {
+      return null;
+    }
+
+    fs = path.getFileSystem(conf);
+    return fs.makeQualified(path).toString();
+  }
+
+  /**
+   * Recursively check the file owner and permissions, setting them to the passed in values
+   * if the owner/perms of the file do not match.
+   * @param fs
+   * @param path
+   * @param userName  Owner of the file to compare/set. Null to skip this check.
+   * @param groupName Group of the file to compare/set. Null to skip this check.
+   * @param dirPerms  Permissions to compare/set, if the file is a directory. Null to skip this check.
+   * @param filePerms Permissions to compare/set, if the file is a file. Null to skip this check.
+   * @param dryRun    Dry run - check but do not actually set
+   * @param recurse   Whether to recursively check/set the contents of a directory
+   * @throws IOException
+   */
+  static void checkAndSetFileOwnerPermissions(FileSystem fs, Path path,
+      String userName, String groupName,
+      FsPermission dirPerms, FsPermission filePerms,
+      boolean dryRun, boolean recurse) throws IOException {
+    FileStatus fStatus = fs.getFileStatus(path);
+    checkAndSetFileOwnerPermissions(fs, fStatus, userName, groupName, dirPerms, filePerms, dryRun, recurse);
+  }
+
+  /**
+   * Recursively check the file owner and permissions, setting them to the passed in values
+   * if the owner/perms of the file do not match.
+   * @param fs
+   * @param fStatus
+   * @param userName  Owner of the file to compare/set. Null to skip this check.
+   * @param groupName Group of the file to compare/set. Null to skip this check.
+   * @param dirPerms  Permissions to compare/set, if the file is a directory. Null to skip this check.
+   * @param filePerms Permissions to compare/set, if the file is a file. Null to skip this check.
+   * @param dryRun    Dry run - check but do not actually set
+   * @param recurse   Whether to recursively check/set the contents of a directory
+   * @throws IOException
+   */
+  static void checkAndSetFileOwnerPermissions(FileSystem fs, FileStatus fStatus,
+      String userName, String groupName,
+      FsPermission dirPerms, FsPermission filePerms,
+      boolean dryRun, boolean recurse) throws IOException {
+    Path path = fStatus.getPath();
+    boolean setOwner = false;
+    if (userName != null && !userName.equals(fStatus.getOwner())) {
+      setOwner = true;
+    } else if (groupName != null && !groupName.equals(fStatus.getGroup())) {
+      setOwner = true;
+    }
+
+    boolean isDir = fStatus.isDirectory();
+    boolean setPerms = false;
+    FsPermission perms = filePerms;
+    if (isDir) {
+      perms = dirPerms;
+    }
+    if (perms != null && !perms.equals(fStatus.getPermission())) {
+      setPerms = true;
+    }
+
+    if (setOwner) {
+      LOG.debug("Setting owner/group of {} to {}/{}", path, userName, groupName);
+      if (!dryRun) {
+        fs.setOwner(path, userName, groupName);
+      }
+    }
+    if (setPerms) {
+      LOG.debug("Setting perms of {} to {}", path, perms);
+      if (!dryRun) {
+        fs.setPermission(path, perms);
+      }
+    }
+
+    if (isDir && recurse) {
+      for (FileStatus subFile : fs.listStatus(path)) {
+        // TODO: Use threadpool for more concurrency?
+        // TODO: check/set all files, or only directories?
+        checkAndSetFileOwnerPermissions(fs, subFile, userName, groupName, dirPerms, filePerms, dryRun, recurse);
+      }
+    }
+  }
+
+  static boolean checkDirectoryOwnership(FileSystem fs,
+      Path path,
+      String userName,
+      boolean recurse) throws IOException {
+    FileStatus fStatus = fs.getFileStatus(path);
+    return checkDirectoryOwnership(fs, fStatus, userName, recurse);
+  }
+
+  static boolean checkDirectoryOwnership(FileSystem fs,
+      FileStatus fStatus,
+      String userName,
+      boolean recurse) throws IOException {
+    Path path = fStatus.getPath();
+    boolean result = true;
+
+    // Ignore non-directory files
+    boolean isDir = fStatus.isDirectory();
+    if (isDir) {
+      if (userName != null && !userName.equals(fStatus.getOwner())) {
+        return false;
+      }
+
+      if (recurse) {
+        for (FileStatus subFile : fs.listStatus(path)) {
+          if (!checkDirectoryOwnership(fs, subFile, userName, recurse)) {
+            return false;
+          }
+        }
+      }
+    }
+
+    return result;
+  }
+}
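
Besides the command-line options, the migration tool reads a few plain
Configuration keys in checkOwnerPermsOptions (owner, group, and dir/file
permissions). A sketch of a full migration run combining them, with illustrative
host names, paths, and permission values:

  # Move managed-table data from an old warehouse root, chown it to the
  # hive user, and tighten directory/file permissions along the way.
  hive --service strictmanagedmigration \
      --hiveconf strict.managed.tables.migration.owner=hive \
      --hiveconf strict.managed.tables.migration.dir.permissions=1700 \
      --hiveconf strict.managed.tables.migration.file.permissions=700 \
      --oldWarehouseRoot hdfs://namenode:8020/apps/hive/warehouse.old \
      -m automatic \
      --modifyManagedTables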

http://git-wip-us.apache.org/repos/asf/hive/blob/96c65a39/ql/src/java/org/apache/hadoop/hive/ql/util/HiveStrictManagedUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/util/HiveStrictManagedUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/util/HiveStrictManagedUtils.java
new file mode 100644
index 0000000..d9536eb
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/util/HiveStrictManagedUtils.java
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.util;
+
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.TableType;
+import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
+import org.apache.hadoop.hive.ql.io.AcidUtils;
+import org.apache.hadoop.hive.ql.io.NullRowsInputFormat;
+import org.apache.hadoop.hive.ql.io.OneNullRowInputFormat;
+import org.apache.hadoop.hive.ql.io.ZeroRowsInputFormat;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.serde2.avro.AvroSerDe;
+import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils.AvroTableProperties;
+
+public class HiveStrictManagedUtils {
+
+  private static final Set<String> EXEMPT_INPUTFORMATS =
+      new HashSet<String>(Arrays.asList(NullRowsInputFormat.class.getName(),
+          OneNullRowInputFormat.class.getName(), ZeroRowsInputFormat.class.getName()));
+
+
+  public static void validateStrictManagedTable(Configuration conf, Table table)
+      throws HiveException {
+    String reason = validateStrictManagedTable(conf, table.getTTable());
+    if (reason != null) {
+      throw new HiveException(reason);
+    }
+  }
+
+  /**
+   * Checks if the table is valid based on the rules for strict managed tables.
+   * @param conf
+   * @param table
+   * @return  Null if the table is valid, otherwise a string message indicating why the table is invalid.
+   */
+  public static String validateStrictManagedTable(Configuration conf,
+      org.apache.hadoop.hive.metastore.api.Table table) {
+    if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STRICT_MANAGED_TABLES)) {
+      if (table.isTemporary()) {
+        // temp tables exempted from checks.
+        return null;
+      }
+
+      TableType tableType = TableType.valueOf(table.getTableType());
+      if (tableType == TableType.MANAGED_TABLE) {
+        if (!AcidUtils.isTransactionalTable(table)) {
+          String inputFormat = null;
+          if (table.getSd() != null) {
+            inputFormat = table.getSd().getInputFormat();
+          }
+          if (!EXEMPT_INPUTFORMATS.contains(inputFormat)) {
+            return createValidationError(table, "Table is marked as a managed table but is not transactional.");
+          }
+        }
+        if (MetaStoreUtils.isNonNativeTable(table)) {
+          return createValidationError(table, "Table is marked as a managed table but is non-native.");
+        }
+        if (isAvroTableWithExternalSchema(table)) {
+          return createValidationError(table, "Managed Avro table has externally defined schema.");
+        }
+      }
+    }
+
+    // Table is valid
+    return null;
+  }
+
+  public static boolean isAvroTableWithExternalSchema(org.apache.hadoop.hive.metastore.api.Table table) {
+    if (table.getSd().getSerdeInfo().getSerializationLib().equals(AvroSerDe.class.getName())) {
+      String schemaUrl = table.getParameters().get(AvroTableProperties.SCHEMA_URL.getPropName());
+      if (schemaUrl != null && !schemaUrl.isEmpty()) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  public static boolean isListBucketedTable(org.apache.hadoop.hive.metastore.api.Table table) {
+    return table.getSd().isStoredAsSubDirectories();
+  }
+
+  private static String createValidationError(org.apache.hadoop.hive.metastore.api.Table table, String message) {
+    StringBuilder sb = new StringBuilder();
+    sb.append("Table ");
+    sb.append(table.getDbName());
+    sb.append(".");
+    sb.append(table.getTableName());
+    sb.append(" failed strict managed table checks due to the following reason: ");
+    sb.append(message);
+    return sb.toString();
+  }
+}
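
The migration tool's VALIDATE option calls validateStrictManagedTable for each
matched table and exits nonzero if any table fails. Note the util only performs
its checks when hive.strict.managed.tables is true, so enable it for the run. A
validation-only sketch:

  # No tables are altered; a nonzero exit status means at least one
  # table failed the strict managed table checks.
  hive --service strictmanagedmigration \
      --hiveconf hive.strict.managed.tables=true \
      -m validate
  echo "validation exit status: $?"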