You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ng...@apache.org on 2020/07/07 15:26:09 UTC

[hive] branch master updated: HIVE-23388: CTAS queries should use target's location for staging (Naveen Gangam)

This is an automated email from the ASF dual-hosted git repository.

ngangam pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new cf453fb  HIVE-23388: CTAS queries should use target's location for staging (Naveen Gangam)
cf453fb is described below

commit cf453fbabc0e94aa7f5c6be70c956c0a3cfddea7
Author: Naveen Gangam <ng...@cloudera.com>
AuthorDate: Mon Jun 15 13:55:27 2020 -0400

    HIVE-23388: CTAS queries should use target's location for staging (Naveen Gangam)
---
 ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java |  8 ++++++++
 .../apache/hadoop/hive/ql/parse/SemanticAnalyzer.java   | 17 +++++++++++++++--
 .../clientpositive/ctas_uses_database_location.q        |  4 ++--
 .../org/apache/hadoop/hive/metastore/Warehouse.java     | 17 +++++++++++++++++
 4 files changed, 42 insertions(+), 4 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
index a06194d..500f168 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
@@ -68,6 +68,7 @@ import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
 import org.apache.hadoop.hive.ql.Context;
 import org.apache.hadoop.hive.ql.ErrorMsg;
 import org.apache.hadoop.hive.ql.ddl.table.create.CreateTableDesc;
+import org.apache.hadoop.hive.ql.ddl.view.create.CreateViewDesc;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.hooks.Entity;
 import org.apache.hadoop.hive.ql.hooks.ReadEntity;
@@ -1968,6 +1969,13 @@ public class AcidUtils {
     return tableIsTransactional != null && tableIsTransactional.equalsIgnoreCase("true");
   }
 
+  public static boolean isTransactionalView(CreateViewDesc view) {
+    if (view == null || view.getTblProps() == null) {
+      return false;
+    }
+    return isTransactionalTable(view.getTblProps());
+  }
+
   /**
    * Should produce the same result as
    * {@link org.apache.hadoop.hive.metastore.txn.TxnUtils#isAcidTable(org.apache.hadoop.hive.metastore.api.Table)}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 72794e4..534fbc8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -2342,7 +2342,9 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
 
             Path location;
             // If the CTAS query does specify a location, use the table location, else use the db location
-            if (qb.getTableDesc() != null && qb.getTableDesc().getLocation() != null) {
+            if (qb.isMaterializedView() && qb.getViewDesc() != null && qb.getViewDesc().getLocation() != null) {
+              location = new Path(qb.getViewDesc().getLocation());
+            } else if (qb.isCTAS() && qb.getTableDesc() != null && qb.getTableDesc().getLocation() != null) {
               location = new Path(qb.getTableDesc().getLocation());
             } else {
               // allocate a temporary output dir on the location of the table
@@ -2355,7 +2357,18 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
                 if (destTableDb == null) {
                   destTableDb = names[0];
                 }
-                location = wh.getDatabasePath(db.getDatabase(destTableDb));
+                boolean useExternal = false;
+                if (qb.isMaterializedView()) {
+                  useExternal = !AcidUtils.isTransactionalView(qb.getViewDesc()) && !makeAcid();
+                } else {
+                  useExternal = (qb.getTableDesc() == null || qb.getTableDesc().isTemporary()
+                    || qb.getTableDesc().isExternal() || !makeAcid());
+                }
+                if (useExternal) {
+                  location = wh.getDatabaseExternalPath(db.getDatabase(destTableDb));
+                } else {
+                  location = wh.getDatabaseManagedPath(db.getDatabase(destTableDb));
+                }
               } catch (MetaException e) {
                 throw new SemanticException(e);
               }
diff --git a/ql/src/test/queries/clientpositive/ctas_uses_database_location.q b/ql/src/test/queries/clientpositive/ctas_uses_database_location.q
index 1c21981..d0514c8 100644
--- a/ql/src/test/queries/clientpositive/ctas_uses_database_location.q
+++ b/ql/src/test/queries/clientpositive/ctas_uses_database_location.q
@@ -1,12 +1,12 @@
 --! qt:dataset:src
-set hive.metastore.warehouse.dir=invalid_scheme://${system:test.tmp.dir};
+set hive.metastore.warehouse.dir=pfile://${system:test.tmp.dir};
 
 -- Tests that CTAS queries in non-default databases use the location of the database
 -- not the hive.metastore.warehouse.dir for intermediate files (FileSinkOperator output).
 -- If hive.metastore.warehouse.dir were used this would fail because the scheme is invalid.
 
 CREATE DATABASE db1
-LOCATION 'pfile://${system:test.tmp.dir}/db1';
+LOCATION 'pfile://${system:test.tmp.dir}/db_nondefault';
 
 USE db1;
 EXPLAIN CREATE TABLE table_db1 AS SELECT * FROM default.src;
diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/Warehouse.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/Warehouse.java
index dcd9132..5ec8ad8 100755
--- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/Warehouse.java
+++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/Warehouse.java
@@ -237,6 +237,23 @@ public class Warehouse {
    * @throws MetaException when the file path cannot be properly determined from the configured
    * file system.
    */
+  public Path getDatabaseExternalPath(Database db) throws MetaException {
+      Path dbPath = new Path(db.getLocationUri());
+      if (FileUtils.isSubdirectory(getWhRoot().toString(), dbPath.toString() + Path.SEPARATOR)) {
+        // db metadata incorrect, find new location based on external warehouse root
+        dbPath = getDefaultExternalDatabasePath(db.getName());
+      }
+      return getDnsPath(dbPath);
+  }
+
+  /**
+   * Get the managed tables path specified by the database.  In the case of the default database the root of the
+   * warehouse is returned.
+   * @param db database to get the path of
+   * @return path to the database directory
+   * @throws MetaException when the file path cannot be properly determined from the configured
+   * file system.
+   */
   public Path getDatabaseManagedPath(Database db) throws MetaException {
     if (db.getManagedLocationUri() != null) {
       return getDnsPath(new Path(db.getManagedLocationUri()));