You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ng...@apache.org on 2020/07/07 15:26:09 UTC
[hive] branch master updated: HIVE-23388: CTAS queries should use
target's location for staging (Naveen Gangam)
This is an automated email from the ASF dual-hosted git repository.
ngangam pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new cf453fb HIVE-23388: CTAS queries should use target's location for staging (Naveen Gangam)
cf453fb is described below
commit cf453fbabc0e94aa7f5c6be70c956c0a3cfddea7
Author: Naveen Gangam <ng...@cloudera.com>
AuthorDate: Mon Jun 15 13:55:27 2020 -0400
HIVE-23388: CTAS queries should use target's location for staging (Naveen Gangam)
---
ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java | 8 ++++++++
.../apache/hadoop/hive/ql/parse/SemanticAnalyzer.java | 17 +++++++++++++++--
.../clientpositive/ctas_uses_database_location.q | 4 ++--
.../org/apache/hadoop/hive/metastore/Warehouse.java | 17 +++++++++++++++++
4 files changed, 42 insertions(+), 4 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
index a06194d..500f168 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
@@ -68,6 +68,7 @@ import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.ddl.table.create.CreateTableDesc;
+import org.apache.hadoop.hive.ql.ddl.view.create.CreateViewDesc;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.hooks.Entity;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
@@ -1968,6 +1969,13 @@ public class AcidUtils {
return tableIsTransactional != null && tableIsTransactional.equalsIgnoreCase("true");
}
+ public static boolean isTransactionalView(CreateViewDesc view) {
+ if (view == null || view.getTblProps() == null) {
+ return false;
+ }
+ return isTransactionalTable(view.getTblProps());
+ }
+
/**
* Should produce the same result as
* {@link org.apache.hadoop.hive.metastore.txn.TxnUtils#isAcidTable(org.apache.hadoop.hive.metastore.api.Table)}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 72794e4..534fbc8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -2342,7 +2342,9 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
Path location;
// If the CTAS query does specify a location, use the table location, else use the db location
- if (qb.getTableDesc() != null && qb.getTableDesc().getLocation() != null) {
+ if (qb.isMaterializedView() && qb.getViewDesc() != null && qb.getViewDesc().getLocation() != null) {
+ location = new Path(qb.getViewDesc().getLocation());
+ } else if (qb.isCTAS() && qb.getTableDesc() != null && qb.getTableDesc().getLocation() != null) {
location = new Path(qb.getTableDesc().getLocation());
} else {
// allocate a temporary output dir on the location of the table
@@ -2355,7 +2357,18 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
if (destTableDb == null) {
destTableDb = names[0];
}
- location = wh.getDatabasePath(db.getDatabase(destTableDb));
+ boolean useExternal = false;
+ if (qb.isMaterializedView()) {
+ useExternal = !AcidUtils.isTransactionalView(qb.getViewDesc()) && !makeAcid();
+ } else {
+ useExternal = (qb.getTableDesc() == null || qb.getTableDesc().isTemporary()
+ || qb.getTableDesc().isExternal() || !makeAcid());
+ }
+ if (useExternal) {
+ location = wh.getDatabaseExternalPath(db.getDatabase(destTableDb));
+ } else {
+ location = wh.getDatabaseManagedPath(db.getDatabase(destTableDb));
+ }
} catch (MetaException e) {
throw new SemanticException(e);
}
diff --git a/ql/src/test/queries/clientpositive/ctas_uses_database_location.q b/ql/src/test/queries/clientpositive/ctas_uses_database_location.q
index 1c21981..d0514c8 100644
--- a/ql/src/test/queries/clientpositive/ctas_uses_database_location.q
+++ b/ql/src/test/queries/clientpositive/ctas_uses_database_location.q
@@ -1,12 +1,12 @@
--! qt:dataset:src
-set hive.metastore.warehouse.dir=invalid_scheme://${system:test.tmp.dir};
+set hive.metastore.warehouse.dir=pfile://${system:test.tmp.dir};
-- Tests that CTAS queries in non-default databases use the location of the database
-- not the hive.metastore.warehouse.dir for intermediate files (FileSinkOperator output).
-- If hive.metastore.warehouse.dir were used this would fail because the scheme is invalid.
CREATE DATABASE db1
-LOCATION 'pfile://${system:test.tmp.dir}/db1';
+LOCATION 'pfile://${system:test.tmp.dir}/db_nondefault';
USE db1;
EXPLAIN CREATE TABLE table_db1 AS SELECT * FROM default.src;
diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/Warehouse.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/Warehouse.java
index dcd9132..5ec8ad8 100755
--- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/Warehouse.java
+++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/Warehouse.java
@@ -237,6 +237,23 @@ public class Warehouse {
* @throws MetaException when the file path cannot be properly determined from the configured
* file system.
*/
+ public Path getDatabaseExternalPath(Database db) throws MetaException {
+ Path dbPath = new Path(db.getLocationUri());
+ if (FileUtils.isSubdirectory(getWhRoot().toString(), dbPath.toString() + Path.SEPARATOR)) {
+ // db metadata incorrect, find new location based on external warehouse root
+ dbPath = getDefaultExternalDatabasePath(db.getName());
+ }
+ return getDnsPath(dbPath);
+ }
+
+ /**
+ * Get the managed tables path specified by the database. In the case of the default database the root of the
+ * warehouse is returned.
+ * @param db database to get the path of
+ * @return path to the database directory
+ * @throws MetaException when the file path cannot be properly determined from the configured
+ * file system.
+ */
public Path getDatabaseManagedPath(Database db) throws MetaException {
if (db.getManagedLocationUri() != null) {
return getDnsPath(new Path(db.getManagedLocationUri()));