You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2018/02/23 17:22:28 UTC

[3/3] impala git commit: IMPALA-6275: Fix warn stacktrace in successful CTAS

IMPALA-6275: Fix warn stacktrace in successful CTAS

For CTAS in HDFS tables, do not load the column statistics from HMS
since that information will not be available in HMS.

Refactored CTAS for HDFS tables to use HdfsTable.createCtasTarget()
instead of HdfsTable.load().

Testing:
- Ran end-to-end query tests

Change-Id: I6f07a188458954802fda20e3b3b56280d96e788e
Reviewed-on: http://gerrit.cloudera.org:8080/9364
Reviewed-by: Alex Behm <al...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/1765a44d
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/1765a44d
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/1765a44d

Branch: refs/heads/master
Commit: 1765a44da00401298a6367a2885996f4b12a5e3b
Parents: d91df9b
Author: Fredy wijaya <fw...@cloudera.com>
Authored: Thu Feb 22 11:10:36 2018 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Fri Feb 23 10:49:28 2018 +0000

----------------------------------------------------------------------
 .../analysis/CreateTableAsSelectStmt.java       |  8 ++--
 .../org/apache/impala/catalog/HdfsTable.java    | 42 +++++++++++++-------
 2 files changed, 30 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/1765a44d/fe/src/main/java/org/apache/impala/analysis/CreateTableAsSelectStmt.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/CreateTableAsSelectStmt.java b/fe/src/main/java/org/apache/impala/analysis/CreateTableAsSelectStmt.java
index 222b99a..aac6873 100644
--- a/fe/src/main/java/org/apache/impala/analysis/CreateTableAsSelectStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/CreateTableAsSelectStmt.java
@@ -23,6 +23,7 @@ import java.util.List;
 
 import org.apache.impala.authorization.Privilege;
 import org.apache.impala.catalog.Db;
+import org.apache.impala.catalog.HdfsFileFormat;
 import org.apache.impala.catalog.HdfsTable;
 import org.apache.impala.catalog.KuduTable;
 import org.apache.impala.catalog.MetaStoreClientPool.MetaStoreClient;
@@ -211,11 +212,8 @@ public class CreateTableAsSelectStmt extends StatementBase {
         tmpTable = KuduTable.createCtasTarget(db, msTbl, createStmt_.getColumnDefs(),
             createStmt_.getPrimaryKeyColumnDefs(),
             createStmt_.getKuduPartitionParams());
-      } else {
-        // TODO: Creating a tmp table using load() is confusing.
-        // Refactor it to use a 'createCtasTarget()' function similar to Kudu table.
-        tmpTable = Table.fromMetastoreTable(db, msTbl);
-        tmpTable.load(true, client.getHiveClient(), msTbl);
+      } else if (HdfsFileFormat.isHdfsInputFormatClass(msTbl.getSd().getInputFormat())) {
+        tmpTable = HdfsTable.createCtasTarget(db, msTbl);
       }
       Preconditions.checkState(tmpTable != null &&
           (tmpTable instanceof HdfsTable || tmpTable instanceof KuduTable));

http://git-wip-us.apache.org/repos/asf/impala/blob/1765a44d/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java
index 0f782be..14f2888 100644
--- a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java
+++ b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java
@@ -43,6 +43,7 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.LocatedFileStatus;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.IMetaStoreClient;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.Partition;
@@ -1246,13 +1247,12 @@ public class HdfsTable extends Table {
       // turn all exceptions into TableLoadingException
       msTable_ = msTbl;
       try {
-        if (loadTableSchema) loadSchema(client, msTbl);
-        if (reuseMetadata && getCatalogVersion() == Catalog.INITIAL_CATALOG_VERSION) {
-          // This is the special case of CTAS that creates a 'temp' table that does not
-          // actually exist in the Hive Metastore.
-          initializePartitionMetadata(msTbl);
-          setTableStats(msTbl);
-          return;
+        if (loadTableSchema) {
+            // set nullPartitionKeyValue from the hive conf.
+            nullPartitionKeyValue_ = client.getConfigValue(
+                "hive.exec.default.partition.name", "__HIVE_DEFAULT_PARTITION__");
+            loadSchema(msTbl);
+            loadAllColumnStats(client);
         }
         // Load partition and file metadata
         if (reuseMetadata) {
@@ -1568,15 +1568,11 @@ public class HdfsTable extends Table {
   }
 
   /**
-   * Loads table schema and column stats from Hive Metastore.
+   * Loads table schema.
    */
-  private void loadSchema(IMetaStoreClient client,
-      org.apache.hadoop.hive.metastore.api.Table msTbl) throws Exception {
+  private void loadSchema(org.apache.hadoop.hive.metastore.api.Table msTbl)
+      throws TableLoadingException {
     nonPartFieldSchemas_.clear();
-    // set nullPartitionKeyValue from the hive conf.
-    nullPartitionKeyValue_ = client.getConfigValue(
-        "hive.exec.default.partition.name", "__HIVE_DEFAULT_PARTITION__");
-
     // set NULL indicator string from table properties
     nullColumnValue_ =
         msTbl.getParameters().get(serdeConstants.SERIALIZATION_NULL_FORMAT);
@@ -1593,7 +1589,6 @@ public class HdfsTable extends Table {
     // then all other columns.
     addColumnsFromFieldSchemas(msTbl.getPartitionKeys());
     addColumnsFromFieldSchemas(nonPartFieldSchemas_);
-    loadAllColumnStats(client);
     isSchemaLoaded_ = true;
   }
 
@@ -2278,4 +2273,21 @@ public class HdfsTable extends Table {
     });
     metrics_.addTimer(CATALOG_UPDATE_DURATION_METRIC);
   }
+
+  /**
+   * Creates a temporary HdfsTable object populated with the specified properties.
+   * This is used for CTAS statements.
+   */
+  public static HdfsTable createCtasTarget(Db db,
+      org.apache.hadoop.hive.metastore.api.Table msTbl) throws CatalogException {
+    HdfsTable tmpTable = new HdfsTable(msTbl, db, msTbl.getTableName(), msTbl.getOwner());
+    HiveConf hiveConf = new HiveConf(HdfsTable.class);
+    // set nullPartitionKeyValue from the hive conf.
+    tmpTable.nullPartitionKeyValue_ = hiveConf.get(
+        "hive.exec.default.partition.name", "__HIVE_DEFAULT_PARTITION__");
+    tmpTable.loadSchema(msTbl);
+    tmpTable.initializePartitionMetadata(msTbl);
+    tmpTable.setTableStats(msTbl);
+    return tmpTable;
+  }
 }