You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2018/02/24 01:59:12 UTC

[4/5] impala git commit: IMPALA-6275: Fix warn stacktrace in successful CTAS

IMPALA-6275: Fix warn stacktrace in successful CTAS

For CTAS in HDFS tables, do not load the column statistics from HMS
since that information will not be available in HMS.

Refactored CTAS for HDFS tables to use HdfsTable.createCtasTarget()
instead of HdfsTable.load().

Testing:
- Ran end-to-end query tests

Change-Id: I6f07a188458954802fda20e3b3b56280d96e788e
Reviewed-on: http://gerrit.cloudera.org:8080/9364
Reviewed-by: Alex Behm <al...@cloudera.com>
Tested-by: Impala Public Jenkins
Reviewed-on: http://gerrit.cloudera.org:8080/9429
Reviewed-by: Tim Armstrong <ta...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/1795acaa
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/1795acaa
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/1795acaa

Branch: refs/heads/2.x
Commit: 1795acaa0fd0aa4186c5cfdbdff681fb17dec16a
Parents: ea3cefb
Author: Fredy wijaya <fw...@cloudera.com>
Authored: Thu Feb 22 11:10:36 2018 -0800
Committer: Tim Armstrong <ta...@cloudera.com>
Committed: Sat Feb 24 01:58:46 2018 +0000

----------------------------------------------------------------------
 .../analysis/CreateTableAsSelectStmt.java       |  8 ++--
 .../org/apache/impala/catalog/HdfsTable.java    | 42 +++++++++++++-------
 2 files changed, 30 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/1795acaa/fe/src/main/java/org/apache/impala/analysis/CreateTableAsSelectStmt.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/CreateTableAsSelectStmt.java b/fe/src/main/java/org/apache/impala/analysis/CreateTableAsSelectStmt.java
index 222b99a..aac6873 100644
--- a/fe/src/main/java/org/apache/impala/analysis/CreateTableAsSelectStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/CreateTableAsSelectStmt.java
@@ -23,6 +23,7 @@ import java.util.List;
 
 import org.apache.impala.authorization.Privilege;
 import org.apache.impala.catalog.Db;
+import org.apache.impala.catalog.HdfsFileFormat;
 import org.apache.impala.catalog.HdfsTable;
 import org.apache.impala.catalog.KuduTable;
 import org.apache.impala.catalog.MetaStoreClientPool.MetaStoreClient;
@@ -211,11 +212,8 @@ public class CreateTableAsSelectStmt extends StatementBase {
         tmpTable = KuduTable.createCtasTarget(db, msTbl, createStmt_.getColumnDefs(),
             createStmt_.getPrimaryKeyColumnDefs(),
             createStmt_.getKuduPartitionParams());
-      } else {
-        // TODO: Creating a tmp table using load() is confusing.
-        // Refactor it to use a 'createCtasTarget()' function similar to Kudu table.
-        tmpTable = Table.fromMetastoreTable(db, msTbl);
-        tmpTable.load(true, client.getHiveClient(), msTbl);
+      } else if (HdfsFileFormat.isHdfsInputFormatClass(msTbl.getSd().getInputFormat())) {
+        tmpTable = HdfsTable.createCtasTarget(db, msTbl);
       }
       Preconditions.checkState(tmpTable != null &&
           (tmpTable instanceof HdfsTable || tmpTable instanceof KuduTable));

http://git-wip-us.apache.org/repos/asf/impala/blob/1795acaa/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java
index 0f782be..14f2888 100644
--- a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java
+++ b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java
@@ -43,6 +43,7 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.LocatedFileStatus;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.IMetaStoreClient;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.Partition;
@@ -1246,13 +1247,12 @@ public class HdfsTable extends Table {
       // turn all exceptions into TableLoadingException
       msTable_ = msTbl;
       try {
-        if (loadTableSchema) loadSchema(client, msTbl);
-        if (reuseMetadata && getCatalogVersion() == Catalog.INITIAL_CATALOG_VERSION) {
-          // This is the special case of CTAS that creates a 'temp' table that does not
-          // actually exist in the Hive Metastore.
-          initializePartitionMetadata(msTbl);
-          setTableStats(msTbl);
-          return;
+        if (loadTableSchema) {
+            // set nullPartitionKeyValue from the hive conf.
+            nullPartitionKeyValue_ = client.getConfigValue(
+                "hive.exec.default.partition.name", "__HIVE_DEFAULT_PARTITION__");
+            loadSchema(msTbl);
+            loadAllColumnStats(client);
         }
         // Load partition and file metadata
         if (reuseMetadata) {
@@ -1568,15 +1568,11 @@ public class HdfsTable extends Table {
   }
 
   /**
-   * Loads table schema and column stats from Hive Metastore.
+   * Loads table schema.
    */
-  private void loadSchema(IMetaStoreClient client,
-      org.apache.hadoop.hive.metastore.api.Table msTbl) throws Exception {
+  private void loadSchema(org.apache.hadoop.hive.metastore.api.Table msTbl)
+      throws TableLoadingException {
     nonPartFieldSchemas_.clear();
-    // set nullPartitionKeyValue from the hive conf.
-    nullPartitionKeyValue_ = client.getConfigValue(
-        "hive.exec.default.partition.name", "__HIVE_DEFAULT_PARTITION__");
-
     // set NULL indicator string from table properties
     nullColumnValue_ =
         msTbl.getParameters().get(serdeConstants.SERIALIZATION_NULL_FORMAT);
@@ -1593,7 +1589,6 @@ public class HdfsTable extends Table {
     // then all other columns.
     addColumnsFromFieldSchemas(msTbl.getPartitionKeys());
     addColumnsFromFieldSchemas(nonPartFieldSchemas_);
-    loadAllColumnStats(client);
     isSchemaLoaded_ = true;
   }
 
@@ -2278,4 +2273,21 @@ public class HdfsTable extends Table {
     });
     metrics_.addTimer(CATALOG_UPDATE_DURATION_METRIC);
   }
+
+  /**
+   * Creates a temporary HdfsTable object populated with the specified properties.
+   * This is used for CTAS statements.
+   */
+  public static HdfsTable createCtasTarget(Db db,
+      org.apache.hadoop.hive.metastore.api.Table msTbl) throws CatalogException {
+    HdfsTable tmpTable = new HdfsTable(msTbl, db, msTbl.getTableName(), msTbl.getOwner());
+    HiveConf hiveConf = new HiveConf(HdfsTable.class);
+    // set nullPartitionKeyValue from the hive conf.
+    tmpTable.nullPartitionKeyValue_ = hiveConf.get(
+        "hive.exec.default.partition.name", "__HIVE_DEFAULT_PARTITION__");
+    tmpTable.loadSchema(msTbl);
+    tmpTable.initializePartitionMetadata(msTbl);
+    tmpTable.setTableStats(msTbl);
+    return tmpTable;
+  }
 }