You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by dt...@apache.org on 2017/01/20 21:17:54 UTC

[2/4] incubator-impala git commit: IMPALA-4767: Workaround for HIVE-15653 to preserve table stats.

IMPALA-4767: Workaround for HIVE-15653 to preserve table stats.

HIVE-15653 is a Hive Metastore bug that results in ALTER TABLE
commands wiping the table stats of unpartitioned tables.

Until the Hive bug is fixed, this patch adds a workaround
to Impala that forces the Metastore to preserve the table stats.

Testing: Private core/hdfs run passed.

Change-Id: Ic191c765f73624bc716badadd7215c8dca9d6b1f
Reviewed-on: http://gerrit.cloudera.org:8080/5731
Reviewed-by: Alex Behm <al...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/74387300
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/74387300
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/74387300

Branch: refs/heads/master
Commit: 743873005225c55240de7181eea7bb438a260ff1
Parents: 6cf3efd
Author: Alex Behm <al...@cloudera.com>
Authored: Tue Jan 17 18:34:08 2017 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Fri Jan 20 01:18:10 2017 +0000

----------------------------------------------------------------------
 .../impala/service/CatalogOpExecutor.java       |  6 ++-
 .../queries/QueryTest/compute-stats.test        | 52 +++++++++++++++++++-
 2 files changed, 56 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/74387300/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java b/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java
index 208ff2b..fce6e07 100644
--- a/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java
+++ b/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java
@@ -712,7 +712,7 @@ public class CatalogOpExecutor {
           msClient.getHiveClient().updateTableColumnStatistics(colStats);
         } catch (Exception e) {
           throw new ImpalaRuntimeException(String.format(HMS_RPC_ERROR_FORMAT_STR,
-                  "updateTableColumnStatistics"), e);
+              "updateTableColumnStatistics"), e);
         }
       }
       // Update the table stats. Apply the table alteration last to ensure the
@@ -2622,6 +2622,10 @@ public class CatalogOpExecutor {
     try (MetaStoreClient msClient = catalog_.getMetaStoreClient()) {
       lastDdlTime = calculateDdlTime(msTbl);
       msTbl.putToParameters("transient_lastDdlTime", Long.toString(lastDdlTime));
+      // TODO: Remove this workaround once HIVE-15653 is fixed. It forces the
+      // Metastore to preserve table stats during table alterations.
+      msTbl.putToParameters(StatsSetupConst.STATS_GENERATED_VIA_STATS_TASK,
+          StatsSetupConst.TRUE);
       msClient.getHiveClient().alter_table(
           msTbl.getDbName(), msTbl.getTableName(), msTbl);
     } catch (TException e) {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/74387300/testdata/workloads/functional-query/queries/QueryTest/compute-stats.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/compute-stats.test b/testdata/workloads/functional-query/queries/QueryTest/compute-stats.test
index b741c5a..a42dedf 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/compute-stats.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/compute-stats.test
@@ -316,7 +316,7 @@ STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
 drop stats alltypes
 ====
 ---- QUERY
-# test computing stats on an partitioned text table with all types
+# test computing stats on an unpartitioned text table with all types
 create table alltypesnopart like functional.alltypesnopart;
 insert into alltypesnopart
 select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col,
@@ -359,6 +359,19 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
 STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
 ====
 ---- QUERY
+# IMPALA-4767: Test that ALTER TABLE commands preserve table stats.
+alter table alltypesnopart set tblproperties('test'='test');
+alter table alltypesnopart set column stats string_col ('numDVs'='10');
+alter table alltypesnopart add columns (new_col int);
+show table stats alltypesnopart;
+---- LABELS
+#ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
+---- RESULTS
+100,3,'7.73KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.*
+---- TYPES
+BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
+====
+---- QUERY
 # test computing stats on a partitioned parquet table with all types
 create table alltypes_parquet
 like functional_parquet.alltypes;
@@ -427,6 +440,43 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
 STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
 ====
 ---- QUERY
+# IMPALA-4767: Test that ALTER TABLE commands preserve table stats.
+alter table alltypes_parquet set tblproperties('test'='test');
+alter table alltypes_parquet set column stats string_col ('numDVs'='10');
+alter table alltypes_parquet add columns (new_col int);
+show table stats alltypes_parquet;
+---- LABELS
+YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
+---- RESULTS
+'2009','1',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
+'2009','2',280,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
+'2009','3',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
+'2009','4',300,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
+'2009','5',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
+'2009','6',300,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
+'2009','7',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
+'2009','8',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
+'2009','9',300,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
+'2009','10',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
+'2009','11',300,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
+'2009','12',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
+'2010','1',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
+'2010','2',280,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
+'2010','3',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
+'2010','4',300,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
+'2010','5',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
+'2010','6',300,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
+'2010','7',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
+'2010','8',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
+'2010','9',300,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
+'2010','10',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
+'2010','11',300,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
+'2010','12',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.*
+'Total','',7300,24,regex:.+KB,'0B','','','',''
+---- TYPES
+STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
+====
+---- QUERY
 # test computing stats on an empty table
 create table alltypes_empty like functional_rc_snap.alltypes
 ====