You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by mj...@apache.org on 2017/02/17 23:17:11 UTC

[8/9] incubator-impala git commit: IMPALA-4854: Fix incremental stats with complex types.

IMPALA-4854: Fix incremental stats with complex types.

The bug: Compute incremental stats used to always do a
full stats recomputation for tables with complex types.
The logic for detecting schema changes (e.g. an added
column) did not take into consideration that columns
with complex types are ignored in the stats computation,
and should therefore not be treated as new columns
that do not yet have stats.

Testing:
- Added a new regression test
- Locally ran test_compute_stats.py and the FE tests

Change-Id: I6e0335048d688ee25ff55c6628d0f6f8ecc1dd8a
Reviewed-on: http://gerrit.cloudera.org:8080/6033
Reviewed-by: Alex Behm <al...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/d845413a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/d845413a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/d845413a

Branch: refs/heads/master
Commit: d845413ab8fb0c92fc2d8d0c2a54d0de4dbd7429
Parents: 0c87152
Author: Alex Behm <al...@cloudera.com>
Authored: Wed Feb 15 19:03:47 2017 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Fri Feb 17 06:02:48 2017 +0000

----------------------------------------------------------------------
 .../impala/analysis/ComputeStatsStmt.java       | 24 +++++++++------
 .../QueryTest/compute-stats-incremental.test    | 32 ++++++++++++++++++++
 2 files changed, 47 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d845413a/fe/src/main/java/org/apache/impala/analysis/ComputeStatsStmt.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/ComputeStatsStmt.java b/fe/src/main/java/org/apache/impala/analysis/ComputeStatsStmt.java
index 90c46a8..a4552a6 100644
--- a/fe/src/main/java/org/apache/impala/analysis/ComputeStatsStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/ComputeStatsStmt.java
@@ -163,14 +163,8 @@ public class ComputeStatsStmt extends StatementBase {
 
     for (int i = startColIdx; i < table_.getColumns().size(); ++i) {
       Column c = table_.getColumns().get(i);
-      Type type = c.getType();
+      if (ignoreColumn(c)) continue;
 
-      // Ignore columns with an invalid/unsupported type. For example, complex types in
-      // an HBase-backed table will appear as invalid types.
-      if (!type.isValid() || !type.isSupported()
-          || c.getType().isComplexType()) {
-        continue;
-      }
       // NDV approximation function. Add explicit alias for later identification when
       // updating the Metastore.
       String colRefSql = ToSqlUtils.getIdentSql(c.getName());
@@ -189,6 +183,7 @@ public class ComputeStatsStmt extends StatementBase {
       }
 
       // For STRING columns also compute the max and avg string length.
+      Type type = c.getType();
       if (type.isStringType()) {
         columnStatsSelectList.add("MAX(length(" + colRefSql + "))");
         columnStatsSelectList.add("AVG(length(" + colRefSql + "))");
@@ -313,12 +308,13 @@ public class ComputeStatsStmt extends StatementBase {
         boolean tableIsMissingColStats = false;
 
         // We'll warn the user if a column is missing stats (and therefore we rescan the
-        // whole table), but if all columns are missing stats, the table just doesn't have
-        // any stats and there's no need to warn.
+        // whole table), but if all columns are missing stats, the table just doesn't
+        // have any stats and there's no need to warn.
         boolean allColumnsMissingStats = true;
         String exampleColumnMissingStats = null;
         // Partition columns always have stats, so exclude them from this search
         for (Column col: table_.getNonClusteringColumns()) {
+          if (ignoreColumn(col)) continue;
           if (!col.getStats().hasStats()) {
             if (!tableIsMissingColStats) {
               tableIsMissingColStats = true;
@@ -527,6 +523,16 @@ public class ComputeStatsStmt extends StatementBase {
     }
   }
 
+  /**
+   * Returns true if the given column should be ignored for the purpose of computing
+   * column stats. Columns with an invalid/unsupported/complex type are ignored.
+   * For example, complex types in an HBase-backed table will appear as invalid types.
+   */
+  private boolean ignoreColumn(Column c) {
+    Type t = c.getType();
+    return !t.isValid() || !t.isSupported() || t.isComplexType();
+  }
+
   public String getTblStatsQuery() { return tableStatsQueryStr_; }
   public String getColStatsQuery() { return columnStatsQueryStr_; }
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d845413a/testdata/workloads/functional-query/queries/QueryTest/compute-stats-incremental.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/compute-stats-incremental.test b/testdata/workloads/functional-query/queries/QueryTest/compute-stats-incremental.test
index 5f98ee7..8e89956 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/compute-stats-incremental.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/compute-stats-incremental.test
@@ -527,3 +527,35 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
 ---- TYPES
 STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
 ====
+---- QUERY
+# IMPALA-4854: Tests incremental computation in the presence of complex-typed columns.
+create external table complextypestbl_part
+  like parquet '$FILESYSTEM_PREFIX/test-warehouse/complextypestbl_parquet/nullable.parq'
+  partitioned by (p int) stored as parquet;
+alter table complextypestbl_part add partition (p=0)
+  location '$FILESYSTEM_PREFIX/test-warehouse/complextypestbl_parquet/';
+alter table complextypestbl_part add partition (p=1)
+  location '$FILESYSTEM_PREFIX/test-warehouse/complextypestbl_parquet/';
+compute incremental stats complextypestbl_part;
+---- RESULTS
+'Updated 2 partition(s) and 1 column(s).'
+---- TYPES
+STRING
+====
+---- QUERY
+# The table was not changed. Validate that the next compute incremental stats is a no-op.
+compute incremental stats complextypestbl_part;
+---- RESULTS
+---- ERRORS
+No partitions selected for incremental stats update
+====
+---- QUERY
+# Add a new partition and check that only stats for the new partition are computed.
+alter table complextypestbl_part add partition (p=2)
+  location '$FILESYSTEM_PREFIX/test-warehouse/complextypestbl_parquet/';
+compute incremental stats complextypestbl_part;
+---- RESULTS
+'Updated 1 partition(s) and 1 column(s).'
+---- TYPES
+STRING
+====