You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by to...@apache.org on 2019/05/21 17:14:27 UTC

[impala] 02/02: IMPALA-8566. Fix computation of num_nulls for incremental stats

This is an automated email from the ASF dual-hosted git repository.

todd pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit f9bf62eefab7fb807f4e5d6900064b612b455a5e
Author: Todd Lipcon <to...@apache.org>
AuthorDate: Mon May 20 13:45:29 2019 -0700

    IMPALA-8566. Fix computation of num_nulls for incremental stats
    
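    The calculation for num_nulls in the incremental stats code path
    initialized the counter to -1 instead of 0. This meant that, if there
    were no nulls (reasonably common), the num_nulls counter would be set to
    -1, indicating unknown, rather than 0. When nulls were present, the
    accumulated count came out one lower than the true value (for example,
    0 instead of 1), since the per-partition counts were added on top of
    the initial -1.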
    
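    This fixes the initialization, tightens the DCHECK so that negative
    per-partition null counts are rejected, removes the now-unneeded
    'num_new_nulls >= 0' guard, and updates the tests.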
    
    Change-Id: Ie42103ad21d719cac45abc160c8d5422dd33fb28
    Reviewed-on: http://gerrit.cloudera.org:8080/13378
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/exec/incr-stats-util.cc                     |   6 +-
 .../QueryTest/compute-stats-incremental.test       | 122 ++++++++++-----------
 .../queries/QueryTest/truncate-table.test          |  26 ++---
 3 files changed, 77 insertions(+), 77 deletions(-)

diff --git a/be/src/exec/incr-stats-util.cc b/be/src/exec/incr-stats-util.cc
index f0bb73f..6cc9f2f 100644
--- a/be/src/exec/incr-stats-util.cc
+++ b/be/src/exec/incr-stats-util.cc
@@ -140,7 +140,7 @@ struct PerColumnStats {
   double avg_width;
 
   PerColumnStats()
-      : intermediate_ndv(AggregateFunctions::HLL_LEN, 0), num_nulls(-1),
+      : intermediate_ndv(AggregateFunctions::HLL_LEN, 0), num_nulls(0),
         max_width(0), num_rows(0), avg_width(0) { }
 
   // Updates all aggregate statistics with a new set of measurements.
@@ -150,11 +150,11 @@ struct PerColumnStats {
     DCHECK_GE(num_new_rows, 0);
     DCHECK_GE(max_new_width, 0);
     DCHECK_GE(new_avg_width, 0);
-    DCHECK_GE(num_new_nulls, -1);
+    DCHECK_GE(num_new_nulls, 0);
     for (int j = 0; j < ndv.size(); ++j) {
       intermediate_ndv[j] = ::max(intermediate_ndv[j], ndv[j]);
     }
-    if (num_new_nulls >= 0) num_nulls += num_new_nulls;
+    num_nulls += num_new_nulls;
     max_width = ::max(max_width, max_new_width);
     avg_width += (new_avg_width * num_new_rows);
     num_rows += num_new_rows;
diff --git a/testdata/workloads/functional-query/queries/QueryTest/compute-stats-incremental.test b/testdata/workloads/functional-query/queries/QueryTest/compute-stats-incremental.test
index 064c23a..e76170a 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/compute-stats-incremental.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/compute-stats-incremental.test
@@ -48,17 +48,17 @@ show column stats alltypes_incremental
 ---- LABELS
 COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
 ---- RESULTS
-'id','INT',7300,-1,4,4
-'bool_col','BOOLEAN',2,-1,1,1
-'tinyint_col','TINYINT',10,-1,1,1
-'smallint_col','SMALLINT',10,-1,2,2
-'int_col','INT',10,-1,4,4
-'bigint_col','BIGINT',10,-1,8,8
-'float_col','FLOAT',10,-1,4,4
-'double_col','DOUBLE',10,-1,8,8
-'date_string_col','STRING',736,-1,8,8
-'string_col','STRING',10,-1,1,1
-'timestamp_col','TIMESTAMP',7300,-1,16,16
+'id','INT',7300,0,4,4
+'bool_col','BOOLEAN',2,0,1,1
+'tinyint_col','TINYINT',10,0,1,1
+'smallint_col','SMALLINT',10,0,2,2
+'int_col','INT',10,0,4,4
+'bigint_col','BIGINT',10,0,8,8
+'float_col','FLOAT',10,0,4,4
+'double_col','DOUBLE',10,0,8,8
+'date_string_col','STRING',736,0,8,8
+'string_col','STRING',10,0,1,1
+'timestamp_col','TIMESTAMP',7300,0,16,16
 'year','INT',2,0,4,4
 'month','INT',12,0,4,4
 ---- TYPES
@@ -141,17 +141,17 @@ show column stats alltypes_incremental
 ---- LABELS
 COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
 ---- RESULTS
-'id','INT',7300,-1,4,4
-'bool_col','BOOLEAN',2,-1,1,1
-'tinyint_col','TINYINT',10,-1,1,1
-'smallint_col','SMALLINT',10,-1,2,2
-'int_col','INT',10,-1,4,4
-'bigint_col','BIGINT',10,-1,8,8
-'float_col','FLOAT',10,-1,4,4
-'double_col','DOUBLE',10,-1,8,8
-'date_string_col','STRING',736,-1,8,8
-'string_col','STRING',10,-1,1,1
-'timestamp_col','TIMESTAMP',7300,-1,16,16
+'id','INT',7300,0,4,4
+'bool_col','BOOLEAN',2,0,1,1
+'tinyint_col','TINYINT',10,0,1,1
+'smallint_col','SMALLINT',10,0,2,2
+'int_col','INT',10,0,4,4
+'bigint_col','BIGINT',10,0,8,8
+'float_col','FLOAT',10,0,4,4
+'double_col','DOUBLE',10,0,8,8
+'date_string_col','STRING',736,0,8,8
+'string_col','STRING',10,0,1,1
+'timestamp_col','TIMESTAMP',7300,0,16,16
 'year','INT',2,0,4,4
 'month','INT',12,0,4,4
 ---- TYPES
@@ -242,17 +242,17 @@ show column stats alltypes_incremental
 ---- LABELS
 COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
 ---- RESULTS
-'id','INT',6990,-1,4,4
-'bool_col','BOOLEAN',2,-1,1,1
-'tinyint_col','TINYINT',10,-1,1,1
-'smallint_col','SMALLINT',10,-1,2,2
-'int_col','INT',10,-1,4,4
-'bigint_col','BIGINT',10,-1,8,8
-'float_col','FLOAT',10,-1,4,4
-'double_col','DOUBLE',10,-1,8,8
-'date_string_col','STRING',688,-1,8,8
-'string_col','STRING',10,-1,1,1
-'timestamp_col','TIMESTAMP',6990,-1,16,16
+'id','INT',6990,0,4,4
+'bool_col','BOOLEAN',2,0,1,1
+'tinyint_col','TINYINT',10,0,1,1
+'smallint_col','SMALLINT',10,0,2,2
+'int_col','INT',10,0,4,4
+'bigint_col','BIGINT',10,0,8,8
+'float_col','FLOAT',10,0,4,4
+'double_col','DOUBLE',10,0,8,8
+'date_string_col','STRING',688,0,8,8
+'string_col','STRING',10,0,1,1
+'timestamp_col','TIMESTAMP',6990,0,16,16
 'year','INT',2,0,4,4
 'month','INT',12,0,4,4
 ---- TYPES
@@ -305,17 +305,17 @@ show column stats alltypes_incremental
 ---- LABELS
 COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
 ---- RESULTS
-'id','INT',7300,-1,4,4
-'bool_col','BOOLEAN',2,-1,1,1
-'tinyint_col','TINYINT',10,-1,1,1
-'smallint_col','SMALLINT',10,-1,2,2
-'int_col','INT',10,-1,4,4
-'bigint_col','BIGINT',10,-1,8,8
-'float_col','FLOAT',10,-1,4,4
-'double_col','DOUBLE',10,-1,8,8
-'date_string_col','STRING',736,-1,8,8
-'string_col','STRING',10,-1,1,1
-'timestamp_col','TIMESTAMP',7300,-1,16,16
+'id','INT',7300,0,4,4
+'bool_col','BOOLEAN',2,0,1,1
+'tinyint_col','TINYINT',10,0,1,1
+'smallint_col','SMALLINT',10,0,2,2
+'int_col','INT',10,0,4,4
+'bigint_col','BIGINT',10,0,8,8
+'float_col','FLOAT',10,0,4,4
+'double_col','DOUBLE',10,0,8,8
+'date_string_col','STRING',736,0,8,8
+'string_col','STRING',10,0,1,1
+'timestamp_col','TIMESTAMP',7300,0,16,16
 'year','INT',2,0,4,4
 'month','INT',12,0,4,4
 ---- TYPES
@@ -546,14 +546,14 @@ show column stats chars_tbl
 ---- LABELS
 COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
 ---- RESULTS
-'id','INT',2915,-1,4,4
-'ch1','CHAR(1)',1,-1,1,1
-'ch2','CHAR(8)',10,-1,8,8
-'ch3','CHAR(20)',10,-1,8,8
-'ts','TIMESTAMP',2871,-1,16,16
-'vc1','VARCHAR(1)',1,-1,1,1
-'vc2','VARCHAR(8)',10,-1,8,8
-'vc3','VARCHAR(20)',10,-1,8,8
+'id','INT',2915,0,4,4
+'ch1','CHAR(1)',1,0,1,1
+'ch2','CHAR(8)',10,0,8,8
+'ch3','CHAR(20)',10,0,8,8
+'ts','TIMESTAMP',2871,0,16,16
+'vc1','VARCHAR(1)',1,0,1,1
+'vc2','VARCHAR(8)',10,0,8,8
+'vc3','VARCHAR(20)',10,0,8,8
 'year','CHAR(5)',1,0,5,5
 'day','VARCHAR(13)',3,1,-1,-1
 ---- TYPES
@@ -576,14 +576,14 @@ show column stats chars_tbl
 ---- LABELS
 COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
 ---- RESULTS
-'id','INT',2915,0,4,4
-'ch1','CHAR(1)',2,-1,1,1
-'ch2','CHAR(8)',11,-1,8,7.99766731262207
-'ch3','CHAR(20)',11,-1,8,7.99766731262207
-'ts','TIMESTAMP',2871,0,16,16
-'vc1','VARCHAR(1)',2,-1,1,1
-'vc2','VARCHAR(8)',11,-1,8,7.99766731262207
-'vc3','VARCHAR(20)',11,-1,8,7.99766731262207
+'id','INT',2915,1,4,4
+'ch1','CHAR(1)',2,0,1,1
+'ch2','CHAR(8)',11,0,8,7.99766731262207
+'ch3','CHAR(20)',11,0,8,7.99766731262207
+'ts','TIMESTAMP',2871,1,16,16
+'vc1','VARCHAR(1)',2,0,1,1
+'vc2','VARCHAR(8)',11,0,8,7.99766731262207
+'vc3','VARCHAR(20)',11,0,8,7.99766731262207
 'year','CHAR(5)',2,0,5,5
 'day','VARCHAR(13)',4,1,-1,-1
 ---- TYPES
@@ -621,4 +621,4 @@ compute incremental stats complextypestbl_part;
 'Updated 1 partition(s) and 1 column(s).'
 ---- TYPES
 STRING
-====
\ No newline at end of file
+====
diff --git a/testdata/workloads/functional-query/queries/QueryTest/truncate-table.test b/testdata/workloads/functional-query/queries/QueryTest/truncate-table.test
index a8d2a80..9ad769f 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/truncate-table.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/truncate-table.test
@@ -42,17 +42,17 @@ show column stats t1;
 ---- LABELS
 COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
 ---- RESULTS
-'id','INT',7300,-1,4,4
-'bool_col','BOOLEAN',2,-1,1,1
-'tinyint_col','TINYINT',10,-1,1,1
-'smallint_col','SMALLINT',10,-1,2,2
-'int_col','INT',10,-1,4,4
-'bigint_col','BIGINT',10,-1,8,8
-'float_col','FLOAT',10,-1,4,4
-'double_col','DOUBLE',10,-1,8,8
-'date_string_col','STRING',736,-1,8,8
-'string_col','STRING',10,-1,1,1
-'timestamp_col','TIMESTAMP',7300,-1,16,16
+'id','INT',7300,0,4,4
+'bool_col','BOOLEAN',2,0,1,1
+'tinyint_col','TINYINT',10,0,1,1
+'smallint_col','SMALLINT',10,0,2,2
+'int_col','INT',10,0,4,4
+'bigint_col','BIGINT',10,0,8,8
+'float_col','FLOAT',10,0,4,4
+'double_col','DOUBLE',10,0,8,8
+'date_string_col','STRING',736,0,8,8
+'string_col','STRING',10,0,1,1
+'timestamp_col','TIMESTAMP',7300,0,16,16
 'year','INT',2,0,4,4
 'month','INT',12,0,4,4
 ---- TYPES
@@ -135,8 +135,8 @@ show column stats t2;
 ---- LABELS
 COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
 ---- RESULTS
-'a','STRING',3,-1,8,6.666666507720947
-'b','STRING',3,-1,7,4
+'a','STRING',3,0,8,6.666666507720947
+'b','STRING',3,0,7,4
 ---- TYPES
 STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
 ====