You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2019/02/12 20:44:01 UTC

[impala] branch master updated: IMPALA-5861: fix RowsRead for zero-slot table scan

This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new a154b2d  IMPALA-5861: fix RowsRead for zero-slot table scan
a154b2d is described below

commit a154b2d6e775a508df4fd2c8d51a18d5c1d1f933
Author: Tim Armstrong <ta...@cloudera.com>
AuthorDate: Fri Feb 1 07:13:56 2019 -0800

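    IMPALA-5861: fix RowsRead for zero-slot table scan
    
    In HdfsParquetScanner::GetNextInternal(), the RowsRead counter was
    incremented by the running total row_group_rows_read_ on every batch
    instead of by the number of rows in the current batch (max_tuples),
    which inflated the counter. Add only max_tuples per batch.
    
    Testing: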
    Added regression test based on JIRA and a targeted
    test for all HDFS file formats.
    
    Change-Id: I7a927c6a4f0b8055608cb7a5e2b550a1610cef89
    Reviewed-on: http://gerrit.cloudera.org:8080/12332
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/exec/parquet/hdfs-parquet-scanner.cc        |   2 +-
 .../queries/QueryTest/mixed-format.test            |  14 +++
 .../queries/QueryTest/scanners.test                | 111 +++++++++++++++++++++
 3 files changed, 126 insertions(+), 1 deletion(-)

diff --git a/be/src/exec/parquet/hdfs-parquet-scanner.cc b/be/src/exec/parquet/hdfs-parquet-scanner.cc
index 4fe9914..3836d0b 100644
--- a/be/src/exec/parquet/hdfs-parquet-scanner.cc
+++ b/be/src/exec/parquet/hdfs-parquet-scanner.cc
@@ -400,7 +400,7 @@ Status HdfsParquetScanner::GetNextInternal(RowBatch* row_batch) {
     assemble_rows_timer_.Stop();
     RETURN_IF_ERROR(status);
     row_group_rows_read_ += max_tuples;
-    COUNTER_ADD(scan_node_->rows_read_counter(), row_group_rows_read_);
+    COUNTER_ADD(scan_node_->rows_read_counter(), max_tuples);
     return Status::OK();
   }
 
diff --git a/testdata/workloads/functional-query/queries/QueryTest/mixed-format.test b/testdata/workloads/functional-query/queries/QueryTest/mixed-format.test
index 0b693e1..2d5bf9e 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/mixed-format.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/mixed-format.test
@@ -24,3 +24,17 @@ bigint, bigint
 ---- RESULTS
 280,1260
 ====
+---- QUERY
+# IMPALA-5861: RowsRead counter should be accurate for table scan that returns
+# zero slots. This test is run with various batch_size values, which helps
+# reproduce the bug. Scanning multiple file formats triggers the bug because
+# the Parquet count(*) rewrite is disabled when non-Parquet file formats are
+# present.
+select count(*) from functional.alltypesmixedformat
+---- TYPES
+bigint
+---- RESULTS
+1200
+---- RUNTIME_PROFILE
+aggregation(SUM, RowsRead): 1200
+====
diff --git a/testdata/workloads/functional-query/queries/QueryTest/scanners.test b/testdata/workloads/functional-query/queries/QueryTest/scanners.test
index b05786e..72d6505 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/scanners.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/scanners.test
@@ -128,3 +128,114 @@ select count(*) from alltypessmall
 ---- TYPES
 BIGINT
 ====
+---- QUERY
+# IMPALA-5861: RowsRead counter should be accurate for table scan that materializes
+# zero slots from the files. This test is run with various batch_size values,
+# which helps reproduce the Parquet bug.
+select 1 from alltypessmall
+---- TYPES
+tinyint
+---- RESULTS
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+---- RUNTIME_PROFILE
+aggregation(SUM, RowsRead): 100
+====