You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2018/02/17 01:42:02 UTC

impala git commit: IMPALA-6258: Uninitialized tuple pointers in row batch for empty rows

Repository: impala
Updated Branches:
  refs/heads/2.x a4bdded6e -> 3a386d975


IMPALA-6258: Uninitialized tuple pointers in row batch for empty rows

Tuple pointers in the generated row batches may not be initialized
if a tuple has byte size 0. There are some codes which compare these
uninitialized pointers against nullptr so having them uninitialized
may return wrong (and non-deterministic) results, e.g.:
impala::TupleIsNullPredicate::GetBooleanVal()

The following query produces non-deterministic results currently:

SELECT count(v.x) FROM functional.alltypestiny t3 LEFT OUTER JOIN (
  SELECT true AS x FROM functional.alltypestiny t1 LEFT OUTER JOIN
  functional.alltypestiny t2 ON (true)) v ON (v.x = t3.bool_col)
WHERE t3.bool_col = true;

The alltypestiny table has 8 records, 4 records of them has the true
boolean value for bool_col. Therefore, the above query is a fancy
way of saying "8 * 8 * 4", i.e. it should return 256.

The solution is that scanners initialize tuple pointers to a non-null
value when there are no materialized slots. This non-null value is
provided by the new static member Tuple::POISON.

I extended QueryTest/scanners.test with the above query. This test
executes the query against all table formats.

This change has the biggest performance impact on count(*) queries on
large kudu tables. For my quick benchmark I copied tpch_kudu.lineitem
and doubled its data. The resulting table has 12,002,430 rows.

Without this patch 'select count(*) from biglineitem' runs for ~0.12s.
With the patch applied, the overhead is around a dozens of ms. I measured
the query on my desktop PC using a relase build of Impala. On debug builds,
the execution time of the patched version is around 160% of the original
version.

Without this patch:
+--------------+--------+----------+----------+--------+------------+-----------+---------------+---------------------+
| Operator     | #Hosts | Avg Time | Max Time | #Rows  | Est. #Rows | Peak Mem  | Est. Peak Mem | Detail              |
+--------------+--------+----------+----------+--------+------------+-----------+---------------+---------------------+
| 03:AGGREGATE | 1      | 127.50us | 127.50us | 1      | 1          | 28.00 KB  | 10.00 MB      | FINALIZE            |
| 02:EXCHANGE  | 1      | 22.32ms  | 22.32ms  | 3      | 1          | 0 B       | 0 B           | UNPARTITIONED       |
| 01:AGGREGATE | 3      | 1.78ms   | 1.89ms   | 3      | 1          | 16.00 KB  | 10.00 MB      |                     |
| 00:SCAN KUDU | 3      | 8.00ms   | 8.28ms   | 12.00M | -1         | 512.00 KB | 0 B           | default.biglineitem |
+--------------+--------+----------+----------+--------+------------+-----------+---------------+---------------------+

With this patch:
+--------------+--------+----------+----------+--------+------------+-----------+---------------+---------------------+
| Operator     | #Hosts | Avg Time | Max Time | #Rows  | Est. #Rows | Peak Mem  | Est. Peak Mem | Detail              |
+--------------+--------+----------+----------+--------+------------+-----------+---------------+---------------------+
| 03:AGGREGATE | 1      | 129.01us | 129.01us | 1      | 1          | 28.00 KB  | 10.00 MB      | FINALIZE            |
| 02:EXCHANGE  | 1      | 33.00ms  | 33.00ms  | 3      | 1          | 0 B       | 0 B           | UNPARTITIONED       |
| 01:AGGREGATE | 3      | 1.99ms   | 2.13ms   | 3      | 1          | 16.00 KB  | 10.00 MB      |                     |
| 00:SCAN KUDU | 3      | 13.13ms  | 13.97ms  | 12.00M | -1         | 512.00 KB | 0 B           | default.biglineitem |
+--------------+--------+----------+----------+--------+------------+-----------+---------------+---------------------+

Change-Id: I298122aaaa7e62eb5971508e0698e189519755de
Reviewed-on: http://gerrit.cloudera.org:8080/9239
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/3a386d97
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/3a386d97
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/3a386d97

Branch: refs/heads/2.x
Commit: 3a386d9752c525b625c967f4e9bc6842a09d0c57
Parents: a4bdded
Author: Zoltan Borok-Nagy <bo...@cloudera.com>
Authored: Wed Feb 7 17:05:00 2018 +0100
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Fri Feb 16 21:10:14 2018 +0000

----------------------------------------------------------------------
 be/src/exec/hdfs-scanner.cc                             |  4 +++-
 be/src/exec/kudu-scanner.cc                             |  7 ++++++-
 be/src/runtime/tuple.cc                                 |  2 ++
 be/src/runtime/tuple.h                                  |  4 ++++
 .../functional-query/queries/QueryTest/scanners.test    | 12 ++++++++++++
 5 files changed, 27 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/3a386d97/be/src/exec/hdfs-scanner.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-scanner.cc b/be/src/exec/hdfs-scanner.cc
index f934f79..d62ccc7 100644
--- a/be/src/exec/hdfs-scanner.cc
+++ b/be/src/exec/hdfs-scanner.cc
@@ -220,11 +220,13 @@ int HdfsScanner::WriteTemplateTuples(TupleRow* row, int num_tuples) {
       if (EvalConjuncts(&template_tuple_row)) ++num_to_commit;
     }
   }
+  Tuple** row_tuple = reinterpret_cast<Tuple**>(row);
   if (template_tuple_ != nullptr) {
-    Tuple** row_tuple = reinterpret_cast<Tuple**>(row);
     for (int i = 0; i < num_to_commit; ++i) row_tuple[i] = template_tuple_;
   } else {
     DCHECK_EQ(scan_node_->tuple_desc()->byte_size(), 0);
+    // IMPALA-6258: Initialize tuple ptrs to non-null value
+    for (int i = 0; i < num_to_commit; ++i) row_tuple[i] = Tuple::POISON;
   }
   return num_to_commit;
 }

http://git-wip-us.apache.org/repos/asf/impala/blob/3a386d97/be/src/exec/kudu-scanner.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/kudu-scanner.cc b/be/src/exec/kudu-scanner.cc
index a9b56fe..909567c 100644
--- a/be/src/exec/kudu-scanner.cc
+++ b/be/src/exec/kudu-scanner.cc
@@ -255,8 +255,13 @@ Status KuduScanner::HandleEmptyProjection(RowBatch* row_batch) {
       }
     }
   }
+  for (int i = 0; i < num_to_commit; ++i) {
+    // IMPALA-6258: Initialize tuple ptrs to non-null value
+    TupleRow* row = row_batch->GetRow(row_batch->AddRow());
+    row->SetTuple(0, Tuple::POISON);
+    row_batch->CommitLastRow();
+  }
   cur_kudu_batch_num_read_ += rows_to_add;
-  row_batch->CommitRows(num_to_commit);
   return Status::OK();
 }
 

http://git-wip-us.apache.org/repos/asf/impala/blob/3a386d97/be/src/runtime/tuple.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/tuple.cc b/be/src/runtime/tuple.cc
index cab7686..787c3a4 100644
--- a/be/src/runtime/tuple.cc
+++ b/be/src/runtime/tuple.cc
@@ -46,6 +46,8 @@ const char* SlotOffsets::LLVM_CLASS_NAME = "struct.impala::SlotOffsets";
 const char* Tuple::MATERIALIZE_EXPRS_SYMBOL = "MaterializeExprsILb0ELb0";
 const char* Tuple::MATERIALIZE_EXPRS_NULL_POOL_SYMBOL = "MaterializeExprsILb0ELb1";
 
+Tuple* const Tuple::POISON = reinterpret_cast<Tuple*>(42L);
+
 int64_t Tuple::TotalByteSize(const TupleDescriptor& desc) const {
   int64_t result = desc.byte_size();
   if (!desc.HasVarlenSlots()) return result;

http://git-wip-us.apache.org/repos/asf/impala/blob/3a386d97/be/src/runtime/tuple.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/tuple.h b/be/src/runtime/tuple.h
index 3286f10..ae9be32 100644
--- a/be/src/runtime/tuple.h
+++ b/be/src/runtime/tuple.h
@@ -84,6 +84,10 @@ class Tuple {
     return result;
   }
 
+  /// Pointer that marks an invalid Tuple address. Rather than leaving Tuple
+  /// pointers uninitialized, they should point to the value of POISON.
+  static Tuple* const POISON;
+
   void Init(int size) { memset(this, 0, size); }
 
   void ClearNullBits(const TupleDescriptor& tuple_desc) {

http://git-wip-us.apache.org/repos/asf/impala/blob/3a386d97/testdata/workloads/functional-query/queries/QueryTest/scanners.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/scanners.test b/testdata/workloads/functional-query/queries/QueryTest/scanners.test
index 99ec5c5..bd5f496 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/scanners.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/scanners.test
@@ -98,3 +98,15 @@ select count(*) from alltypes where rand() - year > month;
 ---- TYPES
 BIGINT
 ====
+---- QUERY
+# IMPALA-6258: Uninitialized tuple pointers in row batch for empty rows
+# The following query was non-deterministic because of this bug
+select count(v.x) from alltypestiny t3 left outer join (
+  select true as x from alltypestiny t1 left outer join
+  alltypestiny t2 on (true)) v
+on (v.x = t3.bool_col) where t3.bool_col = true
+---- RESULTS
+256
+---- TYPES
+BIGINT
+====