You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2018/02/17 01:42:02 UTC
impala git commit: IMPALA-6258: Uninitialized tuple pointers in row
batch for empty rows
Repository: impala
Updated Branches:
refs/heads/2.x a4bdded6e -> 3a386d975
IMPALA-6258: Uninitialized tuple pointers in row batch for empty rows
Tuple pointers in the generated row batches may not be initialized
if a tuple has byte size 0. There are some codes which compare these
uninitialized pointers against nullptr so having them uninitialized
may return wrong (and non-deterministic) results, e.g.:
impala::TupleIsNullPredicate::GetBooleanVal()
The following query produces non-deterministic results currently:
SELECT count(v.x) FROM functional.alltypestiny t3 LEFT OUTER JOIN (
SELECT true AS x FROM functional.alltypestiny t1 LEFT OUTER JOIN
functional.alltypestiny t2 ON (true)) v ON (v.x = t3.bool_col)
WHERE t3.bool_col = true;
The alltypestiny table has 8 records, 4 records of them has the true
boolean value for bool_col. Therefore, the above query is a fancy
way of saying "8 * 8 * 4", i.e. it should return 256.
The solution is that scanners initialize tuple pointers to a non-null
value when there are no materialized slots. This non-null value is
provided by the new static member Tuple::POISON.
I extended QueryTest/scanners.test with the above query. This test
executes the query against all table formats.
This change has the biggest performance impact on count(*) queries on
large kudu tables. For my quick benchmark I copied tpch_kudu.lineitem
and doubled its data. The resulting table has 12,002,430 rows.
Without this patch 'select count(*) from biglineitem' runs for ~0.12s.
With the patch applied, the overhead is around a dozens of ms. I measured
the query on my desktop PC using a relase build of Impala. On debug builds,
the execution time of the patched version is around 160% of the original
version.
Without this patch:
+--------------+--------+----------+----------+--------+------------+-----------+---------------+---------------------+
| Operator | #Hosts | Avg Time | Max Time | #Rows | Est. #Rows | Peak Mem | Est. Peak Mem | Detail |
+--------------+--------+----------+----------+--------+------------+-----------+---------------+---------------------+
| 03:AGGREGATE | 1 | 127.50us | 127.50us | 1 | 1 | 28.00 KB | 10.00 MB | FINALIZE |
| 02:EXCHANGE | 1 | 22.32ms | 22.32ms | 3 | 1 | 0 B | 0 B | UNPARTITIONED |
| 01:AGGREGATE | 3 | 1.78ms | 1.89ms | 3 | 1 | 16.00 KB | 10.00 MB | |
| 00:SCAN KUDU | 3 | 8.00ms | 8.28ms | 12.00M | -1 | 512.00 KB | 0 B | default.biglineitem |
+--------------+--------+----------+----------+--------+------------+-----------+---------------+---------------------+
With this patch:
+--------------+--------+----------+----------+--------+------------+-----------+---------------+---------------------+
| Operator | #Hosts | Avg Time | Max Time | #Rows | Est. #Rows | Peak Mem | Est. Peak Mem | Detail |
+--------------+--------+----------+----------+--------+------------+-----------+---------------+---------------------+
| 03:AGGREGATE | 1 | 129.01us | 129.01us | 1 | 1 | 28.00 KB | 10.00 MB | FINALIZE |
| 02:EXCHANGE | 1 | 33.00ms | 33.00ms | 3 | 1 | 0 B | 0 B | UNPARTITIONED |
| 01:AGGREGATE | 3 | 1.99ms | 2.13ms | 3 | 1 | 16.00 KB | 10.00 MB | |
| 00:SCAN KUDU | 3 | 13.13ms | 13.97ms | 12.00M | -1 | 512.00 KB | 0 B | default.biglineitem |
+--------------+--------+----------+----------+--------+------------+-----------+---------------+---------------------+
Change-Id: I298122aaaa7e62eb5971508e0698e189519755de
Reviewed-on: http://gerrit.cloudera.org:8080/9239
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Impala Public Jenkins
Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/3a386d97
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/3a386d97
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/3a386d97
Branch: refs/heads/2.x
Commit: 3a386d9752c525b625c967f4e9bc6842a09d0c57
Parents: a4bdded
Author: Zoltan Borok-Nagy <bo...@cloudera.com>
Authored: Wed Feb 7 17:05:00 2018 +0100
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Fri Feb 16 21:10:14 2018 +0000
----------------------------------------------------------------------
be/src/exec/hdfs-scanner.cc | 4 +++-
be/src/exec/kudu-scanner.cc | 7 ++++++-
be/src/runtime/tuple.cc | 2 ++
be/src/runtime/tuple.h | 4 ++++
.../functional-query/queries/QueryTest/scanners.test | 12 ++++++++++++
5 files changed, 27 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/impala/blob/3a386d97/be/src/exec/hdfs-scanner.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-scanner.cc b/be/src/exec/hdfs-scanner.cc
index f934f79..d62ccc7 100644
--- a/be/src/exec/hdfs-scanner.cc
+++ b/be/src/exec/hdfs-scanner.cc
@@ -220,11 +220,13 @@ int HdfsScanner::WriteTemplateTuples(TupleRow* row, int num_tuples) {
if (EvalConjuncts(&template_tuple_row)) ++num_to_commit;
}
}
+ Tuple** row_tuple = reinterpret_cast<Tuple**>(row);
if (template_tuple_ != nullptr) {
- Tuple** row_tuple = reinterpret_cast<Tuple**>(row);
for (int i = 0; i < num_to_commit; ++i) row_tuple[i] = template_tuple_;
} else {
DCHECK_EQ(scan_node_->tuple_desc()->byte_size(), 0);
+ // IMPALA-6258: Initialize tuple ptrs to non-null value
+ for (int i = 0; i < num_to_commit; ++i) row_tuple[i] = Tuple::POISON;
}
return num_to_commit;
}
http://git-wip-us.apache.org/repos/asf/impala/blob/3a386d97/be/src/exec/kudu-scanner.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/kudu-scanner.cc b/be/src/exec/kudu-scanner.cc
index a9b56fe..909567c 100644
--- a/be/src/exec/kudu-scanner.cc
+++ b/be/src/exec/kudu-scanner.cc
@@ -255,8 +255,13 @@ Status KuduScanner::HandleEmptyProjection(RowBatch* row_batch) {
}
}
}
+ for (int i = 0; i < num_to_commit; ++i) {
+ // IMPALA-6258: Initialize tuple ptrs to non-null value
+ TupleRow* row = row_batch->GetRow(row_batch->AddRow());
+ row->SetTuple(0, Tuple::POISON);
+ row_batch->CommitLastRow();
+ }
cur_kudu_batch_num_read_ += rows_to_add;
- row_batch->CommitRows(num_to_commit);
return Status::OK();
}
http://git-wip-us.apache.org/repos/asf/impala/blob/3a386d97/be/src/runtime/tuple.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/tuple.cc b/be/src/runtime/tuple.cc
index cab7686..787c3a4 100644
--- a/be/src/runtime/tuple.cc
+++ b/be/src/runtime/tuple.cc
@@ -46,6 +46,8 @@ const char* SlotOffsets::LLVM_CLASS_NAME = "struct.impala::SlotOffsets";
const char* Tuple::MATERIALIZE_EXPRS_SYMBOL = "MaterializeExprsILb0ELb0";
const char* Tuple::MATERIALIZE_EXPRS_NULL_POOL_SYMBOL = "MaterializeExprsILb0ELb1";
+Tuple* const Tuple::POISON = reinterpret_cast<Tuple*>(42L);
+
int64_t Tuple::TotalByteSize(const TupleDescriptor& desc) const {
int64_t result = desc.byte_size();
if (!desc.HasVarlenSlots()) return result;
http://git-wip-us.apache.org/repos/asf/impala/blob/3a386d97/be/src/runtime/tuple.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/tuple.h b/be/src/runtime/tuple.h
index 3286f10..ae9be32 100644
--- a/be/src/runtime/tuple.h
+++ b/be/src/runtime/tuple.h
@@ -84,6 +84,10 @@ class Tuple {
return result;
}
+ /// Pointer that marks an invalid Tuple address. Rather than leaving Tuple
+ /// pointers uninitialized, they should point to the value of POISON.
+ static Tuple* const POISON;
+
void Init(int size) { memset(this, 0, size); }
void ClearNullBits(const TupleDescriptor& tuple_desc) {
http://git-wip-us.apache.org/repos/asf/impala/blob/3a386d97/testdata/workloads/functional-query/queries/QueryTest/scanners.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/scanners.test b/testdata/workloads/functional-query/queries/QueryTest/scanners.test
index 99ec5c5..bd5f496 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/scanners.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/scanners.test
@@ -98,3 +98,15 @@ select count(*) from alltypes where rand() - year > month;
---- TYPES
BIGINT
====
+---- QUERY
+# IMPALA-6258: Uninitialized tuple pointers in row batch for empty rows
+# The following query was non-deterministic because of this bug
+select count(v.x) from alltypestiny t3 left outer join (
+ select true as x from alltypestiny t1 left outer join
+ alltypestiny t2 on (true)) v
+on (v.x = t3.bool_col) where t3.bool_col = true
+---- RESULTS
+256
+---- TYPES
+BIGINT
+====