You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2020/03/31 19:04:41 UTC
[impala] branch master updated: IMPALA-9572: Fix DCHECK in nested
Parquet scanning
This is an automated email from the ASF dual-hosted git repository.
tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new e8f604a IMPALA-9572: Fix DCHECK in nested Parquet scanning
e8f604a is described below
commit e8f604a2139be2ee3f011e6f2ce71fa0dde26492
Author: Csaba Ringhofer <cs...@cloudera.com>
AuthorDate: Mon Mar 30 20:15:09 2020 +0200
IMPALA-9572: Fix DCHECK in nested Parquet scanning
The issue occurred when there were skipped pages and a column
inside a collection was scanned, but its position was not needed.
The repetition level still needs to be read in this case, as the
skipped ranges are set in top level rows, so collection items
need to know which top level row do they belong to.
A DCHECK in StrideWriter's constructor was hit, otherwise the
code ran correctly in release mode. The DCHECK is moved to
functions where the condition would actually cause problems.
Testing:
- added and ran a regression test
Change-Id: I5e8ef514ead71f732c73f910af7fd1aecd37bb81
Reviewed-on: http://gerrit.cloudera.org:8080/15598
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
be/src/exec/parquet/parquet-column-readers.cc | 2 +-
be/src/util/mem-util.h | 9 ++++++++-
.../queries/QueryTest/nested-types-parquet-page-index.test | 9 +++++++++
3 files changed, 18 insertions(+), 2 deletions(-)
diff --git a/be/src/exec/parquet/parquet-column-readers.cc b/be/src/exec/parquet/parquet-column-readers.cc
index 512632b..58cc12f 100644
--- a/be/src/exec/parquet/parquet-column-readers.cc
+++ b/be/src/exec/parquet/parquet-column-readers.cc
@@ -1323,7 +1323,7 @@ int BaseScalarColumnReader::FillPositionsInCandidateRange(int rows_remaining,
int rep_level = rep_levels_.CacheGetNext();
if (rep_level == 0) ++row_count;
++val_count;
- if (pos_slot_desc_ != nullptr) {
+ if (pos_writer.IsValid()) {
if (rep_level <= max_rep_level() - 1) pos_current_value_ = 0;
*pos_writer.Advance() = pos_current_value_++;
}
diff --git a/be/src/util/mem-util.h b/be/src/util/mem-util.h
index a7125a9..16e8085 100644
--- a/be/src/util/mem-util.h
+++ b/be/src/util/mem-util.h
@@ -31,17 +31,21 @@ namespace impala {
template <typename T>
struct StrideWriter {
// The next element to write to. May not be aligned to the natural alignment of T.
+ // Can be null for convenience, but the StrideWriter cannot be used in that case.
T* current;
// The stride in bytes between subsequent values.
int64_t stride;
explicit StrideWriter(T* current, int64_t stride) : current(current), stride(stride) {
- DCHECK(current != nullptr);
}
+ /// No other functions can be called if false.
+ ALWAYS_INLINE bool IsValid() const { return current != nullptr; }
+
/// Set the next element to 'val' and advance 'current' to the next element.
ALWAYS_INLINE void SetNext(T& val) {
+ DCHECK(IsValid());
// memcpy() is necessary because 'current' may not be aligned.
memcpy(current, &val, sizeof(T));
SkipNext(1);
@@ -49,6 +53,7 @@ struct StrideWriter {
/// Return a pointer to the current element and advance 'current' to the next element.
ALWAYS_INLINE T* Advance() {
+ DCHECK(IsValid());
T* curr = current;
SkipNext(1);
return curr;
@@ -56,11 +61,13 @@ struct StrideWriter {
/// Set the next 'count' elements to 'val', advancing 'current' by 'count' values.
void SetNext(T& val, int64_t count) {
+ DCHECK(IsValid());
for (int64_t i = 0; i < count; ++i) SetNext(val);
}
/// Advance 'current' by 'count' values.
ALWAYS_INLINE void SkipNext(int64_t count) {
+ DCHECK(IsValid());
DCHECK_GE(count, 0);
current = reinterpret_cast<T*>(reinterpret_cast<uint8_t*>(current) + stride * count);
}
diff --git a/testdata/workloads/functional-query/queries/QueryTest/nested-types-parquet-page-index.test b/testdata/workloads/functional-query/queries/QueryTest/nested-types-parquet-page-index.test
index 8c85b57..8a372f7 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/nested-types-parquet-page-index.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/nested-types-parquet-page-index.test
@@ -702,3 +702,12 @@ BIGINT, DECIMAL
---- RUNTIME_PROFILE
aggregation(SUM, NumStatsFilteredPages): 6
====
+---- QUERY
+# Regression test for IMPALA-9572.
+select count(l_partkey) from tpch_nested_parquet.customer.c_orders.o_lineitems
+where l_partkey < 10
+---- RESULTS
+263
+---- TYPES
+BIGINT
+====