You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by mi...@apache.org on 2017/09/22 17:13:24 UTC
[3/4] incubator-impala git commit: IMPALA-5965: avoid per-value
switch on NeedsConversionInline() in parquet
IMPALA-5965: avoid per-value switch on NeedsConversionInline() in parquet
Testing:
========
Ran core tests
Perf:
====
Ran this query a few times:
set num_nodes=1;
set mt_dop=1;
select min(l_returnflag), min(l_linestatus) from biglineitem;
summary;
I saw a speedup in scan time from ~2.25s -> 2.11s on my machine.
Change-Id: I04fb4ca73978d0100e1eb9835b87d37540b8b645
Reviewed-on: http://gerrit.cloudera.org:8080/8117
Reviewed-by: Lars Volker <lv...@cloudera.com>
Reviewed-by: Dan Hecht <dh...@cloudera.com>
Tested-by: Impala Public Jenkins
Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/98df9076
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/98df9076
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/98df9076
Branch: refs/heads/master
Commit: 98df90767a069585822eac8393a01a872e15d215
Parents: 71fd194
Author: Tim Armstrong <ta...@cloudera.com>
Authored: Wed Sep 20 13:41:24 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Fri Sep 22 03:45:28 2017 +0000
----------------------------------------------------------------------
be/src/exec/parquet-column-readers.cc | 39 ++++++++++++++++++++++++------
1 file changed, 31 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/98df9076/be/src/exec/parquet-column-readers.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/parquet-column-readers.cc b/be/src/exec/parquet-column-readers.cc
index d55b545..de63859 100644
--- a/be/src/exec/parquet-column-readers.cc
+++ b/be/src/exec/parquet-column-readers.cc
@@ -280,9 +280,17 @@ class ScalarColumnReader : public BaseScalarColumnReader {
if (MATERIALIZED) {
if (def_level_ >= max_def_level()) {
if (page_encoding_ == parquet::Encoding::PLAIN_DICTIONARY) {
- if (!ReadSlot<true>(tuple, pool)) return false;
+ if (NeedsConversionInline()) {
+ if (!ReadSlot<true, true>(tuple, pool)) return false;
+ } else {
+ if (!ReadSlot<true, false>(tuple, pool)) return false;
+ }
} else {
- if (!ReadSlot<false>(tuple, pool)) return false;
+ if (NeedsConversionInline()) {
+ if (!ReadSlot<false, true>(tuple, pool)) return false;
+ } else {
+ if (!ReadSlot<false, false>(tuple, pool)) return false;
+ }
}
} else {
tuple->SetNull(null_indicator_offset_);
@@ -372,7 +380,7 @@ class ScalarColumnReader : public BaseScalarColumnReader {
/// conservatively assumes that buffers like 'tuple_mem', 'num_values' or the
/// 'def_levels_' 'rep_levels_' buffers may alias 'this', especially with
/// -fno-strict-alias).
- template <bool IN_COLLECTION, bool IS_DICT_ENCODED>
+ template <bool IN_COLLECTION, bool IS_DICT_ENCODED, bool NEEDS_CONVERSION>
bool MaterializeValueBatch(MemPool* RESTRICT pool, int max_values, int tuple_size,
uint8_t* RESTRICT tuple_mem, int* RESTRICT num_values) RESTRICT {
DCHECK(MATERIALIZED || IN_COLLECTION);
@@ -404,7 +412,8 @@ class ScalarColumnReader : public BaseScalarColumnReader {
if (MATERIALIZED) {
if (def_level >= max_def_level()) {
- bool continue_execution = ReadSlot<IS_DICT_ENCODED>(tuple, pool);
+ bool continue_execution =
+ ReadSlot<IS_DICT_ENCODED, NEEDS_CONVERSION>(tuple, pool);
if (UNLIKELY(!continue_execution)) return false;
} else {
tuple->SetNull(null_indicator_offset_);
@@ -417,6 +426,20 @@ class ScalarColumnReader : public BaseScalarColumnReader {
return true;
}
+ // Dispatch to the correct templated implementation of MaterializeValueBatch based
+ // on NeedsConversionInline().
+ template <bool IN_COLLECTION, bool IS_DICT_ENCODED>
+ bool MaterializeValueBatch(MemPool* RESTRICT pool, int max_values, int tuple_size,
+ uint8_t* RESTRICT tuple_mem, int* RESTRICT num_values) RESTRICT {
+ if (NeedsConversionInline()) {
+ return MaterializeValueBatch<IN_COLLECTION, IS_DICT_ENCODED, true>(
+ pool, max_values, tuple_size, tuple_mem, num_values);
+ } else {
+ return MaterializeValueBatch<IN_COLLECTION, IS_DICT_ENCODED, false>(
+ pool, max_values, tuple_size, tuple_mem, num_values);
+ }
+ }
+
virtual Status CreateDictionaryDecoder(uint8_t* values, int size,
DictDecoderBase** decoder) {
if (!dict_decoder_.Reset(values, size, fixed_len_size_)) {
@@ -470,13 +493,13 @@ class ScalarColumnReader : public BaseScalarColumnReader {
/// true.
///
/// Force inlining - GCC does not always inline this into hot loops.
- template <bool IS_DICT_ENCODED>
+ template <bool IS_DICT_ENCODED, bool NEEDS_CONVERSION>
ALWAYS_INLINE bool ReadSlot(Tuple* tuple, MemPool* pool) {
void* slot = tuple->GetSlot(tuple_offset_);
// Use an uninitialized stack allocation for temporary value to avoid running
// constructors doing work unnecessarily, e.g. if T == StringValue.
alignas(T) uint8_t val_buf[sizeof(T)];
- T* val_ptr = reinterpret_cast<T*>(NeedsConversionInline() ? val_buf : slot);
+ T* val_ptr = reinterpret_cast<T*>(NEEDS_CONVERSION ? val_buf : slot);
if (IS_DICT_ENCODED) {
DCHECK_EQ(page_encoding_, parquet::Encoding::PLAIN_DICTIONARY);
if (UNLIKELY(!dict_decoder_.GetNextValue(val_ptr))) {
@@ -497,8 +520,8 @@ class ScalarColumnReader : public BaseScalarColumnReader {
if (UNLIKELY(NeedsValidationInline() && !ValidateSlot(val_ptr, tuple))) {
return false;
}
- if (UNLIKELY(NeedsConversionInline() && !tuple->IsNull(null_indicator_offset_)
- && !ConvertSlot(val_ptr, slot, pool))) {
+ if (NEEDS_CONVERSION && !tuple->IsNull(null_indicator_offset_)
+ && UNLIKELY(!ConvertSlot(val_ptr, slot, pool))) {
return false;
}
return true;