You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by sa...@apache.org on 2017/10/05 05:38:57 UTC
[2/3] incubator-impala git commit: IMPALA-5988: optimise
MemPool::TryAllocate()
IMPALA-5988: optimise MemPool::TryAllocate()
Testing:
Ran core tests.
Perf:
Experiments using this on top of a WIP Avro patch for IMPALA-5307
showed noticable improvements in CPU efficiency - up to 10%
Change-Id: I088012084fe535a67a3cd1103ced5a02027f961a
Reviewed-on: http://gerrit.cloudera.org:8080/8145
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Impala Public Jenkins
Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/226c99e0
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/226c99e0
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/226c99e0
Branch: refs/heads/master
Commit: 226c99e01c49d3d25840c5982474740a7dcb9c62
Parents: 4dd0f1b
Author: Tim Armstrong <ta...@cloudera.com>
Authored: Thu Sep 21 16:01:15 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Thu Oct 5 02:50:36 2017 +0000
----------------------------------------------------------------------
be/src/exec/analytic-eval-node.cc | 3 ++-
be/src/exec/data-source-scan-node.cc | 3 ++-
be/src/exec/hdfs-text-scanner.cc | 2 +-
be/src/exec/text-converter.inline.h | 2 +-
be/src/exprs/scalar-expr-evaluator.cc | 2 +-
be/src/runtime/mem-pool.h | 41 ++++++++++++++++++------------
6 files changed, 32 insertions(+), 21 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/226c99e0/be/src/exec/analytic-eval-node.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/analytic-eval-node.cc b/be/src/exec/analytic-eval-node.cc
index af4f866..a3551f3 100644
--- a/be/src/exec/analytic-eval-node.cc
+++ b/be/src/exec/analytic-eval-node.cc
@@ -388,7 +388,8 @@ Status AnalyticEvalNode::AddResultTuple(int64_t stream_idx) {
StringValue* sv = reinterpret_cast<StringValue*>(
result_tuple->GetSlot(slot_desc->tuple_offset()));
if (sv == nullptr || sv->len == 0) continue;
- char* new_ptr = reinterpret_cast<char*>(cur_tuple_pool->TryAllocate(sv->len));
+ char* new_ptr = reinterpret_cast<char*>(
+ cur_tuple_pool->TryAllocateUnaligned(sv->len));
if (UNLIKELY(new_ptr == nullptr)) {
return cur_tuple_pool->mem_tracker()->MemLimitExceeded(nullptr,
"Failed to allocate memory for analytic function's result.", sv->len);
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/226c99e0/be/src/exec/data-source-scan-node.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/data-source-scan-node.cc b/be/src/exec/data-source-scan-node.cc
index 78ba492..2639510 100644
--- a/be/src/exec/data-source-scan-node.cc
+++ b/be/src/exec/data-source-scan-node.cc
@@ -222,7 +222,8 @@ Status DataSourceScanNode::MaterializeNextRow(MemPool* tuple_pool, Tuple* tuple)
}
const string& val = col.string_vals[val_idx];
size_t val_size = val.size();
- char* buffer = reinterpret_cast<char*>(tuple_pool->TryAllocate(val_size));
+ char* buffer = reinterpret_cast<char*>(
+ tuple_pool->TryAllocateUnaligned(val_size));
if (UNLIKELY(buffer == NULL)) {
string details = Substitute(ERROR_MEM_LIMIT_EXCEEDED, "MaterializeNextRow",
val_size, "string slot");
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/226c99e0/be/src/exec/hdfs-text-scanner.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-text-scanner.cc b/be/src/exec/hdfs-text-scanner.cc
index eea4e80..a548ca1 100644
--- a/be/src/exec/hdfs-text-scanner.cc
+++ b/be/src/exec/hdfs-text-scanner.cc
@@ -870,7 +870,7 @@ Status HdfsTextScanner::CopyBoundaryField(FieldLocation* data, MemPool* pool) {
bool needs_escape = data->len < 0;
int copy_len = needs_escape ? -data->len : data->len;
int64_t total_len = copy_len + boundary_column_.len();
- char* str_data = reinterpret_cast<char*>(pool->TryAllocate(total_len));
+ char* str_data = reinterpret_cast<char*>(pool->TryAllocateUnaligned(total_len));
if (UNLIKELY(str_data == nullptr)) {
string details = Substitute("HdfsTextScanner::CopyBoundaryField() failed to allocate "
"$0 bytes.", total_len);
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/226c99e0/be/src/exec/text-converter.inline.h
----------------------------------------------------------------------
diff --git a/be/src/exec/text-converter.inline.h b/be/src/exec/text-converter.inline.h
index d8dea91..49f4e8a 100644
--- a/be/src/exec/text-converter.inline.h
+++ b/be/src/exec/text-converter.inline.h
@@ -79,7 +79,7 @@ inline bool TextConverter::WriteSlot(const SlotDescriptor* slot_desc, Tuple* tup
// 3. HdfsScanner::WriteCompleteTuple() always calls this function with
// 'copy_string' == false.
str.ptr = type.IsVarLenStringType() ?
- reinterpret_cast<char*>(pool->TryAllocate(buffer_len)) :
+ reinterpret_cast<char*>(pool->TryAllocateUnaligned(buffer_len)) :
reinterpret_cast<char*>(slot);
if (UNLIKELY(str.ptr == NULL)) return false;
if (need_escape) {
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/226c99e0/be/src/exprs/scalar-expr-evaluator.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/scalar-expr-evaluator.cc b/be/src/exprs/scalar-expr-evaluator.cc
index b2e0aa6..a327dd4 100644
--- a/be/src/exprs/scalar-expr-evaluator.cc
+++ b/be/src/exprs/scalar-expr-evaluator.cc
@@ -252,7 +252,7 @@ Status ScalarExprEvaluator::GetConstValue(RuntimeState* state, const ScalarExpr&
StringVal* sv = reinterpret_cast<StringVal*>(*const_val);
if (!sv->is_null && sv->len > 0) {
// Make sure the memory is owned by this evaluator.
- char* ptr_copy = reinterpret_cast<char*>(mem_pool_->TryAllocate(sv->len));
+ char* ptr_copy = reinterpret_cast<char*>(mem_pool_->TryAllocateUnaligned(sv->len));
if (ptr_copy == nullptr) {
return mem_pool_->mem_tracker()->MemLimitExceeded(
state, "Could not allocate constant string value", sv->len);
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/226c99e0/be/src/runtime/mem-pool.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/mem-pool.h b/be/src/runtime/mem-pool.h
index 648e382..b748f7f 100644
--- a/be/src/runtime/mem-pool.h
+++ b/be/src/runtime/mem-pool.h
@@ -118,6 +118,13 @@ class MemPool {
return Allocate<true>(size, alignment);
}
+ /// Same as TryAllocate() except returned memory is not aligned at all.
+ uint8_t* TryAllocateUnaligned(int64_t size) noexcept {
+ // Call templated implementation directly so that it is inlined here and the
+ // alignment logic can be optimised out.
+ return Allocate<true>(size, 1);
+ }
+
/// Returns 'byte_size' to the current chunk back to the mem pool. This can
/// only be used to return either all or part of the previous allocation returned
/// by Allocate().
@@ -234,31 +241,33 @@ class MemPool {
}
template <bool CHECK_LIMIT_FIRST>
- uint8_t* Allocate(int64_t size, int alignment) noexcept {
+ uint8_t* ALWAYS_INLINE Allocate(int64_t size, int alignment) noexcept {
DCHECK_GE(size, 0);
if (UNLIKELY(size == 0)) return reinterpret_cast<uint8_t*>(&zero_length_region_);
- bool fits_in_chunk = false;
if (current_chunk_idx_ != -1) {
+ ChunkInfo& info = chunks_[current_chunk_idx_];
int64_t aligned_allocated_bytes = BitUtil::RoundUpToPowerOf2(
- chunks_[current_chunk_idx_].allocated_bytes, alignment);
- if (aligned_allocated_bytes + size <= chunks_[current_chunk_idx_].size) {
+ info.allocated_bytes, alignment);
+ if (aligned_allocated_bytes + size <= info.size) {
// Ensure the requested alignment is respected.
- total_allocated_bytes_ +=
- aligned_allocated_bytes - chunks_[current_chunk_idx_].allocated_bytes;
- chunks_[current_chunk_idx_].allocated_bytes = aligned_allocated_bytes;
- fits_in_chunk = true;
+ int64_t padding = aligned_allocated_bytes - info.allocated_bytes;
+ uint8_t* result = info.data + aligned_allocated_bytes;
+ ASAN_UNPOISON_MEMORY_REGION(result, size);
+ DCHECK_LE(info.allocated_bytes + size, info.size);
+ info.allocated_bytes += padding + size;
+ total_allocated_bytes_ += padding + size;
+ DCHECK_LE(current_chunk_idx_, chunks_.size() - 1);
+ return result;
}
}
- if (!fits_in_chunk) {
- // If we couldn't allocate a new chunk, return NULL. malloc() guarantees alignment
- // of alignof(std::max_align_t), so we do not need to do anything additional to
- // guarantee alignment.
- static_assert(
- INITIAL_CHUNK_SIZE >= alignof(std::max_align_t), "Min chunk size too low");
- if (UNLIKELY(!FindChunk(size, CHECK_LIMIT_FIRST))) return NULL;
- }
+ // If we couldn't allocate a new chunk, return NULL. malloc() guarantees alignment
+ // of alignof(std::max_align_t), so we do not need to do anything additional to
+ // guarantee alignment.
+ static_assert(
+ INITIAL_CHUNK_SIZE >= alignof(std::max_align_t), "Min chunk size too low");
+ if (UNLIKELY(!FindChunk(size, CHECK_LIMIT_FIRST))) return NULL;
ChunkInfo& info = chunks_[current_chunk_idx_];
uint8_t* result = info.data + info.allocated_bytes;