Posted to commits@impala.apache.org by sa...@apache.org on 2017/10/05 05:38:57 UTC

[2/3] incubator-impala git commit: IMPALA-5988: optimise MemPool::TryAllocate()

IMPALA-5988: optimise MemPool::TryAllocate()

Add a TryAllocateUnaligned() variant of TryAllocate() for callers that do not
need aligned memory (string data), and convert those callers to use it. Also
mark the templated Allocate() implementation ALWAYS_INLINE and restructure it
so the in-chunk fast path returns directly, which lets the alignment logic be
optimised out when the alignment is 1.

Testing:
Ran core tests.

Perf:
Experiments using this on top of a WIP Avro patch for IMPALA-5307
showed noticeable improvements in CPU efficiency of up to 10%.
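
A minimal sketch of the converted callsite pattern follows. The CopyStringValue
helper name is illustrative only; TryAllocateUnaligned(), MemLimitExceeded() and
UNLIKELY() are the calls used in the diff below:

    // Sketch: string bytes have no alignment requirement, so the copy goes
    // through TryAllocateUnaligned() instead of TryAllocate(), avoiding the
    // per-allocation round-up of the chunk offset to the default alignment.
    Status CopyStringValue(MemPool* pool, StringValue* sv) {
      if (sv == nullptr || sv->len == 0) return Status::OK();
      char* new_ptr =
          reinterpret_cast<char*>(pool->TryAllocateUnaligned(sv->len));
      if (UNLIKELY(new_ptr == nullptr)) {
        // Allocation failed because the memory limit was hit.
        return pool->mem_tracker()->MemLimitExceeded(
            nullptr, "Failed to allocate memory for string value.", sv->len);
      }
      memcpy(new_ptr, sv->ptr, sv->len);
      sv->ptr = new_ptr;
      return Status::OK();
    }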

Change-Id: I088012084fe535a67a3cd1103ced5a02027f961a
Reviewed-on: http://gerrit.cloudera.org:8080/8145
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/226c99e0
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/226c99e0
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/226c99e0

Branch: refs/heads/master
Commit: 226c99e01c49d3d25840c5982474740a7dcb9c62
Parents: 4dd0f1b
Author: Tim Armstrong <ta...@cloudera.com>
Authored: Thu Sep 21 16:01:15 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Thu Oct 5 02:50:36 2017 +0000

----------------------------------------------------------------------
 be/src/exec/analytic-eval-node.cc     |  3 ++-
 be/src/exec/data-source-scan-node.cc  |  3 ++-
 be/src/exec/hdfs-text-scanner.cc      |  2 +-
 be/src/exec/text-converter.inline.h   |  2 +-
 be/src/exprs/scalar-expr-evaluator.cc |  2 +-
 be/src/runtime/mem-pool.h             | 41 ++++++++++++++++++------------
 6 files changed, 32 insertions(+), 21 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/226c99e0/be/src/exec/analytic-eval-node.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/analytic-eval-node.cc b/be/src/exec/analytic-eval-node.cc
index af4f866..a3551f3 100644
--- a/be/src/exec/analytic-eval-node.cc
+++ b/be/src/exec/analytic-eval-node.cc
@@ -388,7 +388,8 @@ Status AnalyticEvalNode::AddResultTuple(int64_t stream_idx) {
     StringValue* sv = reinterpret_cast<StringValue*>(
         result_tuple->GetSlot(slot_desc->tuple_offset()));
     if (sv == nullptr || sv->len == 0) continue;
-    char* new_ptr = reinterpret_cast<char*>(cur_tuple_pool->TryAllocate(sv->len));
+    char* new_ptr = reinterpret_cast<char*>(
+        cur_tuple_pool->TryAllocateUnaligned(sv->len));
     if (UNLIKELY(new_ptr == nullptr)) {
       return cur_tuple_pool->mem_tracker()->MemLimitExceeded(nullptr,
           "Failed to allocate memory for analytic function's result.", sv->len);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/226c99e0/be/src/exec/data-source-scan-node.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/data-source-scan-node.cc b/be/src/exec/data-source-scan-node.cc
index 78ba492..2639510 100644
--- a/be/src/exec/data-source-scan-node.cc
+++ b/be/src/exec/data-source-scan-node.cc
@@ -222,7 +222,8 @@ Status DataSourceScanNode::MaterializeNextRow(MemPool* tuple_pool, Tuple* tuple)
           }
           const string& val = col.string_vals[val_idx];
           size_t val_size = val.size();
-          char* buffer = reinterpret_cast<char*>(tuple_pool->TryAllocate(val_size));
+          char* buffer = reinterpret_cast<char*>(
+              tuple_pool->TryAllocateUnaligned(val_size));
           if (UNLIKELY(buffer == NULL)) {
             string details = Substitute(ERROR_MEM_LIMIT_EXCEEDED, "MaterializeNextRow",
                 val_size, "string slot");

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/226c99e0/be/src/exec/hdfs-text-scanner.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-text-scanner.cc b/be/src/exec/hdfs-text-scanner.cc
index eea4e80..a548ca1 100644
--- a/be/src/exec/hdfs-text-scanner.cc
+++ b/be/src/exec/hdfs-text-scanner.cc
@@ -870,7 +870,7 @@ Status HdfsTextScanner::CopyBoundaryField(FieldLocation* data, MemPool* pool) {
   bool needs_escape = data->len < 0;
   int copy_len = needs_escape ? -data->len : data->len;
   int64_t total_len = copy_len + boundary_column_.len();
-  char* str_data = reinterpret_cast<char*>(pool->TryAllocate(total_len));
+  char* str_data = reinterpret_cast<char*>(pool->TryAllocateUnaligned(total_len));
   if (UNLIKELY(str_data == nullptr)) {
     string details = Substitute("HdfsTextScanner::CopyBoundaryField() failed to allocate "
         "$0 bytes.", total_len);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/226c99e0/be/src/exec/text-converter.inline.h
----------------------------------------------------------------------
diff --git a/be/src/exec/text-converter.inline.h b/be/src/exec/text-converter.inline.h
index d8dea91..49f4e8a 100644
--- a/be/src/exec/text-converter.inline.h
+++ b/be/src/exec/text-converter.inline.h
@@ -79,7 +79,7 @@ inline bool TextConverter::WriteSlot(const SlotDescriptor* slot_desc, Tuple* tup
         // 3. HdfsScanner::WriteCompleteTuple() always calls this function with
         //    'copy_string' == false.
         str.ptr = type.IsVarLenStringType() ?
-            reinterpret_cast<char*>(pool->TryAllocate(buffer_len)) :
+            reinterpret_cast<char*>(pool->TryAllocateUnaligned(buffer_len)) :
             reinterpret_cast<char*>(slot);
         if (UNLIKELY(str.ptr == NULL)) return false;
         if (need_escape) {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/226c99e0/be/src/exprs/scalar-expr-evaluator.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/scalar-expr-evaluator.cc b/be/src/exprs/scalar-expr-evaluator.cc
index b2e0aa6..a327dd4 100644
--- a/be/src/exprs/scalar-expr-evaluator.cc
+++ b/be/src/exprs/scalar-expr-evaluator.cc
@@ -252,7 +252,7 @@ Status ScalarExprEvaluator::GetConstValue(RuntimeState* state, const ScalarExpr&
     StringVal* sv = reinterpret_cast<StringVal*>(*const_val);
     if (!sv->is_null && sv->len > 0) {
       // Make sure the memory is owned by this evaluator.
-      char* ptr_copy = reinterpret_cast<char*>(mem_pool_->TryAllocate(sv->len));
+      char* ptr_copy = reinterpret_cast<char*>(mem_pool_->TryAllocateUnaligned(sv->len));
       if (ptr_copy == nullptr) {
         return mem_pool_->mem_tracker()->MemLimitExceeded(
             state, "Could not allocate constant string value", sv->len);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/226c99e0/be/src/runtime/mem-pool.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/mem-pool.h b/be/src/runtime/mem-pool.h
index 648e382..b748f7f 100644
--- a/be/src/runtime/mem-pool.h
+++ b/be/src/runtime/mem-pool.h
@@ -118,6 +118,13 @@ class MemPool {
     return Allocate<true>(size, alignment);
   }
 
+  /// Same as TryAllocate() except returned memory is not aligned at all.
+  uint8_t* TryAllocateUnaligned(int64_t size) noexcept {
+    // Call templated implementation directly so that it is inlined here and the
+    // alignment logic can be optimised out.
+    return Allocate<true>(size, 1);
+  }
+
   /// Returns 'byte_size' to the current chunk back to the mem pool. This can
   /// only be used to return either all or part of the previous allocation returned
   /// by Allocate().
@@ -234,31 +241,33 @@ class MemPool {
   }
 
   template <bool CHECK_LIMIT_FIRST>
-  uint8_t* Allocate(int64_t size, int alignment) noexcept {
+  uint8_t* ALWAYS_INLINE Allocate(int64_t size, int alignment) noexcept {
     DCHECK_GE(size, 0);
     if (UNLIKELY(size == 0)) return reinterpret_cast<uint8_t*>(&zero_length_region_);
 
-    bool fits_in_chunk = false;
     if (current_chunk_idx_ != -1) {
+      ChunkInfo& info = chunks_[current_chunk_idx_];
       int64_t aligned_allocated_bytes = BitUtil::RoundUpToPowerOf2(
-          chunks_[current_chunk_idx_].allocated_bytes, alignment);
-      if (aligned_allocated_bytes + size <= chunks_[current_chunk_idx_].size) {
+          info.allocated_bytes, alignment);
+      if (aligned_allocated_bytes + size <= info.size) {
         // Ensure the requested alignment is respected.
-        total_allocated_bytes_ +=
-            aligned_allocated_bytes - chunks_[current_chunk_idx_].allocated_bytes;
-        chunks_[current_chunk_idx_].allocated_bytes = aligned_allocated_bytes;
-        fits_in_chunk = true;
+        int64_t padding = aligned_allocated_bytes - info.allocated_bytes;
+        uint8_t* result = info.data + aligned_allocated_bytes;
+        ASAN_UNPOISON_MEMORY_REGION(result, size);
+        DCHECK_LE(info.allocated_bytes + size, info.size);
+        info.allocated_bytes += padding + size;
+        total_allocated_bytes_ += padding + size;
+        DCHECK_LE(current_chunk_idx_, chunks_.size() - 1);
+        return result;
       }
     }
 
-    if (!fits_in_chunk) {
-      // If we couldn't allocate a new chunk, return NULL. malloc() guarantees alignment
-      // of alignof(std::max_align_t), so we do not need to do anything additional to
-      // guarantee alignment.
-      static_assert(
-          INITIAL_CHUNK_SIZE >= alignof(std::max_align_t), "Min chunk size too low");
-      if (UNLIKELY(!FindChunk(size, CHECK_LIMIT_FIRST))) return NULL;
-    }
+    // If we couldn't allocate a new chunk, return NULL. malloc() guarantees alignment
+    // of alignof(std::max_align_t), so we do not need to do anything additional to
+    // guarantee alignment.
+    static_assert(
+        INITIAL_CHUNK_SIZE >= alignof(std::max_align_t), "Min chunk size too low");
+    if (UNLIKELY(!FindChunk(size, CHECK_LIMIT_FIRST))) return NULL;
 
     ChunkInfo& info = chunks_[current_chunk_idx_];
     uint8_t* result = info.data + info.allocated_bytes;