You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2021/11/17 03:20:36 UTC

[incubator-doris] branch master updated: [Optimize] [Memory] BitShufflePageDecoder use memory allocated by ChunkAllocator instead of Faststring (#6515)

This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git


The following commit(s) were added to refs/heads/master by this push:
     new f5a35c2  [Optimize] [Memory] BitShufflePageDecoder use memory allocated by ChunkAllocator instead of Faststring (#6515)
f5a35c2 is described below

commit f5a35c28e9d252158135a6988b40bfa37a9c17de
Author: Xinyi Zou <zo...@gmail.com>
AuthorDate: Wed Nov 17 11:20:21 2021 +0800

    [Optimize] [Memory] BitShufflePageDecoder use memory allocated by ChunkAllocator instead of Faststring (#6515)
    
    BitShufflePageDecoder reuses the memory used for storing decoded results and allocates that memory directly
    from the `ChunkAllocator`, which improves performance to a certain extent.
    
    In the case of #6285, the total time consumption is reduced by 13.5%, and the share of time spent in `~Reader()`
    is reduced from 17.65% to 1.53%. Memory allocation is also unified under `ChunkAllocator` for centralized
    management, which is conducive to subsequent memory optimization.
    
    Allocating directly from `ChunkAllocator` can avoid the memory waste caused by `Mempool`, because a chunk can be
    freed at any time, but the performance is lower than allocating from `Mempool`. The guess is that, without
    `Mempool` performing secondary allocation out of large chunks, a large number of small chunks are requested
    directly from `ChunkAllocator`, and more time is spent on locking in `pop_free_chunk` and `push_free_chunk`
    (but this is not proven by the flame graphs of BE's CPU and contention).
---
 be/src/olap/rowset/segment_v2/bitshuffle_page.h | 21 +++++++++++++++------
 be/src/runtime/memory/chunk_allocator.cpp       |  7 +++++++
 be/src/runtime/memory/chunk_allocator.h         |  2 ++
 3 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/bitshuffle_page.h b/be/src/olap/rowset/segment_v2/bitshuffle_page.h
index e4abece..02ab39f 100644
--- a/be/src/olap/rowset/segment_v2/bitshuffle_page.h
+++ b/be/src/olap/rowset/segment_v2/bitshuffle_page.h
@@ -33,6 +33,7 @@
 #include "olap/rowset/segment_v2/page_builder.h"
 #include "olap/rowset/segment_v2/page_decoder.h"
 #include "olap/types.h"
+#include "runtime/memory/chunk_allocator.h"
 #include "util/coding.h"
 #include "util/faststring.h"
 #include "util/slice.h"
@@ -214,6 +215,12 @@ public:
               _size_of_element(0),
               _cur_index(0) {}
 
+    ~BitShufflePageDecoder() {
+        if (_chunk.size != 0) {
+            ChunkAllocator::instance()->free(_chunk);
+        }
+    }
+
     Status init() override {
         CHECK(!_parsed);
         if (_data.size < BITSHUFFLE_PAGE_HEADER_SIZE) {
@@ -302,7 +309,7 @@ public:
         // - left == _num_elements when not found (all values < target)
         while (left < right) {
             size_t mid = left + (right - left) / 2;
-            mid_value = &_decoded[mid * SIZE_OF_TYPE];
+            mid_value = &_chunk.data[mid * SIZE_OF_TYPE];
             if (TypeTraits<Type>::cmp(mid_value, value) < 0) {
                 left = mid + 1;
             } else {
@@ -312,7 +319,7 @@ public:
         if (left >= _num_elements) {
             return Status::NotFound("all value small than the value");
         }
-        void* find_value = &_decoded[left * SIZE_OF_TYPE];
+        void* find_value = &_chunk.data[left * SIZE_OF_TYPE];
         if (TypeTraits<Type>::cmp(find_value, value) == 0) {
             *exact_match = true;
         } else {
@@ -353,15 +360,17 @@ public:
 
 private:
     void _copy_next_values(size_t n, void* data) {
-        memcpy(data, &_decoded[_cur_index * SIZE_OF_TYPE], n * SIZE_OF_TYPE);
+        memcpy(data, &_chunk.data[_cur_index * SIZE_OF_TYPE], n * SIZE_OF_TYPE);
     }
 
     Status _decode() {
         if (_num_elements > 0) {
             int64_t bytes;
-            _decoded.resize(_num_element_after_padding * _size_of_element);
+            if (!ChunkAllocator::instance()->allocate_align(_num_element_after_padding * _size_of_element, &_chunk)) {
+                return Status::RuntimeError("Decoded Memory Alloc failed");
+            }
             char* in = const_cast<char*>(&_data[BITSHUFFLE_PAGE_HEADER_SIZE]);
-            bytes = bitshuffle::decompress_lz4(in, _decoded.data(), _num_element_after_padding,
+            bytes = bitshuffle::decompress_lz4(in, _chunk.data, _num_element_after_padding,
                                                _size_of_element, 0);
             if (PREDICT_FALSE(bytes < 0)) {
                 // Ideally, this should not happen.
@@ -385,7 +394,7 @@ private:
 
     int _size_of_element;
     size_t _cur_index;
-    faststring _decoded;
+    Chunk _chunk;
 };
 
 } // namespace segment_v2
diff --git a/be/src/runtime/memory/chunk_allocator.cpp b/be/src/runtime/memory/chunk_allocator.cpp
index 0d50f2b..82327f2 100644
--- a/be/src/runtime/memory/chunk_allocator.cpp
+++ b/be/src/runtime/memory/chunk_allocator.cpp
@@ -135,6 +135,7 @@ bool ChunkAllocator::allocate(size_t size, Chunk* chunk) {
     chunk->core_id = core_id;
 
     if (_arenas[core_id]->pop_free_chunk(size, &chunk->data)) {
+        DCHECK_GE(_reserved_bytes, 0);
         _reserved_bytes.fetch_sub(size);
         chunk_pool_local_core_alloc_count->increment(1);
         return true;
@@ -144,6 +145,7 @@ bool ChunkAllocator::allocate(size_t size, Chunk* chunk) {
         ++core_id;
         for (int i = 1; i < _arenas.size(); ++i, ++core_id) {
             if (_arenas[core_id % _arenas.size()]->pop_free_chunk(size, &chunk->data)) {
+                DCHECK_GE(_reserved_bytes, 0);
                 _reserved_bytes.fetch_sub(size);
                 chunk_pool_other_core_alloc_count->increment(1);
                 // reset chunk's core_id to other
@@ -188,4 +190,9 @@ void ChunkAllocator::free(const Chunk& chunk) {
     _arenas[chunk.core_id]->push_free_chunk(chunk.data, chunk.size);
 }
 
+
+bool ChunkAllocator::allocate_align(size_t size, Chunk* chunk) {
+    return allocate(BitUtil::RoundUpToPowerOfTwo(size), chunk);
+}
+
 } // namespace doris
diff --git a/be/src/runtime/memory/chunk_allocator.h b/be/src/runtime/memory/chunk_allocator.h
index 230517d..d7eb22f 100644
--- a/be/src/runtime/memory/chunk_allocator.h
+++ b/be/src/runtime/memory/chunk_allocator.h
@@ -65,6 +65,8 @@ public:
     // Otherwise return false.
     bool allocate(size_t size, Chunk* chunk);
 
+    bool allocate_align(size_t size, Chunk* chunk);
+
     // Free chunk allocated from this allocator
     void free(const Chunk& chunk);
 

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org