You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by zh...@apache.org on 2019/10/24 14:36:38 UTC

[incubator-doris] branch master updated: Optimize the convert of row block v2 to v1 #2011 (#2058)

This is an automated email from the ASF dual-hosted git repository.

zhaoc pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 78bf825  Optimize the convert of row block v2 to v1 #2011 (#2058)
78bf825 is described below

commit 78bf825e734664ac258e944b2f353c6f92caa044
Author: kangpinghuang <ka...@126.com>
AuthorDate: Thu Oct 24 22:36:30 2019 +0800

    Optimize the convert of row block v2 to v1 #2011 (#2058)
    
    Use MemPool exchange to avoid copying string data
    Convert in batches (column by column) instead of row by row
---
 be/src/olap/row_block2.cpp                | 54 +++++++++++++++----------------
 be/src/olap/row_block2.h                  | 12 ++-----
 be/src/olap/rowset/beta_rowset_reader.cpp | 20 +-----------
 3 files changed, 31 insertions(+), 55 deletions(-)

diff --git a/be/src/olap/row_block2.cpp b/be/src/olap/row_block2.cpp
index 69d5c1d..5ba1689 100644
--- a/be/src/olap/row_block2.cpp
+++ b/be/src/olap/row_block2.cpp
@@ -56,37 +56,37 @@ RowBlockV2::~RowBlockV2() {
     delete[] _selection_vector;
 }
 
-Status RowBlockV2::copy_to_row_cursor(size_t row_idx, RowCursor* cursor) {
-    if (row_idx >= _num_rows) {
-        return Status::InvalidArgument(
-            Substitute("invalid row index $0 (num_rows=$1)", row_idx, _num_rows));
-    }
+Status RowBlockV2::convert_to_row_block(RowCursor* helper, RowBlock* dst) {
     for (auto cid : _schema.column_ids()) {
-        bool is_null = _schema.column(cid)->is_nullable() && BitmapTest(_column_null_bitmaps[cid], row_idx);
-        if (is_null) {
-            cursor->set_null(cid);
+        bool is_nullable = _schema.column(cid)->is_nullable();
+        if (is_nullable) {
+            for (uint16_t i = 0; i < _selected_size; ++i) {
+                uint16_t row_idx = _selection_vector[i];
+                dst->get_row(row_idx, helper);
+                bool is_null = BitmapTest(_column_null_bitmaps[cid], row_idx);
+                if (is_null) {
+                    helper->set_null(cid);
+                } else {
+                    helper->set_not_null(cid);
+                    helper->set_field_content_shallow(cid,
+                            reinterpret_cast<const char*>(column_block(cid).cell_ptr(row_idx)));
+                }
+            }
         } else {
-            cursor->set_not_null(cid);
-            cursor->set_field_content_shallow(cid, reinterpret_cast<const char*>(column_block(cid).cell_ptr(row_idx)));
+            for (uint16_t i = 0; i < _selected_size; ++i) {
+                uint16_t row_idx = _selection_vector[i];
+                dst->get_row(row_idx, helper);
+                helper->set_not_null(cid);
+                helper->set_field_content_shallow(cid,
+                        reinterpret_cast<const char*>(column_block(cid).cell_ptr(row_idx)));
+            }
         }
     }
-    return Status::OK();
-}
-
-Status RowBlockV2::deep_copy_to_row_cursor(size_t row_idx, RowCursor* cursor, MemPool* mem_pool) {                                                                  
-    if (row_idx >= _num_rows) {
-        return Status::InvalidArgument(
-            Substitute("invalid row index $0 (num_rows=$1)", row_idx, _num_rows));
-    }   
-    for (auto cid : _schema.column_ids()) {
-        bool is_null = _schema.column(cid)->is_nullable() && BitmapTest(_column_null_bitmaps[cid], row_idx);
-        if (is_null) {
-            cursor->set_null(cid);
-        } else {
-            cursor->set_not_null(cid);
-            cursor->set_field_content(cid, reinterpret_cast<const char*>(column_block(cid).cell_ptr(row_idx)), mem_pool);
-        }   
-    }   
+    // exchange MemPool data with dst to avoid copying string content
+    dst->mem_pool()->exchange_data(_pool.get());
+    dst->set_pos(0);
+    dst->set_limit(_selected_size);
+    dst->finalize(_selected_size);
     return Status::OK();
 }
 
diff --git a/be/src/olap/row_block2.h b/be/src/olap/row_block2.h
index ce3d5b6..27ea633 100644
--- a/be/src/olap/row_block2.h
+++ b/be/src/olap/row_block2.h
@@ -26,6 +26,7 @@
 #include "olap/schema.h"
 #include "olap/types.h"
 #include "olap/selection_vector.h"
+#include "olap/row_block.h"
 #include "runtime/mem_pool.h"
 #include "runtime/mem_tracker.h"
 
@@ -63,15 +64,8 @@ public:
         }
     }
 
-    // Copy the row_idx row's data into given row_cursor.
-    // This function will use shallow copy, so the client should
-    // notice the life time of returned value
-    Status copy_to_row_cursor(size_t row_idx, RowCursor* row_cursor);
-
-    // Copy the row_idx row's data into given row_cursor.
-    // This function will use deep copy.
-    // This function is used to convert RowBlockV2 to RowBlock
-    Status deep_copy_to_row_cursor(size_t row_idx, RowCursor* cursor, MemPool* mem_pool);
+    // convert RowBlockV2 to RowBlock
+    Status convert_to_row_block(RowCursor* helper, RowBlock* dst);
 
     // Get the column block for one of the columns in this row block.
     // `cid` must be one of `schema()->column_ids()`.
diff --git a/be/src/olap/rowset/beta_rowset_reader.cpp b/be/src/olap/rowset/beta_rowset_reader.cpp
index e2620dc..ace0f0a 100644
--- a/be/src/olap/rowset/beta_rowset_reader.cpp
+++ b/be/src/olap/rowset/beta_rowset_reader.cpp
@@ -123,28 +123,10 @@ OLAPStatus BetaRowsetReader::next_block(RowBlock** block) {
 
     // convert to output block
     _output_block->clear();
-    size_t rows_read = 0;
-    uint16_t* selection_vector = _input_block->selection_vector();
     {
         SCOPED_RAW_TIMER(&_context->stats->block_convert_ns);
-        for (uint16_t i = 0; i < _input_block->selected_size(); ++i) {
-            uint16_t row_idx = selection_vector[i];
-            // deep copy row from input block to output block because
-            // RowBlock use MemPool and RowBlockV2 use Arena
-            // TODO(hkp): unify RowBlockV2 to use MemPool to boost performance
-            _output_block->get_row(row_idx, _row.get());
-            // convert return_columns to seek_columns
-            auto s = _input_block->deep_copy_to_row_cursor(row_idx, _row.get(), _output_block->mem_pool());
-            if (!s.ok()) {
-                LOG(WARNING) << "failed to copy row: " << s.to_string();
-                return OLAP_ERR_ROWSET_READ_FAILED;
-            }
-            ++rows_read;
-        }
+        _input_block->convert_to_row_block(_row.get(), _output_block.get());
     }
-    _output_block->set_pos(0);
-    _output_block->set_limit(rows_read);
-    _output_block->finalize(rows_read);
     *block = _output_block.get();
     return OLAP_SUCCESS;
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org