You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by zh...@apache.org on 2019/10/24 14:36:38 UTC
[incubator-doris] branch master updated: Optimize the convert of row block v2 to v1 #2011 (#2058)
This is an automated email from the ASF dual-hosted git repository.
zhaoc pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push:
new 78bf825 Optimize the convert of row block v2 to v1 #2011 (#2058)
78bf825 is described below
commit 78bf825e734664ac258e944b2f353c6f92caa044
Author: kangpinghuang <ka...@126.com>
AuthorDate: Thu Oct 24 22:36:30 2019 +0800
Optimize the convert of row block v2 to v1 #2011 (#2058)
Use MemPool exchange to avoid string copy
Use batch convert to replace row by row
---
be/src/olap/row_block2.cpp | 54 +++++++++++++++----------------
be/src/olap/row_block2.h | 12 ++-----
be/src/olap/rowset/beta_rowset_reader.cpp | 20 +-----------
3 files changed, 31 insertions(+), 55 deletions(-)
diff --git a/be/src/olap/row_block2.cpp b/be/src/olap/row_block2.cpp
index 69d5c1d..5ba1689 100644
--- a/be/src/olap/row_block2.cpp
+++ b/be/src/olap/row_block2.cpp
@@ -56,37 +56,37 @@ RowBlockV2::~RowBlockV2() {
delete[] _selection_vector;
}
-Status RowBlockV2::copy_to_row_cursor(size_t row_idx, RowCursor* cursor) {
- if (row_idx >= _num_rows) {
- return Status::InvalidArgument(
- Substitute("invalid row index $0 (num_rows=$1)", row_idx, _num_rows));
- }
+Status RowBlockV2::convert_to_row_block(RowCursor* helper, RowBlock* dst) {
for (auto cid : _schema.column_ids()) {
- bool is_null = _schema.column(cid)->is_nullable() && BitmapTest(_column_null_bitmaps[cid], row_idx);
- if (is_null) {
- cursor->set_null(cid);
+ bool is_nullable = _schema.column(cid)->is_nullable();
+ if (is_nullable) {
+ for (uint16_t i = 0; i < _selected_size; ++i) {
+ uint16_t row_idx = _selection_vector[i];
+ dst->get_row(row_idx, helper);
+ bool is_null = BitmapTest(_column_null_bitmaps[cid], row_idx);
+ if (is_null) {
+ helper->set_null(cid);
+ } else {
+ helper->set_not_null(cid);
+ helper->set_field_content_shallow(cid,
+ reinterpret_cast<const char*>(column_block(cid).cell_ptr(row_idx)));
+ }
+ }
} else {
- cursor->set_not_null(cid);
- cursor->set_field_content_shallow(cid, reinterpret_cast<const char*>(column_block(cid).cell_ptr(row_idx)));
+ for (uint16_t i = 0; i < _selected_size; ++i) {
+ uint16_t row_idx = _selection_vector[i];
+ dst->get_row(row_idx, helper);
+ helper->set_not_null(cid);
+ helper->set_field_content_shallow(cid,
+ reinterpret_cast<const char*>(column_block(cid).cell_ptr(row_idx)));
+ }
}
}
- return Status::OK();
-}
-
-Status RowBlockV2::deep_copy_to_row_cursor(size_t row_idx, RowCursor* cursor, MemPool* mem_pool) {
- if (row_idx >= _num_rows) {
- return Status::InvalidArgument(
- Substitute("invalid row index $0 (num_rows=$1)", row_idx, _num_rows));
- }
- for (auto cid : _schema.column_ids()) {
- bool is_null = _schema.column(cid)->is_nullable() && BitmapTest(_column_null_bitmaps[cid], row_idx);
- if (is_null) {
- cursor->set_null(cid);
- } else {
- cursor->set_not_null(cid);
- cursor->set_field_content(cid, reinterpret_cast<const char*>(column_block(cid).cell_ptr(row_idx)), mem_pool);
- }
- }
+ // swap MemPool to copy string content
+ dst->mem_pool()->exchange_data(_pool.get());
+ dst->set_pos(0);
+ dst->set_limit(_selected_size);
+ dst->finalize(_selected_size);
return Status::OK();
}
diff --git a/be/src/olap/row_block2.h b/be/src/olap/row_block2.h
index ce3d5b6..27ea633 100644
--- a/be/src/olap/row_block2.h
+++ b/be/src/olap/row_block2.h
@@ -26,6 +26,7 @@
#include "olap/schema.h"
#include "olap/types.h"
#include "olap/selection_vector.h"
+#include "olap/row_block.h"
#include "runtime/mem_pool.h"
#include "runtime/mem_tracker.h"
@@ -63,15 +64,8 @@ public:
}
}
- // Copy the row_idx row's data into given row_cursor.
- // This function will use shallow copy, so the client should
- // notice the life time of returned value
- Status copy_to_row_cursor(size_t row_idx, RowCursor* row_cursor);
-
- // Copy the row_idx row's data into given row_cursor.
- // This function will use deep copy.
- // This function is used to convert RowBlockV2 to RowBlock
- Status deep_copy_to_row_cursor(size_t row_idx, RowCursor* cursor, MemPool* mem_pool);
+ // convert RowBlockV2 to RowBlock
+ Status convert_to_row_block(RowCursor* helper, RowBlock* dst);
// Get the column block for one of the columns in this row block.
// `cid` must be one of `schema()->column_ids()`.
diff --git a/be/src/olap/rowset/beta_rowset_reader.cpp b/be/src/olap/rowset/beta_rowset_reader.cpp
index e2620dc..ace0f0a 100644
--- a/be/src/olap/rowset/beta_rowset_reader.cpp
+++ b/be/src/olap/rowset/beta_rowset_reader.cpp
@@ -123,28 +123,10 @@ OLAPStatus BetaRowsetReader::next_block(RowBlock** block) {
// convert to output block
_output_block->clear();
- size_t rows_read = 0;
- uint16_t* selection_vector = _input_block->selection_vector();
{
SCOPED_RAW_TIMER(&_context->stats->block_convert_ns);
- for (uint16_t i = 0; i < _input_block->selected_size(); ++i) {
- uint16_t row_idx = selection_vector[i];
- // deep copy row from input block to output block because
- // RowBlock use MemPool and RowBlockV2 use Arena
- // TODO(hkp): unify RowBlockV2 to use MemPool to boost performance
- _output_block->get_row(row_idx, _row.get());
- // convert return_columns to seek_columns
- auto s = _input_block->deep_copy_to_row_cursor(row_idx, _row.get(), _output_block->mem_pool());
- if (!s.ok()) {
- LOG(WARNING) << "failed to copy row: " << s.to_string();
- return OLAP_ERR_ROWSET_READ_FAILED;
- }
- ++rows_read;
- }
+ _input_block->convert_to_row_block(_row.get(), _output_block.get());
}
- _output_block->set_pos(0);
- _output_block->set_limit(rows_read);
- _output_block->finalize(rows_read);
*block = _output_block.get();
return OLAP_SUCCESS;
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org