You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2022/06/23 00:44:49 UTC

[doris] branch master updated: [fix] do not read seq column when reading a compacted rowset (#10344)

This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 274a0f2603 [fix] do not read seq column when reading a compacted rowset (#10344)
274a0f2603 is described below

commit 274a0f26035dfe1f91a5c3bb0f8213bef9a4d7c1
Author: Yongqiang YANG <98...@users.noreply.github.com>
AuthorDate: Thu Jun 23 08:44:43 2022 +0800

    [fix] do not read seq column when reading a compacted rowset (#10344)
    
    SEQ_COL is used on tables with unique key to order data in one transaction(rowset),
    when there is only one rowset and the rowset is compacted, rows in the rowset is sorted
    and rows with same keys are resolved by compaction, so a scanner sets direct_mode to
    optimize read iterator to avoid sorting and aggregating, and iterators does not need SEQ_COL.
    However, init_return_columns adds SEQ_COL to return_columns, which is passed to SegmentIterator.
    Then segment Iterator would be called via get_next with a block without SEQ_COL, segment iterator
    creates columns included in return_columns but not in the block. SEQ_COL is nullable, segment Iterator
    does not handle it, so a core dump happen.
    
    Actually, in the above case, segment iterator does not need to read SEQ_COL.
    When SEQ_COL is really needed, iterators creates SEQ_COL column in block,
    so segment Iterator does not need do create SEQ_COL at all.
---
 be/src/exec/olap_scanner.cpp      | 43 ++++++++++++++++++++-------------------
 be/src/exec/olap_scanner.h        |  2 +-
 be/src/vec/exec/volap_scanner.cpp | 42 ++++++++++++++++++++------------------
 be/src/vec/exec/volap_scanner.h   |  2 +-
 4 files changed, 46 insertions(+), 43 deletions(-)

diff --git a/be/src/exec/olap_scanner.cpp b/be/src/exec/olap_scanner.cpp
index 3bd2783ea0..d78c21e996 100644
--- a/be/src/exec/olap_scanner.cpp
+++ b/be/src/exec/olap_scanner.cpp
@@ -147,7 +147,25 @@ Status OlapScanner::_init_tablet_reader_params(
         const std::vector<OlapScanRange*>& key_ranges, const std::vector<TCondition>& filters,
         const std::vector<std::pair<string, std::shared_ptr<IBloomFilterFuncBase>>>&
                 bloom_filters) {
-    RETURN_IF_ERROR(_init_return_columns());
+    // if the table with rowset [0-x] or [0-1] [2-y], and [0-1] is empty
+    bool single_version =
+            (_tablet_reader_params.rs_readers.size() == 1 &&
+             _tablet_reader_params.rs_readers[0]->rowset()->start_version() == 0 &&
+             !_tablet_reader_params.rs_readers[0]
+                      ->rowset()
+                      ->rowset_meta()
+                      ->is_segments_overlapping()) ||
+            (_tablet_reader_params.rs_readers.size() == 2 &&
+             _tablet_reader_params.rs_readers[0]->rowset()->rowset_meta()->num_rows() == 0 &&
+             _tablet_reader_params.rs_readers[1]->rowset()->start_version() == 2 &&
+             !_tablet_reader_params.rs_readers[1]
+                      ->rowset()
+                      ->rowset_meta()
+                      ->is_segments_overlapping());
+
+    _tablet_reader_params.direct_mode = single_version || _aggregation;
+
+    RETURN_IF_ERROR(_init_return_columns(!_tablet_reader_params.direct_mode));
 
     _tablet_reader_params.tablet = _tablet;
     _tablet_reader_params.reader_type = READER_QUERY;
@@ -179,28 +197,11 @@ Status OlapScanner::_init_tablet_reader_params(
     // TODO(zc)
     _tablet_reader_params.profile = _parent->runtime_profile();
     _tablet_reader_params.runtime_state = _runtime_state;
-    // if the table with rowset [0-x] or [0-1] [2-y], and [0-1] is empty
-    bool single_version =
-            (_tablet_reader_params.rs_readers.size() == 1 &&
-             _tablet_reader_params.rs_readers[0]->rowset()->start_version() == 0 &&
-             !_tablet_reader_params.rs_readers[0]
-                      ->rowset()
-                      ->rowset_meta()
-                      ->is_segments_overlapping()) ||
-            (_tablet_reader_params.rs_readers.size() == 2 &&
-             _tablet_reader_params.rs_readers[0]->rowset()->rowset_meta()->num_rows() == 0 &&
-             _tablet_reader_params.rs_readers[1]->rowset()->start_version() == 2 &&
-             !_tablet_reader_params.rs_readers[1]
-                      ->rowset()
-                      ->rowset_meta()
-                      ->is_segments_overlapping());
-
     _tablet_reader_params.origin_return_columns = &_return_columns;
     _tablet_reader_params.tablet_columns_convert_to_null_set = &_tablet_columns_convert_to_null_set;
 
-    if (_aggregation || single_version) {
+    if (_tablet_reader_params.direct_mode) {
         _tablet_reader_params.return_columns = _return_columns;
-        _tablet_reader_params.direct_mode = true;
     } else {
         // we need to fetch all key columns to do the right aggregation on storage engine side.
         for (size_t i = 0; i < _tablet->num_key_columns(); ++i) {
@@ -236,7 +237,7 @@ Status OlapScanner::_init_tablet_reader_params(
     return Status::OK();
 }
 
-Status OlapScanner::_init_return_columns() {
+Status OlapScanner::_init_return_columns(bool need_seq_col) {
     for (auto slot : _tuple_desc->slots()) {
         if (!slot->is_materialized()) {
             continue;
@@ -255,7 +256,7 @@ Status OlapScanner::_init_return_columns() {
     }
 
     // expand the sequence column
-    if (_tablet->tablet_schema().has_sequence_col()) {
+    if (_tablet->tablet_schema().has_sequence_col() && need_seq_col) {
         bool has_replace_col = false;
         for (auto col : _return_columns) {
             if (_tablet->tablet_schema().column(col).aggregation() ==
diff --git a/be/src/exec/olap_scanner.h b/be/src/exec/olap_scanner.h
index 92b8c07205..65086eaceb 100644
--- a/be/src/exec/olap_scanner.h
+++ b/be/src/exec/olap_scanner.h
@@ -93,7 +93,7 @@ protected:
             const std::vector<OlapScanRange*>& key_ranges, const std::vector<TCondition>& filters,
             const std::vector<std::pair<string, std::shared_ptr<IBloomFilterFuncBase>>>&
                     bloom_filters);
-    Status _init_return_columns();
+    Status _init_return_columns(bool need_seq_col);
     void _convert_row_to_tuple(Tuple* tuple);
 
     // Update profile that need to be reported in realtime.
diff --git a/be/src/vec/exec/volap_scanner.cpp b/be/src/vec/exec/volap_scanner.cpp
index f1c15408e4..6c942dfd0d 100644
--- a/be/src/vec/exec/volap_scanner.cpp
+++ b/be/src/vec/exec/volap_scanner.cpp
@@ -135,7 +135,25 @@ Status VOlapScanner::_init_tablet_reader_params(
         const std::vector<OlapScanRange*>& key_ranges, const std::vector<TCondition>& filters,
         const std::vector<std::pair<string, std::shared_ptr<IBloomFilterFuncBase>>>&
                 bloom_filters) {
-    RETURN_IF_ERROR(_init_return_columns());
+    // if the table with rowset [0-x] or [0-1] [2-y], and [0-1] is empty
+    bool single_version =
+            (_tablet_reader_params.rs_readers.size() == 1 &&
+             _tablet_reader_params.rs_readers[0]->rowset()->start_version() == 0 &&
+             !_tablet_reader_params.rs_readers[0]
+                      ->rowset()
+                      ->rowset_meta()
+                      ->is_segments_overlapping()) ||
+            (_tablet_reader_params.rs_readers.size() == 2 &&
+             _tablet_reader_params.rs_readers[0]->rowset()->rowset_meta()->num_rows() == 0 &&
+             _tablet_reader_params.rs_readers[1]->rowset()->start_version() == 2 &&
+             !_tablet_reader_params.rs_readers[1]
+                      ->rowset()
+                      ->rowset_meta()
+                      ->is_segments_overlapping());
+
+    _tablet_reader_params.direct_mode = _aggregation || single_version;
+
+    RETURN_IF_ERROR(_init_return_columns(!_tablet_reader_params.direct_mode));
 
     _tablet_reader_params.tablet = _tablet;
     _tablet_reader_params.reader_type = READER_QUERY;
@@ -166,28 +184,12 @@ Status VOlapScanner::_init_tablet_reader_params(
 
     _tablet_reader_params.profile = _parent->runtime_profile();
     _tablet_reader_params.runtime_state = _runtime_state;
-    // if the table with rowset [0-x] or [0-1] [2-y], and [0-1] is empty
-    bool single_version =
-            (_tablet_reader_params.rs_readers.size() == 1 &&
-             _tablet_reader_params.rs_readers[0]->rowset()->start_version() == 0 &&
-             !_tablet_reader_params.rs_readers[0]
-                      ->rowset()
-                      ->rowset_meta()
-                      ->is_segments_overlapping()) ||
-            (_tablet_reader_params.rs_readers.size() == 2 &&
-             _tablet_reader_params.rs_readers[0]->rowset()->rowset_meta()->num_rows() == 0 &&
-             _tablet_reader_params.rs_readers[1]->rowset()->start_version() == 2 &&
-             !_tablet_reader_params.rs_readers[1]
-                      ->rowset()
-                      ->rowset_meta()
-                      ->is_segments_overlapping());
 
     _tablet_reader_params.origin_return_columns = &_return_columns;
     _tablet_reader_params.tablet_columns_convert_to_null_set = &_tablet_columns_convert_to_null_set;
 
-    if (_aggregation || single_version) {
+    if (_tablet_reader_params.direct_mode) {
         _tablet_reader_params.return_columns = _return_columns;
-        _tablet_reader_params.direct_mode = true;
     } else {
         // we need to fetch all key columns to do the right aggregation on storage engine side.
         for (size_t i = 0; i < _tablet->num_key_columns(); ++i) {
@@ -223,7 +225,7 @@ Status VOlapScanner::_init_tablet_reader_params(
     return Status::OK();
 }
 
-Status VOlapScanner::_init_return_columns() {
+Status VOlapScanner::_init_return_columns(bool need_seq_col) {
     for (auto slot : _tuple_desc->slots()) {
         if (!slot->is_materialized()) {
             continue;
@@ -242,7 +244,7 @@ Status VOlapScanner::_init_return_columns() {
     }
 
     // expand the sequence column
-    if (_tablet->tablet_schema().has_sequence_col()) {
+    if (_tablet->tablet_schema().has_sequence_col() && need_seq_col) {
         bool has_replace_col = false;
         for (auto col : _return_columns) {
             if (_tablet->tablet_schema().column(col).aggregation() ==
diff --git a/be/src/vec/exec/volap_scanner.h b/be/src/vec/exec/volap_scanner.h
index 22cab48006..5786a388ad 100644
--- a/be/src/vec/exec/volap_scanner.h
+++ b/be/src/vec/exec/volap_scanner.h
@@ -98,7 +98,7 @@ private:
             const std::vector<OlapScanRange*>& key_ranges, const std::vector<TCondition>& filters,
             const std::vector<std::pair<string, std::shared_ptr<IBloomFilterFuncBase>>>&
                     bloom_filters);
-    Status _init_return_columns();
+    Status _init_return_columns(bool need_seq_col);
 
     // Update profile that need to be reported in realtime.
     void _update_realtime_counter();


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org