You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by li...@apache.org on 2022/01/17 12:31:58 UTC

[incubator-doris] 17/33: [Feature][Vectorized] Support String in vec exe engine (#7670)

This is an automated email from the ASF dual-hosted git repository.

lihaopeng pushed a commit to branch vectorized
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git

commit 1fc1c7005dccabdadeb0a5d2cd0cf0aa4871a3a8
Author: HappenLee <ha...@hotmail.com>
AuthorDate: Mon Jan 10 20:27:45 2022 +0800

    [Feature][Vectorized] Support String in vec exe engine (#7670)
    
    Co-authored-by: lihaopeng <li...@baidu.com>
---
 be/src/olap/olap_define.h                          |  3 ++-
 be/src/olap/row_block2.cpp                         | 26 ++++++++++++++++++++--
 be/src/olap/row_block2.h                           |  2 +-
 be/src/olap/rowset/beta_rowset_reader.cpp          |  6 ++++-
 be/src/vec/exec/vunion_node.cpp                    |  9 ++++++++
 .../apache/doris/rewrite/FoldConstantsRule.java    |  7 ------
 6 files changed, 41 insertions(+), 12 deletions(-)

diff --git a/be/src/olap/olap_define.h b/be/src/olap/olap_define.h
index c2d4b7f..a9ac731 100644
--- a/be/src/olap/olap_define.h
+++ b/be/src/olap/olap_define.h
@@ -384,7 +384,8 @@ enum OLAPStatus {
     OLAP_ERR_ROWSET_LOAD_FAILED = -3109,
     OLAP_ERR_ROWSET_READER_INIT = -3110,
     OLAP_ERR_ROWSET_READ_FAILED = -3111,
-    OLAP_ERR_ROWSET_INVALID_STATE_TRANSITION = -3112
+    OLAP_ERR_ROWSET_INVALID_STATE_TRANSITION = -3112,
+    OLAP_ERR_STRING_OVERFLOW_IN_VEC_ENGINE = -3113
 };
 
 enum ColumnFamilyIndex {
diff --git a/be/src/olap/row_block2.cpp b/be/src/olap/row_block2.cpp
index 26b58ca..877f6a2 100644
--- a/be/src/olap/row_block2.cpp
+++ b/be/src/olap/row_block2.cpp
@@ -95,7 +95,9 @@ Status RowBlockV2::convert_to_row_block(RowCursor* helper, RowBlock* dst) {
     return Status::OK();
 }
 
-void RowBlockV2::_copy_data_to_column(int cid, doris::vectorized::MutableColumnPtr& origin_column) {
+Status RowBlockV2::_copy_data_to_column(int cid, doris::vectorized::MutableColumnPtr& origin_column) {
+    constexpr auto MAX_SIZE_OF_VEC_STRING = 1024l * 1024;
+
     auto* column = origin_column.get();
     bool nullable_mark_array[_selected_size];
 
@@ -170,6 +172,24 @@ void RowBlockV2::_copy_data_to_column(int cid, doris::vectorized::MutableColumnP
         }
         break;
     }
+    case OLAP_FIELD_TYPE_STRING: {
+        auto column_string = assert_cast<vectorized::ColumnString*>(column);
+
+        for (uint16_t j = 0; j < _selected_size; ++j) {
+            if (!nullable_mark_array[j]) {
+                uint16_t row_idx = _selection_vector[j];
+                auto slice = reinterpret_cast<const Slice*>(column_block(cid).cell_ptr(row_idx));
+                if (LIKELY(slice->size <= MAX_SIZE_OF_VEC_STRING)) {
+                    column_string->insert_data(slice->data, slice->size);
+                } else {
+                    return Status::NotSupported("Not support string len over than 1MB in vec engine.");
+                }
+            } else {
+                column_string->insert_default();
+            }
+        }
+        break;
+    }
     case OLAP_FIELD_TYPE_CHAR: {
         auto column_string = assert_cast<vectorized::ColumnString*>(column);
 
@@ -286,13 +306,15 @@ void RowBlockV2::_copy_data_to_column(int cid, doris::vectorized::MutableColumnP
         DCHECK(false) << "Invalid type in RowBlockV2:" << _schema.column(cid)->type();
     }
     }
+
+    return Status::OK();
 }
 
 Status RowBlockV2::convert_to_vec_block(vectorized::Block* block) {
     for (int i = 0; i < _schema.column_ids().size(); ++i) {
         auto cid = _schema.column_ids()[i];
         auto column = (*std::move(block->get_by_position(i).column)).assume_mutable();
-        _copy_data_to_column(cid, column);
+        RETURN_IF_ERROR(_copy_data_to_column(cid, column));
     }
     _pool->clear();
     return Status::OK();
diff --git a/be/src/olap/row_block2.h b/be/src/olap/row_block2.h
index cdbf428..b98ab95 100644
--- a/be/src/olap/row_block2.h
+++ b/be/src/olap/row_block2.h
@@ -109,7 +109,7 @@ public:
     std::string debug_string();
 
 private:
-    void _copy_data_to_column(int cid, vectorized::MutableColumnPtr& mutable_column_ptr);
+    Status _copy_data_to_column(int cid, vectorized::MutableColumnPtr& mutable_column_ptr);
 
     Schema _schema;
     size_t _capacity;
diff --git a/be/src/olap/rowset/beta_rowset_reader.cpp b/be/src/olap/rowset/beta_rowset_reader.cpp
index 459f3ca..4d35f2f 100644
--- a/be/src/olap/rowset/beta_rowset_reader.cpp
+++ b/be/src/olap/rowset/beta_rowset_reader.cpp
@@ -204,7 +204,11 @@ OLAPStatus BetaRowsetReader::next_block(vectorized::Block* block) {
 
         {
             SCOPED_RAW_TIMER(&_stats->block_convert_ns);
-            _input_block->convert_to_vec_block(block);
+            auto s = _input_block->convert_to_vec_block(block);
+            if (UNLIKELY(!s.ok())) {
+                LOG(WARNING) << "failed to read next block: " << s.to_string();
+                return OLAP_ERR_STRING_OVERFLOW_IN_VEC_ENGINE;
+            }
         }
         is_first = false;
     } while (block->rows() < _context->runtime_state->batch_size()); // here we should keep block.rows() < batch_size
diff --git a/be/src/vec/exec/vunion_node.cpp b/be/src/vec/exec/vunion_node.cpp
index 1fa4da4..c05b3ef 100644
--- a/be/src/vec/exec/vunion_node.cpp
+++ b/be/src/vec/exec/vunion_node.cpp
@@ -195,6 +195,15 @@ Status VUnionNode::get_next_const(RuntimeState* state, Block* block) {
     if (!mem_reuse) {
         block->swap(mblock.to_block());
     }
+
+    // For some insert queries, e.g. "insert into string_test select 1, repeat('a', 1024 * 1024);",
+    // the const expr ends up in the output exprs, which causes the union node to return an empty
+    // block. Add one row here so that a union node executing const exprs returns at least one row.
+    if (block->rows() == 0) {
+        block->insert({vectorized::ColumnUInt8::create(1),
+                    std::make_shared<vectorized::DataTypeUInt8>(), ""});
+    }
+
     return Status::OK();
 }
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/rewrite/FoldConstantsRule.java b/fe/fe-core/src/main/java/org/apache/doris/rewrite/FoldConstantsRule.java
index 628740a..8f5137f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/rewrite/FoldConstantsRule.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/rewrite/FoldConstantsRule.java
@@ -27,7 +27,6 @@ import org.apache.doris.analysis.LiteralExpr;
 import org.apache.doris.analysis.NullLiteral;
 import org.apache.doris.analysis.SysVariableDesc;
 import org.apache.doris.catalog.Catalog;
-import org.apache.doris.catalog.Function;
 import org.apache.doris.catalog.PrimitiveType;
 import org.apache.doris.catalog.Type;
 import org.apache.doris.common.AnalysisException;
@@ -195,12 +194,6 @@ public class FoldConstantsRule implements ExprRewriteRule {
                               Analyzer analyzer, Map<String, Expr> sysVarMap, Map<String, Expr> infoFnMap)
             throws AnalysisException {
         if (expr.isConstant()) {
-            if (VectorizedUtil.isVectorized()) {
-                Function fn = expr.getFn();
-                if (fn != null && (fn.functionName().equalsIgnoreCase("curtime") ||
-                        fn.functionName().equalsIgnoreCase("current_time")))
-                    return;
-            }
             // Do not constant fold cast(null as dataType) because we cannot preserve the
             // cast-to-types and that can lead to query failures, e.g., CTAS
             if (expr instanceof CastExpr) {

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org