You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by li...@apache.org on 2022/01/17 12:31:58 UTC
[incubator-doris] 17/33: [Feature][Vectorized] Support String in vec exe engine (#7670)
This is an automated email from the ASF dual-hosted git repository.
lihaopeng pushed a commit to branch vectorized
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
commit 1fc1c7005dccabdadeb0a5d2cd0cf0aa4871a3a8
Author: HappenLee <ha...@hotmail.com>
AuthorDate: Mon Jan 10 20:27:45 2022 +0800
[Feature][Vectorized] Support String in vec exe engine (#7670)
Co-authored-by: lihaopeng <li...@baidu.com>
---
be/src/olap/olap_define.h | 3 ++-
be/src/olap/row_block2.cpp | 26 ++++++++++++++++++++--
be/src/olap/row_block2.h | 2 +-
be/src/olap/rowset/beta_rowset_reader.cpp | 6 ++++-
be/src/vec/exec/vunion_node.cpp | 9 ++++++++
.../apache/doris/rewrite/FoldConstantsRule.java | 7 ------
6 files changed, 41 insertions(+), 12 deletions(-)
diff --git a/be/src/olap/olap_define.h b/be/src/olap/olap_define.h
index c2d4b7f..a9ac731 100644
--- a/be/src/olap/olap_define.h
+++ b/be/src/olap/olap_define.h
@@ -384,7 +384,8 @@ enum OLAPStatus {
OLAP_ERR_ROWSET_LOAD_FAILED = -3109,
OLAP_ERR_ROWSET_READER_INIT = -3110,
OLAP_ERR_ROWSET_READ_FAILED = -3111,
- OLAP_ERR_ROWSET_INVALID_STATE_TRANSITION = -3112
+ OLAP_ERR_ROWSET_INVALID_STATE_TRANSITION = -3112,
+ OLAP_ERR_STRING_OVERFLOW_IN_VEC_ENGINE = -3113
};
enum ColumnFamilyIndex {
diff --git a/be/src/olap/row_block2.cpp b/be/src/olap/row_block2.cpp
index 26b58ca..877f6a2 100644
--- a/be/src/olap/row_block2.cpp
+++ b/be/src/olap/row_block2.cpp
@@ -95,7 +95,9 @@ Status RowBlockV2::convert_to_row_block(RowCursor* helper, RowBlock* dst) {
return Status::OK();
}
-void RowBlockV2::_copy_data_to_column(int cid, doris::vectorized::MutableColumnPtr& origin_column) {
+Status RowBlockV2::_copy_data_to_column(int cid, doris::vectorized::MutableColumnPtr& origin_column) {
+ constexpr auto MAX_SIZE_OF_VEC_STRING = 1024l * 1024;
+
auto* column = origin_column.get();
bool nullable_mark_array[_selected_size];
@@ -170,6 +172,24 @@ void RowBlockV2::_copy_data_to_column(int cid, doris::vectorized::MutableColumnP
}
break;
}
+ case OLAP_FIELD_TYPE_STRING: {
+ auto column_string = assert_cast<vectorized::ColumnString*>(column);
+
+ for (uint16_t j = 0; j < _selected_size; ++j) {
+ if (!nullable_mark_array[j]) {
+ uint16_t row_idx = _selection_vector[j];
+ auto slice = reinterpret_cast<const Slice*>(column_block(cid).cell_ptr(row_idx));
+ if (LIKELY(slice->size <= MAX_SIZE_OF_VEC_STRING)) {
+ column_string->insert_data(slice->data, slice->size);
+ } else {
+ return Status::NotSupported("Not support string len over than 1MB in vec engine.");
+ }
+ } else {
+ column_string->insert_default();
+ }
+ }
+ break;
+ }
case OLAP_FIELD_TYPE_CHAR: {
auto column_string = assert_cast<vectorized::ColumnString*>(column);
@@ -286,13 +306,15 @@ void RowBlockV2::_copy_data_to_column(int cid, doris::vectorized::MutableColumnP
DCHECK(false) << "Invalid type in RowBlockV2:" << _schema.column(cid)->type();
}
}
+
+ return Status::OK();
}
Status RowBlockV2::convert_to_vec_block(vectorized::Block* block) {
for (int i = 0; i < _schema.column_ids().size(); ++i) {
auto cid = _schema.column_ids()[i];
auto column = (*std::move(block->get_by_position(i).column)).assume_mutable();
- _copy_data_to_column(cid, column);
+ RETURN_IF_ERROR(_copy_data_to_column(cid, column));
}
_pool->clear();
return Status::OK();
diff --git a/be/src/olap/row_block2.h b/be/src/olap/row_block2.h
index cdbf428..b98ab95 100644
--- a/be/src/olap/row_block2.h
+++ b/be/src/olap/row_block2.h
@@ -109,7 +109,7 @@ public:
std::string debug_string();
private:
- void _copy_data_to_column(int cid, vectorized::MutableColumnPtr& mutable_column_ptr);
+ Status _copy_data_to_column(int cid, vectorized::MutableColumnPtr& mutable_column_ptr);
Schema _schema;
size_t _capacity;
diff --git a/be/src/olap/rowset/beta_rowset_reader.cpp b/be/src/olap/rowset/beta_rowset_reader.cpp
index 459f3ca..4d35f2f 100644
--- a/be/src/olap/rowset/beta_rowset_reader.cpp
+++ b/be/src/olap/rowset/beta_rowset_reader.cpp
@@ -204,7 +204,11 @@ OLAPStatus BetaRowsetReader::next_block(vectorized::Block* block) {
{
SCOPED_RAW_TIMER(&_stats->block_convert_ns);
- _input_block->convert_to_vec_block(block);
+ auto s = _input_block->convert_to_vec_block(block);
+ if (UNLIKELY(!s.ok())) {
+ LOG(WARNING) << "failed to read next block: " << s.to_string();
+ return OLAP_ERR_STRING_OVERFLOW_IN_VEC_ENGINE;
+ }
}
is_first = false;
} while (block->rows() < _context->runtime_state->batch_size()); // here we should keep block.rows() < batch_size
diff --git a/be/src/vec/exec/vunion_node.cpp b/be/src/vec/exec/vunion_node.cpp
index 1fa4da4..c05b3ef 100644
--- a/be/src/vec/exec/vunion_node.cpp
+++ b/be/src/vec/exec/vunion_node.cpp
@@ -195,6 +195,15 @@ Status VUnionNode::get_next_const(RuntimeState* state, Block* block) {
if (!mem_reuse) {
block->swap(mblock.to_block());
}
+
+ // Some insert queries, e.g. "insert into string_test select 1, repeat('a', 1024 * 1024);",
+ // put the const expr in the output exprs, which causes the union node to return an empty block.
+ // So add one row here to make sure a union node executing const exprs returns at least one row.
+ if (block->rows() == 0) {
+ block->insert({vectorized::ColumnUInt8::create(1),
+ std::make_shared<vectorized::DataTypeUInt8>(), ""});
+ }
+
return Status::OK();
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/rewrite/FoldConstantsRule.java b/fe/fe-core/src/main/java/org/apache/doris/rewrite/FoldConstantsRule.java
index 628740a..8f5137f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/rewrite/FoldConstantsRule.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/rewrite/FoldConstantsRule.java
@@ -27,7 +27,6 @@ import org.apache.doris.analysis.LiteralExpr;
import org.apache.doris.analysis.NullLiteral;
import org.apache.doris.analysis.SysVariableDesc;
import org.apache.doris.catalog.Catalog;
-import org.apache.doris.catalog.Function;
import org.apache.doris.catalog.PrimitiveType;
import org.apache.doris.catalog.Type;
import org.apache.doris.common.AnalysisException;
@@ -195,12 +194,6 @@ public class FoldConstantsRule implements ExprRewriteRule {
Analyzer analyzer, Map<String, Expr> sysVarMap, Map<String, Expr> infoFnMap)
throws AnalysisException {
if (expr.isConstant()) {
- if (VectorizedUtil.isVectorized()) {
- Function fn = expr.getFn();
- if (fn != null && (fn.functionName().equalsIgnoreCase("curtime") ||
- fn.functionName().equalsIgnoreCase("current_time")))
- return;
- }
// Do not constant fold cast(null as dataType) because we cannot preserve the
// cast-to-types and that can lead to query failures, e.g., CTAS
if (expr instanceof CastExpr) {
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org