You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by ya...@apache.org on 2021/09/23 01:44:39 UTC
[incubator-doris] branch master updated: Fixed zone map init error for string type (#6667)
This is an automated email from the ASF dual-hosted git repository.
yangzhg pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push:
new 5c45e26 Fixed zone map init error for string type (#6667)
5c45e26 is described below
commit 5c45e266440f7e281b41b66237fc04310d1150bf
Author: Zhengguo Yang <ya...@gmail.com>
AuthorDate: Thu Sep 23 09:44:22 2021 +0800
Fixed zone map init error for string type (#6667)
Fixed a problem where the StringValue memory generated by Expr could be released before it was used.
Fixed a possible buffer overflow in from_string for the String type.
---
be/src/common/config.h | 11 -----------
be/src/exec/olap_scan_node.cpp | 17 ++++++++++-------
be/src/olap/field.h | 20 +++++++++++++++++++-
be/src/runtime/tuple.cpp | 13 +++++++------
4 files changed, 36 insertions(+), 25 deletions(-)
diff --git a/be/src/common/config.h b/be/src/common/config.h
index ef53f70..4bed7b5 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -116,8 +116,6 @@ CONF_mInt32(download_low_speed_limit_kbps, "50");
CONF_mInt32(download_low_speed_time, "300");
// sleep time for one second
CONF_Int32(sleep_one_second, "1");
-// sleep time for five seconds
-CONF_Int32(sleep_five_seconds, "5");
// log dir
CONF_String(sys_log_dir, "${DORIS_HOME}/log");
@@ -135,9 +133,6 @@ CONF_Int32(sys_log_verbose_level, "10");
// log buffer level
CONF_String(log_buffer_level, "");
-// Pull load task dir
-CONF_String(pull_load_task_dir, "${DORIS_HOME}/var/pull_load");
-
// number of threads available to serve backend execution requests
CONF_Int32(be_service_threads, "64");
@@ -160,8 +155,6 @@ CONF_Int32(doris_scanner_thread_pool_queue_size, "102400");
CONF_Int32(etl_thread_pool_size, "8");
// number of etl thread pool size
CONF_Int32(etl_thread_pool_queue_size, "256");
-// port on which to run Doris test backend
-CONF_Int32(port, "20001");
// default thrift client connect timeout(in seconds)
CONF_mInt32(thrift_connect_timeout_seconds, "3");
// default thrift client retry interval (in milliseconds)
@@ -368,10 +361,6 @@ CONF_Int32(fragment_pool_thread_num_min, "64");
CONF_Int32(fragment_pool_thread_num_max, "512");
CONF_Int32(fragment_pool_queue_size, "2048");
-// Spill to disk when query
-// Writable scratch directories, split by ";"
-CONF_String(query_scratch_dirs, "${DORIS_HOME}");
-
// Control the number of disks on the machine. If 0, this comes from the system settings.
CONF_Int32(num_disks, "0");
// The maximum number of the threads per disk is also the max queue depth per disk.
diff --git a/be/src/exec/olap_scan_node.cpp b/be/src/exec/olap_scan_node.cpp
index 6584100..93d7947 100644
--- a/be/src/exec/olap_scan_node.cpp
+++ b/be/src/exec/olap_scan_node.cpp
@@ -454,15 +454,16 @@ Status OlapScanNode::start_scan(RuntimeState* state) {
// 3. Using ColumnValueRange to Build StorageEngine filters
RETURN_IF_ERROR(build_olap_filters());
+ VLOG_CRITICAL << "BuildScanKey";
+ // 4. Using `Key Column`'s ColumnValueRange to split ScanRange to several `Sub ScanRange`
+ RETURN_IF_ERROR(build_scan_key());
+
VLOG_CRITICAL << "Filter idle conjuncts";
- // 4. Filter idle conjunct which already trans to olap filters`
+ // 5. Filter idle conjuncts which have already been translated to olap filters
+ // this must run after build_scan_key, because it will free the StringValue memory
// TODO: filter idle conjunct in vexpr_contexts
remove_pushed_conjuncts(state);
- VLOG_CRITICAL << "BuildScanKey";
- // 5. Using `Key Column`'s ColumnValueRange to split ScanRange to several `Sub ScanRange`
- RETURN_IF_ERROR(build_scan_key());
-
VLOG_CRITICAL << "StartScanThread";
// 6. Start multi thread to read several `Sub Sub ScanRange`
RETURN_IF_ERROR(start_scan_thread(state));
@@ -1416,12 +1417,14 @@ void OlapScanNode::transfer_thread(RuntimeState* state) {
auto iter = olap_scanners.begin();
if (thread_token != nullptr) {
while (iter != olap_scanners.end()) {
- auto s = thread_token->submit_func(std::bind(&OlapScanNode::scanner_thread, this, *iter));
+ auto s = thread_token->submit_func(
+ std::bind(&OlapScanNode::scanner_thread, this, *iter));
if (s.ok()) {
(*iter)->start_wait_worker_timer();
olap_scanners.erase(iter++);
} else {
- LOG(FATAL) << "Failed to assign scanner task to thread pool! " << s.get_error_msg();
+ LOG(FATAL) << "Failed to assign scanner task to thread pool! "
+ << s.get_error_msg();
}
++_total_assign_num;
}
diff --git a/be/src/olap/field.h b/be/src/olap/field.h
index 7602363..63ef199 100644
--- a/be/src/olap/field.h
+++ b/be/src/olap/field.h
@@ -247,6 +247,14 @@ public:
// used by init scan key stored in string format
// value_string should end with '\0'
inline OLAPStatus from_string(char* buf, const std::string& value_string) const {
+ if (type() == OLAP_FIELD_TYPE_STRING) {
+ auto slice = reinterpret_cast<Slice*>(buf);
+ if (slice->size < value_string.size()) {
+ *_long_text_buf = static_cast<char*>(realloc(*_long_text_buf, value_string.size()));
+ slice->data = *_long_text_buf;
+ slice->size = value_string.size();
+ }
+ }
return _type_info->from_string(buf, value_string);
}
@@ -566,7 +574,7 @@ public:
return type_value;
}
- // only varchar filed need modify zone map index when zone map max_value
+ // only varchar/string fields need to modify the zone map index when zone map max_value
// index longer than `MAX_ZONE_MAP_INDEX_SIZE`. so here we add one
// for the last byte
// In UTF8 encoding, here do not appear 0xff in last byte
@@ -621,6 +629,16 @@ public:
auto slice = reinterpret_cast<Slice*>(ch);
memset(slice->data, 0xFF, slice->size);
}
+ // only varchar/string fields need to modify the zone map index when the zone map max_value
+ // index is longer than `MAX_ZONE_MAP_INDEX_SIZE`, so here we add one
+ // to the last byte.
+ // In UTF8 encoding, 0xff does not appear in the last byte
+ void modify_zone_map_index(char* src) const override {
+ auto slice = reinterpret_cast<Slice*>(src);
+ if (slice->size == MAX_ZONE_MAP_INDEX_SIZE) {
+ slice->mutable_data()[slice->size - 1] += 1;
+ }
+ }
void set_to_zone_map_max(char* ch) const override {
auto slice = reinterpret_cast<Slice*>(ch);
diff --git a/be/src/runtime/tuple.cpp b/be/src/runtime/tuple.cpp
index 0cd7781..6073598 100644
--- a/be/src/runtime/tuple.cpp
+++ b/be/src/runtime/tuple.cpp
@@ -274,13 +274,14 @@ void Tuple::materialize_exprs(TupleRow* row, const TupleDescriptor& desc,
// TODO: revisit this logic in the FE
PrimitiveType slot_type = slot_desc->type().type;
PrimitiveType expr_type = materialize_expr_ctxs[mat_expr_index]->root()->type().type;
- if ((slot_type == TYPE_CHAR) || (slot_type == TYPE_VARCHAR) || (slot_type == TYPE_HLL)) {
- DCHECK((expr_type == TYPE_CHAR) || (expr_type == TYPE_VARCHAR) ||
- (expr_type == TYPE_HLL));
- } else if ((slot_type == TYPE_DATE) || (slot_type == TYPE_DATETIME)) {
- DCHECK((expr_type == TYPE_DATE) || (expr_type == TYPE_DATETIME));
+ if (slot_type == TYPE_CHAR || slot_type == TYPE_VARCHAR || slot_type == TYPE_HLL ||
+ slot_type == TYPE_STRING) {
+ DCHECK(expr_type == TYPE_CHAR || expr_type == TYPE_VARCHAR || expr_type == TYPE_HLL ||
+ expr_type == TYPE_STRING);
+ } else if (slot_type == TYPE_DATE || slot_type == TYPE_DATETIME) {
+ DCHECK(expr_type == TYPE_DATE || expr_type == TYPE_DATETIME);
} else if (slot_type == TYPE_ARRAY) {
- DCHECK((expr_type == TYPE_ARRAY));
+ DCHECK(expr_type == TYPE_ARRAY);
} else {
DCHECK(slot_type == TYPE_NULL || slot_type == expr_type);
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org