You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by ya...@apache.org on 2021/09/23 01:44:39 UTC

[incubator-doris] branch master updated: Fixed zone map init error for string type (#6667)

This is an automated email from the ASF dual-hosted git repository.

yangzhg pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 5c45e26  Fixed zone map init error for string type (#6667)
5c45e26 is described below

commit 5c45e266440f7e281b41b66237fc04310d1150bf
Author: Zhengguo Yang <ya...@gmail.com>
AuthorDate: Thu Sep 23 09:44:22 2021 +0800

    Fixed zone map init error for string type (#6667)
    
    Fixed a problem where the StringValue memory generated by Expr could be released before it was used
    Fixed a possible buffer overflow in from_string for the String type
---
 be/src/common/config.h         | 11 -----------
 be/src/exec/olap_scan_node.cpp | 17 ++++++++++-------
 be/src/olap/field.h            | 20 +++++++++++++++++++-
 be/src/runtime/tuple.cpp       | 13 +++++++------
 4 files changed, 36 insertions(+), 25 deletions(-)

diff --git a/be/src/common/config.h b/be/src/common/config.h
index ef53f70..4bed7b5 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -116,8 +116,6 @@ CONF_mInt32(download_low_speed_limit_kbps, "50");
 CONF_mInt32(download_low_speed_time, "300");
 // sleep time for one second
 CONF_Int32(sleep_one_second, "1");
-// sleep time for five seconds
-CONF_Int32(sleep_five_seconds, "5");
 
 // log dir
 CONF_String(sys_log_dir, "${DORIS_HOME}/log");
@@ -135,9 +133,6 @@ CONF_Int32(sys_log_verbose_level, "10");
 // log buffer level
 CONF_String(log_buffer_level, "");
 
-// Pull load task dir
-CONF_String(pull_load_task_dir, "${DORIS_HOME}/var/pull_load");
-
 // number of threads available to serve backend execution requests
 CONF_Int32(be_service_threads, "64");
 
@@ -160,8 +155,6 @@ CONF_Int32(doris_scanner_thread_pool_queue_size, "102400");
 CONF_Int32(etl_thread_pool_size, "8");
 // number of etl thread pool size
 CONF_Int32(etl_thread_pool_queue_size, "256");
-// port on which to run Doris test backend
-CONF_Int32(port, "20001");
 // default thrift client connect timeout(in seconds)
 CONF_mInt32(thrift_connect_timeout_seconds, "3");
 // default thrift client retry interval (in milliseconds)
@@ -368,10 +361,6 @@ CONF_Int32(fragment_pool_thread_num_min, "64");
 CONF_Int32(fragment_pool_thread_num_max, "512");
 CONF_Int32(fragment_pool_queue_size, "2048");
 
-// Spill to disk when query
-// Writable scratch directories, split by ";"
-CONF_String(query_scratch_dirs, "${DORIS_HOME}");
-
 // Control the number of disks on the machine.  If 0, this comes from the system settings.
 CONF_Int32(num_disks, "0");
 // The maximum number of the threads per disk is also the max queue depth per disk.
diff --git a/be/src/exec/olap_scan_node.cpp b/be/src/exec/olap_scan_node.cpp
index 6584100..93d7947 100644
--- a/be/src/exec/olap_scan_node.cpp
+++ b/be/src/exec/olap_scan_node.cpp
@@ -454,15 +454,16 @@ Status OlapScanNode::start_scan(RuntimeState* state) {
     // 3. Using ColumnValueRange to Build StorageEngine filters
     RETURN_IF_ERROR(build_olap_filters());
 
+    VLOG_CRITICAL << "BuildScanKey";
+    // 4. Using `Key Column`'s ColumnValueRange to split ScanRange to several `Sub ScanRange`
+    RETURN_IF_ERROR(build_scan_key());
+
     VLOG_CRITICAL << "Filter idle conjuncts";
-    // 4. Filter idle conjunct which already trans to olap filters`
+    // 5. Filter idle conjunct which already trans to olap filters
+    // this must be after build_scan_key, it will free the StringValue memory
     // TODO: filter idle conjunct in vexpr_contexts
     remove_pushed_conjuncts(state);
 
-    VLOG_CRITICAL << "BuildScanKey";
-    // 5. Using `Key Column`'s ColumnValueRange to split ScanRange to several `Sub ScanRange`
-    RETURN_IF_ERROR(build_scan_key());
-
     VLOG_CRITICAL << "StartScanThread";
     // 6. Start multi thread to read several `Sub Sub ScanRange`
     RETURN_IF_ERROR(start_scan_thread(state));
@@ -1416,12 +1417,14 @@ void OlapScanNode::transfer_thread(RuntimeState* state) {
         auto iter = olap_scanners.begin();
         if (thread_token != nullptr) {
             while (iter != olap_scanners.end()) {
-                auto s = thread_token->submit_func(std::bind(&OlapScanNode::scanner_thread, this, *iter));
+                auto s = thread_token->submit_func(
+                        std::bind(&OlapScanNode::scanner_thread, this, *iter));
                 if (s.ok()) {
                     (*iter)->start_wait_worker_timer();
                     olap_scanners.erase(iter++);
                 } else {
-                    LOG(FATAL) << "Failed to assign scanner task to thread pool! " << s.get_error_msg();
+                    LOG(FATAL) << "Failed to assign scanner task to thread pool! "
+                               << s.get_error_msg();
                 }
                 ++_total_assign_num;
             }
diff --git a/be/src/olap/field.h b/be/src/olap/field.h
index 7602363..63ef199 100644
--- a/be/src/olap/field.h
+++ b/be/src/olap/field.h
@@ -247,6 +247,14 @@ public:
     // used by init scan key stored in string format
     // value_string should end with '\0'
     inline OLAPStatus from_string(char* buf, const std::string& value_string) const {
+        if (type() == OLAP_FIELD_TYPE_STRING) {
+            auto slice = reinterpret_cast<Slice*>(buf);
+            if (slice->size < value_string.size()) {
+                *_long_text_buf = static_cast<char*>(realloc(*_long_text_buf, value_string.size()));
+                slice->data = *_long_text_buf;
+                slice->size = value_string.size();
+            }
+        }
         return _type_info->from_string(buf, value_string);
     }
 
@@ -566,7 +574,7 @@ public:
         return type_value;
     }
 
-    // only varchar filed need modify zone map index when zone map max_value
+    // only varchar/string filed need modify zone map index when zone map max_value
     // index longer than `MAX_ZONE_MAP_INDEX_SIZE`. so here we add one
     // for the last byte
     // In UTF8 encoding, here do not appear 0xff in last byte
@@ -621,6 +629,16 @@ public:
         auto slice = reinterpret_cast<Slice*>(ch);
         memset(slice->data, 0xFF, slice->size);
     }
+    // only varchar/string filed need modify zone map index when zone map max_value
+    // index longer than `MAX_ZONE_MAP_INDEX_SIZE`. so here we add one
+    // for the last byte
+    // In UTF8 encoding, here do not appear 0xff in last byte
+    void modify_zone_map_index(char* src) const override {
+        auto slice = reinterpret_cast<Slice*>(src);
+        if (slice->size == MAX_ZONE_MAP_INDEX_SIZE) {
+            slice->mutable_data()[slice->size - 1] += 1;
+        }
+    }
 
     void set_to_zone_map_max(char* ch) const override {
         auto slice = reinterpret_cast<Slice*>(ch);
diff --git a/be/src/runtime/tuple.cpp b/be/src/runtime/tuple.cpp
index 0cd7781..6073598 100644
--- a/be/src/runtime/tuple.cpp
+++ b/be/src/runtime/tuple.cpp
@@ -274,13 +274,14 @@ void Tuple::materialize_exprs(TupleRow* row, const TupleDescriptor& desc,
         // TODO: revisit this logic in the FE
         PrimitiveType slot_type = slot_desc->type().type;
         PrimitiveType expr_type = materialize_expr_ctxs[mat_expr_index]->root()->type().type;
-        if ((slot_type == TYPE_CHAR) || (slot_type == TYPE_VARCHAR) || (slot_type == TYPE_HLL)) {
-            DCHECK((expr_type == TYPE_CHAR) || (expr_type == TYPE_VARCHAR) ||
-                   (expr_type == TYPE_HLL));
-        } else if ((slot_type == TYPE_DATE) || (slot_type == TYPE_DATETIME)) {
-            DCHECK((expr_type == TYPE_DATE) || (expr_type == TYPE_DATETIME));
+        if (slot_type == TYPE_CHAR || slot_type == TYPE_VARCHAR || slot_type == TYPE_HLL ||
+            slot_type == TYPE_STRING) {
+            DCHECK(expr_type == TYPE_CHAR || expr_type == TYPE_VARCHAR || expr_type == TYPE_HLL ||
+                   expr_type == TYPE_STRING);
+        } else if (slot_type == TYPE_DATE || slot_type == TYPE_DATETIME) {
+            DCHECK(expr_type == TYPE_DATE || expr_type == TYPE_DATETIME);
         } else if (slot_type == TYPE_ARRAY) {
-            DCHECK((expr_type == TYPE_ARRAY));
+            DCHECK(expr_type == TYPE_ARRAY);
         } else {
             DCHECK(slot_type == TYPE_NULL || slot_type == expr_type);
         }

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org