You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2021/01/09 15:41:10 UTC

[incubator-doris] branch master updated: [Load] support ignoring eovercrowded when tablet sink (#5156)

This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 5d6a1a7  [Load] support ignoring eovercrowded when tablet sink (#5156)
5d6a1a7 is described below

commit 5d6a1a72903ed042823a0a4d9887f15f65f9fff1
Author: HuangWei <hu...@apache.org>
AuthorDate: Sat Jan 9 23:40:51 2021 +0800

    [Load] support ignoring eovercrowded when tablet sink (#5156)
    
    If the ignore_eovercrowded flag is set, the `PTabletWriterAddBatchRequest`
    won't fail on `EOVERCROWDED`, which keeps load jobs from failing on this error.
    It only affects the NodeChannel (the load job); other rpc requests will still check if overcrowded.
---
 be/src/common/config.h                             |  6 ++++-
 be/src/exec/tablet_sink.cpp                        | 27 ++++++++++++++--------
 docs/en/administrator-guide/config/be_config.md    |  8 +++++++
 docs/zh-CN/administrator-guide/config/be_config.md |  8 +++++++
 4 files changed, 39 insertions(+), 10 deletions(-)

diff --git a/be/src/common/config.h b/be/src/common/config.h
index 4454c7f..66e52a3 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -330,8 +330,12 @@ CONF_mInt64(streaming_load_json_max_mb, "100");
 CONF_mInt32(streaming_load_rpc_max_alive_time_sec, "1200");
 // the timeout of a rpc to open the tablet writer in remote BE.
 // short operation time, can set a short timeout
-CONF_mInt32(tablet_writer_open_rpc_timeout_sec, "60");
+CONF_Int32(tablet_writer_open_rpc_timeout_sec, "60");
+// You can ignore brpc error '[E1011]The server is overcrowded' when writing data.
+CONF_mBool(tablet_writer_ignore_eovercrowded, "false");
+
 // OlapTableSink sender's send interval, should be less than the real response time of a tablet writer rpc.
+// You may need to lower the send speed when the sink receiver BEs are too busy.
 CONF_mInt32(olap_table_sink_send_interval_ms, "1");
 
 // Fragment thread pool
diff --git a/be/src/exec/tablet_sink.cpp b/be/src/exec/tablet_sink.cpp
index 98f5b35..47305d9 100644
--- a/be/src/exec/tablet_sink.cpp
+++ b/be/src/exec/tablet_sink.cpp
@@ -110,6 +110,9 @@ void NodeChannel::open() {
     // This ref is for RPC's reference
     _open_closure->ref();
     _open_closure->cntl.set_timeout_ms(config::tablet_writer_open_rpc_timeout_sec * 1000);
+    if (config::tablet_writer_ignore_eovercrowded) {
+        _open_closure->cntl.ignore_eovercrowded();
+    }
     _stub->tablet_writer_open(&_open_closure->cntl, &request, &_open_closure->result,
                               _open_closure);
     request.release_id();
@@ -294,6 +297,9 @@ void NodeChannel::cancel() {
 
     closure->ref();
     closure->cntl.set_timeout_ms(_rpc_timeout_ms);
+    if (config::tablet_writer_ignore_eovercrowded) {
+        closure->cntl.ignore_eovercrowded();
+    }
     _stub->tablet_writer_cancel(&closure->cntl, &request, &closure->result, closure);
     request.release_id();
 }
@@ -327,6 +333,9 @@ int NodeChannel::try_send_and_fetch_status() {
 
         _add_batch_closure->reset();
         _add_batch_closure->cntl.set_timeout_ms(_rpc_timeout_ms);
+        if (config::tablet_writer_ignore_eovercrowded) {
+            _add_batch_closure->cntl.ignore_eovercrowded();
+        }
 
         if (request.eos()) {
             for (auto pid : _parent->_partition_ids) {
@@ -674,13 +683,13 @@ Status OlapTableSink::send(RuntimeState* state, RowBatch* input_batch) {
 }
 
 Status OlapTableSink::close(RuntimeState* state, Status close_status) {
-	if (_is_closed) {
-		/// The close method may be called twice.
-		/// In the open_internal() method of plan_fragment_executor, close is called once.
-		/// If an error occurs in this call, it will be called again in fragment_mgr.
-		/// So here we use a flag to prevent repeated close operations.
-		return _close_status;
-	}
+    if (_is_closed) {
+        /// The close method may be called twice.
+        /// In the open_internal() method of plan_fragment_executor, close is called once.
+        /// If an error occurs in this call, it will be called again in fragment_mgr.
+        /// So here we use a flag to prevent repeated close operations.
+        return _close_status;
+    }
     Status status = close_status;
     if (status.ok()) {
         // only if status is ok can we call this _profile->total_time_counter().
@@ -758,8 +767,8 @@ Status OlapTableSink::close(RuntimeState* state, Status close_status) {
     Expr::close(_output_expr_ctxs, state);
     _output_batch.reset();
 
-	_close_status = status;
-	_is_closed = true;
+    _close_status = status;
+    _is_closed = true;
     return status;
 }
 
diff --git a/docs/en/administrator-guide/config/be_config.md b/docs/en/administrator-guide/config/be_config.md
index 73e0476..3437639 100644
--- a/docs/en/administrator-guide/config/be_config.md
+++ b/docs/en/administrator-guide/config/be_config.md
@@ -794,6 +794,14 @@ When writing is too frequent and the disk time is insufficient, you can configur
 
 ### `tablet_writer_open_rpc_timeout_sec`
 
+### `tablet_writer_ignore_eovercrowded`
+
+* Type: bool
+* Description: Used to ignore the brpc error '[E1011]The server is overcrowded' when writing data.
+* Default value: false
+
+When you encounter the '[E1011]The server is overcrowded' error, you can tune the configuration `brpc_socket_max_unwritten_bytes`, but it can't be modified at runtime. Set this option to `true` to temporarily avoid write failures. Note that it only affects the write path; other rpc requests will still check if overcrowded.
+
 ### `tc_free_memory_rate`
 
 ### `tc_max_total_thread_cache_bytes`
diff --git a/docs/zh-CN/administrator-guide/config/be_config.md b/docs/zh-CN/administrator-guide/config/be_config.md
index 2a00307..7cc4c91 100644
--- a/docs/zh-CN/administrator-guide/config/be_config.md
+++ b/docs/zh-CN/administrator-guide/config/be_config.md
@@ -795,6 +795,14 @@ Stream Load 一般适用于导入几个GB以内的数据,不适合导入过大
 
 ### `tablet_writer_open_rpc_timeout_sec`
 
+### `tablet_writer_ignore_eovercrowded`
+
+* 类型:bool
+* 描述:写入时可忽略brpc的'[E1011]The server is overcrowded'错误。
+* 默认值:false
+
+当遇到'[E1011]The server is overcrowded'的错误时,可以调整配置项`brpc_socket_max_unwritten_bytes`,但这个配置项不能动态调整。所以可通过设置此项为`true`来临时避免写失败。注意,此配置项只影响写流程,其他的rpc请求依旧会检查是否overcrowded。
+
 ### `tc_free_memory_rate`
 
 ### `tc_max_total_thread_cache_bytes`


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org