You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2022/05/16 16:39:17 UTC
[incubator-doris] 07/17: [Bug] Missing error tablet list when close_wait return error (#9418)
This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch dev-1.0.1
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
commit 0c73dc85a6aabb30b4104f46fea1fbdf3a24587e
Author: pengxiangyu <di...@163.com>
AuthorDate: Sun May 8 06:45:28 2022 +0800
[Bug] Missing error tablet list when close_wait return error (#9418)
---
be/src/olap/delta_writer.cpp | 12 ++++++++++--
be/src/olap/delta_writer.h | 4 +++-
be/src/runtime/load_channel.cpp | 3 ++-
be/src/runtime/tablets_channel.cpp | 7 +++++--
be/src/runtime/tablets_channel.h | 5 ++++-
5 files changed, 24 insertions(+), 7 deletions(-)
diff --git a/be/src/olap/delta_writer.cpp b/be/src/olap/delta_writer.cpp
index 8446340d5a..198019561f 100644
--- a/be/src/olap/delta_writer.cpp
+++ b/be/src/olap/delta_writer.cpp
@@ -279,7 +279,9 @@ OLAPStatus DeltaWriter::close() {
return OLAP_SUCCESS;
}
-OLAPStatus DeltaWriter::close_wait(google::protobuf::RepeatedPtrField<PTabletInfo>* tablet_vec, bool is_broken) {
+OLAPStatus DeltaWriter::close_wait(google::protobuf::RepeatedPtrField<PTabletInfo>* tablet_vec,
+ google::protobuf::RepeatedPtrField<PTabletError>* tablet_errors,
+ bool is_broken) {
std::lock_guard<std::mutex> l(_lock);
DCHECK(_is_init)
<< "delta writer is supposed be to initialized before close_wait() being called";
@@ -289,7 +291,13 @@ OLAPStatus DeltaWriter::close_wait(google::protobuf::RepeatedPtrField<PTabletInf
}
// return error if previous flush failed
- RETURN_NOT_OK(_flush_token->wait());
+ OLAPStatus st = _flush_token->wait();
+ if (st != OLAP_SUCCESS) {
+ PTabletError* tablet_error = tablet_errors->Add();
+ tablet_error->set_tablet_id(_tablet->tablet_id());
+ tablet_error->set_msg("flush failed");
+ return st;
+ }
DCHECK_EQ(_mem_tracker->consumption(), 0);
// use rowset meta manager to save meta
diff --git a/be/src/olap/delta_writer.h b/be/src/olap/delta_writer.h
index c765d03115..cf5a2729d2 100644
--- a/be/src/olap/delta_writer.h
+++ b/be/src/olap/delta_writer.h
@@ -67,7 +67,9 @@ public:
OLAPStatus close();
// wait for all memtables to be flushed.
// mem_consumption() should be 0 after this function returns.
- OLAPStatus close_wait(google::protobuf::RepeatedPtrField<PTabletInfo>* tablet_vec, bool is_broken);
+ OLAPStatus close_wait(google::protobuf::RepeatedPtrField<PTabletInfo>* tablet_vec,
+ google::protobuf::RepeatedPtrField<PTabletError>* tablet_errors,
+ bool is_broken);
// abandon current memtable and wait for all pending-flushing memtables to be destructed.
// mem_consumption() should be 0 after this function returns.
diff --git a/be/src/runtime/load_channel.cpp b/be/src/runtime/load_channel.cpp
index db523f2aa7..715762bd18 100644
--- a/be/src/runtime/load_channel.cpp
+++ b/be/src/runtime/load_channel.cpp
@@ -99,7 +99,8 @@ Status LoadChannel::add_batch(const PTabletWriterAddBatchRequest& request,
bool finished = false;
RETURN_IF_ERROR(channel->close(request.sender_id(), request.backend_id(),
&finished, request.partition_ids(),
- response->mutable_tablet_vec()));
+ response->mutable_tablet_vec(),
+ response->mutable_tablet_errors()));
if (finished) {
std::lock_guard<std::mutex> l(_lock);
_tablets_channels.erase(index_id);
diff --git a/be/src/runtime/tablets_channel.cpp b/be/src/runtime/tablets_channel.cpp
index 75f9936e9b..926cba7aaa 100644
--- a/be/src/runtime/tablets_channel.cpp
+++ b/be/src/runtime/tablets_channel.cpp
@@ -149,7 +149,8 @@ Status TabletsChannel::add_batch(const PTabletWriterAddBatchRequest& request,
Status TabletsChannel::close(int sender_id, int64_t backend_id, bool* finished,
const google::protobuf::RepeatedField<int64_t>& partition_ids,
- google::protobuf::RepeatedPtrField<PTabletInfo>* tablet_vec) {
+ google::protobuf::RepeatedPtrField<PTabletInfo>* tablet_vec,
+ google::protobuf::RepeatedPtrField<PTabletError>* tablet_errors) {
std::lock_guard<std::mutex> l(_lock);
if (_state == kFinished) {
return _close_status;
@@ -197,7 +198,9 @@ Status TabletsChannel::close(int sender_id, int64_t backend_id, bool* finished,
for (auto writer : need_wait_writers) {
// close_wait() may fail, but there is no need to handle the error here.
// tablet_vec will only contain the successful tablets, and FE then judges the result.
- writer->close_wait(tablet_vec, (_broken_tablets.find(writer->tablet_id()) != _broken_tablets.end()));
+ writer->close_wait(
+ tablet_vec, tablet_errors,
+ (_broken_tablets.find(writer->tablet_id()) != _broken_tablets.end()));
}
// TODO(gaodayue) clear and destruct all delta writers to make sure all memory are freed
// DCHECK_EQ(_mem_tracker->consumption(), 0);
diff --git a/be/src/runtime/tablets_channel.h b/be/src/runtime/tablets_channel.h
index e99ac6264b..360242ae88 100644
--- a/be/src/runtime/tablets_channel.h
+++ b/be/src/runtime/tablets_channel.h
@@ -15,6 +15,8 @@
// specific language governing permissions and limitations
// under the License.
+#pragma once
+
#include <cstdint>
#include <unordered_map>
#include <utility>
@@ -69,7 +71,8 @@ public:
// no-op when this channel has been closed or cancelled
Status close(int sender_id, int64_t backend_id, bool* finished,
const google::protobuf::RepeatedField<int64_t>& partition_ids,
- google::protobuf::RepeatedPtrField<PTabletInfo>* tablet_vec);
+ google::protobuf::RepeatedPtrField<PTabletInfo>* tablet_vec,
+ google::protobuf::RepeatedPtrField<PTabletError>* tablet_error);
// no-op when this channel has been closed or cancelled
Status cancel();
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org