You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2022/05/16 16:39:17 UTC

[incubator-doris] 07/17: [Bug] Missing error tablet list when close_wait returns an error (#9418)

This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch dev-1.0.1
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git

commit 0c73dc85a6aabb30b4104f46fea1fbdf3a24587e
Author: pengxiangyu <di...@163.com>
AuthorDate: Sun May 8 06:45:28 2022 +0800

    [Bug] Missing error tablet list when close_wait returns an error (#9418)
---
 be/src/olap/delta_writer.cpp       | 12 ++++++++++--
 be/src/olap/delta_writer.h         |  4 +++-
 be/src/runtime/load_channel.cpp    |  3 ++-
 be/src/runtime/tablets_channel.cpp |  7 +++++--
 be/src/runtime/tablets_channel.h   |  5 ++++-
 5 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/be/src/olap/delta_writer.cpp b/be/src/olap/delta_writer.cpp
index 8446340d5a..198019561f 100644
--- a/be/src/olap/delta_writer.cpp
+++ b/be/src/olap/delta_writer.cpp
@@ -279,7 +279,9 @@ OLAPStatus DeltaWriter::close() {
     return OLAP_SUCCESS;
 }
 
-OLAPStatus DeltaWriter::close_wait(google::protobuf::RepeatedPtrField<PTabletInfo>* tablet_vec, bool is_broken) {
+OLAPStatus DeltaWriter::close_wait(google::protobuf::RepeatedPtrField<PTabletInfo>* tablet_vec,
+                               google::protobuf::RepeatedPtrField<PTabletError>* tablet_errors,
+                               bool is_broken) {
     std::lock_guard<std::mutex> l(_lock);
     DCHECK(_is_init)
             << "delta writer is supposed be to initialized before close_wait() being called";
@@ -289,7 +291,13 @@ OLAPStatus DeltaWriter::close_wait(google::protobuf::RepeatedPtrField<PTabletInf
     }
 
     // return error if previous flush failed
-    RETURN_NOT_OK(_flush_token->wait());
+    OLAPStatus st = _flush_token->wait();
+    if (st != OLAP_SUCCESS) {
+        PTabletError* tablet_error = tablet_errors->Add();
+        tablet_error->set_tablet_id(_tablet->tablet_id());
+        tablet_error->set_msg("flush failed");
+        return st;
+    }
     DCHECK_EQ(_mem_tracker->consumption(), 0);
 
     // use rowset meta manager to save meta
diff --git a/be/src/olap/delta_writer.h b/be/src/olap/delta_writer.h
index c765d03115..cf5a2729d2 100644
--- a/be/src/olap/delta_writer.h
+++ b/be/src/olap/delta_writer.h
@@ -67,7 +67,9 @@ public:
     OLAPStatus close();
     // wait for all memtables to be flushed.
     // mem_consumption() should be 0 after this function returns.
-    OLAPStatus close_wait(google::protobuf::RepeatedPtrField<PTabletInfo>* tablet_vec, bool is_broken);
+    OLAPStatus close_wait(google::protobuf::RepeatedPtrField<PTabletInfo>* tablet_vec,
+                      google::protobuf::RepeatedPtrField<PTabletError>* tablet_errors,
+                      bool is_broken);
 
     // abandon current memtable and wait for all pending-flushing memtables to be destructed.
     // mem_consumption() should be 0 after this function returns.
diff --git a/be/src/runtime/load_channel.cpp b/be/src/runtime/load_channel.cpp
index db523f2aa7..715762bd18 100644
--- a/be/src/runtime/load_channel.cpp
+++ b/be/src/runtime/load_channel.cpp
@@ -99,7 +99,8 @@ Status LoadChannel::add_batch(const PTabletWriterAddBatchRequest& request,
         bool finished = false;
         RETURN_IF_ERROR(channel->close(request.sender_id(), request.backend_id(), 
                                        &finished, request.partition_ids(),
-                                       response->mutable_tablet_vec()));
+                                       response->mutable_tablet_vec(),
+                                       response->mutable_tablet_errors()));
         if (finished) {
             std::lock_guard<std::mutex> l(_lock);
             _tablets_channels.erase(index_id);
diff --git a/be/src/runtime/tablets_channel.cpp b/be/src/runtime/tablets_channel.cpp
index 75f9936e9b..926cba7aaa 100644
--- a/be/src/runtime/tablets_channel.cpp
+++ b/be/src/runtime/tablets_channel.cpp
@@ -149,7 +149,8 @@ Status TabletsChannel::add_batch(const PTabletWriterAddBatchRequest& request,
 
 Status TabletsChannel::close(int sender_id, int64_t backend_id, bool* finished,
                              const google::protobuf::RepeatedField<int64_t>& partition_ids,
-                             google::protobuf::RepeatedPtrField<PTabletInfo>* tablet_vec) {
+                             google::protobuf::RepeatedPtrField<PTabletInfo>* tablet_vec,
+                             google::protobuf::RepeatedPtrField<PTabletError>* tablet_errors) {
     std::lock_guard<std::mutex> l(_lock);
     if (_state == kFinished) {
         return _close_status;
@@ -197,7 +198,9 @@ Status TabletsChannel::close(int sender_id, int64_t backend_id, bool* finished,
         for (auto writer : need_wait_writers) {
             // close may return a failure, but there is no need to handle it here.
             // tablet_vec will only contain the successful tablets, and the FE then judges the result.
-            writer->close_wait(tablet_vec, (_broken_tablets.find(writer->tablet_id()) != _broken_tablets.end()));
+            writer->close_wait(
+                    tablet_vec, tablet_errors,
+                    (_broken_tablets.find(writer->tablet_id()) != _broken_tablets.end()));
         }
         // TODO(gaodayue) clear and destruct all delta writers to make sure all memory are freed
         // DCHECK_EQ(_mem_tracker->consumption(), 0);
diff --git a/be/src/runtime/tablets_channel.h b/be/src/runtime/tablets_channel.h
index e99ac6264b..360242ae88 100644
--- a/be/src/runtime/tablets_channel.h
+++ b/be/src/runtime/tablets_channel.h
@@ -15,6 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#pragma once
+
 #include <cstdint>
 #include <unordered_map>
 #include <utility>
@@ -69,7 +71,8 @@ public:
     // no-op when this channel has been closed or cancelled
     Status close(int sender_id, int64_t backend_id, bool* finished,
                  const google::protobuf::RepeatedField<int64_t>& partition_ids,
-                 google::protobuf::RepeatedPtrField<PTabletInfo>* tablet_vec);
+                 google::protobuf::RepeatedPtrField<PTabletInfo>* tablet_vec,
+                 google::protobuf::RepeatedPtrField<PTabletError>* tablet_error);
 
     // no-op when this channel has been closed or cancelled
     Status cancel();


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org