You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@pegasus.apache.org by GitBox <gi...@apache.org> on 2022/08/22 09:32:05 UTC

[GitHub] [incubator-pegasus] hycdong commented on a diff in pull request #1128: feat(backup): 5. replica create backup checkpoint

hycdong commented on code in PR #1128:
URL: https://github.com/apache/incubator-pegasus/pull/1128#discussion_r951218993


##########
src/rdsn/src/replica/backup/replica_backup_manager.cpp:
##########
@@ -15,21 +15,182 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include "replica_backup_manager.h"
-#include "replica/replica.h"
-
 #include <dsn/dist/fmt_logging.h>
-#include <dsn/utility/filesystem.h>
-#include <dsn/dist/replication/replication_app_base.h>
+#include <dsn/utility/fail_point.h>
+
+#include "replica_backup_manager.h"
 
 namespace dsn {
 namespace replication {
 
 // TODO(heyuchen): implement it
 
-replica_backup_manager::replica_backup_manager(replica *r) : replica_base(r), _replica(r) {}
+replica_backup_manager::replica_backup_manager(replica *r)
+    : replica_base(r), _replica(r), _stub(r->get_replica_stub())
+{
+}
 
 replica_backup_manager::~replica_backup_manager() {}
 
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::on_backup(const backup_request &request,
+                                       /*out*/ backup_response &response)
+{
+    // TODO(heyuchen): add other status
+
+    if (request.status == backup_status::CHECKPOINTING) {
+        try_to_checkpoint(request.backup_id, response);
+        return;
+    }
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::try_to_checkpoint(const int64_t &backup_id,
+                                               /*out*/ backup_response &response)
+{
+    switch (_status) {
+    case backup_status::UNINITIALIZED:
+        start_checkpointing(backup_id, response);
+        break;
+    case backup_status::CHECKPOINTING:
+    case backup_status::CHECKPOINTED:
+        report_checkpointing(response);
+        break;
+    default:
+        response.err = ERR_INVALID_STATE;
+        derror_replica("invalid local status({}) while request status = {}",
+                       enum_to_string(_status),
+                       enum_to_string(backup_status::CHECKPOINTING));
+        break;
+    }
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::start_checkpointing(int64_t backup_id,
+                                                 /*out*/ backup_response &response)
+{
+    FAIL_POINT_INJECT_F("replica_backup_start_checkpointing", [&](dsn::string_view) {
+        _status = backup_status::CHECKPOINTING;
+        response.err = ERR_OK;
+    });
+
+    ddebug_replica("start to checkpoint, backup_id = {}", backup_id);
+    zauto_write_lock l(_lock);
+    _status = backup_status::CHECKPOINTING;
+    _backup_id = backup_id;
+    _checkpointing_task =
+        tasking::enqueue(LPC_BACKGROUND_COLD_BACKUP,
+                         tracker(),
+                         std::bind(&replica_backup_manager::generate_checkpoint, this));
+    fill_response_unlock(response);
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::report_checkpointing(/*out*/ backup_response &response)
+{
+    ddebug_replica("check checkpoint, backup_id = {}", _backup_id);
+    zauto_read_lock l(_lock);
+    if (_checkpoint_err != ERR_OK) {
+        derror_replica("checkpoint failed, error = {}", _checkpoint_err);
+        response.__set_checkpoint_upload_err(_checkpoint_err);
+    }
+    fill_response_unlock(response);
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::fill_response_unlock(/*out*/ backup_response &response)
+{
+    response.err = ERR_OK;
+    response.pid = get_gpid();
+    response.backup_id = _backup_id;
+    response.status = _status;
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION_LONG
+void replica_backup_manager::generate_checkpoint()
+{
+    const auto &local_checkpoint_dir = get_local_checkpoint_dir();

Review Comment:
   `generate_checkpoint` performs the following steps:
   1. create a local directory to store the backup checkpoint, using the function `copy_checkpoint_to_dir`
   2. create the checkpoint under the directory above
   3. generate the backup metadata
   
   The function `trigger_manual_emergency_checkpoint` triggers a checkpoint and flushes the memtable in the DEFAULT checkpoint directory, and it also performs a decree comparison and a perf-counter update, neither of which is useful in the backup process. Besides, the functions in `replica_chkpt` ultimately call `copy_checkpoint_to_dir` anyway. As a result, I don't reuse those functions.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: dev-unsubscribe@pegasus.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@pegasus.apache.org
For additional commands, e-mail: dev-help@pegasus.apache.org