You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@pegasus.apache.org by GitBox <gi...@apache.org> on 2022/08/22 09:09:12 UTC

[GitHub] [incubator-pegasus] foreverneverer commented on a diff in pull request #1128: feat(backup): 5. replica create backup checkpoint

foreverneverer commented on code in PR #1128:
URL: https://github.com/apache/incubator-pegasus/pull/1128#discussion_r951144679


##########
src/rdsn/src/replica/backup/replica_backup_manager.cpp:
##########
@@ -15,21 +15,182 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include "replica_backup_manager.h"
-#include "replica/replica.h"
-
 #include <dsn/dist/fmt_logging.h>
-#include <dsn/utility/filesystem.h>
-#include <dsn/dist/replication/replication_app_base.h>
+#include <dsn/utility/fail_point.h>
+
+#include "replica_backup_manager.h"
 
 namespace dsn {
 namespace replication {
 
 // TODO(heyuchen): implement it
 
-replica_backup_manager::replica_backup_manager(replica *r) : replica_base(r), _replica(r) {}
+replica_backup_manager::replica_backup_manager(replica *r)
+    : replica_base(r), _replica(r), _stub(r->get_replica_stub())
+{
+}
 
 replica_backup_manager::~replica_backup_manager() {}
 
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::on_backup(const backup_request &request,
+                                       /*out*/ backup_response &response)
+{
+    // TODO(heyuchen): add other status
+
+    if (request.status == backup_status::CHECKPOINTING) {
+        try_to_checkpoint(request.backup_id, response);
+        return;
+    }
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::try_to_checkpoint(const int64_t &backup_id,
+                                               /*out*/ backup_response &response)
+{
+    switch (_status) {
+    case backup_status::UNINITIALIZED:
+        start_checkpointing(backup_id, response);
+        break;
+    case backup_status::CHECKPOINTING:
+    case backup_status::CHECKPOINTED:
+        report_checkpointing(response);
+        break;
+    default:
+        response.err = ERR_INVALID_STATE;
+        derror_replica("invalid local status({}) while request status = {}",
+                       enum_to_string(_status),
+                       enum_to_string(backup_status::CHECKPOINTING));
+        break;
+    }
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::start_checkpointing(int64_t backup_id,
+                                                 /*out*/ backup_response &response)
+{
+    FAIL_POINT_INJECT_F("replica_backup_start_checkpointing", [&](dsn::string_view) {
+        _status = backup_status::CHECKPOINTING;
+        response.err = ERR_OK;
+    });
+
+    ddebug_replica("start to checkpoint, backup_id = {}", backup_id);
+    zauto_write_lock l(_lock);
+    _status = backup_status::CHECKPOINTING;
+    _backup_id = backup_id;
+    _checkpointing_task =
+        tasking::enqueue(LPC_BACKGROUND_COLD_BACKUP,
+                         tracker(),
+                         std::bind(&replica_backup_manager::generate_checkpoint, this));
+    fill_response_unlock(response);
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::report_checkpointing(/*out*/ backup_response &response)
+{
+    ddebug_replica("check checkpoint, backup_id = {}", _backup_id);
+    zauto_read_lock l(_lock);
+    if (_checkpoint_err != ERR_OK) {
+        derror_replica("checkpoint failed, error = {}", _checkpoint_err);
+        response.__set_checkpoint_upload_err(_checkpoint_err);
+    }
+    fill_response_unlock(response);
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::fill_response_unlock(/*out*/ backup_response &response)
+{
+    response.err = ERR_OK;
+    response.pid = get_gpid();
+    response.backup_id = _backup_id;
+    response.status = _status;
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION_LONG
+void replica_backup_manager::generate_checkpoint()
+{
+    const auto &local_checkpoint_dir = get_local_checkpoint_dir();

Review Comment:
   Can this reuse the existing code at https://github.com/apache/incubator-pegasus/blob/master/src/rdsn/src/replica/replica_chkpt.cpp#L135?



##########
src/rdsn/src/replica/backup/replica_backup_manager.cpp:
##########
@@ -15,21 +15,182 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include "replica_backup_manager.h"
-#include "replica/replica.h"
-
 #include <dsn/dist/fmt_logging.h>
-#include <dsn/utility/filesystem.h>
-#include <dsn/dist/replication/replication_app_base.h>
+#include <dsn/utility/fail_point.h>
+
+#include "replica_backup_manager.h"
 
 namespace dsn {
 namespace replication {
 
 // TODO(heyuchen): implement it
 
-replica_backup_manager::replica_backup_manager(replica *r) : replica_base(r), _replica(r) {}
+replica_backup_manager::replica_backup_manager(replica *r)
+    : replica_base(r), _replica(r), _stub(r->get_replica_stub())
+{
+}
 
 replica_backup_manager::~replica_backup_manager() {}
 
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::on_backup(const backup_request &request,
+                                       /*out*/ backup_response &response)
+{
+    // TODO(heyuchen): add other status
+
+    if (request.status == backup_status::CHECKPOINTING) {
+        try_to_checkpoint(request.backup_id, response);
+        return;
+    }
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::try_to_checkpoint(const int64_t &backup_id,
+                                               /*out*/ backup_response &response)
+{
+    switch (_status) {
+    case backup_status::UNINITIALIZED:
+        start_checkpointing(backup_id, response);
+        break;
+    case backup_status::CHECKPOINTING:
+    case backup_status::CHECKPOINTED:
+        report_checkpointing(response);
+        break;
+    default:
+        response.err = ERR_INVALID_STATE;
+        derror_replica("invalid local status({}) while request status = {}",
+                       enum_to_string(_status),
+                       enum_to_string(backup_status::CHECKPOINTING));
+        break;
+    }
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::start_checkpointing(int64_t backup_id,
+                                                 /*out*/ backup_response &response)
+{
+    FAIL_POINT_INJECT_F("replica_backup_start_checkpointing", [&](dsn::string_view) {
+        _status = backup_status::CHECKPOINTING;
+        response.err = ERR_OK;
+    });
+
+    ddebug_replica("start to checkpoint, backup_id = {}", backup_id);
+    zauto_write_lock l(_lock);
+    _status = backup_status::CHECKPOINTING;
+    _backup_id = backup_id;
+    _checkpointing_task =
+        tasking::enqueue(LPC_BACKGROUND_COLD_BACKUP,
+                         tracker(),
+                         std::bind(&replica_backup_manager::generate_checkpoint, this));
+    fill_response_unlock(response);
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::report_checkpointing(/*out*/ backup_response &response)
+{
+    ddebug_replica("check checkpoint, backup_id = {}", _backup_id);
+    zauto_read_lock l(_lock);
+    if (_checkpoint_err != ERR_OK) {
+        derror_replica("checkpoint failed, error = {}", _checkpoint_err);
+        response.__set_checkpoint_upload_err(_checkpoint_err);
+    }
+    fill_response_unlock(response);
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::fill_response_unlock(/*out*/ backup_response &response)
+{
+    response.err = ERR_OK;
+    response.pid = get_gpid();
+    response.backup_id = _backup_id;
+    response.status = _status;
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION_LONG
+void replica_backup_manager::generate_checkpoint()
+{
+    const auto &local_checkpoint_dir = get_local_checkpoint_dir();
+
+    if (!utils::filesystem::directory_exists(local_checkpoint_dir) &&
+        !utils::filesystem::create_directory(local_checkpoint_dir)) {
+        derror_replica("create local backup dir {} failed", local_checkpoint_dir);
+        set_checkpoint_err(ERR_FILE_OPERATION_FAILED);
+        return;
+    }
+
+    // generate checkpoint and flush memtable
+    int64_t checkpoint_decree;
+    const auto &ec = _replica->_app->copy_checkpoint_to_dir(
+        local_checkpoint_dir.c_str(), &checkpoint_decree, true);
+    if (ec != ERR_OK) {
+        derror_replica("generate backup checkpoint failed, error = {}", ec);
+        set_checkpoint_err(ec);
+        return;
+    }
+    ddebug_replica(
+        "generate backup checkpoint succeed: checkpoint dir = {}, checkpoint decree = {}",
+        local_checkpoint_dir,
+        checkpoint_decree);
+
+    {
+        zauto_write_lock l(_lock);
+        if (!get_backup_metadata_unlock(

Review Comment:
   Consider renaming `get_backup_metadata_unlock` to `set_backup_metadata_unlock` or `fill_XXXX`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: dev-unsubscribe@pegasus.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@pegasus.apache.org
For additional commands, e-mail: dev-help@pegasus.apache.org