You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@pegasus.apache.org by GitBox <gi...@apache.org> on 2022/08/19 06:40:03 UTC

[GitHub] [incubator-pegasus] hycdong opened a new pull request, #1128: feat(backup): 5. replica create backup checkpoint

hycdong opened a new pull request, #1128:
URL: https://github.com/apache/incubator-pegasus/pull/1128

   https://github.com/apache/incubator-pegasus/issues/1081


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: dev-unsubscribe@pegasus.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@pegasus.apache.org
For additional commands, e-mail: dev-help@pegasus.apache.org


[GitHub] [incubator-pegasus] foreverneverer commented on a diff in pull request #1128: feat(backup): 5. replica create backup checkpoint

Posted by GitBox <gi...@apache.org>.
foreverneverer commented on code in PR #1128:
URL: https://github.com/apache/incubator-pegasus/pull/1128#discussion_r951144679


##########
src/rdsn/src/replica/backup/replica_backup_manager.cpp:
##########
@@ -15,21 +15,182 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include "replica_backup_manager.h"
-#include "replica/replica.h"
-
 #include <dsn/dist/fmt_logging.h>
-#include <dsn/utility/filesystem.h>
-#include <dsn/dist/replication/replication_app_base.h>
+#include <dsn/utility/fail_point.h>
+
+#include "replica_backup_manager.h"
 
 namespace dsn {
 namespace replication {
 
 // TODO(heyuchen): implement it
 
-replica_backup_manager::replica_backup_manager(replica *r) : replica_base(r), _replica(r) {}
+replica_backup_manager::replica_backup_manager(replica *r)
+    : replica_base(r), _replica(r), _stub(r->get_replica_stub())
+{
+}
 
 replica_backup_manager::~replica_backup_manager() {}
 
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::on_backup(const backup_request &request,
+                                       /*out*/ backup_response &response)
+{
+    // TODO(heyuchen): add other status
+
+    if (request.status == backup_status::CHECKPOINTING) {
+        try_to_checkpoint(request.backup_id, response);
+        return;
+    }
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::try_to_checkpoint(const int64_t &backup_id,
+                                               /*out*/ backup_response &response)
+{
+    switch (_status) {
+    case backup_status::UNINITIALIZED:
+        start_checkpointing(backup_id, response);
+        break;
+    case backup_status::CHECKPOINTING:
+    case backup_status::CHECKPOINTED:
+        report_checkpointing(response);
+        break;
+    default:
+        response.err = ERR_INVALID_STATE;
+        derror_replica("invalid local status({}) while request status = {}",
+                       enum_to_string(_status),
+                       enum_to_string(backup_status::CHECKPOINTING));
+        break;
+    }
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::start_checkpointing(int64_t backup_id,
+                                                 /*out*/ backup_response &response)
+{
+    FAIL_POINT_INJECT_F("replica_backup_start_checkpointing", [&](dsn::string_view) {
+        _status = backup_status::CHECKPOINTING;
+        response.err = ERR_OK;
+    });
+
+    ddebug_replica("start to checkpoint, backup_id = {}", backup_id);
+    zauto_write_lock l(_lock);
+    _status = backup_status::CHECKPOINTING;
+    _backup_id = backup_id;
+    _checkpointing_task =
+        tasking::enqueue(LPC_BACKGROUND_COLD_BACKUP,
+                         tracker(),
+                         std::bind(&replica_backup_manager::generate_checkpoint, this));
+    fill_response_unlock(response);
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::report_checkpointing(/*out*/ backup_response &response)
+{
+    ddebug_replica("check checkpoint, backup_id = {}", _backup_id);
+    zauto_read_lock l(_lock);
+    if (_checkpoint_err != ERR_OK) {
+        derror_replica("checkpoint failed, error = {}", _checkpoint_err);
+        response.__set_checkpoint_upload_err(_checkpoint_err);
+    }
+    fill_response_unlock(response);
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::fill_response_unlock(/*out*/ backup_response &response)
+{
+    response.err = ERR_OK;
+    response.pid = get_gpid();
+    response.backup_id = _backup_id;
+    response.status = _status;
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION_LONG
+void replica_backup_manager::generate_checkpoint()
+{
+    const auto &local_checkpoint_dir = get_local_checkpoint_dir();

Review Comment:
   can it reuse  https://github.com/apache/incubator-pegasus/blob/master/src/rdsn/src/replica/replica_chkpt.cpp#L135 ?



##########
src/rdsn/src/replica/backup/replica_backup_manager.cpp:
##########
@@ -15,21 +15,182 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include "replica_backup_manager.h"
-#include "replica/replica.h"
-
 #include <dsn/dist/fmt_logging.h>
-#include <dsn/utility/filesystem.h>
-#include <dsn/dist/replication/replication_app_base.h>
+#include <dsn/utility/fail_point.h>
+
+#include "replica_backup_manager.h"
 
 namespace dsn {
 namespace replication {
 
 // TODO(heyuchen): implement it
 
-replica_backup_manager::replica_backup_manager(replica *r) : replica_base(r), _replica(r) {}
+replica_backup_manager::replica_backup_manager(replica *r)
+    : replica_base(r), _replica(r), _stub(r->get_replica_stub())
+{
+}
 
 replica_backup_manager::~replica_backup_manager() {}
 
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::on_backup(const backup_request &request,
+                                       /*out*/ backup_response &response)
+{
+    // TODO(heyuchen): add other status
+
+    if (request.status == backup_status::CHECKPOINTING) {
+        try_to_checkpoint(request.backup_id, response);
+        return;
+    }
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::try_to_checkpoint(const int64_t &backup_id,
+                                               /*out*/ backup_response &response)
+{
+    switch (_status) {
+    case backup_status::UNINITIALIZED:
+        start_checkpointing(backup_id, response);
+        break;
+    case backup_status::CHECKPOINTING:
+    case backup_status::CHECKPOINTED:
+        report_checkpointing(response);
+        break;
+    default:
+        response.err = ERR_INVALID_STATE;
+        derror_replica("invalid local status({}) while request status = {}",
+                       enum_to_string(_status),
+                       enum_to_string(backup_status::CHECKPOINTING));
+        break;
+    }
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::start_checkpointing(int64_t backup_id,
+                                                 /*out*/ backup_response &response)
+{
+    FAIL_POINT_INJECT_F("replica_backup_start_checkpointing", [&](dsn::string_view) {
+        _status = backup_status::CHECKPOINTING;
+        response.err = ERR_OK;
+    });
+
+    ddebug_replica("start to checkpoint, backup_id = {}", backup_id);
+    zauto_write_lock l(_lock);
+    _status = backup_status::CHECKPOINTING;
+    _backup_id = backup_id;
+    _checkpointing_task =
+        tasking::enqueue(LPC_BACKGROUND_COLD_BACKUP,
+                         tracker(),
+                         std::bind(&replica_backup_manager::generate_checkpoint, this));
+    fill_response_unlock(response);
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::report_checkpointing(/*out*/ backup_response &response)
+{
+    ddebug_replica("check checkpoint, backup_id = {}", _backup_id);
+    zauto_read_lock l(_lock);
+    if (_checkpoint_err != ERR_OK) {
+        derror_replica("checkpoint failed, error = {}", _checkpoint_err);
+        response.__set_checkpoint_upload_err(_checkpoint_err);
+    }
+    fill_response_unlock(response);
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::fill_response_unlock(/*out*/ backup_response &response)
+{
+    response.err = ERR_OK;
+    response.pid = get_gpid();
+    response.backup_id = _backup_id;
+    response.status = _status;
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION_LONG
+void replica_backup_manager::generate_checkpoint()
+{
+    const auto &local_checkpoint_dir = get_local_checkpoint_dir();
+
+    if (!utils::filesystem::directory_exists(local_checkpoint_dir) &&
+        !utils::filesystem::create_directory(local_checkpoint_dir)) {
+        derror_replica("create local backup dir {} failed", local_checkpoint_dir);
+        set_checkpoint_err(ERR_FILE_OPERATION_FAILED);
+        return;
+    }
+
+    // generate checkpoint and flush memtable
+    int64_t checkpoint_decree;
+    const auto &ec = _replica->_app->copy_checkpoint_to_dir(
+        local_checkpoint_dir.c_str(), &checkpoint_decree, true);
+    if (ec != ERR_OK) {
+        derror_replica("generate backup checkpoint failed, error = {}", ec);
+        set_checkpoint_err(ec);
+        return;
+    }
+    ddebug_replica(
+        "generate backup checkpoint succeed: checkpoint dir = {}, checkpoint decree = {}",
+        local_checkpoint_dir,
+        checkpoint_decree);
+
+    {
+        zauto_write_lock l(_lock);
+        if (!get_backup_metadata_unlock(

Review Comment:
   `set_backup_metadata_unlock` or `fill_XXXX`



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: dev-unsubscribe@pegasus.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@pegasus.apache.org
For additional commands, e-mail: dev-help@pegasus.apache.org


[GitHub] [incubator-pegasus] hycdong commented on a diff in pull request #1128: feat(backup): 5. replica create backup checkpoint

Posted by GitBox <gi...@apache.org>.
hycdong commented on code in PR #1128:
URL: https://github.com/apache/incubator-pegasus/pull/1128#discussion_r953244164


##########
src/rdsn/src/replica/backup/replica_backup_manager.cpp:
##########
@@ -15,21 +15,182 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include "replica_backup_manager.h"
-#include "replica/replica.h"
-
 #include <dsn/dist/fmt_logging.h>
-#include <dsn/utility/filesystem.h>
-#include <dsn/dist/replication/replication_app_base.h>
+#include <dsn/utility/fail_point.h>
+
+#include "replica_backup_manager.h"
 
 namespace dsn {
 namespace replication {
 
 // TODO(heyuchen): implement it
 
-replica_backup_manager::replica_backup_manager(replica *r) : replica_base(r), _replica(r) {}
+replica_backup_manager::replica_backup_manager(replica *r)
+    : replica_base(r), _replica(r), _stub(r->get_replica_stub())
+{
+}
 
 replica_backup_manager::~replica_backup_manager() {}
 
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::on_backup(const backup_request &request,
+                                       /*out*/ backup_response &response)
+{
+    // TODO(heyuchen): add other status
+
+    if (request.status == backup_status::CHECKPOINTING) {
+        try_to_checkpoint(request.backup_id, response);
+        return;
+    }
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::try_to_checkpoint(const int64_t &backup_id,
+                                               /*out*/ backup_response &response)
+{
+    switch (_status) {
+    case backup_status::UNINITIALIZED:
+        start_checkpointing(backup_id, response);
+        break;
+    case backup_status::CHECKPOINTING:
+    case backup_status::CHECKPOINTED:
+        report_checkpointing(response);
+        break;
+    default:
+        response.err = ERR_INVALID_STATE;
+        derror_replica("invalid local status({}) while request status = {}",
+                       enum_to_string(_status),
+                       enum_to_string(backup_status::CHECKPOINTING));
+        break;
+    }
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::start_checkpointing(int64_t backup_id,
+                                                 /*out*/ backup_response &response)
+{
+    FAIL_POINT_INJECT_F("replica_backup_start_checkpointing", [&](dsn::string_view) {
+        _status = backup_status::CHECKPOINTING;
+        response.err = ERR_OK;
+    });
+
+    ddebug_replica("start to checkpoint, backup_id = {}", backup_id);
+    zauto_write_lock l(_lock);
+    _status = backup_status::CHECKPOINTING;
+    _backup_id = backup_id;
+    _checkpointing_task =

Review Comment:
   `start_checkpointing` will only be called when request.backup_status = checkpointing, local backup_status=uninitialized, and `_checkpointing_task` will be canceled when backup stopped. I think `_checkpointing_task` won't be re-write.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: dev-unsubscribe@pegasus.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@pegasus.apache.org
For additional commands, e-mail: dev-help@pegasus.apache.org


[GitHub] [incubator-pegasus] hycdong commented on a diff in pull request #1128: feat(backup): 5. replica create backup checkpoint

Posted by GitBox <gi...@apache.org>.
hycdong commented on code in PR #1128:
URL: https://github.com/apache/incubator-pegasus/pull/1128#discussion_r951225631


##########
src/rdsn/src/replica/backup/replica_backup_manager.cpp:
##########
@@ -15,21 +15,182 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include "replica_backup_manager.h"
-#include "replica/replica.h"
-
 #include <dsn/dist/fmt_logging.h>
-#include <dsn/utility/filesystem.h>
-#include <dsn/dist/replication/replication_app_base.h>
+#include <dsn/utility/fail_point.h>
+
+#include "replica_backup_manager.h"
 
 namespace dsn {
 namespace replication {
 
 // TODO(heyuchen): implement it
 
-replica_backup_manager::replica_backup_manager(replica *r) : replica_base(r), _replica(r) {}
+replica_backup_manager::replica_backup_manager(replica *r)
+    : replica_base(r), _replica(r), _stub(r->get_replica_stub())
+{
+}
 
 replica_backup_manager::~replica_backup_manager() {}
 
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::on_backup(const backup_request &request,
+                                       /*out*/ backup_response &response)
+{
+    // TODO(heyuchen): add other status
+
+    if (request.status == backup_status::CHECKPOINTING) {
+        try_to_checkpoint(request.backup_id, response);
+        return;
+    }
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::try_to_checkpoint(const int64_t &backup_id,
+                                               /*out*/ backup_response &response)
+{
+    switch (_status) {
+    case backup_status::UNINITIALIZED:
+        start_checkpointing(backup_id, response);
+        break;
+    case backup_status::CHECKPOINTING:
+    case backup_status::CHECKPOINTED:
+        report_checkpointing(response);
+        break;
+    default:
+        response.err = ERR_INVALID_STATE;
+        derror_replica("invalid local status({}) while request status = {}",
+                       enum_to_string(_status),
+                       enum_to_string(backup_status::CHECKPOINTING));
+        break;
+    }
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::start_checkpointing(int64_t backup_id,
+                                                 /*out*/ backup_response &response)
+{
+    FAIL_POINT_INJECT_F("replica_backup_start_checkpointing", [&](dsn::string_view) {
+        _status = backup_status::CHECKPOINTING;
+        response.err = ERR_OK;
+    });
+
+    ddebug_replica("start to checkpoint, backup_id = {}", backup_id);
+    zauto_write_lock l(_lock);
+    _status = backup_status::CHECKPOINTING;
+    _backup_id = backup_id;
+    _checkpointing_task =
+        tasking::enqueue(LPC_BACKGROUND_COLD_BACKUP,
+                         tracker(),
+                         std::bind(&replica_backup_manager::generate_checkpoint, this));
+    fill_response_unlock(response);
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::report_checkpointing(/*out*/ backup_response &response)
+{
+    ddebug_replica("check checkpoint, backup_id = {}", _backup_id);
+    zauto_read_lock l(_lock);
+    if (_checkpoint_err != ERR_OK) {
+        derror_replica("checkpoint failed, error = {}", _checkpoint_err);
+        response.__set_checkpoint_upload_err(_checkpoint_err);
+    }
+    fill_response_unlock(response);
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::fill_response_unlock(/*out*/ backup_response &response)
+{
+    response.err = ERR_OK;
+    response.pid = get_gpid();
+    response.backup_id = _backup_id;
+    response.status = _status;
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION_LONG
+void replica_backup_manager::generate_checkpoint()
+{
+    const auto &local_checkpoint_dir = get_local_checkpoint_dir();
+
+    if (!utils::filesystem::directory_exists(local_checkpoint_dir) &&
+        !utils::filesystem::create_directory(local_checkpoint_dir)) {
+        derror_replica("create local backup dir {} failed", local_checkpoint_dir);
+        set_checkpoint_err(ERR_FILE_OPERATION_FAILED);
+        return;
+    }
+
+    // generate checkpoint and flush memtable
+    int64_t checkpoint_decree;
+    const auto &ec = _replica->_app->copy_checkpoint_to_dir(
+        local_checkpoint_dir.c_str(), &checkpoint_decree, true);
+    if (ec != ERR_OK) {
+        derror_replica("generate backup checkpoint failed, error = {}", ec);
+        set_checkpoint_err(ec);
+        return;
+    }
+    ddebug_replica(
+        "generate backup checkpoint succeed: checkpoint dir = {}, checkpoint decree = {}",
+        local_checkpoint_dir,
+        checkpoint_decree);
+
+    {
+        zauto_write_lock l(_lock);
+        if (!get_backup_metadata_unlock(

Review Comment:
   Done



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: dev-unsubscribe@pegasus.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@pegasus.apache.org
For additional commands, e-mail: dev-help@pegasus.apache.org


[GitHub] [incubator-pegasus] foreverneverer commented on a diff in pull request #1128: feat(backup): 5. replica create backup checkpoint

Posted by GitBox <gi...@apache.org>.
foreverneverer commented on code in PR #1128:
URL: https://github.com/apache/incubator-pegasus/pull/1128#discussion_r951275697


##########
src/rdsn/src/replica/backup/replica_backup_manager.cpp:
##########
@@ -15,21 +15,182 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include "replica_backup_manager.h"
-#include "replica/replica.h"
-
 #include <dsn/dist/fmt_logging.h>
-#include <dsn/utility/filesystem.h>
-#include <dsn/dist/replication/replication_app_base.h>
+#include <dsn/utility/fail_point.h>
+
+#include "replica_backup_manager.h"
 
 namespace dsn {
 namespace replication {
 
 // TODO(heyuchen): implement it
 
-replica_backup_manager::replica_backup_manager(replica *r) : replica_base(r), _replica(r) {}
+replica_backup_manager::replica_backup_manager(replica *r)
+    : replica_base(r), _replica(r), _stub(r->get_replica_stub())
+{
+}
 
 replica_backup_manager::~replica_backup_manager() {}
 
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::on_backup(const backup_request &request,
+                                       /*out*/ backup_response &response)
+{
+    // TODO(heyuchen): add other status
+
+    if (request.status == backup_status::CHECKPOINTING) {
+        try_to_checkpoint(request.backup_id, response);
+        return;
+    }
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::try_to_checkpoint(const int64_t &backup_id,
+                                               /*out*/ backup_response &response)
+{
+    switch (_status) {
+    case backup_status::UNINITIALIZED:
+        start_checkpointing(backup_id, response);
+        break;
+    case backup_status::CHECKPOINTING:
+    case backup_status::CHECKPOINTED:
+        report_checkpointing(response);
+        break;
+    default:
+        response.err = ERR_INVALID_STATE;
+        derror_replica("invalid local status({}) while request status = {}",
+                       enum_to_string(_status),
+                       enum_to_string(backup_status::CHECKPOINTING));
+        break;
+    }
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::start_checkpointing(int64_t backup_id,
+                                                 /*out*/ backup_response &response)
+{
+    FAIL_POINT_INJECT_F("replica_backup_start_checkpointing", [&](dsn::string_view) {
+        _status = backup_status::CHECKPOINTING;
+        response.err = ERR_OK;
+    });
+
+    ddebug_replica("start to checkpoint, backup_id = {}", backup_id);
+    zauto_write_lock l(_lock);

Review Comment:
   you have assign it into `THREAD_POOL_REPLICATION`, why still `lock`?



##########
src/rdsn/src/replica/backup/replica_backup_manager.cpp:
##########
@@ -15,21 +15,182 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include "replica_backup_manager.h"
-#include "replica/replica.h"
-
 #include <dsn/dist/fmt_logging.h>
-#include <dsn/utility/filesystem.h>
-#include <dsn/dist/replication/replication_app_base.h>
+#include <dsn/utility/fail_point.h>
+
+#include "replica_backup_manager.h"
 
 namespace dsn {
 namespace replication {
 
 // TODO(heyuchen): implement it
 
-replica_backup_manager::replica_backup_manager(replica *r) : replica_base(r), _replica(r) {}
+replica_backup_manager::replica_backup_manager(replica *r)
+    : replica_base(r), _replica(r), _stub(r->get_replica_stub())
+{
+}
 
 replica_backup_manager::~replica_backup_manager() {}
 
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::on_backup(const backup_request &request,
+                                       /*out*/ backup_response &response)
+{
+    // TODO(heyuchen): add other status
+
+    if (request.status == backup_status::CHECKPOINTING) {
+        try_to_checkpoint(request.backup_id, response);
+        return;
+    }
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::try_to_checkpoint(const int64_t &backup_id,
+                                               /*out*/ backup_response &response)
+{
+    switch (_status) {
+    case backup_status::UNINITIALIZED:
+        start_checkpointing(backup_id, response);
+        break;
+    case backup_status::CHECKPOINTING:
+    case backup_status::CHECKPOINTED:
+        report_checkpointing(response);
+        break;
+    default:
+        response.err = ERR_INVALID_STATE;
+        derror_replica("invalid local status({}) while request status = {}",
+                       enum_to_string(_status),
+                       enum_to_string(backup_status::CHECKPOINTING));
+        break;
+    }
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::start_checkpointing(int64_t backup_id,
+                                                 /*out*/ backup_response &response)
+{
+    FAIL_POINT_INJECT_F("replica_backup_start_checkpointing", [&](dsn::string_view) {
+        _status = backup_status::CHECKPOINTING;
+        response.err = ERR_OK;
+    });
+
+    ddebug_replica("start to checkpoint, backup_id = {}", backup_id);
+    zauto_write_lock l(_lock);
+    _status = backup_status::CHECKPOINTING;
+    _backup_id = backup_id;
+    _checkpointing_task =
+        tasking::enqueue(LPC_BACKGROUND_COLD_BACKUP,
+                         tracker(),
+                         std::bind(&replica_backup_manager::generate_checkpoint, this));
+    fill_response_unlock(response);
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::report_checkpointing(/*out*/ backup_response &response)
+{
+    ddebug_replica("check checkpoint, backup_id = {}", _backup_id);
+    zauto_read_lock l(_lock);

Review Comment:
   same



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: dev-unsubscribe@pegasus.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@pegasus.apache.org
For additional commands, e-mail: dev-help@pegasus.apache.org


[GitHub] [incubator-pegasus] hycdong commented on a diff in pull request #1128: feat(backup): 5. replica create backup checkpoint

Posted by GitBox <gi...@apache.org>.
hycdong commented on code in PR #1128:
URL: https://github.com/apache/incubator-pegasus/pull/1128#discussion_r951284927


##########
src/rdsn/src/replica/backup/replica_backup_manager.cpp:
##########
@@ -15,21 +15,182 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include "replica_backup_manager.h"
-#include "replica/replica.h"
-
 #include <dsn/dist/fmt_logging.h>
-#include <dsn/utility/filesystem.h>
-#include <dsn/dist/replication/replication_app_base.h>
+#include <dsn/utility/fail_point.h>
+
+#include "replica_backup_manager.h"
 
 namespace dsn {
 namespace replication {
 
 // TODO(heyuchen): implement it
 
-replica_backup_manager::replica_backup_manager(replica *r) : replica_base(r), _replica(r) {}
+replica_backup_manager::replica_backup_manager(replica *r)
+    : replica_base(r), _replica(r), _stub(r->get_replica_stub())
+{
+}
 
 replica_backup_manager::~replica_backup_manager() {}
 
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::on_backup(const backup_request &request,
+                                       /*out*/ backup_response &response)
+{
+    // TODO(heyuchen): add other status
+
+    if (request.status == backup_status::CHECKPOINTING) {
+        try_to_checkpoint(request.backup_id, response);
+        return;
+    }
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::try_to_checkpoint(const int64_t &backup_id,
+                                               /*out*/ backup_response &response)
+{
+    switch (_status) {
+    case backup_status::UNINITIALIZED:
+        start_checkpointing(backup_id, response);
+        break;
+    case backup_status::CHECKPOINTING:
+    case backup_status::CHECKPOINTED:
+        report_checkpointing(response);
+        break;
+    default:
+        response.err = ERR_INVALID_STATE;
+        derror_replica("invalid local status({}) while request status = {}",
+                       enum_to_string(_status),
+                       enum_to_string(backup_status::CHECKPOINTING));
+        break;
+    }
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::start_checkpointing(int64_t backup_id,
+                                                 /*out*/ backup_response &response)
+{
+    FAIL_POINT_INJECT_F("replica_backup_start_checkpointing", [&](dsn::string_view) {
+        _status = backup_status::CHECKPOINTING;
+        response.err = ERR_OK;
+    });
+
+    ddebug_replica("start to checkpoint, backup_id = {}", backup_id);
+    zauto_write_lock l(_lock);

Review Comment:
   Because `_status` will also be update in other thread pool, such as `THREAD_POOL_REPLICATION_LONG`



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: dev-unsubscribe@pegasus.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@pegasus.apache.org
For additional commands, e-mail: dev-help@pegasus.apache.org


[GitHub] [incubator-pegasus] foreverneverer commented on a diff in pull request #1128: feat(backup): 5. replica create backup checkpoint

Posted by GitBox <gi...@apache.org>.
foreverneverer commented on code in PR #1128:
URL: https://github.com/apache/incubator-pegasus/pull/1128#discussion_r952158117


##########
src/rdsn/src/replica/backup/replica_backup_manager.cpp:
##########
@@ -15,21 +15,182 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include "replica_backup_manager.h"
-#include "replica/replica.h"
-
 #include <dsn/dist/fmt_logging.h>
-#include <dsn/utility/filesystem.h>
-#include <dsn/dist/replication/replication_app_base.h>
+#include <dsn/utility/fail_point.h>
+
+#include "replica_backup_manager.h"
 
 namespace dsn {
 namespace replication {
 
 // TODO(heyuchen): implement it
 
-replica_backup_manager::replica_backup_manager(replica *r) : replica_base(r), _replica(r) {}
+replica_backup_manager::replica_backup_manager(replica *r)
+    : replica_base(r), _replica(r), _stub(r->get_replica_stub())
+{
+}
 
 replica_backup_manager::~replica_backup_manager() {}
 
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::on_backup(const backup_request &request,
+                                       /*out*/ backup_response &response)
+{
+    // TODO(heyuchen): add other status
+
+    if (request.status == backup_status::CHECKPOINTING) {
+        try_to_checkpoint(request.backup_id, response);
+        return;
+    }
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::try_to_checkpoint(const int64_t &backup_id,
+                                               /*out*/ backup_response &response)
+{
+    switch (_status) {
+    case backup_status::UNINITIALIZED:
+        start_checkpointing(backup_id, response);
+        break;
+    case backup_status::CHECKPOINTING:
+    case backup_status::CHECKPOINTED:
+        report_checkpointing(response);
+        break;
+    default:
+        response.err = ERR_INVALID_STATE;
+        derror_replica("invalid local status({}) while request status = {}",
+                       enum_to_string(_status),
+                       enum_to_string(backup_status::CHECKPOINTING));
+        break;
+    }
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::start_checkpointing(int64_t backup_id,
+                                                 /*out*/ backup_response &response)
+{
+    FAIL_POINT_INJECT_F("replica_backup_start_checkpointing", [&](dsn::string_view) {
+        _status = backup_status::CHECKPOINTING;
+        response.err = ERR_OK;
+    });
+
+    ddebug_replica("start to checkpoint, backup_id = {}", backup_id);
+    zauto_write_lock l(_lock);

Review Comment:
   So if `_status` will be updated in other pool , the https://github.com/apache/incubator-pegasus/pull/1128/files/8cb6b13936af351c0d9e4c21ece5ee03292873ce#diff-31f2c9e07530258a2e3ad61a16a5af0155b303a4bc0409c69d9e48a4689c0646R51 is thread-safe?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: dev-unsubscribe@pegasus.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@pegasus.apache.org
For additional commands, e-mail: dev-help@pegasus.apache.org


[GitHub] [incubator-pegasus] hycdong commented on a diff in pull request #1128: feat(backup): 5. replica create backup checkpoint

Posted by GitBox <gi...@apache.org>.
hycdong commented on code in PR #1128:
URL: https://github.com/apache/incubator-pegasus/pull/1128#discussion_r952351942


##########
src/rdsn/src/replica/backup/replica_backup_manager.cpp:
##########
@@ -15,21 +15,182 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include "replica_backup_manager.h"
-#include "replica/replica.h"
-
 #include <dsn/dist/fmt_logging.h>
-#include <dsn/utility/filesystem.h>
-#include <dsn/dist/replication/replication_app_base.h>
+#include <dsn/utility/fail_point.h>
+
+#include "replica_backup_manager.h"
 
 namespace dsn {
 namespace replication {
 
 // TODO(heyuchen): implement it
 
-replica_backup_manager::replica_backup_manager(replica *r) : replica_base(r), _replica(r) {}
+replica_backup_manager::replica_backup_manager(replica *r)
+    : replica_base(r), _replica(r), _stub(r->get_replica_stub())
+{
+}
 
 replica_backup_manager::~replica_backup_manager() {}
 
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::on_backup(const backup_request &request,
+                                       /*out*/ backup_response &response)
+{
+    // TODO(heyuchen): add other status
+
+    if (request.status == backup_status::CHECKPOINTING) {
+        try_to_checkpoint(request.backup_id, response);
+        return;
+    }
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::try_to_checkpoint(const int64_t &backup_id,
+                                               /*out*/ backup_response &response)
+{
+    switch (_status) {
+    case backup_status::UNINITIALIZED:
+        start_checkpointing(backup_id, response);
+        break;
+    case backup_status::CHECKPOINTING:
+    case backup_status::CHECKPOINTED:
+        report_checkpointing(response);
+        break;
+    default:
+        response.err = ERR_INVALID_STATE;
+        derror_replica("invalid local status({}) while request status = {}",
+                       enum_to_string(_status),
+                       enum_to_string(backup_status::CHECKPOINTING));
+        break;
+    }
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::start_checkpointing(int64_t backup_id,
+                                                 /*out*/ backup_response &response)
+{
+    FAIL_POINT_INJECT_F("replica_backup_start_checkpointing", [&](dsn::string_view) {
+        _status = backup_status::CHECKPOINTING;
+        response.err = ERR_OK;
+    });
+
+    ddebug_replica("start to checkpoint, backup_id = {}", backup_id);
+    zauto_write_lock l(_lock);

Review Comment:
   I think it's okay in this pr, this needs to add a read lock, but it will add lock in `on_backup` function, will be added in future pr.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: dev-unsubscribe@pegasus.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@pegasus.apache.org
For additional commands, e-mail: dev-help@pegasus.apache.org


[GitHub] [incubator-pegasus] hycdong commented on a diff in pull request #1128: feat(backup): 5. replica create backup checkpoint

Posted by GitBox <gi...@apache.org>.
hycdong commented on code in PR #1128:
URL: https://github.com/apache/incubator-pegasus/pull/1128#discussion_r951284897


##########
src/rdsn/src/replica/backup/replica_backup_manager.cpp:
##########
@@ -15,21 +15,182 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include "replica_backup_manager.h"
-#include "replica/replica.h"
-
 #include <dsn/dist/fmt_logging.h>
-#include <dsn/utility/filesystem.h>
-#include <dsn/dist/replication/replication_app_base.h>
+#include <dsn/utility/fail_point.h>
+
+#include "replica_backup_manager.h"
 
 namespace dsn {
 namespace replication {
 
 // TODO(heyuchen): implement it
 
-replica_backup_manager::replica_backup_manager(replica *r) : replica_base(r), _replica(r) {}
+replica_backup_manager::replica_backup_manager(replica *r)
+    : replica_base(r), _replica(r), _stub(r->get_replica_stub())
+{
+}
 
 replica_backup_manager::~replica_backup_manager() {}
 
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::on_backup(const backup_request &request,
+                                       /*out*/ backup_response &response)
+{
+    // TODO(heyuchen): add other status
+
+    if (request.status == backup_status::CHECKPOINTING) {
+        try_to_checkpoint(request.backup_id, response);
+        return;
+    }
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::try_to_checkpoint(const int64_t &backup_id,
+                                               /*out*/ backup_response &response)
+{
+    switch (_status) {
+    case backup_status::UNINITIALIZED:
+        start_checkpointing(backup_id, response);
+        break;
+    case backup_status::CHECKPOINTING:
+    case backup_status::CHECKPOINTED:
+        report_checkpointing(response);
+        break;
+    default:
+        response.err = ERR_INVALID_STATE;
+        derror_replica("invalid local status({}) while request status = {}",
+                       enum_to_string(_status),
+                       enum_to_string(backup_status::CHECKPOINTING));
+        break;
+    }
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::start_checkpointing(int64_t backup_id,
+                                                 /*out*/ backup_response &response)
+{
+    FAIL_POINT_INJECT_F("replica_backup_start_checkpointing", [&](dsn::string_view) {
+        _status = backup_status::CHECKPOINTING;
+        response.err = ERR_OK;
+    });
+
+    ddebug_replica("start to checkpoint, backup_id = {}", backup_id);
+    zauto_write_lock l(_lock);
+    _status = backup_status::CHECKPOINTING;
+    _backup_id = backup_id;
+    _checkpointing_task =
+        tasking::enqueue(LPC_BACKGROUND_COLD_BACKUP,
+                         tracker(),
+                         std::bind(&replica_backup_manager::generate_checkpoint, this));
+    fill_response_unlock(response);
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::report_checkpointing(/*out*/ backup_response &response)
+{
+    ddebug_replica("check checkpoint, backup_id = {}", _backup_id);
+    zauto_read_lock l(_lock);

Review Comment:
   Same



##########
src/rdsn/src/replica/backup/replica_backup_manager.cpp:
##########
@@ -15,21 +15,182 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include "replica_backup_manager.h"
-#include "replica/replica.h"
-
 #include <dsn/dist/fmt_logging.h>
-#include <dsn/utility/filesystem.h>
-#include <dsn/dist/replication/replication_app_base.h>
+#include <dsn/utility/fail_point.h>
+
+#include "replica_backup_manager.h"
 
 namespace dsn {
 namespace replication {
 
 // TODO(heyuchen): implement it
 
-replica_backup_manager::replica_backup_manager(replica *r) : replica_base(r), _replica(r) {}
+replica_backup_manager::replica_backup_manager(replica *r)
+    : replica_base(r), _replica(r), _stub(r->get_replica_stub())
+{
+}
 
 replica_backup_manager::~replica_backup_manager() {}
 
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::on_backup(const backup_request &request,
+                                       /*out*/ backup_response &response)
+{
+    // TODO(heyuchen): add other status
+
+    if (request.status == backup_status::CHECKPOINTING) {
+        try_to_checkpoint(request.backup_id, response);
+        return;
+    }
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::try_to_checkpoint(const int64_t &backup_id,
+                                               /*out*/ backup_response &response)
+{
+    switch (_status) {
+    case backup_status::UNINITIALIZED:
+        start_checkpointing(backup_id, response);
+        break;
+    case backup_status::CHECKPOINTING:
+    case backup_status::CHECKPOINTED:
+        report_checkpointing(response);
+        break;
+    default:
+        response.err = ERR_INVALID_STATE;
+        derror_replica("invalid local status({}) while request status = {}",
+                       enum_to_string(_status),
+                       enum_to_string(backup_status::CHECKPOINTING));
+        break;
+    }
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::start_checkpointing(int64_t backup_id,
+                                                 /*out*/ backup_response &response)
+{
+    FAIL_POINT_INJECT_F("replica_backup_start_checkpointing", [&](dsn::string_view) {
+        _status = backup_status::CHECKPOINTING;
+        response.err = ERR_OK;
+    });
+
+    ddebug_replica("start to checkpoint, backup_id = {}", backup_id);
+    zauto_write_lock l(_lock);

Review Comment:
   Because `_status` will also be update in other thread pool, such as THREAD_POOL_REPLICATION_LONG



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: dev-unsubscribe@pegasus.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@pegasus.apache.org
For additional commands, e-mail: dev-help@pegasus.apache.org


[GitHub] [incubator-pegasus] foreverneverer commented on a diff in pull request #1128: feat(backup): 5. replica create backup checkpoint

Posted by GitBox <gi...@apache.org>.
foreverneverer commented on code in PR #1128:
URL: https://github.com/apache/incubator-pegasus/pull/1128#discussion_r951254191


##########
src/rdsn/src/replica/backup/replica_backup_manager.cpp:
##########
@@ -15,21 +15,182 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include "replica_backup_manager.h"
-#include "replica/replica.h"
-
 #include <dsn/dist/fmt_logging.h>
-#include <dsn/utility/filesystem.h>
-#include <dsn/dist/replication/replication_app_base.h>
+#include <dsn/utility/fail_point.h>
+
+#include "replica_backup_manager.h"
 
 namespace dsn {
 namespace replication {
 
 // TODO(heyuchen): implement it
 
-replica_backup_manager::replica_backup_manager(replica *r) : replica_base(r), _replica(r) {}
+replica_backup_manager::replica_backup_manager(replica *r)
+    : replica_base(r), _replica(r), _stub(r->get_replica_stub())
+{
+}
 
 replica_backup_manager::~replica_backup_manager() {}
 
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::on_backup(const backup_request &request,
+                                       /*out*/ backup_response &response)
+{
+    // TODO(heyuchen): add other status
+
+    if (request.status == backup_status::CHECKPOINTING) {
+        try_to_checkpoint(request.backup_id, response);
+        return;
+    }
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::try_to_checkpoint(const int64_t &backup_id,
+                                               /*out*/ backup_response &response)
+{
+    switch (_status) {
+    case backup_status::UNINITIALIZED:
+        start_checkpointing(backup_id, response);
+        break;
+    case backup_status::CHECKPOINTING:
+    case backup_status::CHECKPOINTED:
+        report_checkpointing(response);
+        break;
+    default:
+        response.err = ERR_INVALID_STATE;
+        derror_replica("invalid local status({}) while request status = {}",
+                       enum_to_string(_status),
+                       enum_to_string(backup_status::CHECKPOINTING));
+        break;
+    }
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::start_checkpointing(int64_t backup_id,
+                                                 /*out*/ backup_response &response)
+{
+    FAIL_POINT_INJECT_F("replica_backup_start_checkpointing", [&](dsn::string_view) {
+        _status = backup_status::CHECKPOINTING;
+        response.err = ERR_OK;
+    });
+
+    ddebug_replica("start to checkpoint, backup_id = {}", backup_id);
+    zauto_write_lock l(_lock);
+    _status = backup_status::CHECKPOINTING;
+    _backup_id = backup_id;
+    _checkpointing_task =
+        tasking::enqueue(LPC_BACKGROUND_COLD_BACKUP,
+                         tracker(),
+                         std::bind(&replica_backup_manager::generate_checkpoint, this));
+    fill_response_unlock(response);
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::report_checkpointing(/*out*/ backup_response &response)
+{
+    ddebug_replica("check checkpoint, backup_id = {}", _backup_id);
+    zauto_read_lock l(_lock);
+    if (_checkpoint_err != ERR_OK) {
+        derror_replica("checkpoint failed, error = {}", _checkpoint_err);
+        response.__set_checkpoint_upload_err(_checkpoint_err);
+    }
+    fill_response_unlock(response);
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::fill_response_unlock(/*out*/ backup_response &response)
+{
+    response.err = ERR_OK;
+    response.pid = get_gpid();
+    response.backup_id = _backup_id;
+    response.status = _status;
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION_LONG
+void replica_backup_manager::generate_checkpoint()
+{
+    const auto &local_checkpoint_dir = get_local_checkpoint_dir();

Review Comment:
   Ok, I just want `generate_checkpoint` can be unified function in `replica_chkpt.cpp`, since may many module need it, but now this module have own implementation.
   
   can it refactor base、unified function in `replica_chkpt.cpp` and used for backup?



##########
src/rdsn/src/replica/backup/replica_backup_manager.cpp:
##########
@@ -15,21 +15,182 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include "replica_backup_manager.h"
-#include "replica/replica.h"
-
 #include <dsn/dist/fmt_logging.h>
-#include <dsn/utility/filesystem.h>
-#include <dsn/dist/replication/replication_app_base.h>
+#include <dsn/utility/fail_point.h>
+
+#include "replica_backup_manager.h"
 
 namespace dsn {
 namespace replication {
 
 // TODO(heyuchen): implement it
 
-replica_backup_manager::replica_backup_manager(replica *r) : replica_base(r), _replica(r) {}
+replica_backup_manager::replica_backup_manager(replica *r)
+    : replica_base(r), _replica(r), _stub(r->get_replica_stub())
+{
+}
 
 replica_backup_manager::~replica_backup_manager() {}
 
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::on_backup(const backup_request &request,
+                                       /*out*/ backup_response &response)
+{
+    // TODO(heyuchen): add other status
+
+    if (request.status == backup_status::CHECKPOINTING) {
+        try_to_checkpoint(request.backup_id, response);
+        return;
+    }
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::try_to_checkpoint(const int64_t &backup_id,
+                                               /*out*/ backup_response &response)
+{
+    switch (_status) {
+    case backup_status::UNINITIALIZED:
+        start_checkpointing(backup_id, response);
+        break;
+    case backup_status::CHECKPOINTING:
+    case backup_status::CHECKPOINTED:
+        report_checkpointing(response);
+        break;
+    default:
+        response.err = ERR_INVALID_STATE;
+        derror_replica("invalid local status({}) while request status = {}",
+                       enum_to_string(_status),
+                       enum_to_string(backup_status::CHECKPOINTING));
+        break;
+    }
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::start_checkpointing(int64_t backup_id,
+                                                 /*out*/ backup_response &response)
+{
+    FAIL_POINT_INJECT_F("replica_backup_start_checkpointing", [&](dsn::string_view) {
+        _status = backup_status::CHECKPOINTING;
+        response.err = ERR_OK;
+    });
+
+    ddebug_replica("start to checkpoint, backup_id = {}", backup_id);
+    zauto_write_lock l(_lock);
+    _status = backup_status::CHECKPOINTING;
+    _backup_id = backup_id;
+    _checkpointing_task =
+        tasking::enqueue(LPC_BACKGROUND_COLD_BACKUP,
+                         tracker(),
+                         std::bind(&replica_backup_manager::generate_checkpoint, this));
+    fill_response_unlock(response);
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::report_checkpointing(/*out*/ backup_response &response)
+{
+    ddebug_replica("check checkpoint, backup_id = {}", _backup_id);
+    zauto_read_lock l(_lock);
+    if (_checkpoint_err != ERR_OK) {
+        derror_replica("checkpoint failed, error = {}", _checkpoint_err);
+        response.__set_checkpoint_upload_err(_checkpoint_err);
+    }
+    fill_response_unlock(response);
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::fill_response_unlock(/*out*/ backup_response &response)
+{
+    response.err = ERR_OK;
+    response.pid = get_gpid();
+    response.backup_id = _backup_id;
+    response.status = _status;
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION_LONG
+void replica_backup_manager::generate_checkpoint()
+{
+    const auto &local_checkpoint_dir = get_local_checkpoint_dir();

Review Comment:
   Ok, I just want `generate_checkpoint` can be unified function in `replica_chkpt.cpp`, since maybe many module need it, but now this module have own implementation.
   
   can it refactor base、unified function in `replica_chkpt.cpp` and used for backup?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: dev-unsubscribe@pegasus.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@pegasus.apache.org
For additional commands, e-mail: dev-help@pegasus.apache.org


[GitHub] [incubator-pegasus] hycdong commented on a diff in pull request #1128: feat(backup): 5. replica create backup checkpoint

Posted by GitBox <gi...@apache.org>.
hycdong commented on code in PR #1128:
URL: https://github.com/apache/incubator-pegasus/pull/1128#discussion_r951218993


##########
src/rdsn/src/replica/backup/replica_backup_manager.cpp:
##########
@@ -15,21 +15,182 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include "replica_backup_manager.h"
-#include "replica/replica.h"
-
 #include <dsn/dist/fmt_logging.h>
-#include <dsn/utility/filesystem.h>
-#include <dsn/dist/replication/replication_app_base.h>
+#include <dsn/utility/fail_point.h>
+
+#include "replica_backup_manager.h"
 
 namespace dsn {
 namespace replication {
 
 // TODO(heyuchen): implement it
 
-replica_backup_manager::replica_backup_manager(replica *r) : replica_base(r), _replica(r) {}
+replica_backup_manager::replica_backup_manager(replica *r)
+    : replica_base(r), _replica(r), _stub(r->get_replica_stub())
+{
+}
 
 replica_backup_manager::~replica_backup_manager() {}
 
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::on_backup(const backup_request &request,
+                                       /*out*/ backup_response &response)
+{
+    // TODO(heyuchen): add other status
+
+    if (request.status == backup_status::CHECKPOINTING) {
+        try_to_checkpoint(request.backup_id, response);
+        return;
+    }
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::try_to_checkpoint(const int64_t &backup_id,
+                                               /*out*/ backup_response &response)
+{
+    switch (_status) {
+    case backup_status::UNINITIALIZED:
+        start_checkpointing(backup_id, response);
+        break;
+    case backup_status::CHECKPOINTING:
+    case backup_status::CHECKPOINTED:
+        report_checkpointing(response);
+        break;
+    default:
+        response.err = ERR_INVALID_STATE;
+        derror_replica("invalid local status({}) while request status = {}",
+                       enum_to_string(_status),
+                       enum_to_string(backup_status::CHECKPOINTING));
+        break;
+    }
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::start_checkpointing(int64_t backup_id,
+                                                 /*out*/ backup_response &response)
+{
+    FAIL_POINT_INJECT_F("replica_backup_start_checkpointing", [&](dsn::string_view) {
+        _status = backup_status::CHECKPOINTING;
+        response.err = ERR_OK;
+    });
+
+    ddebug_replica("start to checkpoint, backup_id = {}", backup_id);
+    zauto_write_lock l(_lock);
+    _status = backup_status::CHECKPOINTING;
+    _backup_id = backup_id;
+    _checkpointing_task =
+        tasking::enqueue(LPC_BACKGROUND_COLD_BACKUP,
+                         tracker(),
+                         std::bind(&replica_backup_manager::generate_checkpoint, this));
+    fill_response_unlock(response);
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::report_checkpointing(/*out*/ backup_response &response)
+{
+    ddebug_replica("check checkpoint, backup_id = {}", _backup_id);
+    zauto_read_lock l(_lock);
+    if (_checkpoint_err != ERR_OK) {
+        derror_replica("checkpoint failed, error = {}", _checkpoint_err);
+        response.__set_checkpoint_upload_err(_checkpoint_err);
+    }
+    fill_response_unlock(response);
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::fill_response_unlock(/*out*/ backup_response &response)
+{
+    response.err = ERR_OK;
+    response.pid = get_gpid();
+    response.backup_id = _backup_id;
+    response.status = _status;
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION_LONG
+void replica_backup_manager::generate_checkpoint()
+{
+    const auto &local_checkpoint_dir = get_local_checkpoint_dir();

Review Comment:
   `generate_checkpoint` will do following logic:
   1. create a local directory to store backup checkpoint by function `copy_checkpoint_to_dir`
   2. create checkpoint under the directory above
   3. generate backup metadata
   
   Function `trigger_manual_emergency_checkpoint` will trigger a checkpoint and flush memtable in the DEFAULT checkpoint directory, and will do decree compare and perf-counter update, which are not useful in backup process. Besides, the functions in `replica_chkpt` will finnally call `copy_checkpoint_to_dir`, as a result, I don't reuse those functions.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: dev-unsubscribe@pegasus.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@pegasus.apache.org
For additional commands, e-mail: dev-help@pegasus.apache.org


[GitHub] [incubator-pegasus] hycdong commented on a diff in pull request #1128: feat(backup): 5. replica create backup checkpoint

Posted by GitBox <gi...@apache.org>.
hycdong commented on code in PR #1128:
URL: https://github.com/apache/incubator-pegasus/pull/1128#discussion_r952351942


##########
src/rdsn/src/replica/backup/replica_backup_manager.cpp:
##########
@@ -15,21 +15,182 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include "replica_backup_manager.h"
-#include "replica/replica.h"
-
 #include <dsn/dist/fmt_logging.h>
-#include <dsn/utility/filesystem.h>
-#include <dsn/dist/replication/replication_app_base.h>
+#include <dsn/utility/fail_point.h>
+
+#include "replica_backup_manager.h"
 
 namespace dsn {
 namespace replication {
 
 // TODO(heyuchen): implement it
 
-replica_backup_manager::replica_backup_manager(replica *r) : replica_base(r), _replica(r) {}
+replica_backup_manager::replica_backup_manager(replica *r)
+    : replica_base(r), _replica(r), _stub(r->get_replica_stub())
+{
+}
 
 replica_backup_manager::~replica_backup_manager() {}
 
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::on_backup(const backup_request &request,
+                                       /*out*/ backup_response &response)
+{
+    // TODO(heyuchen): add other status
+
+    if (request.status == backup_status::CHECKPOINTING) {
+        try_to_checkpoint(request.backup_id, response);
+        return;
+    }
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::try_to_checkpoint(const int64_t &backup_id,
+                                               /*out*/ backup_response &response)
+{
+    switch (_status) {
+    case backup_status::UNINITIALIZED:
+        start_checkpointing(backup_id, response);
+        break;
+    case backup_status::CHECKPOINTING:
+    case backup_status::CHECKPOINTED:
+        report_checkpointing(response);
+        break;
+    default:
+        response.err = ERR_INVALID_STATE;
+        derror_replica("invalid local status({}) while request status = {}",
+                       enum_to_string(_status),
+                       enum_to_string(backup_status::CHECKPOINTING));
+        break;
+    }
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::start_checkpointing(int64_t backup_id,
+                                                 /*out*/ backup_response &response)
+{
+    FAIL_POINT_INJECT_F("replica_backup_start_checkpointing", [&](dsn::string_view) {
+        _status = backup_status::CHECKPOINTING;
+        response.err = ERR_OK;
+    });
+
+    ddebug_replica("start to checkpoint, backup_id = {}", backup_id);
+    zauto_write_lock l(_lock);

Review Comment:
   I think it's okay in this pr, this needs to add to read lock, but it will add lock in `on_backup` function, will be added in future pr.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: dev-unsubscribe@pegasus.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@pegasus.apache.org
For additional commands, e-mail: dev-help@pegasus.apache.org


[GitHub] [incubator-pegasus] foreverneverer commented on a diff in pull request #1128: feat(backup): 5. replica create backup checkpoint

Posted by GitBox <gi...@apache.org>.
foreverneverer commented on code in PR #1128:
URL: https://github.com/apache/incubator-pegasus/pull/1128#discussion_r952427703


##########
src/rdsn/src/replica/backup/replica_backup_manager.cpp:
##########
@@ -15,21 +15,182 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include "replica_backup_manager.h"
-#include "replica/replica.h"
-
 #include <dsn/dist/fmt_logging.h>
-#include <dsn/utility/filesystem.h>
-#include <dsn/dist/replication/replication_app_base.h>
+#include <dsn/utility/fail_point.h>
+
+#include "replica_backup_manager.h"
 
 namespace dsn {
 namespace replication {
 
 // TODO(heyuchen): implement it
 
-replica_backup_manager::replica_backup_manager(replica *r) : replica_base(r), _replica(r) {}
+replica_backup_manager::replica_backup_manager(replica *r)
+    : replica_base(r), _replica(r), _stub(r->get_replica_stub())
+{
+}
 
 replica_backup_manager::~replica_backup_manager() {}
 
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::on_backup(const backup_request &request,
+                                       /*out*/ backup_response &response)
+{
+    // TODO(heyuchen): add other status
+
+    if (request.status == backup_status::CHECKPOINTING) {
+        try_to_checkpoint(request.backup_id, response);
+        return;
+    }
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::try_to_checkpoint(const int64_t &backup_id,
+                                               /*out*/ backup_response &response)
+{
+    switch (_status) {
+    case backup_status::UNINITIALIZED:
+        start_checkpointing(backup_id, response);
+        break;
+    case backup_status::CHECKPOINTING:
+    case backup_status::CHECKPOINTED:
+        report_checkpointing(response);
+        break;
+    default:
+        response.err = ERR_INVALID_STATE;
+        derror_replica("invalid local status({}) while request status = {}",
+                       enum_to_string(_status),
+                       enum_to_string(backup_status::CHECKPOINTING));
+        break;
+    }
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::start_checkpointing(int64_t backup_id,
+                                                 /*out*/ backup_response &response)
+{
+    FAIL_POINT_INJECT_F("replica_backup_start_checkpointing", [&](dsn::string_view) {
+        _status = backup_status::CHECKPOINTING;
+        response.err = ERR_OK;
+    });
+
+    ddebug_replica("start to checkpoint, backup_id = {}", backup_id);
+    zauto_write_lock l(_lock);
+    _status = backup_status::CHECKPOINTING;
+    _backup_id = backup_id;
+    _checkpointing_task =

Review Comment:
   Just one note or question, please make sure `start_checkpoint` not run multi-time, which may will result in the `_checkpointing_task` was re-write, and the old task can't be stopped if we stop backup?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: dev-unsubscribe@pegasus.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@pegasus.apache.org
For additional commands, e-mail: dev-help@pegasus.apache.org


[GitHub] [incubator-pegasus] hycdong merged pull request #1128: feat(backup): 5. replica create backup checkpoint

Posted by GitBox <gi...@apache.org>.
hycdong merged PR #1128:
URL: https://github.com/apache/incubator-pegasus/pull/1128


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: dev-unsubscribe@pegasus.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@pegasus.apache.org
For additional commands, e-mail: dev-help@pegasus.apache.org


[GitHub] [incubator-pegasus] foreverneverer commented on a diff in pull request #1128: feat(backup): 5. replica create backup checkpoint

Posted by GitBox <gi...@apache.org>.
foreverneverer commented on code in PR #1128:
URL: https://github.com/apache/incubator-pegasus/pull/1128#discussion_r952431531


##########
src/rdsn/src/replica/backup/replica_backup_manager.cpp:
##########
@@ -15,21 +15,182 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include "replica_backup_manager.h"
-#include "replica/replica.h"
-
 #include <dsn/dist/fmt_logging.h>
-#include <dsn/utility/filesystem.h>
-#include <dsn/dist/replication/replication_app_base.h>
+#include <dsn/utility/fail_point.h>
+
+#include "replica_backup_manager.h"
 
 namespace dsn {
 namespace replication {
 
 // TODO(heyuchen): implement it
 
-replica_backup_manager::replica_backup_manager(replica *r) : replica_base(r), _replica(r) {}
+replica_backup_manager::replica_backup_manager(replica *r)
+    : replica_base(r), _replica(r), _stub(r->get_replica_stub())
+{
+}
 
 replica_backup_manager::~replica_backup_manager() {}
 
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::on_backup(const backup_request &request,
+                                       /*out*/ backup_response &response)
+{
+    // TODO(heyuchen): add other status
+
+    if (request.status == backup_status::CHECKPOINTING) {
+        try_to_checkpoint(request.backup_id, response);
+        return;
+    }
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::try_to_checkpoint(const int64_t &backup_id,
+                                               /*out*/ backup_response &response)
+{
+    switch (_status) {
+    case backup_status::UNINITIALIZED:
+        start_checkpointing(backup_id, response);
+        break;
+    case backup_status::CHECKPOINTING:
+    case backup_status::CHECKPOINTED:
+        report_checkpointing(response);
+        break;
+    default:
+        response.err = ERR_INVALID_STATE;
+        derror_replica("invalid local status({}) while request status = {}",
+                       enum_to_string(_status),
+                       enum_to_string(backup_status::CHECKPOINTING));
+        break;
+    }
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::start_checkpointing(int64_t backup_id,
+                                                 /*out*/ backup_response &response)
+{
+    FAIL_POINT_INJECT_F("replica_backup_start_checkpointing", [&](dsn::string_view) {
+        _status = backup_status::CHECKPOINTING;
+        response.err = ERR_OK;
+    });
+
+    ddebug_replica("start to checkpoint, backup_id = {}", backup_id);
+    zauto_write_lock l(_lock);
+    _status = backup_status::CHECKPOINTING;
+    _backup_id = backup_id;
+    _checkpointing_task =
+        tasking::enqueue(LPC_BACKGROUND_COLD_BACKUP,
+                         tracker(),
+                         std::bind(&replica_backup_manager::generate_checkpoint, this));
+    fill_response_unlock(response);
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::report_checkpointing(/*out*/ backup_response &response)
+{
+    ddebug_replica("check checkpoint, backup_id = {}", _backup_id);
+    zauto_read_lock l(_lock);
+    if (_checkpoint_err != ERR_OK) {
+        derror_replica("checkpoint failed, error = {}", _checkpoint_err);
+        response.__set_checkpoint_upload_err(_checkpoint_err);
+    }
+    fill_response_unlock(response);
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::fill_response_unlock(/*out*/ backup_response &response)
+{
+    response.err = ERR_OK;
+    response.pid = get_gpid();
+    response.backup_id = _backup_id;
+    response.status = _status;
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION_LONG
+void replica_backup_manager::generate_checkpoint()
+{
+    const auto &local_checkpoint_dir = get_local_checkpoint_dir();
+
+    if (!utils::filesystem::directory_exists(local_checkpoint_dir) &&
+        !utils::filesystem::create_directory(local_checkpoint_dir)) {
+        derror_replica("create local backup dir {} failed", local_checkpoint_dir);
+        set_checkpoint_err(ERR_FILE_OPERATION_FAILED);
+        return;
+    }
+
+    // generate checkpoint and flush memtable
+    int64_t checkpoint_decree;
+    const auto &ec = _replica->_app->copy_checkpoint_to_dir(
+        local_checkpoint_dir.c_str(), &checkpoint_decree, true);
+    if (ec != ERR_OK) {
+        derror_replica("generate backup checkpoint failed, error = {}", ec);
+        set_checkpoint_err(ec);
+        return;
+    }
+    ddebug_replica(
+        "generate backup checkpoint succeed: checkpoint dir = {}, checkpoint decree = {}",
+        local_checkpoint_dir,
+        checkpoint_decree);
+
+    {
+        zauto_write_lock l(_lock);
+        if (!set_backup_metadata_unlock(
+                local_checkpoint_dir, checkpoint_decree, static_cast<int64_t>(dsn_now_ms()))) {
+            _checkpoint_err = ERR_FILE_OPERATION_FAILED;
+            return;
+        }
+        _checkpoint_err = ERR_OK;
+        _status = backup_status::CHECKPOINTED;
+    }
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION_LONG
+bool replica_backup_manager::set_backup_metadata_unlock(const std::string &local_checkpoint_dir,
+                                                        int64_t checkpoint_decree,
+                                                        int64_t checkpoint_timestamp)
+{
+    FAIL_POINT_INJECT_F("replica_set_backup_metadata", [](dsn::string_view) { return true; });
+
+    std::vector<std::string> sub_files;
+    if (!utils::filesystem::get_subfiles(local_checkpoint_dir, sub_files, false)) {
+        derror_replica("list sub files of local checkpoint dir = {} failed", local_checkpoint_dir);
+        return false;
+    }
+
+    int64_t total_file_size = 0;
+    for (std::string &file : sub_files) {

Review Comment:
   const auto?



##########
src/rdsn/src/replica/backup/replica_backup_manager.cpp:
##########
@@ -15,21 +15,182 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include "replica_backup_manager.h"
-#include "replica/replica.h"
-
 #include <dsn/dist/fmt_logging.h>
-#include <dsn/utility/filesystem.h>
-#include <dsn/dist/replication/replication_app_base.h>
+#include <dsn/utility/fail_point.h>
+
+#include "replica_backup_manager.h"
 
 namespace dsn {
 namespace replication {
 
 // TODO(heyuchen): implement it
 
-replica_backup_manager::replica_backup_manager(replica *r) : replica_base(r), _replica(r) {}
+replica_backup_manager::replica_backup_manager(replica *r)
+    : replica_base(r), _replica(r), _stub(r->get_replica_stub())
+{
+}
 
 replica_backup_manager::~replica_backup_manager() {}
 
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::on_backup(const backup_request &request,
+                                       /*out*/ backup_response &response)
+{
+    // TODO(heyuchen): add other status
+
+    if (request.status == backup_status::CHECKPOINTING) {
+        try_to_checkpoint(request.backup_id, response);
+        return;
+    }
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::try_to_checkpoint(const int64_t &backup_id,
+                                               /*out*/ backup_response &response)
+{
+    switch (_status) {
+    case backup_status::UNINITIALIZED:
+        start_checkpointing(backup_id, response);
+        break;
+    case backup_status::CHECKPOINTING:
+    case backup_status::CHECKPOINTED:
+        report_checkpointing(response);
+        break;
+    default:
+        response.err = ERR_INVALID_STATE;
+        derror_replica("invalid local status({}) while request status = {}",
+                       enum_to_string(_status),
+                       enum_to_string(backup_status::CHECKPOINTING));
+        break;
+    }
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::start_checkpointing(int64_t backup_id,
+                                                 /*out*/ backup_response &response)
+{
+    FAIL_POINT_INJECT_F("replica_backup_start_checkpointing", [&](dsn::string_view) {
+        _status = backup_status::CHECKPOINTING;
+        response.err = ERR_OK;
+    });
+
+    ddebug_replica("start to checkpoint, backup_id = {}", backup_id);
+    zauto_write_lock l(_lock);
+    _status = backup_status::CHECKPOINTING;
+    _backup_id = backup_id;
+    _checkpointing_task =

Review Comment:
   Just one note or question, please make sure `start_checkpoint` not run multi-time, which may will result in the `_checkpointing_task` was overload, and the old task can't be stopped?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: dev-unsubscribe@pegasus.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@pegasus.apache.org
For additional commands, e-mail: dev-help@pegasus.apache.org


[GitHub] [incubator-pegasus] foreverneverer commented on a diff in pull request #1128: feat(backup): 5. replica create backup checkpoint

Posted by GitBox <gi...@apache.org>.
foreverneverer commented on code in PR #1128:
URL: https://github.com/apache/incubator-pegasus/pull/1128#discussion_r952423205


##########
src/rdsn/src/replica/backup/replica_backup_manager.cpp:
##########
@@ -15,21 +15,182 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include "replica_backup_manager.h"
-#include "replica/replica.h"
-
 #include <dsn/dist/fmt_logging.h>
-#include <dsn/utility/filesystem.h>
-#include <dsn/dist/replication/replication_app_base.h>
+#include <dsn/utility/fail_point.h>
+
+#include "replica_backup_manager.h"
 
 namespace dsn {
 namespace replication {
 
 // TODO(heyuchen): implement it
 
-replica_backup_manager::replica_backup_manager(replica *r) : replica_base(r), _replica(r) {}
+replica_backup_manager::replica_backup_manager(replica *r)
+    : replica_base(r), _replica(r), _stub(r->get_replica_stub())
+{
+}
 
 replica_backup_manager::~replica_backup_manager() {}
 
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::on_backup(const backup_request &request,
+                                       /*out*/ backup_response &response)
+{
+    // TODO(heyuchen): add other status
+
+    if (request.status == backup_status::CHECKPOINTING) {
+        try_to_checkpoint(request.backup_id, response);
+        return;
+    }
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::try_to_checkpoint(const int64_t &backup_id,
+                                               /*out*/ backup_response &response)
+{
+    switch (_status) {
+    case backup_status::UNINITIALIZED:
+        start_checkpointing(backup_id, response);
+        break;
+    case backup_status::CHECKPOINTING:
+    case backup_status::CHECKPOINTED:
+        report_checkpointing(response);
+        break;
+    default:
+        response.err = ERR_INVALID_STATE;
+        derror_replica("invalid local status({}) while request status = {}",
+                       enum_to_string(_status),
+                       enum_to_string(backup_status::CHECKPOINTING));
+        break;
+    }
+}
+
+// ThreadPool: THREAD_POOL_REPLICATION
+void replica_backup_manager::start_checkpointing(int64_t backup_id,
+                                                 /*out*/ backup_response &response)
+{
+    FAIL_POINT_INJECT_F("replica_backup_start_checkpointing", [&](dsn::string_view) {
+        _status = backup_status::CHECKPOINTING;
+        response.err = ERR_OK;
+    });
+
+    ddebug_replica("start to checkpoint, backup_id = {}", backup_id);
+    zauto_write_lock l(_lock);

Review Comment:
   Ok, better handle it well.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: dev-unsubscribe@pegasus.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@pegasus.apache.org
For additional commands, e-mail: dev-help@pegasus.apache.org