You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pegasus.apache.org by wa...@apache.org on 2022/10/28 10:31:07 UTC
[incubator-pegasus] branch master updated: fix: fix rollback by mistake for mutation_log::reset_from (#1208)
This is an automated email from the ASF dual-hosted git repository.
wangdan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-pegasus.git
The following commit(s) were added to refs/heads/master by this push:
new b6548e7b2 fix: fix rollback by mistake for mutation_log::reset_from (#1208)
b6548e7b2 is described below
commit b6548e7b218b2c7c13314481f8c56121ce641374
Author: Dan Wang <wa...@apache.org>
AuthorDate: Fri Oct 28 18:31:01 2022 +0800
fix: fix rollback by mistake for mutation_log::reset_from (#1208)
---
src/replica/mutation_log.cpp | 68 +++++++++++++++++++++++++-------------------
1 file changed, 39 insertions(+), 29 deletions(-)
diff --git a/src/replica/mutation_log.cpp b/src/replica/mutation_log.cpp
index 99cd120f0..947cb3d10 100644
--- a/src/replica/mutation_log.cpp
+++ b/src/replica/mutation_log.cpp
@@ -939,58 +939,68 @@ error_code mutation_log::reset_from(const std::string &dir,
replay_callback replay_error_callback,
io_failure_callback write_error_callback)
{
- error_code err = ERR_FILE_OPERATION_FAILED;
-
- // close for flushing current log and be ready to open new log files after reset
+ // Close for flushing current log and get ready to open new log files after reset.
close();
- // make sure logs in `dir` (such as /learn) are valid.
+ // Ensure that log files in `dir` (such as "/learn") are valid.
error_s es = log_utils::check_log_files_continuity(dir);
if (!es.is_ok()) {
- LOG_ERROR_F("the log of source dir {} is invalid:{}, will remove it.", dir, es);
+ LOG_ERROR_F("the log files of source dir {} are invalid: {}, will remove it", dir, es);
if (!utils::filesystem::remove_path(dir)) {
- LOG_ERROR_F("remove {} failed", dir);
- return err;
+ LOG_ERROR_F("remove source dir {} failed", dir);
+ return ERR_FILE_OPERATION_FAILED;
}
return es.code();
}
- std::string temp_dir = _dir + '.' + std::to_string(dsn_now_ns());
+ std::string temp_dir = fmt::format("{}.{}", _dir, dsn_now_ns());
if (!utils::filesystem::rename_path(_dir, temp_dir)) {
- LOG_ERROR_F("rename {} to {} failed", _dir, temp_dir);
- return err;
+ LOG_ERROR_F("rename current log dir {} to temp dir {} failed", _dir, temp_dir);
+ return ERR_FILE_OPERATION_FAILED;
}
- LOG_INFO_F("moved current log dir {} to tmp_dir {}", _dir, temp_dir);
- // define `defer` for rollback temp_dir when failed or remove temp_dir when success
- auto temp_dir_resolve = dsn::defer([this, err, temp_dir]() {
- if (err != ERR_OK) {
- if (!utils::filesystem::rename_path(temp_dir, _dir)) {
- // rollback failed means old log files are not be recovered, it may be lost if only
- // LOG_ERROR, dassert for manual resolve it
- // TODO(yingchun): will be fixed later
- // CHECK(false, "rollback {} to {} failed", temp_dir, _dir);
- }
- } else {
+ LOG_INFO_F("rename current log dir {} to temp dir {}", _dir, temp_dir);
+
+ error_code err = ERR_OK;
+
+ // If successful, just remove temp dir; otherwise, rename temp dir back to current dir.
+ auto temp_dir_resolve = dsn::defer([this, temp_dir, &err]() {
+ if (err == ERR_OK) {
if (!dsn::utils::filesystem::remove_path(temp_dir)) {
- // temp dir allow delete failed, it's only garbage
+ // Removing temp dir failed is allowed, it's just garbage.
LOG_ERROR_F("remove temp dir {} failed", temp_dir);
}
+ } else {
+ // Once rollback failed, dir should be recovered manually in case data is lost.
+ CHECK(utils::filesystem::rename_path(temp_dir, _dir),
+ "rename temp dir {} back to current dir {} failed",
+ temp_dir,
+ _dir);
}
});
- // move source dir to target dir
+ // Rename source dir to current dir.
if (!utils::filesystem::rename_path(dir, _dir)) {
- LOG_ERROR_F("rename {} to {} failed", dir, _dir);
+ LOG_ERROR_F("rename source dir {} to current dir {} failed", dir, _dir);
return err;
}
- LOG_INFO_F("move {} to {} as our new log directory", dir, _dir);
+ LOG_INFO_F("rename source dir {} to current dir {} successfully", dir, _dir);
+
+ auto dir_resolve = dsn::defer([this, dir, &err]() {
+ if (err != ERR_OK) {
+ CHECK(utils::filesystem::rename_path(_dir, dir),
+ "rename current dir {} back to source dir {} failed",
+ _dir,
+ dir);
+ }
+ });
- // - make sure logs in moved dir(such as /plog) are valid and can be opened successfully.
- // - re-open new log files for loading the new log file and register the files into replica,
- // please make sure the old log files has been closed
+ // 1. Ensure that logs in current dir(such as "/plog") are valid and can be opened
+ // successfully;
+ // 2. Reopen, load and register new log files into replica;
+ // 3. Be sure that the old log files should have been closed.
err = open(replay_error_callback, write_error_callback);
if (err != ERR_OK) {
- LOG_ERROR_F("the logs of moved dir {} are invalid and open failed:{}", _dir, err);
+ LOG_ERROR_F("the log files of current dir {} are invalid, thus open failed: {}", _dir, err);
}
return err;
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pegasus.apache.org
For additional commands, e-mail: commits-help@pegasus.apache.org