You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kvrocks.apache.org by wa...@apache.org on 2022/06/10 07:47:00 UTC

[incubator-kvrocks] branch unstable updated: Fix RocksDB can't auto resume after disk quota exceeded error (#628)

This is an automated email from the ASF dual-hosted git repository.

wangyuan pushed a commit to branch unstable
in repository https://gitbox.apache.org/repos/asf/incubator-kvrocks.git


The following commit(s) were added to refs/heads/unstable by this push:
     new 5cf450f  Fix RocksDB can't auto resume after disk quota exceeded error (#628)
5cf450f is described below

commit 5cf450f13f0245ccd78f81feda10d1967d367857
Author: Myth <ca...@outlook.com>
AuthorDate: Fri Jun 10 15:46:54 2022 +0800

    Fix RocksDB can't auto resume after disk quota exceeded error (#628)
    
    In #229, the issue where RocksDB could not recover from the No Space background
    error was fixed. RocksDB itself has repaired this problem in facebook/rocksdb#8376,
    but the issue has not been thoroughly solved: the same problem will still occur
    when an EDQUOT (Disk Quota Exceeded) error is encountered (see the details in
    facebook/rocksdb#10134).
    
    RocksDB cannot recover from this problem and must be restarted. This problem is
    more likely to occur when kvrocks is deployed in a container.
    
    In order to handle all versions of RocksDB, we manually resume DB when we encounter
    two retryable io errors: No space left on device and Disk Quota Exceeded.
    
    For the Disk Quota Exceeded error, RocksDB did not expose a friendly interface,
    so we did a string match.
---
 src/event_listener.cc | 28 +++++++++++++++++++++-------
 src/server.cc         | 10 ++++++----
 2 files changed, 27 insertions(+), 11 deletions(-)

diff --git a/src/event_listener.cc b/src/event_listener.cc
index 6076eee..ae17fd9 100644
--- a/src/event_listener.cc
+++ b/src/event_listener.cc
@@ -59,6 +59,14 @@ const std::string compressType2String(const rocksdb::CompressionType type) {
   return iter->second;
 }
 
+bool isDiskQuotaExceeded(const rocksdb::Status &bg_error) {
+  // No dedicated Status predicate is used for EDQUOT (POSIX.1-2001): match its message text instead.
+  static const char kQuotaExceededMsg[] = "Disk quota exceeded";
+  const std::string status_text = bg_error.ToString();
+
+  return status_text.find(kQuotaExceededMsg) != std::string::npos;
+}
+
 void EventListener::OnCompactionCompleted(rocksdb::DB *db, const rocksdb::CompactionJobInfo &ci) {
   LOG(INFO) << "[event_listener/compaction_completed] column family: " << ci.cf_name
             << ", compaction reason: " << static_cast<int>(ci.compaction_reason)
@@ -92,26 +100,32 @@ void EventListener::OnFlushCompleted(rocksdb::DB *db, const rocksdb::FlushJobInf
             << ", smallest seqno: " << fi.smallest_seqno;
 }
 
-void EventListener::OnBackgroundError(rocksdb::BackgroundErrorReason reason, rocksdb::Status *status) {
+void EventListener::OnBackgroundError(rocksdb::BackgroundErrorReason reason, rocksdb::Status *bg_error) {
   std::string reason_str;
   switch (reason) {
-    case rocksdb::BackgroundErrorReason::kCompaction:reason_str = "compact";
+    case rocksdb::BackgroundErrorReason::kCompaction:
+      reason_str = "compact";
       break;
-    case rocksdb::BackgroundErrorReason::kFlush:reason_str = "flush";
+    case rocksdb::BackgroundErrorReason::kFlush:
+      reason_str = "flush";
       break;
-    case rocksdb::BackgroundErrorReason::kMemTable:reason_str = "memtable";
+    case rocksdb::BackgroundErrorReason::kMemTable:
+      reason_str = "memtable";
       break;
-    case rocksdb::BackgroundErrorReason::kWriteCallback:reason_str = "writecallback";
+    case rocksdb::BackgroundErrorReason::kWriteCallback:
+      reason_str = "writecallback";
       break;
     default:
       // Should not arrive here
       break;
   }
-  if (status->IsNoSpace() && status->severity() < rocksdb::Status::kFatalError) {
+  if ((bg_error->IsNoSpace() || isDiskQuotaExceeded(*bg_error)) &&
+      bg_error->severity() < rocksdb::Status::kFatalError) {
     storage_->SetDBInRetryableIOError(true);
   }
+
   LOG(ERROR) << "[event_listener/background_error] reason: " << reason_str
-             << ", status: " << status->ToString();
+             << ", bg_error: " << bg_error->ToString();
 }
 
 void EventListener::OnTableFileDeleted(const rocksdb::TableFileDeletionInfo &info) {
diff --git a/src/server.cc b/src/server.cc
index c270b6a..c45617e 100644
--- a/src/server.cc
+++ b/src/server.cc
@@ -613,12 +613,14 @@ void Server::cron() {
       }
     }
     // check if DB need to be resumed every minute
-    // rocksdb has auto resume feature after retryable io error, but the current implement can't trigger auto resume
-    // when the no space error is only trigger by db_->Write without any other background action (compact/flush),
-    // so manual trigger resume every minute after no space error to resume db under this scenario.
+    // RocksDB has an auto resume feature after retryable io errors, but earlier versions (before v6.22.1)
+    // had a bug when encountering a no space error. The current version fixes the no space error issue,
+    // but not completely: the problem still exists when a disk quota exceeded error is encountered.
+    // In order to properly handle all possible situations on RocksDB, we manually resume here
+    // when encountering a no space error or a disk quota exceeded error.
     if (counter != 0 && counter % 600 == 0 && storage_->IsDBInRetryableIOError()) {
       storage_->GetDB()->Resume();
-      LOG(INFO) << "[server] Schedule to resume DB after no space error";
+      LOG(INFO) << "[server] Schedule to resume DB after retryable io error";
       storage_->SetDBInRetryableIOError(false);
     }