You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@bookkeeper.apache.org by yo...@apache.org on 2023/06/19 07:42:47 UTC

[bookkeeper] 23/31: Avoid compaction to trigger extra flushes DbLedgerStorage (#3959)

This is an automated email from the ASF dual-hosted git repository.

yong pushed a commit to branch branch-4.16
in repository https://gitbox.apache.org/repos/asf/bookkeeper.git

commit 2aede4b5ca6f182ef40267c707bfb4ca8474b44a
Author: Matteo Merli <mm...@apache.org>
AuthorDate: Mon May 22 12:21:46 2023 -0700

    Avoid compaction to trigger extra flushes DbLedgerStorage (#3959)
    
    * Avoid compaction to trigger extra flushes DbLedgerStorage
    
    * Expanded comment
    
    * Fixed test
    
    * Fixed direct io test
    
    (cherry picked from commit c924cfeb509c4e65e33a82ae88ad423017edb669)
---
 .../ldb/SingleDirectoryDbLedgerStorage.java        | 25 ++++++++++++++++++++--
 .../bookie/storage/ldb/DbLedgerStorageTest.java    |  2 ++
 2 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/SingleDirectoryDbLedgerStorage.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/SingleDirectoryDbLedgerStorage.java
index dd673f319a..69e2724c47 100644
--- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/SingleDirectoryDbLedgerStorage.java
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/SingleDirectoryDbLedgerStorage.java
@@ -931,9 +931,30 @@ public class SingleDirectoryDbLedgerStorage implements CompactableLedgerStorage
 
     @Override
     public void updateEntriesLocations(Iterable<EntryLocation> locations) throws IOException {
-        // Trigger a flush to have all the entries being compacted in the db storage
-        flush();
+        // Before updating the DB with the new location for the compacted entries, we need to
+        // make sure that there is no ongoing flush() operation.
+        // If there were a flush, we could have the following situation, which is highly
+        // unlikely though possible:
+        // 1. Flush operation has written the write-cache content into entry-log files
+        // 2. The DB location index is not yet updated
+        // 3. Compaction is triggered and starts compacting some of the recent files
+        // 4. Compaction will write the "new location" into the DB
+        // 5. The pending flush() will overwrite the DB with the "old location", pointing
+        //    to a file that no longer exists
+        //
+        // To avoid this race condition, we need that all the entries that are potentially
+        // included in the compaction round to have all the indexes already flushed into
+        // the DB.
+        // The easiest lightweight way to achieve this is to wait for any pending
+        // flush operation to be completed before updating the index with the compacted
+        // entries, by blocking on the flushMutex.
+        flushMutex.lock();
+        flushMutex.unlock();
 
+        // We don't need to keep the flush mutex locked here while updating the DB.
+        // It's fine to have a concurrent flush operation at this point, because we
+        // know that none of the entries being flushed was included in the compaction
+        // round that we are dealing with.
         entryLocationIndex.updateLocations(locations);
     }
 
diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorageTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorageTest.java
index 4efdf06edb..2a7e8e2869 100644
--- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorageTest.java
+++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorageTest.java
@@ -225,6 +225,7 @@ public class DbLedgerStorageTest {
         entry3.writeBytes("entry-3".getBytes());
         storage.addEntry(entry3);
 
+
         // Simulate bookie compaction
         SingleDirectoryDbLedgerStorage singleDirStorage = ((DbLedgerStorage) storage).getLedgerStorageList().get(0);
         EntryLogger entryLogger = singleDirStorage.getEntryLogger();
@@ -236,6 +237,7 @@ public class DbLedgerStorageTest {
         long location = entryLogger.addEntry(4L, newEntry3);
         newEntry3.resetReaderIndex();
 
+        storage.flush();
         List<EntryLocation> locations = Lists.newArrayList(new EntryLocation(4, 3, location));
         singleDirStorage.updateEntriesLocations(locations);