You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@bookkeeper.apache.org by yo...@apache.org on 2023/06/19 07:42:47 UTC
[bookkeeper] 23/31: Avoid compaction to trigger extra flushes DbLedgerStorage (#3959)
This is an automated email from the ASF dual-hosted git repository.
yong pushed a commit to branch branch-4.16
in repository https://gitbox.apache.org/repos/asf/bookkeeper.git
commit 2aede4b5ca6f182ef40267c707bfb4ca8474b44a
Author: Matteo Merli <mm...@apache.org>
AuthorDate: Mon May 22 12:21:46 2023 -0700
Avoid compaction to trigger extra flushes DbLedgerStorage (#3959)
* Avoid compaction to trigger extra flushes DbLedgerStorage
* Expanded comment
* Fixed test
* Fixed direct io test
(cherry picked from commit c924cfeb509c4e65e33a82ae88ad423017edb669)
---
.../ldb/SingleDirectoryDbLedgerStorage.java | 25 ++++++++++++++++++++--
.../bookie/storage/ldb/DbLedgerStorageTest.java | 2 ++
2 files changed, 25 insertions(+), 2 deletions(-)
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/SingleDirectoryDbLedgerStorage.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/SingleDirectoryDbLedgerStorage.java
index dd673f319a..69e2724c47 100644
--- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/SingleDirectoryDbLedgerStorage.java
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/SingleDirectoryDbLedgerStorage.java
@@ -931,9 +931,30 @@ public class SingleDirectoryDbLedgerStorage implements CompactableLedgerStorage
@Override
public void updateEntriesLocations(Iterable<EntryLocation> locations) throws IOException {
- // Trigger a flush to have all the entries being compacted in the db storage
- flush();
+ // Before updating the DB with the new location for the compacted entries, we need to
+ // make sure that there is no ongoing flush() operation.
+ // If there were a flush, we could have the following situation, which is
+ // highly unlikely, though possible:
+ // 1. Flush operation has written the write-cache content into entry-log files
+ // 2. The DB location index is not yet updated
+ // 3. Compaction is triggered and starts compacting some of the recent files
+ // 4. Compaction will write the "new location" into the DB
+ // 5. The pending flush() will overwrite the DB with the "old location", pointing
+ // to a file that no longer exists
+ //
+ // To avoid this race condition, we need all the entries that are potentially
+ // included in the compaction round to have their indexes already flushed into
+ // the DB.
+ // The easiest lightweight way to achieve this is to wait for any pending
+ // flush operation to be completed before updating the index with the compacted
+ // entries, by blocking on the flushMutex.
+ flushMutex.lock();
+ flushMutex.unlock();
+ // We don't need to keep the flush mutex locked here while updating the DB.
+ // It's fine to have a concurrent flush operation at this point, because we
+ // know that none of the entries being flushed was included in the compaction
+ // round that we are dealing with.
entryLocationIndex.updateLocations(locations);
}
diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorageTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorageTest.java
index 4efdf06edb..2a7e8e2869 100644
--- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorageTest.java
+++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorageTest.java
@@ -225,6 +225,7 @@ public class DbLedgerStorageTest {
entry3.writeBytes("entry-3".getBytes());
storage.addEntry(entry3);
+
// Simulate bookie compaction
SingleDirectoryDbLedgerStorage singleDirStorage = ((DbLedgerStorage) storage).getLedgerStorageList().get(0);
EntryLogger entryLogger = singleDirStorage.getEntryLogger();
@@ -236,6 +237,7 @@ public class DbLedgerStorageTest {
long location = entryLogger.addEntry(4L, newEntry3);
newEntry3.resetReaderIndex();
+ storage.flush();
List<EntryLocation> locations = Lists.newArrayList(new EntryLocation(4, 3, location));
singleDirStorage.updateEntriesLocations(locations);