You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ozone.apache.org by ad...@apache.org on 2022/10/14 12:19:33 UTC
[ozone] branch master updated: HDDS-7300. Race condition between full data scan and block deletion (#3811)
This is an automated email from the ASF dual-hosted git repository.
adoroszlai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new 46e58a6350 HDDS-7300. Race condition between full data scan and block deletion (#3811)
46e58a6350 is described below
commit 46e58a6350e86f7573fe45678e24c5dab96df45a
Author: Nibiru <ax...@qq.com>
AuthorDate: Fri Oct 14 20:19:27 2022 +0800
HDDS-7300. Race condition between full data scan and block deletion (#3811)
---
.../container/keyvalue/KeyValueContainer.java | 4 +-
.../container/keyvalue/KeyValueContainerCheck.java | 108 ++++++++++++++++-----
.../keyvalue/TestKeyValueContainerCheck.java | 4 +-
3 files changed, 87 insertions(+), 29 deletions(-)
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java
index f8d918af7c..9fc8c44662 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java
@@ -828,7 +828,7 @@ public class KeyValueContainer implements Container<KeyValueContainerData> {
long containerId = containerData.getContainerID();
KeyValueContainerCheck checker =
new KeyValueContainerCheck(containerData.getMetadataPath(), config,
- containerId, containerData.getVolume());
+ containerId, containerData.getVolume(), this);
return checker.fastCheck();
}
@@ -849,7 +849,7 @@ public class KeyValueContainer implements Container<KeyValueContainerData> {
long containerId = containerData.getContainerID();
KeyValueContainerCheck checker =
new KeyValueContainerCheck(containerData.getMetadataPath(), config,
- containerId, containerData.getVolume());
+ containerId, containerData.getVolume(), this);
return checker.fullCheck(throttler, canceler);
}
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerCheck.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerCheck.java
index 90c392bc01..115fc7f373 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerCheck.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerCheck.java
@@ -66,9 +66,10 @@ public class KeyValueContainerCheck {
private String metadataPath;
private HddsVolume volume;
+ private KeyValueContainer container;
public KeyValueContainerCheck(String metadataPath, ConfigurationSource conf,
- long containerID, HddsVolume volume) {
+ long containerID, HddsVolume volume, KeyValueContainer container) {
Preconditions.checkArgument(metadataPath != null);
this.checkConfig = conf;
@@ -76,6 +77,7 @@ public class KeyValueContainerCheck {
this.onDiskContainerData = null;
this.metadataPath = metadataPath;
this.volume = volume;
+ this.container = container;
}
/**
@@ -230,8 +232,6 @@ public class KeyValueContainerCheck {
onDiskContainerData.setDbFile(dbFile);
- ContainerLayoutVersion layout = onDiskContainerData.getLayoutVersion();
-
try (DBHandle db = BlockUtils.getDB(onDiskContainerData, checkConfig);
BlockIterator<BlockData> kvIter = db.getStore().getBlockIterator(
onDiskContainerData.getContainerID(),
@@ -239,35 +239,93 @@ public class KeyValueContainerCheck {
while (kvIter.hasNext()) {
BlockData block = kvIter.nextBlock();
- for (ContainerProtos.ChunkInfo chunk : block.getChunks()) {
- File chunkFile = layout.getChunkFile(onDiskContainerData,
- block.getBlockID(), ChunkInfo.getFromProtoBuf(chunk));
-
- if (!chunkFile.exists()) {
- // concurrent mutation in Block DB? lookup the block again.
- String blockKey =
- onDiskContainerData.blockKey(block.getBlockID().getLocalID());
- BlockData bdata = db.getStore()
- .getBlockDataTable()
- .get(blockKey);
- // In EC, client may write empty putBlock in padding block nodes.
- // So, we need to make sure, chunk length > 0, before declaring
- // the missing chunk file.
- if (bdata != null && bdata.getChunks().size() > 0 && bdata
- .getChunks().get(0).getLen() > 0) {
- throw new IOException(
- "Missing chunk file " + chunkFile.getAbsolutePath());
+
+ // If holding read lock for the entire duration, including wait() calls
+ // in DataTransferThrottler, would effectively make other threads
+ // throttled.
+ // Here try optimistically and retry with the container lock to
+ // make sure reading the latest record. If the record is just removed,
+ // the block should be skipped to scan.
+ try {
+ scanBlock(block, throttler, canceler);
+ } catch (OzoneChecksumException ex) {
+ throw ex;
+ } catch (IOException ex) {
+ if (getBlockDataFromDBWithLock(db, block) == null) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Scanned outdated blockData {} in container {}.",
+ block, containerID);
}
- } else if (chunk.getChecksumData().getType()
- != ContainerProtos.ChecksumType.NONE) {
- verifyChecksum(block, chunk, chunkFile, layout, throttler,
- canceler);
+ } else {
+ throw ex;
}
}
}
}
}
+ /**
+ * Attempt to read the block data without the container lock.
+ * The block onDisk might be in modification by other thread and not yet
+ * flushed to DB, so the content might be outdated.
+ *
+ * @param db DB of container
+ * @param block last queried blockData
+ * @return blockData in DB
+ * @throws IOException
+ */
+ private BlockData getBlockDataFromDB(DBHandle db, BlockData block)
+ throws IOException {
+ String blockKey =
+ onDiskContainerData.blockKey(block.getBlockID().getLocalID());
+ return db.getStore().getBlockDataTable().get(blockKey);
+ }
+
+ /**
+ * Attempt to read the block data with the container lock.
+ * The container lock ensure the latest DB record could be retrieved, since
+ * other block related write operation will acquire the container write lock.
+ *
+ * @param db DB of container
+ * @param block last queried blockData
+ * @return blockData in DB
+ * @throws IOException
+ */
+ private BlockData getBlockDataFromDBWithLock(DBHandle db, BlockData block)
+ throws IOException {
+ container.readLock();
+ try {
+ return getBlockDataFromDB(db, block);
+ } finally {
+ container.readUnlock();
+ }
+ }
+
+ private void scanBlock(BlockData block, DataTransferThrottler throttler,
+ Canceler canceler) throws IOException {
+ ContainerLayoutVersion layout = onDiskContainerData.getLayoutVersion();
+
+ for (ContainerProtos.ChunkInfo chunk : block.getChunks()) {
+ File chunkFile = layout.getChunkFile(onDiskContainerData,
+ block.getBlockID(), ChunkInfo.getFromProtoBuf(chunk));
+
+ if (!chunkFile.exists()) {
+ // In EC, client may write empty putBlock in padding block nodes.
+ // So, we need to make sure, chunk length > 0, before declaring
+ // the missing chunk file.
+ if (block.getChunks().size() > 0 && block
+ .getChunks().get(0).getLen() > 0) {
+ throw new IOException(
+ "Missing chunk file " + chunkFile.getAbsolutePath());
+ }
+ } else if (chunk.getChecksumData().getType()
+ != ContainerProtos.ChecksumType.NONE) {
+ verifyChecksum(block, chunk, chunkFile, layout, throttler,
+ canceler);
+ }
+ }
+ }
+
private static void verifyChecksum(BlockData block,
ContainerProtos.ChunkInfo chunk, File chunkFile,
ContainerLayoutVersion layout,
diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerCheck.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerCheck.java
index 4d8e5fa684..e8384caa0d 100644
--- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerCheck.java
+++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerCheck.java
@@ -71,7 +71,7 @@ public class TestKeyValueContainerCheck
KeyValueContainerCheck kvCheck =
new KeyValueContainerCheck(containerData.getMetadataPath(), conf,
- containerID, containerData.getVolume());
+ containerID, containerData.getVolume(), container);
// first run checks on a Open Container
boolean valid = kvCheck.fastCheck();
@@ -106,7 +106,7 @@ public class TestKeyValueContainerCheck
KeyValueContainerCheck kvCheck =
new KeyValueContainerCheck(containerData.getMetadataPath(), conf,
- containerID, containerData.getVolume());
+ containerID, containerData.getVolume(), container);
File dbFile = KeyValueContainerLocationUtil
.getContainerDBFile(containerData);
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@ozone.apache.org
For additional commands, e-mail: commits-help@ozone.apache.org