You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ozone.apache.org by ck...@apache.org on 2022/11/11 02:29:05 UTC
[ozone] branch master updated: HDDS-7446. Cleanup possibly onDisk unreferenced blocks (#3923)
This is an automated email from the ASF dual-hosted git repository.
ckj pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new c4e9e348fa HDDS-7446. Cleanup possibly onDisk unreferenced blocks (#3923)
c4e9e348fa is described below
commit c4e9e348fa3d83fd96db459e399e965145a9391e
Author: Nibiru <ax...@qq.com>
AuthorDate: Fri Nov 11 10:28:59 2022 +0800
HDDS-7446. Cleanup possibly onDisk unreferenced blocks (#3923)
---
.../container/common/helpers/ContainerUtils.java | 29 ++++++++
.../common/impl/ContainerLayoutVersion.java | 26 +------
.../ozone/container/common/interfaces/Handler.java | 11 +++
.../common/statemachine/DatanodeStateMachine.java | 2 +-
.../commandhandler/DeleteBlocksCommandHandler.java | 18 +++--
.../ozone/container/keyvalue/KeyValueHandler.java | 52 +++++++++++++
.../background/BlockDeletingService.java | 16 ++--
.../container/common/TestBlockDeletingService.java | 87 ++++++++++++++++++----
8 files changed, 192 insertions(+), 49 deletions(-)
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerUtils.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerUtils.java
index a8b83a114a..e0480735e5 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerUtils.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerUtils.java
@@ -23,6 +23,7 @@ import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Res
import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.NO_SUCH_ALGORITHM;
import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.CLOSED_CONTAINER_IO;
import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.CONTAINER_NOT_OPEN;
+import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.UNABLE_TO_FIND_DATA_DIR;
import static org.apache.hadoop.hdds.scm.protocolPB.ContainerCommandResponseBuilders.getContainerCommandResponse;
import static org.apache.hadoop.ozone.container.common.impl.ContainerData.CHARSET_ENCODING;
@@ -242,6 +243,34 @@ public final class ContainerUtils {
return new File(containerBaseDir, containerFilePath);
}
+ /**
+ * Get the chunk directory from the containerData.
+ *
+ * @param containerData {@link ContainerData}
+ * @return the file of chunk directory
+ * @throws StorageContainerException
+ */
+ public static File getChunkDir(ContainerData containerData)
+ throws StorageContainerException {
+ Preconditions.checkNotNull(containerData, "Container data can't be null");
+
+ String chunksPath = containerData.getChunksPath();
+ if (chunksPath == null) {
+ LOG.error("Chunks path is null in the container data");
+ throw new StorageContainerException("Unable to get Chunks directory.",
+ UNABLE_TO_FIND_DATA_DIR);
+ }
+
+ File chunksDir = new File(chunksPath);
+ if (!chunksDir.exists()) {
+ LOG.error("Chunks dir {} does not exist", chunksDir.getAbsolutePath());
+ throw new StorageContainerException("Chunks directory " +
+ chunksDir.getAbsolutePath() + " does not exist.",
+ UNABLE_TO_FIND_DATA_DIR);
+ }
+ return chunksDir;
+ }
+
/**
* ContainerID can be decoded from the container base directory name.
*/
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerLayoutVersion.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerLayoutVersion.java
index e253977564..8444b3bda1 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerLayoutVersion.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerLayoutVersion.java
@@ -27,9 +27,8 @@ import org.apache.hadoop.hdds.scm.ScmConfigKeys;
import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException;
import org.apache.hadoop.ozone.container.common.helpers.ChunkInfo;
-import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
-import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.UNABLE_TO_FIND_DATA_DIR;
+import org.apache.hadoop.ozone.container.common.helpers.ContainerUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -123,7 +122,7 @@ public enum ContainerLayoutVersion {
public File getChunkFile(ContainerData containerData, BlockID blockID,
ChunkInfo info) throws StorageContainerException {
- File chunkDir = getChunkDir(containerData);
+ File chunkDir = ContainerUtils.getChunkDir(containerData);
return getChunkFile(chunkDir, blockID, info);
}
@@ -132,25 +131,4 @@ public enum ContainerLayoutVersion {
return "ContainerLayout:v" + version;
}
- private static File getChunkDir(ContainerData containerData)
- throws StorageContainerException {
- Preconditions.checkNotNull(containerData, "Container data can't be null");
-
- String chunksPath = containerData.getChunksPath();
- if (chunksPath == null) {
- LOG.error("Chunks path is null in the container data");
- throw new StorageContainerException("Unable to get Chunks directory.",
- UNABLE_TO_FIND_DATA_DIR);
- }
-
- File chunksDir = new File(chunksPath);
- if (!chunksDir.exists()) {
- LOG.error("Chunks dir {} does not exist", chunksDir.getAbsolutePath());
- throw new StorageContainerException("Chunks directory " +
- chunksDir.getAbsolutePath() + " does not exist.",
- UNABLE_TO_FIND_DATA_DIR);
- }
- return chunksDir;
- }
-
}
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Handler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Handler.java
index 67f977ca38..0fd7e14699 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Handler.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Handler.java
@@ -191,6 +191,17 @@ public abstract class Handler {
public abstract void deleteBlock(Container container, BlockData blockData)
throws IOException;
+ /**
+ * Deletes the possible onDisk but unreferenced blocks/chunks with localID
+ * in the container.
+ *
+ * @param container container whose block/chunk is to be deleted
+ * @param localID localId of the block/chunk
+ * @throws IOException
+ */
+ public abstract void deleteUnreferenced(Container container, long localID)
+ throws IOException;
+
public void setClusterID(String clusterID) {
this.clusterId = clusterID;
}
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java
index 019577a07e..4d72bb317f 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java
@@ -200,7 +200,7 @@ public class DatanodeStateMachine implements Closeable {
// trick.
commandDispatcher = CommandDispatcher.newBuilder()
.addHandler(new CloseContainerCommandHandler())
- .addHandler(new DeleteBlocksCommandHandler(container.getContainerSet(),
+ .addHandler(new DeleteBlocksCommandHandler(getContainer(),
conf, dnConf.getBlockDeleteThreads(),
dnConf.getBlockDeleteQueueLimit()))
.addHandler(new ReplicateContainerCommandHandler(conf, supervisor))
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/DeleteBlocksCommandHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/DeleteBlocksCommandHandler.java
index de92d1793f..c5912a2ffd 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/DeleteBlocksCommandHandler.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/DeleteBlocksCommandHandler.java
@@ -88,10 +88,12 @@ public class DeleteBlocksCommandHandler implements CommandHandler {
private final ExecutorService executor;
private final LinkedBlockingQueue<DeleteCmdInfo> deleteCommandQueues;
private final Daemon handlerThread;
+ private final OzoneContainer ozoneContainer;
- public DeleteBlocksCommandHandler(ContainerSet cset,
+ public DeleteBlocksCommandHandler(OzoneContainer container,
ConfigurationSource conf, int threadPoolSize, int queueLimit) {
- this.containerSet = cset;
+ this.ozoneContainer = container;
+ this.containerSet = container.getContainerSet();
this.conf = conf;
this.executor = Executors.newFixedThreadPool(
threadPoolSize, new ThreadFactoryBuilder()
@@ -117,7 +119,6 @@ public class DeleteBlocksCommandHandler implements CommandHandler {
deleteCommandQueues.add(cmd);
} catch (IllegalStateException e) {
LOG.warn("Command is discarded because of the command queue is full");
- return;
}
}
@@ -424,9 +425,14 @@ public class DeleteBlocksCommandHandler implements CommandHandler {
blkLong, containerId);
}
} else {
- if (LOG.isDebugEnabled()) {
- LOG.debug("Block {} not found or already under deletion in"
- + " container {}, skip deleting it.", blkLong, containerId);
+ // try clean up the possibly onDisk but unreferenced blocks/chunks
+ try {
+ Container<?> container = containerSet.getContainer(containerId);
+ ozoneContainer.getDispatcher().getHandler(container
+ .getContainerType()).deleteUnreferenced(container, blkLong);
+ } catch (IOException e) {
+ LOG.error("Failed to delete files for unreferenced block {} of " +
+ "container {}", blkLong, containerId, e);
}
}
}
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
index 67e9d13c27..0fd6631359 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.ozone.container.keyvalue;
+import java.io.File;
+import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
@@ -31,6 +33,7 @@ import java.util.concurrent.locks.Lock;
import java.util.function.Function;
import com.google.common.util.concurrent.Striped;
+import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.hdds.HddsUtils;
import org.apache.hadoop.hdds.client.BlockID;
import org.apache.hadoop.hdds.conf.ConfigurationSource;
@@ -1110,6 +1113,55 @@ public class KeyValueHandler extends Handler {
}
}
+ @Override
+ public void deleteUnreferenced(Container container, long localID)
+ throws IOException {
+ // Since the block/chunk is already checked that is unreferenced, no
+ // need to lock the container here.
+ StringBuilder prefixBuilder = new StringBuilder();
+ ContainerLayoutVersion layoutVersion = container.getContainerData().
+ getLayoutVersion();
+ long containerID = container.getContainerData().getContainerID();
+ // Only supports the default chunk/block name format now
+ switch (layoutVersion) {
+ case FILE_PER_BLOCK:
+ prefixBuilder.append(localID).append(".block");
+ break;
+ case FILE_PER_CHUNK:
+ prefixBuilder.append(localID).append("_chunk_");
+ break;
+ default:
+ throw new IOException("Unsupported container layout version " +
+ layoutVersion + " for the container " + containerID);
+ }
+ String prefix = prefixBuilder.toString();
+ File chunkDir = ContainerUtils.getChunkDir(container.getContainerData());
+ // chunkNames here is an array of file/dir name, so if we cannot find any
+ // matching one, it means the client did not write any chunk into the block.
+ // Since the putBlock request may fail, we don't know if the chunk exists,
+ // thus we need to check it when receiving the request to delete such blocks
+ String[] chunkNames = getFilesWithPrefix(prefix, chunkDir);
+ if (chunkNames.length == 0) {
+ LOG.warn("Missing delete block(Container = {}, Block = {}",
+ containerID, localID);
+ return;
+ }
+ for (String name: chunkNames) {
+ File file = new File(chunkDir, name);
+ if (!file.isFile()) {
+ continue;
+ }
+ FileUtil.fullyDelete(file);
+ LOG.info("Deleted unreferenced chunk/block {} in container {}", name,
+ containerID);
+ }
+ }
+
+ private String[] getFilesWithPrefix(String prefix, File chunkDir) {
+ FilenameFilter filter = (dir, name) -> name.startsWith(prefix);
+ return chunkDir.list(filter);
+ }
+
private void deleteInternal(Container container, boolean force)
throws StorageContainerException {
container.writeLock();
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/statemachine/background/BlockDeletingService.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/statemachine/background/BlockDeletingService.java
index ee57666a0a..c8cffc380e 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/statemachine/background/BlockDeletingService.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/statemachine/background/BlockDeletingService.java
@@ -554,10 +554,15 @@ public class BlockDeletingService extends BackgroundService {
BlockData blkInfo = blockDataTable.get(blk);
LOG.debug("Deleting block {}", blkLong);
if (blkInfo == null) {
- LOG.warn("Missing delete block(Container = " +
- container.getContainerData().getContainerID() + ", Block = " +
- blkLong);
- blocksProcessed++;
+ try {
+ handler.deleteUnreferenced(container, blkLong);
+ } catch (IOException e) {
+ LOG.error("Failed to delete files for unreferenced block {} of" +
+ " container {}", blkLong,
+ container.getContainerData().getContainerID(), e);
+ } finally {
+ blocksProcessed++;
+ }
continue;
}
@@ -565,7 +570,6 @@ public class BlockDeletingService extends BackgroundService {
try {
handler.deleteBlock(container, blkInfo);
blocksDeleted++;
- blocksProcessed++;
deleted = true;
} catch (IOException e) {
// TODO: if deletion of certain block retries exceed the certain
@@ -573,6 +577,8 @@ public class BlockDeletingService extends BackgroundService {
// otherwise invalid numPendingDeletionBlocks could accumulate
// beyond the limit and the following deletion will stop.
LOG.error("Failed to delete files for block {}", blkLong, e);
+ } finally {
+ blocksProcessed++;
}
if (deleted) {
diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestBlockDeletingService.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestBlockDeletingService.java
index 357cd7c0c9..a27a675a3a 100644
--- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestBlockDeletingService.java
+++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestBlockDeletingService.java
@@ -39,6 +39,7 @@ import org.apache.hadoop.ozone.container.common.helpers.BlockData;
import org.apache.hadoop.ozone.container.common.helpers.BlockDeletingServiceMetrics;
import org.apache.hadoop.ozone.container.common.helpers.ChunkInfo;
import org.apache.hadoop.ozone.container.common.helpers.ContainerMetrics;
+import org.apache.hadoop.ozone.container.common.helpers.ContainerUtils;
import org.apache.hadoop.ozone.container.common.impl.ContainerData;
import org.apache.hadoop.ozone.container.common.impl.ContainerLayoutVersion;
import org.apache.hadoop.ozone.container.common.impl.ContainerSet;
@@ -70,6 +71,7 @@ import org.apache.ozone.test.GenericTestUtils;
import org.apache.ozone.test.GenericTestUtils.LogCapturer;
import org.junit.After;
import org.junit.Assert;
+import org.junit.Assume;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
@@ -77,11 +79,15 @@ import org.junit.runners.Parameterized;
import java.io.File;
import java.io.IOException;
+import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
+import java.util.Set;
import java.util.UUID;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
@@ -306,7 +312,11 @@ public class TestBlockDeletingService {
long chunkLength = 100;
try {
for (int k = 0; k < numOfChunksPerBlock; k++) {
- final String chunkName = String.format("block.%d.chunk.%d", i, k);
+ // This real chunkName should be localID_chunk_chunkIndex, here is for
+ // explicit debug and the necessity of HDDS-7446 to detect the orphan
+ // chunks through the chunk file name
+ final String chunkName = String.format("%d_chunk_%d_block_%d",
+ blockID.getContainerBlockID().getLocalID(), k, i);
final long offset = k * chunkLength;
ContainerProtos.ChunkInfo info =
ContainerProtos.ChunkInfo.newBuilder().setChunkName(chunkName)
@@ -503,18 +513,20 @@ public class TestBlockDeletingService {
@Test
public void testWithUnrecordedBlocks() throws Exception {
// Skip schemaV1, when markBlocksForDeletionSchemaV1, the unrecorded blocks
- // from received TNXs will be skipped to delete and will not be added into
- // the NumPendingDeletionBlocks
- if (Objects.equals(schemaVersion, SCHEMA_V1)) {
- return;
- }
+ // from received TNXs will be deleted, not in BlockDeletingService
+ Assume.assumeFalse(Objects.equals(schemaVersion, SCHEMA_V1));
+
+ int numOfContainers = 2;
+ int numOfChunksPerBlock = 1;
+ int numOfBlocksPerContainer = 3;
DatanodeConfiguration dnConf = conf.getObject(DatanodeConfiguration.class);
dnConf.setBlockDeletionLimit(2);
this.blockLimitPerInterval = dnConf.getBlockDeletionLimit();
conf.setFromObject(dnConf);
ContainerSet containerSet = new ContainerSet(1000);
- createToDeleteBlocks(containerSet, 2, 3, 1);
+ createToDeleteBlocks(containerSet, numOfContainers, numOfBlocksPerContainer,
+ numOfChunksPerBlock);
ContainerMetrics metrics = ContainerMetrics.create(conf);
KeyValueHandler keyValueHandler =
@@ -535,18 +547,44 @@ public class TestBlockDeletingService {
KeyPrefixFilter filter = Objects.equals(schemaVersion, SCHEMA_V1) ?
ctr1.getDeletingBlockKeyFilter() : ctr1.getUnprefixedKeyFilter();
+ // Have two unrecorded blocks onDisk and another two not to simulate the
+ // possible cases
+ int numUnrecordedBlocks = 4;
+ int numExistingOnDiskUnrecordedBlocks = 2;
+ List<Long> unrecordedBlockIds = new ArrayList<>();
+ Set<File> unrecordedChunks = new HashSet<>();
+
try (DBHandle meta = BlockUtils.getDB(ctr1, conf)) {
// create unrecorded blocks in a new txn and update metadata,
// service shall first choose the top pendingDeletion container
// if using the TopNOrderedContainerDeletionChoosingPolicy
- List<Long> unrecordedBlocks = new ArrayList<>();
- int numUnrecorded = 4;
- for (int i = 0; i < numUnrecorded; i++) {
- unrecordedBlocks.add(System.nanoTime() + i);
+ File chunkDir = ContainerUtils.getChunkDir(ctr1);
+ for (int i = 0; i < numUnrecordedBlocks; i++) {
+ long localId = System.nanoTime() + i;
+ unrecordedBlockIds.add(localId);
+ String chunkName;
+ for (int indexOfChunk = 0; indexOfChunk < numOfChunksPerBlock;
+ indexOfChunk++) {
+ if (layout == FILE_PER_BLOCK) {
+ chunkName = localId + ".block";
+ } else {
+ chunkName = localId + "_chunk_" + indexOfChunk;
+ }
+ File chunkFile = new File(chunkDir, chunkName);
+ unrecordedChunks.add(chunkFile);
+ }
}
- createTxn(ctr1, unrecordedBlocks, 100, ctr1.getContainerID());
+ // create unreferenced onDisk chunks
+ Iterator<File> iter = unrecordedChunks.iterator();
+ for (int m = 0; m < numExistingOnDiskUnrecordedBlocks; m++) {
+ File chunk = iter.next();
+ createRandomContentFile(chunk.getName(), chunkDir, 100);
+ Assert.assertTrue(chunk.exists());
+ }
+
+ createTxn(ctr1, unrecordedBlockIds, 100, ctr1.getContainerID());
ctr1.updateDeleteTransactionId(100);
- ctr1.incrPendingDeletionBlocks(numUnrecorded);
+ ctr1.incrPendingDeletionBlocks(numUnrecordedBlocks);
updateMetaData(ctr1, (KeyValueContainer) containerSet.getContainer(
ctr1.getContainerID()), 3, 1);
// Ensure there are 3 + 4 = 7 blocks under deletion
@@ -577,6 +615,12 @@ public class TestBlockDeletingService {
// check if blockData get deleted
assertBlockDataTableRecordCount(0, ctr1, filter);
assertBlockDataTableRecordCount(0, ctr2, filter);
+
+ // check if all the unreferenced chunks get deleted
+ for (File f: unrecordedChunks) {
+ Assert.assertFalse(f.exists());
+ }
+
svc.shutdown();
}
@@ -888,4 +932,21 @@ public class TestBlockDeletingService {
+ ", but actual: " + count + " in the blockData table of container: "
+ containerID + ".", expectedCount, count);
}
+
+ /**
+ * Create the certain sized file in the appointed directory.
+ *
+ * @param fileName the string of file to be created
+ * @param dir the directory where file to be created
+ * @param sizeInBytes the bytes size of the file
+ * @throws IOException
+ */
+ private void createRandomContentFile(String fileName, File dir,
+ long sizeInBytes) throws IOException {
+ File file = new File(dir, fileName);
+ try (RandomAccessFile randomAccessFile = new RandomAccessFile(file,
+ "rw")) {
+ randomAccessFile.setLength(sizeInBytes);
+ }
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@ozone.apache.org
For additional commands, e-mail: commits-help@ozone.apache.org