Posted to common-commits@hadoop.apache.org by we...@apache.org on 2018/02/03 01:19:01 UTC
hadoop git commit: HDFS-11187. Optimize disk access for last partial chunk checksum of Finalized replica. Contributed by Wei-Chiu Chuang.
Repository: hadoop
Updated Branches:
refs/heads/trunk c7101fe21 -> 2021f4bdc
HDFS-11187. Optimize disk access for last partial chunk checksum of Finalized replica. Contributed by Wei-Chiu Chuang.
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/2021f4bd
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/2021f4bd
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/2021f4bd
Branch: refs/heads/trunk
Commit: 2021f4bdce3b27c46edaad198f0007a26a8a1391
Parents: c7101fe
Author: Wei-Chiu Chuang <we...@apache.org>
Authored: Fri Feb 2 17:15:26 2018 -0800
Committer: Wei-Chiu Chuang <we...@apache.org>
Committed: Fri Feb 2 17:18:42 2018 -0800
----------------------------------------------------------------------
.../hdfs/server/datanode/BlockSender.java | 56 +++++++++++----
.../hdfs/server/datanode/FinalizedReplica.java | 74 ++++++++++++--------
.../hdfs/server/datanode/ReplicaBuilder.java | 11 ++-
.../datanode/fsdataset/impl/FsDatasetImpl.java | 1 +
.../datanode/fsdataset/impl/FsVolumeImpl.java | 21 ++++--
.../org/apache/hadoop/hdfs/MiniDFSCluster.java | 23 ++++++
.../namenode/TestListCorruptFileBlocks.java | 4 +-
7 files changed, 140 insertions(+), 50 deletions(-)
----------------------------------------------------------------------
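In outline, the patch stops BlockSender from reading the last partial chunk checksum off disk on every read of a finalized replica: the checksum is cached on the FinalizedReplica object and loaded at most once, and only when the replica's length actually ends in a partial 512-byte chunk. A minimal, self-contained sketch of that lazy-load pattern (illustrative names only; the real logic is in FinalizedReplica and BlockSender below, and loadFromDisk() stands in for FsVolumeSpi#loadLastPartialChunkChecksum):

    import java.io.IOException;

    public class LazyChecksumHolder {
      private static final long CHUNK_SIZE = 512;
      private byte[] lastPartialChunkChecksum;   // null until first use

      // Read path: at most one disk access, then served from memory.
      public byte[] getOrLoad(long visibleLength) throws IOException {
        if (visibleLength % CHUNK_SIZE != 0
            && lastPartialChunkChecksum == null) {
          lastPartialChunkChecksum = loadFromDisk();
        }
        return lastPartialChunkChecksum;
      }

      private byte[] loadFromDisk() throws IOException {
        return new byte[4];   // placeholder for reading the trailing CRC
      }
    }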
http://git-wip-us.apache.org/repos/asf/hadoop/blob/2021f4bd/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java
index 3ff5c75..268007f 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java
@@ -175,8 +175,13 @@ class BlockSender implements java.io.Closeable {
* See {@link BlockSender#isLongRead()}
*/
private static final long LONG_READ_THRESHOLD_BYTES = 256 * 1024;
-
+ // The number of bytes per checksum here determines the alignment
+ // of reads: we always start reading at a checksum chunk boundary,
+ // even if the checksum type is NULL. So, choosing too big of a value
+ // would risk sending too much unnecessary data. 512 (1 disk sector)
+ // is likely to result in minimal extra IO.
+ private static final long CHUNK_SIZE = 512;
/**
* Constructor
*
@@ -250,18 +255,16 @@ class BlockSender implements java.io.Closeable {
try(AutoCloseableLock lock = datanode.data.acquireDatasetLock()) {
replica = getReplica(block, datanode);
replicaVisibleLength = replica.getVisibleLength();
- if (replica instanceof FinalizedReplica) {
- // Load last checksum in case the replica is being written
- // concurrently
- final FinalizedReplica frep = (FinalizedReplica) replica;
- chunkChecksum = frep.getLastChecksumAndDataLen();
- }
}
if (replica.getState() == ReplicaState.RBW) {
final ReplicaInPipeline rbw = (ReplicaInPipeline) replica;
waitForMinLength(rbw, startOffset + length);
chunkChecksum = rbw.getLastChecksumAndDataLen();
}
+ if (replica instanceof FinalizedReplica) {
+ chunkChecksum = getPartialChunkChecksumForFinalized(
+ (FinalizedReplica)replica);
+ }
if (replica.getGenerationStamp() < block.getGenerationStamp()) {
throw new IOException("Replica gen stamp < block genstamp, block="
@@ -348,12 +351,8 @@ class BlockSender implements java.io.Closeable {
}
}
if (csum == null) {
- // The number of bytes per checksum here determines the alignment
- // of reads: we always start reading at a checksum chunk boundary,
- // even if the checksum type is NULL. So, choosing too big of a value
- // would risk sending too much unnecessary data. 512 (1 disk sector)
- // is likely to result in minimal extra IO.
- csum = DataChecksum.newDataChecksum(DataChecksum.Type.NULL, 512);
+ csum = DataChecksum.newDataChecksum(DataChecksum.Type.NULL,
+ (int)CHUNK_SIZE);
}
/*
@@ -427,6 +426,37 @@ class BlockSender implements java.io.Closeable {
}
}
+ private ChunkChecksum getPartialChunkChecksumForFinalized(
+ FinalizedReplica finalized) throws IOException {
+ // There are a number of places in the code base where a finalized replica
+ // object is created. If last partial checksum is loaded whenever a
+ // finalized replica is created, it would increase latency in DataNode
+ // initialization. Therefore, the last partial chunk checksum is loaded
+ // lazily.
+
+ // Load last checksum in case the replica is being written concurrently
+ final long replicaVisibleLength = replica.getVisibleLength();
+ if (replicaVisibleLength % CHUNK_SIZE != 0 &&
+ finalized.getLastPartialChunkChecksum() == null) {
+ // the finalized replica does not have precomputed last partial
+ // chunk checksum. Recompute now.
+ try {
+ finalized.loadLastPartialChunkChecksum();
+ return new ChunkChecksum(finalized.getVisibleLength(),
+ finalized.getLastPartialChunkChecksum());
+ } catch (FileNotFoundException e) {
+ // meta file is lost. Continue anyway to preserve existing behavior.
+ DataNode.LOG.warn(
+ "meta file " + finalized.getMetaFile() + " is missing!");
+ return null;
+ }
+ } else {
+ // If the checksum is null, BlockSender will use on-disk checksum.
+ return new ChunkChecksum(finalized.getVisibleLength(),
+ finalized.getLastPartialChunkChecksum());
+ }
+ }
+
/**
* close opened files.
*/
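The CHUNK_SIZE constant above feeds simple alignment arithmetic: reads always start on a 512-byte chunk boundary, so only a replica whose visible length is not a multiple of 512 carries a trailing partial chunk whose checksum has to come from memory. A standalone illustration with example values (not DataNode code):

    public class ChunkAlignmentDemo {
      public static void main(String[] args) {
        final long chunkSize = 512;
        final long visibleLength = 1300;                 // 2 full chunks + 276 bytes
        long partial = visibleLength % chunkSize;        // 276
        long lastChunkStart = visibleLength - partial;   // 1024, chunk-aligned
        boolean needsCachedChecksum = partial != 0;      // true -> lazy load
        System.out.println("last chunk starts at " + lastChunkStart
            + ", partial bytes = " + partial
            + ", needs cached checksum = " + needsCachedChecksum);
      }
    }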
http://git-wip-us.apache.org/repos/asf/hadoop/blob/2021f4bd/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/FinalizedReplica.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/FinalizedReplica.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/FinalizedReplica.java
index e3e0450..b6212be 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/FinalizedReplica.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/FinalizedReplica.java
@@ -18,7 +18,6 @@
package org.apache.hadoop.hdfs.server.datanode;
import java.io.File;
-import java.io.FileNotFoundException;
import java.io.IOException;
import org.apache.hadoop.hdfs.protocol.Block;
@@ -30,9 +29,9 @@ import org.apache.hadoop.hdfs.server.protocol.ReplicaRecoveryInfo;
* This class describes a replica that has been finalized.
*/
public class FinalizedReplica extends LocalReplica {
-
+ private byte[] lastPartialChunkChecksum;
/**
- * Constructor
+ * Constructor.
* @param blockId block id
* @param len replica length
* @param genStamp replica generation stamp
@@ -41,9 +40,24 @@ public class FinalizedReplica extends LocalReplica {
*/
public FinalizedReplica(long blockId, long len, long genStamp,
FsVolumeSpi vol, File dir) {
+ this(blockId, len, genStamp, vol, dir, null);
+ }
+
+ /**
+ * Constructor.
+ * @param blockId block id
+ * @param len replica length
+ * @param genStamp replica generation stamp
+ * @param vol volume where replica is located
+ * @param dir directory path where block and meta files are located
+ * @param checksum the last partial chunk checksum
+ */
+ public FinalizedReplica(long blockId, long len, long genStamp,
+ FsVolumeSpi vol, File dir, byte[] checksum) {
super(blockId, len, genStamp, vol, dir);
+ this.setLastPartialChunkChecksum(checksum);
}
-
+
/**
* Constructor
* @param block a block
@@ -51,7 +65,20 @@ public class FinalizedReplica extends LocalReplica {
* @param dir directory path where block and meta files are located
*/
public FinalizedReplica(Block block, FsVolumeSpi vol, File dir) {
+ this(block, vol, dir, null);
+ }
+
+ /**
+ * Constructor
+ * @param block a block
+ * @param vol volume where replica is located
+ * @param dir directory path where block and meta files are located
+ * @param checksum the last partial chunk checksum
+ */
+ public FinalizedReplica(Block block, FsVolumeSpi vol, File dir,
+ byte[] checksum) {
super(block, vol, dir);
+ this.setLastPartialChunkChecksum(checksum);
}
/**
@@ -60,6 +87,7 @@ public class FinalizedReplica extends LocalReplica {
*/
public FinalizedReplica(FinalizedReplica from) {
super(from);
+ this.setLastPartialChunkChecksum(from.getLastPartialChunkChecksum());
}
@Override // ReplicaInfo
@@ -116,30 +144,18 @@ public class FinalizedReplica extends LocalReplica {
" does not support createInfo");
}
- /**
- * gets the last chunk checksum and the length of the block corresponding
- * to that checksum.
- * Note, need to be called with the FsDataset lock acquired. May improve to
- * lock only the FsVolume in the future.
- * @throws IOException
- */
- public ChunkChecksum getLastChecksumAndDataLen() throws IOException {
- ChunkChecksum chunkChecksum = null;
- try {
- byte[] lastChecksum = getVolume().loadLastPartialChunkChecksum(
- getBlockFile(), getMetaFile());
- if (lastChecksum != null) {
- chunkChecksum =
- new ChunkChecksum(getVisibleLength(), lastChecksum);
- }
- } catch (FileNotFoundException e) {
- // meta file is lost. Try to continue anyway.
- DataNode.LOG.warn("meta file " + getMetaFile() +
- " is missing!");
- } catch (IOException ioe) {
- DataNode.LOG.warn("Unable to read checksum from meta file " +
- getMetaFile(), ioe);
- }
- return chunkChecksum;
+ public byte[] getLastPartialChunkChecksum() {
+ return lastPartialChunkChecksum;
+ }
+
+ public void setLastPartialChunkChecksum(byte[] checksum) {
+ lastPartialChunkChecksum = checksum;
+ }
+
+ public void loadLastPartialChunkChecksum()
+ throws IOException {
+ byte[] lastChecksum = getVolume().loadLastPartialChunkChecksum(
+ getBlockFile(), getMetaFile());
+ setLastPartialChunkChecksum(lastChecksum);
}
}
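With getLastChecksumAndDataLen() removed, callers pair the getter with the explicit loader and handle a missing meta file themselves, as BlockSender now does above. A hedged sketch of the caller-side pattern (hypothetical helper, assumed to live in the same package so it can see ChunkChecksum):

    static ChunkChecksum lastChunk(FinalizedReplica finalized)
        throws IOException {
      if (finalized.getLastPartialChunkChecksum() == null) {
        finalized.loadLastPartialChunkChecksum();  // single disk read, then cached
      }
      return new ChunkChecksum(finalized.getVisibleLength(),
          finalized.getLastPartialChunkChecksum());
    }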
http://git-wip-us.apache.org/repos/asf/hadoop/blob/2021f4bd/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ReplicaBuilder.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ReplicaBuilder.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ReplicaBuilder.java
index 2c55e73..d198197 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ReplicaBuilder.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ReplicaBuilder.java
@@ -46,6 +46,7 @@ public class ReplicaBuilder {
private Thread writer;
private long recoveryId;
private Block block;
+ private byte[] lastPartialChunkChecksum;
private ReplicaInfo fromReplica;
@@ -178,6 +179,11 @@ public class ReplicaBuilder {
return this;
}
+ public ReplicaBuilder setLastPartialChunkChecksum(byte[] checksum) {
+ this.lastPartialChunkChecksum = checksum;
+ return this;
+ }
+
public LocalReplicaInPipeline buildLocalReplicaInPipeline()
throws IllegalArgumentException {
LocalReplicaInPipeline info = null;
@@ -258,10 +264,11 @@ public class ReplicaBuilder {
+ "state: " + fromReplica.getState());
} else {
if (null != block) {
- return new FinalizedReplica(block, volume, directoryUsed);
+ return new FinalizedReplica(block, volume, directoryUsed,
+ lastPartialChunkChecksum);
} else {
return new FinalizedReplica(blockId, length, genStamp, volume,
- directoryUsed);
+ directoryUsed, lastPartialChunkChecksum);
}
}
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/2021f4bd/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java
index 8e7884d..c141293 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java
@@ -1724,6 +1724,7 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
replicaInfo.getOriginalReplica().getState()
== ReplicaState.FINALIZED) {
newReplicaInfo = replicaInfo.getOriginalReplica();
+ ((FinalizedReplica)newReplicaInfo).loadLastPartialChunkChecksum();
} else {
FsVolumeImpl v = (FsVolumeImpl)replicaInfo.getVolume();
if (v == null) {
http://git-wip-us.apache.org/repos/asf/hadoop/blob/2021f4bd/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java
index 319bc0e..b8c95a4 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java
@@ -51,6 +51,8 @@ import org.apache.hadoop.hdfs.server.datanode.BlockMetadataHeader;
import org.apache.hadoop.hdfs.server.datanode.checker.VolumeCheckResult;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.DataNodeVolumeMetrics;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi;
+import org.apache.hadoop.hdfs.server.datanode.FinalizedReplica;
+import org.apache.hadoop.hdfs.server.datanode.ReplicaBeingWritten;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSUtilClient;
import org.apache.hadoop.hdfs.protocol.Block;
@@ -950,10 +952,22 @@ public class FsVolumeImpl implements FsVolumeSpi {
long bytesReserved) throws IOException {
releaseReservedSpace(bytesReserved);
File dest = getBlockPoolSlice(bpid).addFinalizedBlock(b, replicaInfo);
+ byte[] checksum = null;
+ // copy the last partial checksum if the replica is originally
+ // in finalized or rbw state.
+ if (replicaInfo.getState() == ReplicaState.FINALIZED) {
+ FinalizedReplica finalized = (FinalizedReplica)replicaInfo;
+ checksum = finalized.getLastPartialChunkChecksum();
+ } else if (replicaInfo.getState() == ReplicaState.RBW) {
+ ReplicaBeingWritten rbw = (ReplicaBeingWritten)replicaInfo;
+ checksum = rbw.getLastChecksumAndDataLen().getChecksum();
+ }
+
return new ReplicaBuilder(ReplicaState.FINALIZED)
.setBlock(replicaInfo)
.setFsVolume(this)
.setDirectoryToUse(dest.getParentFile())
+ .setLastPartialChunkChecksum(checksum)
.build();
}
@@ -1183,12 +1197,11 @@ public class FsVolumeImpl implements FsVolumeSpi {
.setBytesToReserve(bytesReserved)
.buildLocalReplicaInPipeline();
+ // Only a finalized replica can be appended.
+ FinalizedReplica finalized = (FinalizedReplica)replicaInfo;
// load last checksum and datalen
- LocalReplica localReplica = (LocalReplica)replicaInfo;
- byte[] lastChunkChecksum = loadLastPartialChunkChecksum(
- localReplica.getBlockFile(), localReplica.getMetaFile());
newReplicaInfo.setLastChecksumAndDataLen(
- replicaInfo.getNumBytes(), lastChunkChecksum);
+ finalized.getVisibleLength(), finalized.getLastPartialChunkChecksum());
// rename meta file to rbw directory
// rename block file to rbw directory
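Both FsVolumeImpl changes move the checksum between in-memory replica objects rather than re-reading the meta file: finalization carries the RBW replica's running checksum (or a finalized replica's cached one) into the builder, and append seeds the new pipeline replica from the cached value. Condensed into a hypothetical helper (names from the diff; simplified, not a drop-in):

    static byte[] carriedChecksum(ReplicaInfo replicaInfo) {
      if (replicaInfo.getState() == ReplicaState.FINALIZED) {
        return ((FinalizedReplica) replicaInfo).getLastPartialChunkChecksum();
      } else if (replicaInfo.getState() == ReplicaState.RBW) {
        return ((ReplicaBeingWritten) replicaInfo)
            .getLastChecksumAndDataLen().getChecksum();
      }
      return null;   // other states: loaded lazily on first read instead
    }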
http://git-wip-us.apache.org/repos/asf/hadoop/blob/2021f4bd/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java
index c694854..107decf 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java
@@ -2987,6 +2987,29 @@ public class MiniDFSCluster implements AutoCloseable {
}
/**
+ * Return all block files in given directory (recursive search).
+ */
+ public static List<File> getAllBlockFiles(File storageDir) {
+ List<File> results = new ArrayList<File>();
+ File[] files = storageDir.listFiles();
+ if (files == null) {
+ return null;
+ }
+ for (File f : files) {
+ if (f.getName().startsWith(Block.BLOCK_FILE_PREFIX) &&
+ !f.getName().endsWith(Block.METADATA_EXTENSION)) {
+ results.add(f);
+ } else if (f.isDirectory()) {
+ List<File> subdirResults = getAllBlockFiles(f);
+ if (subdirResults != null) {
+ results.addAll(subdirResults);
+ }
+ }
+ }
+ return results;
+ }
+
+ /**
* Get the latest metadata file corresponding to a block
* @param storageDir storage directory
* @param blk the block
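A usage sketch for the new helper, mirroring the updated tests below (assumes a running MiniDFSCluster named cluster and a block pool id bpid):

    File storageDir = cluster.getInstanceStorageDir(0, 1);
    File finalizedDir = MiniDFSCluster.getFinalizedDir(storageDir, bpid);
    // Recursively collects block files (blk_*), skipping .meta files.
    List<File> blockFiles = MiniDFSCluster.getAllBlockFiles(finalizedDir);
    assertTrue("no block files found",
        blockFiles != null && !blockFiles.isEmpty());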
http://git-wip-us.apache.org/repos/asf/hadoop/blob/2021f4bd/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestListCorruptFileBlocks.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestListCorruptFileBlocks.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestListCorruptFileBlocks.java
index 0b273df..1f31bdc 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestListCorruptFileBlocks.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestListCorruptFileBlocks.java
@@ -92,7 +92,7 @@ public class TestListCorruptFileBlocks {
File storageDir = cluster.getInstanceStorageDir(0, 1);
File data_dir = MiniDFSCluster.getFinalizedDir(storageDir, bpid);
assertTrue("data directory does not exist", data_dir.exists());
- List<File> metaFiles = MiniDFSCluster.getAllBlockMetadataFiles(data_dir);
+ List<File> metaFiles = MiniDFSCluster.getAllBlockFiles(data_dir);
assertTrue("Data directory does not contain any blocks or there was an "
+ "IO error", metaFiles != null && !metaFiles.isEmpty());
File metaFile = metaFiles.get(0);
@@ -172,7 +172,7 @@ public class TestListCorruptFileBlocks {
File data_dir = MiniDFSCluster.getFinalizedDir(storageDir,
cluster.getNamesystem().getBlockPoolId());
assertTrue("data directory does not exist", data_dir.exists());
- List<File> metaFiles = MiniDFSCluster.getAllBlockMetadataFiles(data_dir);
+ List<File> metaFiles = MiniDFSCluster.getAllBlockFiles(data_dir);
assertTrue("Data directory does not contain any blocks or there was an "
+ "IO error", metaFiles != null && !metaFiles.isEmpty());
File metaFile = metaFiles.get(0);