You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by aw...@apache.org on 2015/07/16 19:36:54 UTC
[09/21] hadoop git commit: HDFS-8722. Optimize datanode writes for
small writes and flushes. Contributed by Kihwal Lee
HDFS-8722. Optimize datanode writes for small writes and flushes. Contributed by Kihwal Lee
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/59388a80
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/59388a80
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/59388a80
Branch: refs/heads/HADOOP-12111
Commit: 59388a801514d6af64ef27fbf246d8054f1dcc74
Parents: b7fb6ec
Author: Kihwal Lee <ki...@apache.org>
Authored: Tue Jul 14 14:04:06 2015 -0500
Committer: Kihwal Lee <ki...@apache.org>
Committed: Tue Jul 14 14:04:06 2015 -0500
----------------------------------------------------------------------
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 2 ++
.../hdfs/server/datanode/BlockReceiver.java | 34 +++++++++++++-------
2 files changed, 24 insertions(+), 12 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/59388a80/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 86b1ea1..14f3403 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -1053,6 +1053,8 @@ Release 2.7.2 - UNRELEASED
OPTIMIZATIONS
+ HDFS-8722. Optimize datanode writes for small writes and flushes (kihwal)
+
BUG FIXES
Release 2.7.1 - 2015-07-06
http://git-wip-us.apache.org/repos/asf/hadoop/blob/59388a80/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java
index 2468f43..55c9d57 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java
@@ -598,14 +598,19 @@ class BlockReceiver implements Closeable {
// bytes should be skipped when writing the data and checksum
// buffers out to disk.
long partialChunkSizeOnDisk = onDiskLen % bytesPerChecksum;
+ long lastChunkBoundary = onDiskLen - partialChunkSizeOnDisk;
boolean alignedOnDisk = partialChunkSizeOnDisk == 0;
boolean alignedInPacket = firstByteInBlock % bytesPerChecksum == 0;
- // Since data is always appended, not overwritten, partial CRC
- // recalculation is necessary if the on-disk data is not chunk-
- // aligned, regardless of whether the beginning of the data in
- // the packet is chunk-aligned.
- boolean doPartialCrc = !alignedOnDisk && !shouldNotWriteChecksum;
+ // If the end of the on-disk data is not chunk-aligned, the last
+ // checksum needs to be overwritten.
+ boolean overwriteLastCrc = !alignedOnDisk && !shouldNotWriteChecksum;
+ // If the starting offset of the packet data is at the last chunk
+ // boundary of the data on disk, the partial checksum recalculation
+ // can be skipped and the checksum supplied by the client can be used
+ // instead. This reduces disk reads and cpu load.
+ boolean doCrcRecalc = overwriteLastCrc &&
+ (lastChunkBoundary != firstByteInBlock);
// If this is a partial chunk, then verify that this is the only
// chunk in the packet. If the starting offset is not chunk
@@ -621,9 +626,10 @@ class BlockReceiver implements Closeable {
// If the last portion of the block file is not a full chunk,
// then read in pre-existing partial data chunk and recalculate
// the checksum so that the checksum calculation can continue
- // from the right state.
+ // from the right state. If the client provided the checksum for
+ // the whole chunk, this is not necessary.
Checksum partialCrc = null;
- if (doPartialCrc) {
+ if (doCrcRecalc) {
if (LOG.isDebugEnabled()) {
LOG.debug("receivePacket for " + block
+ ": previous write did not end at the chunk boundary."
@@ -659,8 +665,15 @@ class BlockReceiver implements Closeable {
int skip = 0;
byte[] crcBytes = null;
- // First, overwrite the partial crc at the end, if necessary.
- if (doPartialCrc) { // not chunk-aligned on disk
+ // First, prepare to overwrite the partial crc at the end.
+ if (overwriteLastCrc) { // not chunk-aligned on disk
+ // prepare to overwrite last checksum
+ adjustCrcFilePosition();
+ }
+
+ // The CRC was recalculated for the last partial chunk. Update the
+ // CRC by reading the rest of the chunk, then write it out.
+ if (doCrcRecalc) {
// Calculate new crc for this chunk.
int bytesToReadForRecalc =
(int)(bytesPerChecksum - partialChunkSizeOnDisk);
@@ -673,8 +686,6 @@ class BlockReceiver implements Closeable {
byte[] buf = FSOutputSummer.convertToByteStream(partialCrc,
checksumSize);
crcBytes = copyLastChunkChecksum(buf, checksumSize, buf.length);
- // prepare to overwrite last checksum
- adjustCrcFilePosition();
checksumOut.write(buf);
if(LOG.isDebugEnabled()) {
LOG.debug("Writing out partial crc for data len " + len +
@@ -687,7 +698,6 @@ class BlockReceiver implements Closeable {
// boundary. The checksum after the boundary was already counted
// above. Only count the number of checksums skipped up to the
// boundary here.
- long lastChunkBoundary = onDiskLen - (onDiskLen%bytesPerChecksum);
long skippedDataBytes = lastChunkBoundary - firstByteInBlock;
if (skippedDataBytes > 0) {