Posted to common-commits@hadoop.apache.org by ju...@apache.org on 2016/02/02 19:27:24 UTC
hadoop git commit: HDFS-8722. Optimize datanode writes for small writes and flushes. Contributed by Kihwal Lee.
Repository: hadoop
Updated Branches:
refs/heads/branch-2.6 30626ad35 -> cd93382a5
HDFS-8722. Optimize datanode writes for small writes and flushes. Contributed by Kihwal Lee.
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/cd93382a
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/cd93382a
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/cd93382a
Branch: refs/heads/branch-2.6
Commit: cd93382a59c36f150be0a0c88ab98f2008678013
Parents: 30626ad
Author: Junping Du <ju...@apache.org>
Authored: Tue Feb 2 10:37:29 2016 -0800
Committer: Junping Du <ju...@apache.org>
Committed: Tue Feb 2 10:37:29 2016 -0800
----------------------------------------------------------------------
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 ++
.../hdfs/server/datanode/BlockReceiver.java | 40 ++++++++++++--------
2 files changed, 28 insertions(+), 15 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/cd93382a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index ae6bb12..66f76ad 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -46,6 +46,9 @@ Release 2.6.4 - UNRELEASED
HDFS-9178. Slow datanode I/O can cause a wrong node to be marked bad.
(Kihwal Lee)
+ HDFS-8722. Optimize datanode writes for small writes and flushes.
+ (Kihwal Lee)
+
Release 2.6.3 - 2015-12-17
INCOMPATIBLE CHANGES
http://git-wip-us.apache.org/repos/asf/hadoop/blob/cd93382a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java
index 32e40d5..43f766e 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java
@@ -630,14 +630,19 @@ class BlockReceiver implements Closeable {
// bytes should be skipped when writing the data and checksum
// buffers out to disk.
long partialChunkSizeOnDisk = onDiskLen % bytesPerChecksum;
+ long lastChunkBoundary = onDiskLen - partialChunkSizeOnDisk;
boolean alignedOnDisk = partialChunkSizeOnDisk == 0;
boolean alignedInPacket = firstByteInBlock % bytesPerChecksum == 0;
- // Since data is always appended, not overwritten, partial CRC
- // recalculation is necessary if the on-disk data is not chunk-
- // aligned, regardless of whether the beginning of the data in
- // the packet is chunk-aligned.
- boolean doPartialCrc = !alignedOnDisk && !shouldNotWriteChecksum;
+ // If the end of the on-disk data is not chunk-aligned, the last
+ // checksum needs to be overwritten.
+ boolean overwriteLastCrc = !alignedOnDisk && !shouldNotWriteChecksum;
+ // If the starting offset of the packet data is at the last chunk
+ // boundary of the data on disk, the partial checksum recalculation
+ // can be skipped and the checksum supplied by the client can be used
+ // instead. This reduces disk reads and CPU load.
+ boolean doCrcRecalc = overwriteLastCrc &&
+ (lastChunkBoundary != firstByteInBlock);
// If this is a partial chunk, then verify that this is the only
// chunk in the packet. If the starting offset is not chunk
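To make the new flag arithmetic concrete, here is a self-contained sketch
of the common hflush resend, where the client retransmits the partial
chunk starting at the last chunk boundary. The class and values are
hypothetical and shouldNotWriteChecksum is omitted; only the expressions
mirror the patch.

public class ChunkAlignmentSketch {
  public static void main(String[] args) {
    long bytesPerChecksum = 512;  // dfs.bytes-per-checksum default
    long onDiskLen = 1000;        // block data already on disk
    long firstByteInBlock = 512;  // where this packet's data starts

    long partialChunkSizeOnDisk = onDiskLen % bytesPerChecksum;   // 488
    long lastChunkBoundary = onDiskLen - partialChunkSizeOnDisk;  // 512
    boolean alignedOnDisk = partialChunkSizeOnDisk == 0;          // false

    // Last on-disk chunk is partial: its checksum must be overwritten.
    boolean overwriteLastCrc = !alignedOnDisk;                    // true

    // The packet starts exactly at the boundary, so the client's checksum
    // already covers the whole chunk: skip the recalculation.
    boolean doCrcRecalc = overwriteLastCrc
        && (lastChunkBoundary != firstByteInBlock);               // false

    System.out.println("overwriteLastCrc=" + overwriteLastCrc
        + ", doCrcRecalc=" + doCrcRecalc);
  }
}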
@@ -653,9 +658,10 @@ class BlockReceiver implements Closeable {
// If the last portion of the block file is not a full chunk,
// then read in pre-existing partial data chunk and recalculate
// the checksum so that the checksum calculation can continue
- // from the right state.
+ // from the right state. If the client provided the checksum for
+ // the whole chunk, this is not necessary.
Checksum partialCrc = null;
- if (doPartialCrc) {
+ if (doCrcRecalc) {
if (LOG.isDebugEnabled()) {
LOG.debug("receivePacket for " + block
+ ": previous write did not end at the chunk boundary."
@@ -691,11 +697,18 @@ class BlockReceiver implements Closeable {
int skip = 0;
byte[] crcBytes = null;
- // First, overwrite the partial crc at the end, if necessary.
- if (doPartialCrc) { // not chunk-aligned on disk
- // Calculate new crc for this chunk.
- int bytesToReadForRecalc =
- (int)(bytesPerChecksum - partialChunkSizeOnDisk);
+ // First, prepare to overwrite the partial crc at the end.
+ if (overwriteLastCrc) { // not chunk-aligned on disk
+ // prepare to overwrite last checksum
+ adjustCrcFilePosition();
+ }
+
+ // The CRC for the pre-existing partial chunk was recalculated above
+ // (doCrcRecalc). Update it with the rest of the chunk from this
+ // packet, then write it out.
+ if (doCrcRecalc) {
+ // Calculate new crc for this chunk.
+ int bytesToReadForRecalc =
+ (int)(bytesPerChecksum - partialChunkSizeOnDisk);
if (numBytesToDisk < bytesToReadForRecalc) {
bytesToReadForRecalc = numBytesToDisk;
}
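The structural change in this hunk is that adjustCrcFilePosition() is no
longer buried inside the recalculation branch: the checksum file is
rewound whenever the stale last checksum will be overwritten, whether the
replacement is recomputed or taken from the client. Reduced to a stub
class with stand-in method names and bodies:

public class LastCrcFlowSketch {
  void adjustCrcFilePosition() { /* seek back over the stale checksum */ }
  void recomputeAndWritePartialCrc() { /* disk read + CRC update + write */ }

  void handleLastChunk(boolean overwriteLastCrc, boolean doCrcRecalc) {
    if (overwriteLastCrc) {
      adjustCrcFilePosition();       // always rewind when overwriting
    }
    if (doCrcRecalc) {
      recomputeAndWritePartialCrc(); // only when the client CRC can't be reused
    }
    // Otherwise the client-supplied checksum simply overwrites the stale
    // one as part of the normal checksum write that follows.
  }
}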
@@ -705,8 +718,6 @@ class BlockReceiver implements Closeable {
byte[] buf = FSOutputSummer.convertToByteStream(partialCrc,
checksumSize);
crcBytes = copyLastChunkChecksum(buf, checksumSize, buf.length);
- // prepare to overwrite last checksum
- adjustCrcFilePosition();
checksumOut.write(buf);
if(LOG.isDebugEnabled()) {
LOG.debug("Writing out partial crc for data len " + len +
@@ -719,7 +730,6 @@ class BlockReceiver implements Closeable {
// boundary. The checksum after the boundary was already counted
// above. Only count the number of checksums skipped up to the
// boundary here.
- long lastChunkBoundary = onDiskLen - (onDiskLen%bytesPerChecksum);
long skippedDataBytes = lastChunkBoundary - firstByteInBlock;
if (skippedDataBytes > 0) {
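With lastChunkBoundary hoisted to the top of the method, the skip
accounting reads directly off it: any resent data before the boundary,
along with the whole-chunk checksums covering it, is already on disk and
must not be written again. A worked example with hypothetical values:

public class SkipAccountingSketch {
  public static void main(String[] args) {
    long bytesPerChecksum = 512, checksumSize = 4;
    long onDiskLen = 1300;        // two full chunks + 276 bytes on disk
    long firstByteInBlock = 512;  // packet resends from one chunk back

    long lastChunkBoundary = onDiskLen - (onDiskLen % bytesPerChecksum); // 1024
    long skippedDataBytes = lastChunkBoundary - firstByteInBlock;        // 512
    long skippedChecksumBytes =
        (skippedDataBytes / bytesPerChecksum) * checksumSize;            // 4

    // The chunk at [512, 1024) and its checksum are skipped; the partial
    // chunk at 1024 is handled by the overwrite path above.
    System.out.println("skip " + skippedDataBytes + " data bytes, "
        + skippedChecksumBytes + " checksum bytes");
  }
}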