Posted to common-commits@hadoop.apache.org by ju...@apache.org on 2016/02/02 19:27:24 UTC

hadoop git commit: HDFS-8722. Optimize datanode writes for small writes and flushes. Contributed by Kihwal Lee.

Repository: hadoop
Updated Branches:
  refs/heads/branch-2.6 30626ad35 -> cd93382a5


HDFS-8722. Optimize datanode writes for small writes and flushes. Contributed by Kihwal Lee.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/cd93382a
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/cd93382a
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/cd93382a

Branch: refs/heads/branch-2.6
Commit: cd93382a59c36f150be0a0c88ab98f2008678013
Parents: 30626ad
Author: Junping Du <ju...@apache.org>
Authored: Tue Feb 2 10:37:29 2016 -0800
Committer: Junping Du <ju...@apache.org>
Committed: Tue Feb 2 10:37:29 2016 -0800

----------------------------------------------------------------------
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt     |  3 ++
 .../hdfs/server/datanode/BlockReceiver.java     | 40 ++++++++++++--------
 2 files changed, 28 insertions(+), 15 deletions(-)
----------------------------------------------------------------------
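
For context on the change below: a datanode stores one CRC for every
bytesPerChecksum-sized chunk of a block (512 bytes by default), so a write
that ends mid-chunk leaves a checksum covering only partial data. The
sketch below is illustrative only and not part of the patch; the variable
names mirror BlockReceiver.java, but the values are hypothetical.

  // Illustrative sketch of the chunk-alignment logic the patch adds.
  public class ChunkAlignmentSketch {
    public static void main(String[] args) {
      long bytesPerChecksum = 512;  // dfs.bytes-per-checksum default
      long onDiskLen = 1300;        // bytes of this replica already on disk

      long partialChunkSizeOnDisk = onDiskLen % bytesPerChecksum;  // 276
      long lastChunkBoundary = onDiskLen - partialChunkSizeOnDisk; // 1024
      boolean alignedOnDisk = partialChunkSizeOnDisk == 0;         // false

      // The last on-disk chunk is partial, so its checksum must be
      // overwritten once more bytes are appended to that chunk.
      boolean overwriteLastCrc = !alignedOnDisk;                   // true

      // Case 1: the packet starts exactly at the last chunk boundary
      // (e.g. the client resent the partial chunk's data after a flush).
      // The client-supplied checksum covers the whole chunk, so the
      // datanode can skip re-reading the 276 partial bytes from disk --
      // this is the saving the patch introduces.
      long firstByteInBlock = 1024;
      boolean doCrcRecalc = overwriteLastCrc
          && (lastChunkBoundary != firstByteInBlock);              // false
      System.out.println("case 1, recalc from disk: " + doCrcRecalc);

      // Case 2: the packet starts mid-chunk, so the datanode must still
      // read the partial chunk back and recalculate, as before the patch.
      firstByteInBlock = 1300;
      doCrcRecalc = overwriteLastCrc
          && (lastChunkBoundary != firstByteInBlock);              // true
      System.out.println("case 2, recalc from disk: " + doCrcRecalc);
    }
  }
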


http://git-wip-us.apache.org/repos/asf/hadoop/blob/cd93382a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index ae6bb12..66f76ad 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -46,6 +46,9 @@ Release 2.6.4 - UNRELEASED
     HDFS-9178. Slow datanode I/O can cause a wrong node to be marked bad.
     (Kihwal Lee)
 
+    HDFS-8722. Optimize datanode writes for small writes and flushes. 
+    (Kihwal Lee)
+
 Release 2.6.3 - 2015-12-17
 
   INCOMPATIBLE CHANGES

http://git-wip-us.apache.org/repos/asf/hadoop/blob/cd93382a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java
index 32e40d5..43f766e 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java
@@ -630,14 +630,19 @@ class BlockReceiver implements Closeable {
           // bytes should be skipped when writing the data and checksum
           // buffers out to disk.
           long partialChunkSizeOnDisk = onDiskLen % bytesPerChecksum;
+          long lastChunkBoundary = onDiskLen - partialChunkSizeOnDisk;
           boolean alignedOnDisk = partialChunkSizeOnDisk == 0;
           boolean alignedInPacket = firstByteInBlock % bytesPerChecksum == 0;
 
-          // Since data is always appended, not overwritten, partial CRC
-          // recalculation is necessary if the on-disk data is not chunk-
-          // aligned, regardless of whether the beginning of the data in
-          // the packet is chunk-aligned.
-          boolean doPartialCrc = !alignedOnDisk && !shouldNotWriteChecksum;
+          // If the end of the on-disk data is not chunk-aligned, the last
+          // checksum needs to be overwritten.
+          boolean overwriteLastCrc = !alignedOnDisk && !shouldNotWriteChecksum;
+          // If the starting offset of the packet data is at the last chunk
+          // boundary of the data on disk, the partial checksum recalculation
+          // can be skipped and the checksum supplied by the client can be used
+          // instead. This reduces disk reads and CPU load.
+          boolean doCrcRecalc = overwriteLastCrc &&
+              (lastChunkBoundary != firstByteInBlock);
 
           // If this is a partial chunk, then verify that this is the only
           // chunk in the packet. If the starting offset is not chunk
@@ -653,9 +658,10 @@ class BlockReceiver implements Closeable {
           // If the last portion of the block file is not a full chunk,
           // then read in pre-existing partial data chunk and recalculate
           // the checksum so that the checksum calculation can continue
-          // from the right state.
+          // from the right state. If the client provided the checksum for
+          // the whole chunk, this is not necessary.
           Checksum partialCrc = null;
-          if (doPartialCrc) {
+          if (doCrcRecalc) {
             if (LOG.isDebugEnabled()) {
               LOG.debug("receivePacket for " + block 
                   + ": previous write did not end at the chunk boundary."
@@ -691,11 +697,18 @@ class BlockReceiver implements Closeable {
             int skip = 0;
             byte[] crcBytes = null;
 
-            // First, overwrite the partial crc at the end, if necessary.
-            if (doPartialCrc) { // not chunk-aligned on disk
-               // Calculate new crc for this chunk.
-               int bytesToReadForRecalc =
-                   (int)(bytesPerChecksum - partialChunkSizeOnDisk);
+            // First, prepare to overwrite the partial crc at the end.
+            if (overwriteLastCrc) { // not chunk-aligned on disk
+              // prepare to overwrite last checksum
+              adjustCrcFilePosition();
+            }
+
+            // The partial chunk's CRC was recalculated above; update it with
+            // the rest of the chunk from the packet, then write it out.
+            if (doCrcRecalc) {
+              // Calculate new crc for this chunk.
+              int bytesToReadForRecalc =
+                  (int)(bytesPerChecksum - partialChunkSizeOnDisk);
               if (numBytesToDisk < bytesToReadForRecalc) {
                 bytesToReadForRecalc = numBytesToDisk;
               }
@@ -705,8 +718,6 @@ class BlockReceiver implements Closeable {
               byte[] buf = FSOutputSummer.convertToByteStream(partialCrc,
                   checksumSize);
               crcBytes = copyLastChunkChecksum(buf, checksumSize, buf.length);
-              // prepare to overwrite last checksum
-              adjustCrcFilePosition();
               checksumOut.write(buf);
               if(LOG.isDebugEnabled()) {
                 LOG.debug("Writing out partial crc for data len " + len +
@@ -719,7 +730,6 @@ class BlockReceiver implements Closeable {
             // boundary. The checksum after the boundary was already counted
             // above. Only count the number of checksums skipped up to the
             // boundary here.
-            long lastChunkBoundary = onDiskLen - (onDiskLen%bytesPerChecksum);
             long skippedDataBytes = lastChunkBoundary - firstByteInBlock;
 
             if (skippedDataBytes > 0) {
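
The tail of the hunk above counts how much of the packet precedes
lastChunkBoundary: that data and its full-chunk checksums are already
safely on disk, so they are skipped when writing out. A minimal, hedged
re-expression of that accounting follows; the names mirror the code
above, but the values and the simplified skip formula are illustrative.

  // Illustrative sketch of the skip accounting for overlapping packets.
  public class SkipAccountingSketch {
    public static void main(String[] args) {
      long bytesPerChecksum = 512;
      int checksumSize = 4;           // a CRC32 checksum is 4 bytes
      long onDiskLen = 1300;
      long firstByteInBlock = 512;    // packet overlaps data already on disk

      long lastChunkBoundary = onDiskLen - (onDiskLen % bytesPerChecksum); // 1024
      long skippedDataBytes = lastChunkBoundary - firstByteInBlock;        // 512

      if (skippedDataBytes > 0) {
        // Whole chunks before the boundary already have valid on-disk
        // checksums; neither their data nor their CRCs are rewritten.
        // The checksum after the boundary was already handled above.
        long skippedChecksums = skippedDataBytes / bytesPerChecksum;       // 1
        long skippedChecksumBytes = skippedChecksums * checksumSize;       // 4
        System.out.println("skip " + skippedDataBytes + " data bytes and "
            + skippedChecksumBytes + " checksum bytes");
      }
    }
  }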