Posted to common-commits@hadoop.apache.org by so...@apache.org on 2021/02/02 10:57:01 UTC

[hadoop] branch branch-3.1 updated: HDFS-15795. EC: Wrong checksum when reconstruction was failed by exception. Contributed by Yushi Hayasaka (#2657)

This is an automated email from the ASF dual-hosted git repository.

sodonnell pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new 69635e0  HDFS-15795. EC: Wrong checksum when reconstruction was failed by exception. Contributed by Yushi Hayasaka (#2657)
69635e0 is described below

commit 69635e09d2adc75c627c9e9e2e379cd42666bb2f
Author: crossfire <bs...@outlook.com>
AuthorDate: Tue Feb 2 18:02:09 2021 +0900

    HDFS-15795. EC: Wrong checksum when reconstruction was failed by exception. Contributed by Yushi Hayasaka (#2657)
    
    
    (cherry picked from commit 18978f2e204d105fb05807d33387a048e9ddb762)
---
 .../hdfs/server/datanode/BlockChecksumHelper.java  |  7 +++--
 .../server/datanode/DataNodeFaultInjector.java     |  6 ++++
 .../StripedBlockChecksumReconstructor.java         |  4 ++-
 .../org/apache/hadoop/hdfs/TestFileChecksum.java   | 34 ++++++++++++++++++++++
 4 files changed, 48 insertions(+), 3 deletions(-)

diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockChecksumHelper.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockChecksumHelper.java
index 988c463..014e009 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockChecksumHelper.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockChecksumHelper.java
@@ -480,8 +480,9 @@ final class BlockChecksumHelper {
         // Before populating the blockChecksum at this index, record the byte
         // offset where it will begin.
         blockChecksumPositions[idx] = blockChecksumBuf.getLength();
+        ExtendedBlock block = null;
         try {
-          ExtendedBlock block = getInternalBlock(numDataUnits, idx);
+          block = getInternalBlock(numDataUnits, idx);
 
           LiveBlockInfo liveBlkInfo = liveDns.get((byte) idx);
           if (liveBlkInfo == null) {
@@ -502,7 +503,9 @@ final class BlockChecksumHelper {
             break; // done with the computation, simply return.
           }
         } catch (IOException e) {
-          LOG.warn("Failed to get the checksum", e);
+          LOG.warn("Failed to get the checksum for block {} at index {} "
+              + "in blockGroup {}", block, idx, blockGroup, e);
+          throw e;
         }
       }
 
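
The hunk above makes two changes: the ExtendedBlock declaration is hoisted
out of the try block so the catch clause can name the failing block in the
log message, and the IOException is rethrown instead of swallowed, so the
loop no longer continues with a partially populated blockChecksumBuf. A
minimal sketch of that pattern (fetchBlock and computeChecksum are
hypothetical stand-ins, not Hadoop methods):

    ExtendedBlock block = null;   // declared outside try so catch can see it
    try {
      block = fetchBlock(idx);
      computeChecksum(block);
    } catch (IOException e) {
      // Log the failing block for diagnosis, then propagate the error so
      // the caller cannot return a checksum built from incomplete data.
      LOG.warn("Failed to get the checksum for block {} at index {}",
          block, idx, e);
      throw e;
    }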
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNodeFaultInjector.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNodeFaultInjector.java
index 08123c1..10b114b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNodeFaultInjector.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNodeFaultInjector.java
@@ -97,6 +97,12 @@ public class DataNodeFaultInjector {
   public void stripedBlockReconstruction() throws IOException {}
 
   /**
+   * Used as a hook to inject a failure into the erasure coding checksum
+   * reconstruction process.
+   */
+  public void stripedBlockChecksumReconstruction() throws IOException {}
+
+  /**
    * Used as a hook to inject latency when read block
    * in erasure coding reconstruction process.
    */
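
DataNodeFaultInjector is Hadoop's fault-injection seam for the DataNode: a
process-wide instance whose hook methods are no-ops in production, which a
test can swap for a mock that throws at a chosen point. The new
stripedBlockChecksumReconstruction() hook follows that pattern. A
stripped-down sketch of how such an injector works (illustrative class,
not the actual Hadoop source):

    import java.io.IOException;

    public class FaultInjectorSketch {
      private static FaultInjectorSketch instance = new FaultInjectorSketch();

      public static FaultInjectorSketch get() { return instance; }

      public static void set(FaultInjectorSketch injector) {
        instance = injector;
      }

      // No-op in production; a test installs a replacement whose override
      // (or mock stub) throws from this hook.
      public void stripedBlockChecksumReconstruction() throws IOException {}
    }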
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedBlockChecksumReconstructor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedBlockChecksumReconstructor.java
index a600626..e28d6c5 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedBlockChecksumReconstructor.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedBlockChecksumReconstructor.java
@@ -23,6 +23,7 @@ import java.nio.ByteBuffer;
 import java.util.Arrays;
 
 import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.hdfs.server.datanode.DataNodeFaultInjector;
 import org.apache.hadoop.io.DataOutputBuffer;
 
 /**
@@ -75,6 +76,7 @@ public abstract class StripedBlockChecksumReconstructor
     prepareDigester();
     long maxTargetLength = getMaxTargetLength();
     while (requestedLen > 0 && getPositionInBlock() < maxTargetLength) {
+      DataNodeFaultInjector.get().stripedBlockChecksumReconstruction();
       long remaining = maxTargetLength - getPositionInBlock();
       final int toReconstructLen = (int) Math
           .min(getStripedReader().getBufferSize(), remaining);
@@ -225,4 +227,4 @@ public abstract class StripedBlockChecksumReconstructor
     getStripedReader().close();
     cleanup();
   }
-}
\ No newline at end of file
+}
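
The hook call is placed as the first statement of the reconstruction loop,
so an injected IOException aborts the checksum reconstruction before any
stripe data for that iteration is read or digested. A simplified sketch of
that placement (hypothetical method, loop body trimmed to bookkeeping):

    void reconstructLoop(long requestedLen, long maxTargetLength)
        throws IOException {
      long position = 0;
      while (requestedLen > 0 && position < maxTargetLength) {
        // Fires before any work for this stripe: an injected failure
        // leaves no partial digest behind.
        DataNodeFaultInjector.get().stripedBlockChecksumReconstruction();
        long toReconstruct =
            Math.min(64 * 1024, maxTargetLength - position);
        position += toReconstruct;
        requestedLen -= toReconstruct;
      }
    }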
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileChecksum.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileChecksum.java
index 0ff2d4b..83ac946 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileChecksum.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileChecksum.java
@@ -27,6 +27,7 @@ import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy;
 import org.apache.hadoop.hdfs.protocol.LocatedBlock;
 import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
 import org.apache.hadoop.hdfs.server.datanode.DataNode;
+import org.apache.hadoop.hdfs.server.datanode.DataNodeFaultInjector;
 import org.apache.hadoop.test.GenericTestUtils;
 import org.junit.After;
 import org.junit.Assert;
@@ -43,6 +44,8 @@ import java.io.IOException;
 import java.util.Random;
 
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_ACCESS_TOKEN_ENABLE_KEY;
+import static org.mockito.Mockito.doThrow;
+import static org.mockito.Mockito.mock;
 
 /**
  * This test serves a prototype to demo the idea proposed so far. It creates two
@@ -535,6 +538,37 @@ public class TestFileChecksum {
   }
 
   @Test(timeout = 90000)
+  public void testStripedFileChecksumWithReconstructFail()
+      throws Exception {
+    String stripedFile4 = ecDir + "/stripedFileChecksum4";
+    prepareTestFiles(fileSize, new String[] {stripedFile4});
+
+    // get checksum
+    FileChecksum fileChecksum = getFileChecksum(stripedFile4, -1, false);
+
+    DataNodeFaultInjector oldInjector = DataNodeFaultInjector.get();
+    DataNodeFaultInjector newInjector = mock(DataNodeFaultInjector.class);
+    doThrow(new IOException())
+        .doNothing()
+        .when(newInjector)
+        .stripedBlockChecksumReconstruction();
+    DataNodeFaultInjector.set(newInjector);
+
+    try {
+      // Get checksum again with reconstruction.
+      // If the reconstruction task fails, the client retries fetching the
+      // checksum from another DN that holds a block of the block group,
+      // since the first attempt failed to return a result.
+      FileChecksum fileChecksum1 = getFileChecksum(stripedFile4, -1, true);
+
+      Assert.assertEquals("checksum should be same", fileChecksum,
+          fileChecksum1);
+    } finally {
+      DataNodeFaultInjector.set(oldInjector);
+    }
+  }
+
+  @Test(timeout = 90000)
   public void testMixedBytesPerChecksum() throws Exception {
     int fileLength = bytesPerCRC * 3;
     byte[] fileData = StripedFileTestUtil.generateBytes(fileLength);

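
The test leans on Mockito's consecutive-call stubbing: the
doThrow(...).doNothing() chain makes the first invocation of the hook
throw and every later invocation succeed, so exactly one reconstruction
attempt fails and the client's retry against another DataNode is
exercised. A standalone demo of that stubbing behavior (the Runnable mock
is hypothetical; only the Mockito usage mirrors the test):

    import static org.mockito.Mockito.doThrow;
    import static org.mockito.Mockito.mock;

    Runnable task = mock(Runnable.class);
    doThrow(new RuntimeException("first call fails"))
        .doNothing()                 // applies to all subsequent calls
        .when(task)
        .run();

    try {
      task.run();                    // first call: throws
    } catch (RuntimeException expected) {
      // expected on the first invocation only
    }
    task.run();                      // second and later calls: no-op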

---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org