You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by so...@apache.org on 2021/02/02 10:57:01 UTC
[hadoop] branch branch-3.1 updated: HDFS-15795. EC: Wrong checksum
when reconstruction was failed by exception. Contributed by Yushi Hayasaka
(#2657)
This is an automated email from the ASF dual-hosted git repository.
sodonnell pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new 69635e0 HDFS-15795. EC: Wrong checksum when reconstruction was failed by exception. Contributed by Yushi Hayasaka (#2657)
69635e0 is described below
commit 69635e09d2adc75c627c9e9e2e379cd42666bb2f
Author: crossfire <bs...@outlook.com>
AuthorDate: Tue Feb 2 18:02:09 2021 +0900
HDFS-15795. EC: Wrong checksum when reconstruction was failed by exception. Contributed by Yushi Hayasaka (#2657)
(cherry picked from commit 18978f2e204d105fb05807d33387a048e9ddb762)
---
.../hdfs/server/datanode/BlockChecksumHelper.java | 7 +++--
.../server/datanode/DataNodeFaultInjector.java | 6 ++++
.../StripedBlockChecksumReconstructor.java | 4 ++-
.../org/apache/hadoop/hdfs/TestFileChecksum.java | 34 ++++++++++++++++++++++
4 files changed, 48 insertions(+), 3 deletions(-)
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockChecksumHelper.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockChecksumHelper.java
index 988c463..014e009 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockChecksumHelper.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockChecksumHelper.java
@@ -480,8 +480,9 @@ final class BlockChecksumHelper {
// Before populating the blockChecksum at this index, record the byte
// offset where it will begin.
blockChecksumPositions[idx] = blockChecksumBuf.getLength();
+ ExtendedBlock block = null;
try {
- ExtendedBlock block = getInternalBlock(numDataUnits, idx);
+ block = getInternalBlock(numDataUnits, idx);
LiveBlockInfo liveBlkInfo = liveDns.get((byte) idx);
if (liveBlkInfo == null) {
@@ -502,7 +503,9 @@ final class BlockChecksumHelper {
break; // done with the computation, simply return.
}
} catch (IOException e) {
- LOG.warn("Failed to get the checksum", e);
+ LOG.warn("Failed to get the checksum for block {} at index {} "
+ + "in blockGroup {}", block, idx, blockGroup, e);
+ throw e;
}
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNodeFaultInjector.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNodeFaultInjector.java
index 08123c1..10b114b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNodeFaultInjector.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNodeFaultInjector.java
@@ -97,6 +97,12 @@ public class DataNodeFaultInjector {
public void stripedBlockReconstruction() throws IOException {}
/**
+ * Used as a hook to inject failure in erasure coding checksum reconstruction
+ * process.
+ */
+ public void stripedBlockChecksumReconstruction() throws IOException {}
+
+ /**
* Used as a hook to inject latency when read block
* in erasure coding reconstruction process.
*/
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedBlockChecksumReconstructor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedBlockChecksumReconstructor.java
index a600626..e28d6c5 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedBlockChecksumReconstructor.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedBlockChecksumReconstructor.java
@@ -23,6 +23,7 @@ import java.nio.ByteBuffer;
import java.util.Arrays;
import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.hdfs.server.datanode.DataNodeFaultInjector;
import org.apache.hadoop.io.DataOutputBuffer;
/**
@@ -75,6 +76,7 @@ public abstract class StripedBlockChecksumReconstructor
prepareDigester();
long maxTargetLength = getMaxTargetLength();
while (requestedLen > 0 && getPositionInBlock() < maxTargetLength) {
+ DataNodeFaultInjector.get().stripedBlockChecksumReconstruction();
long remaining = maxTargetLength - getPositionInBlock();
final int toReconstructLen = (int) Math
.min(getStripedReader().getBufferSize(), remaining);
@@ -225,4 +227,4 @@ public abstract class StripedBlockChecksumReconstructor
getStripedReader().close();
cleanup();
}
-}
\ No newline at end of file
+}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileChecksum.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileChecksum.java
index 0ff2d4b..83ac946 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileChecksum.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileChecksum.java
@@ -27,6 +27,7 @@ import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
+import org.apache.hadoop.hdfs.server.datanode.DataNodeFaultInjector;
import org.apache.hadoop.test.GenericTestUtils;
import org.junit.After;
import org.junit.Assert;
@@ -43,6 +44,8 @@ import java.io.IOException;
import java.util.Random;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_ACCESS_TOKEN_ENABLE_KEY;
+import static org.mockito.Mockito.doThrow;
+import static org.mockito.Mockito.mock;
/**
* This test serves a prototype to demo the idea proposed so far. It creates two
@@ -535,6 +538,37 @@ public class TestFileChecksum {
}
@Test(timeout = 90000)
+ public void testStripedFileChecksumWithReconstructFail()
+ throws Exception {
+ String stripedFile4 = ecDir + "/stripedFileChecksum4";
+ prepareTestFiles(fileSize, new String[] {stripedFile4});
+
+ // get checksum
+ FileChecksum fileChecksum = getFileChecksum(stripedFile4, -1, false);
+
+ DataNodeFaultInjector oldInjector = DataNodeFaultInjector.get();
+ DataNodeFaultInjector newInjector = mock(DataNodeFaultInjector.class);
+ doThrow(new IOException())
+ .doNothing()
+ .when(newInjector)
+ .stripedBlockChecksumReconstruction();
+ DataNodeFaultInjector.set(newInjector);
+
+ try {
+ // Get checksum again with reconstruction.
+ // If the reconstruction task fails, the client fails to get the result
+ // and retries the checksum request against another DN that holds a
+ // block of the same block group.
+ FileChecksum fileChecksum1 = getFileChecksum(stripedFile4, -1, true);
+
+ Assert.assertEquals("checksum should be same", fileChecksum,
+ fileChecksum1);
+ } finally {
+ DataNodeFaultInjector.set(oldInjector);
+ }
+ }
+
+ @Test(timeout = 90000)
public void testMixedBytesPerChecksum() throws Exception {
int fileLength = bytesPerCRC * 3;
byte[] fileData = StripedFileTestUtil.generateBytes(fileLength);
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org