You are viewing a plain text version of this content. The canonical link for it was not preserved in this plain-text copy.
Posted to common-commits@hadoop.apache.org by yj...@apache.org on 2015/05/01 17:54:45 UTC
hadoop git commit: HDFS-7281. Missing block is marked as corrupted block (Ming Ma via Yongjun Zhang)
Repository: hadoop
Updated Branches:
refs/heads/trunk 1b3b9e5c3 -> 279958b77
HDFS-7281. Missing block is marked as corrupted block (Ming Ma via Yongjun Zhang)
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/279958b7
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/279958b7
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/279958b7
Branch: refs/heads/trunk
Commit: 279958b772c25e0633bd967828b7d27d5c0a6a56
Parents: 1b3b9e5
Author: Yongjun Zhang <yz...@cloudera.com>
Authored: Fri May 1 08:42:00 2015 -0700
Committer: Yongjun Zhang <yz...@cloudera.com>
Committed: Fri May 1 08:42:00 2015 -0700
----------------------------------------------------------------------
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 ++
.../server/blockmanagement/BlockManager.java | 3 +-
.../hdfs/server/namenode/NamenodeFsck.java | 54 +++++++++++++++-----
.../hadoop/hdfs/server/namenode/TestFsck.java | 23 ++++++---
4 files changed, 63 insertions(+), 20 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/279958b7/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 3bee852..9accdc0 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -20,6 +20,9 @@ Trunk (Unreleased)
HDFS-7985. WebHDFS should be always enabled. (Li Lu via wheat9)
+ HDFS-7281. Missing block is marked as corrupted block (Ming Ma via
+ Yongjun Zhang)
+
NEW FEATURES
HDFS-3125. Add JournalService to enable Journal Daemon. (suresh)
http://git-wip-us.apache.org/repos/asf/hadoop/blob/279958b7/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
index 1db1356..53ffe0b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
@@ -849,7 +849,8 @@ public class BlockManager {
}
final int numNodes = blocksMap.numNodes(blk);
- final boolean isCorrupt = numCorruptNodes == numNodes;
+ final boolean isCorrupt = numCorruptNodes != 0 &&
+ numCorruptNodes == numNodes;
final int numMachines = isCorrupt ? numNodes: numNodes - numCorruptNodes;
final DatanodeStorageInfo[] machines = new DatanodeStorageInfo[numMachines];
int j = 0;
http://git-wip-us.apache.org/repos/asf/hadoop/blob/279958b7/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
index 0cfe31a..ac77394 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
@@ -531,6 +531,7 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
int missing = 0;
int corrupt = 0;
long missize = 0;
+ long corruptSize = 0;
int underReplicatedPerFile = 0;
int misReplicatedPerFile = 0;
StringBuilder report = new StringBuilder();
@@ -570,10 +571,11 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
// count corrupt blocks
boolean isCorrupt = lBlk.isCorrupt();
if (isCorrupt) {
+ res.addCorrupt(block.getNumBytes());
corrupt++;
- res.corruptBlocks++;
- out.print("\n" + path + ": CORRUPT blockpool " + block.getBlockPoolId() +
- " block " + block.getBlockName()+"\n");
+ corruptSize += block.getNumBytes();
+ out.print("\n" + path + ": CORRUPT blockpool " +
+ block.getBlockPoolId() + " block " + block.getBlockName() + "\n");
}
// count minimally replicated blocks
@@ -619,7 +621,11 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
// report
String blkName = block.toString();
report.append(blockNumber + ". " + blkName + " len=" + block.getNumBytes());
- if (totalReplicasPerBlock == 0) {
+ if (totalReplicasPerBlock == 0 && !isCorrupt) {
+ // If the block is corrupted, it means all its available replicas are
+ // corrupted. We don't mark it as missing given these available replicas
+ // might still be accessible as the block might be incorrectly marked as
+ // corrupted by client machines.
report.append(" MISSING!");
res.addMissing(block.toString(), block.getNumBytes());
missing++;
@@ -674,9 +680,15 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
// count corrupt file & move or delete if necessary
if ((missing > 0) || (corrupt > 0)) {
- if (!showFiles && (missing > 0)) {
- out.print("\n" + path + ": MISSING " + missing
- + " blocks of total size " + missize + " B.");
+ if (!showFiles) {
+ if (missing > 0) {
+ out.print("\n" + path + ": MISSING " + missing
+ + " blocks of total size " + missize + " B.");
+ }
+ if (corrupt > 0) {
+ out.print("\n" + path + ": CORRUPT " + corrupt
+ + " blocks of total size " + corruptSize + " B.");
+ }
}
res.corruptFiles++;
if (isOpen) {
@@ -688,9 +700,16 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
}
if (showFiles) {
- if (missing > 0) {
- out.print(" MISSING " + missing + " blocks of total size " + missize + " B\n");
- } else if (underReplicatedPerFile == 0 && misReplicatedPerFile == 0) {
+ if (missing > 0 || corrupt > 0) {
+ if (missing > 0) {
+ out.print(" MISSING " + missing + " blocks of total size " +
+ missize + " B\n");
+ }
+ if (corrupt > 0) {
+ out.print(" CORRUPT " + corrupt + " blocks of total size " +
+ corruptSize + " B\n");
+ }
+ } else if (underReplicatedPerFile == 0 && misReplicatedPerFile == 0) {
out.print(" OK\n");
}
if (showBlocks) {
@@ -956,6 +975,7 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
long missingSize = 0L;
long corruptFiles = 0L;
long corruptBlocks = 0L;
+ long corruptSize = 0L;
long excessiveReplicas = 0L;
long missingReplicas = 0L;
long decommissionedReplicas = 0L;
@@ -998,7 +1018,13 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
missingIds.add(id);
missingSize += size;
}
-
+
+ /** Add a corrupt block. */
+ void addCorrupt(long size) {
+ corruptBlocks++;
+ corruptSize += size;
+ }
+
/** Return the actual replication factor. */
float getReplicationFactor() {
if (totalBlocks == 0)
@@ -1051,7 +1077,8 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
"\n MISSING SIZE:\t\t").append(missingSize).append(" B");
}
if (corruptBlocks > 0) {
- res.append("\n CORRUPT BLOCKS: \t").append(corruptBlocks);
+ res.append("\n CORRUPT BLOCKS: \t").append(corruptBlocks).append(
+ "\n CORRUPT SIZE:\t\t").append(corruptSize).append(" B");
}
}
res.append("\n ********************************");
@@ -1086,7 +1113,8 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
}
res.append("\n Default replication factor:\t").append(replication)
.append("\n Average block replication:\t").append(
- getReplicationFactor()).append("\n Corrupt blocks:\t\t").append(
+ getReplicationFactor()).append("\n Missing blocks:\t\t").append(
+ missingIds.size()).append("\n Corrupt blocks:\t\t").append(
corruptBlocks).append("\n Missing replicas:\t\t").append(
missingReplicas);
if (totalReplicas > 0) {
http://git-wip-us.apache.org/repos/asf/hadoop/blob/279958b7/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
index 8fe273b..1ce09e1 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
@@ -120,7 +120,10 @@ public class TestFsck {
"ip=/\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\s" +
"cmd=getfileinfo\\ssrc=\\/\\sdst=null\\s" +
"perm=null\\s" + "proto=.*");
-
+
+ static final Pattern numMissingBlocksPattern = Pattern.compile(
+ ".*Missing blocks:\t\t([0123456789]*).*");
+
static final Pattern numCorruptBlocksPattern = Pattern.compile(
".*Corrupt blocks:\t\t([0123456789]*).*");
@@ -360,19 +363,27 @@ public class TestFsck {
// Wait for fsck to discover all the missing blocks
while (true) {
outStr = runFsck(conf, 1, false, "/");
+ String numMissing = null;
String numCorrupt = null;
for (String line : outStr.split(LINE_SEPARATOR)) {
- Matcher m = numCorruptBlocksPattern.matcher(line);
+ Matcher m = numMissingBlocksPattern.matcher(line);
+ if (m.matches()) {
+ numMissing = m.group(1);
+ }
+ m = numCorruptBlocksPattern.matcher(line);
if (m.matches()) {
numCorrupt = m.group(1);
+ }
+ if (numMissing != null && numCorrupt != null) {
break;
}
}
- if (numCorrupt == null) {
- throw new IOException("failed to find number of corrupt " +
- "blocks in fsck output.");
+ if (numMissing == null || numCorrupt == null) {
+ throw new IOException("failed to find number of missing or corrupt" +
+ " blocks in fsck output.");
}
- if (numCorrupt.equals(Integer.toString(totalMissingBlocks))) {
+ if (numMissing.equals(Integer.toString(totalMissingBlocks))) {
+ assertTrue(numCorrupt.equals(Integer.toString(0)));
assertTrue(outStr.contains(NamenodeFsck.CORRUPT_STATUS));
break;
}