You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by xy...@apache.org on 2018/02/26 22:32:07 UTC
[58/59] [abbrv] hadoop git commit: HDFS-12070. Failed block recovery
leaves files open indefinitely and at risk for data loss. Contributed by
Kihwal Lee.
HDFS-12070. Failed block recovery leaves files open indefinitely and at risk for data loss. Contributed by Kihwal Lee.
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/451265a8
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/451265a8
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/451265a8
Branch: refs/heads/HDFS-7240
Commit: 451265a83d8798624ae2a144bc58fa41db826704
Parents: 2fa7963
Author: Kihwal Lee <ki...@apache.org>
Authored: Mon Feb 26 10:28:04 2018 -0600
Committer: Kihwal Lee <ki...@apache.org>
Committed: Mon Feb 26 10:28:04 2018 -0600
----------------------------------------------------------------------
.../server/datanode/BlockRecoveryWorker.java | 6 +--
.../apache/hadoop/hdfs/TestLeaseRecovery.java | 44 ++++++++++++++++++++
2 files changed, 46 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/451265a8/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java
index 2ecd986..94835e2 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java
@@ -307,10 +307,8 @@ public class BlockRecoveryWorker {
}
}
- // If any of the data-nodes failed, the recovery fails, because
- // we never know the actual state of the replica on failed data-nodes.
- // The recovery should be started over.
- if (!failedList.isEmpty()) {
+ // Abort if all failed.
+ if (successList.isEmpty()) {
throw new IOException("Cannot recover " + block
+ ", the following datanodes failed: " + failedList);
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/451265a8/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
index d62194c..c82b47c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
@@ -228,6 +228,50 @@ public class TestLeaseRecovery {
}
/**
+ * Block/lease recovery should be retried with failed nodes from the second
+ * stage removed to avoid perpetual recovery failures.
+ */
+ @Test
+ public void testBlockRecoveryRetryAfterFailedRecovery() throws Exception {
+ Configuration conf = new Configuration();
+ cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
+ Path file = new Path("/testBlockRecoveryRetryAfterFailedRecovery");
+ DistributedFileSystem dfs = cluster.getFileSystem();
+
+ // Create a file.
+ FSDataOutputStream out = dfs.create(file);
+ final int FILE_SIZE = 128 * 1024;
+ int count = 0;
+ while (count < FILE_SIZE) {
+ out.writeBytes("DE K9SUL");
+ count += 8;
+ }
+ out.hsync();
+
+ // Abort the original stream.
+ ((DFSOutputStream) out.getWrappedStream()).abort();
+
+ LocatedBlocks locations = cluster.getNameNodeRpc().getBlockLocations(
+ file.toString(), 0, count);
+ ExtendedBlock block = locations.get(0).getBlock();
+
+ // Finalize one replica to simulate a partial close failure.
+ cluster.getDataNodes().get(0).getFSDataset().finalizeBlock(block, false);
+ // Delete the meta file to simulate a rename/move failure.
+ cluster.deleteMeta(0, block);
+
+ // Try to recover the lease.
+ DistributedFileSystem newDfs = (DistributedFileSystem) FileSystem
+ .newInstance(cluster.getConfiguration(0));
+ count = 0;
+ while (count++ < 15 && !newDfs.recoverLease(file)) {
+ Thread.sleep(1000);
+ }
+ // The lease should have been recovered.
+ assertTrue("File should be closed", newDfs.recoverLease(file));
+ }
+
+ /**
* Recover the lease on a file and append file from another client.
*/
@Test
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org