Posted to common-commits@hadoop.apache.org by xy...@apache.org on 2018/02/26 22:32:07 UTC

[58/59] [abbrv] hadoop git commit: HDFS-12070. Failed block recovery leaves files open indefinitely and at risk for data loss. Contributed by Kihwal Lee.

HDFS-12070. Failed block recovery leaves files open indefinitely and at risk for data loss. Contributed by Kihwal Lee.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/451265a8
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/451265a8
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/451265a8

Branch: refs/heads/HDFS-7240
Commit: 451265a83d8798624ae2a144bc58fa41db826704
Parents: 2fa7963
Author: Kihwal Lee <ki...@apache.org>
Authored: Mon Feb 26 10:28:04 2018 -0600
Committer: Kihwal Lee <ki...@apache.org>
Committed: Mon Feb 26 10:28:04 2018 -0600

----------------------------------------------------------------------
 .../server/datanode/BlockRecoveryWorker.java    |  6 +--
 .../apache/hadoop/hdfs/TestLeaseRecovery.java   | 44 ++++++++++++++++++++
 2 files changed, 46 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/451265a8/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java
index 2ecd986..94835e2 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java
@@ -307,10 +307,8 @@ public class BlockRecoveryWorker {
         }
       }
 
-      // If any of the data-nodes failed, the recovery fails, because
-      // we never know the actual state of the replica on failed data-nodes.
-      // The recovery should be started over.
-      if (!failedList.isEmpty()) {
+      // Abort if all failed.
+      if (successList.isEmpty()) {
         throw new IOException("Cannot recover " + block
             + ", the following datanodes failed: " + failedList);
       }

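For context: before this change, the second stage of block recovery threw as soon as failedList was non-empty, so a datanode that failed recovery the same way on every attempt could keep the file open indefinitely; after it, recovery aborts only when successList is empty and otherwise continues with the datanodes that responded. A minimal sketch of that decision, with stand-in types (the list names and the exception message match the hunk above, but RecoverySketch and checkRecoveryOutcome are illustrative stand-ins, not real HDFS classes):

    import java.io.IOException;
    import java.util.List;

    class RecoverySketch {
      // Stand-in for the tail of the recovery task in BlockRecoveryWorker.
      static void checkRecoveryOutcome(String block, List<String> successList,
          List<String> failedList) throws IOException {
        // Before HDFS-12070: if (!failedList.isEmpty()) throw ...
        // i.e. one bad datanode aborted every recovery attempt.
        // After HDFS-12070: abort only when no datanode succeeded.
        if (successList.isEmpty()) {
          throw new IOException("Cannot recover " + block
              + ", the following datanodes failed: " + failedList);
        }
        // Otherwise recovery proceeds with successList only, so failed
        // nodes are dropped from this attempt instead of restarting it
        // from scratch forever.
      }
    }
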
http://git-wip-us.apache.org/repos/asf/hadoop/blob/451265a8/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
index d62194c..c82b47c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
@@ -228,6 +228,50 @@ public class TestLeaseRecovery {
   }
 
   /**
+   * Block/lease recovery should be retried with failed nodes from the second
+   * stage removed to avoid perpetual recovery failures.
+   */
+  @Test
+  public void testBlockRecoveryRetryAfterFailedRecovery() throws Exception {
+    Configuration conf = new Configuration();
+    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
+    Path file = new Path("/testBlockRecoveryRetryAfterFailedRecovery");
+    DistributedFileSystem dfs = cluster.getFileSystem();
+
+    // Create a file.
+    FSDataOutputStream out = dfs.create(file);
+    final int FILE_SIZE = 128 * 1024;
+    int count = 0;
+    while (count < FILE_SIZE) {
+      out.writeBytes("DE K9SUL");
+      count += 8;
+    }
+    out.hsync();
+
+    // Abort the original stream.
+    ((DFSOutputStream) out.getWrappedStream()).abort();
+
+    LocatedBlocks locations = cluster.getNameNodeRpc().getBlockLocations(
+        file.toString(), 0, count);
+    ExtendedBlock block = locations.get(0).getBlock();
+
+    // Finalize one replica to simulate a partial close failure.
+    cluster.getDataNodes().get(0).getFSDataset().finalizeBlock(block, false);
+    // Delete the meta file to simulate a rename/move failure.
+    cluster.deleteMeta(0, block);
+
+    // Try to recover the lease.
+    DistributedFileSystem newDfs = (DistributedFileSystem) FileSystem
+        .newInstance(cluster.getConfiguration(0));
+    count = 0;
+    while (count++ < 15 && !newDfs.recoverLease(file)) {
+      Thread.sleep(1000);
+    }
+    // The lease should have been recovered.
+    assertTrue("File should be closed", newDfs.recoverLease(file));
+  }
+
+  /**
    * Recover the lease on a file and append file from another client.
    */
   @Test


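The new test exercises the fix end to end from the client side via DistributedFileSystem#recoverLease(Path), which returns true once the NameNode has closed the file. A minimal standalone sketch of that polling idiom (the hdfs://localhost:8020 URI, 15-attempt limit, and 1-second interval are illustrative assumptions, not values mandated by this commit):

    import java.net.URI;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hdfs.DistributedFileSystem;

    public class LeaseRecoveryPoll {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Hypothetical NameNode address; substitute your cluster's URI.
        DistributedFileSystem dfs = (DistributedFileSystem)
            FileSystem.get(URI.create("hdfs://localhost:8020"), conf);
        Path file = new Path(args[0]);
        // Ask the NameNode to recover the lease, then poll until the
        // file is closed or we give up.
        boolean closed = dfs.recoverLease(file);
        for (int i = 0; i < 15 && !closed; i++) {
          Thread.sleep(1000);
          closed = dfs.recoverLease(file);
        }
        System.out.println(file + (closed ? ": closed" : ": still open"));
      }
    }

Before this fix, a replica stuck in the state the test constructs (one finalized replica with its meta file deleted) made every such recoverLease() attempt fail the same way, leaving the file open indefinitely.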