Posted to common-commits@hadoop.apache.org by ji...@apache.org on 2015/07/22 01:14:07 UTC

[23/44] hadoop git commit: HDFS-8344. NameNode doesn't recover lease for files with missing blocks (raviprak)

HDFS-8344. NameNode doesn't recover lease for files with missing blocks (raviprak)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/e4f75626
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/e4f75626
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/e4f75626

Branch: refs/heads/YARN-1197
Commit: e4f756260f16156179ba4adad974ec92279c2fac
Parents: 98c2bc8
Author: Ravi Prakash <ra...@altiscale.com>
Authored: Mon Jul 20 14:03:34 2015 -0700
Committer: Ravi Prakash <ra...@altiscale.com>
Committed: Mon Jul 20 14:03:34 2015 -0700

----------------------------------------------------------------------
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt     |  3 +
 .../org/apache/hadoop/hdfs/DFSConfigKeys.java   |  3 +
 .../BlockInfoUnderConstruction.java             | 19 ++++-
 .../server/blockmanagement/BlockManager.java    | 14 +++-
 .../hdfs/server/namenode/FSNamesystem.java      | 10 +++
 .../src/main/resources/hdfs-default.xml         |  9 +++
 .../apache/hadoop/hdfs/TestLeaseRecovery.java   | 78 ++++++++++++++++++++
 7 files changed, 132 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/e4f75626/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 58491a6..13d9969 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -1050,6 +1050,9 @@ Release 2.8.0 - UNRELEASED
     HDFS-8778. TestBlockReportRateLimiting#testLeaseExpiration can deadlock.
     (Arpit Agarwal)
 
+    HDFS-8344. NameNode doesn't recover lease for files with missing blocks
+    (raviprak)
+
 Release 2.7.2 - UNRELEASED
 
   INCOMPATIBLE CHANGES

http://git-wip-us.apache.org/repos/asf/hadoop/blob/e4f75626/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
index 0e569f0..210d1e5 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
@@ -440,6 +440,9 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
   public static final long    DFS_CACHEREPORT_INTERVAL_MSEC_DEFAULT = 10 * 1000;
   public static final String  DFS_BLOCK_INVALIDATE_LIMIT_KEY = "dfs.block.invalidate.limit";
   public static final int     DFS_BLOCK_INVALIDATE_LIMIT_DEFAULT = 1000;
+  public static final String  DFS_BLOCK_UC_MAX_RECOVERY_ATTEMPTS = "dfs.block.uc.max.recovery.attempts";
+  public static final int     DFS_BLOCK_UC_MAX_RECOVERY_ATTEMPTS_DEFAULT = 5;
+
   public static final String  DFS_DEFAULT_MAX_CORRUPT_FILES_RETURNED_KEY = "dfs.corruptfilesreturned.max";
   public static final int     DFS_DEFAULT_MAX_CORRUPT_FILES_RETURNED = 500;
   /* Maximum number of blocks to process for initializing replication queues */

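The two constants above back the new dfs.block.uc.max.recovery.attempts setting (default 5), which bounds how many full passes over the replicas the NameNode makes before giving up and force-closing the file. A minimal sketch, assuming only a standard Hadoop Configuration object, of how the limit could be read or overridden programmatically with these constants (the class name here is made up for illustration):

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.hdfs.DFSConfigKeys;

  public class RecoveryAttemptsConfigExample {
    public static void main(String[] args) {
      Configuration conf = new Configuration();

      // Read the limit, falling back to the compiled-in default of 5.
      int maxAttempts = conf.getInt(
          DFSConfigKeys.DFS_BLOCK_UC_MAX_RECOVERY_ATTEMPTS,
          DFSConfigKeys.DFS_BLOCK_UC_MAX_RECOVERY_ATTEMPTS_DEFAULT);
      System.out.println("max block UC recovery attempts: " + maxAttempts);

      // Override it in code (e.g. for a test), mirroring what an
      // hdfs-site.xml entry for dfs.block.uc.max.recovery.attempts would do.
      conf.setInt(DFSConfigKeys.DFS_BLOCK_UC_MAX_RECOVERY_ATTEMPTS, 3);
    }
  }
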
http://git-wip-us.apache.org/repos/asf/hadoop/blob/e4f75626/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfoUnderConstruction.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfoUnderConstruction.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfoUnderConstruction.java
index 9cd3987..28f1633 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfoUnderConstruction.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfoUnderConstruction.java
@@ -18,7 +18,6 @@
 package org.apache.hadoop.hdfs.server.blockmanagement;
 
 import java.io.IOException;
-import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
 
@@ -61,6 +60,11 @@ public abstract class BlockInfoUnderConstruction extends BlockInfo {
    */
   protected Block truncateBlock;
 
+  /** The number of times all replicas will be used to attempt recovery before
+   * giving up and marking the block under construction missing.
+   */
+  private int recoveryAttemptsBeforeMarkingBlockMissing;
+
   /**
    * ReplicaUnderConstruction contains information about replicas while
    * they are under construction.
@@ -174,6 +178,8 @@ public abstract class BlockInfoUnderConstruction extends BlockInfo {
         "BlockInfoUnderConstruction cannot be in COMPLETE state");
     this.blockUCState = state;
     setExpectedLocations(targets);
+    this.recoveryAttemptsBeforeMarkingBlockMissing =
+      BlockManager.getMaxBlockUCRecoveries();
   }
 
   /** Set expected locations. */
@@ -271,7 +277,7 @@ public abstract class BlockInfoUnderConstruction extends BlockInfo {
     if (replicas.size() == 0) {
       NameNode.blockStateChangeLog.warn("BLOCK* " +
           "BlockInfoUnderConstruction.initLeaseRecovery: " +
-          "No blocks found, lease removed.");
+          "No replicas found.");
     }
     boolean allLiveReplicasTriedAsPrimary = true;
     for (int i = 0; i < replicas.size(); i++) {
@@ -283,6 +289,11 @@ public abstract class BlockInfoUnderConstruction extends BlockInfo {
       }
     }
     if (allLiveReplicasTriedAsPrimary) {
+      recoveryAttemptsBeforeMarkingBlockMissing--;
+      NameNode.blockStateChangeLog.info("Tried to recover " + this +" using all"
+          + " replicas. Will try " + recoveryAttemptsBeforeMarkingBlockMissing
+          + " more times");
+
       // Just set all the replicas to be chosen whether they are alive or not.
       for (int i = 0; i < replicas.size(); i++) {
         replicas.get(i).setChosenAsPrimary(false);
@@ -341,6 +352,10 @@ public abstract class BlockInfoUnderConstruction extends BlockInfo {
     replicas.add(new ReplicaUnderConstruction(block, storage, rState));
   }
 
+  public int getNumRecoveryAttemptsLeft() {
+    return recoveryAttemptsBeforeMarkingBlockMissing;
+  }
+
   /**
    * Convert an under construction block to a complete block.
    *

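The field added above counts down once each time every live replica has been tried as the recovery primary, and getNumRecoveryAttemptsLeft() exposes the remaining budget so FSNamesystem can decide when to force-complete the block. A minimal, self-contained sketch of that countdown, illustrative only and not NameNode code (the class and method names below are hypothetical):

  /** Illustrative stand-in for the per-block recovery budget (hypothetical class). */
  class RecoveryBudget {
    private int attemptsLeft;

    RecoveryBudget(int maxAttempts) {
      // Seeded from dfs.block.uc.max.recovery.attempts, as the constructor above
      // does via BlockManager.getMaxBlockUCRecoveries().
      this.attemptsLeft = maxAttempts;
    }

    /** Called after one full pass in which every live replica was tried as primary. */
    void onAllLiveReplicasTried() {
      attemptsLeft--;
    }

    /** Mirrors getNumRecoveryAttemptsLeft() == 0: give up and force-complete. */
    boolean exhausted() {
      return attemptsLeft <= 0;
    }
  }
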
http://git-wip-us.apache.org/repos/asf/hadoop/blob/e4f75626/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
index 7dce2a8..16dfe45 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
@@ -17,7 +17,6 @@
  */
 package org.apache.hadoop.hdfs.server.blockmanagement;
 
-import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_NAMENODES_KEY_PREFIX;
 import static org.apache.hadoop.util.ExitUtil.terminate;
 
 import java.io.IOException;
@@ -274,6 +273,13 @@ public class BlockManager implements BlockStatsMXBean {
   private BlockPlacementPolicy blockplacement;
   private final BlockStoragePolicySuite storagePolicySuite;
 
+  /** The number of times a block under construction's recovery will be
+    * attempted using all known replicas. e.g. if there are 3 replicas, each
+    * node will be tried 5 times (for a total of 15 retries across all nodes)*/
+  private static int maxBlockUCRecoveries =
+    DFSConfigKeys.DFS_BLOCK_UC_MAX_RECOVERY_ATTEMPTS_DEFAULT;
+  public static int getMaxBlockUCRecoveries() { return maxBlockUCRecoveries; }
+
   /** Check whether name system is running before terminating */
   private boolean checkNSRunning = true;
 
@@ -282,6 +288,9 @@ public class BlockManager implements BlockStatsMXBean {
     this.namesystem = namesystem;
     datanodeManager = new DatanodeManager(this, namesystem, conf);
     heartbeatManager = datanodeManager.getHeartbeatManager();
+    maxBlockUCRecoveries = conf.getInt(
+      DFSConfigKeys.DFS_BLOCK_UC_MAX_RECOVERY_ATTEMPTS,
+      DFSConfigKeys.DFS_BLOCK_UC_MAX_RECOVERY_ATTEMPTS_DEFAULT);
 
     startupDelayBlockDeletionInMs = conf.getLong(
         DFSConfigKeys.DFS_NAMENODE_STARTUP_DELAY_BLOCK_DELETION_SEC_KEY,
@@ -731,7 +740,8 @@ public class BlockManager implements BlockStatsMXBean {
   /**
    * Force the given block in the given file to be marked as complete,
    * regardless of whether enough replicas are present. This is necessary
-   * when tailing edit logs as a Standby.
+   * when tailing edit logs as a Standby or when recovering a lease on a file
+   * with missing blocks.
    */
   public BlockInfo forceCompleteBlock(final BlockCollection bc,
       final BlockInfoUnderConstruction block) throws IOException {

http://git-wip-us.apache.org/repos/asf/hadoop/blob/e4f75626/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
index 7c6d6a1..becd05a 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
@@ -3287,6 +3287,16 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
             + "Removed empty last block and closed file.");
         return true;
       }
+
+      //If the block's recovery has been attempted enough times, mark the block
+      //complete anyway and recover the lease
+      if(uc.getNumRecoveryAttemptsLeft() == 0) {
+        blockManager.forceCompleteBlock(pendingFile, uc);
+        finalizeINodeFileUnderConstruction(src, pendingFile,
+            iip.getLatestSnapshotId());
+        return true;
+      }
+
       // start recovery of the last block for this file
       long blockRecoveryId = nextGenerationStamp(blockIdManager.isLegacyBlock(uc));
       lease = reassignLease(lease, src, recoveryLeaseHolder, pendingFile);

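With this hunk, internalReleaseLease force-completes the last block once the recovery budget is exhausted, so a lease on a file whose replicas are all unreachable is eventually released instead of being held indefinitely. A rough client-side sketch of observing that behavior, assuming an already-configured DistributedFileSystem; the path and sleep interval are arbitrary and not taken from the patch:

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.FileSystem;
  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.hdfs.DistributedFileSystem;

  public class WaitForLeaseRecovery {
    public static void main(String[] args) throws Exception {
      Configuration conf = new Configuration();
      // Assumes fs.defaultFS points at the target HDFS cluster.
      DistributedFileSystem dfs = (DistributedFileSystem) FileSystem.get(conf);
      Path file = new Path("/testRecoveryFile");  // example path, as in the test below

      // recoverLease() returns true once the file is closed; with this change the
      // NameNode gets there even if every replica of the last block is missing,
      // after the configured number of recovery passes.
      while (!dfs.recoverLease(file)) {
        System.out.println("Lease not recovered yet, retrying...");
        Thread.sleep(1000);
      }
      System.out.println("Lease recovered and file closed: " + file);
    }
  }
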
http://git-wip-us.apache.org/repos/asf/hadoop/blob/e4f75626/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
index 8cb7d5f..dee9111 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
@@ -425,6 +425,15 @@
 </property>
 
 <property>
+  <name>dfs.block.uc.max.recovery.attempts</name>
+  <value>5</value>
+  <description>The number of times a block under construction's recovery will be
+  attempted using all known replicas. e.g. if there are 3 replicas, each node
+  will be tried 5 times (for a total of 15 retries across all nodes).
+  </description>
+</property>
+
+<property>
   <name>dfs.datanode.data.dir</name>
   <value>file://${hadoop.tmp.dir}/dfs/data</value>
   <description>Determines where on the local filesystem an DFS data node

http://git-wip-us.apache.org/repos/asf/hadoop/blob/e4f75626/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
index c9f3842..c9448ac 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
@@ -27,6 +27,7 @@ import java.util.EnumSet;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.CreateFlag;
+import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -258,4 +259,81 @@ public class TestLeaseRecovery {
       }
     }
   }
+
+  /**
+   * Test that when a client was writing to a file and died, and before the
+   * lease can be recovered, all the datanodes to which the file was written
+   * also die, after some time (5 * lease recovery times) the file is indeed
+   * closed and lease recovered.
+   * We also check that if the datanode came back after some time, the data
+   * originally written is not truncated
+   * @throws IOException
+   * @throws InterruptedException
+   */
+  @Test
+  public void testLeaseRecoveryWithMissingBlocks()
+    throws IOException, InterruptedException {
+    Configuration conf = new HdfsConfiguration();
+
+    //Start a cluster with 3 datanodes
+    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
+    cluster.setLeasePeriod(LEASE_PERIOD, LEASE_PERIOD);
+    cluster.waitActive();
+
+    //create a file (with replication 1)
+    Path file = new Path("/testRecoveryFile");
+    DistributedFileSystem dfs = cluster.getFileSystem();
+    FSDataOutputStream out = dfs.create(file, (short) 1);
+
+    //This keeps count of the number of bytes written (AND is also the data we
+    //are writing)
+    long writtenBytes = 0;
+    while (writtenBytes < 2 * 1024 * 1024) {
+      out.writeLong(writtenBytes);
+      writtenBytes += 8;
+    }
+    System.out.println("Written " + writtenBytes + " bytes");
+    out.hsync();
+    System.out.println("hsynced the data");
+
+    //Kill the datanode to which the file was written.
+    DatanodeInfo dn =
+      ((DFSOutputStream) out.getWrappedStream()).getPipeline()[0];
+    DataNodeProperties dnStopped = cluster.stopDataNode(dn.getName());
+
+    //Wait at most 20 seconds for the lease to be recovered
+    LeaseManager lm = NameNodeAdapter.getLeaseManager(cluster.getNamesystem());
+    int i = 40;
+    while(i-- > 0 && lm.countLease() != 0) {
+      System.out.println("Still got " + lm.countLease() + " lease(s)");
+      Thread.sleep(500);
+    }
+    assertTrue("The lease was not recovered", lm.countLease() == 0);
+    System.out.println("Got " + lm.countLease() + " leases");
+
+    //Make sure we can't read any data because the datanode is dead
+    FSDataInputStream in = dfs.open(file);
+    try {
+      in.readLong();
+      assertTrue("Shouldn't have reached here", false);
+    } catch(BlockMissingException bme) {
+      System.out.println("Correctly got BlockMissingException because datanode"
+        + " is still dead");
+    }
+
+    //Bring the dead datanode back.
+    cluster.restartDataNode(dnStopped);
+    System.out.println("Restart datanode");
+
+    //Make sure we can read all the data back (since we hsync'ed).
+    in = dfs.open(file);
+    int readBytes = 0;
+    while(in.available() != 0) {
+      assertEquals("Didn't read the data we wrote", in.readLong(), readBytes);
+      readBytes += 8;
+    }
+    assertEquals("Didn't get all the data", readBytes, writtenBytes);
+    System.out.println("Read back all the " + readBytes + " bytes");
+  }
+
 }