You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by cm...@apache.org on 2014/11/24 19:54:31 UTC

hadoop git commit: HDFS-4882. Prevent the Namenode's LeaseManager from looping forever in checkLeases (Ravi Prakash via Colin P. McCabe)

Repository: hadoop
Updated Branches:
  refs/heads/trunk 555fa2d9d -> daacbc18d


HDFS-4882. Prevent the Namenode's LeaseManager from looping forever in checkLeases (Ravi Prakash via Colin P. McCabe)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/daacbc18
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/daacbc18
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/daacbc18

Branch: refs/heads/trunk
Commit: daacbc18d739d030822df0b75205eeb067f89850
Parents: 555fa2d
Author: Colin Patrick Mccabe <cm...@cloudera.com>
Authored: Mon Nov 24 10:46:33 2014 -0800
Committer: Colin Patrick Mccabe <cm...@cloudera.com>
Committed: Mon Nov 24 10:46:33 2014 -0800

----------------------------------------------------------------------
 .../hdfs/server/namenode/FSNamesystem.java      | 20 +----
 .../hdfs/server/namenode/LeaseManager.java      | 82 ++++++++++++++++----
 .../hdfs/server/namenode/TestLeaseManager.java  | 25 ++++++
 3 files changed, 91 insertions(+), 36 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/daacbc18/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
index 582b620..dfdfa79 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
@@ -6226,26 +6226,8 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
     // Calculate number of blocks under construction
     long numUCBlocks = 0;
     readLock();
+    numUCBlocks = leaseManager.getNumUnderConstructionBlocks();
     try {
-      for (Lease lease : leaseManager.getSortedLeases()) {
-        for (String path : lease.getPaths()) {
-          final INodeFile cons;
-          try {
-            cons = dir.getINode(path).asFile();
-            Preconditions.checkState(cons.isUnderConstruction());
-          } catch (UnresolvedLinkException e) {
-            throw new AssertionError("Lease files should reside on this FS");
-          }
-          BlockInfo[] blocks = cons.getBlocks();
-          if(blocks == null)
-            continue;
-          for(BlockInfo b : blocks) {
-            if(!b.isComplete())
-              numUCBlocks++;
-          }
-        }
-      }
-      LOG.info("Number of blocks under construction: " + numUCBlocks);
       return getBlocksTotal() - numUCBlocks;
     } finally {
       readUnlock();

http://git-wip-us.apache.org/repos/asf/hadoop/blob/daacbc18/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java
index fea5b14..e13a5c6 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java
@@ -25,8 +25,9 @@ import java.util.Collection;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.NavigableSet;
+import java.util.NoSuchElementException;
 import java.util.SortedMap;
-import java.util.SortedSet;
 import java.util.TreeMap;
 import java.util.TreeSet;
 
@@ -36,6 +37,7 @@ import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.UnresolvedLinkException;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
+import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
 import org.apache.hadoop.util.Daemon;
 
@@ -79,7 +81,7 @@ public class LeaseManager {
   //
   private final SortedMap<String, Lease> leases = new TreeMap<String, Lease>();
   // Set of: Lease
-  private final SortedSet<Lease> sortedLeases = new TreeSet<Lease>();
+  private final NavigableSet<Lease> sortedLeases = new TreeSet<Lease>();
 
   // 
   // Map path names to leases. It is protected by the sortedLeases lock.
@@ -95,8 +97,41 @@ public class LeaseManager {
   Lease getLease(String holder) {
     return leases.get(holder);
   }
-  
-  SortedSet<Lease> getSortedLeases() {return sortedLeases;}
+
+  @VisibleForTesting
+  int getNumSortedLeases() {return sortedLeases.size();}
+
+  /**
+   * This method iterates through all the leases and counts the number of blocks
+   * which are not COMPLETE. The FSNamesystem read lock MUST be held before
+   * calling this method.
+   * @return
+   */
+  synchronized long getNumUnderConstructionBlocks() {
+    assert this.fsnamesystem.hasReadLock() : "The FSNamesystem read lock wasn't"
+      + "acquired before counting under construction blocks";
+    long numUCBlocks = 0;
+    for (Lease lease : sortedLeases) {
+      for (String path : lease.getPaths()) {
+        final INodeFile cons;
+        try {
+          cons = this.fsnamesystem.getFSDirectory().getINode(path).asFile();
+            Preconditions.checkState(cons.isUnderConstruction());
+        } catch (UnresolvedLinkException e) {
+          throw new AssertionError("Lease files should reside on this FS");
+        }
+        BlockInfo[] blocks = cons.getBlocks();
+        if(blocks == null)
+          continue;
+        for(BlockInfo b : blocks) {
+          if(!b.isComplete())
+            numUCBlocks++;
+        }
+      }
+    }
+    LOG.info("Number of blocks under construction: " + numUCBlocks);
+    return numUCBlocks;
+  }
 
   /** @return the lease containing src */
   public Lease getLeaseByPath(String src) {return sortedLeasesByPath.get(src);}
@@ -421,33 +456,38 @@ public class LeaseManager {
   /** Check the leases beginning from the oldest.
    *  @return true is sync is needed.
    */
-  private synchronized boolean checkLeases() {
+  @VisibleForTesting
+  synchronized boolean checkLeases() {
     boolean needSync = false;
     assert fsnamesystem.hasWriteLock();
-    for(; sortedLeases.size() > 0; ) {
-      final Lease oldest = sortedLeases.first();
-      if (!oldest.expiredHardLimit()) {
-        return needSync;
+    Lease leaseToCheck = null;
+    try {
+      leaseToCheck = sortedLeases.first();
+    } catch(NoSuchElementException e) {}
+
+    while(leaseToCheck != null) {
+      if (!leaseToCheck.expiredHardLimit()) {
+        break;
       }
 
-      LOG.info(oldest + " has expired hard limit");
+      LOG.info(leaseToCheck + " has expired hard limit");
 
       final List<String> removing = new ArrayList<String>();
-      // need to create a copy of the oldest lease paths, becuase 
+      // need to create a copy of the oldest lease paths, because 
       // internalReleaseLease() removes paths corresponding to empty files,
       // i.e. it needs to modify the collection being iterated over
       // causing ConcurrentModificationException
-      String[] leasePaths = new String[oldest.getPaths().size()];
-      oldest.getPaths().toArray(leasePaths);
+      String[] leasePaths = new String[leaseToCheck.getPaths().size()];
+      leaseToCheck.getPaths().toArray(leasePaths);
       for(String p : leasePaths) {
         try {
-          boolean completed = fsnamesystem.internalReleaseLease(oldest, p,
+          boolean completed = fsnamesystem.internalReleaseLease(leaseToCheck, p,
               HdfsServerConstants.NAMENODE_LEASE_HOLDER);
           if (LOG.isDebugEnabled()) {
             if (completed) {
               LOG.debug("Lease recovery for " + p + " is complete. File closed.");
             } else {
-              LOG.debug("Started block recovery " + p + " lease " + oldest);
+              LOG.debug("Started block recovery " + p + " lease " + leaseToCheck);
             }
           }
           // If a lease recovery happened, we need to sync later.
@@ -456,15 +496,23 @@ public class LeaseManager {
           }
         } catch (IOException e) {
           LOG.error("Cannot release the path " + p + " in the lease "
-              + oldest, e);
+              + leaseToCheck, e);
           removing.add(p);
         }
       }
 
       for(String p : removing) {
-        removeLease(oldest, p);
+        removeLease(leaseToCheck, p);
       }
+      leaseToCheck = sortedLeases.higher(leaseToCheck);
     }
+
+    try {
+      if(leaseToCheck != sortedLeases.first()) {
+        LOG.warn("Unable to release hard-limit expired lease: "
+          + sortedLeases.first());
+      }
+    } catch(NoSuchElementException e) {}
     return needSync;
   }
 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/daacbc18/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestLeaseManager.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestLeaseManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestLeaseManager.java
index 5c906b4..9b454ea 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestLeaseManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestLeaseManager.java
@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.hdfs.server.namenode;
 
+import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertNull;
 
@@ -54,4 +55,28 @@ public class TestLeaseManager {
     assertNull(lm.getLeaseByPath("/a/b"));
     assertNull(lm.getLeaseByPath("/a/c"));
   }
+
+  /** Check that even if LeaseManager.checkLease is not able to relinquish
+   * leases, the Namenode does't enter an infinite loop while holding the FSN
+   * write lock and thus become unresponsive
+   */
+  @Test (timeout=1000)
+  public void testCheckLeaseNotInfiniteLoop() {
+    FSNamesystem fsn = Mockito.mock(FSNamesystem.class);
+    Mockito.when(fsn.isRunning()).thenReturn(true);
+    Mockito.when(fsn.hasWriteLock()).thenReturn(true);
+    LeaseManager lm = new LeaseManager(fsn);
+
+    //Make sure the leases we are going to add exceed the hard limit
+    lm.setLeasePeriod(0,0);
+
+    //Add some leases to the LeaseManager
+    lm.addLease("holder1", "src1");
+    lm.addLease("holder2", "src2");
+    lm.addLease("holder3", "src3");
+    assertEquals(lm.getNumSortedLeases(), 3);
+
+    //Initiate a call to checkLease. This should exit within the test timeout
+    lm.checkLeases();
+  }
 }