Posted to common-commits@hadoop.apache.org by vi...@apache.org on 2014/12/03 06:23:38 UTC

[06/50] [abbrv] hadoop git commit: HDFS-7097. Allow block reports to be processed during checkpointing on standby name node. (kihwal via wang)

HDFS-7097. Allow block reports to be processed during checkpointing on standby name node. (kihwal via wang)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/f43a20c5
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/f43a20c5
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/f43a20c5

Branch: refs/heads/HDFS-EC
Commit: f43a20c529ac3f104add95b222de6580757b3763
Parents: 56f3eec
Author: Andrew Wang <wa...@apache.org>
Authored: Tue Nov 25 15:37:11 2014 -0800
Committer: Andrew Wang <wa...@apache.org>
Committed: Tue Nov 25 15:37:11 2014 -0800

----------------------------------------------------------------------
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt     |  3 ++
 .../hdfs/server/namenode/FSImageFormat.java     |  5 ++-
 .../hdfs/server/namenode/FSNamesystem.java      | 39 +++++++++++++++++++-
 .../hdfs/server/namenode/ha/EditLogTailer.java  | 12 +++++-
 .../server/namenode/ha/StandbyCheckpointer.java | 12 ++++--
 .../namenode/ha/TestStandbyCheckpoints.java     | 21 ++++++++++-
 6 files changed, 83 insertions(+), 9 deletions(-)
----------------------------------------------------------------------
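
For readers skimming the patch, the locking split it introduces can be summarized in a small, self-contained sketch (hypothetical class and method names, not the real FSNamesystem API): on the standby, edit-log tailing and checkpointing serialize on a dedicated checkpoint lock, while block report processing only needs the ordinary namesystem lock and can therefore proceed while a checkpoint is being written.

import java.util.concurrent.locks.ReentrantLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;

// Minimal sketch of the locking split introduced by HDFS-7097
// (hypothetical names; the real code lives in FSNamesystem and the
// HA checkpointer/tailer classes).
class StandbyLockingSketch {
  // Ordinary namesystem lock: guards both name and block state.
  private final ReentrantReadWriteLock fsLock = new ReentrantReadWriteLock(true);
  // Checkpoint lock: serializes edit-log tailing against checkpointing
  // on the standby without blocking block-state updates.
  private final ReentrantLock cpLock = new ReentrantLock();

  // Edit tailer: modifies the name system, so it must exclude checkpointing.
  void tailEdits() throws InterruptedException {
    cpLock.lockInterruptibly();
    try {
      fsLock.writeLock().lock();
      try {
        // apply queued edits to the name system ...
      } finally {
        fsLock.writeLock().unlock();
      }
    } finally {
      cpLock.unlock();
    }
  }

  // Checkpointer: only needs a consistent view of the name system, so it
  // takes cpLock but not the full namesystem write lock.
  void doCheckpoint() throws InterruptedException {
    cpLock.lockInterruptibly();
    try {
      // save the namespace image to disk ...
    } finally {
      cpLock.unlock();
    }
  }

  // Block report processing: touches only block state, so it takes the
  // namesystem lock and is not blocked by an in-progress checkpoint.
  void processBlockReport() {
    fsLock.writeLock().lock();
    try {
      // apply or queue the block report ...
    } finally {
      fsLock.writeLock().unlock();
    }
  }
}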


http://git-wip-us.apache.org/repos/asf/hadoop/blob/f43a20c5/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 3cd8e15..5d13308 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -486,6 +486,9 @@ Release 2.7.0 - UNRELEASED
     HDFS-7303. NN UI fails to distinguish datanodes on the same host.
     (Benoy Antony via wheat9)
 
+    HDFS-7097. Allow block reports to be processed during checkpointing on
+    standby name node. (kihwal via wang)
+
 Release 2.6.1 - UNRELEASED
 
   INCOMPATIBLE CHANGES

http://git-wip-us.apache.org/repos/asf/hadoop/blob/f43a20c5/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java
index 76f51cd..c50f506 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java
@@ -1183,9 +1183,11 @@ public class FSImageFormat {
   @Deprecated
   static class Saver {
     private static final int LAYOUT_VERSION = -51;
+    public static final int CHECK_CANCEL_INTERVAL = 4096;
     private final SaveNamespaceContext context;
     /** Set to true once an image has been written */
     private boolean saved = false;
+    private long checkCancelCounter = 0;
 
     /** The MD5 checksum of the file that was written */
     private MD5Hash savedDigest;
@@ -1322,7 +1324,6 @@ public class FSImageFormat {
       // Write normal children INode.
       out.writeInt(children.size());
       int dirNum = 0;
-      int i = 0;
       for(INode child : children) {
         // print all children first
         // TODO: for HDFS-5428, we cannot change the format/content of fsimage
@@ -1335,7 +1336,7 @@ public class FSImageFormat {
             && child.asFile().isUnderConstruction()) {
           this.snapshotUCMap.put(child.getId(), child.asFile());
         }
-        if (i++ % 50 == 0) {
+        if (checkCancelCounter++ % CHECK_CANCEL_INTERVAL == 0) {
           context.checkCancelled();
         }
       }
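
The hunk above also replaces the per-directory counter (previously checked every 50 children) with a single saver-wide counter checked every 4096 children, presumably to cut the per-inode cost of cancellation polling while keeping cancellation timely. A rough sketch of the pattern, with hypothetical helpers standing in for SaveNamespaceContext:

// Sketch of the throttled cancellation check used while saving children
// (hypothetical context class; the real code uses SaveNamespaceContext).
class CancellationThrottleSketch {
  static final int CHECK_CANCEL_INTERVAL = 4096;
  private long checkCancelCounter = 0;

  void saveChildren(Iterable<?> children) throws InterruptedException {
    for (Object child : children) {
      writeChild(child);
      // Poll for cancellation only once every CHECK_CANCEL_INTERVAL children,
      // keeping the hot save loop cheap while cancellation stays timely.
      if (checkCancelCounter++ % CHECK_CANCEL_INTERVAL == 0) {
        checkCancelled();
      }
    }
  }

  private void writeChild(Object child) {
    // serialize one child inode to the image output stream ...
  }

  private void checkCancelled() throws InterruptedException {
    if (Thread.currentThread().isInterrupted()) {
      throw new InterruptedException("namespace save was cancelled");
    }
  }
}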

http://git-wip-us.apache.org/repos/asf/hadoop/blob/f43a20c5/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
index 899c126..82396c6 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
@@ -496,6 +496,15 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
   /** Lock to protect FSNamesystem. */
   private final FSNamesystemLock fsLock;
 
+  /** 
+   * Checkpoint lock to protect FSNamesystem modification on standby NNs.
+   * Unlike fsLock, it does not affect block updates. On active NNs, this lock
+   * does not provide proper protection, because there are operations that
+   * modify both block and name system state.  Even on standby, fsLock is 
+   * used when block state changes need to be blocked.
+   */
+  private final ReentrantLock cpLock;
+
   /**
    * Used when this NN is in standby state to read from the shared edit log.
    */
@@ -758,6 +767,8 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
     LOG.info("fsLock is fair:" + fair);
     fsLock = new FSNamesystemLock(fair);
     cond = fsLock.writeLock().newCondition();
+    cpLock = new ReentrantLock();
+
     this.fsImage = fsImage;
     try {
       resourceRecheckInterval = conf.getLong(
@@ -1554,6 +1565,22 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
     return this.fsLock.getWriteHoldCount();
   }
 
+  /** Lock the checkpoint lock */
+  public void cpLock() {
+    this.cpLock.lock();
+  }
+
+  /** Lock the checkpoint lock interruptibly */
+  public void cpLockInterruptibly() throws InterruptedException {
+    this.cpLock.lockInterruptibly();
+  }
+
+  /** Unlock the checkpoint lock */
+  public void cpUnlock() {
+    this.cpLock.unlock();
+  }
+    
+
   NamespaceInfo getNamespaceInfo() {
     readLock();
     try {
@@ -5274,7 +5301,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
     checkOperation(OperationCategory.UNCHECKED);
     checkSuperuserPrivilege();
 
-
+    cpLock();  // Block if a checkpoint is in progress on standby.
     readLock();
     try {
       checkOperation(OperationCategory.UNCHECKED);
@@ -5286,6 +5313,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
       getFSImage().saveNamespace(this);
     } finally {
       readUnlock();
+      cpUnlock();
     }
     LOG.info("New namespace image has been created");
   }
@@ -5300,6 +5328,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
       StandbyException {
     checkSuperuserPrivilege();
     checkOperation(OperationCategory.UNCHECKED);
+    cpLock();  // Block if a checkpoint is in progress on standby.
     writeLock();
     try {
       checkOperation(OperationCategory.UNCHECKED);
@@ -5314,6 +5343,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
       return val;
     } finally {
       writeUnlock();
+      cpUnlock();
     }
   }
 
@@ -5324,12 +5354,14 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
   void finalizeUpgrade() throws IOException {
     checkSuperuserPrivilege();
     checkOperation(OperationCategory.UNCHECKED);
+    cpLock();  // Block if a checkpoint is in progress on standby.
     writeLock();
     try {
       checkOperation(OperationCategory.UNCHECKED);
       getFSImage().finalizeUpgrade(this.isHaEnabled() && inActiveState());
     } finally {
       writeUnlock();
+      cpUnlock();
     }
   }
 
@@ -7543,6 +7575,11 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
   }
 
   @VisibleForTesting
+  public ReentrantLock getCpLockForTests() {
+    return cpLock;
+  }
+
+  @VisibleForTesting
   public SafeModeInfo getSafeModeInfoForTests() {
     return safeMode;
   }
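
Note that each operation touched in this file follows the same acquisition order: the checkpoint lock first, then the namesystem lock, released in reverse. Consistent ordering with the edit tailer, which also takes cpLock before the namesystem lock, avoids deadlock between the two. A minimal sketch of that convention (hypothetical operation name; the real callers include saveNamespace and finalizeUpgrade above):

import java.util.concurrent.locks.ReentrantLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;

// Sketch of the lock-ordering convention used by the admin operations above:
// checkpoint lock first, namesystem lock second, released in reverse order
// (hypothetical names).
class AdminOpOrderingSketch {
  private final ReentrantLock cpLock = new ReentrantLock();
  private final ReentrantReadWriteLock fsLock = new ReentrantReadWriteLock(true);

  void adminOperation() {
    cpLock.lock();              // block while a standby checkpoint is in progress
    fsLock.writeLock().lock();
    try {
      // perform the metadata operation ...
    } finally {
      fsLock.writeLock().unlock();
      cpLock.unlock();
    }
  }
}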

http://git-wip-us.apache.org/repos/asf/hadoop/blob/f43a20c5/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java
index a16af37..3d72645 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java
@@ -183,6 +183,8 @@ public class EditLogTailer {
       @Override
       public Void run() throws Exception {
         try {
+          // It is already under the full name system lock and the checkpointer
+          // thread is already stopped. No need to acquire any other lock.
           doTailEdits();
         } catch (InterruptedException e) {
           throw new IOException(e);
@@ -321,7 +323,15 @@ public class EditLogTailer {
           if (!shouldRun) {
             break;
           }
-          doTailEdits();
+          // Prevent the name system from being read while it is being
+          // modified. The full name system lock will be acquired to further
+          // block even block state updates.
+          namesystem.cpLockInterruptibly();
+          try {
+            doTailEdits();
+          } finally {
+            namesystem.cpUnlock();
+          }
         } catch (EditLogInputException elie) {
           LOG.warn("Error while reading edits from disk. Will try again.", elie);
         } catch (InterruptedException ie) {
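
One detail worth noting: the tailer takes the checkpoint lock interruptibly, so the tailer thread can still be shut down while a long-running checkpoint holds the lock. A minimal sketch of that loop shape, with hypothetical names and a fixed sleep standing in for the configured tailing interval:

import java.util.concurrent.locks.ReentrantLock;

// Sketch of the tailer loop shape: acquire the checkpoint lock interruptibly
// so a stop request (thread interrupt) is honored even while a checkpoint
// holds the lock (hypothetical names).
class TailerLoopSketch implements Runnable {
  private final ReentrantLock cpLock;
  private volatile boolean shouldRun = true;

  TailerLoopSketch(ReentrantLock cpLock) {
    this.cpLock = cpLock;
  }

  @Override
  public void run() {
    while (shouldRun) {
      try {
        cpLock.lockInterruptibly();   // gives up immediately if interrupted
        try {
          // tail and apply new edits ...
        } finally {
          cpLock.unlock();
        }
        Thread.sleep(60_000);         // wait before the next tailing pass
      } catch (InterruptedException ie) {
        // Interrupted while waiting for the lock or sleeping: stop tailing.
        return;
      }
    }
  }

  void stop(Thread tailerThread) {
    shouldRun = false;
    tailerThread.interrupt();
  }
}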

http://git-wip-us.apache.org/repos/asf/hadoop/blob/f43a20c5/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java
index c7a0d62..1e40368 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java
@@ -153,7 +153,10 @@ public class StandbyCheckpointer {
     final long txid;
     final NameNodeFile imageType;
     
-    namesystem.longReadLockInterruptibly();
+    // Acquire cpLock to make sure no one is modifying the name system.
+    // It does not need the full namesystem write lock, since the only thing
+    // that modifies the namesystem on a standby node is edit log replay.
+    namesystem.cpLockInterruptibly();
     try {
       assert namesystem.getEditLog().isOpenForRead() :
         "Standby Checkpointer should only attempt a checkpoint when " +
@@ -190,7 +193,7 @@ public class StandbyCheckpointer {
         img.saveLegacyOIVImage(namesystem, outputDir, canceler);
       }
     } finally {
-      namesystem.longReadUnlock();
+      namesystem.cpUnlock();
     }
     
     // Upload the saved checkpoint back to the active
@@ -226,8 +229,11 @@ public class StandbyCheckpointer {
    * minute or so.
    */
   public void cancelAndPreventCheckpoints(String msg) throws ServiceFailedException {
-    thread.preventCheckpointsFor(PREVENT_AFTER_CANCEL_MS);
     synchronized (cancelLock) {
+      // The checkpointer thread takes this lock and checks if checkpointing is
+      // postponed. 
+      thread.preventCheckpointsFor(PREVENT_AFTER_CANCEL_MS);
+
       // Before beginning a checkpoint, the checkpointer thread
       // takes this lock, and creates a canceler object.
       // If the canceler is non-null, then a checkpoint is in

http://git-wip-us.apache.org/repos/asf/hadoop/blob/f43a20c5/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java
index b00f916..2f9b945 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java
@@ -26,6 +26,7 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSTestUtil;
@@ -91,7 +92,7 @@ public class TestStandbyCheckpoints {
 
         cluster = new MiniDFSCluster.Builder(conf)
             .nnTopology(topology)
-            .numDataNodes(0)
+            .numDataNodes(1)
             .build();
         cluster.waitActive();
 
@@ -359,6 +360,13 @@ public class TestStandbyCheckpoints {
     } catch (StandbyException se) {
       GenericTestUtils.assertExceptionContains("is not supported", se);
     }
+
+    // Make sure new incremental block reports are processed during
+    // checkpointing on the SBN.
+    assertEquals(0, cluster.getNamesystem(1).getPendingDataNodeMessageCount());
+    doCreate();
+    Thread.sleep(1000);
+    assertTrue(cluster.getNamesystem(1).getPendingDataNodeMessageCount() > 0);
     
     // Make sure that the checkpoint is still going on, implying that the client
     // RPC to the SBN happened during the checkpoint.
@@ -410,7 +418,7 @@ public class TestStandbyCheckpoints {
     
     assertFalse(nn1.getNamesystem().getFsLockForTests().hasQueuedThreads());
     assertFalse(nn1.getNamesystem().getFsLockForTests().isWriteLocked());
-    assertTrue(nn1.getNamesystem().getLongReadLockForTests().hasQueuedThreads());
+    assertTrue(nn1.getNamesystem().getCpLockForTests().hasQueuedThreads());
     
     // Get /jmx of the standby NN web UI, which will cause the FSNS read lock to
     // be taken.
@@ -437,6 +445,15 @@ public class TestStandbyCheckpoints {
       fs.mkdirs(p);
     }
   }
+
+  private void doCreate() throws IOException {
+    Path p = new Path("/testFile");
+    fs.delete(p, false);
+    FSDataOutputStream out = fs.create(p, (short)1);
+    out.write(42);
+    out.close();
+  }
+  
   
   /**
    * A codec which just slows down the saving of the image significantly