You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by wa...@apache.org on 2014/11/26 00:42:06 UTC
hadoop git commit: HDFS-7097. Allow block reports to be processed
during checkpointing on standby name node. (kihwal via wang)
Repository: hadoop
Updated Branches:
refs/heads/branch-2 aad16f8f5 -> 915176c4e
HDFS-7097. Allow block reports to be processed during checkpointing on standby name node. (kihwal via wang)
(cherry picked from commit f43a20c529ac3f104add95b222de6580757b3763)
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/915176c4
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/915176c4
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/915176c4
Branch: refs/heads/branch-2
Commit: 915176c4e769f510f835f1ad182bc62586a01e0b
Parents: aad16f8
Author: Andrew Wang <wa...@apache.org>
Authored: Tue Nov 25 15:37:11 2014 -0800
Committer: Andrew Wang <wa...@apache.org>
Committed: Tue Nov 25 15:37:21 2014 -0800
----------------------------------------------------------------------
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 ++
.../hdfs/server/namenode/FSImageFormat.java | 5 ++-
.../hdfs/server/namenode/FSNamesystem.java | 39 +++++++++++++++++++-
.../hdfs/server/namenode/ha/EditLogTailer.java | 12 +++++-
.../server/namenode/ha/StandbyCheckpointer.java | 12 ++++--
.../namenode/ha/TestStandbyCheckpoints.java | 21 ++++++++++-
6 files changed, 83 insertions(+), 9 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/915176c4/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index fed1846..3eb9d3b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -229,6 +229,9 @@ Release 2.7.0 - UNRELEASED
HDFS-7303. NN UI fails to distinguish datanodes on the same host.
(Benoy Antony via wheat9)
+ HDFS-7097. Allow block reports to be processed during checkpointing on
+ standby name node. (kihwal via wang)
+
Release 2.6.1 - UNRELEASED
INCOMPATIBLE CHANGES
http://git-wip-us.apache.org/repos/asf/hadoop/blob/915176c4/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java
index 1f1bf4a..9cb4419 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java
@@ -1188,9 +1188,11 @@ public class FSImageFormat {
@Deprecated
static class Saver {
private static final int LAYOUT_VERSION = -51;
+ public static final int CHECK_CANCEL_INTERVAL = 4096;
private final SaveNamespaceContext context;
/** Set to true once an image has been written */
private boolean saved = false;
+ private long checkCancelCounter = 0;
/** The MD5 checksum of the file that was written */
private MD5Hash savedDigest;
@@ -1327,7 +1329,6 @@ public class FSImageFormat {
// Write normal children INode.
out.writeInt(children.size());
int dirNum = 0;
- int i = 0;
for(INode child : children) {
// print all children first
// TODO: for HDFS-5428, we cannot change the format/content of fsimage
@@ -1340,7 +1341,7 @@ public class FSImageFormat {
&& child.asFile().isUnderConstruction()) {
this.snapshotUCMap.put(child.getId(), child.asFile());
}
- if (i++ % 50 == 0) {
+ if (checkCancelCounter++ % CHECK_CANCEL_INTERVAL == 0) {
context.checkCancelled();
}
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/915176c4/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
index 336acd5..328eff3 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
@@ -485,6 +485,15 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
/** Lock to protect FSNamesystem. */
private final FSNamesystemLock fsLock;
+ /**
+ * Checkpoint lock to protect FSNamesystem modification on standby NNs.
+ * Unlike fsLock, it does not affect block updates. On active NNs, this lock
+ * does not provide proper protection, because there are operations that
+ * modify both block and name system state. Even on standby, fsLock is
+ * used when block state changes need to be blocked.
+ */
+ private final ReentrantLock cpLock;
+
/**
* Used when this NN is in standby state to read from the shared edit log.
*/
@@ -747,6 +756,8 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
LOG.info("fsLock is fair:" + fair);
fsLock = new FSNamesystemLock(fair);
cond = fsLock.writeLock().newCondition();
+ cpLock = new ReentrantLock();
+
this.fsImage = fsImage;
try {
resourceRecheckInterval = conf.getLong(
@@ -1545,6 +1556,22 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
return this.fsLock.getWriteHoldCount();
}
+ /** Lock the checkpoint lock */
+ public void cpLock() {
+ this.cpLock.lock();
+ }
+
+ /** Lock the checkpoint lock interrupibly */
+ public void cpLockInterruptibly() throws InterruptedException {
+ this.cpLock.lockInterruptibly();
+ }
+
+ /** Unlock the checkpoint lock */
+ public void cpUnlock() {
+ this.cpLock.unlock();
+ }
+
+
NamespaceInfo getNamespaceInfo() {
readLock();
try {
@@ -5274,7 +5301,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
checkOperation(OperationCategory.UNCHECKED);
checkSuperuserPrivilege();
-
+ cpLock(); // Block if a checkpointing is in progress on standby.
readLock();
try {
checkOperation(OperationCategory.UNCHECKED);
@@ -5286,6 +5313,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
getFSImage().saveNamespace(this);
} finally {
readUnlock();
+ cpUnlock();
}
LOG.info("New namespace image has been created");
}
@@ -5300,6 +5328,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
StandbyException {
checkSuperuserPrivilege();
checkOperation(OperationCategory.UNCHECKED);
+ cpLock(); // Block if a checkpointing is in progress on standby.
writeLock();
try {
checkOperation(OperationCategory.UNCHECKED);
@@ -5314,6 +5343,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
return val;
} finally {
writeUnlock();
+ cpUnlock();
}
}
@@ -5324,12 +5354,14 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
void finalizeUpgrade() throws IOException {
checkSuperuserPrivilege();
checkOperation(OperationCategory.UNCHECKED);
+ cpLock(); // Block if a checkpointing is in progress on standby.
writeLock();
try {
checkOperation(OperationCategory.UNCHECKED);
getFSImage().finalizeUpgrade(this.isHaEnabled() && inActiveState());
} finally {
writeUnlock();
+ cpUnlock();
}
}
@@ -7543,6 +7575,11 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
}
@VisibleForTesting
+ public ReentrantLock getCpLockForTests() {
+ return cpLock;
+ }
+
+ @VisibleForTesting
public SafeModeInfo getSafeModeInfoForTests() {
return safeMode;
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/915176c4/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java
index a16af37..3d72645 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java
@@ -183,6 +183,8 @@ public class EditLogTailer {
@Override
public Void run() throws Exception {
try {
+ // It is already under the full name system lock and the checkpointer
+ // thread is already stopped. No need to acqure any other lock.
doTailEdits();
} catch (InterruptedException e) {
throw new IOException(e);
@@ -321,7 +323,15 @@ public class EditLogTailer {
if (!shouldRun) {
break;
}
- doTailEdits();
+ // Prevent reading of name system while being modified. The full
+ // name system lock will be acquired to further block even the block
+ // state updates.
+ namesystem.cpLockInterruptibly();
+ try {
+ doTailEdits();
+ } finally {
+ namesystem.cpUnlock();
+ }
} catch (EditLogInputException elie) {
LOG.warn("Error while reading edits from disk. Will try again.", elie);
} catch (InterruptedException ie) {
http://git-wip-us.apache.org/repos/asf/hadoop/blob/915176c4/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java
index c7a0d62..1e40368 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java
@@ -153,7 +153,10 @@ public class StandbyCheckpointer {
final long txid;
final NameNodeFile imageType;
- namesystem.longReadLockInterruptibly();
+ // Acquire cpLock to make sure no one is modifying the name system.
+ // It does not need the full namesystem write lock, since the only thing
+ // that modifies namesystem on standby node is edit log replaying.
+ namesystem.cpLockInterruptibly();
try {
assert namesystem.getEditLog().isOpenForRead() :
"Standby Checkpointer should only attempt a checkpoint when " +
@@ -190,7 +193,7 @@ public class StandbyCheckpointer {
img.saveLegacyOIVImage(namesystem, outputDir, canceler);
}
} finally {
- namesystem.longReadUnlock();
+ namesystem.cpUnlock();
}
// Upload the saved checkpoint back to the active
@@ -226,8 +229,11 @@ public class StandbyCheckpointer {
* minute or so.
*/
public void cancelAndPreventCheckpoints(String msg) throws ServiceFailedException {
- thread.preventCheckpointsFor(PREVENT_AFTER_CANCEL_MS);
synchronized (cancelLock) {
+ // The checkpointer thread takes this lock and checks if checkpointing is
+ // postponed.
+ thread.preventCheckpointsFor(PREVENT_AFTER_CANCEL_MS);
+
// Before beginning a checkpoint, the checkpointer thread
// takes this lock, and creates a canceler object.
// If the canceler is non-null, then a checkpoint is in
http://git-wip-us.apache.org/repos/asf/hadoop/blob/915176c4/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java
index b00f916..2f9b945 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java
@@ -26,6 +26,7 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSTestUtil;
@@ -91,7 +92,7 @@ public class TestStandbyCheckpoints {
cluster = new MiniDFSCluster.Builder(conf)
.nnTopology(topology)
- .numDataNodes(0)
+ .numDataNodes(1)
.build();
cluster.waitActive();
@@ -359,6 +360,13 @@ public class TestStandbyCheckpoints {
} catch (StandbyException se) {
GenericTestUtils.assertExceptionContains("is not supported", se);
}
+
+ // Make sure new incremental block reports are processed during
+ // checkpointing on the SBN.
+ assertEquals(0, cluster.getNamesystem(1).getPendingDataNodeMessageCount());
+ doCreate();
+ Thread.sleep(1000);
+ assertTrue(cluster.getNamesystem(1).getPendingDataNodeMessageCount() > 0);
// Make sure that the checkpoint is still going on, implying that the client
// RPC to the SBN happened during the checkpoint.
@@ -410,7 +418,7 @@ public class TestStandbyCheckpoints {
assertFalse(nn1.getNamesystem().getFsLockForTests().hasQueuedThreads());
assertFalse(nn1.getNamesystem().getFsLockForTests().isWriteLocked());
- assertTrue(nn1.getNamesystem().getLongReadLockForTests().hasQueuedThreads());
+ assertTrue(nn1.getNamesystem().getCpLockForTests().hasQueuedThreads());
// Get /jmx of the standby NN web UI, which will cause the FSNS read lock to
// be taken.
@@ -437,6 +445,15 @@ public class TestStandbyCheckpoints {
fs.mkdirs(p);
}
}
+
+ private void doCreate() throws IOException {
+ Path p = new Path("/testFile");
+ fs.delete(p, false);
+ FSDataOutputStream out = fs.create(p, (short)1);
+ out.write(42);
+ out.close();
+ }
+
/**
* A codec which just slows down the saving of the image significantly