You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by xg...@apache.org on 2016/12/05 18:47:33 UTC
[15/29] hadoop git commit: HDFS-11180. Intermittent deadlock in
NameNode when failover happens.
HDFS-11180. Intermittent deadlock in NameNode when failover happens.
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/e0fa4923
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/e0fa4923
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/e0fa4923
Branch: refs/heads/YARN-5734
Commit: e0fa49234fd37aca88e1caa95bac77bca192bae4
Parents: 1f7613b
Author: Akira Ajisaka <aa...@apache.org>
Authored: Thu Dec 1 23:08:59 2016 +0900
Committer: Akira Ajisaka <aa...@apache.org>
Committed: Thu Dec 1 23:08:59 2016 +0900
----------------------------------------------------------------------
.../dev-support/findbugsExcludeFile.xml | 27 ++++++++
.../hadoop/hdfs/server/namenode/FSEditLog.java | 72 +++++++++++++++++---
.../hadoop/hdfs/server/namenode/FSImage.java | 15 +++-
.../hdfs/server/namenode/FSNamesystem.java | 27 ++++++--
.../hdfs/server/namenode/NameNodeRpcServer.java | 2 +-
.../server/namenode/ha/StandbyCheckpointer.java | 4 +-
.../server/namenode/TestFSNamesystemMBean.java | 24 +++++++
7 files changed, 148 insertions(+), 23 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/e0fa4923/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml b/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml
index 426fb72..e6e4057 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml
+++ b/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml
@@ -109,6 +109,33 @@
<Field name="journalSet" />
<Bug pattern="IS2_INCONSISTENT_SYNC" />
</Match>
+ <!--
+ FSEditLog#getTotalSyncCount is not synchronized because this method is
+ used by metrics. NullPointerException can happen and it is ignored.
+ -->
+ <Match>
+ <Class name="org.apache.hadoop.hdfs.server.namenode.FSEditLog" />
+ <Field name="editLogStream" />
+ <Bug pattern="IS2_INCONSISTENT_SYNC" />
+ </Match>
+ <!--
+ FSEditLog#isOpenForWriteWithoutLock and FSEditLog#isSegmentOpenWithoutLock
+ are not synchronized because these methods are used by metrics.
+ -->
+ <Match>
+ <Class name="org.apache.hadoop.hdfs.server.namenode.FSEditLog" />
+ <Field name="state" />
+ <Bug pattern="IS2_INCONSISTENT_SYNC" />
+ </Match>
+ <!--
+ All of the threads which update/increment txid are synchronized,
+ so txid is made volatile instead of an AtomicLong.
+ -->
+ <Match>
+ <Class name="org.apache.hadoop.hdfs.server.namenode.FSEditLog" />
+ <Field name="txid" />
+ <Bug pattern="VO_VOLATILE_INCREMENT" />
+ </Match>
<!--
This method isn't performance-critical and is much clearer to write as it's written.
-->
http://git-wip-us.apache.org/repos/asf/hadoop/blob/e0fa4923/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java
index ef9eb68..c9ee32b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java
@@ -155,14 +155,16 @@ public class FSEditLog implements LogsPurgeable {
private EditLogOutputStream editLogStream = null;
// a monotonically increasing counter that represents transactionIds.
- private long txid = 0;
+ // All of the threads which update/increment txid are synchronized,
+ // so make txid volatile instead of AtomicLong.
+ private volatile long txid = 0;
// stores the last synced transactionId.
private long synctxid = 0;
// the first txid of the log that's currently open for writing.
// If this value is N, we are currently writing to edits_inprogress_N
- private long curSegmentTxId = HdfsServerConstants.INVALID_TXID;
+ private volatile long curSegmentTxId = HdfsServerConstants.INVALID_TXID;
// the time of printing the statistics to the log file.
private long lastPrintTime;
@@ -338,7 +340,18 @@ public class FSEditLog implements LogsPurgeable {
return state == State.IN_SEGMENT ||
state == State.BETWEEN_LOG_SEGMENTS;
}
-
+
+ /**
+ * Return true if the log is currently open in write mode.
+ * This method is not synchronized and must be used only for metrics.
+ * @return true if the log is currently open in write mode, regardless
+ * of whether it actually has an open segment.
+ */
+ boolean isOpenForWriteWithoutLock() {
+ return state == State.IN_SEGMENT ||
+ state == State.BETWEEN_LOG_SEGMENTS;
+ }
+
/**
* @return true if the log is open in write mode and has a segment open
* ready to take edits.
@@ -348,6 +361,16 @@ public class FSEditLog implements LogsPurgeable {
}
/**
+ * Return true if the state is IN_SEGMENT.
+ * This method is not synchronized and must be used only for metrics.
+ * @return true if the log is open in write mode and has a segment open
+ * ready to take edits.
+ */
+ boolean isSegmentOpenWithoutLock() {
+ return state == State.IN_SEGMENT;
+ }
+
+ /**
* @return true if the log is open in read mode.
*/
public synchronized boolean isOpenForRead() {
@@ -522,7 +545,16 @@ public class FSEditLog implements LogsPurgeable {
public synchronized long getLastWrittenTxId() {
return txid;
}
-
+
+ /**
+ * Return the transaction ID of the last transaction written to the log.
+ * This method is not synchronized and must be used only for metrics.
+ * @return The transaction ID of the last transaction written to the log
+ */
+ long getLastWrittenTxIdWithoutLock() {
+ return txid;
+ }
+
/**
* @return the first transaction ID in the current log segment
*/
@@ -531,7 +563,16 @@ public class FSEditLog implements LogsPurgeable {
"Bad state: %s", state);
return curSegmentTxId;
}
-
+
+ /**
+ * Return the first transaction ID in the current log segment.
+ * This method is not synchronized and must be used only for metrics.
+ * @return The first transaction ID in the current log segment
+ */
+ long getCurSegmentTxIdWithoutLock() {
+ return curSegmentTxId;
+ }
+
/**
* Set the transaction ID to use for the next transaction written.
*/
@@ -1182,7 +1223,9 @@ public class FSEditLog implements LogsPurgeable {
/**
* Get all the journals this edit log is currently operating on.
*/
- synchronized List<JournalAndStream> getJournals() {
+ List<JournalAndStream> getJournals() {
+ // The list implementation is CopyOnWriteArrayList,
+ // so we don't need to synchronize this method.
return journalSet.getAllJournalStreams();
}
@@ -1190,7 +1233,7 @@ public class FSEditLog implements LogsPurgeable {
* Used only by tests.
*/
@VisibleForTesting
- synchronized public JournalSet getJournalSet() {
+ public JournalSet getJournalSet() {
return journalSet;
}
@@ -1731,11 +1774,18 @@ public class FSEditLog implements LogsPurgeable {
* Return total number of syncs happened on this edit log.
* @return long - count
*/
- public synchronized long getTotalSyncCount() {
- if (editLogStream != null) {
- return editLogStream.getNumSync();
- } else {
+ public long getTotalSyncCount() {
+ // Avoid NPE as much as possible.
+ if (editLogStream == null) {
return 0;
}
+ long count = 0;
+ try {
+ count = editLogStream.getNumSync();
+ } catch (NullPointerException ignore) {
+ // This method is used for metrics, so we don't synchronize it.
+ // Therefore an NPE can still happen even though there is a null check above.
+ }
+ return count;
}
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/e0fa4923/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java
index d02a6ca..c40c626 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java
@@ -924,7 +924,7 @@ public class FSImage implements Closeable {
Canceler canceler) throws IOException {
FSImageCompression compression =
FSImageCompression.createCompression(conf);
- long txid = getLastAppliedOrWrittenTxId();
+ long txid = getCorrectLastAppliedOrWrittenTxId();
SaveNamespaceContext ctx = new SaveNamespaceContext(source, txid,
canceler);
FSImageFormat.Saver saver = new FSImageFormat.Saver(ctx);
@@ -1019,7 +1019,7 @@ public class FSImage implements Closeable {
final long checkpointTxId = image.getCheckpointTxId();
final long checkpointAge = Time.now() - imageFile.lastModified();
if (checkpointAge <= timeWindow * 1000 &&
- checkpointTxId >= this.getLastAppliedOrWrittenTxId() - txGap) {
+ checkpointTxId >= this.getCorrectLastAppliedOrWrittenTxId() - txGap) {
return false;
}
}
@@ -1046,7 +1046,7 @@ public class FSImage implements Closeable {
if (editLogWasOpen) {
editLog.endCurrentLogSegment(true);
}
- long imageTxId = getLastAppliedOrWrittenTxId();
+ long imageTxId = getCorrectLastAppliedOrWrittenTxId();
if (!addToCheckpointing(imageTxId)) {
throw new IOException(
"FS image is being downloaded from another NN at txid " + imageTxId);
@@ -1418,6 +1418,15 @@ public class FSImage implements Closeable {
public long getLastAppliedOrWrittenTxId() {
return Math.max(lastAppliedTxId,
+ editLog != null ? editLog.getLastWrittenTxIdWithoutLock() : 0);
+ }
+
+ /**
+ * This method holds a lock of FSEditLog to get the correct value.
+ * This method must not be used for metrics.
+ */
+ public long getCorrectLastAppliedOrWrittenTxId() {
+ return Math.max(lastAppliedTxId,
editLog != null ? editLog.getLastWrittenTxId() : 0);
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/e0fa4923/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
index c78ef46..8a750a0 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
@@ -3654,7 +3654,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
//create ha status
final NNHAStatusHeartbeat haState = new NNHAStatusHeartbeat(
haContext.getState().getServiceState(),
- getFSImage().getLastAppliedOrWrittenTxId());
+ getFSImage().getCorrectLastAppliedOrWrittenTxId());
return new HeartbeatResponse(cmds, haState, rollingUpgradeInfo,
blockReportLeaseId);
@@ -3779,7 +3779,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
public void run() {
while (fsRunning && shouldRun) {
try {
- long numEdits = getTransactionsSinceLastLogRoll();
+ long numEdits = getCorrectTransactionsSinceLastLogRoll();
if (numEdits > rollThreshold) {
FSNamesystem.LOG.info("NameNode rolling its own edit log because"
+ " number of edits in open segment exceeds threshold of "
@@ -3928,17 +3928,30 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
@Metric({"TransactionsSinceLastLogRoll",
"Number of transactions since last edit log roll"})
public long getTransactionsSinceLastLogRoll() {
+ if (isInStandbyState() || !getEditLog().isSegmentOpenWithoutLock()) {
+ return 0;
+ } else {
+ return getEditLog().getLastWrittenTxIdWithoutLock() -
+ getEditLog().getCurSegmentTxIdWithoutLock() + 1;
+ }
+ }
+
+ /**
+ * Get the correct number of transactions since last edit log roll.
+ * This method holds a lock of FSEditLog and must not be used for metrics.
+ */
+ private long getCorrectTransactionsSinceLastLogRoll() {
if (isInStandbyState() || !getEditLog().isSegmentOpen()) {
return 0;
} else {
return getEditLog().getLastWrittenTxId() -
- getEditLog().getCurSegmentTxId() + 1;
+ getEditLog().getCurSegmentTxId() + 1;
}
}
-
+
@Metric({"LastWrittenTransactionId", "Transaction ID written to the edit log"})
public long getLastWrittenTransactionId() {
- return getEditLog().getLastWrittenTxId();
+ return getEditLog().getLastWrittenTxIdWithoutLock();
}
@Metric({"LastCheckpointTime",
@@ -5613,7 +5626,9 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
List<Map<String, String>> jasList = new ArrayList<Map<String, String>>();
FSEditLog log = getFSImage().getEditLog();
if (log != null) {
- boolean openForWrite = log.isOpenForWrite();
+ // This flag can be false because we cannot hold a lock of FSEditLog
+ // for metrics.
+ boolean openForWrite = log.isOpenForWriteWithoutLock();
for (JournalAndStream jas : log.getJournals()) {
final Map<String, String> jasMap = new HashMap<String, String>();
String manager = jas.getManager().toString();
http://git-wip-us.apache.org/repos/asf/hadoop/blob/e0fa4923/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java
index c55c4d1..0fc3e60 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java
@@ -1215,7 +1215,7 @@ public class NameNodeRpcServer implements NamenodeProtocols {
checkNNStartup();
namesystem.checkOperation(OperationCategory.UNCHECKED);
namesystem.checkSuperuserPrivilege();
- return namesystem.getFSImage().getLastAppliedOrWrittenTxId();
+ return namesystem.getFSImage().getCorrectLastAppliedOrWrittenTxId();
}
@Override // NamenodeProtocol
http://git-wip-us.apache.org/repos/asf/hadoop/blob/e0fa4923/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java
index 099a6aa..569c6bf 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java
@@ -172,7 +172,7 @@ public class StandbyCheckpointer {
FSImage img = namesystem.getFSImage();
long prevCheckpointTxId = img.getStorage().getMostRecentCheckpointTxId();
- long thisCheckpointTxId = img.getLastAppliedOrWrittenTxId();
+ long thisCheckpointTxId = img.getCorrectLastAppliedOrWrittenTxId();
assert thisCheckpointTxId >= prevCheckpointTxId;
if (thisCheckpointTxId == prevCheckpointTxId) {
LOG.info("A checkpoint was triggered but the Standby Node has not " +
@@ -315,7 +315,7 @@ public class StandbyCheckpointer {
private long countUncheckpointedTxns() {
FSImage img = namesystem.getFSImage();
- return img.getLastAppliedOrWrittenTxId() -
+ return img.getCorrectLastAppliedOrWrittenTxId() -
img.getStorage().getMostRecentCheckpointTxId();
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/e0fa4923/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystemMBean.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystemMBean.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystemMBean.java
index 4687cfd..d311494 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystemMBean.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystemMBean.java
@@ -162,6 +162,30 @@ public class TestFSNamesystemMBean {
}
}
+ // The test makes sure JMX request can be processed even if FSEditLog
+ // is synchronized.
+ @Test
+ public void testWithFSEditLogLock() throws Exception {
+ Configuration conf = new Configuration();
+ MiniDFSCluster cluster = null;
+ try {
+ cluster = new MiniDFSCluster.Builder(conf).build();
+ cluster.waitActive();
+ synchronized (cluster.getNameNode().getFSImage().getEditLog()) {
+ MBeanClient client = new MBeanClient();
+ client.start();
+ client.join(20000);
+ assertTrue("JMX calls are blocked when FSEditLog" +
+ " is synchronized by another thread", client.succeeded);
+ client.interrupt();
+ }
+ } finally {
+ if (cluster != null) {
+ cluster.shutdown();
+ }
+ }
+ }
+
@Test(timeout = 120000)
public void testFsEditLogMetrics() throws Exception {
final Configuration conf = new Configuration();
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org