You are viewing a plain text version of this content. The canonical link for it is here.
Posted to hdfs-commits@hadoop.apache.org by wa...@apache.org on 2013/11/01 22:17:09 UTC
svn commit: r1538064 - in
/hadoop/common/branches/branch-2.2/hadoop-hdfs-project/hadoop-hdfs: ./
src/main/java/org/apache/hadoop/hdfs/
src/main/java/org/apache/hadoop/hdfs/server/namenode/
src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ src/ma...
Author: wang
Date: Fri Nov 1 21:17:09 2013
New Revision: 1538064
URL: http://svn.apache.org/r1538064
Log:
HDFS-5037. Active NN should trigger its own edit log rolls. Contributed by Andrew Wang.
Modified:
hadoop/common/branches/branch-2.2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
hadoop/common/branches/branch-2.2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
hadoop/common/branches/branch-2.2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
hadoop/common/branches/branch-2.2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ActiveState.java
hadoop/common/branches/branch-2.2/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
Modified: hadoop/common/branches/branch-2.2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt?rev=1538064&r1=1538063&r2=1538064&view=diff
==============================================================================
--- hadoop/common/branches/branch-2.2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt (original)
+++ hadoop/common/branches/branch-2.2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt Fri Nov 1 21:17:09 2013
@@ -78,6 +78,8 @@ Release 2.2.1 - UNRELEASED
HDFS-4633 TestDFSClientExcludedNodes fails sporadically if excluded nodes
cache expires too quickly (Chris Nauroth via Sanjay)
+ HDFS-5037. Active NN should trigger its own edit log rolls. (wang)
+
Release 2.2.0 - 2013-10-13
INCOMPATIBLE CHANGES
Modified: hadoop/common/branches/branch-2.2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java?rev=1538064&r1=1538063&r2=1538064&view=diff
==============================================================================
--- hadoop/common/branches/branch-2.2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java (original)
+++ hadoop/common/branches/branch-2.2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java Fri Nov 1 21:17:09 2013
@@ -181,6 +181,11 @@ public class DFSConfigKeys extends Commo
public static final String DFS_NAMENODE_EDITS_DIR_MINIMUM_KEY = "dfs.namenode.edits.dir.minimum";
public static final int DFS_NAMENODE_EDITS_DIR_MINIMUM_DEFAULT = 1;
+ public static final String DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD = "dfs.namenode.edit.log.autoroll.multiplier.threshold";
+ public static final float DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD_DEFAULT = 2.0f;
+ public static final String DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS = "dfs.namenode.edit.log.autoroll.check.interval.ms";
+ public static final int DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS_DEFAULT = 5*60*1000;
+
public static final String DFS_NAMENODE_EDITS_NOEDITLOGCHANNELFLUSH = "dfs.namenode.edits.noeditlogchannelflush";
public static final boolean DFS_NAMENODE_EDITS_NOEDITLOGCHANNELFLUSH_DEFAULT = false;
Modified: hadoop/common/branches/branch-2.2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java?rev=1538064&r1=1538063&r2=1538064&view=diff
==============================================================================
--- hadoop/common/branches/branch-2.2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java (original)
+++ hadoop/common/branches/branch-2.2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java Fri Nov 1 21:17:09 2013
@@ -38,6 +38,8 @@ import static org.apache.hadoop.hdfs.DFS
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOGGERS_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DEFAULT_AUDIT_LOGGER_NAME;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_KEY;
@@ -49,6 +51,10 @@ import static org.apache.hadoop.hdfs.DFS
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS_DEFAULT;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ENABLE_RETRY_CACHE_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ENABLE_RETRY_CACHE_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_MAX_OBJECTS_DEFAULT;
@@ -366,6 +372,16 @@ public class FSNamesystem implements Nam
Daemon nnrmthread = null; // NamenodeResourceMonitor thread
+ Daemon nnEditLogRoller = null; // NameNodeEditLogRoller thread
+ /**
+ * Threshold, in number of edits, at which an active namenode rolls its own edit log
+ */
+ private final long editLogRollerThreshold;
+ /**
+ * Check interval of an active namenode's edit log roller thread
+ */
+ private final int editLogRollerInterval;
+
private volatile boolean hasResourcesAvailable = false;
private volatile boolean fsRunning = true;
@@ -676,7 +692,17 @@ public class FSNamesystem implements Nam
this.standbyShouldCheckpoint = conf.getBoolean(
DFS_HA_STANDBY_CHECKPOINTS_KEY, DFS_HA_STANDBY_CHECKPOINTS_DEFAULT);
-
+ // # edit autoroll threshold is a multiple of the checkpoint threshold
+ this.editLogRollerThreshold = (long)
+ (conf.getFloat(
+ DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD,
+ DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD_DEFAULT) *
+ conf.getLong(
+ DFS_NAMENODE_CHECKPOINT_TXNS_KEY,
+ DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT));
+ this.editLogRollerInterval = conf.getInt(
+ DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS,
+ DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS_DEFAULT);
this.inodeId = new INodeId();
// For testing purposes, allow the DT secret manager to be started regardless
@@ -944,6 +970,11 @@ public class FSNamesystem implements Nam
//ResourceMonitor required only at ActiveNN. See HDFS-2914
this.nnrmthread = new Daemon(new NameNodeResourceMonitor());
nnrmthread.start();
+
+ nnEditLogRoller = new Daemon(new NameNodeEditLogRoller(
+ editLogRollerThreshold, editLogRollerInterval));
+ nnEditLogRoller.start();
+
} finally {
writeUnlock();
startingActiveService = false;
@@ -981,6 +1012,10 @@ public class FSNamesystem implements Nam
((NameNodeResourceMonitor) nnrmthread.getRunnable()).stopMonitor();
nnrmthread.interrupt();
}
+ if (nnEditLogRoller != null) {
+ ((NameNodeEditLogRoller)nnEditLogRoller.getRunnable()).stop();
+ nnEditLogRoller.interrupt();
+ }
if (dir != null && dir.fsImage != null) {
if (dir.fsImage.editLog != null) {
dir.fsImage.editLog.close();
@@ -4050,7 +4085,48 @@ public class FSNamesystem implements Nam
shouldNNRmRun = false;
}
}
-
+
+ /** Daemon task that rolls the edit log once the open segment holds too many edits. */
+ class NameNodeEditLogRoller implements Runnable {
+   // Cleared by stop() on a different thread than run(); volatile makes the write visible.
+   private volatile boolean shouldRun = true;
+   private final long rollThreshold;
+   private final long sleepIntervalMs;
+   /** Takes the roll threshold (in # edits) and the sleep between checks (in ms). */
+   public NameNodeEditLogRoller(long rollThreshold, int sleepIntervalMs) {
+     this.rollThreshold = rollThreshold;
+     this.sleepIntervalMs = sleepIntervalMs;
+   }
+   /** Checks the open segment every sleepIntervalMs until stopped or interrupted. */
+   @Override
+   public void run() {
+     while (fsRunning && shouldRun) {
+       try {
+         FSEditLog editLog = getFSImage().getEditLog();
+         long numEdits = editLog.getLastWrittenTxId() - editLog.getCurSegmentTxId();
+         if (numEdits > rollThreshold) {
+           FSNamesystem.LOG.info("NameNode rolling its own edit log because"
+               + " number of edits in open segment exceeds threshold of "
+               + rollThreshold);
+           rollEditLog();
+         }
+         Thread.sleep(sleepIntervalMs);
+       } catch (InterruptedException e) {
+         FSNamesystem.LOG.info(NameNodeEditLogRoller.class.getSimpleName()
+             + " was interrupted, exiting");
+         break;
+       } catch (Exception e) {
+         FSNamesystem.LOG.error("Swallowing exception in "
+             + NameNodeEditLogRoller.class.getSimpleName() + ":", e);
+       }
+     }
+   }
+   /** Asks run() to exit; the loop observes the flag on its next iteration. */
+   public void stop() {
+     shouldRun = false;
+   }
+ }
+
public FSImage getFSImage() {
return dir.fsImage;
}
@@ -5059,7 +5135,9 @@ public class FSNamesystem implements Nam
try {
checkOperation(OperationCategory.JOURNAL);
checkNameNodeSafeMode("Log not rolled");
- LOG.info("Roll Edit Log from " + Server.getRemoteAddress());
+ if (Server.isRpcInvocation()) {
+ LOG.info("Roll Edit Log from " + Server.getRemoteAddress());
+ }
return getFSImage().rollEditLog();
} finally {
writeUnlock();
Modified: hadoop/common/branches/branch-2.2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ActiveState.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ActiveState.java?rev=1538064&r1=1538063&r2=1538064&view=diff
==============================================================================
--- hadoop/common/branches/branch-2.2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ActiveState.java (original)
+++ hadoop/common/branches/branch-2.2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ActiveState.java Fri Nov 1 21:17:09 2013
@@ -38,7 +38,7 @@ public class ActiveState extends HAState
@Override
public void checkOperation(HAContext context, OperationCategory op) {
- return; // Other than journal all operations are allowed in active state
+ return; // All operations are allowed in active state
}
@Override
Modified: hadoop/common/branches/branch-2.2/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.2/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml?rev=1538064&r1=1538063&r2=1538064&view=diff
==============================================================================
--- hadoop/common/branches/branch-2.2/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml (original)
+++ hadoop/common/branches/branch-2.2/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml Fri Nov 1 21:17:09 2013
@@ -1415,4 +1415,30 @@
linearly increases.
</description>
</property>
+
+<property>
+ <name>dfs.namenode.edit.log.autoroll.multiplier.threshold</name>
+ <value>2.0</value>
+ <description>
+ Determines when an active namenode will roll its own edit log.
+ The actual threshold (in number of edits) is determined by multiplying
+ this value by dfs.namenode.checkpoint.txns.
+
+ This prevents extremely large edit files from accumulating on the active
+ namenode, which can cause timeouts during namenode startup and pose an
+ administrative hassle. This behavior is intended as a failsafe for when
+ the standby or secondary namenode fails to roll the edit log by the normal
+ checkpoint threshold.
+ </description>
+</property>
+
+<property>
+ <name>dfs.namenode.edit.log.autoroll.check.interval.ms</name>
+ <value>300000</value>
+ <description>
+ How often an active namenode will check if it needs to roll its edit log,
+ in milliseconds.
+ </description>
+</property>
+
</configuration>