You are viewing a plain text version of this content. The canonical link for it is here.
Posted to hdfs-commits@hadoop.apache.org by to...@apache.org on 2011/07/05 00:47:00 UTC
svn commit: r1142838 - in /hadoop/common/branches/HDFS-1073/hdfs: ./
src/docs/src/documentation/content/xdocs/ src/java/
src/java/org/apache/hadoop/hdfs/
src/java/org/apache/hadoop/hdfs/server/namenode/
src/java/org/apache/hadoop/hdfs/server/protocol/ ...
Author: todd
Date: Mon Jul 4 22:46:59 2011
New Revision: 1142838
URL: http://svn.apache.org/viewvc?rev=1142838&view=rev
Log:
HDFS-2123. Checkpoint interval should be based on txn count, not size. Contributed by Todd Lipcon.
Modified:
hadoop/common/branches/HDFS-1073/hdfs/CHANGES.HDFS-1073.txt
hadoop/common/branches/HDFS-1073/hdfs/src/docs/src/documentation/content/xdocs/hdfs_user_guide.xml
hadoop/common/branches/HDFS-1073/hdfs/src/java/hdfs-default.xml
hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/HdfsConfiguration.java
hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java
hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java
hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java
hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java
hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/protocol/NamenodeProtocol.java
hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java
hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/test/GenericTestUtils.java
Modified: hadoop/common/branches/HDFS-1073/hdfs/CHANGES.HDFS-1073.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1073/hdfs/CHANGES.HDFS-1073.txt?rev=1142838&r1=1142837&r2=1142838&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1073/hdfs/CHANGES.HDFS-1073.txt (original)
+++ hadoop/common/branches/HDFS-1073/hdfs/CHANGES.HDFS-1073.txt Mon Jul 4 22:46:59 2011
@@ -66,3 +66,4 @@ HDFS-2102. Zero-pad edits filename to ma
Kelly via todd)
HDFS-2010. Fix NameNode to exit if all edit streams become inaccessible. (atm
via todd)
+HDFS-2123. Checkpoint interval should be based on txn count, not size. (todd)
Modified: hadoop/common/branches/HDFS-1073/hdfs/src/docs/src/documentation/content/xdocs/hdfs_user_guide.xml
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1073/hdfs/src/docs/src/documentation/content/xdocs/hdfs_user_guide.xml?rev=1142838&r1=1142837&r2=1142838&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1073/hdfs/src/docs/src/documentation/content/xdocs/hdfs_user_guide.xml (original)
+++ hadoop/common/branches/HDFS-1073/hdfs/src/docs/src/documentation/content/xdocs/hdfs_user_guide.xml Mon Jul 4 22:46:59 2011
@@ -271,9 +271,9 @@
the maximum delay between two consecutive checkpoints, and
</li>
<li>
- <code>dfs.namenode.checkpoint.size</code>, set to 64MB by default, defines the
- size of the edits log file that forces an urgent checkpoint even if
- the maximum checkpoint delay is not reached.
+ <code>dfs.namenode.checkpoint.txns</code>, set to 40000 by default, defines the
+ number of uncheckpointed transactions on the NameNode which will force
+ an urgent checkpoint, even if the checkpoint period has not been reached.
</li>
</ul>
<p>
@@ -322,9 +322,9 @@
the maximum delay between two consecutive checkpoints
</li>
<li>
- <code>dfs.namenode.checkpoint.size</code>, set to 64MB by default, defines the
- size of the edits log file that forces an urgent checkpoint even if
- the maximum checkpoint delay is not reached.
+ <code>dfs.namenode.checkpoint.txns</code>, set to 40000 by default, defines the
+ number of uncheckpointed transactions on the NameNode which will force
+ an urgent checkpoint, even if the checkpoint period has not been reached.
</li>
</ul>
<p>
Modified: hadoop/common/branches/HDFS-1073/hdfs/src/java/hdfs-default.xml
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1073/hdfs/src/java/hdfs-default.xml?rev=1142838&r1=1142837&r2=1142838&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1073/hdfs/src/java/hdfs-default.xml (original)
+++ hadoop/common/branches/HDFS-1073/hdfs/src/java/hdfs-default.xml Mon Jul 4 22:46:59 2011
@@ -582,10 +582,20 @@ creations/deletions), or "all".</descrip
</property>
<property>
- <name>dfs.namenode.checkpoint.size</name>
- <value>67108864</value>
- <description>The size of the current edit log (in bytes) that triggers
- a periodic checkpoint even if the dfs.namenode.checkpoint.period hasn't expired.
+ <name>dfs.namenode.checkpoint.txns</name>
+ <value>40000</value>
+ <description>The Secondary NameNode or CheckpointNode will create a checkpoint
+ of the namespace every 'dfs.namenode.checkpoint.txns' transactions, regardless
+ of whether 'dfs.namenode.checkpoint.period' has expired.
+ </description>
+</property>
+
+<property>
+ <name>dfs.namenode.checkpoint.check.period</name>
+ <value>60</value>
+ <description>The SecondaryNameNode and CheckpointNode will poll the NameNode
+ every 'dfs.namenode.checkpoint.check.period' seconds to query the number
+ of uncheckpointed transactions.
</description>
</property>
Modified: hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/DFSConfigKeys.java?rev=1142838&r1=1142837&r2=1142838&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/DFSConfigKeys.java (original)
+++ hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/DFSConfigKeys.java Mon Jul 4 22:46:59 2011
@@ -73,10 +73,12 @@ public class DFSConfigKeys extends Commo
public static final int DFS_NAMENODE_SAFEMODE_MIN_DATANODES_DEFAULT = 0;
public static final String DFS_NAMENODE_SECONDARY_HTTP_ADDRESS_KEY = "dfs.namenode.secondary.http-address";
public static final String DFS_NAMENODE_SECONDARY_HTTP_ADDRESS_DEFAULT = "0.0.0.0:50090";
+ public static final String DFS_NAMENODE_CHECKPOINT_CHECK_PERIOD_KEY = "dfs.namenode.checkpoint.check.period";
+ public static final long DFS_NAMENODE_CHECKPOINT_CHECK_PERIOD_DEFAULT = 60;
public static final String DFS_NAMENODE_CHECKPOINT_PERIOD_KEY = "dfs.namenode.checkpoint.period";
public static final long DFS_NAMENODE_CHECKPOINT_PERIOD_DEFAULT = 3600;
- public static final String DFS_NAMENODE_CHECKPOINT_SIZE_KEY = "dfs.namenode.checkpoint.size";
- public static final long DFS_NAMENODE_CHECKPOINT_SIZE_DEFAULT = 4194304;
+ public static final String DFS_NAMENODE_CHECKPOINT_TXNS_KEY = "dfs.namenode.checkpoint.txns";
+ public static final long DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT = 40000;
public static final String DFS_NAMENODE_UPGRADE_PERMISSION_KEY = "dfs.namenode.upgrade.permission";
public static final int DFS_NAMENODE_UPGRADE_PERMISSION_DEFAULT = 00777;
public static final String DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY = "dfs.namenode.heartbeat.recheck-interval";
Modified: hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/HdfsConfiguration.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/HdfsConfiguration.java?rev=1142838&r1=1142837&r2=1142838&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/HdfsConfiguration.java (original)
+++ hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/HdfsConfiguration.java Mon Jul 4 22:46:59 2011
@@ -85,7 +85,6 @@ public class HdfsConfiguration extends C
deprecate("fs.checkpoint.dir", DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_DIR_KEY);
deprecate("fs.checkpoint.edits.dir", DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_EDITS_DIR_KEY);
deprecate("fs.checkpoint.period", DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_PERIOD_KEY);
- deprecate("fs.checkpoint.size", DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_SIZE_KEY);
deprecate("dfs.upgrade.permission", DFSConfigKeys.DFS_NAMENODE_UPGRADE_PERMISSION_KEY);
deprecate("heartbeat.recheck.interval", DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY);
deprecate("StorageId", DFSConfigKeys.DFS_DATANODE_STORAGEID_KEY);
Modified: hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java?rev=1142838&r1=1142837&r2=1142838&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java (original)
+++ hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java Mon Jul 4 22:46:59 2011
@@ -331,15 +331,6 @@ public class BackupNode extends NameNode
((BackupImage)getFSImage()).reset();
}
- /**
- * Get size of the local journal (edit log).
- * @return size of the current journal
- * @throws IOException
- */
- long journalSize() throws IOException {
- return namesystem.getEditLogSize();
- }
-
// TODO: move to a common with DataNode util class
private static NamespaceInfo handshake(NamenodeProtocol namenode)
throws IOException, SocketTimeoutException {
Modified: hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java?rev=1142838&r1=1142837&r2=1142838&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java (original)
+++ hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java Mon Jul 4 22:46:59 2011
@@ -60,7 +60,7 @@ class Checkpointer extends Daemon {
private BackupNode backupNode;
volatile boolean shouldRun;
private long checkpointPeriod; // in seconds
- private long checkpointSize; // size (in MB) of current Edit Log
+ private long checkpointTxnCount; // number of uncheckpointed txns that triggers a checkpoint
private String infoBindAddress;
@@ -95,8 +95,9 @@ class Checkpointer extends Daemon {
// Initialize other scheduling parameters from the configuration
checkpointPeriod = conf.getLong(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_PERIOD_KEY,
DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_PERIOD_DEFAULT);
- checkpointSize = conf.getLong(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_SIZE_KEY,
- DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_SIZE_DEFAULT);
+ checkpointTxnCount = conf.getLong(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY,
+ DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT);
+ SecondaryNameNode.warnForDeprecatedConfigs(conf);
// Pull out exact http address for posting url to avoid ip aliasing issues
String fullInfoAddr = conf.get(DFS_NAMENODE_BACKUP_HTTP_ADDRESS_KEY,
@@ -110,8 +111,7 @@ class Checkpointer extends Daemon {
LOG.info("Checkpoint Period : " + checkpointPeriod + " secs " +
"(" + checkpointPeriod/60 + " min)");
- LOG.info("Log Size Trigger : " + checkpointSize + " bytes " +
- "(" + checkpointSize/1024 + " KB)");
+ LOG.info("Log Size Trigger : " + checkpointTxnCount + " txns ");
}
/**
@@ -143,8 +143,8 @@ class Checkpointer extends Daemon {
if(now >= lastCheckpointTime + periodMSec) {
shouldCheckpoint = true;
} else {
- long size = getJournalSize();
- if(size >= checkpointSize)
+ long txns = countUncheckpointedTxns();
+ if(txns >= checkpointTxnCount)
shouldCheckpoint = true;
}
if(shouldCheckpoint) {
@@ -166,14 +166,12 @@ class Checkpointer extends Daemon {
}
}
- private long getJournalSize() throws IOException {
- // If BACKUP node has been loaded
- // get edits size from the local file. ACTIVE has the same.
- if(backupNode.isRole(NamenodeRole.BACKUP)
- && getFSImage().getEditLog().isOpen())
- return backupNode.journalSize();
- // Go to the ACTIVE node for its size
- return getNamenode().journalSize(backupNode.getRegistration());
+ private long countUncheckpointedTxns() throws IOException {
+ long curTxId = getNamenode().getTransactionID();
+ long uncheckpointedTxns = curTxId -
+ getFSImage().getStorage().getMostRecentCheckpointTxId();
+ assert uncheckpointedTxns >= 0;
+ return uncheckpointedTxns;
}
/**
Modified: hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java?rev=1142838&r1=1142837&r2=1142838&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java (original)
+++ hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java Mon Jul 4 22:46:59 2011
@@ -4795,8 +4795,8 @@ public class FSNamesystem implements FSC
}
}
- long getEditLogSize() throws IOException {
- return getEditLog().getEditLogSize();
+ public long getTransactionID() {
+ return getEditLog().getSyncTxId();
}
CheckpointSignature rollEditLog() throws IOException {
Modified: hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java?rev=1142838&r1=1142837&r2=1142838&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java (original)
+++ hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java Mon Jul 4 22:46:59 2011
@@ -738,13 +738,6 @@ public class NameNode implements Namenod
}
@Override // NamenodeProtocol
- public long journalSize(NamenodeRegistration registration)
- throws IOException {
- verifyRequest(registration);
- return namesystem.getEditLogSize();
- }
-
- @Override // NamenodeProtocol
public void journal(NamenodeRegistration registration,
int jAction,
int length,
@@ -1124,9 +1117,9 @@ public class NameNode implements Namenod
namesystem.refreshNodes(new HdfsConfiguration());
}
- @Deprecated // NamenodeProtocol
- public long getEditLogSize() throws IOException {
- return namesystem.getEditLogSize();
+ @Override // NamenodeProtocol
+ public long getTransactionID() {
+ return namesystem.getTransactionID();
}
@Deprecated
Modified: hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java?rev=1142838&r1=1142837&r2=1142838&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java (original)
+++ hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java Mon Jul 4 22:46:59 2011
@@ -108,8 +108,16 @@ public class SecondaryNameNode implement
private Collection<URI> checkpointDirs;
private Collection<URI> checkpointEditsDirs;
+
+ /** How often to checkpoint regardless of number of txns */
private long checkpointPeriod; // in seconds
- private long checkpointSize; // size (in bytes) of current Edit Log
+
+ /** How often to poll the NN to check checkpointTxnCount */
+ private long checkpointCheckPeriod; // in seconds
+
+ /** checkpoint once every this many transactions, regardless of time */
+ private long checkpointTxnCount;
+
/** {@inheritDoc} */
public String toString() {
@@ -118,8 +126,8 @@ public class SecondaryNameNode implement
+ "\nStart Time : " + new Date(starttime)
+ "\nLast Checkpoint Time : " + (lastCheckpointTime == 0? "--": new Date(lastCheckpointTime))
+ "\nCheckpoint Period : " + checkpointPeriod + " seconds"
- + "\nCheckpoint Size : " + StringUtils.byteDesc(checkpointSize)
- + " (= " + checkpointSize + " bytes)"
+ + "\nCheckpoint Size : " + StringUtils.byteDesc(checkpointTxnCount)
+ + " (= " + checkpointTxnCount + " bytes)"
+ "\nCheckpoint Dirs : " + checkpointDirs
+ "\nCheckpoint Edits Dirs: " + checkpointEditsDirs;
}
@@ -205,10 +213,15 @@ public class SecondaryNameNode implement
checkpointImage.recoverCreate();
// Initialize other scheduling parameters from the configuration
+ checkpointCheckPeriod = conf.getLong(
+ DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_CHECK_PERIOD_KEY,
+ DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_CHECK_PERIOD_DEFAULT);
+
checkpointPeriod = conf.getLong(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_PERIOD_KEY,
DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_PERIOD_DEFAULT);
- checkpointSize = conf.getLong(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_SIZE_KEY,
- DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_SIZE_DEFAULT);
+ checkpointTxnCount = conf.getLong(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY,
+ DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT);
+ warnForDeprecatedConfigs(conf);
// initialize the webserver for uploading files.
// Kerberized SSL servers must be run from the host principal...
@@ -264,10 +277,21 @@ public class SecondaryNameNode implement
conf.set(DFSConfigKeys.DFS_NAMENODE_SECONDARY_HTTP_ADDRESS_KEY, infoBindAddress + ":" +infoPort);
LOG.info("Secondary Web-server up at: " + infoBindAddress + ":" +infoPort);
LOG.info("Secondary image servlet up at: " + infoBindAddress + ":" + imagePort);
- LOG.warn("Checkpoint Period :" + checkpointPeriod + " secs " +
+ LOG.info("Checkpoint Period :" + checkpointPeriod + " secs " +
"(" + checkpointPeriod/60 + " min)");
- LOG.warn("Log Size Trigger :" + checkpointSize + " bytes " +
- "(" + checkpointSize/1024 + " KB)");
+ LOG.info("Log Size Trigger :" + checkpointTxnCount + " txns");
+ }
+
+ static void warnForDeprecatedConfigs(Configuration conf) {
+ for (String key : ImmutableList.of(
+ "fs.checkpoint.size",
+ "dfs.namenode.checkpoint.size")) {
+ if (conf.get(key) != null) {
+ LOG.warn("Configuration key " + key + " is deprecated! Ignoring..." +
+ " Instead please specify a value for " +
+ DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY);
+ }
+ }
}
/**
@@ -315,13 +339,10 @@ public class SecondaryNameNode implement
public void doWork() {
//
- // Poll the Namenode (once every 5 minutes) to find the size of the
- // pending edit log.
+ // Poll the Namenode (once every checkpointCheckPeriod seconds) to find the
+ // number of transactions in the edit log that haven't yet been checkpointed.
//
- long period = 5 * 60; // 5 minutes
- if (checkpointPeriod < period) {
- period = checkpointPeriod;
- }
+ long period = Math.min(checkpointCheckPeriod, checkpointPeriod);
while (shouldRun) {
try {
@@ -339,8 +360,7 @@ public class SecondaryNameNode implement
long now = System.currentTimeMillis();
- long size = namenode.getEditLogSize();
- if (size >= checkpointSize ||
+ if (shouldCheckpointBasedOnCount() ||
now >= lastCheckpointTime + 1000 * checkpointPeriod) {
doCheckpoint();
lastCheckpointTime = now;
@@ -551,19 +571,20 @@ public class SecondaryNameNode implement
exitCode = 0;
try {
if ("-checkpoint".equals(cmd)) {
- long size = namenode.getEditLogSize();
- if (size >= checkpointSize ||
+ long count = countUncheckpointedTxns();
+ if (count > checkpointTxnCount ||
argv.length == 2 && "force".equals(argv[i])) {
doCheckpoint();
} else {
- System.err.println("EditLog size " + size + " bytes is " +
+ System.err.println("EditLog size " + count + " transactions is " +
"smaller than configured checkpoint " +
- "size " + checkpointSize + " bytes.");
+ "interval " + checkpointTxnCount + " transactions.");
System.err.println("Skipping checkpoint.");
}
} else if ("-geteditsize".equals(cmd)) {
- long size = namenode.getEditLogSize();
- System.out.println("EditLog size is " + size + " bytes");
+ long uncheckpointed = countUncheckpointedTxns();
+ System.out.println("NameNode has " + uncheckpointed +
+ " uncheckpointed transactions");
} else {
exitCode = -1;
LOG.error(cmd.substring(1) + ": Unknown command");
@@ -596,6 +617,18 @@ public class SecondaryNameNode implement
return exitCode;
}
+ private long countUncheckpointedTxns() throws IOException {
+ long curTxId = namenode.getTransactionID();
+ long uncheckpointedTxns = curTxId -
+ checkpointImage.getStorage().getMostRecentCheckpointTxId();
+ assert uncheckpointedTxns >= 0;
+ return uncheckpointedTxns;
+ }
+
+ boolean shouldCheckpointBasedOnCount() throws IOException {
+ return countUncheckpointedTxns() >= checkpointTxnCount;
+ }
+
/**
* Displays format of commands.
* @param cmd The command that is being executed.
Modified: hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/protocol/NamenodeProtocol.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/protocol/NamenodeProtocol.java?rev=1142838&r1=1142837&r2=1142838&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/protocol/NamenodeProtocol.java (original)
+++ hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/protocol/NamenodeProtocol.java Mon Jul 4 22:46:59 2011
@@ -82,14 +82,11 @@ public interface NamenodeProtocol extend
public ExportedBlockKeys getBlockKeys() throws IOException;
/**
- * Get the size of the current edit log (in bytes).
- * @return The number of bytes in the current edit log.
+ * @return The most recent transaction ID that has been synced to
+ * persistent storage.
* @throws IOException
- * @deprecated
- * See {@link org.apache.hadoop.hdfs.server.namenode.SecondaryNameNode}
*/
- @Deprecated
- public long getEditLogSize() throws IOException;
+ public long getTransactionID() throws IOException;
/**
* Closes the current edit log and opens a new one. The
@@ -172,15 +169,6 @@ public interface NamenodeProtocol extend
throws IOException;
/**
- * Get the size of the active name-node journal (edit log) in bytes.
- *
- * @param registration the requesting node
- * @return The number of bytes in the journal.
- * @throws IOException
- */
- public long journalSize(NamenodeRegistration registration) throws IOException;
-
- /**
* Journal edit records.
* This message is sent by the active name-node to the backup node
* via {@code EditLogBackupOutputStream} in order to synchronize meta-data
Modified: hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java?rev=1142838&r1=1142837&r2=1142838&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java (original)
+++ hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java Mon Jul 4 22:46:59 2011
@@ -65,6 +65,7 @@ import org.mockito.invocation.Invocation
import org.mockito.stubbing.Answer;
import com.google.common.base.Joiner;
+import com.google.common.base.Supplier;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
@@ -1525,6 +1526,57 @@ public class TestCheckpoint extends Test
}
}
}
+
+ /**
+ * Test that the 2NN triggers a checkpoint after the configurable interval
+ */
+ @SuppressWarnings("deprecation")
+ public void testCheckpointTriggerOnTxnCount() throws Exception {
+ MiniDFSCluster cluster = null;
+ SecondaryNameNode secondary = null;
+ Configuration conf = new HdfsConfiguration();
+
+ conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY, 10);
+ conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_CHECK_PERIOD_KEY, 1);
+
+ try {
+ cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0)
+ .format(true).build();
+ FileSystem fs = cluster.getFileSystem();
+ secondary = startSecondaryNameNode(conf);
+ Thread t = new Thread(secondary);
+ t.start();
+ final NNStorage storage = secondary.getFSImage().getStorage();
+
+ // 2NN should checkpoint at startup
+ GenericTestUtils.waitFor(new Supplier<Boolean>() {
+ @Override
+ public Boolean get() {
+ LOG.info("Waiting for checkpoint txn id to go to 2");
+ return storage.getMostRecentCheckpointTxId() == 2;
+ }
+ }, 200, 15000);
+
+ // If we make 10 transactions, it should checkpoint again
+ for (int i = 0; i < 10; i++) {
+ fs.mkdirs(new Path("/test" + i));
+ }
+
+ GenericTestUtils.waitFor(new Supplier<Boolean>() {
+ @Override
+ public Boolean get() {
+ LOG.info("Waiting for checkpoint txn id to go > 2");
+ return storage.getMostRecentCheckpointTxId() > 2;
+ }
+ }, 200, 15000);
+ } finally {
+ cleanup(secondary);
+ if (cluster != null) {
+ cluster.shutdown();
+ }
+ }
+ }
+
@SuppressWarnings("deprecation")
private void cleanup(SecondaryNameNode snn) {
Modified: hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/test/GenericTestUtils.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/test/GenericTestUtils.java?rev=1142838&r1=1142837&r2=1142838&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/test/GenericTestUtils.java (original)
+++ hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/test/GenericTestUtils.java Mon Jul 4 22:46:59 2011
@@ -23,6 +23,7 @@ import java.util.Arrays;
import java.util.List;
import java.util.Set;
import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeoutException;
import org.apache.commons.logging.Log;
import org.apache.hadoop.fs.FileUtil;
@@ -34,6 +35,8 @@ import org.mockito.invocation.Invocation
import org.mockito.stubbing.Answer;
import com.google.common.base.Joiner;
+import com.google.common.base.Predicate;
+import com.google.common.base.Supplier;
import com.google.common.collect.Sets;
/**
@@ -84,6 +87,23 @@ public abstract class GenericTestUtils {
msg.contains(string));
}
+ public static void waitFor(Supplier<Boolean> check,
+ int checkEveryMillis, int waitForMillis)
+ throws TimeoutException, InterruptedException
+ {
+ long st = System.currentTimeMillis();
+ do {
+ boolean result = check.get();
+ if (result) {
+ return;
+ }
+
+ Thread.sleep(checkEveryMillis);
+ } while (System.currentTimeMillis() - st < waitForMillis);
+ throw new TimeoutException("Timed out waiting for condition");
+ }
+
+
/**
* Mockito answer helper that triggers one latch as soon as the
* method is called, then waits on another before continuing.