You are viewing a plain text version of this content. The canonical link for it is here.
Posted to hdfs-commits@hadoop.apache.org by to...@apache.org on 2011/07/05 00:47:00 UTC

svn commit: r1142838 - in /hadoop/common/branches/HDFS-1073/hdfs: ./ src/docs/src/documentation/content/xdocs/ src/java/ src/java/org/apache/hadoop/hdfs/ src/java/org/apache/hadoop/hdfs/server/namenode/ src/java/org/apache/hadoop/hdfs/server/protocol/ ...

Author: todd
Date: Mon Jul  4 22:46:59 2011
New Revision: 1142838

URL: http://svn.apache.org/viewvc?rev=1142838&view=rev
Log:
HDFS-2123. Checkpoint interval should be based on txn count, not size. Contributed by Todd Lipcon.

Modified:
    hadoop/common/branches/HDFS-1073/hdfs/CHANGES.HDFS-1073.txt
    hadoop/common/branches/HDFS-1073/hdfs/src/docs/src/documentation/content/xdocs/hdfs_user_guide.xml
    hadoop/common/branches/HDFS-1073/hdfs/src/java/hdfs-default.xml
    hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
    hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/HdfsConfiguration.java
    hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java
    hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java
    hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
    hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java
    hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java
    hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/protocol/NamenodeProtocol.java
    hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java
    hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/test/GenericTestUtils.java

Modified: hadoop/common/branches/HDFS-1073/hdfs/CHANGES.HDFS-1073.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1073/hdfs/CHANGES.HDFS-1073.txt?rev=1142838&r1=1142837&r2=1142838&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1073/hdfs/CHANGES.HDFS-1073.txt (original)
+++ hadoop/common/branches/HDFS-1073/hdfs/CHANGES.HDFS-1073.txt Mon Jul  4 22:46:59 2011
@@ -66,3 +66,4 @@ HDFS-2102. Zero-pad edits filename to ma
            Kelly via todd)
 HDFS-2010. Fix NameNode to exit if all edit streams become inaccessible. (atm
            via todd)
+HDFS-2123. Checkpoint interval should be based on txn count, not size. (todd)

Modified: hadoop/common/branches/HDFS-1073/hdfs/src/docs/src/documentation/content/xdocs/hdfs_user_guide.xml
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1073/hdfs/src/docs/src/documentation/content/xdocs/hdfs_user_guide.xml?rev=1142838&r1=1142837&r2=1142838&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1073/hdfs/src/docs/src/documentation/content/xdocs/hdfs_user_guide.xml (original)
+++ hadoop/common/branches/HDFS-1073/hdfs/src/docs/src/documentation/content/xdocs/hdfs_user_guide.xml Mon Jul  4 22:46:59 2011
@@ -271,9 +271,9 @@
         the maximum delay between two consecutive checkpoints, and 
       </li>
       <li>
-        <code>dfs.namenode.checkpoint.size</code>, set to 64MB by default, defines the
-        size of the edits log file that forces an urgent checkpoint even if 
-        the maximum checkpoint delay is not reached.
+        <code>dfs.namenode.checkpoint.txns</code>, set to 40000 default, defines the
+        number of uncheckpointed transactions on the NameNode which will force
+        an urgent checkpoint, even if the checkpoint period has not been reached.
       </li>
    </ul>
    <p>
@@ -322,9 +322,9 @@
         the maximum delay between two consecutive checkpoints 
       </li>
       <li>
-        <code>dfs.namenode.checkpoint.size</code>, set to 64MB by default, defines the
-        size of the edits log file that forces an urgent checkpoint even if 
-        the maximum checkpoint delay is not reached.
+        <code>dfs.namenode.checkpoint.txns</code>, set to 40000 default, defines the
+        number of uncheckpointed transactions on the NameNode which will force
+        an urgent checkpoint, even if the checkpoint period has not been reached.
       </li>
    </ul>
    <p>

Modified: hadoop/common/branches/HDFS-1073/hdfs/src/java/hdfs-default.xml
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1073/hdfs/src/java/hdfs-default.xml?rev=1142838&r1=1142837&r2=1142838&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1073/hdfs/src/java/hdfs-default.xml (original)
+++ hadoop/common/branches/HDFS-1073/hdfs/src/java/hdfs-default.xml Mon Jul  4 22:46:59 2011
@@ -582,10 +582,20 @@ creations/deletions), or "all".</descrip
 </property>
 
 <property>
-  <name>dfs.namenode.checkpoint.size</name>
-  <value>67108864</value>
-  <description>The size of the current edit log (in bytes) that triggers
-       a periodic checkpoint even if the dfs.namenode.checkpoint.period hasn't expired.
+  <name>dfs.namenode.checkpoint.txns</name>
+  <value>40000</value>
+  <description>The Secondary NameNode or CheckpointNode will create a checkpoint
+  of the namespace every 'dfs.namenode.checkpoint.txns' transactions, regardless
+  of whether 'dfs.namenode.checkpoint.period' has expired.
+  </description>
+</property>
+
+<property>
+  <name>dfs.namenode.checkpoint.check.period</name>
+  <value>60</value>
+  <description>The SecondaryNameNode and CheckpointNode will poll the NameNode
+  every 'dfs.namenode.checkpoint.check.period' seconds to query the number
+  of uncheckpointed transactions.
   </description>
 </property>
 

Modified: hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/DFSConfigKeys.java?rev=1142838&r1=1142837&r2=1142838&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/DFSConfigKeys.java (original)
+++ hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/DFSConfigKeys.java Mon Jul  4 22:46:59 2011
@@ -73,10 +73,12 @@ public class DFSConfigKeys extends Commo
   public static final int     DFS_NAMENODE_SAFEMODE_MIN_DATANODES_DEFAULT = 0;
   public static final String  DFS_NAMENODE_SECONDARY_HTTP_ADDRESS_KEY = "dfs.namenode.secondary.http-address";
   public static final String  DFS_NAMENODE_SECONDARY_HTTP_ADDRESS_DEFAULT = "0.0.0.0:50090";
+  public static final String  DFS_NAMENODE_CHECKPOINT_CHECK_PERIOD_KEY = "dfs.namenode.checkpoint.check.period";
+  public static final long    DFS_NAMENODE_CHECKPOINT_CHECK_PERIOD_DEFAULT = 60;
   public static final String  DFS_NAMENODE_CHECKPOINT_PERIOD_KEY = "dfs.namenode.checkpoint.period";
   public static final long    DFS_NAMENODE_CHECKPOINT_PERIOD_DEFAULT = 3600;
-  public static final String  DFS_NAMENODE_CHECKPOINT_SIZE_KEY = "dfs.namenode.checkpoint.size";
-  public static final long    DFS_NAMENODE_CHECKPOINT_SIZE_DEFAULT = 4194304;
+  public static final String  DFS_NAMENODE_CHECKPOINT_TXNS_KEY = "dfs.namenode.checkpoint.txns";
+  public static final long    DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT = 40000;
   public static final String  DFS_NAMENODE_UPGRADE_PERMISSION_KEY = "dfs.namenode.upgrade.permission";
   public static final int     DFS_NAMENODE_UPGRADE_PERMISSION_DEFAULT = 00777;
   public static final String  DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY = "dfs.namenode.heartbeat.recheck-interval";

Modified: hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/HdfsConfiguration.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/HdfsConfiguration.java?rev=1142838&r1=1142837&r2=1142838&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/HdfsConfiguration.java (original)
+++ hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/HdfsConfiguration.java Mon Jul  4 22:46:59 2011
@@ -85,7 +85,6 @@ public class HdfsConfiguration extends C
     deprecate("fs.checkpoint.dir", DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_DIR_KEY);
     deprecate("fs.checkpoint.edits.dir", DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_EDITS_DIR_KEY);
     deprecate("fs.checkpoint.period", DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_PERIOD_KEY);
-    deprecate("fs.checkpoint.size", DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_SIZE_KEY);
     deprecate("dfs.upgrade.permission", DFSConfigKeys.DFS_NAMENODE_UPGRADE_PERMISSION_KEY);
     deprecate("heartbeat.recheck.interval", DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY);
     deprecate("StorageId", DFSConfigKeys.DFS_DATANODE_STORAGEID_KEY);

Modified: hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java?rev=1142838&r1=1142837&r2=1142838&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java (original)
+++ hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java Mon Jul  4 22:46:59 2011
@@ -331,15 +331,6 @@ public class BackupNode extends NameNode
     ((BackupImage)getFSImage()).reset();
   }
 
-  /**
-   * Get size of the local journal (edit log).
-   * @return size of the current journal
-   * @throws IOException
-   */
-  long journalSize() throws IOException {
-    return namesystem.getEditLogSize();
-  }
-
   // TODO: move to a common with DataNode util class
   private static NamespaceInfo handshake(NamenodeProtocol namenode)
   throws IOException, SocketTimeoutException {

Modified: hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java?rev=1142838&r1=1142837&r2=1142838&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java (original)
+++ hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java Mon Jul  4 22:46:59 2011
@@ -60,7 +60,7 @@ class Checkpointer extends Daemon {
   private BackupNode backupNode;
   volatile boolean shouldRun;
   private long checkpointPeriod;    // in seconds
-  private long checkpointSize;    // size (in MB) of current Edit Log
+  private long checkpointTxnCount;    // size (in MB) of current Edit Log
 
   private String infoBindAddress;
 
@@ -95,8 +95,9 @@ class Checkpointer extends Daemon {
     // Initialize other scheduling parameters from the configuration
     checkpointPeriod = conf.getLong(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_PERIOD_KEY, 
                                     DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_PERIOD_DEFAULT);
-    checkpointSize = conf.getLong(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_SIZE_KEY, 
-                                  DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_SIZE_DEFAULT);
+    checkpointTxnCount = conf.getLong(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY, 
+                                  DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT);
+    SecondaryNameNode.warnForDeprecatedConfigs(conf);
 
     // Pull out exact http address for posting url to avoid ip aliasing issues
     String fullInfoAddr = conf.get(DFS_NAMENODE_BACKUP_HTTP_ADDRESS_KEY, 
@@ -110,8 +111,7 @@ class Checkpointer extends Daemon {
 
     LOG.info("Checkpoint Period : " + checkpointPeriod + " secs " +
              "(" + checkpointPeriod/60 + " min)");
-    LOG.info("Log Size Trigger  : " + checkpointSize + " bytes " +
-             "(" + checkpointSize/1024 + " KB)");
+    LOG.info("Log Size Trigger  : " + checkpointTxnCount + " txns ");
   }
 
   /**
@@ -143,8 +143,8 @@ class Checkpointer extends Daemon {
         if(now >= lastCheckpointTime + periodMSec) {
           shouldCheckpoint = true;
         } else {
-          long size = getJournalSize();
-          if(size >= checkpointSize)
+          long txns = countUncheckpointedTxns();
+          if(txns >= checkpointTxnCount)
             shouldCheckpoint = true;
         }
         if(shouldCheckpoint) {
@@ -166,14 +166,12 @@ class Checkpointer extends Daemon {
     }
   }
 
-  private long getJournalSize() throws IOException {
-    // If BACKUP node has been loaded
-    // get edits size from the local file. ACTIVE has the same.
-    if(backupNode.isRole(NamenodeRole.BACKUP)
-        && getFSImage().getEditLog().isOpen())
-      return backupNode.journalSize();
-    // Go to the ACTIVE node for its size
-    return getNamenode().journalSize(backupNode.getRegistration());
+  private long countUncheckpointedTxns() throws IOException {
+    long curTxId = getNamenode().getTransactionID();
+    long uncheckpointedTxns = curTxId -
+      getFSImage().getStorage().getMostRecentCheckpointTxId();
+    assert uncheckpointedTxns >= 0;
+    return uncheckpointedTxns;
   }
 
   /**

Modified: hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java?rev=1142838&r1=1142837&r2=1142838&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java (original)
+++ hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java Mon Jul  4 22:46:59 2011
@@ -4795,8 +4795,8 @@ public class FSNamesystem implements FSC
     }
   }
 
-  long getEditLogSize() throws IOException {
-    return getEditLog().getEditLogSize();
+  public long getTransactionID() {
+    return getEditLog().getSyncTxId();
   }
 
   CheckpointSignature rollEditLog() throws IOException {

Modified: hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java?rev=1142838&r1=1142837&r2=1142838&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java (original)
+++ hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java Mon Jul  4 22:46:59 2011
@@ -738,13 +738,6 @@ public class NameNode implements Namenod
   }
 
   @Override // NamenodeProtocol
-  public long journalSize(NamenodeRegistration registration)
-  throws IOException {
-    verifyRequest(registration);
-    return namesystem.getEditLogSize();
-  }
-
-  @Override // NamenodeProtocol
   public void journal(NamenodeRegistration registration,
                       int jAction,
                       int length,
@@ -1124,9 +1117,9 @@ public class NameNode implements Namenod
     namesystem.refreshNodes(new HdfsConfiguration());
   }
 
-  @Deprecated // NamenodeProtocol
-  public long getEditLogSize() throws IOException {
-    return namesystem.getEditLogSize();
+  @Override // NamenodeProtocol
+  public long getTransactionID() {
+    return namesystem.getTransactionID();
   }
 
   @Deprecated

Modified: hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java?rev=1142838&r1=1142837&r2=1142838&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java (original)
+++ hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java Mon Jul  4 22:46:59 2011
@@ -108,8 +108,16 @@ public class SecondaryNameNode implement
 
   private Collection<URI> checkpointDirs;
   private Collection<URI> checkpointEditsDirs;
+  
+  /** How often to checkpoint regardless of number of txns */
   private long checkpointPeriod;    // in seconds
-  private long checkpointSize;    // size (in bytes) of current Edit Log
+  
+  /** How often to poll the NN to check checkpointTxnCount */
+  private long checkpointCheckPeriod; // in seconds
+  
+  /** checkpoint once every this many transactions, regardless of time */
+  private long checkpointTxnCount;
+
 
   /** {@inheritDoc} */
   public String toString() {
@@ -118,8 +126,8 @@ public class SecondaryNameNode implement
       + "\nStart Time           : " + new Date(starttime)
       + "\nLast Checkpoint Time : " + (lastCheckpointTime == 0? "--": new Date(lastCheckpointTime))
       + "\nCheckpoint Period    : " + checkpointPeriod + " seconds"
-      + "\nCheckpoint Size      : " + StringUtils.byteDesc(checkpointSize)
-                                    + " (= " + checkpointSize + " bytes)" 
+      + "\nCheckpoint Size      : " + StringUtils.byteDesc(checkpointTxnCount)
+                                    + " (= " + checkpointTxnCount + " bytes)" 
       + "\nCheckpoint Dirs      : " + checkpointDirs
       + "\nCheckpoint Edits Dirs: " + checkpointEditsDirs;
   }
@@ -205,10 +213,15 @@ public class SecondaryNameNode implement
     checkpointImage.recoverCreate();
 
     // Initialize other scheduling parameters from the configuration
+    checkpointCheckPeriod = conf.getLong(
+        DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_CHECK_PERIOD_KEY,
+        DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_CHECK_PERIOD_DEFAULT);
+        
     checkpointPeriod = conf.getLong(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_PERIOD_KEY, 
                                     DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_PERIOD_DEFAULT);
-    checkpointSize = conf.getLong(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_SIZE_KEY, 
-                                  DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_SIZE_DEFAULT);
+    checkpointTxnCount = conf.getLong(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY, 
+                                  DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT);
+    warnForDeprecatedConfigs(conf);
 
     // initialize the webserver for uploading files.
     // Kerberized SSL servers must be run from the host principal...
@@ -264,10 +277,21 @@ public class SecondaryNameNode implement
     conf.set(DFSConfigKeys.DFS_NAMENODE_SECONDARY_HTTP_ADDRESS_KEY, infoBindAddress + ":" +infoPort); 
     LOG.info("Secondary Web-server up at: " + infoBindAddress + ":" +infoPort);
     LOG.info("Secondary image servlet up at: " + infoBindAddress + ":" + imagePort);
-    LOG.warn("Checkpoint Period   :" + checkpointPeriod + " secs " +
+    LOG.info("Checkpoint Period   :" + checkpointPeriod + " secs " +
              "(" + checkpointPeriod/60 + " min)");
-    LOG.warn("Log Size Trigger    :" + checkpointSize + " bytes " +
-             "(" + checkpointSize/1024 + " KB)");
+    LOG.info("Log Size Trigger    :" + checkpointTxnCount + " txns");
+  }
+
+  static void warnForDeprecatedConfigs(Configuration conf) {
+    for (String key : ImmutableList.of(
+          "fs.checkpoint.size",
+          "dfs.namenode.checkpoint.size")) {
+      if (conf.get(key) != null) {
+        LOG.warn("Configuration key " + key + " is deprecated! Ignoring..." +
+            " Instead please specify a value for " +
+            DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY);
+      }
+    }
   }
 
   /**
@@ -315,13 +339,10 @@ public class SecondaryNameNode implement
   public void doWork() {
 
     //
-    // Poll the Namenode (once every 5 minutes) to find the size of the
-    // pending edit log.
+    // Poll the Namenode (once every checkpointCheckPeriod seconds) to find the
+    // number of transactions in the edit log that haven't yet been checkpointed.
     //
-    long period = 5 * 60;              // 5 minutes
-    if (checkpointPeriod < period) {
-      period = checkpointPeriod;
-    }
+    long period = Math.min(checkpointCheckPeriod, checkpointPeriod);
 
     while (shouldRun) {
       try {
@@ -339,8 +360,7 @@ public class SecondaryNameNode implement
         
         long now = System.currentTimeMillis();
 
-        long size = namenode.getEditLogSize();
-        if (size >= checkpointSize || 
+        if (shouldCheckpointBasedOnCount() ||
             now >= lastCheckpointTime + 1000 * checkpointPeriod) {
           doCheckpoint();
           lastCheckpointTime = now;
@@ -551,19 +571,20 @@ public class SecondaryNameNode implement
     exitCode = 0;
     try {
       if ("-checkpoint".equals(cmd)) {
-        long size = namenode.getEditLogSize();
-        if (size >= checkpointSize || 
+        long count = countUncheckpointedTxns();
+        if (count > checkpointTxnCount ||
             argv.length == 2 && "force".equals(argv[i])) {
           doCheckpoint();
         } else {
-          System.err.println("EditLog size " + size + " bytes is " +
+          System.err.println("EditLog size " + count + " transactions is " +
                              "smaller than configured checkpoint " +
-                             "size " + checkpointSize + " bytes.");
+                             "interval " + checkpointTxnCount + " transactions.");
           System.err.println("Skipping checkpoint.");
         }
       } else if ("-geteditsize".equals(cmd)) {
-        long size = namenode.getEditLogSize();
-        System.out.println("EditLog size is " + size + " bytes");
+        long uncheckpointed = countUncheckpointedTxns();
+        System.out.println("NameNode has " + uncheckpointed +
+            " uncheckpointed transactions");
       } else {
         exitCode = -1;
         LOG.error(cmd.substring(1) + ": Unknown command");
@@ -596,6 +617,18 @@ public class SecondaryNameNode implement
     return exitCode;
   }
 
+  private long countUncheckpointedTxns() throws IOException {
+    long curTxId = namenode.getTransactionID();
+    long uncheckpointedTxns = curTxId -
+      checkpointImage.getStorage().getMostRecentCheckpointTxId();
+    assert uncheckpointedTxns >= 0;
+    return uncheckpointedTxns;
+  }
+
+  boolean shouldCheckpointBasedOnCount() throws IOException {
+    return countUncheckpointedTxns() >= checkpointTxnCount;
+  }
+
   /**
    * Displays format of commands.
    * @param cmd The command that is being executed.

Modified: hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/protocol/NamenodeProtocol.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/protocol/NamenodeProtocol.java?rev=1142838&r1=1142837&r2=1142838&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/protocol/NamenodeProtocol.java (original)
+++ hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/protocol/NamenodeProtocol.java Mon Jul  4 22:46:59 2011
@@ -82,14 +82,11 @@ public interface NamenodeProtocol extend
   public ExportedBlockKeys getBlockKeys() throws IOException;
 
   /**
-   * Get the size of the current edit log (in bytes).
-   * @return The number of bytes in the current edit log.
+   * @return The most recent transaction ID that has been synced to
+   * persistent storage.
    * @throws IOException
-   * @deprecated 
-   *    See {@link org.apache.hadoop.hdfs.server.namenode.SecondaryNameNode}
    */
-  @Deprecated
-  public long getEditLogSize() throws IOException;
+  public long getTransactionID() throws IOException;
 
   /**
    * Closes the current edit log and opens a new one. The 
@@ -172,15 +169,6 @@ public interface NamenodeProtocol extend
     throws IOException;
 
   /**
-   * Get the size of the active name-node journal (edit log) in bytes.
-   * 
-   * @param registration the requesting node
-   * @return The number of bytes in the journal.
-   * @throws IOException
-   */
-  public long journalSize(NamenodeRegistration registration) throws IOException;
-
-  /**
    * Journal edit records.
    * This message is sent by the active name-node to the backup node
    * via {@code EditLogBackupOutputStream} in order to synchronize meta-data

Modified: hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java?rev=1142838&r1=1142837&r2=1142838&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java (original)
+++ hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java Mon Jul  4 22:46:59 2011
@@ -65,6 +65,7 @@ import org.mockito.invocation.Invocation
 import org.mockito.stubbing.Answer;
 
 import com.google.common.base.Joiner;
+import com.google.common.base.Supplier;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableSet;
 import com.google.common.collect.Lists;
@@ -1525,6 +1526,57 @@ public class TestCheckpoint extends Test
       }
     }
   }
+  
+  /**
+   * Test that the 2NN triggers a checkpoint after the configurable interval
+   */
+  @SuppressWarnings("deprecation")
+  public void testCheckpointTriggerOnTxnCount() throws Exception {
+    MiniDFSCluster cluster = null;
+    SecondaryNameNode secondary = null;
+    Configuration conf = new HdfsConfiguration();
+
+    conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY, 10);
+    conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_CHECK_PERIOD_KEY, 1);
+    
+    try {
+      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0)
+          .format(true).build();
+      FileSystem fs = cluster.getFileSystem();
+      secondary = startSecondaryNameNode(conf);
+      Thread t = new Thread(secondary);
+      t.start();
+      final NNStorage storage = secondary.getFSImage().getStorage();
+
+      // 2NN should checkpoint at startup
+      GenericTestUtils.waitFor(new Supplier<Boolean>() {
+        @Override
+        public Boolean get() {
+          LOG.info("Waiting for checkpoint txn id to go to 2");
+          return storage.getMostRecentCheckpointTxId() == 2;
+        }
+      }, 200, 15000);
+
+      // If we make 10 transactions, it should checkpoint again
+      for (int i = 0; i < 10; i++) {
+        fs.mkdirs(new Path("/test" + i));
+      }
+      
+      GenericTestUtils.waitFor(new Supplier<Boolean>() {
+        @Override
+        public Boolean get() {
+          LOG.info("Waiting for checkpoint txn id to go > 2");
+          return storage.getMostRecentCheckpointTxId() > 2;
+        }
+      }, 200, 15000);
+    } finally {
+      cleanup(secondary);
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+    }
+  }
+
 
   @SuppressWarnings("deprecation")
   private void cleanup(SecondaryNameNode snn) {

Modified: hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/test/GenericTestUtils.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/test/GenericTestUtils.java?rev=1142838&r1=1142837&r2=1142838&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/test/GenericTestUtils.java (original)
+++ hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/test/GenericTestUtils.java Mon Jul  4 22:46:59 2011
@@ -23,6 +23,7 @@ import java.util.Arrays;
 import java.util.List;
 import java.util.Set;
 import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeoutException;
 
 import org.apache.commons.logging.Log;
 import org.apache.hadoop.fs.FileUtil;
@@ -34,6 +35,8 @@ import org.mockito.invocation.Invocation
 import org.mockito.stubbing.Answer;
 
 import com.google.common.base.Joiner;
+import com.google.common.base.Predicate;
+import com.google.common.base.Supplier;
 import com.google.common.collect.Sets;
 
 /**
@@ -84,6 +87,23 @@ public abstract class GenericTestUtils {
         msg.contains(string));    
   }  
 
+  public static void waitFor(Supplier<Boolean> check,
+      int checkEveryMillis, int waitForMillis)
+      throws TimeoutException, InterruptedException
+  {
+    long st = System.currentTimeMillis();
+    do {
+      boolean result = check.get();
+      if (result) {
+        return;
+      }
+      
+      Thread.sleep(checkEveryMillis);
+    } while (System.currentTimeMillis() - st < waitForMillis);
+    throw new TimeoutException("Timed out waiting for condition");
+  }
+  
+  
   /**
    * Mockito answer helper that triggers one latch as soon as the
    * method is called, then waits on another before continuing.