You are viewing a plain text version of this content. The canonical link for it is here.
Posted to hdfs-commits@hadoop.apache.org by at...@apache.org on 2012/02/14 09:27:15 UTC

svn commit: r1243822 - in /hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs: ./ src/main/java/org/apache/hadoop/hdfs/server/namenode/ src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/

Author: atm
Date: Tue Feb 14 08:27:15 2012
New Revision: 1243822

URL: http://svn.apache.org/viewvc?rev=1243822&view=rev
Log:
HDFS-2943. Expose last checkpoint time and transaction stats as JMX metrics. Contributed by Aaron T. Myers.

Modified:
    hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
    hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java
    hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
    hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java
    hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java

Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt?rev=1243822&r1=1243821&r2=1243822&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt Tue Feb 14 08:27:15 2012
@@ -221,6 +221,9 @@ Release 0.23.2 - UNRELEASED 
 
   NEW FEATURES
 
+    HDFS-2943. Expose last checkpoint time and transaction stats as JMX
+    metrics. (atm)
+
   IMPROVEMENTS
 
     HDFS-2931. Switch DataNode's BlockVolumeChoosingPolicy to private-audience.

Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java?rev=1243822&r1=1243821&r2=1243822&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java Tue Feb 14 08:27:15 2012
@@ -711,7 +711,7 @@ public class FSImage implements Closeabl
     long txId = loader.getLoadedImageTxId();
     LOG.info("Loaded image for txid " + txId + " from " + curFile);
     lastAppliedTxId = txId;
-    storage.setMostRecentCheckpointTxId(txId);
+    storage.setMostRecentCheckpointInfo(txId, curFile.lastModified());
   }
 
   /**
@@ -728,7 +728,7 @@ public class FSImage implements Closeabl
     saver.save(newFile, compression);
     
     MD5FileUtils.saveMD5File(dstFile, saver.getSavedDigest());
-    storage.setMostRecentCheckpointTxId(txid);
+    storage.setMostRecentCheckpointInfo(txid, Util.now());
   }
 
   /**
@@ -1032,7 +1032,7 @@ public class FSImage implements Closeabl
     // advertise it as such to other checkpointers
     // from now on
     if (txid > storage.getMostRecentCheckpointTxId()) {
-      storage.setMostRecentCheckpointTxId(txid);
+      storage.setMostRecentCheckpointInfo(txid, Util.now());
     }
   }
 

Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java?rev=1243822&r1=1243821&r2=1243822&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java Tue Feb 14 08:27:15 2012
@@ -2680,6 +2680,31 @@ public class FSNamesystem implements Nam
   public int getExpiredHeartbeats() {
     return datanodeStatistics.getExpiredHeartbeats();
   }
+  
+  @Metric({"TransactionsSinceLastCheckpoint",
+      "Number of transactions since last checkpoint"})
+  public long getTransactionsSinceLastCheckpoint() {
+    return getEditLog().getLastWrittenTxId() -
+        getFSImage().getStorage().getMostRecentCheckpointTxId();
+  }
+  
+  @Metric({"TransactionsSinceLastLogRoll",
+      "Number of transactions since last edit log roll"})
+  public long getTransactionsSinceLastLogRoll() {
+    return (getEditLog().getLastWrittenTxId() -
+        getEditLog().getCurSegmentTxId()) + 1;
+  }
+  
+  @Metric({"LastWrittenTransactionId", "Transaction ID written to the edit log"})
+  public long getLastWrittenTransactionId() {
+    return getEditLog().getLastWrittenTxId();
+  }
+  
+  @Metric({"LastCheckpointTime",
+      "Time in milliseconds since the epoch of the last checkpoint"})
+  public long getLastCheckpointTime() {
+    return getFSImage().getStorage().getMostRecentCheckpointTime();
+  }
 
   /** @see ClientProtocol#getStats() */
   long[] getStats() {

Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java?rev=1243822&r1=1243821&r2=1243822&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java Tue Feb 14 08:27:15 2012
@@ -127,6 +127,11 @@ public class NNStorage extends Storage i
    * that have since been written to the edit log.
    */
   protected long mostRecentCheckpointTxId = HdfsConstants.INVALID_TXID;
+  
+  /**
+   * Time of the last checkpoint, in milliseconds since the epoch.
+   */
+  private long mostRecentCheckpointTime = 0;
 
   /**
    * list of failed (and thus removed) storages
@@ -440,18 +445,29 @@ public class NNStorage extends Storage i
   }
 
   /**
-   * Set the transaction ID of the last checkpoint
+   * Set the transaction ID and time of the last checkpoint
+   * 
+   * @param txid transaction id of the last checkpoint
+   * @param time time of the last checkpoint, in millis since the epoch
    */
-  void setMostRecentCheckpointTxId(long txid) {
+  void setMostRecentCheckpointInfo(long txid, long time) {
     this.mostRecentCheckpointTxId = txid;
+    this.mostRecentCheckpointTime = time;
   }
 
   /**
-   * Return the transaction ID of the last checkpoint.
+   * @return the transaction ID of the last checkpoint.
    */
   long getMostRecentCheckpointTxId() {
     return mostRecentCheckpointTxId;
   }
+  
+  /**
+   * @return the time of the most recent checkpoint in millis since the epoch.
+   */
+  long getMostRecentCheckpointTime() {
+    return mostRecentCheckpointTime;
+  }
 
   /**
    * Write a small file in all available storage directories that

Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java?rev=1243822&r1=1243821&r2=1243822&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java Tue Feb 14 08:27:15 2012
@@ -20,13 +20,12 @@ package org.apache.hadoop.hdfs.server.na
 import static org.apache.hadoop.test.MetricsAsserts.assertCounter;
 import static org.apache.hadoop.test.MetricsAsserts.assertGauge;
 import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
+import static org.junit.Assert.*;
 
 import java.io.DataInputStream;
 import java.io.IOException;
 import java.util.Random;
 
-import junit.framework.TestCase;
-
 import org.apache.commons.logging.LogFactory;
 import org.apache.commons.logging.impl.Log4JLogger;
 import org.apache.hadoop.conf.Configuration;
@@ -39,17 +38,21 @@ import org.apache.hadoop.hdfs.Distribute
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.protocol.LocatedBlock;
+import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
 import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
 import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
 import org.apache.hadoop.metrics2.MetricsRecordBuilder;
 import org.apache.hadoop.test.MetricsAsserts;
 import org.apache.log4j.Level;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
 
 /**
  * Test for metrics published by the Namenode
  */
-public class TestNameNodeMetrics extends TestCase {
+public class TestNameNodeMetrics {
   private static final Configuration CONF = new HdfsConfiguration();
   private static final int DFS_REPLICATION_INTERVAL = 1;
   private static final Path TEST_ROOT_DIR_PATH = 
@@ -81,8 +84,8 @@ public class TestNameNodeMetrics extends
     return new Path(TEST_ROOT_DIR_PATH, fileName);
   }
   
-  @Override
-  protected void setUp() throws Exception {
+  @Before
+  public void setUp() throws Exception {
     cluster = new MiniDFSCluster.Builder(CONF).numDataNodes(DATANODE_COUNT).build();
     cluster.waitActive();
     namesystem = cluster.getNamesystem();
@@ -90,8 +93,8 @@ public class TestNameNodeMetrics extends
     fs = (DistributedFileSystem) cluster.getFileSystem();
   }
   
-  @Override
-  protected void tearDown() throws Exception {
+  @After
+  public void tearDown() throws Exception {
     cluster.shutdown();
   }
   
@@ -115,6 +118,7 @@ public class TestNameNodeMetrics extends
   }
   
   /** Test metrics associated with addition of a file */
+  @Test
   public void testFileAdd() throws Exception {
     // Add files with 100 blocks
     final Path file = getTestPath("testFileAdd");
@@ -159,6 +163,7 @@ public class TestNameNodeMetrics extends
   }
   
   /** Corrupt a block and ensure metrics reflects it */
+  @Test
   public void testCorruptBlock() throws Exception {
     // Create a file with single block with two replicas
     final Path file = getTestPath("testCorruptBlock");
@@ -184,6 +189,7 @@ public class TestNameNodeMetrics extends
   /** Create excess blocks by reducing the replication factor for
    * for a file and ensure metrics reflects it
    */
+  @Test
   public void testExcessBlocks() throws Exception {
     Path file = getTestPath("testExcessBlocks");
     createFile(file, 100, (short)2);
@@ -196,6 +202,7 @@ public class TestNameNodeMetrics extends
   }
   
   /** Test to ensure metrics reflects missing blocks */
+  @Test
   public void testMissingBlock() throws Exception {
     // Create a file with single block with two replicas
     Path file = getTestPath("testMissingBlocks");
@@ -220,6 +227,7 @@ public class TestNameNodeMetrics extends
     Thread.sleep(DFS_REPLICATION_INTERVAL * (DATANODE_COUNT + 1) * 1000);
   }
   
+  @Test
   public void testRenameMetrics() throws Exception {
     Path src = getTestPath("src");
     createFile(src, 100, (short)1);
@@ -244,7 +252,8 @@ public class TestNameNodeMetrics extends
    *    
    * @throws IOException in case of an error
    */
-  public void testGetBlockLocationMetric() throws Exception{
+  @Test
+  public void testGetBlockLocationMetric() throws Exception {
     Path file1_Path = new Path(TEST_ROOT_DIR_PATH, "file1.dat");
 
     // When cluster starts first time there are no file  (read,create,open)
@@ -272,4 +281,46 @@ public class TestNameNodeMetrics extends
     updateMetrics();
     assertCounter("GetBlockLocations", 3L, getMetrics(NN_METRICS));
   }
+  
+  /**
+   * Test NN checkpoint and transaction-related metrics.
+   */
+  @Test
+  public void testTransactionAndCheckpointMetrics() throws Exception {
+    long lastCkptTime = MetricsAsserts.getLongGauge("LastCheckpointTime",
+        getMetrics(NS_METRICS));
+    
+    assertGauge("LastCheckpointTime", lastCkptTime, getMetrics(NS_METRICS));
+    assertGauge("LastWrittenTransactionId", 1L, getMetrics(NS_METRICS));
+    assertGauge("TransactionsSinceLastCheckpoint", 1L, getMetrics(NS_METRICS));
+    assertGauge("TransactionsSinceLastLogRoll", 1L, getMetrics(NS_METRICS));
+    
+    fs.mkdirs(new Path(TEST_ROOT_DIR_PATH, "/tmp"));
+    updateMetrics();
+    
+    assertGauge("LastCheckpointTime", lastCkptTime, getMetrics(NS_METRICS));
+    assertGauge("LastWrittenTransactionId", 2L, getMetrics(NS_METRICS));
+    assertGauge("TransactionsSinceLastCheckpoint", 2L, getMetrics(NS_METRICS));
+    assertGauge("TransactionsSinceLastLogRoll", 2L, getMetrics(NS_METRICS));
+    
+    cluster.getNameNodeRpc().rollEditLog();
+    updateMetrics();
+    
+    assertGauge("LastCheckpointTime", lastCkptTime, getMetrics(NS_METRICS));
+    assertGauge("LastWrittenTransactionId", 4L, getMetrics(NS_METRICS));
+    assertGauge("TransactionsSinceLastCheckpoint", 4L, getMetrics(NS_METRICS));
+    assertGauge("TransactionsSinceLastLogRoll", 1L, getMetrics(NS_METRICS));
+    
+    cluster.getNameNodeRpc().setSafeMode(SafeModeAction.SAFEMODE_ENTER);
+    cluster.getNameNodeRpc().saveNamespace();
+    cluster.getNameNodeRpc().setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
+    updateMetrics();
+    
+    long newLastCkptTime = MetricsAsserts.getLongGauge("LastCheckpointTime",
+        getMetrics(NS_METRICS));
+    assertTrue(lastCkptTime < newLastCkptTime);
+    assertGauge("LastWrittenTransactionId", 6L, getMetrics(NS_METRICS));
+    assertGauge("TransactionsSinceLastCheckpoint", 1L, getMetrics(NS_METRICS));
+    assertGauge("TransactionsSinceLastLogRoll", 1L, getMetrics(NS_METRICS));
+  }
 }