You are viewing a plain text version of this content. The canonical link for it is here.
Posted to hdfs-commits@hadoop.apache.org by at...@apache.org on 2012/02/09 18:39:51 UTC

svn commit: r1242410 - in /hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs: ./ src/main/java/org/apache/hadoop/hdfs/server/namenode/ src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ src/test/java/org/apache/hadoop/hdfs/server/n...

Author: atm
Date: Thu Feb  9 17:39:50 2012
New Revision: 1242410

URL: http://svn.apache.org/viewvc?rev=1242410&view=rev
Log:
HDFS-2510. Add HA-related metrics. Contributed by Aaron T. Myers.

Added:
    hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAMetrics.java
Modified:
    hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt
    hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
    hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java

Modified: hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt?rev=1242410&r1=1242409&r2=1242410&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt (original)
+++ hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt Thu Feb  9 17:39:50 2012
@@ -184,3 +184,5 @@ HDFS-2901. Improvements for SBN web UI -
 HDFS-2905. HA: Standby NN NPE when shared edits dir is deleted. (Bikas Saha via jitendra)
 
 HDFS-2579. Starting delegation token manager during safemode fails. (todd)
+
+HDFS-2510. Add HA-related metrics. (atm)

Modified: hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java?rev=1242410&r1=1242409&r2=1242410&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java (original)
+++ hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java Thu Feb  9 17:39:50 2012
@@ -118,6 +118,7 @@ import org.apache.hadoop.fs.permission.F
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.fs.permission.PermissionStatus;
 import org.apache.hadoop.ha.ServiceFailedException;
+import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
 import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.HAUtil;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
@@ -4241,16 +4242,34 @@ public class FSNamesystem implements Nam
     return blockManager.getExcessBlocksCount();
   }
   
+  // HA-only metric
   @Metric
   public long getPostponedMisreplicatedBlocks() {
     return blockManager.getPostponedMisreplicatedBlocksCount();
   }
-  
+
+  // HA-only metric
   @Metric
   public int getPendingDataNodeMessageCount() {
     return blockManager.getPendingDataNodeMessageCount();
   }
   
+  // HA-only metric
+  @Metric
+  public String getHAState() {
+    return haContext.getState().toString();
+  }
+
+  // HA-only metric
+  @Metric
+  public long getMillisSinceLastLoadedEdits() {
+    if (isInStandbyState() && editLogTailer != null) {
+      return now() - editLogTailer.getLastLoadTimestamp();
+    } else {
+      return 0;
+    }
+  }
+  
   @Metric
   public int getBlockCapacity() {
     return blockManager.getCapacity();

Modified: hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java?rev=1242410&r1=1242409&r2=1242410&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java (original)
+++ hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java Thu Feb  9 17:39:50 2012
@@ -240,6 +240,13 @@ public class EditLogTailer {
   }
 
   /**
+   * @return timestamp (in msec) of when we last loaded a non-zero number of edits.
+   */
+  public long getLastLoadTimestamp() {
+    return lastLoadTimestamp;
+  }
+
+  /**
    * @return true if the configured log roll period has elapsed.
    */
   private boolean tooLongSinceLastLoad() {

Added: hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAMetrics.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAMetrics.java?rev=1242410&view=auto
==============================================================================
--- hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAMetrics.java (added)
+++ hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAMetrics.java Thu Feb  9 17:39:50 2012
@@ -0,0 +1,109 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode.ha;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.DFSTestUtil;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.MiniDFSNNTopology;
+import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
+import org.apache.hadoop.io.IOUtils;
+import org.junit.Test;
+
+/**
+ * Make sure HA-related metrics are updated and reported appropriately.
+ */
+public class TestHAMetrics {
+  
+  private static final Log LOG = LogFactory.getLog(TestHAMetrics.class);
+  
+  @Test
+  public void testHAMetrics() throws Exception {
+    Configuration conf = new Configuration();
+    conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
+    conf.setInt(DFSConfigKeys.DFS_HA_LOGROLL_PERIOD_KEY, Integer.MAX_VALUE);
+
+    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
+        .nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(1)
+        .build();
+    FileSystem fs = null;
+    try {
+      cluster.waitActive();
+      
+      FSNamesystem nn0 = cluster.getNamesystem(0);
+      FSNamesystem nn1 = cluster.getNamesystem(1);
+      
+      assertEquals("standby", nn0.getHAState());
+      assertTrue(0 < nn0.getMillisSinceLastLoadedEdits());
+      assertEquals("standby", nn1.getHAState());
+      assertTrue(0 < nn1.getMillisSinceLastLoadedEdits());
+
+      cluster.transitionToActive(0);
+      
+      assertEquals("active", nn0.getHAState());
+      assertEquals(0, nn0.getMillisSinceLastLoadedEdits());
+      assertEquals("standby", nn1.getHAState());
+      assertTrue(0 < nn1.getMillisSinceLastLoadedEdits());
+      
+      cluster.transitionToStandby(0);
+      cluster.transitionToActive(1);
+      
+      assertEquals("standby", nn0.getHAState());
+      assertTrue(0 < nn0.getMillisSinceLastLoadedEdits());
+      assertEquals("active", nn1.getHAState());
+      assertEquals(0, nn1.getMillisSinceLastLoadedEdits());
+      
+      Thread.sleep(2000); // make sure standby gets a little out-of-date
+      assertTrue(2000 <= nn0.getMillisSinceLastLoadedEdits());
+      
+      assertEquals(0, nn0.getPendingDataNodeMessageCount());
+      assertEquals(0, nn1.getPendingDataNodeMessageCount());
+      
+      fs = HATestUtil.configureFailoverFs(cluster, conf);
+      DFSTestUtil.createFile(fs, new Path("/foo"),
+          10, (short)1, 1L);
+      
+      assertTrue(0 < nn0.getPendingDataNodeMessageCount());
+      assertEquals(0, nn1.getPendingDataNodeMessageCount());
+      long millisSinceLastLoadedEdits = nn0.getMillisSinceLastLoadedEdits();
+      
+      HATestUtil.waitForStandbyToCatchUp(cluster.getNameNode(1),
+          cluster.getNameNode(0));
+      
+      assertEquals(0, nn0.getPendingDataNodeMessageCount());
+      assertEquals(0, nn1.getPendingDataNodeMessageCount());
+      long newMillisSinceLastLoadedEdits = nn0.getMillisSinceLastLoadedEdits();
+      // Since we just waited for the standby to catch up, the time since we
+      // last loaded edits should be very low.
+      assertTrue("expected " + millisSinceLastLoadedEdits + " > " +
+          newMillisSinceLastLoadedEdits,
+          millisSinceLastLoadedEdits > newMillisSinceLastLoadedEdits);
+    } finally {
+      IOUtils.cleanup(LOG, fs);
+      cluster.shutdown();
+    }
+  }
+}