You are viewing a plain text version of this content. The canonical link for it is here.
Posted to hdfs-commits@hadoop.apache.org by at...@apache.org on 2012/02/09 18:39:51 UTC
svn commit: r1242410 - in
/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs: ./
src/main/java/org/apache/hadoop/hdfs/server/namenode/
src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/
src/test/java/org/apache/hadoop/hdfs/server/n...
Author: atm
Date: Thu Feb 9 17:39:50 2012
New Revision: 1242410
URL: http://svn.apache.org/viewvc?rev=1242410&view=rev
Log:
HDFS-2510. Add HA-related metrics. Contributed by Aaron T. Myers.
Added:
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAMetrics.java
Modified:
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java
Modified: hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt?rev=1242410&r1=1242409&r2=1242410&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt (original)
+++ hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt Thu Feb 9 17:39:50 2012
@@ -184,3 +184,5 @@ HDFS-2901. Improvements for SBN web UI -
HDFS-2905. HA: Standby NN NPE when shared edits dir is deleted. (Bikas Saha via jitendra)
HDFS-2579. Starting delegation token manager during safemode fails. (todd)
+
+HDFS-2510. Add HA-related metrics. (atm)
Modified: hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java?rev=1242410&r1=1242409&r2=1242410&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java (original)
+++ hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java Thu Feb 9 17:39:50 2012
@@ -118,6 +118,7 @@ import org.apache.hadoop.fs.permission.F
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.fs.permission.PermissionStatus;
import org.apache.hadoop.ha.ServiceFailedException;
+import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.HAUtil;
import org.apache.hadoop.hdfs.HdfsConfiguration;
@@ -4241,16 +4242,34 @@ public class FSNamesystem implements Nam
return blockManager.getExcessBlocksCount();
}
+ // HA-only metric: the block manager's postponed mis-replicated blocks count.
@Metric
public long getPostponedMisreplicatedBlocks() {
return blockManager.getPostponedMisreplicatedBlocksCount();
}
-
+
+ // HA-only metric: the block manager's pending datanode message count.
@Metric
public int getPendingDataNodeMessageCount() {
return blockManager.getPendingDataNodeMessageCount();
}
+ // HA-only metric: string form of the HA context's current state (TestHAMetrics expects "active" or "standby").
+ @Metric
+ public String getHAState() {
+ return haContext.getState().toString();
+ }
+
+ // HA-only metric: time elapsed since the edit log tailer last loaded edits.
+ @Metric
+ public long getMillisSinceLastLoadedEdits() {
+   // Reported as 0 unless this namenode is in standby and has a tailer.
+   if (!isInStandbyState() || editLogTailer == null) {
+     return 0;
+   }
+   return now() - editLogTailer.getLastLoadTimestamp();
+ }
+
@Metric
public int getBlockCapacity() {
return blockManager.getCapacity();
Modified: hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java?rev=1242410&r1=1242409&r2=1242410&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java (original)
+++ hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java Thu Feb 9 17:39:50 2012
@@ -240,6 +240,13 @@ public class EditLogTailer {
}
/**
+ * @return the time, in milliseconds, at which a non-zero number of edits was last loaded.
+ */
+ public long getLastLoadTimestamp() {
+ return lastLoadTimestamp;
+ }
+
+ /**
* @return true if the configured log roll period has elapsed.
*/
private boolean tooLongSinceLastLoad() {
Added: hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAMetrics.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAMetrics.java?rev=1242410&view=auto
==============================================================================
--- hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAMetrics.java (added)
+++ hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAMetrics.java Thu Feb 9 17:39:50 2012
@@ -0,0 +1,109 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode.ha;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.DFSTestUtil;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.MiniDFSNNTopology;
+import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
+import org.apache.hadoop.io.IOUtils;
+import org.junit.Test;
+
+/**
+ * Make sure HA-related metrics are updated and reported appropriately.
+ */
+public class TestHAMetrics {
+  private static final Log LOG = LogFactory.getLog(TestHAMetrics.class);
+
+  /** Checks the HA metrics of both namenodes across state transitions. */
+  @Test
+  public void testHAMetrics() throws Exception {
+    Configuration conf = new Configuration();
+    conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
+    conf.setInt(DFSConfigKeys.DFS_HA_LOGROLL_PERIOD_KEY, Integer.MAX_VALUE);
+
+    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
+        .nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(1)
+        .build();
+    FileSystem fs = null;
+    try {
+      cluster.waitActive();
+
+      FSNamesystem nn0 = cluster.getNamesystem(0);
+      FSNamesystem nn1 = cluster.getNamesystem(1);
+      // Both namenodes are expected to start out in standby.
+      assertEquals("standby", nn0.getHAState());
+      assertTrue(0 < nn0.getMillisSinceLastLoadedEdits());
+      assertEquals("standby", nn1.getHAState());
+      assertTrue(0 < nn1.getMillisSinceLastLoadedEdits());
+
+      cluster.transitionToActive(0);
+      // Only a standby reports a non-zero time since it last loaded edits.
+      assertEquals("active", nn0.getHAState());
+      assertEquals(0, nn0.getMillisSinceLastLoadedEdits());
+      assertEquals("standby", nn1.getHAState());
+      assertTrue(0 < nn1.getMillisSinceLastLoadedEdits());
+      // Swap roles: nn0 goes back to standby and nn1 becomes active.
+      cluster.transitionToStandby(0);
+      cluster.transitionToActive(1);
+
+      assertEquals("standby", nn0.getHAState());
+      assertTrue(0 < nn0.getMillisSinceLastLoadedEdits());
+      assertEquals("active", nn1.getHAState());
+      assertEquals(0, nn1.getMillisSinceLastLoadedEdits());
+
+      Thread.sleep(2000); // make sure standby gets a little out-of-date
+      assertTrue(2000 <= nn0.getMillisSinceLastLoadedEdits());
+
+      assertEquals(0, nn0.getPendingDataNodeMessageCount());
+      assertEquals(0, nn1.getPendingDataNodeMessageCount());
+      // Writes while nn0 is standby should leave datanode messages pending on it.
+      fs = HATestUtil.configureFailoverFs(cluster, conf);
+      DFSTestUtil.createFile(fs, new Path("/foo"),
+          10, (short)1, 1L);
+
+      assertTrue(0 < nn0.getPendingDataNodeMessageCount());
+      assertEquals(0, nn1.getPendingDataNodeMessageCount());
+      long millisSinceLastLoadedEdits = nn0.getMillisSinceLastLoadedEdits();
+
+      HATestUtil.waitForStandbyToCatchUp(cluster.getNameNode(1),
+          cluster.getNameNode(0));
+      // Once caught up, the standby should have no pending datanode messages.
+      assertEquals(0, nn0.getPendingDataNodeMessageCount());
+      assertEquals(0, nn1.getPendingDataNodeMessageCount());
+      long newMillisSinceLastLoadedEdits = nn0.getMillisSinceLastLoadedEdits();
+      // Since we just waited for the standby to catch up, the time since we
+      // last loaded edits should be very low.
+      assertTrue("expected " + millisSinceLastLoadedEdits + " > " +
+          newMillisSinceLastLoadedEdits,
+          millisSinceLastLoadedEdits > newMillisSinceLastLoadedEdits);
+    } finally {
+      IOUtils.cleanup(LOG, fs);
+      cluster.shutdown();
+    }
+  }
+}