You are viewing a plain text version of this content. The canonical link for it is here.
Posted to hdfs-commits@hadoop.apache.org by el...@apache.org on 2012/07/18 17:42:19 UTC
svn commit: r1362985 - in
/hadoop/common/branches/branch-2/hadoop-hdfs-project: ./ hadoop-hdfs/
hadoop-hdfs/src/main/java/ hadoop-hdfs/src/main/native/
hadoop-hdfs/src/main/webapps/datanode/ hadoop-hdfs/src/main/webapps/hdfs/
hadoop-hdfs/src/main/webap...
Author: eli
Date: Wed Jul 18 15:42:19 2012
New Revision: 1362985
URL: http://svn.apache.org/viewvc?rev=1362985&view=rev
Log:
HDFS-2966. TestNameNodeMetrics tests can fail under load. Contributed by Steve Loughran
Modified:
hadoop/common/branches/branch-2/hadoop-hdfs-project/ (props changed)
hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/ (props changed)
hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/ (props changed)
hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/native/ (props changed)
hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/datanode/ (props changed)
hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/ (props changed)
hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/secondary/ (props changed)
hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/hdfs/ (props changed)
hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java
Propchange: hadoop/common/branches/branch-2/hadoop-hdfs-project/
------------------------------------------------------------------------------
Merged /hadoop/common/trunk/hadoop-hdfs-project:r1298820
Propchange: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/
------------------------------------------------------------------------------
Merged /hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs:r1298820
Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt?rev=1362985&r1=1362984&r2=1362985&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt (original)
+++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt Wed Jul 18 15:42:19 2012
@@ -348,6 +348,8 @@ Release 2.0.1-alpha - UNRELEASED
HDFS-3609. libhdfs: don't force the URI to look like hdfs://hostname:port.
(Colin Patrick McCabe via eli)
+ HDFS-2966. TestNameNodeMetrics tests can fail under load. (stevel)
+
BREAKDOWN OF HDFS-3042 SUBTASKS
HDFS-2185. HDFS portion of ZK-based FailoverController (todd)
Propchange: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/
------------------------------------------------------------------------------
Merged /hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java:r1298820
Propchange: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/native/
------------------------------------------------------------------------------
Merged /hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/native:r1298820
Propchange: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/datanode/
------------------------------------------------------------------------------
Merged /hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/datanode:r1298820
Propchange: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/
------------------------------------------------------------------------------
Merged /hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs:r1298820
Propchange: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/secondary/
------------------------------------------------------------------------------
Merged /hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/secondary:r1298820
Propchange: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/hdfs/
------------------------------------------------------------------------------
Merged /hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/hdfs:r1298820
Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java?rev=1362985&r1=1362984&r2=1362985&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java (original)
+++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java Wed Jul 18 15:42:19 2012
@@ -62,6 +62,8 @@ public class TestNameNodeMetrics {
// Number of datanodes in the cluster
private static final int DATANODE_COUNT = 3;
+ private static final int WAIT_GAUGE_VALUE_RETRIES = 20;
+
static {
CONF.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 100);
CONF.setInt(DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY, 1);
@@ -148,11 +150,8 @@ public class TestNameNodeMetrics {
assertGauge("BlockCapacity", blockCapacity, rb);
fs.delete(file, true);
filesTotal--; // reduce the filecount for deleted file
-
- waitForDeletion();
- updateMetrics();
- rb = getMetrics(NS_METRICS);
- assertGauge("FilesTotal", filesTotal, rb);
+
+ rb = waitForDnMetricValue(NS_METRICS, "FilesTotal", filesTotal);
assertGauge("BlocksTotal", 0L, rb);
assertGauge("PendingDeletionBlocks", 0L, rb);
@@ -185,9 +184,7 @@ public class TestNameNodeMetrics {
assertGauge("PendingReplicationBlocks", 1L, rb);
assertGauge("ScheduledReplicationBlocks", 1L, rb);
fs.delete(file, true);
- waitForDeletion();
- rb = getMetrics(NS_METRICS);
- assertGauge("CorruptBlocks", 0L, rb);
+ rb = waitForDnMetricValue(NS_METRICS, "CorruptBlocks", 0L);
assertGauge("PendingReplicationBlocks", 0L, rb);
assertGauge("ScheduledReplicationBlocks", 0L, rb);
}
@@ -229,8 +226,7 @@ public class TestNameNodeMetrics {
assertGauge("UnderReplicatedBlocks", 1L, rb);
assertGauge("MissingBlocks", 1L, rb);
fs.delete(file, true);
- waitForDeletion();
- assertGauge("UnderReplicatedBlocks", 0L, getMetrics(NS_METRICS));
+ waitForDnMetricValue(NS_METRICS, "UnderReplicatedBlocks", 0L);
}
private void waitForDeletion() throws InterruptedException {
@@ -238,7 +234,44 @@ public class TestNameNodeMetrics {
// the blocks pending deletion are sent for deletion to the datanodes.
Thread.sleep(DFS_REPLICATION_INTERVAL * (DATANODE_COUNT + 1) * 1000);
}
-
+
+ /**
+ * Wait for the named gauge value from the metrics source to reach the
+ * desired value.
+ *
+ * There is an initial delay, followed by a loop that sleeps and then
+ * polls the gauge again. All the tests share a single FS instance, so they
+ * are not independent of one another; that is why the initial sleep is there.
+ *
+ * @param source metrics source
+ * @param name gauge name
+ * @param expected expected value
+ * @return the last metrics record polled
+ * @throws Exception if something went wrong.
+ */
+ private MetricsRecordBuilder waitForDnMetricValue(String source,
+ String name,
+ long expected)
+ throws Exception {
+ MetricsRecordBuilder rb;
+ long gauge;
+ //initial wait.
+ waitForDeletion();
+ //lots of retries are allowed for slow systems; fast ones will still
+ //exit early
+ int retries = (DATANODE_COUNT + 1) * WAIT_GAUGE_VALUE_RETRIES;
+ rb = getMetrics(source);
+ gauge = MetricsAsserts.getLongGauge(name, rb);
+ while (gauge != expected && (--retries > 0)) {
+ Thread.sleep(DFS_REPLICATION_INTERVAL * 500);
+ rb = getMetrics(source);
+ gauge = MetricsAsserts.getLongGauge(name, rb);
+ }
+ //at this point the assertion is valid or the retry count ran out
+ assertGauge(name, expected, rb);
+ return rb;
+ }
+
@Test
public void testRenameMetrics() throws Exception {
Path src = getTestPath("src");