You are viewing a plain text version of this content. The canonical link for it is here.
Posted to mapreduce-commits@hadoop.apache.org by tg...@apache.org on 2012/05/21 22:06:41 UTC
svn commit: r1341184 - in
/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project: ./
hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/
hadoop-yarn/hadoop-yarn-...
Author: tgraves
Date: Mon May 21 20:06:40 2012
New Revision: 1341184
URL: http://svn.apache.org/viewvc?rev=1341184&view=rev
Log:
merge -r 1341161:1341162 from branch-2. FIXES: MAPREDUCE-3870
Modified:
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/CHANGES.txt
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestQueueMetrics.java
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java
Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/CHANGES.txt?rev=1341184&r1=1341183&r2=1341184&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/CHANGES.txt (original)
+++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/CHANGES.txt Mon May 21 20:06:40 2012
@@ -208,6 +208,9 @@ Release 0.23.3 - UNRELEASED
MAPREDUCE-4269. documentation: Gridmix has javadoc warnings in
StressJobFactory (Jonathon Eagles via tgraves).
+ MAPREDUCE-3870. Invalid App Metrics
+ (Bhallamudi Venkata Siva Kamesh via tgraves).
+
Release 0.23.2 - UNRELEASED
INCOMPATIBLE CHANGES
Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java?rev=1341184&r1=1341183&r2=1341184&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java (original)
+++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java Mon May 21 20:06:40 2012
@@ -49,7 +49,7 @@ public class QueueMetrics {
@Metric("# of pending apps") MutableGaugeInt appsPending;
@Metric("# of apps completed") MutableCounterInt appsCompleted;
@Metric("# of apps killed") MutableCounterInt appsKilled;
- @Metric("# of apps failed") MutableCounterInt appsFailed;
+ @Metric("# of apps failed") MutableGaugeInt appsFailed;
@Metric("Allocated memory in MB") MutableGaugeInt allocatedMB;
@Metric("# of allocated containers") MutableGaugeInt allocatedContainers;
@@ -131,15 +131,19 @@ public class QueueMetrics {
return metrics;
}
- public void submitApp(String user) {
- appsSubmitted.incr();
+ public void submitApp(String user, int attemptId) {
+ if (attemptId == 1) {
+ appsSubmitted.incr();
+ } else {
+ appsFailed.decr();
+ }
appsPending.incr();
QueueMetrics userMetrics = getUserMetrics(user);
if (userMetrics != null) {
- userMetrics.submitApp(user);
+ userMetrics.submitApp(user, attemptId);
}
if (parent != null) {
- parent.submitApp(user);
+ parent.submitApp(user, attemptId);
}
}
Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java?rev=1341184&r1=1341183&r2=1341184&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java (original)
+++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java Mon May 21 20:06:40 2012
@@ -631,9 +631,7 @@ public class LeafQueue implements CSQueu
}
int attemptId = application.getApplicationAttemptId().getAttemptId();
- if (attemptId == 1) {
- metrics.submitApp(userName);
- }
+ metrics.submitApp(userName, attemptId);
// Inform the parent queue
try {
Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java?rev=1341184&r1=1341183&r2=1341184&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java (original)
+++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java Mon May 21 20:06:40 2012
@@ -295,9 +295,7 @@ public class FifoScheduler implements Re
new SchedulerApp(appAttemptId, user, DEFAULT_QUEUE, activeUsersManager,
this.rmContext, null);
applications.put(appAttemptId, schedulerApp);
- if (appAttemptId.getAttemptId() == 1) {
- metrics.submitApp(user);
- }
+ metrics.submitApp(user, appAttemptId.getAttemptId());
LOG.info("Application Submission: " + appAttemptId.getApplicationId() +
" from " + user + ", currently active: " + applications.size());
rmContext.getDispatcher().getEventHandler().handle(
Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestQueueMetrics.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestQueueMetrics.java?rev=1341184&r1=1341183&r2=1341184&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestQueueMetrics.java (original)
+++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestQueueMetrics.java Mon May 21 20:06:40 2012
@@ -31,9 +31,12 @@ import org.apache.hadoop.metrics2.Metric
import org.apache.hadoop.metrics2.MetricsSource;
import org.apache.hadoop.metrics2.MetricsSystem;
import org.apache.hadoop.metrics2.impl.MetricsSystemImpl;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.server.resourcemanager.resource.Resource;
import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
+import org.apache.hadoop.yarn.util.BuilderUtils;
import org.junit.Test;
public class TestQueueMetrics {
@@ -49,7 +52,7 @@ public class TestQueueMetrics {
MetricsSource queueSource= queueSource(ms, queueName);
AppSchedulingInfo app = mockApp(user);
- metrics.submitApp(user);
+ metrics.submitApp(user, 1);
MetricsSource userSource = userSource(ms, queueName, user);
checkApps(queueSource, 1, 1, 0, 0, 0, 0);
@@ -72,6 +75,52 @@ public class TestQueueMetrics {
checkApps(queueSource, 1, 0, 0, 1, 0, 0);
assertNull(userSource);
}
+
+ @Test
+ public void testQueueAppMetricsForMultipleFailures() {
+ String queueName = "single";
+ String user = "alice";
+
+ QueueMetrics metrics = QueueMetrics.forQueue(ms, queueName, null, false);
+ MetricsSource queueSource = queueSource(ms, queueName);
+ AppSchedulingInfo app = mockApp(user);
+
+ metrics.submitApp(user, 1);
+ MetricsSource userSource = userSource(ms, queueName, user);
+ checkApps(queueSource, 1, 1, 0, 0, 0, 0);
+
+ metrics.incrAppsRunning(user);
+ checkApps(queueSource, 1, 0, 1, 0, 0, 0);
+
+ metrics.finishApp(app, RMAppAttemptState.FAILED);
+ checkApps(queueSource, 1, 0, 0, 0, 1, 0);
+
+ // As the application has failed, framework retries the same application
+ // based on configuration
+ metrics.submitApp(user, 2);
+ checkApps(queueSource, 1, 1, 0, 0, 0, 0);
+
+ metrics.incrAppsRunning(user);
+ checkApps(queueSource, 1, 0, 1, 0, 0, 0);
+
+ // Suppose the application has failed this time as well.
+ metrics.finishApp(app, RMAppAttemptState.FAILED);
+ checkApps(queueSource, 1, 0, 0, 0, 1, 0);
+
+ // As the application has failed, framework retries the same application
+ // based on configuration
+ metrics.submitApp(user, 3);
+ checkApps(queueSource, 1, 1, 0, 0, 0, 0);
+
+ metrics.incrAppsRunning(user);
+ checkApps(queueSource, 1, 0, 1, 0, 0, 0);
+
+ // Suppose the application has finished.
+ metrics.finishApp(app, RMAppAttemptState.FINISHED);
+ checkApps(queueSource, 1, 0, 0, 1, 0, 0);
+
+ assertNull(userSource);
+ }
@Test public void testSingleQueueWithUserMetrics() {
String queueName = "single2";
@@ -81,7 +130,7 @@ public class TestQueueMetrics {
MetricsSource queueSource = queueSource(ms, queueName);
AppSchedulingInfo app = mockApp(user);
- metrics.submitApp(user);
+ metrics.submitApp(user, 1);
MetricsSource userSource = userSource(ms, queueName, user);
checkApps(queueSource, 1, 1, 0, 0, 0, 0);
@@ -127,7 +176,7 @@ public class TestQueueMetrics {
MetricsSource queueSource = queueSource(ms, leafQueueName);
AppSchedulingInfo app = mockApp(user);
- metrics.submitApp(user);
+ metrics.submitApp(user, 1);
MetricsSource userSource = userSource(ms, leafQueueName, user);
MetricsSource parentUserSource = userSource(ms, parentQueueName, user);
@@ -180,7 +229,7 @@ public class TestQueueMetrics {
assertGauge("AppsPending", pending, rb);
assertGauge("AppsRunning", running, rb);
assertCounter("AppsCompleted", completed, rb);
- assertCounter("AppsFailed", failed, rb);
+ assertGauge("AppsFailed", failed, rb);
assertCounter("AppsKilled", killed, rb);
}
@@ -203,6 +252,9 @@ public class TestQueueMetrics {
private static AppSchedulingInfo mockApp(String user) {
AppSchedulingInfo app = mock(AppSchedulingInfo.class);
when(app.getUser()).thenReturn(user);
+ ApplicationId appId = BuilderUtils.newApplicationId(1, 1);
+ ApplicationAttemptId id = BuilderUtils.newApplicationAttemptId(appId, 1);
+ when(app.getApplicationAttemptId()).thenReturn(id);
return app;
}
Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java?rev=1341184&r1=1341183&r2=1341184&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java (original)
+++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java Mon May 21 20:06:40 2012
@@ -54,6 +54,7 @@ import org.apache.hadoop.yarn.factories.
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources;
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl;
@@ -62,6 +63,7 @@ import org.apache.hadoop.yarn.server.res
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApp;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppRemovedSchedulerEvent;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
@@ -88,7 +90,8 @@ public class TestLeafQueue {
@Before
public void setUp() throws Exception {
- cs = new CapacityScheduler();
+ CapacityScheduler spyCs = new CapacityScheduler();
+ cs = spy(spyCs);
rmContext = TestUtils.getMockRMContext();
csConf =
@@ -306,6 +309,14 @@ public class TestLeafQueue {
SchedulerApp app_0 = new SchedulerApp(appAttemptId_0, user_0, a, null,
rmContext, null);
a.submitApplication(app_0, user_0, B);
+
+ when(cs.getApplication(appAttemptId_0)).thenReturn(app_0);
+ AppRemovedSchedulerEvent event = new AppRemovedSchedulerEvent(
+ appAttemptId_0, RMAppAttemptState.FAILED);
+ cs.handle(event);
+
+ assertEquals(0, a.getMetrics().getAppsPending());
+ assertEquals(1, a.getMetrics().getAppsFailed());
// Attempt the same application again
final ApplicationAttemptId appAttemptId_1 = TestUtils
@@ -316,6 +327,16 @@ public class TestLeafQueue {
assertEquals(1, a.getMetrics().getAppsSubmitted());
assertEquals(1, a.getMetrics().getAppsPending());
+
+ when(cs.getApplication(appAttemptId_1)).thenReturn(app_0);
+ event = new AppRemovedSchedulerEvent(appAttemptId_0,
+ RMAppAttemptState.FINISHED);
+ cs.handle(event);
+
+ assertEquals(1, a.getMetrics().getAppsSubmitted());
+ assertEquals(0, a.getMetrics().getAppsPending());
+ assertEquals(0, a.getMetrics().getAppsFailed());
+ assertEquals(1, a.getMetrics().getAppsCompleted());
QueueMetrics userMetrics = a.getMetrics().getUserMetrics(user_0);
assertEquals(1, userMetrics.getAppsSubmitted());