You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by in...@apache.org on 2023/02/08 19:48:57 UTC
[hadoop] branch trunk updated: YARN-11217. [Federation] Add dumpSchedulerLogs REST APIs for Router. (#5272)
This is an automated email from the ASF dual-hosted git repository.
inigoiri pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/trunk by this push:
new af20841fb1f YARN-11217. [Federation] Add dumpSchedulerLogs REST APIs for Router. (#5272)
af20841fb1f is described below
commit af20841fb1f437b30d333eba0a6ef21fdb7a82ba
Author: slfan1989 <55...@users.noreply.github.com>
AuthorDate: Thu Feb 9 03:48:38 2023 +0800
YARN-11217. [Federation] Add dumpSchedulerLogs REST APIs for Router. (#5272)
---
.../hadoop/yarn/server/router/RouterMetrics.java | 31 +++++++++++
.../router/webapp/FederationInterceptorREST.java | 62 +++++++++++++++++++++-
.../yarn/server/router/TestRouterMetrics.java | 33 ++++++++++++
.../webapp/MockDefaultRequestInterceptorREST.java | 20 +++++++
.../webapp/TestFederationInterceptorREST.java | 36 ++++++++++++-
5 files changed, 180 insertions(+), 2 deletions(-)
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/RouterMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/RouterMetrics.java
index 033aa076658..47396371ff4 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/RouterMetrics.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/RouterMetrics.java
@@ -135,6 +135,8 @@ public final class RouterMetrics {
private MutableGaugeInt numRenewDelegationTokenFailedRetrieved;
@Metric("# of renewDelegationToken failed to be retrieved")
private MutableGaugeInt numCancelDelegationTokenFailedRetrieved;
+ @Metric("# of dumpSchedulerLogs failed to be retrieved")
+ private MutableGaugeInt numDumpSchedulerLogsFailedRetrieved;
@Metric("# of getActivities failed to be retrieved")
private MutableGaugeInt numGetActivitiesFailedRetrieved;
@Metric("# of getBulkActivities failed to be retrieved")
@@ -241,6 +243,8 @@ public final class RouterMetrics {
private MutableRate totalSucceededRenewDelegationTokenRetrieved;
@Metric("Total number of successful Retrieved CancelDelegationToken and latency(ms)")
private MutableRate totalSucceededCancelDelegationTokenRetrieved;
+ @Metric("Total number of successful Retrieved DumpSchedulerLogs and latency(ms)")
+ private MutableRate totalSucceededDumpSchedulerLogsRetrieved;
@Metric("Total number of successful Retrieved GetActivities and latency(ms)")
private MutableRate totalSucceededGetActivitiesRetrieved;
@Metric("Total number of successful Retrieved GetBulkActivities and latency(ms)")
@@ -303,6 +307,7 @@ public final class RouterMetrics {
private MutableQuantiles getDelegationTokenLatency;
private MutableQuantiles renewDelegationTokenLatency;
private MutableQuantiles cancelDelegationTokenLatency;
+ private MutableQuantiles dumpSchedulerLogsLatency;
private MutableQuantiles getActivitiesLatency;
private MutableQuantiles getBulkActivitiesLatency;
private MutableQuantiles getSchedulerInfoRetrievedLatency;
@@ -482,6 +487,9 @@ public final class RouterMetrics {
cancelDelegationTokenLatency = registry.newQuantiles("cancelDelegationTokenLatency",
"latency of cancel delegation token timeouts", "ops", "latency", 10);
+ dumpSchedulerLogsLatency = registry.newQuantiles("dumpSchedulerLogsLatency",
+ "latency of dump scheduler logs timeouts", "ops", "latency", 10);
+
getActivitiesLatency = registry.newQuantiles("getActivitiesLatency",
"latency of get activities timeouts", "ops", "latency", 10);
@@ -752,6 +760,11 @@ public final class RouterMetrics {
return totalSucceededCancelDelegationTokenRetrieved.lastStat().numSamples();
}
+ @VisibleForTesting
+ public long getNumSucceededDumpSchedulerLogsRetrieved() {
+ return totalSucceededDumpSchedulerLogsRetrieved.lastStat().numSamples();
+ }
+
@VisibleForTesting
public long getNumSucceededGetActivitiesRetrieved() {
return totalSucceededGetActivitiesRetrieved.lastStat().numSamples();
@@ -1007,6 +1020,11 @@ public final class RouterMetrics {
return totalSucceededCancelDelegationTokenRetrieved.lastStat().mean();
}
+ @VisibleForTesting
+ public double getLatencySucceededDumpSchedulerLogsRetrieved() {
+ return totalSucceededDumpSchedulerLogsRetrieved.lastStat().mean();
+ }
+
@VisibleForTesting
public double getLatencySucceededGetActivitiesRetrieved() {
return totalSucceededGetActivitiesRetrieved.lastStat().mean();
@@ -1245,6 +1263,10 @@ public final class RouterMetrics {
return numCancelDelegationTokenFailedRetrieved.value();
}
+ public int getDumpSchedulerLogsFailedRetrieved() {
+ return numDumpSchedulerLogsFailedRetrieved.value();
+ }
+
public int getActivitiesFailedRetrieved() {
return numGetActivitiesFailedRetrieved.value();
}
@@ -1492,6 +1514,11 @@ public final class RouterMetrics {
cancelDelegationTokenLatency.add(duration);
}
+ public void succeededDumpSchedulerLogsRetrieved(long duration) {
+ totalSucceededDumpSchedulerLogsRetrieved.add(duration);
+ dumpSchedulerLogsLatency.add(duration);
+ }
+
public void succeededGetActivitiesLatencyRetrieved(long duration) {
totalSucceededGetActivitiesRetrieved.add(duration);
getActivitiesLatency.add(duration);
@@ -1713,6 +1740,10 @@ public final class RouterMetrics {
numCancelDelegationTokenFailedRetrieved.incr();
}
+ public void incrDumpSchedulerLogsFailedRetrieved() {
+ numDumpSchedulerLogsFailedRetrieved.incr();
+ }
+
public void incrGetActivitiesFailedRetrieved() {
numGetActivitiesFailedRetrieved.incr();
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/FederationInterceptorREST.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/FederationInterceptorREST.java
index 69dba5b07e6..7cc403a492a 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/FederationInterceptorREST.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/FederationInterceptorREST.java
@@ -1183,10 +1183,70 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
throw new RuntimeException("getSchedulerInfo error.");
}
+ /**
+ * This method dumps the scheduler logs for the time got in input, and it is
+ * reachable by using {@link RMWSConsts#SCHEDULER_LOGS}.
+ *
+ * @param time the period of time. It is a FormParam.
+ * @param hsr the servlet request
+ * @return the result of the operation
+ * @throws IOException when it cannot create dump log file
+ */
@Override
public String dumpSchedulerLogs(String time, HttpServletRequest hsr)
throws IOException {
- throw new NotImplementedException("Code is not implemented");
+
+ // Step1. We will check the time parameter to
+ // ensure that the time parameter is not empty and greater than 0.
+
+ if (StringUtils.isBlank(time)) {
+ routerMetrics.incrDumpSchedulerLogsFailedRetrieved();
+ throw new IllegalArgumentException("Parameter error, the time is empty or null.");
+ }
+
+ try {
+ int period = Integer.parseInt(time);
+ if (period <= 0) {
+ throw new IllegalArgumentException("time must be greater than 0.");
+ }
+ } catch (NumberFormatException e) {
+ routerMetrics.incrDumpSchedulerLogsFailedRetrieved();
+ throw new IllegalArgumentException("time must be a number.");
+ } catch (IllegalArgumentException e) {
+ routerMetrics.incrDumpSchedulerLogsFailedRetrieved();
+ throw e;
+ }
+
+ // Step2. Call dumpSchedulerLogs of each subcluster.
+ try {
+ long startTime = clock.getTime();
+ Map<SubClusterId, SubClusterInfo> subClustersActive = getActiveSubclusters();
+ final HttpServletRequest hsrCopy = clone(hsr);
+ Class[] argsClasses = new Class[]{String.class, HttpServletRequest.class};
+ Object[] args = new Object[]{time, hsrCopy};
+ ClientMethod remoteMethod = new ClientMethod("dumpSchedulerLogs", argsClasses, args);
+ Map<SubClusterInfo, String> dumpSchedulerLogsMap = invokeConcurrent(
+ subClustersActive.values(), remoteMethod, String.class);
+ StringBuilder stringBuilder = new StringBuilder();
+ dumpSchedulerLogsMap.forEach((subClusterInfo, msg) -> {
+ SubClusterId subClusterId = subClusterInfo.getSubClusterId();
+ stringBuilder.append("subClusterId" + subClusterId + " : " + msg + "; ");
+ });
+ long stopTime = clock.getTime();
+ routerMetrics.succeededDumpSchedulerLogsRetrieved(stopTime - startTime);
+ return stringBuilder.toString();
+ } catch (IllegalArgumentException e) {
+ routerMetrics.incrDumpSchedulerLogsFailedRetrieved();
+ RouterServerUtil.logAndThrowRunTimeException(e,
+ "Unable to dump SchedulerLogs by time: %s.", time);
+ } catch (YarnException e) {
+ routerMetrics.incrDumpSchedulerLogsFailedRetrieved();
+ RouterServerUtil.logAndThrowRunTimeException(e,
+ "dumpSchedulerLogs by time = %s error .", time);
+ }
+
+ routerMetrics.incrDumpSchedulerLogsFailedRetrieved();
+ throw new RuntimeException("dumpSchedulerLogs Failed.");
}
/**
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/TestRouterMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/TestRouterMetrics.java
index c26df63c954..b86d85a94fd 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/TestRouterMetrics.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/TestRouterMetrics.java
@@ -534,6 +534,11 @@ public class TestRouterMetrics {
metrics.incrRenewDelegationTokenFailedRetrieved();
}
+ public void getDumpSchedulerLogsFailed() {
+ LOG.info("Mocked: failed DumpSchedulerLogs call");
+ metrics.incrDumpSchedulerLogsFailedRetrieved();
+ }
+
public void getActivitiesFailed() {
LOG.info("Mocked: failed getBulkActivitie call");
metrics.incrGetActivitiesFailedRetrieved();
@@ -774,6 +779,11 @@ public class TestRouterMetrics {
metrics.succeededRenewDelegationTokenRetrieved(duration);
}
+ public void getDumpSchedulerLogsRetrieved(long duration) {
+ LOG.info("Mocked: successful DumpSchedulerLogs call with duration {}", duration);
+ metrics.succeededDumpSchedulerLogsRetrieved(duration);
+ }
+
public void getActivitiesRetrieved(long duration) {
LOG.info("Mocked: successful GetActivities call with duration {}", duration);
metrics.succeededGetActivitiesLatencyRetrieved(duration);
@@ -1618,6 +1628,29 @@ public class TestRouterMetrics {
metrics.getRenewDelegationTokenFailedRetrieved());
}
+ @Test
+ public void testDumpSchedulerLogsRetrieved() {
+ long totalGoodBefore = metrics.getNumSucceededDumpSchedulerLogsRetrieved();
+ goodSubCluster.getDumpSchedulerLogsRetrieved(150);
+ Assert.assertEquals(totalGoodBefore + 1,
+ metrics.getNumSucceededDumpSchedulerLogsRetrieved());
+ Assert.assertEquals(150,
+ metrics.getLatencySucceededDumpSchedulerLogsRetrieved(), ASSERT_DOUBLE_DELTA);
+ goodSubCluster.getDumpSchedulerLogsRetrieved(300);
+ Assert.assertEquals(totalGoodBefore + 2,
+ metrics.getNumSucceededDumpSchedulerLogsRetrieved());
+ Assert.assertEquals(225,
+ metrics.getLatencySucceededDumpSchedulerLogsRetrieved(), ASSERT_DOUBLE_DELTA);
+ }
+
+ @Test
+ public void testDumpSchedulerLogsRetrievedFailed() {
+ long totalBadBefore = metrics.getDumpSchedulerLogsFailedRetrieved();
+ badSubCluster.getDumpSchedulerLogsFailed();
+ Assert.assertEquals(totalBadBefore + 1,
+ metrics.getDumpSchedulerLogsFailedRetrieved());
+ }
+
@Test
public void testGetActivitiesRetrieved() {
long totalGoodBefore = metrics.getNumSucceededGetActivitiesRetrieved();
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/MockDefaultRequestInterceptorREST.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/MockDefaultRequestInterceptorREST.java
index 2e118d172c1..91f3a7d4cea 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/MockDefaultRequestInterceptorREST.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/MockDefaultRequestInterceptorREST.java
@@ -1214,6 +1214,26 @@ public class MockDefaultRequestInterceptorREST
return new RMQueueAclInfo(true, user.getUserName(), "");
}
+
+ public String dumpSchedulerLogs(String time, HttpServletRequest hsr)
+ throws IOException {
+
+ int period = Integer.parseInt(time);
+ if (period <= 0) {
+ throw new BadRequestException("Period must be greater than 0");
+ }
+
+ return "Capacity scheduler logs are being created.";
+ }
+ }
+
+ @Override
+ public String dumpSchedulerLogs(String time, HttpServletRequest hsr) throws IOException {
+ ResourceManager mockResourceManager = mock(ResourceManager.class);
+ Configuration conf = new YarnConfiguration();
+ MockRMWebServices webSvc = new MockRMWebServices(mockResourceManager, conf,
+ mock(HttpServletResponse.class));
+ return webSvc.dumpSchedulerLogs(time, hsr);
}
@Override
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/TestFederationInterceptorREST.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/TestFederationInterceptorREST.java
index edaa1e26e93..b7ddda7d30b 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/TestFederationInterceptorREST.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/webapp/TestFederationInterceptorREST.java
@@ -134,7 +134,6 @@ import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-
import static org.apache.hadoop.yarn.conf.YarnConfiguration.RM_DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT;
import static org.apache.hadoop.yarn.conf.YarnConfiguration.RM_DELEGATION_KEY_UPDATE_INTERVAL_KEY;
import static org.apache.hadoop.yarn.conf.YarnConfiguration.RM_DELEGATION_TOKEN_MAX_LIFETIME_DEFAULT;
@@ -1785,6 +1784,41 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest {
Assert.assertEquals(response.getStatus(), Status.OK.getStatusCode());
}
+ @Test
+ public void testDumpSchedulerLogs() throws Exception {
+ HttpServletRequest mockHsr = mockHttpServletRequestByUserName("admin");
+ String dumpSchedulerLogsMsg = interceptor.dumpSchedulerLogs("1", mockHsr);
+
+ // We cannot guarantee the calling order of the sub-clusters,
+ // We guarantee that the returned result contains the information of each subCluster.
+ Assert.assertNotNull(dumpSchedulerLogsMsg);
+ subClusters.stream().forEach(subClusterId -> {
+ String subClusterMsg =
+ "subClusterId" + subClusterId + " : Capacity scheduler logs are being created.; ";
+ Assert.assertTrue(dumpSchedulerLogsMsg.contains(subClusterMsg));
+ });
+ }
+
+ @Test
+ public void testDumpSchedulerLogsError() throws Exception {
+ HttpServletRequest mockHsr = mockHttpServletRequestByUserName("admin");
+
+ // time is empty
+ LambdaTestUtils.intercept(IllegalArgumentException.class,
+ "Parameter error, the time is empty or null.",
+ () -> interceptor.dumpSchedulerLogs(null, mockHsr));
+
+ // time is negative
+ LambdaTestUtils.intercept(IllegalArgumentException.class,
+ "time must be greater than 0.",
+ () -> interceptor.dumpSchedulerLogs("-1", mockHsr));
+
+ // time is non-numeric
+ LambdaTestUtils.intercept(IllegalArgumentException.class,
+ "time must be a number.",
+ () -> interceptor.dumpSchedulerLogs("abc", mockHsr));
+ }
+
@Test
public void testGetActivitiesNormal() {
ActivitiesInfo activitiesInfo = interceptor.getActivities(null, "1", "DIAGNOSTIC");
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org