You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by we...@apache.org on 2020/12/31 06:20:36 UTC
[hadoop] branch trunk updated: YARN-10541. capture the performance
metrics of ZKRMStateStore (#2568)
This is an automated email from the ASF dual-hosted git repository.
weichiu pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/trunk by this push:
new fa4cf91 YARN-10541. capture the performance metrics of ZKRMStateStore (#2568)
fa4cf91 is described below
commit fa4cf91b5750327ed67355c35f1ed0f10bb0100d
Author: Neil <wx...@foxmail.com>
AuthorDate: Thu Dec 31 14:20:12 2020 +0800
YARN-10541. capture the performance metrics of ZKRMStateStore (#2568)
---
.../resourcemanager/recovery/ZKRMStateStore.java | 16 ++++
.../recovery/ZKRMStateStoreOpDurations.java | 97 ++++++++++++++++++++++
.../recovery/TestZKRMStateStore.java | 39 +++++++++
3 files changed, 152 insertions(+)
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKRMStateStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKRMStateStore.java
index 7f39fe1..55c9d9e 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKRMStateStore.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKRMStateStore.java
@@ -19,6 +19,8 @@
package org.apache.hadoop.yarn.server.resourcemanager.recovery;
import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting;
+import org.apache.hadoop.yarn.util.Clock;
+import org.apache.hadoop.yarn.util.SystemClock;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.curator.framework.CuratorFramework;
@@ -234,6 +236,10 @@ public class ZKRMStateStore extends RMStateStore {
/** Manager for the ZooKeeper connection. */
private ZKCuratorManager zkManager;
+ private volatile Clock clock = SystemClock.getInstance();
+ @VisibleForTesting
+ protected ZKRMStateStoreOpDurations opDurations;
+
/*
* Indicates different app attempt state store operations.
*/
@@ -329,6 +335,8 @@ public class ZKRMStateStore extends RMStateStore {
appIdNodeSplitIndex = YarnConfiguration.DEFAULT_ZK_APPID_NODE_SPLIT_INDEX;
}
+ opDurations = ZKRMStateStoreOpDurations.getInstance();
+
zkAcl = ZKCuratorManager.getZKAcls(conf);
if (HAUtil.isHAEnabled(conf)) {
@@ -518,6 +526,7 @@ public class ZKRMStateStore extends RMStateStore {
@Override
public synchronized RMState loadState() throws Exception {
+ long start = clock.getTime();
RMState rmState = new RMState();
// recover DelegationTokenSecretManager
loadRMDTSecretManagerState(rmState);
@@ -529,6 +538,7 @@ public class ZKRMStateStore extends RMStateStore {
loadReservationSystemState(rmState);
// recover ProxyCAManager state
loadProxyCAManagerState(rmState);
+ opDurations.addLoadStateCallDuration(clock.getTime() - start);
return rmState;
}
@@ -834,6 +844,7 @@ public class ZKRMStateStore extends RMStateStore {
@Override
public synchronized void storeApplicationStateInternal(ApplicationId appId,
ApplicationStateData appStateDataPB) throws Exception {
+ long start = clock.getTime();
String nodeCreatePath = getLeafAppIdNodePath(appId.toString(), true);
LOG.debug("Storing info for app: {} at: {}", appId, nodeCreatePath);
@@ -850,12 +861,14 @@ public class ZKRMStateStore extends RMStateStore {
+ " exceeds the maximum allowed size for application data. "
+ "See yarn.resourcemanager.zk-max-znode-size.bytes.");
}
+ opDurations.addStoreApplicationStateCallDuration(clock.getTime() - start);
}
@Override
protected synchronized void updateApplicationStateInternal(
ApplicationId appId, ApplicationStateData appStateDataPB)
throws Exception {
+ long start = clock.getTime();
String nodeUpdatePath = getLeafAppIdNodePath(appId.toString(), false);
boolean pathExists = true;
// Look for paths based on other split indices if path as per split index
@@ -892,6 +905,7 @@ public class ZKRMStateStore extends RMStateStore {
LOG.debug("Path {} for {} didn't exist. Creating a new znode to update"
+ " the application state.", nodeUpdatePath, appId);
}
+ opDurations.addUpdateApplicationStateCallDuration(clock.getTime() - start);
}
/*
@@ -976,8 +990,10 @@ public class ZKRMStateStore extends RMStateStore {
@Override
protected synchronized void removeApplicationStateInternal(
ApplicationStateData appState) throws Exception {
+ long start = clock.getTime();
removeApp(appState.getApplicationSubmissionContext().
getApplicationId().toString(), true, appState.attempts.keySet());
+ opDurations.addRemoveApplicationStateCallDuration(clock.getTime() - start);
}
private void removeApp(String removeAppId) throws Exception {
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKRMStateStoreOpDurations.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKRMStateStoreOpDurations.java
new file mode 100644
index 0000000..f1ec241
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKRMStateStoreOpDurations.java
@@ -0,0 +1,97 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.yarn.server.resourcemanager.recovery;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.metrics2.MetricsCollector;
+import org.apache.hadoop.metrics2.MetricsInfo;
+import org.apache.hadoop.metrics2.MetricsSource;
+import org.apache.hadoop.metrics2.MetricsSystem;
+import org.apache.hadoop.metrics2.annotation.Metric;
+import org.apache.hadoop.metrics2.annotation.Metrics;
+import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
+import org.apache.hadoop.metrics2.lib.MetricsRegistry;
+
+import static org.apache.hadoop.metrics2.lib.Interns.info;
+import org.apache.hadoop.metrics2.lib.MutableRate;
+
+/**
+ * Class to capture the performance metrics of ZKRMStateStore.
+ * This should be a singleton.
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Unstable
+@Metrics(context="ZKRMStateStore-op-durations")
+public final class ZKRMStateStoreOpDurations implements MetricsSource {
+
+ @Metric("Duration for a load state call")
+ MutableRate loadStateCall;
+
+ @Metric("Duration for a store application state call")
+ MutableRate storeApplicationStateCall;
+
+ @Metric("Duration for a update application state call")
+ MutableRate updateApplicationStateCall;
+
+ @Metric("Duration to handle a remove application state call")
+ MutableRate removeApplicationStateCall;
+
+ protected static final MetricsInfo RECORD_INFO =
+ info("ZKRMStateStoreOpDurations", "Durations of ZKRMStateStore calls");
+
+ private final MetricsRegistry registry;
+
+ private static final ZKRMStateStoreOpDurations INSTANCE
+ = new ZKRMStateStoreOpDurations();
+
+ public static ZKRMStateStoreOpDurations getInstance() {
+ return INSTANCE;
+ }
+
+ private ZKRMStateStoreOpDurations() {
+ registry = new MetricsRegistry(RECORD_INFO);
+ registry.tag(RECORD_INFO, "ZKRMStateStoreOpDurations");
+
+ MetricsSystem ms = DefaultMetricsSystem.instance();
+ if (ms != null) {
+ ms.register(RECORD_INFO.name(), RECORD_INFO.description(), this);
+ }
+ }
+
+ @Override
+ public synchronized void getMetrics(MetricsCollector collector, boolean all) {
+ registry.snapshot(collector.addRecord(registry.info()), all);
+ }
+
+ public void addLoadStateCallDuration(long value) {
+ loadStateCall.add(value);
+ }
+
+ public void addStoreApplicationStateCallDuration(long value) {
+ storeApplicationStateCall.add(value);
+ }
+
+ public void addUpdateApplicationStateCallDuration(long value) {
+ updateApplicationStateCall.add(value);
+ }
+
+ public void addRemoveApplicationStateCallDuration(long value) {
+ removeApplicationStateCall.add(value);
+ }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestZKRMStateStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestZKRMStateStore.java
index 69b9be2..7ffaba5 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestZKRMStateStore.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestZKRMStateStore.java
@@ -22,6 +22,9 @@ import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.framework.CuratorFrameworkFactory;
import org.apache.curator.retry.RetryNTimes;
import org.apache.curator.test.TestingServer;
+import org.apache.hadoop.metrics2.MetricsRecord;
+import org.apache.hadoop.metrics2.impl.MetricsCollectorImpl;
+import org.apache.hadoop.metrics2.impl.MetricsRecords;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
@@ -1567,4 +1570,40 @@ public class TestZKRMStateStore extends RMStateStoreTestBase {
Collections.emptyMap(), ctx.getApplicationSchedulingPropertiesMap());
store.close();
}
+
+ @Test
+ public void testMetricsInited() throws Exception {
+ TestZKRMStateStoreTester zkTester = new TestZKRMStateStoreTester();
+ Configuration conf = createConfForDelegationTokenNodeSplit(1);
+ MetricsCollectorImpl collector = new MetricsCollectorImpl();
+ ZKRMStateStoreOpDurations opDurations =
+ ((ZKRMStateStore)zkTester.getRMStateStore(conf)).opDurations;
+
+ long anyDuration = 10;
+ opDurations.addLoadStateCallDuration(anyDuration);
+ opDurations.addStoreApplicationStateCallDuration(anyDuration);
+ opDurations.addUpdateApplicationStateCallDuration(anyDuration);
+ opDurations.addRemoveApplicationStateCallDuration(anyDuration);
+
+ Thread.sleep(110);
+
+ opDurations.getMetrics(collector, true);
+ assertEquals("Incorrect number of perf metrics", 1,
+ collector.getRecords().size());
+ MetricsRecord record = collector.getRecords().get(0);
+ MetricsRecords.assertTag(record,
+ ZKRMStateStoreOpDurations.RECORD_INFO.name(),
+ "ZKRMStateStoreOpDurations");
+
+ double expectAvgTime = anyDuration;
+ MetricsRecords.assertMetric(record,
+ "LoadStateCallAvgTime", expectAvgTime);
+ MetricsRecords.assertMetric(record,
+ "StoreApplicationStateCallAvgTime", expectAvgTime);
+ MetricsRecords.assertMetric(record,
+ "UpdateApplicationStateCallAvgTime", expectAvgTime);
+ MetricsRecords.assertMetric(record,
+ "RemoveApplicationStateCallAvgTime", expectAvgTime);
+ }
+
}
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org