You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by sn...@apache.org on 2021/07/29 17:23:12 UTC
[hadoop] branch branch-3.2 updated: YARN-10789. RM HA startup can fail due to race conditions in ZKConfigurationStore. Contributed by Tarun Parimi
This is an automated email from the ASF dual-hosted git repository.
snemeth pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/branch-3.2 by this push:
new 7a8b626 YARN-10789. RM HA startup can fail due to race conditions in ZKConfigurationStore. Contributed by Tarun Parimi
7a8b626 is described below
commit 7a8b6265c626c45d3af099723f0b1b9b76dd5cb4
Author: Szilard Nemeth <sn...@apache.org>
AuthorDate: Thu Jul 29 19:22:57 2021 +0200
YARN-10789. RM HA startup can fail due to race conditions in ZKConfigurationStore. Contributed by Tarun Parimi
---
.../capacity/conf/ZKConfigurationStore.java | 25 +++++++++++++++++-----
1 file changed, 20 insertions(+), 5 deletions(-)
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/conf/ZKConfigurationStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/conf/ZKConfigurationStore.java
index 3742c36..6f3612c 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/conf/ZKConfigurationStore.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/conf/ZKConfigurationStore.java
@@ -19,6 +19,7 @@
package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.conf;
import com.google.common.annotations.VisibleForTesting;
+import org.apache.zookeeper.KeeperException.NodeExistsException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
@@ -62,7 +63,8 @@ public class ZKConfigurationStore extends YarnConfigurationStore {
private static final String CONF_STORE_PATH = "CONF_STORE";
private static final String FENCING_PATH = "FENCING";
private static final String CONF_VERSION_PATH = "CONF_VERSION";
-
+ private static final String NODEEXISTS_MSG = "Encountered NodeExists error."
+ + " Skipping znode creation since another RM has already created it";
private String zkVersionPath;
private String logsPath;
private String confStorePath;
@@ -93,7 +95,11 @@ public class ZKConfigurationStore extends YarnConfigurationStore {
this.fencingNodePath = getNodePath(znodeParentPath, FENCING_PATH);
this.confVersionPath = getNodePath(znodeParentPath, CONF_VERSION_PATH);
- zkManager.createRootDirRecursively(znodeParentPath, zkAcl);
+ try {
+ zkManager.createRootDirRecursively(znodeParentPath, zkAcl);
+ } catch(NodeExistsException e) {
+ LOG.warn(NODEEXISTS_MSG, e);
+ }
zkManager.delete(fencingNodePath);
if (createNewZkPath(logsPath)) {
@@ -247,7 +253,12 @@ public class ZKConfigurationStore extends YarnConfigurationStore {
*/
private boolean createNewZkPath(String path) throws Exception {
if (!zkManager.exists(path)) {
- zkManager.create(path);
+ try {
+ zkManager.create(path);
+ } catch(NodeExistsException e) {
+ LOG.warn(NODEEXISTS_MSG, e);
+ return false;
+ }
return true;
} else {
return false;
@@ -282,8 +293,12 @@ public class ZKConfigurationStore extends YarnConfigurationStore {
@VisibleForTesting
protected void safeCreateZkData(String path, byte[] data) throws Exception {
- zkManager.safeCreate(path, data, zkAcl, CreateMode.PERSISTENT,
- zkAcl, fencingNodePath);
+ try {
+ zkManager.safeCreate(path, data, zkAcl, CreateMode.PERSISTENT,
+ zkAcl, fencingNodePath);
+ } catch(NodeExistsException e) {
+ LOG.warn(NODEEXISTS_MSG, e);
+ }
}
private static String getNodePath(String root, String nodeName) {
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org