You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by sn...@apache.org on 2021/07/29 17:23:12 UTC

[hadoop] branch branch-3.2 updated: YARN-10789. RM HA startup can fail due to race conditions in ZKConfigurationStore. Contributed by Tarun Parimi

This is an automated email from the ASF dual-hosted git repository.

snemeth pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/branch-3.2 by this push:
     new 7a8b626  YARN-10789. RM HA startup can fail due to race conditions in ZKConfigurationStore. Contributed by Tarun Parimi
7a8b626 is described below

commit 7a8b6265c626c45d3af099723f0b1b9b76dd5cb4
Author: Szilard Nemeth <sn...@apache.org>
AuthorDate: Thu Jul 29 19:22:57 2021 +0200

    YARN-10789. RM HA startup can fail due to race conditions in ZKConfigurationStore. Contributed by Tarun Parimi
---
 .../capacity/conf/ZKConfigurationStore.java        | 25 +++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/conf/ZKConfigurationStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/conf/ZKConfigurationStore.java
index 3742c36..6f3612c 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/conf/ZKConfigurationStore.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/conf/ZKConfigurationStore.java
@@ -19,6 +19,7 @@
 package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.conf;
 
 import com.google.common.annotations.VisibleForTesting;
+import org.apache.zookeeper.KeeperException.NodeExistsException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -62,7 +63,8 @@ public class ZKConfigurationStore extends YarnConfigurationStore {
   private static final String CONF_STORE_PATH = "CONF_STORE";
   private static final String FENCING_PATH = "FENCING";
   private static final String CONF_VERSION_PATH = "CONF_VERSION";
-
+  private static final String NODEEXISTS_MSG = "Encountered NodeExists error."
+      + " Skipping znode creation since another RM has already created it";
   private String zkVersionPath;
   private String logsPath;
   private String confStorePath;
@@ -93,7 +95,11 @@ public class ZKConfigurationStore extends YarnConfigurationStore {
     this.fencingNodePath = getNodePath(znodeParentPath, FENCING_PATH);
     this.confVersionPath = getNodePath(znodeParentPath, CONF_VERSION_PATH);
 
-    zkManager.createRootDirRecursively(znodeParentPath, zkAcl);
+    try {
+      zkManager.createRootDirRecursively(znodeParentPath, zkAcl);
+    } catch(NodeExistsException e) {
+      LOG.warn(NODEEXISTS_MSG, e);
+    }
     zkManager.delete(fencingNodePath);
 
     if (createNewZkPath(logsPath)) {
@@ -247,7 +253,12 @@ public class ZKConfigurationStore extends YarnConfigurationStore {
    */
   private boolean createNewZkPath(String path) throws Exception {
     if (!zkManager.exists(path)) {
-      zkManager.create(path);
+      try {
+        zkManager.create(path);
+      } catch(NodeExistsException e) {
+        LOG.warn(NODEEXISTS_MSG, e);
+        return false;
+      }
       return true;
     } else {
       return false;
@@ -282,8 +293,12 @@ public class ZKConfigurationStore extends YarnConfigurationStore {
 
   @VisibleForTesting
   protected void safeCreateZkData(String path, byte[] data) throws Exception {
-    zkManager.safeCreate(path, data, zkAcl, CreateMode.PERSISTENT,
-        zkAcl, fencingNodePath);
+    try {
+      zkManager.safeCreate(path, data, zkAcl, CreateMode.PERSISTENT,
+          zkAcl, fencingNodePath);
+    } catch(NodeExistsException e) {
+      LOG.warn(NODEEXISTS_MSG, e);
+    }
   }
 
   private static String getNodePath(String root, String nodeName) {

---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org