You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by xg...@apache.org on 2015/09/08 02:26:47 UTC
hadoop git commit: YARN-2019. Retrospect on decision of making RM
crashed if any exception throw in ZKRMStateStore. Contributed by Jian He.
Repository: hadoop
Updated Branches:
refs/heads/branch-2.7 e8a15495b -> 96b9455c6
YARN-2019. Retrospect on decision of making RM crashed if any exception throw in ZKRMStateStore. Contributed by Jian He.
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/96b9455c
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/96b9455c
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/96b9455c
Branch: refs/heads/branch-2.7
Commit: 96b9455c6f2c0e4a2bf42b4e87b9845e74a3afe4
Parents: e8a1549
Author: Xuan <xg...@apache.org>
Authored: Mon Sep 7 17:23:56 2015 -0700
Committer: Xuan <xg...@apache.org>
Committed: Mon Sep 7 17:23:56 2015 -0700
----------------------------------------------------------------------
hadoop-yarn-project/CHANGES.txt | 3 +++
.../apache/hadoop/yarn/conf/YarnConfiguration.java | 11 +++++++++++
.../src/main/resources/yarn-default.xml | 16 ++++++++++++++++
.../resourcemanager/recovery/RMStateStore.java | 9 +++++++--
4 files changed, 37 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/96b9455c/hadoop-yarn-project/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index fef462a..cc5a02f 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -6,6 +6,9 @@ Release 2.7.2 - UNRELEASED
NEW FEATURES
+ YARN-2019. Retrospect on decision of making RM crashed if any exception throw
+ in ZKRMStateStore. (Jian He via junping_du)
+
IMPROVEMENTS
YARN-3170. YARN architecture document needs updating. (Brahma Reddy Battula
http://git-wip-us.apache.org/repos/asf/hadoop/blob/96b9455c/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
index 9e391a1..f98db44 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
@@ -401,6 +401,11 @@ public class YarnConfiguration extends Configuration {
public static final String RECOVERY_ENABLED = RM_PREFIX + "recovery.enabled";
public static final boolean DEFAULT_RM_RECOVERY_ENABLED = false;
+ public static final String YARN_FAIL_FAST = YARN_PREFIX + "fail-fast";
+ public static final boolean DEFAULT_YARN_FAIL_FAST = true;
+
+ public static final String RM_FAIL_FAST = RM_PREFIX + "fail-fast";
+
@Private
public static final String RM_WORK_PRESERVING_RECOVERY_ENABLED = RM_PREFIX
+ "work-preserving-recovery.enabled";
@@ -1836,6 +1841,12 @@ public class YarnConfiguration extends Configuration {
YARN_HTTP_POLICY_DEFAULT));
}
+ public static boolean shouldRMFailFast(Configuration conf) {
+ return conf.getBoolean(YarnConfiguration.RM_FAIL_FAST,
+ conf.getBoolean(YarnConfiguration.YARN_FAIL_FAST,
+ YarnConfiguration.DEFAULT_YARN_FAIL_FAST));
+ }
+
@Private
public static String getClusterId(Configuration conf) {
String clusterId = conf.get(YarnConfiguration.RM_CLUSTER_ID);
http://git-wip-us.apache.org/repos/asf/hadoop/blob/96b9455c/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
index 4e6d7fe..4827b9a 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
@@ -288,6 +288,22 @@
</property>
<property>
+ <description>Should RM fail fast if it encounters any errors. By default, it
+ points to ${yarn.fail-fast}. Errors include:
+ 1) exceptions when state-store write/read operations fail.
+ </description>
+ <name>yarn.resourcemanager.fail-fast</name>
+ <value>${yarn.fail-fast}</value>
+ </property>
+
+ <property>
+ <description>Should YARN fail fast if it encounters any errors.
+ </description>
+ <name>yarn.fail-fast</name>
+ <value>true</value>
+ </property>
+
+ <property>
<description>Enable RM work preserving recovery. This configuration is private
to YARN for experimenting the feature.
</description>
http://git-wip-us.apache.org/repos/asf/hadoop/blob/96b9455c/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java
index bccde53..95977ea 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java
@@ -44,6 +44,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationSubmissionContextPBImpl;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.event.AsyncDispatcher;
import org.apache.hadoop.yarn.event.Dispatcher;
import org.apache.hadoop.yarn.event.EventHandler;
@@ -855,6 +856,7 @@ public abstract class RMStateStore extends AbstractService {
* @param failureCause the exception due to which the operation failed
*/
protected void notifyStoreOperationFailed(Exception failureCause) {
+ LOG.error("State store operation failed ", failureCause);
if (failureCause instanceof StoreFencedException) {
updateFencedState();
Thread standByTransitionThread =
@@ -862,8 +864,11 @@ public abstract class RMStateStore extends AbstractService {
standByTransitionThread.setName("StandByTransitionThread Handler");
standByTransitionThread.start();
} else {
- rmDispatcher.getEventHandler().handle(
- new RMFatalEvent(RMFatalEventType.STATE_STORE_OP_FAILED, failureCause));
+ if (YarnConfiguration.shouldRMFailFast(getConfig())) {
+ rmDispatcher.getEventHandler().handle(
+ new RMFatalEvent(RMFatalEventType.STATE_STORE_OP_FAILED,
+ failureCause));
+ }
}
}