You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by ey...@apache.org on 2018/05/17 21:19:47 UTC
[1/2] hadoop git commit: YARN-8080. Add restart policy for YARN
services. Contributed by Suma Shivaprasad
Repository: hadoop
Updated Branches:
refs/heads/trunk 7802af6e9 -> 7f083ed86
http://git-wip-us.apache.org/repos/asf/hadoop/blob/7f083ed8/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/component/instance/TestComponentInstance.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/component/instance/TestComponentInstance.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/component/instance/TestComponentInstance.java
index 0b56d7e..26e8c93 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/component/instance/TestComponentInstance.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/component/instance/TestComponentInstance.java
@@ -6,9 +6,9 @@
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -18,56 +18,80 @@
package org.apache.hadoop.yarn.service.component.instance;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.ContainerStatus;
+import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
import org.apache.hadoop.yarn.service.ServiceContext;
+import org.apache.hadoop.yarn.service.ServiceScheduler;
import org.apache.hadoop.yarn.service.ServiceTestUtils;
import org.apache.hadoop.yarn.service.api.records.Container;
import org.apache.hadoop.yarn.service.api.records.ContainerState;
+import org.apache.hadoop.yarn.service.api.records.Service;
import org.apache.hadoop.yarn.service.component.Component;
import org.apache.hadoop.yarn.service.component.ComponentEvent;
import org.apache.hadoop.yarn.service.component.ComponentEventType;
import org.apache.hadoop.yarn.service.component.TestComponent;
+import org.apache.hadoop.yarn.service.utils.ServiceUtils;
import org.junit.Assert;
import org.junit.Rule;
import org.junit.Test;
+import org.mockito.Mockito;
+
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+
+import static org.mockito.Matchers.any;
+import static org.mockito.Matchers.anyInt;
+import static org.mockito.Matchers.eq;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.never;
+import static org.mockito.Mockito.spy;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
/**
* Tests for {@link ComponentInstance}.
*/
public class TestComponentInstance {
- @Rule
- public ServiceTestUtils.ServiceFSWatcher rule =
+ @Rule public ServiceTestUtils.ServiceFSWatcher rule =
new ServiceTestUtils.ServiceFSWatcher();
- @Test
- public void testContainerUpgrade() throws Exception {
+ @Test public void testContainerUpgrade() throws Exception {
ServiceContext context = TestComponent.createTestContext(rule,
"testContainerUpgrade");
- Component component = context.scheduler.getAllComponents().entrySet()
- .iterator().next().getValue();
+ Component component =
+ context.scheduler.getAllComponents().entrySet().iterator().next()
+ .getValue();
upgradeComponent(component);
- ComponentInstance instance = component.getAllComponentInstances()
- .iterator().next();
+ ComponentInstance instance =
+ component.getAllComponentInstances().iterator().next();
ComponentInstanceEvent instanceEvent = new ComponentInstanceEvent(
instance.getContainer().getId(), ComponentInstanceEventType.UPGRADE);
instance.handle(instanceEvent);
Container containerSpec = component.getComponentSpec().getContainer(
instance.getContainer().getId().toString());
- Assert.assertEquals("instance not upgrading",
- ContainerState.UPGRADING, containerSpec.getState());
+ Assert.assertEquals("instance not upgrading", ContainerState.UPGRADING,
+ containerSpec.getState());
}
- @Test
- public void testContainerReadyAfterUpgrade() throws Exception {
+ @Test public void testContainerReadyAfterUpgrade() throws Exception {
ServiceContext context = TestComponent.createTestContext(rule,
"testContainerStarted");
- Component component = context.scheduler.getAllComponents().entrySet()
- .iterator().next().getValue();
+ Component component =
+ context.scheduler.getAllComponents().entrySet().iterator().next()
+ .getValue();
upgradeComponent(component);
- ComponentInstance instance = component.getAllComponentInstances()
- .iterator().next();
+ ComponentInstance instance =
+ component.getAllComponentInstances().iterator().next();
ComponentInstanceEvent instanceEvent = new ComponentInstanceEvent(
instance.getContainer().getId(), ComponentInstanceEventType.UPGRADE);
@@ -75,14 +99,426 @@ public class TestComponentInstance {
instance.handle(new ComponentInstanceEvent(instance.getContainer().getId(),
ComponentInstanceEventType.BECOME_READY));
- Assert.assertEquals("instance not ready",
- ContainerState.READY, instance.getCompSpec().getContainer(
- instance.getContainer().getId().toString()).getState());
+ Assert.assertEquals("instance not ready", ContainerState.READY,
+ instance.getCompSpec()
+ .getContainer(instance.getContainer().getId().toString())
+ .getState());
}
private void upgradeComponent(Component component) {
component.handle(new ComponentEvent(component.getName(),
- ComponentEventType.UPGRADE)
- .setTargetSpec(component.getComponentSpec()).setUpgradeVersion("v2"));
+ ComponentEventType.UPGRADE).setTargetSpec(component.getComponentSpec())
+ .setUpgradeVersion("v2"));
+ }
+
+ private Component createComponent(ServiceScheduler scheduler,
+ org.apache.hadoop.yarn.service.api.records.Component.RestartPolicyEnum
+ restartPolicy,
+ int nSucceededInstances, int nFailedInstances, int totalAsk,
+ int componentId) {
+
+ assert (nSucceededInstances + nFailedInstances) <= totalAsk;
+
+ Component comp = mock(Component.class);
+ org.apache.hadoop.yarn.service.api.records.Component componentSpec = mock(
+ org.apache.hadoop.yarn.service.api.records.Component.class);
+ when(componentSpec.getRestartPolicy()).thenReturn(restartPolicy);
+ when(comp.getRestartPolicyHandler()).thenReturn(
+ Component.getRestartPolicyHandler(restartPolicy));
+ when(componentSpec.getNumberOfContainers()).thenReturn(
+ Long.valueOf(totalAsk));
+ when(comp.getComponentSpec()).thenReturn(componentSpec);
+ when(comp.getScheduler()).thenReturn(scheduler);
+
+ Map<String, ComponentInstance> succeeded = new ConcurrentHashMap<>();
+ Map<String, ComponentInstance> failed = new ConcurrentHashMap<>();
+ scheduler.getAllComponents().put("comp" + componentId, comp);
+
+ Map<String, ComponentInstance> componentInstances = new HashMap<>();
+
+ for (int i = 0; i < nSucceededInstances; i++) {
+ ComponentInstance componentInstance = createComponentInstance(comp, i);
+ componentInstances.put(componentInstance.getCompInstanceName(),
+ componentInstance);
+ succeeded.put(componentInstance.getCompInstanceName(), componentInstance);
+ }
+
+ for (int i = 0; i < nFailedInstances; i++) {
+ ComponentInstance componentInstance = createComponentInstance(comp,
+ i + nSucceededInstances);
+ componentInstances.put(componentInstance.getCompInstanceName(),
+ componentInstance);
+ failed.put(componentInstance.getCompInstanceName(), componentInstance);
+ }
+
+ int delta = totalAsk - nFailedInstances - nSucceededInstances;
+
+ for (int i = 0; i < delta; i++) {
+ ComponentInstance componentInstance = createComponentInstance(comp,
+ i + nSucceededInstances + nFailedInstances);
+ componentInstances.put(componentInstance.getCompInstanceName(),
+ componentInstance);
+ }
+
+ when(comp.getAllComponentInstances()).thenReturn(
+ componentInstances.values());
+ when(comp.getSucceededInstances()).thenReturn(succeeded.values());
+ when(comp.getFailedInstances()).thenReturn(failed.values());
+ return comp;
+ }
+
+ private Component createComponent(ServiceScheduler scheduler,
+ org.apache.hadoop.yarn.service.api.records.Component.RestartPolicyEnum
+ restartPolicy,
+ int totalAsk, int componentId) {
+
+ Component comp = mock(Component.class);
+ org.apache.hadoop.yarn.service.api.records.Component componentSpec = mock(
+ org.apache.hadoop.yarn.service.api.records.Component.class);
+ when(componentSpec.getRestartPolicy()).thenReturn(restartPolicy);
+ when(comp.getRestartPolicyHandler()).thenReturn(
+ Component.getRestartPolicyHandler(restartPolicy));
+ when(componentSpec.getNumberOfContainers()).thenReturn(
+ Long.valueOf(totalAsk));
+ when(comp.getComponentSpec()).thenReturn(componentSpec);
+ when(comp.getScheduler()).thenReturn(scheduler);
+
+ scheduler.getAllComponents().put("comp" + componentId, comp);
+
+ Map<String, ComponentInstance> componentInstances = new HashMap<>();
+
+ for (int i = 0; i < totalAsk; i++) {
+ ComponentInstance componentInstance = createComponentInstance(comp, i);
+ componentInstances.put(componentInstance.getCompInstanceName(),
+ componentInstance);
+ }
+
+ when(comp.getAllComponentInstances()).thenReturn(
+ componentInstances.values());
+ return comp;
+ }
+
+ private ComponentInstance createComponentInstance(Component component,
+ int instanceId) {
+
+ ComponentInstance componentInstance = mock(ComponentInstance.class);
+ when(componentInstance.getComponent()).thenReturn(component);
+ when(componentInstance.getCompInstanceName()).thenReturn(
+ "compInstance" + instanceId);
+
+ ServiceUtils.ProcessTerminationHandler terminationHandler = mock(
+ ServiceUtils.ProcessTerminationHandler.class);
+ when(component.getScheduler().getTerminationHandler()).thenReturn(
+ terminationHandler);
+
+ return componentInstance;
+ }
+
+ @Test public void testComponentRestartPolicy() {
+
+ Map<String, Component> allComponents = new HashMap<>();
+ Service mockService = mock(Service.class);
+ ServiceContext serviceContext = mock(ServiceContext.class);
+ when(serviceContext.getService()).thenReturn(mockService);
+ ServiceScheduler serviceSchedulerInstance = new ServiceScheduler(
+ serviceContext);
+ ServiceScheduler serviceScheduler = spy(serviceSchedulerInstance);
+ when(serviceScheduler.getAllComponents()).thenReturn(allComponents);
+ Mockito.doNothing().when(serviceScheduler).setGracefulStop(
+ any(FinalApplicationStatus.class));
+
+ ComponentInstanceEvent componentInstanceEvent = mock(
+ ComponentInstanceEvent.class);
+ ContainerId containerId = ContainerId.newContainerId(ApplicationAttemptId
+ .newInstance(ApplicationId.newInstance(1234L, 1), 1), 1);
+ ContainerStatus containerStatus = ContainerStatus.newInstance(containerId,
+ org.apache.hadoop.yarn.api.records.ContainerState.COMPLETE, "hello", 0);
+
+ when(componentInstanceEvent.getStatus()).thenReturn(containerStatus);
+
+ // Test case1: one component, one instance, restart policy = ALWAYS, exit=0
+ Component comp = createComponent(serviceScheduler,
+ org.apache.hadoop.yarn.service.api.records.Component
+ .RestartPolicyEnum.ALWAYS,
+ 1, 0, 1, 0);
+ ComponentInstance componentInstance =
+ comp.getAllComponentInstances().iterator().next();
+
+ ComponentInstance.handleComponentInstanceRelaunch(componentInstance,
+ componentInstanceEvent);
+
+ verify(comp, never()).markAsSucceeded(any(ComponentInstance.class));
+ verify(comp, never()).markAsFailed(any(ComponentInstance.class));
+ verify(comp, times(1)).reInsertPendingInstance(
+ any(ComponentInstance.class));
+ verify(serviceScheduler.getTerminationHandler(), never()).terminate(
+ anyInt());
+
+ // Test case2: one component, one instance, restart policy = ALWAYS, exit=1
+ comp = createComponent(serviceScheduler,
+ org.apache.hadoop.yarn.service.api.records.Component
+ .RestartPolicyEnum.ALWAYS,
+ 0, 1, 1, 0);
+ componentInstance = comp.getAllComponentInstances().iterator().next();
+ containerStatus.setExitStatus(1);
+ ComponentInstance.handleComponentInstanceRelaunch(componentInstance,
+ componentInstanceEvent);
+ verify(comp, never()).markAsSucceeded(any(ComponentInstance.class));
+ verify(comp, never()).markAsFailed(any(ComponentInstance.class));
+ verify(comp, times(1)).reInsertPendingInstance(
+ any(ComponentInstance.class));
+ verify(serviceScheduler.getTerminationHandler(), never()).terminate(
+ anyInt());
+
+ // Test case3: one component, one instance, restart policy = NEVER, exit=0
+ // Should exit with code=0
+ comp = createComponent(serviceScheduler,
+ org.apache.hadoop.yarn.service.api.records.Component
+ .RestartPolicyEnum.NEVER,
+ 1, 0, 1, 0);
+ componentInstance = comp.getAllComponentInstances().iterator().next();
+ containerStatus.setExitStatus(0);
+
+ Map<String, ComponentInstance> succeededInstances = new HashMap<>();
+ succeededInstances.put(componentInstance.getCompInstanceName(),
+ componentInstance);
+ when(comp.getSucceededInstances()).thenReturn(succeededInstances.values());
+ when(comp.getNumSucceededInstances()).thenReturn(new Long(1));
+
+ ComponentInstance.handleComponentInstanceRelaunch(componentInstance,
+ componentInstanceEvent);
+ verify(comp, times(1)).markAsSucceeded(any(ComponentInstance.class));
+ verify(comp, never()).markAsFailed(any(ComponentInstance.class));
+ verify(comp, times(0)).reInsertPendingInstance(
+ any(ComponentInstance.class));
+ verify(serviceScheduler.getTerminationHandler(), times(1)).terminate(eq(0));
+
+ // Test case4: one component, one instance, restart policy = NEVER, exit=1
+ // Should exit with code=-1
+ comp = createComponent(serviceScheduler,
+ org.apache.hadoop.yarn.service.api.records.Component
+ .RestartPolicyEnum.NEVER,
+ 0, 1, 1, 0);
+ componentInstance = comp.getAllComponentInstances().iterator().next();
+ containerStatus.setExitStatus(-1);
+
+ when(comp.getNumFailedInstances()).thenReturn(new Long(1));
+ ComponentInstance.handleComponentInstanceRelaunch(componentInstance,
+ componentInstanceEvent);
+
+ verify(comp, never()).markAsSucceeded(any(ComponentInstance.class));
+ verify(comp, times(1)).markAsFailed(any(ComponentInstance.class));
+ verify(comp, times(0)).reInsertPendingInstance(
+ any(ComponentInstance.class));
+ verify(serviceScheduler.getTerminationHandler(), times(1)).terminate(
+ eq(-1));
+
+ // Test case5: one component, one instance, restart policy = ON_FAILURE,
+ // exit=1
+ // Should continue run.
+ comp = createComponent(serviceScheduler,
+ org.apache.hadoop.yarn.service.api.records.Component
+ .RestartPolicyEnum.ON_FAILURE,
+ 0, 1, 1, 0);
+ componentInstance = comp.getAllComponentInstances().iterator().next();
+ containerStatus.setExitStatus(1);
+ ComponentInstance.handleComponentInstanceRelaunch(componentInstance,
+ componentInstanceEvent);
+ verify(comp, never()).markAsSucceeded(any(ComponentInstance.class));
+ verify(comp, never()).markAsFailed(any(ComponentInstance.class));
+ verify(comp, times(1)).reInsertPendingInstance(
+ any(ComponentInstance.class));
+ verify(serviceScheduler.getTerminationHandler(), times(0)).terminate(
+ anyInt());
+
+ // Test case6: one component, 3 instances, restart policy = NEVER, exit=1
+ // 2 of the instances not completed, it should continue run.
+ comp = createComponent(serviceScheduler,
+ org.apache.hadoop.yarn.service.api.records.Component
+ .RestartPolicyEnum.NEVER,
+ 0, 1, 3, 0);
+ componentInstance = comp.getAllComponentInstances().iterator().next();
+ containerStatus.setExitStatus(1);
+ ComponentInstance.handleComponentInstanceRelaunch(componentInstance,
+ componentInstanceEvent);
+ verify(comp, never()).markAsSucceeded(any(ComponentInstance.class));
+ verify(comp, times(1)).markAsFailed(any(ComponentInstance.class));
+ verify(comp, times(0)).reInsertPendingInstance(
+ any(ComponentInstance.class));
+ verify(serviceScheduler.getTerminationHandler(), times(0)).terminate(
+ anyInt());
+
+ // Test case7: one component, 3 instances, restart policy = ON_FAILURE,
+ // exit=1
+ // 2 of the instances completed, it should continue run.
+
+ comp = createComponent(serviceScheduler,
+ org.apache.hadoop.yarn.service.api.records.Component
+ .RestartPolicyEnum.ON_FAILURE,
+ 0, 1, 3, 0);
+
+ Iterator<ComponentInstance> iter =
+ comp.getAllComponentInstances().iterator();
+
+ containerStatus.setExitStatus(1);
+ ComponentInstance commponentInstance = iter.next();
+ ComponentInstance.handleComponentInstanceRelaunch(commponentInstance,
+ componentInstanceEvent);
+
+ verify(comp, never()).markAsSucceeded(any(ComponentInstance.class));
+ verify(comp, never()).markAsFailed(any(ComponentInstance.class));
+ verify(comp, times(1)).reInsertPendingInstance(
+ any(ComponentInstance.class));
+ verify(serviceScheduler.getTerminationHandler(), times(0)).terminate(
+ anyInt());
+
+ // Test case8: 2 components, 2 instances for each
+ // comp2 already finished.
+ // comp1 has a new instance finish, we should terminate the service
+
+ comp = createComponent(serviceScheduler,
+ org.apache.hadoop.yarn.service.api.records.Component
+ .RestartPolicyEnum.NEVER,
+ 2, 0);
+ Collection<ComponentInstance> component1Instances =
+ comp.getAllComponentInstances();
+
+ containerStatus.setExitStatus(-1);
+
+ Component comp2 = createComponent(
+ componentInstance.getComponent().getScheduler(),
+ org.apache.hadoop.yarn.service.api.records.Component
+ .RestartPolicyEnum.NEVER,
+ 2, 1);
+
+ Collection<ComponentInstance> component2Instances =
+ comp2.getAllComponentInstances();
+
+ Map<String, ComponentInstance> failed2Instances = new HashMap<>();
+
+ for (ComponentInstance component2Instance : component2Instances) {
+ failed2Instances.put(component2Instance.getCompInstanceName(),
+ component2Instance);
+ when(component2Instance.getComponent().getFailedInstances()).thenReturn(
+ failed2Instances.values());
+ when(component2Instance.getComponent().getNumFailedInstances())
+ .thenReturn(new Long(failed2Instances.size()));
+ ComponentInstance.handleComponentInstanceRelaunch(component2Instance,
+ componentInstanceEvent);
+ }
+
+ Map<String, ComponentInstance> failed1Instances = new HashMap<>();
+
+ // 2nd component, already finished.
+ for (ComponentInstance component1Instance : component1Instances) {
+ failed1Instances.put(component1Instance.getCompInstanceName(),
+ component1Instance);
+ when(component1Instance.getComponent().getFailedInstances()).thenReturn(
+ failed1Instances.values());
+ when(component1Instance.getComponent().getNumFailedInstances())
+ .thenReturn(new Long(failed1Instances.size()));
+ ComponentInstance.handleComponentInstanceRelaunch(component1Instance,
+ componentInstanceEvent);
+ }
+
+ verify(comp, never()).markAsSucceeded(any(ComponentInstance.class));
+ verify(comp, times(2)).markAsFailed(any(ComponentInstance.class));
+ verify(comp, times(0)).reInsertPendingInstance(
+ any(ComponentInstance.class));
+
+ verify(serviceScheduler.getTerminationHandler(), times(1)).terminate(
+ eq(-1));
+
+ // Test case9: 2 components, 2 instances for each
+ // comp2 already finished.
+ // comp1 has a new instance finish, we should terminate the service
+ // All instance finish with 0, service should exit with 0 as well.
+ containerStatus.setExitStatus(0);
+
+ comp = createComponent(serviceScheduler,
+ org.apache.hadoop.yarn.service.api.records.Component
+ .RestartPolicyEnum.ON_FAILURE,
+ 2, 0);
+ component1Instances = comp.getAllComponentInstances();
+
+ comp2 = createComponent(componentInstance.getComponent().getScheduler(),
+ org.apache.hadoop.yarn.service.api.records.Component
+ .RestartPolicyEnum.ON_FAILURE,
+ 2, 1);
+
+ component2Instances = comp2.getAllComponentInstances();
+
+ Map<String, ComponentInstance> succeeded2Instances = new HashMap<>();
+
+ for (ComponentInstance component2Instance : component2Instances) {
+ succeeded2Instances.put(component2Instance.getCompInstanceName(),
+ component2Instance);
+ when(component2Instance.getComponent().getSucceededInstances())
+ .thenReturn(succeeded2Instances.values());
+ when(component2Instance.getComponent().getNumSucceededInstances())
+ .thenReturn(new Long(succeeded2Instances.size()));
+ ComponentInstance.handleComponentInstanceRelaunch(component2Instance,
+ componentInstanceEvent);
+ }
+
+ Map<String, ComponentInstance> succeeded1Instances = new HashMap<>();
+ // 2nd component, already finished.
+ for (ComponentInstance component1Instance : component1Instances) {
+ succeeded1Instances.put(component1Instance.getCompInstanceName(),
+ component1Instance);
+ when(component1Instance.getComponent().getSucceededInstances())
+ .thenReturn(succeeded1Instances.values());
+ when(component1Instance.getComponent().getNumSucceededInstances())
+ .thenReturn(new Long(succeeded1Instances.size()));
+ ComponentInstance.handleComponentInstanceRelaunch(component1Instance,
+ componentInstanceEvent);
+ }
+
+ verify(comp, times(2)).markAsSucceeded(any(ComponentInstance.class));
+ verify(comp, never()).markAsFailed(any(ComponentInstance.class));
+ verify(componentInstance.getComponent(), times(0)).reInsertPendingInstance(
+ any(ComponentInstance.class));
+ verify(serviceScheduler.getTerminationHandler(), times(1)).terminate(eq(0));
+
+ // Test case10: 2 components, 2 instances for each
+ // comp2 hasn't finished
+ // comp1 finished.
+ // Service should continue run.
+
+ comp = createComponent(serviceScheduler,
+ org.apache.hadoop.yarn.service.api.records.Component
+ .RestartPolicyEnum.NEVER,
+ 2, 0);
+ component1Instances = comp.getAllComponentInstances();
+
+ comp2 = createComponent(componentInstance.getComponent().getScheduler(),
+ org.apache.hadoop.yarn.service.api.records.Component
+ .RestartPolicyEnum.NEVER,
+ 2, 1);
+
+ component2Instances = comp2.getAllComponentInstances();
+
+ for (ComponentInstance component2Instance : component2Instances) {
+ ComponentInstance.handleComponentInstanceRelaunch(component2Instance,
+ componentInstanceEvent);
+ }
+
+ succeeded1Instances = new HashMap<>();
+ // 2nd component, already finished.
+ for (ComponentInstance component1Instance : component1Instances) {
+ succeeded1Instances.put(component1Instance.getCompInstanceName(),
+ component1Instance);
+ when(component1Instance.getComponent().getSucceededInstances())
+ .thenReturn(succeeded1Instances.values());
+ ComponentInstance.handleComponentInstanceRelaunch(component1Instance,
+ componentInstanceEvent);
+ }
+
+ verify(comp, times(2)).markAsSucceeded(any(ComponentInstance.class));
+ verify(comp, never()).markAsFailed(any(ComponentInstance.class));
+ verify(componentInstance.getComponent(), times(0)).reInsertPendingInstance(
+ any(ComponentInstance.class));
+ verify(serviceScheduler.getTerminationHandler(), never()).terminate(eq(0));
+
}
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/7f083ed8/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/yarn-service/YarnServiceAPI.md
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/yarn-service/YarnServiceAPI.md b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/yarn-service/YarnServiceAPI.md
index c648046..2b567b0 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/yarn-service/YarnServiceAPI.md
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/yarn-service/YarnServiceAPI.md
@@ -233,6 +233,8 @@ One or more components of the service. If the service is HBase say, then the com
|placement_policy|Advanced scheduling and placement policies for all containers of this component.|false|PlacementPolicy||
|configuration|Config properties for this component.|false|Configuration||
|quicklinks|A list of quicklink keys defined at the service level, and to be resolved by this component.|false|string array||
+|restart_policy|Policy of restart component. Including ALWAYS (Always restart
+ component even if instance exit code = 0); ON_FAILURE (Only restart component if instance exit code != 0); NEVER (Do not restart in any cases). Flexing is not supported for components which have restart_policy=ON_FAILURE/NEVER|false|string|ALWAYS|
### ComponentState
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org
[2/2] hadoop git commit: YARN-8080. Add restart policy for YARN
services. Contributed by Suma Shivaprasad
Posted by ey...@apache.org.
YARN-8080. Add restart policy for YARN services.
Contributed by Suma Shivaprasad
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/7f083ed8
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/7f083ed8
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/7f083ed8
Branch: refs/heads/trunk
Commit: 7f083ed8699a720d3fb82e4ec310356902a6ac30
Parents: 7802af6
Author: Eric Yang <ey...@apache.org>
Authored: Thu May 17 17:16:50 2018 -0400
Committer: Eric Yang <ey...@apache.org>
Committed: Thu May 17 17:16:50 2018 -0400
----------------------------------------------------------------------
...RN-Simplified-V1-API-Layer-For-Services.yaml | 8 +
.../hadoop/yarn/service/ClientAMService.java | 7 +-
.../hadoop/yarn/service/ServiceContext.java | 4 +
.../hadoop/yarn/service/ServiceManager.java | 25 +-
.../hadoop/yarn/service/ServiceScheduler.java | 73 ++-
.../yarn/service/api/records/Component.java | 71 +++
.../service/component/AlwaysRestartPolicy.java | 82 ++++
.../yarn/service/component/Component.java | 209 ++++++--
.../component/ComponentRestartPolicy.java | 45 ++
.../service/component/NeverRestartPolicy.java | 82 ++++
.../component/OnFailureRestartPolicy.java | 87 ++++
.../component/instance/ComponentInstance.java | 91 +++-
.../hadoop/yarn/service/utils/ServiceUtils.java | 18 +
.../hadoop/yarn/service/ServiceTestUtils.java | 46 +-
.../hadoop/yarn/service/TestServiceManager.java | 6 +-
.../yarn/service/component/TestComponent.java | 99 +++-
.../component/TestComponentRestartPolicy.java | 130 +++++
.../instance/TestComponentInstance.java | 484 ++++++++++++++++++-
.../markdown/yarn-service/YarnServiceAPI.md | 2 +
19 files changed, 1447 insertions(+), 122 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/7f083ed8/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services-api/src/main/resources/definition/YARN-Simplified-V1-API-Layer-For-Services.yaml
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services-api/src/main/resources/definition/YARN-Simplified-V1-API-Layer-For-Services.yaml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services-api/src/main/resources/definition/YARN-Simplified-V1-API-Layer-For-Services.yaml
index cea8296..d90ae06 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services-api/src/main/resources/definition/YARN-Simplified-V1-API-Layer-For-Services.yaml
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services-api/src/main/resources/definition/YARN-Simplified-V1-API-Layer-For-Services.yaml
@@ -424,6 +424,14 @@ definitions:
items:
type: string
description: A list of quicklink keys defined at the service level, and to be resolved by this component.
+ restartPolicy:
+ type: string
+ description: Policy of restart component. Including ALWAYS (Always restart component even if instance exit code = 0); ON_FAILURE (Only restart component if instance exit code != 0); NEVER (Do not restart in any cases)
+ enum:
+ - ALWAYS
+ - ON_FAILURE
+ - NEVER
+ default: ALWAYS
ReadinessCheck:
description: A check to be performed to determine the readiness of a component instance (a container). If no readiness check is specified, the default readiness check will be used unless the yarn.service.default-readiness-check.enabled configuration property is set to false at the component, service, or system level. The artifact field is currently unsupported but may be implemented in the future, enabling a pluggable helper container to support advanced use cases.
required:
http://git-wip-us.apache.org/repos/asf/hadoop/blob/7f083ed8/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ClientAMService.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ClientAMService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ClientAMService.java
index d5d6fa4..e97c3d6 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ClientAMService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ClientAMService.java
@@ -27,6 +27,7 @@ import org.apache.hadoop.service.AbstractService;
import org.apache.hadoop.util.ExitUtil;
import org.apache.hadoop.yarn.api.ApplicationConstants;
import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.ipc.YarnRPC;
import org.apache.hadoop.yarn.proto.ClientAMProtocol.CompInstancesUpgradeRequestProto;
@@ -130,7 +131,7 @@ public class ClientAMService extends AbstractService
LOG.info("Stop the service by {}", UserGroupInformation.getCurrentUser());
context.scheduler.getDiagnostics()
.append("Stopped by user " + UserGroupInformation.getCurrentUser());
- context.scheduler.setGracefulStop();
+ context.scheduler.setGracefulStop(FinalApplicationStatus.ENDED);
// Stop the service in 2 seconds delay to make sure this rpc call is completed.
// shutdown hook will be executed which will stop AM gracefully.
@@ -157,10 +158,10 @@ public class ClientAMService extends AbstractService
public UpgradeServiceResponseProto upgrade(
UpgradeServiceRequestProto request) throws IOException {
try {
- context.getServiceManager().processUpgradeRequest(request.getVersion(),
- request.getAutoFinalize());
LOG.info("Upgrading service to version {} by {}", request.getVersion(),
UserGroupInformation.getCurrentUser());
+ context.getServiceManager().processUpgradeRequest(request.getVersion(),
+ request.getAutoFinalize());
return UpgradeServiceResponseProto.newBuilder().build();
} catch (Exception ex) {
return UpgradeServiceResponseProto.newBuilder().setError(ex.getMessage())
http://git-wip-us.apache.org/repos/asf/hadoop/blob/7f083ed8/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceContext.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceContext.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceContext.java
index 6c91b9c..8779153 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceContext.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceContext.java
@@ -56,4 +56,8 @@ public class ServiceContext {
void setServiceManager(ServiceManager serviceManager) {
this.serviceManager = Preconditions.checkNotNull(serviceManager);
}
+
+ public Service getService() {
+ return service;
+ }
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/7f083ed8/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceManager.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceManager.java
index e6a38dc..05ecb3f 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceManager.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceManager.java
@@ -27,6 +27,7 @@ import org.apache.hadoop.yarn.service.api.records.ServiceState;
import org.apache.hadoop.yarn.service.component.Component;
import org.apache.hadoop.yarn.service.component.ComponentEvent;
import org.apache.hadoop.yarn.service.component.ComponentEventType;
+import org.apache.hadoop.yarn.service.component.ComponentRestartPolicy;
import org.apache.hadoop.yarn.service.utils.ServiceApiUtil;
import org.apache.hadoop.yarn.service.utils.SliderFileSystem;
import org.apache.hadoop.yarn.state.InvalidStateTransitionException;
@@ -266,12 +267,24 @@ public class ServiceManager implements EventHandler<ServiceEvent> {
event.setAutoFinalize(true);
}
compsThatNeedUpgrade.forEach(component -> {
- ComponentEvent needUpgradeEvent = new ComponentEvent(
- component.getName(), ComponentEventType.UPGRADE)
- .setTargetSpec(component)
- .setUpgradeVersion(event.getVersion());
- context.scheduler.getDispatcher().getEventHandler().handle(
- needUpgradeEvent);
+ org.apache.hadoop.yarn.service.api.records.Component.RestartPolicyEnum
+ restartPolicy = component.getRestartPolicy();
+
+ final ComponentRestartPolicy restartPolicyHandler =
+ Component.getRestartPolicyHandler(restartPolicy);
+ // Do not allow upgrades for components which have NEVER/ON_FAILURE
+ // restart policy
+ if (restartPolicyHandler.allowUpgrades()) {
+ ComponentEvent needUpgradeEvent = new ComponentEvent(
+ component.getName(), ComponentEventType.UPGRADE).setTargetSpec(
+ component).setUpgradeVersion(event.getVersion());
+ context.scheduler.getDispatcher().getEventHandler().handle(
+ needUpgradeEvent);
+ } else {
+ LOG.info("The component {} has a restart "
+ + "policy that doesnt allow upgrades {} ", component.getName(),
+ component.getRestartPolicy().toString());
+ }
});
} else {
// nothing to upgrade if upgrade auto finalize is requested, trigger a
http://git-wip-us.apache.org/repos/asf/hadoop/blob/7f083ed8/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceScheduler.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceScheduler.java
index ee0a1a7..d3e8e4f 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceScheduler.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceScheduler.java
@@ -62,6 +62,7 @@ import org.apache.hadoop.yarn.service.api.ServiceApiConstants;
import org.apache.hadoop.yarn.service.api.records.Service;
import org.apache.hadoop.yarn.service.api.records.ServiceState;
import org.apache.hadoop.yarn.service.api.records.ConfigFile;
+import org.apache.hadoop.yarn.service.component.ComponentRestartPolicy;
import org.apache.hadoop.yarn.service.component.instance.ComponentInstance;
import org.apache.hadoop.yarn.service.component.instance.ComponentInstanceEvent;
import org.apache.hadoop.yarn.service.component.instance.ComponentInstanceEventType;
@@ -77,6 +78,7 @@ import org.apache.hadoop.yarn.service.timelineservice.ServiceMetricsSink;
import org.apache.hadoop.yarn.service.timelineservice.ServiceTimelinePublisher;
import org.apache.hadoop.yarn.service.utils.ServiceApiUtil;
import org.apache.hadoop.yarn.service.utils.ServiceRegistryUtils;
+import org.apache.hadoop.yarn.service.utils.ServiceUtils;
import org.apache.hadoop.yarn.util.BoundedAppender;
import org.apache.hadoop.yarn.util.resource.ResourceUtils;
import org.slf4j.Logger;
@@ -89,8 +91,10 @@ import java.nio.ByteBuffer;
import java.text.MessageFormat;
import java.util.Collection;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.List;
import java.util.Map;
+import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
@@ -101,6 +105,10 @@ import static org.apache.hadoop.registry.client.api.RegistryConstants.*;
import static org.apache.hadoop.yarn.api.records.ContainerExitStatus.KILLED_AFTER_APP_COMPLETION;
import static org.apache.hadoop.yarn.service.api.ServiceApiConstants.*;
import static org.apache.hadoop.yarn.service.component.ComponentEventType.*;
+import static org.apache.hadoop.yarn.service.exceptions.LauncherExitCodes
+ .EXIT_FALSE;
+import static org.apache.hadoop.yarn.service.exceptions.LauncherExitCodes
+ .EXIT_SUCCESS;
/**
*
@@ -158,8 +166,15 @@ public class ServiceScheduler extends CompositeService {
private boolean gracefulStop = false;
+ private volatile FinalApplicationStatus finalApplicationStatus =
+ FinalApplicationStatus.ENDED;
+
+ // For unit test override since we don't want to terminate UT process.
+ private ServiceUtils.ProcessTerminationHandler
+ terminationHandler = new ServiceUtils.ProcessTerminationHandler();
+
public ServiceScheduler(ServiceContext context) {
- super(context.service.getName());
+ super(context.getService().getName());
this.context = context;
}
@@ -256,8 +271,9 @@ public class ServiceScheduler extends CompositeService {
.createAMRMClientAsync(1000, new AMRMClientCallback());
}
- protected void setGracefulStop() {
+ public void setGracefulStop(FinalApplicationStatus applicationStatus) {
this.gracefulStop = true;
+ this.finalApplicationStatus = applicationStatus;
nmClient.getClient().cleanupRunningContainersOnStop(true);
}
@@ -877,4 +893,57 @@ public class ServiceScheduler extends CompositeService {
public boolean hasAtLeastOnePlacementConstraint() {
return hasAtLeastOnePlacementConstraint;
}
+
+ /*
+* Check if all components of the scheduler finished.
+* If all components finished
+* (which #failed-instances + #suceeded-instances = #total-n-containers)
+* The service will be terminated.
+*/
+ public synchronized void terminateServiceIfAllComponentsFinished() {
+ boolean shouldTerminate = true;
+
+ // Succeeded comps and failed comps, for logging purposes.
+ Set<String> succeededComponents = new HashSet<>();
+ Set<String> failedComponents = new HashSet<>();
+
+ for (Component comp : getAllComponents().values()) {
+ ComponentRestartPolicy restartPolicy = comp.getRestartPolicyHandler();
+ if (!restartPolicy.shouldTerminate(comp)) {
+ shouldTerminate = false;
+ break;
+ }
+
+ long nFailed = comp.getNumFailedInstances();
+
+ if (nFailed > 0) {
+ failedComponents.add(comp.getName());
+ } else{
+ succeededComponents.add(comp.getName());
+ }
+ }
+
+ if (shouldTerminate) {
+ LOG.info("All component finished, exiting Service Master... "
+ + ", final status=" + (failedComponents.isEmpty() ?
+ "Succeeded" :
+ "Failed"));
+ LOG.info("Succeeded components: [" + org.apache.commons.lang3.StringUtils
+ .join(succeededComponents, ",") + "]");
+ LOG.info("Failed components: [" + org.apache.commons.lang3.StringUtils
+ .join(failedComponents, ",") + "]");
+
+ if (failedComponents.isEmpty()) {
+ setGracefulStop(FinalApplicationStatus.SUCCEEDED);
+ getTerminationHandler().terminate(EXIT_SUCCESS);
+ } else{
+ setGracefulStop(FinalApplicationStatus.FAILED);
+ getTerminationHandler().terminate(EXIT_FALSE);
+ }
+ }
+ }
+
+ public ServiceUtils.ProcessTerminationHandler getTerminationHandler() {
+ return terminationHandler;
+ }
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/7f083ed8/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/Component.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/Component.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/Component.java
index 7deb076..0481123 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/Component.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/Component.java
@@ -17,6 +17,7 @@
package org.apache.hadoop.yarn.service.api.records;
+import com.fasterxml.jackson.annotation.JsonValue;
import io.swagger.annotations.ApiModel;
import io.swagger.annotations.ApiModelProperty;
@@ -29,7 +30,9 @@ import java.util.Objects;
import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlElement;
+import javax.xml.bind.annotation.XmlEnum;
import javax.xml.bind.annotation.XmlRootElement;
+import javax.xml.bind.annotation.XmlType;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonProperty;
@@ -98,6 +101,74 @@ public class Component implements Serializable {
private List<Container> containers =
Collections.synchronizedList(new ArrayList<Container>());
+
+ @JsonProperty("restart_policy")
+ @XmlElement(name = "restart_policy")
+ private RestartPolicyEnum restartPolicy = RestartPolicyEnum.ALWAYS;
+
+ /**
+ * Policy of restart component. Including ALWAYS - Long lived components
+ * (Always restart component instance even if instance exit code = 0.);
+ *
+ * ON_FAILURE (Only restart component instance if instance exit code !=
+ * 0);
+ * NEVER (Do not restart in any cases)
+ *
+ * @return restartPolicy
+ **/
+ @XmlType(name = "restart_policy")
+ @XmlEnum
+ public enum RestartPolicyEnum {
+ ALWAYS("ALWAYS"),
+
+ ON_FAILURE("ON_FAILURE"),
+
+ NEVER("NEVER");
+ private String value;
+
+ RestartPolicyEnum(String value) {
+ this.value = value;
+ }
+
+ @Override
+ @JsonValue
+ public String toString() {
+ return value;
+ }
+ }
+
+ public Component restartPolicy(RestartPolicyEnum restartPolicyEnumVal) {
+ this.restartPolicy = restartPolicyEnumVal;
+ return this;
+ }
+
+ /**
+ * Policy of restart component.
+ *
+ * Including
+ * ALWAYS (Always restart component instance even if instance exit
+ * code = 0);
+ *
+ * ON_FAILURE (Only restart component instance if instance exit code !=
+ * 0);
+ *
+ * NEVER (Do not restart in any cases)
+ *
+ * @return restartPolicy
+ **/
+ @ApiModelProperty(value = "Policy of restart component. Including ALWAYS "
+ + "(Always restart component even if instance exit code = 0); "
+ + "ON_FAILURE (Only restart component if instance exit code != 0); "
+ + "NEVER (Do not restart in any cases)")
+ public RestartPolicyEnum getRestartPolicy() {
+ return restartPolicy;
+ }
+
+ public void setRestartPolicy(RestartPolicyEnum restartPolicy) {
+ this.restartPolicy = restartPolicy;
+ }
+
+
/**
* Name of the service component (mandatory).
**/
http://git-wip-us.apache.org/repos/asf/hadoop/blob/7f083ed8/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/AlwaysRestartPolicy.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/AlwaysRestartPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/AlwaysRestartPolicy.java
new file mode 100644
index 0000000..704ab14
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/AlwaysRestartPolicy.java
@@ -0,0 +1,82 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.yarn.service.component;
+
+import org.apache.hadoop.yarn.api.records.ContainerStatus;
+import org.apache.hadoop.yarn.service.component.instance.ComponentInstance;
+
+/**
+ * Always restart policy allows for restarts for long live components which
+ * never terminate.
+ */
+public final class AlwaysRestartPolicy implements ComponentRestartPolicy {
+
+ private static AlwaysRestartPolicy INSTANCE = new AlwaysRestartPolicy();
+
+ private AlwaysRestartPolicy() {
+ }
+
+ public static AlwaysRestartPolicy getInstance() {
+ return INSTANCE;
+ }
+
+ @Override public boolean isLongLived() {
+ return true;
+ }
+
+ /**
+ * This is always false since these components never terminate
+ *
+ * @param component
+ * @return
+ */
+ @Override public boolean hasCompleted(Component component) {
+ return false;
+ }
+
+ /**
+ * This is always false since these components never terminate
+ *
+ * @param component
+ * @return
+ */
+ @Override public boolean hasCompletedSuccessfully(Component component) {
+ return false;
+ }
+
+ @Override public boolean shouldRelaunchInstance(
+ ComponentInstance componentInstance, ContainerStatus containerStatus) {
+ return true;
+ }
+
+ @Override public boolean isReadyForDownStream(Component dependentComponent) {
+ if (dependentComponent.getNumReadyInstances() < dependentComponent
+ .getNumDesiredInstances()) {
+ return false;
+ }
+ return true;
+ }
+
+ @Override public boolean allowUpgrades() {
+ return true;
+ }
+
+ @Override public boolean shouldTerminate(Component component) {
+ return false;
+ }
+}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/7f083ed8/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/Component.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/Component.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/Component.java
index 7979c19..931877e 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/Component.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/Component.java
@@ -18,9 +18,12 @@
package org.apache.hadoop.yarn.service.component;
+import com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.ExecutionType;
+import static org.apache.hadoop.yarn.service.api.records.Component
+ .RestartPolicyEnum;
import org.apache.hadoop.yarn.api.records.ExecutionTypeRequest;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.Resource;
@@ -111,6 +114,13 @@ public class Component implements EventHandler<ComponentEvent> {
// The number of containers failed since last reset. This excludes preempted,
// disk_failed containers etc. This will be reset to 0 periodically.
public AtomicInteger currentContainerFailure = new AtomicInteger(0);
+
+ //succeeded and Failed instances are Populated only for RestartPolicyEnum
+ //.ON_FAILURE/NEVER
+ private Map<String, ComponentInstance> succeededInstances =
+ new ConcurrentHashMap<>();
+ private Map<String, ComponentInstance> failedInstances =
+ new ConcurrentHashMap<>();
private boolean healthThresholdMonitorEnabled = false;
private AtomicBoolean upgradeInProgress = new AtomicBoolean(false);
@@ -297,7 +307,7 @@ public class Component implements EventHandler<ComponentEvent> {
@Override
public ComponentState transition(Component component,
ComponentEvent event) {
- component.setDesiredContainers((int)event.getDesired());
+ component.setDesiredContainers((int) event.getDesired());
if (!component.areDependenciesReady()) {
LOG.info("[FLEX COMPONENT {}]: Flex deferred because dependencies not"
+ " satisfied.", component.getName());
@@ -402,11 +412,37 @@ public class Component implements EventHandler<ComponentEvent> {
}
}
- private static ComponentState checkIfStable(Component component) {
+ @VisibleForTesting
+ static ComponentState checkIfStable(Component component) {
+ if (component.getRestartPolicyHandler().isLongLived()) {
+ return updateStateForLongRunningComponents(component);
+ } else{
+ //NEVER/ON_FAILURE
+ return updateStateForTerminatingComponents(component);
+ }
+ }
+
+ private static ComponentState updateStateForTerminatingComponents(
+ Component component) {
+ if (component.getNumRunningInstances() + component
+ .getNumSucceededInstances() + component.getNumFailedInstances()
+ < component.getComponentSpec().getNumberOfContainers()) {
+ component.componentSpec.setState(
+ org.apache.hadoop.yarn.service.api.records.ComponentState.FLEXING);
+ return FLEXING;
+ } else{
+ component.componentSpec.setState(
+ org.apache.hadoop.yarn.service.api.records.ComponentState.STABLE);
+ return STABLE;
+ }
+ }
+
+ private static ComponentState updateStateForLongRunningComponents(
+ Component component) {
// if desired == running
if (component.componentMetrics.containersReady.value() == component
- .getComponentSpec().getNumberOfContainers() &&
- component.numContainersThatNeedUpgrade.get() == 0) {
+ .getComponentSpec().getNumberOfContainers()
+ && component.numContainersThatNeedUpgrade.get() == 0) {
component.componentSpec.setState(
org.apache.hadoop.yarn.service.api.records.ComponentState.STABLE);
return STABLE;
@@ -425,17 +461,41 @@ public class Component implements EventHandler<ComponentEvent> {
// This method should be called whenever there is an increment or decrement
// of a READY state container of a component
- public static synchronized void checkAndUpdateComponentState(
+ //This should not matter for terminating components
+ private static synchronized void checkAndUpdateComponentState(
Component component, boolean isIncrement) {
org.apache.hadoop.yarn.service.api.records.ComponentState curState =
component.componentSpec.getState();
- if (isIncrement) {
- // check if all containers are in READY state
- if (component.numContainersThatNeedUpgrade.get() == 0 &&
- component.componentMetrics.containersReady.value() ==
- component.componentMetrics.containersDesired.value()) {
- component.componentSpec.setState(
- org.apache.hadoop.yarn.service.api.records.ComponentState.STABLE);
+
+ if (component.getRestartPolicyHandler().isLongLived()) {
+ if (isIncrement) {
+ // check if all containers are in READY state
+ if (component.numContainersThatNeedUpgrade.get() == 0
+ && component.componentMetrics.containersReady.value()
+ == component.componentMetrics.containersDesired.value()) {
+ component.componentSpec.setState(
+ org.apache.hadoop.yarn.service.api.records.ComponentState.STABLE);
+ if (curState != component.componentSpec.getState()) {
+ LOG.info("[COMPONENT {}] state changed from {} -> {}",
+ component.componentSpec.getName(), curState,
+ component.componentSpec.getState());
+ }
+ // component state change will trigger re-check of service state
+ component.context.getServiceManager().checkAndUpdateServiceState();
+ }
+ } else{
+ // container moving out of READY state could be because of FLEX down so
+ // still need to verify the count before changing the component state
+ if (component.componentMetrics.containersReady.value()
+ < component.componentMetrics.containersDesired.value()) {
+ component.componentSpec.setState(
+ org.apache.hadoop.yarn.service.api.records.ComponentState
+ .FLEXING);
+ } else if (component.componentMetrics.containersReady.value()
+ == component.componentMetrics.containersDesired.value()) {
+ component.componentSpec.setState(
+ org.apache.hadoop.yarn.service.api.records.ComponentState.STABLE);
+ }
if (curState != component.componentSpec.getState()) {
LOG.info("[COMPONENT {}] state changed from {} -> {}",
component.componentSpec.getName(), curState,
@@ -445,44 +505,38 @@ public class Component implements EventHandler<ComponentEvent> {
component.context.getServiceManager().checkAndUpdateServiceState();
}
} else {
- // container moving out of READY state could be because of FLEX down so
- // still need to verify the count before changing the component state
- if (component.componentMetrics.containersReady
- .value() < component.componentMetrics.containersDesired.value()) {
- component.componentSpec.setState(
- org.apache.hadoop.yarn.service.api.records.ComponentState.FLEXING);
- } else if (component.componentMetrics.containersReady
- .value() == component.componentMetrics.containersDesired.value()) {
- component.componentSpec.setState(
- org.apache.hadoop.yarn.service.api.records.ComponentState.STABLE);
- }
- if (curState != component.componentSpec.getState()) {
- LOG.info("[COMPONENT {}] state changed from {} -> {}",
- component.componentSpec.getName(), curState,
- component.componentSpec.getState());
- }
// component state change will trigger re-check of service state
component.context.getServiceManager().checkAndUpdateServiceState();
}
// when the service is stable then the state of component needs to
// transition to stable
- component.dispatcher.getEventHandler().handle(new ComponentEvent(
- component.getName(), ComponentEventType.CHECK_STABLE));
+ component.dispatcher.getEventHandler().handle(
+ new ComponentEvent(component.getName(),
+ ComponentEventType.CHECK_STABLE));
}
private static class ContainerCompletedTransition extends BaseTransition {
@Override
public void transition(Component component, ComponentEvent event) {
+
component.updateMetrics(event.getStatus());
component.dispatcher.getEventHandler().handle(
- new ComponentInstanceEvent(event.getStatus().getContainerId(),
- STOP).setStatus(event.getStatus()));
- component.componentSpec.setState(
- org.apache.hadoop.yarn.service.api.records.ComponentState.FLEXING);
- if (component.context.service.getState().equals(ServiceState.STABLE)) {
- component.getScheduler().getApp().setState(ServiceState.STARTED);
- LOG.info("Service def state changed from {} -> {}",
- ServiceState.STABLE, ServiceState.STARTED);
+ new ComponentInstanceEvent(event.getStatus().getContainerId(), STOP)
+ .setStatus(event.getStatus()));
+
+ ComponentRestartPolicy restartPolicy =
+ component.getRestartPolicyHandler();
+
+ if (restartPolicy.shouldRelaunchInstance(event.getInstance(),
+ event.getStatus())) {
+ component.componentSpec.setState(
+ org.apache.hadoop.yarn.service.api.records.ComponentState.FLEXING);
+
+ if (component.context.service.getState().equals(ServiceState.STABLE)) {
+ component.getScheduler().getApp().setState(ServiceState.STARTED);
+ LOG.info("Service def state changed from {} -> {}",
+ ServiceState.STABLE, ServiceState.STARTED);
+ }
}
}
}
@@ -725,8 +779,6 @@ public class Component implements EventHandler<ComponentEvent> {
componentMetrics.containersDesired.set(n);
}
-
-
private void updateMetrics(ContainerStatus status) {
switch (status.getExitStatus()) {
case SUCCESS:
@@ -753,7 +805,7 @@ public class Component implements EventHandler<ComponentEvent> {
String host = scheduler.getLiveInstances().get(status.getContainerId())
.getNodeId().getHost();
failureTracker.incNodeFailure(host);
- currentContainerFailure.getAndIncrement() ;
+ currentContainerFailure.getAndIncrement();
}
}
@@ -763,17 +815,18 @@ public class Component implements EventHandler<ComponentEvent> {
return true;
}
for (String dependency : dependencies) {
- Component dependentComponent =
- scheduler.getAllComponents().get(dependency);
+ Component dependentComponent = scheduler.getAllComponents().get(
+ dependency);
if (dependentComponent == null) {
LOG.error("Couldn't find dependency {} for {} (should never happen)",
dependency, getName());
continue;
}
- if (dependentComponent.getNumReadyInstances() < dependentComponent
- .getNumDesiredInstances()) {
+
+ if (!dependentComponent.isReadyForDownstream()) {
LOG.info("[COMPONENT {}]: Dependency {} not satisfied, only {} of {}"
- + " instances are ready.", getName(), dependency,
+ + " instances are ready or the dependent component has not "
+ + "completed ", getName(), dependency,
dependentComponent.getNumReadyInstances(),
dependentComponent.getNumDesiredInstances());
return false;
@@ -782,6 +835,7 @@ public class Component implements EventHandler<ComponentEvent> {
return true;
}
+
public Map<String, String> getDependencyHostIpTokens() {
Map<String, String> tokens = new HashMap<>();
List<String> dependencies = componentSpec.getDependencies();
@@ -955,4 +1009,67 @@ public class Component implements EventHandler<ComponentEvent> {
boolean healthThresholdMonitorEnabled) {
this.healthThresholdMonitorEnabled = healthThresholdMonitorEnabled;
}
+
+ public Collection<ComponentInstance> getSucceededInstances() {
+ return succeededInstances.values();
+ }
+
+ public long getNumSucceededInstances() {
+ return succeededInstances.size();
+ }
+
+ public long getNumFailedInstances() {
+ return failedInstances.size();
+ }
+
+ public Collection<ComponentInstance> getFailedInstances() {
+ return failedInstances.values();
+ }
+
+ public synchronized void markAsSucceeded(ComponentInstance instance) {
+ removeFailedInstanceIfExists(instance);
+ succeededInstances.put(instance.getCompInstanceName(), instance);
+ }
+
+ public synchronized void markAsFailed(ComponentInstance instance) {
+ removeSuccessfulInstanceIfExists(instance);
+ failedInstances.put(instance.getCompInstanceName(), instance);
+ }
+
+ public boolean removeFailedInstanceIfExists(ComponentInstance instance) {
+ if (failedInstances.containsKey(instance.getCompInstanceName())) {
+ failedInstances.remove(instance.getCompInstanceName());
+ return true;
+ }
+ return false;
+ }
+
+ public boolean removeSuccessfulInstanceIfExists(ComponentInstance instance) {
+ if (succeededInstances.containsKey(instance.getCompInstanceName())) {
+ succeededInstances.remove(instance.getCompInstanceName());
+ return true;
+ }
+ return false;
+ }
+
+ public boolean isReadyForDownstream() {
+ return getRestartPolicyHandler().isReadyForDownStream(this);
+ }
+
+ public static ComponentRestartPolicy getRestartPolicyHandler(
+ RestartPolicyEnum restartPolicyEnum) {
+
+ if (RestartPolicyEnum.NEVER == restartPolicyEnum) {
+ return NeverRestartPolicy.getInstance();
+ } else if (RestartPolicyEnum.ON_FAILURE == restartPolicyEnum) {
+ return OnFailureRestartPolicy.getInstance();
+ } else{
+ return AlwaysRestartPolicy.getInstance();
+ }
+ }
+
+ public ComponentRestartPolicy getRestartPolicyHandler() {
+ RestartPolicyEnum restartPolicyEnum = getComponentSpec().getRestartPolicy();
+ return getRestartPolicyHandler(restartPolicyEnum);
+ }
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/7f083ed8/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/ComponentRestartPolicy.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/ComponentRestartPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/ComponentRestartPolicy.java
new file mode 100644
index 0000000..23b0fb9
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/ComponentRestartPolicy.java
@@ -0,0 +1,45 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.yarn.service.component;
+
+import org.apache.hadoop.yarn.api.records.ContainerStatus;
+import org.apache.hadoop.yarn.service.component.instance.ComponentInstance;
+
+/**
+ * Interface for Component Restart policies.
+ * Which is used to make decisions on termination/restart of components and
+ * their instances.
+ */
+public interface ComponentRestartPolicy {
+
+ boolean isLongLived();
+
+ boolean hasCompleted(Component component);
+
+ boolean hasCompletedSuccessfully(Component component);
+
+ boolean shouldRelaunchInstance(ComponentInstance componentInstance,
+ ContainerStatus containerStatus);
+
+ boolean isReadyForDownStream(Component component);
+
+ boolean allowUpgrades();
+
+ boolean shouldTerminate(Component component);
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hadoop/blob/7f083ed8/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/NeverRestartPolicy.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/NeverRestartPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/NeverRestartPolicy.java
new file mode 100644
index 0000000..ace1f89
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/NeverRestartPolicy.java
@@ -0,0 +1,82 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.yarn.service.component;
+
+import org.apache.hadoop.yarn.api.records.ContainerStatus;
+import org.apache.hadoop.yarn.service.component.instance.ComponentInstance;
+
+/**
+ * Policy for components with instances that do not require/support a restart.
+ */
+public final class NeverRestartPolicy implements ComponentRestartPolicy {
+
+ private static NeverRestartPolicy INSTANCE = new NeverRestartPolicy();
+
+ private NeverRestartPolicy() {
+ }
+
+ public static NeverRestartPolicy getInstance() {
+ return INSTANCE;
+ }
+
+ @Override public boolean isLongLived() {
+ return false;
+ }
+
+ @Override public boolean hasCompleted(Component component) {
+ if (component.getNumSucceededInstances() + component.getNumFailedInstances()
+ < component.getNumDesiredInstances()) {
+ return false;
+ }
+ return true;
+ }
+
+ @Override public boolean hasCompletedSuccessfully(Component component) {
+ if (component.getNumSucceededInstances() == component
+ .getNumDesiredInstances()) {
+ return true;
+ }
+ return false;
+ }
+
+ @Override public boolean shouldRelaunchInstance(
+ ComponentInstance componentInstance, ContainerStatus containerStatus) {
+ return false;
+ }
+
+ @Override public boolean isReadyForDownStream(Component component) {
+ if (hasCompleted(component)) {
+ return true;
+ }
+ return false;
+ }
+
+ @Override public boolean allowUpgrades() {
+ return false;
+ }
+
+ @Override public boolean shouldTerminate(Component component) {
+ long nSucceeded = component.getNumSucceededInstances();
+ long nFailed = component.getNumFailedInstances();
+ if (nSucceeded + nFailed < component.getComponentSpec()
+ .getNumberOfContainers()) {
+ return false;
+ }
+ return true;
+ }
+}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/7f083ed8/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/OnFailureRestartPolicy.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/OnFailureRestartPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/OnFailureRestartPolicy.java
new file mode 100644
index 0000000..39fba2a
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/OnFailureRestartPolicy.java
@@ -0,0 +1,87 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.yarn.service.component;
+
+import org.apache.hadoop.yarn.api.records.ContainerStatus;
+import org.apache.hadoop.yarn.service.component.instance.ComponentInstance;
+
+/**
+ * Policy for components that require restarts for instances on failure.
+ */
+public final class OnFailureRestartPolicy implements ComponentRestartPolicy {
+
+ private static OnFailureRestartPolicy INSTANCE = new OnFailureRestartPolicy();
+
+ private OnFailureRestartPolicy() {
+ }
+
+ public static OnFailureRestartPolicy getInstance() {
+ return INSTANCE;
+ }
+
+ @Override public boolean isLongLived() {
+ return false;
+ }
+
+ @Override public boolean hasCompleted(Component component) {
+ if (hasCompletedSuccessfully(component)) {
+ return true;
+ }
+
+ return false;
+ }
+
+ @Override public boolean hasCompletedSuccessfully(Component component) {
+ if (component.getNumSucceededInstances() == component
+ .getNumDesiredInstances()) {
+ return true;
+ }
+
+ return false;
+ }
+
+ @Override public boolean shouldRelaunchInstance(
+ ComponentInstance componentInstance, ContainerStatus containerStatus) {
+
+ if (ComponentInstance.hasContainerFailed(containerStatus)) {
+ return true;
+ }
+
+ return false;
+ }
+
+ @Override public boolean isReadyForDownStream(Component component) {
+ if (hasCompletedSuccessfully(component)) {
+ return true;
+ }
+
+ return false;
+ }
+
+ @Override public boolean allowUpgrades() {
+ return false;
+ }
+
+ @Override public boolean shouldTerminate(Component component) {
+ long nSucceeded = component.getNumSucceededInstances();
+ if (nSucceeded < component.getComponentSpec().getNumberOfContainers()) {
+ return false;
+ }
+ return true;
+ }
+}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/7f083ed8/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/instance/ComponentInstance.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/instance/ComponentInstance.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/instance/ComponentInstance.java
index a323649..529596d 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/instance/ComponentInstance.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/instance/ComponentInstance.java
@@ -18,6 +18,7 @@
package org.apache.hadoop.yarn.service.component.instance;
+import com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.registry.client.api.RegistryConstants;
@@ -25,9 +26,9 @@ import org.apache.hadoop.registry.client.binding.RegistryPathUtils;
import org.apache.hadoop.registry.client.binding.RegistryUtils;
import org.apache.hadoop.registry.client.types.ServiceRecord;
import org.apache.hadoop.registry.client.types.yarn.PersistencePolicies;
-import org.apache.hadoop.util.ExitUtil;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.yarn.api.records.Container;
+import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.NodeId;
@@ -44,6 +45,7 @@ import org.apache.hadoop.yarn.service.api.records.ContainerState;
import org.apache.hadoop.yarn.service.component.Component;
import org.apache.hadoop.yarn.service.component.ComponentEvent;
import org.apache.hadoop.yarn.service.component.ComponentEventType;
+import org.apache.hadoop.yarn.service.component.ComponentRestartPolicy;
import org.apache.hadoop.yarn.service.monitor.probe.ProbeStatus;
import org.apache.hadoop.yarn.service.registry.YarnRegistryViewForProviders;
import org.apache.hadoop.yarn.service.timelineservice.ServiceTimelinePublisher;
@@ -96,8 +98,10 @@ public class ComponentInstance implements EventHandler<ComponentInstanceEvent>,
// This container object is used for rest API query
private org.apache.hadoop.yarn.service.api.records.Container containerSpec;
+
private static final StateMachineFactory<ComponentInstance,
- ComponentInstanceState, ComponentInstanceEventType, ComponentInstanceEvent>
+ ComponentInstanceState, ComponentInstanceEventType,
+ ComponentInstanceEvent>
stateMachineFactory =
new StateMachineFactory<ComponentInstance, ComponentInstanceState,
ComponentInstanceEventType, ComponentInstanceEvent>(INIT)
@@ -230,6 +234,47 @@ public class ComponentInstance implements EventHandler<ComponentInstanceEvent>,
}
}
+ @VisibleForTesting
+ static void handleComponentInstanceRelaunch(
+ ComponentInstance compInstance, ComponentInstanceEvent event) {
+ Component comp = compInstance.getComponent();
+
+ // Do we need to relaunch the service?
+ boolean hasContainerFailed = hasContainerFailed(event.getStatus());
+
+ ComponentRestartPolicy restartPolicy = comp.getRestartPolicyHandler();
+
+ if (restartPolicy.shouldRelaunchInstance(compInstance, event.getStatus())) {
+ // re-ask the failed container.
+ comp.requestContainers(1);
+ comp.reInsertPendingInstance(compInstance);
+ LOG.info(compInstance.getCompInstanceId()
+ + ": {} completed. Reinsert back to pending list and requested " +
+ "a new container." + System.lineSeparator() +
+ " exitStatus={}, diagnostics={}.",
+ event.getContainerId(), event.getStatus().getExitStatus(),
+ event.getStatus().getDiagnostics());
+ } else {
+ // When no relaunch, update component's #succeeded/#failed
+ // instances.
+ if (hasContainerFailed) {
+ comp.markAsFailed(compInstance);
+ } else {
+ comp.markAsSucceeded(compInstance);
+ }
+ LOG.info(compInstance.getCompInstanceId() + (!hasContainerFailed ?
+ " succeeded" :
+ " failed") + " without retry, exitStatus=" + event.getStatus());
+ comp.getScheduler().terminateServiceIfAllComponentsFinished();
+ }
+ }
+
+ public static boolean hasContainerFailed(ContainerStatus containerStatus) {
+ //Mark conainer as failed if we cant get its exit status i.e null?
+ return containerStatus == null || containerStatus.getExitStatus() !=
+ ContainerExitStatus.SUCCESS;
+ }
+
private static class ContainerStoppedTransition extends BaseTransition {
// whether the container failed before launched by AM or not.
boolean failedBeforeLaunching = false;
@@ -244,9 +289,8 @@ public class ComponentInstance implements EventHandler<ComponentInstanceEvent>,
@Override
public void transition(ComponentInstance compInstance,
ComponentInstanceEvent event) {
- // re-ask the failed container.
+
Component comp = compInstance.component;
- comp.requestContainers(1);
String containerDiag =
compInstance.getCompInstanceId() + ": " + event.getStatus()
.getDiagnostics();
@@ -259,7 +303,10 @@ public class ComponentInstance implements EventHandler<ComponentInstanceEvent>,
compInstance.component.decContainersReady(true);
}
compInstance.component.decRunningContainers();
- boolean shouldExit = false;
+ // Should we fail (terminate) the service?
+ boolean shouldFailService = false;
+
+ final ServiceScheduler scheduler = comp.getScheduler();
// Check if it exceeds the failure threshold, but only if health threshold
// monitor is not enabled
if (!comp.isHealthThresholdMonitorEnabled()
@@ -271,10 +318,10 @@ public class ComponentInstance implements EventHandler<ComponentInstanceEvent>,
comp.getName(), comp.currentContainerFailure.get(), comp.maxContainerFailurePerComp);
compInstance.diagnostics.append(exitDiag);
// append to global diagnostics that will be reported to RM.
- comp.getScheduler().getDiagnostics().append(containerDiag);
- comp.getScheduler().getDiagnostics().append(exitDiag);
+ scheduler.getDiagnostics().append(containerDiag);
+ scheduler.getDiagnostics().append(exitDiag);
LOG.warn(exitDiag);
- shouldExit = true;
+ shouldFailService = true;
}
if (!failedBeforeLaunching) {
@@ -296,25 +343,14 @@ public class ComponentInstance implements EventHandler<ComponentInstanceEvent>,
}
// remove the failed ContainerId -> CompInstance mapping
- comp.getScheduler().removeLiveCompInstance(event.getContainerId());
+ scheduler.removeLiveCompInstance(event.getContainerId());
- comp.reInsertPendingInstance(compInstance);
+ // According to component restart policy, handle container restart
+ // or finish the service (if all components finished)
+ handleComponentInstanceRelaunch(compInstance, event);
- LOG.info(compInstance.getCompInstanceId()
- + ": {} completed. Reinsert back to pending list and requested " +
- "a new container." + System.lineSeparator() +
- " exitStatus={}, diagnostics={}.",
- event.getContainerId(), event.getStatus().getExitStatus(),
- event.getStatus().getDiagnostics());
- if (shouldExit) {
- // Sleep for 5 seconds in hope that the state can be recorded in ATS.
- // in case there's a client polling the comp state, it can be notified.
- try {
- Thread.sleep(5000);
- } catch (InterruptedException e) {
- LOG.error("Interrupted on sleep while exiting.", e);
- }
- ExitUtil.terminate(-1);
+ if (shouldFailService) {
+ scheduler.getTerminationHandler().terminate(-1);
}
}
}
@@ -630,4 +666,9 @@ public class ComponentInstance implements EventHandler<ComponentInstanceEvent>,
>>> 32));
return result;
}
+
+ @VisibleForTesting public org.apache.hadoop.yarn.service.api.records
+ .Container getContainerSpec() {
+ return containerSpec;
+ }
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/7f083ed8/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/utils/ServiceUtils.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/utils/ServiceUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/utils/ServiceUtils.java
index 915b836..707bbf0 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/utils/ServiceUtils.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/utils/ServiceUtils.java
@@ -28,6 +28,7 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.net.DNS;
+import org.apache.hadoop.util.ExitUtil;
import org.apache.hadoop.yarn.api.ApplicationConstants;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.service.conf.YarnServiceConstants;
@@ -571,4 +572,21 @@ public final class ServiceUtils {
// Fallback to querying the default hostname as we did before.
return InetAddress.getLocalHost().getCanonicalHostName();
}
+
+ /**
+ * Process termination handler - exist with specified exit code after
+ * waiting a while for ATS state to be in sync.
+ */
+ public static class ProcessTerminationHandler {
+ public void terminate(int exitCode) {
+ // Sleep for 5 seconds in hope that the state can be recorded in ATS.
+ // in case there's a client polling the comp state, it can be notified.
+ try {
+ Thread.sleep(5000);
+ } catch (InterruptedException e) {
+ log.info("Interrupted on sleep while exiting.", e);
+ }
+ ExitUtil.terminate(exitCode);
+ }
+ }
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/7f083ed8/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/ServiceTestUtils.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/ServiceTestUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/ServiceTestUtils.java
index 599b8a7..86b4cea 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/ServiceTestUtils.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/ServiceTestUtils.java
@@ -57,6 +57,8 @@ import java.io.IOException;
import java.io.OutputStream;
import java.net.URL;
import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.List;
import java.util.Map;
import static org.apache.hadoop.registry.client.api.RegistryConstants.KEY_REGISTRY_ZK_QUORUM;
@@ -99,8 +101,32 @@ public class ServiceTestUtils {
return exampleApp;
}
+ // Example service definition
+ // 2 components, each of which has 2 containers.
+ public static Service createTerminatingJobExample(String serviceName) {
+ Service exampleApp = new Service();
+ exampleApp.setName(serviceName);
+ exampleApp.setVersion("v1");
+ exampleApp.addComponent(
+ createComponent("terminating-comp1", 2, "sleep " + "1000",
+ Component.RestartPolicyEnum.NEVER, null));
+ exampleApp.addComponent(
+ createComponent("terminating-comp2", 2, "sleep 1000",
+ Component.RestartPolicyEnum.ON_FAILURE, new ArrayList<String>() {{
+ add("terminating-comp1");
+ }}));
+ exampleApp.addComponent(
+ createComponent("terminating-comp3", 2, "sleep 1000",
+ Component.RestartPolicyEnum.ON_FAILURE, new ArrayList<String>() {{
+ add("terminating-comp2");
+ }}));
+
+ return exampleApp;
+ }
+
public static Component createComponent(String name) {
- return createComponent(name, 2L, "sleep 1000");
+ return createComponent(name, 2L, "sleep 1000",
+ Component.RestartPolicyEnum.ALWAYS, null);
}
protected static Component createComponent(String name, long numContainers,
@@ -116,6 +142,18 @@ public class ServiceTestUtils {
return comp1;
}
+ protected static Component createComponent(String name, long numContainers,
+ String command, Component.RestartPolicyEnum restartPolicyEnum,
+ List<String> dependencies) {
+ Component comp = createComponent(name, numContainers, command);
+ comp.setRestartPolicy(restartPolicyEnum);
+
+ if (dependencies != null) {
+ comp.dependencies(dependencies);
+ }
+ return comp;
+ }
+
public static SliderFileSystem initMockFs() throws IOException {
return initMockFs(null);
}
@@ -306,6 +344,12 @@ public class ServiceTestUtils {
return client;
}
+ public static ServiceManager createServiceManager(ServiceContext context) {
+ ServiceManager serviceManager = new ServiceManager(context);
+ context.setServiceManager(serviceManager);
+ return serviceManager;
+ }
+
/**
* Creates a YarnClient for test purposes.
*/
http://git-wip-us.apache.org/repos/asf/hadoop/blob/7f083ed8/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/TestServiceManager.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/TestServiceManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/TestServiceManager.java
index 56a0c71..fc509f1 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/TestServiceManager.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/TestServiceManager.java
@@ -227,14 +227,16 @@ public class TestServiceManager {
}
public static Service createBaseDef(String name) {
+ return createDef(name, ServiceTestUtils.createExampleApplication());
+ }
+
+ public static Service createDef(String name, Service serviceDef) {
ApplicationId applicationId = ApplicationId.newInstance(
System.currentTimeMillis(), 1);
- Service serviceDef = ServiceTestUtils.createExampleApplication();
serviceDef.setId(applicationId.toString());
serviceDef.setName(name);
serviceDef.setState(ServiceState.STARTED);
Artifact artifact = createTestArtifact("1");
-
serviceDef.getComponents().forEach(component ->
component.setArtifact(artifact));
return serviceDef;
http://git-wip-us.apache.org/repos/asf/hadoop/blob/7f083ed8/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/component/TestComponent.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/component/TestComponent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/component/TestComponent.java
index 600e438..d7c15ec 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/component/TestComponent.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/component/TestComponent.java
@@ -38,8 +38,10 @@ import org.apache.hadoop.yarn.service.api.records.Service;
import org.apache.hadoop.yarn.service.component.instance.ComponentInstance;
import org.apache.hadoop.yarn.service.component.instance.ComponentInstanceEvent;
import org.apache.hadoop.yarn.service.component.instance.ComponentInstanceEventType;
+
import org.apache.hadoop.yarn.service.containerlaunch.ContainerLaunchService;
import org.apache.hadoop.yarn.service.registry.YarnRegistryViewForProviders;
+import org.apache.log4j.Logger;
import org.junit.Assert;
import org.junit.Rule;
import org.junit.Test;
@@ -50,6 +52,7 @@ import java.util.Iterator;
import java.util.Map;
import static org.apache.hadoop.yarn.service.component.instance.ComponentInstanceEventType.STOP;
+
import static org.mockito.Matchers.anyObject;
import static org.mockito.Mockito.doNothing;
import static org.mockito.Mockito.mock;
@@ -60,6 +63,9 @@ import static org.mockito.Mockito.when;
*/
public class TestComponent {
+ private static final int WAIT_MS_PER_LOOP = 1000;
+ static final Logger LOG = Logger.getLogger(TestComponent.class);
+
@Rule
public ServiceTestUtils.ServiceFSWatcher rule =
new ServiceTestUtils.ServiceFSWatcher();
@@ -158,6 +164,57 @@ public class TestComponent {
comp.getComponentSpec().getConfiguration().getEnv("key1"));
}
+ @Test
+ public void testComponentStateUpdatesWithTerminatingComponents() throws
+ Exception {
+ final String serviceName =
+ "testComponentStateUpdatesWithTerminatingComponents";
+
+ Service testService = ServiceTestUtils.createTerminatingJobExample(
+ serviceName);
+ TestServiceManager.createDef(serviceName, testService);
+
+ ServiceContext context = createTestContext(rule, testService);
+
+ for (Component comp : context.scheduler.getAllComponents().values()) {
+
+ Iterator<ComponentInstance> instanceIter = comp.
+ getAllComponentInstances().iterator();
+
+ ComponentInstance componentInstance = instanceIter.next();
+ Container instanceContainer = componentInstance.getContainer();
+
+ Assert.assertEquals(0, comp.getNumSucceededInstances());
+ Assert.assertEquals(0, comp.getNumFailedInstances());
+ Assert.assertEquals(2, comp.getNumRunningInstances());
+ Assert.assertEquals(2, comp.getNumReadyInstances());
+ Assert.assertEquals(0, comp.getPendingInstances().size());
+
+ //stop 1 container
+ ContainerStatus containerStatus = ContainerStatus.newInstance(
+ instanceContainer.getId(),
+ org.apache.hadoop.yarn.api.records.ContainerState.COMPLETE,
+ "successful", 0);
+ comp.handle(new ComponentEvent(comp.getName(),
+ ComponentEventType.CONTAINER_COMPLETED).setStatus(containerStatus));
+ componentInstance.handle(
+ new ComponentInstanceEvent(componentInstance.getContainer().getId(),
+ ComponentInstanceEventType.STOP).setStatus(containerStatus));
+
+ Assert.assertEquals(1, comp.getNumSucceededInstances());
+ Assert.assertEquals(0, comp.getNumFailedInstances());
+ Assert.assertEquals(1, comp.getNumRunningInstances());
+ Assert.assertEquals(1, comp.getNumReadyInstances());
+ Assert.assertEquals(0, comp.getPendingInstances().size());
+
+ org.apache.hadoop.yarn.service.component.ComponentState componentState =
+ Component.checkIfStable(comp);
+ Assert.assertEquals(
+ org.apache.hadoop.yarn.service.component.ComponentState.STABLE,
+ componentState);
+ }
+ }
+
private static org.apache.hadoop.yarn.service.api.records.Component
createSpecWithEnv(String serviceName, String compName, String key,
String val) {
@@ -171,31 +228,38 @@ public class TestComponent {
public static ServiceContext createTestContext(
ServiceTestUtils.ServiceFSWatcher fsWatcher, String serviceName)
throws Exception {
+ return createTestContext(fsWatcher,
+ TestServiceManager.createBaseDef(serviceName));
+ }
+
+ public static ServiceContext createTestContext(
+ ServiceTestUtils.ServiceFSWatcher fsWatcher, Service serviceDef)
+ throws Exception {
ServiceContext context = new ServiceContext();
- context.service = TestServiceManager.createBaseDef(serviceName);
+ context.service = serviceDef;
context.fs = fsWatcher.getFs();
ContainerLaunchService mockLaunchService = mock(
ContainerLaunchService.class);
context.scheduler = new ServiceScheduler(context) {
- @Override
- protected YarnRegistryViewForProviders createYarnRegistryOperations(
+ @Override protected YarnRegistryViewForProviders
+ createYarnRegistryOperations(
ServiceContext context, RegistryOperations registryClient) {
return mock(YarnRegistryViewForProviders.class);
}
- @Override
- public NMClientAsync createNMClient() {
+ @Override public NMClientAsync createNMClient() {
NMClientAsync nmClientAsync = super.createNMClient();
NMClient nmClient = mock(NMClient.class);
try {
when(nmClient.getContainerStatus(anyObject(), anyObject()))
- .thenAnswer((Answer<ContainerStatus>) invocation ->
- ContainerStatus.newInstance(
- (ContainerId) invocation.getArguments()[0],
- org.apache.hadoop.yarn.api.records.ContainerState.RUNNING,
- "", 0));
+ .thenAnswer(
+ (Answer<ContainerStatus>) invocation -> ContainerStatus
+ .newInstance((ContainerId) invocation.getArguments()[0],
+ org.apache.hadoop.yarn.api.records.ContainerState
+ .RUNNING,
+ "", 0));
} catch (YarnException | IOException e) {
throw new RuntimeException(e);
}
@@ -203,16 +267,18 @@ public class TestComponent {
return nmClientAsync;
}
- @Override
- public ContainerLaunchService getContainerLaunchService() {
+ @Override public ContainerLaunchService getContainerLaunchService() {
return mockLaunchService;
}
};
context.scheduler.init(fsWatcher.getConf());
+ ServiceTestUtils.createServiceManager(context);
+
doNothing().when(mockLaunchService).
reInitCompInstance(anyObject(), anyObject(), anyObject(), anyObject());
stabilizeComponents(context);
+
return context;
}
@@ -223,6 +289,8 @@ public class TestComponent {
context.attemptId = attemptId;
Map<String, Component>
componentState = context.scheduler.getAllComponents();
+
+ int counter = 0;
for (org.apache.hadoop.yarn.service.api.records.Component componentSpec :
context.service.getComponents()) {
Component component = new org.apache.hadoop.yarn.service.component.
@@ -230,9 +298,12 @@ public class TestComponent {
componentState.put(component.getName(), component);
component.handle(new ComponentEvent(component.getName(),
ComponentEventType.FLEX));
+
for (int i = 0; i < componentSpec.getNumberOfContainers(); i++) {
- assignNewContainer(attemptId, i + 1, context, component);
+ counter++;
+ assignNewContainer(attemptId, counter, context, component);
}
+
component.handle(new ComponentEvent(component.getName(),
ComponentEventType.CHECK_STABLE));
}
@@ -241,6 +312,8 @@ public class TestComponent {
private static void assignNewContainer(
ApplicationAttemptId attemptId, long containerNum,
ServiceContext context, Component component) {
+
+
Container container = org.apache.hadoop.yarn.api.records.Container
.newInstance(ContainerId.newContainerId(attemptId, containerNum),
NODE_ID, "localhost", null, null,
http://git-wip-us.apache.org/repos/asf/hadoop/blob/7f083ed8/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/component/TestComponentRestartPolicy.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/component/TestComponentRestartPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/component/TestComponentRestartPolicy.java
new file mode 100644
index 0000000..60f5c91
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/component/TestComponentRestartPolicy.java
@@ -0,0 +1,130 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.yarn.service.component;
+
+import org.apache.hadoop.yarn.api.records.ContainerStatus;
+import org.apache.hadoop.yarn.service.component.instance.ComponentInstance;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+/**
+ * Tests for ComponentRestartPolicy implementations.
+ */
+public class TestComponentRestartPolicy {
+
+ @Test
+ public void testAlwaysRestartPolicy() throws Exception {
+
+ AlwaysRestartPolicy alwaysRestartPolicy = AlwaysRestartPolicy.getInstance();
+
+ Component component = mock(Component.class);
+ when(component.getNumReadyInstances()).thenReturn(1);
+ when(component.getNumDesiredInstances()).thenReturn(2);
+
+ ComponentInstance instance = mock(ComponentInstance.class);
+ when(instance.getComponent()).thenReturn(component);
+
+ ContainerStatus containerStatus = mock(ContainerStatus.class);
+
+ assertEquals(true, alwaysRestartPolicy.isLongLived());
+ assertEquals(true, alwaysRestartPolicy.allowUpgrades());
+ assertEquals(false, alwaysRestartPolicy.hasCompleted(component));
+ assertEquals(false,
+ alwaysRestartPolicy.hasCompletedSuccessfully(component));
+
+ assertEquals(true,
+ alwaysRestartPolicy.shouldRelaunchInstance(instance, containerStatus));
+
+ assertEquals(false, alwaysRestartPolicy.isReadyForDownStream(component));
+ }
+
+ @Test
+ public void testNeverRestartPolicy() throws Exception {
+
+ NeverRestartPolicy restartPolicy = NeverRestartPolicy.getInstance();
+
+ Component component = mock(Component.class);
+ when(component.getNumSucceededInstances()).thenReturn(new Long(1));
+ when(component.getNumFailedInstances()).thenReturn(new Long(2));
+ when(component.getNumDesiredInstances()).thenReturn(3);
+
+ ComponentInstance instance = mock(ComponentInstance.class);
+ when(instance.getComponent()).thenReturn(component);
+
+ ContainerStatus containerStatus = mock(ContainerStatus.class);
+
+ assertEquals(false, restartPolicy.isLongLived());
+ assertEquals(false, restartPolicy.allowUpgrades());
+ assertEquals(true, restartPolicy.hasCompleted(component));
+ assertEquals(false,
+ restartPolicy.hasCompletedSuccessfully(component));
+
+ assertEquals(false,
+ restartPolicy.shouldRelaunchInstance(instance, containerStatus));
+
+ assertEquals(true, restartPolicy.isReadyForDownStream(component));
+ }
+
+ @Test
+ public void testOnFailureRestartPolicy() throws Exception {
+
+ OnFailureRestartPolicy restartPolicy = OnFailureRestartPolicy.getInstance();
+
+ Component component = mock(Component.class);
+ when(component.getNumSucceededInstances()).thenReturn(new Long(3));
+ when(component.getNumFailedInstances()).thenReturn(new Long(0));
+ when(component.getNumDesiredInstances()).thenReturn(3);
+
+ ComponentInstance instance = mock(ComponentInstance.class);
+ when(instance.getComponent()).thenReturn(component);
+
+ ContainerStatus containerStatus = mock(ContainerStatus.class);
+ when(containerStatus.getExitStatus()).thenReturn(0);
+
+ assertEquals(false, restartPolicy.isLongLived());
+ assertEquals(false, restartPolicy.allowUpgrades());
+ assertEquals(true, restartPolicy.hasCompleted(component));
+ assertEquals(true,
+ restartPolicy.hasCompletedSuccessfully(component));
+
+ assertEquals(false,
+ restartPolicy.shouldRelaunchInstance(instance, containerStatus));
+
+ assertEquals(true, restartPolicy.isReadyForDownStream(component));
+
+
+ when(component.getNumSucceededInstances()).thenReturn(new Long(2));
+ when(component.getNumFailedInstances()).thenReturn(new Long(1));
+ when(component.getNumDesiredInstances()).thenReturn(3);
+
+ assertEquals(false, restartPolicy.hasCompleted(component));
+ assertEquals(false,
+ restartPolicy.hasCompletedSuccessfully(component));
+
+ when(containerStatus.getExitStatus()).thenReturn(-1000);
+
+ assertEquals(true,
+ restartPolicy.shouldRelaunchInstance(instance, containerStatus));
+
+ assertEquals(false, restartPolicy.isReadyForDownStream(component));
+
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org