You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by ha...@apache.org on 2018/01/03 21:31:36 UTC
[04/50] [abbrv] hadoop git commit: YARN-7616. Map YARN application
status to Service Status more accurately. (Contributed by Gour Saha)
YARN-7616. Map YARN application status to Service Status more accurately. (Contributed by Gour Saha)
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/41b58101
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/41b58101
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/41b58101
Branch: refs/heads/YARN-1011
Commit: 41b581012a83a17db785343362c718363e13e8f5
Parents: 94a2ac6
Author: Eric Yang <ey...@apache.org>
Authored: Tue Dec 19 19:14:45 2017 -0500
Committer: Eric Yang <ey...@apache.org>
Committed: Tue Dec 19 19:14:45 2017 -0500
----------------------------------------------------------------------
.../hadoop/yarn/service/ServiceMaster.java | 37 ++++++++++++++
.../hadoop/yarn/service/ServiceScheduler.java | 4 ++
.../yarn/service/client/ServiceClient.java | 26 ++++++----
.../yarn/service/component/Component.java | 53 ++++++++++++++++++--
.../component/instance/ComponentInstance.java | 6 +--
.../yarn/service/TestYarnNativeServices.java | 34 +++++++++++--
6 files changed, 137 insertions(+), 23 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/41b58101/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceMaster.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceMaster.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceMaster.java
index 1283604..75cc9c5 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceMaster.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceMaster.java
@@ -44,6 +44,7 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
import org.apache.hadoop.yarn.security.client.ClientToAMTokenSecretManager;
+import org.apache.hadoop.yarn.service.api.records.ServiceState;
import org.apache.hadoop.yarn.service.exceptions.BadClusterStateException;
import org.apache.hadoop.yarn.service.monitor.ServiceMonitor;
import org.apache.hadoop.yarn.service.utils.ServiceApiUtil;
@@ -237,6 +238,7 @@ public class ServiceMaster extends CompositeService {
SliderFileSystem fs) throws IOException {
context.service = ServiceApiUtil
.loadServiceFrom(fs, new Path(serviceDefPath));
+ context.service.setState(ServiceState.ACCEPTED);
LOG.info(context.service.toString());
}
@@ -257,6 +259,41 @@ public class ServiceMaster extends CompositeService {
super.serviceStop();
}
+ // This method should be called whenever there is an increment or decrement
+ // of a READY state component of a service
+ public static synchronized void checkAndUpdateServiceState(
+ ServiceScheduler scheduler, boolean isIncrement) {
+ ServiceState curState = scheduler.getApp().getState();
+ if (!isIncrement) {
+ // set it to STARTED every time a component moves out of STABLE state
+ scheduler.getApp().setState(ServiceState.STARTED);
+ } else {
+ // otherwise check the state of all components
+ boolean isStable = true;
+ for (org.apache.hadoop.yarn.service.api.records.Component comp : scheduler
+ .getApp().getComponents()) {
+ if (comp.getState() !=
+ org.apache.hadoop.yarn.service.api.records.ComponentState.STABLE) {
+ isStable = false;
+ break;
+ }
+ }
+ if (isStable) {
+ scheduler.getApp().setState(ServiceState.STABLE);
+ } else {
+ // mark new state as started only if current state is stable, otherwise
+ // leave it as is
+ if (curState == ServiceState.STABLE) {
+ scheduler.getApp().setState(ServiceState.STARTED);
+ }
+ }
+ }
+ if (curState != scheduler.getApp().getState()) {
+ LOG.info("Service state changed from {} -> {}", curState,
+ scheduler.getApp().getState());
+ }
+ }
+
private void printSystemEnv() {
for (Map.Entry<String, String> envs : System.getenv().entrySet()) {
LOG.info("{} = {}", envs.getKey(), envs.getValue());
http://git-wip-us.apache.org/repos/asf/hadoop/blob/41b58101/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceScheduler.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceScheduler.java
index 2697050..45cdd28 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceScheduler.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceScheduler.java
@@ -58,6 +58,7 @@ import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
import org.apache.hadoop.yarn.service.api.ServiceApiConstants;
import org.apache.hadoop.yarn.service.api.records.Service;
+import org.apache.hadoop.yarn.service.api.records.ServiceState;
import org.apache.hadoop.yarn.service.api.records.ConfigFile;
import org.apache.hadoop.yarn.service.component.instance.ComponentInstance;
import org.apache.hadoop.yarn.service.component.instance.ComponentInstanceEvent;
@@ -284,6 +285,9 @@ public class ServiceScheduler extends CompositeService {
}
registerServiceInstance(context.attemptId, app);
+ // Since AM has been started and registered, the service is in STARTED state
+ app.setState(ServiceState.STARTED);
+
// recover components based on containers sent from RM
recoverComponents(response);
http://git-wip-us.apache.org/repos/asf/hadoop/blob/41b58101/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/client/ServiceClient.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/client/ServiceClient.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/client/ServiceClient.java
index 81c56d2..d1ccc4f 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/client/ServiceClient.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/client/ServiceClient.java
@@ -268,7 +268,8 @@ public class ServiceClient extends AppAdminClient implements SliderExitCodes,
long ret = orig - Long.parseLong(newNumber.substring(1));
if (ret < 0) {
LOG.warn(MessageFormat.format(
- "[COMPONENT {}]: component count goes to negative ({}{} = {}), reset it to 0.",
+ "[COMPONENT {0}]: component count goes to negative ({1}{2} = {3}),"
+ + " ignore and reset it to 0.",
component.getName(), orig, newNumber, ret));
ret = 0;
}
@@ -878,18 +879,23 @@ public class ServiceClient extends AppAdminClient implements SliderExitCodes,
return newTimeout;
}
- public ServiceState convertState(FinalApplicationStatus status) {
- switch (status) {
- case UNDEFINED:
+ public ServiceState convertState(YarnApplicationState state) {
+ switch (state) {
+ case NEW:
+ case NEW_SAVING:
+ case SUBMITTED:
+ case ACCEPTED:
return ServiceState.ACCEPTED;
- case FAILED:
+ case RUNNING:
+ return ServiceState.STARTED;
+ case FINISHED:
case KILLED:
- return ServiceState.FAILED;
- case ENDED:
- case SUCCEEDED:
return ServiceState.STOPPED;
+ case FAILED:
+ return ServiceState.FAILED;
+ default:
+ return ServiceState.ACCEPTED;
}
- return ServiceState.ACCEPTED;
}
public String getStatusString(String appId)
@@ -917,7 +923,7 @@ public class ServiceClient extends AppAdminClient implements SliderExitCodes,
ApplicationReport appReport = yarnClient.getApplicationReport(currentAppId);
Service appSpec = new Service();
appSpec.setName(serviceName);
- appSpec.setState(convertState(appReport.getFinalApplicationStatus()));
+ appSpec.setState(convertState(appReport.getYarnApplicationState()));
ApplicationTimeout lifetime =
appReport.getApplicationTimeouts().get(ApplicationTimeoutType.LIFETIME);
if (lifetime != null) {
http://git-wip-us.apache.org/repos/asf/hadoop/blob/41b58101/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/Component.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/Component.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/Component.java
index 9c5cbae..a84c1b1 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/Component.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/Component.java
@@ -31,7 +31,9 @@ import org.apache.hadoop.yarn.service.component.instance.ComponentInstanceId;
import org.apache.hadoop.yarn.service.ContainerFailureTracker;
import org.apache.hadoop.yarn.service.ServiceContext;
import org.apache.hadoop.yarn.service.ServiceScheduler;
+import org.apache.hadoop.yarn.service.api.records.ServiceState;
import org.apache.hadoop.yarn.service.component.instance.ComponentInstanceEvent;
+import org.apache.hadoop.yarn.service.ServiceMaster;
import org.apache.hadoop.yarn.service.ServiceMetrics;
import org.apache.hadoop.yarn.service.provider.ProviderUtils;
import org.apache.hadoop.yarn.state.InvalidStateTransitionException;
@@ -209,6 +211,7 @@ public class Component implements EventHandler<ComponentEvent> {
component.createNumCompInstances(delta);
component.componentSpec.setState(
org.apache.hadoop.yarn.service.api.records.ComponentState.FLEXING);
+ component.getScheduler().getApp().setState(ServiceState.STARTED);
return FLEXING;
} else if (delta < 0){
delta = 0 - delta;
@@ -229,14 +232,11 @@ public class Component implements EventHandler<ComponentEvent> {
component.instanceIdCounter.decrementAndGet();
instance.destroy();
}
- component.componentSpec.setState(
- org.apache.hadoop.yarn.service.api.records.ComponentState.STABLE);
+ checkAndUpdateComponentState(component, false);
return STABLE;
} else {
LOG.info("[FLEX COMPONENT " + component.getName() + "]: already has " +
event.getDesired() + " instances, ignoring");
- component.componentSpec.setState(
- org.apache.hadoop.yarn.service.api.records.ComponentState.STABLE);
return STABLE;
}
}
@@ -289,7 +289,7 @@ public class Component implements EventHandler<ComponentEvent> {
private static ComponentState checkIfStable(Component component) {
// if desired == running
- if (component.componentMetrics.containersRunning.value() == component
+ if (component.componentMetrics.containersReady.value() == component
.getComponentSpec().getNumberOfContainers()) {
component.componentSpec.setState(
org.apache.hadoop.yarn.service.api.records.ComponentState.STABLE);
@@ -301,6 +301,46 @@ public class Component implements EventHandler<ComponentEvent> {
}
}
+ // This method should be called whenever there is an increment or decrement
+ // of a READY state container of a component
+ public static synchronized void checkAndUpdateComponentState(
+ Component component, boolean isIncrement) {
+ org.apache.hadoop.yarn.service.api.records.ComponentState curState =
+ component.componentSpec.getState();
+ if (isIncrement) {
+ // check if all containers are in READY state
+ if (component.componentMetrics.containersReady
+ .value() == component.componentMetrics.containersDesired.value()) {
+ component.componentSpec.setState(
+ org.apache.hadoop.yarn.service.api.records.ComponentState.STABLE);
+ if (curState != component.componentSpec.getState()) {
+ LOG.info("[COMPONENT {}] state changed from {} -> {}",
+ component.componentSpec.getName(), curState,
+ component.componentSpec.getState());
+ }
+ // component state change will trigger re-check of service state
+ ServiceMaster.checkAndUpdateServiceState(component.scheduler,
+ isIncrement);
+ }
+ } else {
+ // container moving out of READY state could be because of FLEX down so
+ // still need to verify the count before changing the component state
+ if (component.componentMetrics.containersReady
+ .value() < component.componentMetrics.containersDesired.value()) {
+ component.componentSpec.setState(
+ org.apache.hadoop.yarn.service.api.records.ComponentState.FLEXING);
+ if (curState != component.componentSpec.getState()) {
+ LOG.info("[COMPONENT {}] state changed from {} -> {}",
+ component.componentSpec.getName(), curState,
+ component.componentSpec.getState());
+ }
+ // component state change will trigger re-check of service state
+ ServiceMaster.checkAndUpdateServiceState(component.scheduler,
+ isIncrement);
+ }
+ }
+ }
+
private static class ContainerCompletedTransition extends BaseTransition {
@Override
public void transition(Component component, ComponentEvent event) {
@@ -310,6 +350,7 @@ public class Component implements EventHandler<ComponentEvent> {
STOP).setStatus(event.getStatus()));
component.componentSpec.setState(
org.apache.hadoop.yarn.service.api.records.ComponentState.FLEXING);
+ component.getScheduler().getApp().setState(ServiceState.STARTED);
}
}
@@ -472,11 +513,13 @@ public class Component implements EventHandler<ComponentEvent> {
public void incContainersReady() {
componentMetrics.containersReady.incr();
scheduler.getServiceMetrics().containersReady.incr();
+ checkAndUpdateComponentState(this, true);
}
public void decContainersReady() {
componentMetrics.containersReady.decr();
scheduler.getServiceMetrics().containersReady.decr();
+ checkAndUpdateComponentState(this, false);
}
public int getNumReadyInstances() {
http://git-wip-us.apache.org/repos/asf/hadoop/blob/41b58101/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/instance/ComponentInstance.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/instance/ComponentInstance.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/instance/ComponentInstance.java
index 31fa5c7..0e3e11b 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/instance/ComponentInstance.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/instance/ComponentInstance.java
@@ -147,7 +147,6 @@ public class ComponentInstance implements EventHandler<ComponentInstanceEvent>,
new ContainerStatusRetriever(compInstance.scheduler,
event.getContainerId(), compInstance), 0, 1,
TimeUnit.SECONDS);
- compInstance.component.incRunningContainers();
long containerStartTime = System.currentTimeMillis();
try {
ContainerTokenIdentifier containerTokenIdentifier = BuilderUtils
@@ -171,6 +170,7 @@ public class ComponentInstance implements EventHandler<ComponentInstanceEvent>,
compInstance.containerSpec = container;
compInstance.getCompSpec().addContainer(container);
compInstance.containerStartedTime = containerStartTime;
+ compInstance.component.incRunningContainers();
if (compInstance.timelineServiceEnabled) {
compInstance.serviceTimelinePublisher
@@ -183,8 +183,8 @@ public class ComponentInstance implements EventHandler<ComponentInstanceEvent>,
@Override
public void transition(ComponentInstance compInstance,
ComponentInstanceEvent event) {
- compInstance.component.incContainersReady();
compInstance.containerSpec.setState(ContainerState.READY);
+ compInstance.component.incContainersReady();
if (compInstance.timelineServiceEnabled) {
compInstance.serviceTimelinePublisher
.componentInstanceBecomeReady(compInstance.containerSpec);
@@ -196,8 +196,8 @@ public class ComponentInstance implements EventHandler<ComponentInstanceEvent>,
@Override
public void transition(ComponentInstance compInstance,
ComponentInstanceEvent event) {
- compInstance.component.decContainersReady();
compInstance.containerSpec.setState(ContainerState.RUNNING_BUT_UNREADY);
+ compInstance.component.decContainersReady();
}
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/41b58101/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/TestYarnNativeServices.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/TestYarnNativeServices.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/TestYarnNativeServices.java
index 1c517d9..debab8b 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/TestYarnNativeServices.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/TestYarnNativeServices.java
@@ -27,6 +27,7 @@ import org.apache.hadoop.yarn.api.records.*;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.service.api.records.Service;
+import org.apache.hadoop.yarn.service.api.records.ServiceState;
import org.apache.hadoop.yarn.service.api.records.Component;
import org.apache.hadoop.yarn.service.api.records.Container;
import org.apache.hadoop.yarn.service.api.records.ContainerState;
@@ -90,25 +91,25 @@ public class TestYarnNativeServices extends ServiceTestUtils {
// check app.json is persisted.
Assert.assertTrue(
getFS().exists(new Path(appDir, exampleApp.getName() + ".json")));
- waitForAllCompToBeReady(client, exampleApp);
+ waitForServiceToBeStable(client, exampleApp);
// Flex two components, each from 2 container to 3 containers.
flexComponents(client, exampleApp, 3L);
// wait for flex to be completed, increase from 2 to 3 containers.
- waitForAllCompToBeReady(client, exampleApp);
+ waitForServiceToBeStable(client, exampleApp);
// check all instances name for each component are in sequential order.
checkCompInstancesInOrder(client, exampleApp);
// flex down to 1
flexComponents(client, exampleApp, 1L);
- waitForAllCompToBeReady(client, exampleApp);
+ waitForServiceToBeStable(client, exampleApp);
checkCompInstancesInOrder(client, exampleApp);
// check component dir and registry are cleaned up.
// flex up again to 2
flexComponents(client, exampleApp, 2L);
- waitForAllCompToBeReady(client, exampleApp);
+ waitForServiceToBeStable(client, exampleApp);
checkCompInstancesInOrder(client, exampleApp);
// stop the service
@@ -145,7 +146,7 @@ public class TestYarnNativeServices extends ServiceTestUtils {
exampleApp.addComponent(compb);
client.actionCreate(exampleApp);
- waitForAllCompToBeReady(client, exampleApp);
+ waitForServiceToBeStable(client, exampleApp);
// check that containers for compa are launched before containers for compb
checkContainerLaunchDependencies(client, exampleApp, "compa", "compb");
@@ -372,6 +373,29 @@ public class TestYarnNativeServices extends ServiceTestUtils {
return allContainers;
}
+ /**
+ * Wait until service state becomes stable. A service is stable when all
+ * requested containers of all components are running and in ready state.
+ *
+ * @param client
+ * @param exampleApp
+ * @throws TimeoutException
+ * @throws InterruptedException
+ */
+ private void waitForServiceToBeStable(ServiceClient client,
+ Service exampleApp) throws TimeoutException, InterruptedException {
+ GenericTestUtils.waitFor(() -> {
+ try {
+ Service retrievedApp = client.getStatus(exampleApp.getName());
+ System.out.println(retrievedApp);
+ return retrievedApp.getState() == ServiceState.STABLE;
+ } catch (Exception e) {
+ e.printStackTrace();
+ return false;
+ }
+ }, 2000, 200000);
+ }
+
private ServiceClient createClient() throws Exception {
ServiceClient client = new ServiceClient() {
@Override protected Path addJarResource(String appName,
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org