You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by wa...@apache.org on 2018/07/31 19:26:26 UTC
[1/7] hadoop git commit: YARN-8528. Final states in
ContainerAllocation might be modified externally causing unexpected
allocation results. Contributed by Xintong Song.
Repository: hadoop
Updated Branches:
refs/heads/branch-3.1.1 14acc8af6 -> 8596a0ebe
YARN-8528. Final states in ContainerAllocation might be modified externally causing unexpected allocation results. Contributed by Xintong Song.
(cherry picked from commit 004e1f248ef20b78f9d12d6f1fe04f66d8c56158)
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/0c3d3bc3
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/0c3d3bc3
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/0c3d3bc3
Branch: refs/heads/branch-3.1.1
Commit: 0c3d3bc3aef4c43d73c3bf31c741c9efe3b18ada
Parents: 14acc8a
Author: Weiwei Yang <ww...@apache.org>
Authored: Fri Jul 20 22:32:11 2018 +0800
Committer: Wangda Tan <wa...@apache.org>
Committed: Tue Jul 31 12:06:16 2018 -0700
----------------------------------------------------------------------
.../capacity/allocator/ContainerAllocation.java | 2 +-
.../allocator/RegularContainerAllocator.java | 10 ++--
.../capacity/TestCapacityScheduler.java | 48 ++++++++++++++++++++
3 files changed, 54 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/0c3d3bc3/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/ContainerAllocation.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/ContainerAllocation.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/ContainerAllocation.java
index f408508..b9b9bcf 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/ContainerAllocation.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/ContainerAllocation.java
@@ -56,7 +56,7 @@ public class ContainerAllocation {
RMContainer containerToBeUnreserved;
private Resource resourceToBeAllocated = Resources.none();
- AllocationState state;
+ private AllocationState state;
NodeType containerNodeType = NodeType.NODE_LOCAL;
NodeType requestLocalityType = null;
http://git-wip-us.apache.org/repos/asf/hadoop/blob/0c3d3bc3/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java
index 99deb1a..adc27f5 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java
@@ -263,7 +263,7 @@ public class RegularContainerAllocator extends AbstractContainerAllocator {
reservedContainer, schedulingMode, resourceLimits);
if (null == reservedContainer) {
- if (result.state == AllocationState.PRIORITY_SKIPPED) {
+ if (result.getAllocationState() == AllocationState.PRIORITY_SKIPPED) {
// Don't count 'skipped nodes' as a scheduling opportunity!
application.subtractSchedulingOpportunity(schedulerKey);
}
@@ -487,8 +487,8 @@ public class RegularContainerAllocator extends AbstractContainerAllocator {
// When a returned allocation is LOCALITY_SKIPPED, since we're in
// off-switch request now, we will skip this app w.r.t priorities
- if (allocation.state == AllocationState.LOCALITY_SKIPPED) {
- allocation.state = AllocationState.APP_SKIPPED;
+ if (allocation.getAllocationState() == AllocationState.LOCALITY_SKIPPED) {
+ allocation = ContainerAllocation.APP_SKIPPED;
}
allocation.requestLocalityType = requestLocalityType;
@@ -836,8 +836,8 @@ public class RegularContainerAllocator extends AbstractContainerAllocator {
result = tryAllocateOnNode(clusterResource, node, schedulingMode,
resourceLimits, schedulerKey, reservedContainer);
- if (AllocationState.ALLOCATED == result.state
- || AllocationState.RESERVED == result.state) {
+ if (AllocationState.ALLOCATED == result.getAllocationState()
+ || AllocationState.RESERVED == result.getAllocationState()) {
result = doAllocation(result, node, schedulerKey, reservedContainer);
break;
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/0c3d3bc3/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java
index 0b54010..79cdcfe 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java
@@ -134,6 +134,8 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.TestSchedulerUtils;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.allocator.AllocationState;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.allocator.ContainerAllocation;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.ResourceCommitRequest;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode;
@@ -4930,4 +4932,50 @@ public class TestCapacityScheduler extends CapacitySchedulerTestBase {
spyCs.handle(new NodeUpdateSchedulerEvent(
spyCs.getNode(nm.getNodeId()).getRMNode()));
}
+
+ // Testcase for YARN-8528
+ // This is to test whether ContainerAllocation constants are holding correct
+ // values during scheduling.
+ @Test
+ public void testContainerAllocationLocalitySkipped() throws Exception {
+ Assert.assertEquals(AllocationState.APP_SKIPPED,
+ ContainerAllocation.APP_SKIPPED.getAllocationState());
+ Assert.assertEquals(AllocationState.LOCALITY_SKIPPED,
+ ContainerAllocation.LOCALITY_SKIPPED.getAllocationState());
+ Assert.assertEquals(AllocationState.PRIORITY_SKIPPED,
+ ContainerAllocation.PRIORITY_SKIPPED.getAllocationState());
+ Assert.assertEquals(AllocationState.QUEUE_SKIPPED,
+ ContainerAllocation.QUEUE_SKIPPED.getAllocationState());
+
+ // init RM & NMs & Nodes
+ final MockRM rm = new MockRM(new CapacitySchedulerConfiguration());
+ CapacityScheduler cs = (CapacityScheduler) rm.getResourceScheduler();
+ rm.start();
+ final MockNM nm1 = rm.registerNode("h1:1234", 4 * GB);
+ final MockNM nm2 = rm.registerNode("h2:1234", 6 * GB); // maximum-allocation-mb = 6GB
+
+ // submit app and request resource
+ // container2 is larger than nm1 total resource, will trigger locality skip
+ final RMApp app = rm.submitApp(1 * GB, "app", "user");
+ final MockAM am = MockRM.launchAndRegisterAM(app, rm, nm1);
+ am.addRequests(new String[] {"*"}, 5 * GB, 1, 1, 2);
+ am.schedule();
+
+ // container1 (am) should be acquired, container2 should not
+ RMNode node1 = rm.getRMContext().getRMNodes().get(nm1.getNodeId());
+ cs.handle(new NodeUpdateSchedulerEvent(node1));
+ ContainerId cid = ContainerId.newContainerId(am.getApplicationAttemptId(), 1l);
+ Assert.assertEquals(cs.getRMContainer(cid).getState(), RMContainerState.ACQUIRED);
+ cid = ContainerId.newContainerId(am.getApplicationAttemptId(), 2l);
+ Assert.assertNull(cs.getRMContainer(cid));
+
+ Assert.assertEquals(AllocationState.APP_SKIPPED,
+ ContainerAllocation.APP_SKIPPED.getAllocationState());
+ Assert.assertEquals(AllocationState.LOCALITY_SKIPPED,
+ ContainerAllocation.LOCALITY_SKIPPED.getAllocationState());
+ Assert.assertEquals(AllocationState.PRIORITY_SKIPPED,
+ ContainerAllocation.PRIORITY_SKIPPED.getAllocationState());
+ Assert.assertEquals(AllocationState.QUEUE_SKIPPED,
+ ContainerAllocation.QUEUE_SKIPPED.getAllocationState());
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org
[5/7] hadoop git commit: HADOOP-15593. Fixed NPE in UGI
spawnAutoRenewalThreadForUserCreds. Contributed by Gabor Bota
Posted by wa...@apache.org.
HADOOP-15593. Fixed NPE in UGI spawnAutoRenewalThreadForUserCreds.
Contributed by Gabor Bota
(cherry picked from commit 77721f39e26b630352a1f4087524a3fbd21ff06e)
(cherry picked from commit a869bd970e832c4d770b3cee6257225260f4d235)
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/096c1390
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/096c1390
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/096c1390
Branch: refs/heads/branch-3.1.1
Commit: 096c1390f2510f7035db210d7a3b4db59c59d167
Parents: bae4060
Author: Eric Yang <ey...@apache.org>
Authored: Thu Jul 26 18:35:36 2018 -0400
Committer: Wangda Tan <wa...@apache.org>
Committed: Tue Jul 31 12:06:47 2018 -0700
----------------------------------------------------------------------
.../hadoop/security/UserGroupInformation.java | 179 ++++++++++++-------
.../security/TestUserGroupInformation.java | 38 ++++
2 files changed, 148 insertions(+), 69 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/096c1390/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java
index 33a876f..c44ef72 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java
@@ -40,6 +40,7 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
+import java.util.Date;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.Iterator;
@@ -850,81 +851,121 @@ public class UserGroupInformation {
}
//spawn thread only if we have kerb credentials
- Thread t = new Thread(new Runnable() {
+ KerberosTicket tgt = getTGT();
+ if (tgt == null) {
+ return;
+ }
+ String cmd = conf.get("hadoop.kerberos.kinit.command", "kinit");
+ long nextRefresh = getRefreshTime(tgt);
+ Thread t =
+ new Thread(new AutoRenewalForUserCredsRunnable(tgt, cmd, nextRefresh));
+ t.setDaemon(true);
+ t.setName("TGT Renewer for " + getUserName());
+ t.start();
+ }
+
+ @VisibleForTesting
+ class AutoRenewalForUserCredsRunnable implements Runnable {
+ private KerberosTicket tgt;
+ private RetryPolicy rp;
+ private String kinitCmd;
+ private long nextRefresh;
+ private boolean runRenewalLoop = true;
+
+ AutoRenewalForUserCredsRunnable(KerberosTicket tgt, String kinitCmd,
+ long nextRefresh){
+ this.tgt = tgt;
+ this.kinitCmd = kinitCmd;
+ this.nextRefresh = nextRefresh;
+ this.rp = null;
+ }
+
+ public void setRunRenewalLoop(boolean runRenewalLoop) {
+ this.runRenewalLoop = runRenewalLoop;
+ }
- @Override
- public void run() {
- String cmd = conf.get("hadoop.kerberos.kinit.command", "kinit");
- KerberosTicket tgt = getTGT();
- if (tgt == null) {
+ @Override
+ public void run() {
+ do {
+ try {
+ long now = Time.now();
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Current time is " + now);
+ LOG.debug("Next refresh is " + nextRefresh);
+ }
+ if (now < nextRefresh) {
+ Thread.sleep(nextRefresh - now);
+ }
+ String output = Shell.execCommand(kinitCmd, "-R");
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Renewed ticket. kinit output: {}", output);
+ }
+ reloginFromTicketCache();
+ tgt = getTGT();
+ if (tgt == null) {
+ LOG.warn("No TGT after renewal. Aborting renew thread for " +
+ getUserName());
+ return;
+ }
+ nextRefresh = Math.max(getRefreshTime(tgt),
+ now + kerberosMinSecondsBeforeRelogin);
+ metrics.renewalFailures.set(0);
+ rp = null;
+ } catch (InterruptedException ie) {
+ LOG.warn("Terminating renewal thread");
return;
- }
- long nextRefresh = getRefreshTime(tgt);
- RetryPolicy rp = null;
- while (true) {
+ } catch (IOException ie) {
+ metrics.renewalFailuresTotal.incr();
+ final long now = Time.now();
+
+ if (tgt.isDestroyed()) {
+ LOG.error("TGT is destroyed. Aborting renew thread for {}.",
+ getUserName());
+ return;
+ }
+
+ long tgtEndTime;
+ // As described in HADOOP-15593, we need to handle the case when
+ // tgt.getEndTime() throws an NPE because of JDK issue JDK-8147772.
+ // The NPE is only possible if this issue is not fixed in the JDK
+ // currently in use.
try {
- long now = Time.now();
- if (LOG.isDebugEnabled()) {
- LOG.debug("Current time is " + now);
- LOG.debug("Next refresh is " + nextRefresh);
- }
- if (now < nextRefresh) {
- Thread.sleep(nextRefresh - now);
- }
- Shell.execCommand(cmd, "-R");
- if (LOG.isDebugEnabled()) {
- LOG.debug("renewed ticket");
- }
- reloginFromTicketCache();
- tgt = getTGT();
- if (tgt == null) {
- LOG.warn("No TGT after renewal. Aborting renew thread for " +
- getUserName());
- return;
- }
- nextRefresh = Math.max(getRefreshTime(tgt),
- now + kerberosMinSecondsBeforeRelogin);
- metrics.renewalFailures.set(0);
- rp = null;
- } catch (InterruptedException ie) {
- LOG.warn("Terminating renewal thread");
+ tgtEndTime = tgt.getEndTime().getTime();
+ } catch (NullPointerException npe) {
+ LOG.error("NPE thrown while getting KerberosTicket endTime. "
+ + "Aborting renew thread for {}.", getUserName());
+ return;
+ }
+
+ LOG.warn("Exception encountered while running the renewal "
+ + "command for {}. (TGT end time:{}, renewalFailures: {},"
+ + "renewalFailuresTotal: {})", getUserName(), tgtEndTime,
+ metrics.renewalFailures.value(),
+ metrics.renewalFailuresTotal.value(), ie);
+ if (rp == null) {
+ // Use a dummy maxRetries to create the policy. The policy will
+ // only be used to get next retry time with exponential back-off.
+ // The final retry time will be later limited within the
+ // tgt endTime in getNextTgtRenewalTime.
+ rp = RetryPolicies.exponentialBackoffRetry(Long.SIZE - 2,
+ kerberosMinSecondsBeforeRelogin, TimeUnit.MILLISECONDS);
+ }
+ try {
+ nextRefresh = getNextTgtRenewalTime(tgtEndTime, now, rp);
+ } catch (Exception e) {
+ LOG.error("Exception when calculating next tgt renewal time", e);
+ return;
+ }
+ metrics.renewalFailures.incr();
+ // retry until close enough to tgt endTime.
+ if (now > nextRefresh) {
+ LOG.error("TGT is expired. Aborting renew thread for {}.",
+ getUserName());
return;
- } catch (IOException ie) {
- metrics.renewalFailuresTotal.incr();
- final long tgtEndTime = tgt.getEndTime().getTime();
- LOG.warn("Exception encountered while running the renewal "
- + "command for {}. (TGT end time:{}, renewalFailures: {},"
- + "renewalFailuresTotal: {})", getUserName(), tgtEndTime,
- metrics.renewalFailures, metrics.renewalFailuresTotal, ie);
- final long now = Time.now();
- if (rp == null) {
- // Use a dummy maxRetries to create the policy. The policy will
- // only be used to get next retry time with exponential back-off.
- // The final retry time will be later limited within the
- // tgt endTime in getNextTgtRenewalTime.
- rp = RetryPolicies.exponentialBackoffRetry(Long.SIZE - 2,
- kerberosMinSecondsBeforeRelogin, TimeUnit.MILLISECONDS);
- }
- try {
- nextRefresh = getNextTgtRenewalTime(tgtEndTime, now, rp);
- } catch (Exception e) {
- LOG.error("Exception when calculating next tgt renewal time", e);
- return;
- }
- metrics.renewalFailures.incr();
- // retry until close enough to tgt endTime.
- if (now > nextRefresh) {
- LOG.error("TGT is expired. Aborting renew thread for {}.",
- getUserName());
- return;
- }
}
}
- }
- });
- t.setDaemon(true);
- t.setName("TGT Renewer for " + getUserName());
- t.start();
+ } while (runRenewalLoop);
+ }
}
/**
http://git-wip-us.apache.org/repos/asf/hadoop/blob/096c1390/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestUserGroupInformation.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestUserGroupInformation.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestUserGroupInformation.java
index 9477990..011e930 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestUserGroupInformation.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestUserGroupInformation.java
@@ -47,6 +47,7 @@ import org.slf4j.event.Level;
import javax.security.auth.Subject;
import javax.security.auth.kerberos.KerberosPrincipal;
+import javax.security.auth.kerberos.KerberosTicket;
import javax.security.auth.kerberos.KeyTab;
import javax.security.auth.login.AppConfigurationEntry;
import javax.security.auth.login.LoginContext;
@@ -61,6 +62,7 @@ import java.security.PrivilegedExceptionAction;
import java.util.Collection;
import java.util.ConcurrentModificationException;
import java.util.Date;
+import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.Set;
import java.util.concurrent.Callable;
@@ -88,7 +90,10 @@ import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertSame;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
+import static org.mockito.Mockito.atLeastOnce;
+import static org.mockito.Mockito.doThrow;
import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.spy;
import static org.mockito.Mockito.when;
public class TestUserGroupInformation {
@@ -1211,4 +1216,37 @@ public class TestUserGroupInformation {
barrier.await();
assertSame(testUgi1.getSubject(), blockingLookup.get().getSubject());
}
+
+ @Test
+ public void testKerberosTicketIsDestroyedChecked() throws Exception {
+ // Create UserGroupInformation
+ GenericTestUtils.setLogLevel(UserGroupInformation.LOG, Level.DEBUG);
+ Set<User> users = new HashSet<>();
+ users.add(new User("Foo"));
+ Subject subject =
+ new Subject(true, users, new HashSet<>(), new HashSet<>());
+ UserGroupInformation ugi = spy(new UserGroupInformation(subject));
+
+ // throw IOException in the middle of the autoRenewalForUserCreds
+ doThrow(new IOException()).when(ugi).reloginFromTicketCache();
+
+ // Create and destroy the KerberosTicket, so endTime will be null
+ Date d = new Date();
+ KerberosPrincipal kp = new KerberosPrincipal("Foo");
+ KerberosTicket tgt = spy(new KerberosTicket(new byte[]{}, kp, kp, new
+ byte[]{}, 0, null, d, d, d, d, null));
+ tgt.destroy();
+
+ // run AutoRenewalForUserCredsRunnable with this
+ UserGroupInformation.AutoRenewalForUserCredsRunnable userCredsRunnable =
+ ugi.new AutoRenewalForUserCredsRunnable(tgt,
+ Boolean.toString(Boolean.TRUE), 100);
+
+ // Set the runnable not to run in a loop
+ userCredsRunnable.setRunRenewalLoop(false);
+ // there should be no exception when calling this
+ userCredsRunnable.run();
+ // isDestroyed should be called at least once
+ Mockito.verify(tgt, atLeastOnce()).isDestroyed();
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org
[3/7] hadoop git commit: YARN-8546. Resource leak caused by a
reserved container being released more than once under async scheduling.
Contributed by Tao Yang.
Posted by wa...@apache.org.
YARN-8546. Resource leak caused by a reserved container being released more than once under async scheduling. Contributed by Tao Yang.
(Cherry-picked from commit 5be9f4a5d05c9cb99348719fe35626b1de3055db)
(cherry picked from commit b89624a943268e180e0e1532b3a394ff580a962c)
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/49795e9b
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/49795e9b
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/49795e9b
Branch: refs/heads/branch-3.1.1
Commit: 49795e9b9f381debc2641d688aabde48acc87385
Parents: 7cb37ed
Author: Weiwei Yang <ww...@apache.org>
Authored: Wed Jul 25 17:35:27 2018 +0800
Committer: Wangda Tan <wa...@apache.org>
Committed: Tue Jul 31 12:06:30 2018 -0700
----------------------------------------------------------------------
.../scheduler/common/fica/FiCaSchedulerApp.java | 15 ++++
.../TestCapacitySchedulerAsyncScheduling.java | 89 ++++++++++++++++++++
2 files changed, 104 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/49795e9b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java
index 3b1b82c..9810e98 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java
@@ -361,6 +361,21 @@ public class FiCaSchedulerApp extends SchedulerApplicationAttempt {
.isEmpty()) {
for (SchedulerContainer<FiCaSchedulerApp, FiCaSchedulerNode>
releaseContainer : allocation.getToRelease()) {
+ // Make sure to-release reserved containers are not outdated
+ if (releaseContainer.getRmContainer().getState()
+ == RMContainerState.RESERVED
+ && releaseContainer.getRmContainer() != releaseContainer
+ .getSchedulerNode().getReservedContainer()) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Failed to accept this proposal because "
+ + "it tries to release an outdated reserved container "
+ + releaseContainer.getRmContainer().getContainerId()
+ + " on node " + releaseContainer.getSchedulerNode().getNodeID()
+ + " whose reserved container is "
+ + releaseContainer.getSchedulerNode().getReservedContainer());
+ }
+ return false;
+ }
// Only consider non-reserved container (reserved container will
// not affect available resource of node) on the same node
if (releaseContainer.getRmContainer().getState()
http://git-wip-us.apache.org/repos/asf/hadoop/blob/49795e9b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerAsyncScheduling.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerAsyncScheduling.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerAsyncScheduling.java
index 338b9f9..c2c1519 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerAsyncScheduling.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerAsyncScheduling.java
@@ -31,6 +31,7 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.server.resourcemanager.MockAM;
import org.apache.hadoop.yarn.server.resourcemanager.MockNM;
import org.apache.hadoop.yarn.server.resourcemanager.MockRM;
+import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.NullRMNodeLabelsManager;
import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
@@ -41,6 +42,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEven
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerState;
+import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeType;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
@@ -685,6 +687,93 @@ public class TestCapacitySchedulerAsyncScheduling {
rm.stop();
}
+
+ @Test(timeout = 60000)
+ public void testReleaseOutdatedReservedContainer() throws Exception {
+ /*
+ * Submit an application and reserve container_02 on nm1, then
+ * submit two allocate proposals that both list the same reserved
+ * container_02 as a to-release container.
+ * The first proposal should be accepted; the second should be rejected
+ * because it tries to release an outdated reserved container.
+ */
+ MockRM rm1 = new MockRM();
+ rm1.getRMContext().setNodeLabelManager(mgr);
+ rm1.start();
+ MockNM nm1 = rm1.registerNode("h1:1234", 8 * GB);
+ MockNM nm2 = rm1.registerNode("h2:1234", 8 * GB);
+ MockNM nm3 = rm1.registerNode("h3:1234", 8 * GB);
+ rm1.drainEvents();
+
+ CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler();
+ RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId());
+ LeafQueue defaultQueue = (LeafQueue) cs.getQueue("default");
+ SchedulerNode sn1 = cs.getSchedulerNode(nm1.getNodeId());
+ SchedulerNode sn2 = cs.getSchedulerNode(nm2.getNodeId());
+ SchedulerNode sn3 = cs.getSchedulerNode(nm3.getNodeId());
+
+ // launch an app in the default queue; its AM container should be launched on nm1
+ RMApp app1 = rm1.submitApp(4 * GB, "app", "user", null, "default");
+ MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
+ Resource allocateResource = Resources.createResource(5 * GB);
+ am1.allocate("*", (int) allocateResource.getMemorySize(), 3, 0,
+ new ArrayList<ContainerId>(), "");
+ FiCaSchedulerApp schedulerApp1 =
+ cs.getApplicationAttempt(am1.getApplicationAttemptId());
+
+ cs.handle(new NodeUpdateSchedulerEvent(rmNode1));
+ Assert.assertEquals(1, schedulerApp1.getReservedContainers().size());
+ Assert.assertEquals(9 * GB,
+ defaultQueue.getQueueResourceUsage().getUsed().getMemorySize());
+
+ RMContainer reservedContainer =
+ schedulerApp1.getReservedContainers().get(0);
+ ResourceCommitRequest allocateFromSameReservedContainerProposal1 =
+ createAllocateFromReservedProposal(3, allocateResource, schedulerApp1,
+ sn2, sn1, cs.getRMContext(), reservedContainer);
+ boolean tryCommitResult = cs.tryCommit(cs.getClusterResource(),
+ allocateFromSameReservedContainerProposal1, true);
+ Assert.assertTrue(tryCommitResult);
+ ResourceCommitRequest allocateFromSameReservedContainerProposal2 =
+ createAllocateFromReservedProposal(4, allocateResource, schedulerApp1,
+ sn3, sn1, cs.getRMContext(), reservedContainer);
+ tryCommitResult = cs.tryCommit(cs.getClusterResource(),
+ allocateFromSameReservedContainerProposal2, true);
+ Assert.assertFalse("This proposal should be rejected because "
+ + "it try to release an outdated reserved container", tryCommitResult);
+
+ rm1.close();
+ }
+
+ private ResourceCommitRequest createAllocateFromReservedProposal(
+ int containerId, Resource allocateResource, FiCaSchedulerApp schedulerApp,
+ SchedulerNode allocateNode, SchedulerNode reservedNode,
+ RMContext rmContext, RMContainer reservedContainer) {
+ Container container = Container.newInstance(
+ ContainerId.newContainerId(schedulerApp.getApplicationAttemptId(), containerId),
+ allocateNode.getNodeID(), allocateNode.getHttpAddress(), allocateResource,
+ Priority.newInstance(0), null);
+ RMContainer rmContainer = new RMContainerImpl(container, SchedulerRequestKey
+ .create(ResourceRequest
+ .newInstance(Priority.newInstance(0), "*", allocateResource, 1)),
+ schedulerApp.getApplicationAttemptId(), allocateNode.getNodeID(), "user",
+ rmContext);
+ SchedulerContainer allocateContainer =
+ new SchedulerContainer(schedulerApp, allocateNode, rmContainer, "", true);
+ SchedulerContainer reservedSchedulerContainer =
+ new SchedulerContainer(schedulerApp, reservedNode, reservedContainer, "",
+ false);
+ List<SchedulerContainer> toRelease = new ArrayList<>();
+ toRelease.add(reservedSchedulerContainer);
+ ContainerAllocationProposal allocateFromReservedProposal =
+ new ContainerAllocationProposal(allocateContainer, toRelease, null,
+ NodeType.OFF_SWITCH, NodeType.OFF_SWITCH,
+ SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY, allocateResource);
+ List<ContainerAllocationProposal> allocateProposals = new ArrayList<>();
+ allocateProposals.add(allocateFromReservedProposal);
+ return new ResourceCommitRequest(allocateProposals, null, null);
+ }
+
private void keepNMHeartbeat(List<MockNM> mockNMs, int interval) {
if (nmHeartbeatThread != null) {
nmHeartbeatThread.setShouldStop();
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org
[7/7] hadoop git commit: YARN-8418. App local logs could be leaked if
log aggregation fails to initialize for the app. (Bibin A Chundatt via
wangda)
Posted by wa...@apache.org.
YARN-8418. App local logs could be leaked if log aggregation fails to initialize for the app. (Bibin A Chundatt via wangda)
Change-Id: I29a23ca4b219b48c92e7975cd44cddb8b0e04104
(cherry picked from commit 4b540bbfcf02d828052999215c6135603d98f5db)
(cherry picked from commit 7b552c9d722ed243cb6e17122edcecda62f54e68)
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/8596a0eb
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/8596a0eb
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/8596a0eb
Branch: refs/heads/branch-3.1.1
Commit: 8596a0ebe8c8a862c57d291669e56aacfc4d411b
Parents: 17625e4
Author: Wangda Tan <wa...@apache.org>
Authored: Tue Jul 31 12:07:51 2018 -0700
Committer: Wangda Tan <wa...@apache.org>
Committed: Tue Jul 31 12:18:11 2018 -0700
----------------------------------------------------------------------
.../LogAggregationFileController.java | 7 ++
.../nodemanager/NodeStatusUpdaterImpl.java | 1 +
.../containermanager/ContainerManager.java | 1 +
.../containermanager/ContainerManagerImpl.java | 13 ++-
.../logaggregation/AppLogAggregator.java | 8 ++
.../logaggregation/AppLogAggregatorImpl.java | 15 ++++
.../logaggregation/LogAggregationService.java | 83 ++++++++++++++++----
.../containermanager/loghandler/LogHandler.java | 7 ++
.../loghandler/NonAggregatingLogHandler.java | 9 +++
.../loghandler/event/LogHandlerEventType.java | 4 +-
.../event/LogHandlerTokenUpdatedEvent.java | 26 ++++++
.../nodemanager/DummyContainerManager.java | 7 ++
.../TestLogAggregationService.java | 34 +++++---
13 files changed, 187 insertions(+), 28 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/8596a0eb/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/filecontroller/LogAggregationFileController.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/filecontroller/LogAggregationFileController.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/filecontroller/LogAggregationFileController.java
index 5ac89e9..25d012e 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/filecontroller/LogAggregationFileController.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/filecontroller/LogAggregationFileController.java
@@ -43,11 +43,14 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.security.token.SecretManager;
import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
import org.apache.hadoop.yarn.logaggregation.LogAggregationUtils;
import org.apache.hadoop.yarn.webapp.View.ViewContext;
@@ -363,6 +366,10 @@ public abstract class LogAggregationFileController {
}
});
} catch (Exception e) {
+ if (e instanceof RemoteException) {
+ throw new YarnRuntimeException(((RemoteException) e)
+ .unwrapRemoteException(SecretManager.InvalidToken.class));
+ }
throw new YarnRuntimeException(e);
}
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/8596a0eb/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java
index 8154723..faf7adb 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java
@@ -1135,6 +1135,7 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
if (systemCredentials != null && !systemCredentials.isEmpty()) {
((NMContext) context).setSystemCrendentialsForApps(
parseCredentials(systemCredentials));
+ context.getContainerManager().handleCredentialUpdate();
}
List<org.apache.hadoop.yarn.api.records.Container>
containersToUpdate = response.getContainersToUpdate();
http://git-wip-us.apache.org/repos/asf/hadoop/blob/8596a0eb/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManager.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManager.java
index 2aeb245..356c2e0 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManager.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManager.java
@@ -44,4 +44,5 @@ public interface ContainerManager extends ServiceStateChangeListener,
ContainerScheduler getContainerScheduler();
+ void handleCredentialUpdate();
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/8596a0eb/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java
index ad63720..b107200 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java
@@ -21,6 +21,7 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager;
import com.google.common.annotations.VisibleForTesting;
import com.google.protobuf.ByteString;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.UpdateContainerTokenEvent;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerTokenUpdatedEvent;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler.ContainerSchedulerEvent;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -170,7 +171,6 @@ import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
-import java.util.EnumSet;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -214,6 +214,7 @@ public class ContainerManagerImpl extends CompositeService implements
protected final AsyncDispatcher dispatcher;
private final DeletionService deletionService;
+ private LogHandler logHandler;
private boolean serviceStopped = false;
private final ReadLock readLock;
private final WriteLock writeLock;
@@ -292,7 +293,7 @@ public class ContainerManagerImpl extends CompositeService implements
@Override
public void serviceInit(Configuration conf) throws Exception {
- LogHandler logHandler =
+ logHandler =
createLogHandler(conf, this.context, this.deletionService);
addIfService(logHandler);
dispatcher.register(LogHandlerEventType.class, logHandler);
@@ -1904,4 +1905,12 @@ public class ContainerManagerImpl extends CompositeService implements
public ContainerScheduler getContainerScheduler() {
return this.containerScheduler;
}
+
+ @Override
+ public void handleCredentialUpdate() {
+ Set<ApplicationId> invalidApps = logHandler.getInvalidTokenApps();
+ if (!invalidApps.isEmpty()) {
+ dispatcher.getEventHandler().handle(new LogHandlerTokenUpdatedEvent());
+ }
+ }
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/8596a0eb/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/AppLogAggregator.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/AppLogAggregator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/AppLogAggregator.java
index 0178699..93436fa 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/AppLogAggregator.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/AppLogAggregator.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.yarn.server.nodemanager.containermanager.logaggregation;
+import org.apache.hadoop.security.Credentials;
+import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.yarn.server.api.ContainerLogContext;
public interface AppLogAggregator extends Runnable {
@@ -29,4 +31,10 @@ public interface AppLogAggregator extends Runnable {
void finishLogAggregation();
void disableLogAggregation();
+
+ void enableLogAggregation();
+
+ boolean isAggregationEnabled();
+
+ UserGroupInformation updateCredentials(Credentials cred);
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/8596a0eb/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/AppLogAggregatorImpl.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/AppLogAggregatorImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/AppLogAggregatorImpl.java
index c7e06ff..0470e73 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/AppLogAggregatorImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/AppLogAggregatorImpl.java
@@ -534,6 +534,16 @@ public class AppLogAggregatorImpl implements AppLogAggregator {
this.logAggregationDisabled = true;
}
+ @Override
+ public void enableLogAggregation() {
+ this.logAggregationDisabled = false;
+ }
+
+ @Override
+ public boolean isAggregationEnabled() {
+ return !logAggregationDisabled;
+ }
+
@Private
@VisibleForTesting
// This is only used for testing.
@@ -616,6 +626,11 @@ public class AppLogAggregatorImpl implements AppLogAggregator {
return this.userUgi;
}
+ public UserGroupInformation updateCredentials(Credentials cred) {
+ this.userUgi.addCredentials(cred);
+ return userUgi;
+ }
+
@Private
@VisibleForTesting
public int getLogAggregationTimes() {
http://git-wip-us.apache.org/repos/asf/hadoop/blob/8596a0eb/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/LogAggregationService.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/LogAggregationService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/LogAggregationService.java
index 4938939..8ea97d1 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/LogAggregationService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/LogAggregationService.java
@@ -20,10 +20,14 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.logaggregatio
import java.io.IOException;
import java.util.Map;
+import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
+
+import org.apache.hadoop.security.token.SecretManager;
+
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -58,6 +62,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.eve
import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerContainerFinishedEvent;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerEvent;
+
import com.google.common.annotations.VisibleForTesting;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
@@ -83,6 +88,9 @@ public class LogAggregationService extends AbstractService implements
private final ConcurrentMap<ApplicationId, AppLogAggregator> appLogAggregators;
+ // Holds applications whose aggregation is disabled due to an invalid token
+ private final Set<ApplicationId> invalidTokenApps;
+
@VisibleForTesting
ExecutorService threadPool;
@@ -95,6 +103,7 @@ public class LogAggregationService extends AbstractService implements
this.dirsHandler = dirsHandler;
this.appLogAggregators =
new ConcurrentHashMap<ApplicationId, AppLogAggregator>();
+ this.invalidTokenApps = ConcurrentHashMap.newKeySet();
}
protected void serviceInit(Configuration conf) throws Exception {
@@ -224,8 +233,8 @@ public class LogAggregationService extends AbstractService implements
userUgi.addCredentials(credentials);
}
- LogAggregationFileController logAggregationFileController
- = getLogAggregationFileController(getConfig());
+ LogAggregationFileController logAggregationFileController =
+ getLogAggregationFileController(getConfig());
logAggregationFileController.verifyAndCreateRemoteLogDir();
// New application
final AppLogAggregator appLogAggregator =
@@ -245,14 +254,16 @@ public class LogAggregationService extends AbstractService implements
logAggregationFileController.createAppDir(user, appId, userUgi);
} catch (Exception e) {
appLogAggregator.disableLogAggregation();
+
+ // add to disabled aggregators if due to InvalidToken
+ if (e.getCause() instanceof SecretManager.InvalidToken) {
+ invalidTokenApps.add(appId);
+ }
if (!(e instanceof YarnRuntimeException)) {
appDirException = new YarnRuntimeException(e);
} else {
appDirException = (YarnRuntimeException)e;
}
- appLogAggregators.remove(appId);
- closeFileSystems(userUgi);
- throw appDirException;
}
// TODO Get the user configuration for the list of containers that need log
@@ -270,6 +281,10 @@ public class LogAggregationService extends AbstractService implements
}
};
this.threadPool.execute(aggregatorWrapper);
+
+ if (appDirException != null) {
+ throw appDirException;
+ }
}
protected void closeFileSystems(final UserGroupInformation userUgi) {
@@ -307,17 +322,20 @@ public class LogAggregationService extends AbstractService implements
// App is complete. Finish up any containers' pending log aggregation and
// close the application specific logFile.
-
- AppLogAggregator aggregator = this.appLogAggregators.get(appId);
- if (aggregator == null) {
- LOG.warn("Log aggregation is not initialized for " + appId
- + ", did it fail to start?");
- this.dispatcher.getEventHandler().handle(
- new ApplicationEvent(appId,
- ApplicationEventType.APPLICATION_LOG_HANDLING_FAILED));
- return;
+ try {
+ AppLogAggregator aggregator = this.appLogAggregators.get(appId);
+ if (aggregator == null) {
+ LOG.warn("Log aggregation is not initialized for " + appId
+ + ", did it fail to start?");
+ this.dispatcher.getEventHandler().handle(new ApplicationEvent(appId,
+ ApplicationEventType.APPLICATION_LOG_HANDLING_FAILED));
+ return;
+ }
+ aggregator.finishLogAggregation();
+ } finally {
+ // Remove invalid Token Apps
+ invalidTokenApps.remove(appId);
}
- aggregator.finishLogAggregation();
}
@Override
@@ -344,12 +362,47 @@ public class LogAggregationService extends AbstractService implements
(LogHandlerAppFinishedEvent) event;
stopApp(appFinishedEvent.getApplicationId());
break;
+ case LOG_AGG_TOKEN_UPDATE:
+ checkAndEnableAppAggregators();
+ break;
default:
; // Ignore
}
}
+ private void checkAndEnableAppAggregators() {
+ for (ApplicationId appId : invalidTokenApps) {
+ try {
+ AppLogAggregator aggregator = appLogAggregators.get(appId);
+ if (aggregator != null) {
+ Credentials credentials =
+ context.getSystemCredentialsForApps().get(appId);
+ if (credentials != null) {
+ // Create the app dir again with the updated credentials
+ LogAggregationFileController logAggregationFileController =
+ getLogAggregationFileController(getConfig());
+ UserGroupInformation userUgi =
+ aggregator.updateCredentials(credentials);
+ logAggregationFileController
+ .createAppDir(userUgi.getShortUserName(), appId, userUgi);
+ aggregator.enableLogAggregation();
+ }
+ invalidTokenApps.remove(appId);
+ LOG.info("LogAggregation enabled for application {}", appId);
+ }
+ } catch (Exception e) {
+ //Ignore exception
+ LOG.warn("Enable aggregators failed {}", appId);
+ }
+ }
+ }
+
+ @Override
+ public Set<ApplicationId> getInvalidTokenApps() {
+ return invalidTokenApps;
+ }
+
@VisibleForTesting
public ConcurrentMap<ApplicationId, AppLogAggregator> getAppLogAggregators() {
return this.appLogAggregators;
http://git-wip-us.apache.org/repos/asf/hadoop/blob/8596a0eb/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/loghandler/LogHandler.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/loghandler/LogHandler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/loghandler/LogHandler.java
index 6eb3fb4..459fdf4 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/loghandler/LogHandler.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/loghandler/LogHandler.java
@@ -18,9 +18,16 @@
package org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerEvent;
+
+
+import java.util.Set;
+
public interface LogHandler extends EventHandler<LogHandlerEvent> {
public void handle(LogHandlerEvent event);
+
+ public Set<ApplicationId> getInvalidTokenApps();
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hadoop/blob/8596a0eb/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/loghandler/NonAggregatingLogHandler.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/loghandler/NonAggregatingLogHandler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/loghandler/NonAggregatingLogHandler.java
index 9c43dde..d66aa12 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/loghandler/NonAggregatingLogHandler.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/loghandler/NonAggregatingLogHandler.java
@@ -19,8 +19,12 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
import java.util.List;
import java.util.Map;
+import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.ThreadFactory;
@@ -204,6 +208,11 @@ public class NonAggregatingLogHandler extends AbstractService implements
}
}
+ @Override
+ public Set<ApplicationId> getInvalidTokenApps() {
+ return Collections.emptySet();
+ }
+
ScheduledThreadPoolExecutor createScheduledThreadPoolExecutor(
Configuration conf) {
ThreadFactory tf =
http://git-wip-us.apache.org/repos/asf/hadoop/blob/8596a0eb/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/loghandler/event/LogHandlerEventType.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/loghandler/event/LogHandlerEventType.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/loghandler/event/LogHandlerEventType.java
index 684d6b2..ec477c2 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/loghandler/event/LogHandlerEventType.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/loghandler/event/LogHandlerEventType.java
@@ -19,5 +19,7 @@
package org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event;
public enum LogHandlerEventType {
- APPLICATION_STARTED, CONTAINER_FINISHED, APPLICATION_FINISHED
+ APPLICATION_STARTED,
+ CONTAINER_FINISHED,
+ APPLICATION_FINISHED, LOG_AGG_TOKEN_UPDATE
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/8596a0eb/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/loghandler/event/LogHandlerTokenUpdatedEvent.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/loghandler/event/LogHandlerTokenUpdatedEvent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/loghandler/event/LogHandlerTokenUpdatedEvent.java
new file mode 100644
index 0000000..772a463
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/loghandler/event/LogHandlerTokenUpdatedEvent.java
@@ -0,0 +1,26 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event;
+
+public class LogHandlerTokenUpdatedEvent extends LogHandlerEvent {
+
+ public LogHandlerTokenUpdatedEvent() {
+ super(LogHandlerEventType.LOG_AGG_TOKEN_UPDATE);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/8596a0eb/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/DummyContainerManager.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/DummyContainerManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/DummyContainerManager.java
index b5cb43b..feabeb1 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/DummyContainerManager.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/DummyContainerManager.java
@@ -24,6 +24,8 @@ import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.Collection;
+import java.util.Collections;
+import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
@@ -187,6 +189,11 @@ public class DummyContainerManager extends ContainerManagerImpl {
// Ignore
}
}
+
+ @Override
+ public Set<ApplicationId> getInvalidTokenApps() {
+ return Collections.emptySet();
+ }
};
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/8596a0eb/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java
index 05cdd49..dd83d99 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java
@@ -73,6 +73,7 @@ import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.security.token.SecretManager;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest;
@@ -128,6 +129,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.Tes
import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerAppFinishedEvent;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerAppStartedEvent;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerContainerFinishedEvent;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerTokenUpdatedEvent;
import org.apache.hadoop.yarn.server.nodemanager.executor.DeletionAsUserContext;
import org.apache.hadoop.yarn.server.utils.BuilderUtils;
import org.apache.hadoop.yarn.util.ConverterUtils;
@@ -823,7 +825,8 @@ public class TestLogAggregationService extends BaseContainerManagerTest {
.getFileControllerForWrite();
LogAggregationFileController spyLogAggregationFileFormat =
spy(logAggregationFileFormat);
- Exception e = new RuntimeException("KABOOM!");
+ Exception e =
+ new YarnRuntimeException(new SecretManager.InvalidToken("KABOOM!"));
doThrow(e).when(spyLogAggregationFileFormat)
.createAppDir(any(String.class), any(ApplicationId.class),
any(UserGroupInformation.class));
@@ -862,29 +865,40 @@ public class TestLogAggregationService extends BaseContainerManagerTest {
};
checkEvents(appEventHandler, expectedEvents, false,
"getType", "getApplicationID", "getDiagnostic");
-
+ Assert.assertEquals(logAggregationService.getInvalidTokenApps().size(), 1);
// verify trying to collect logs for containers/apps we don't know about
// doesn't blow up and tear down the NM
logAggregationService.handle(new LogHandlerContainerFinishedEvent(
BuilderUtils.newContainerId(4, 1, 1, 1),
ContainerType.APPLICATION_MASTER, 0));
dispatcher.await();
+
+ AppLogAggregator appAgg =
+ logAggregationService.getAppLogAggregators().get(appId);
+ Assert.assertFalse("Aggregation should be disabled",
+ appAgg.isAggregationEnabled());
+
+ // Enable aggregation
+ logAggregationService.handle(new LogHandlerTokenUpdatedEvent());
+ dispatcher.await();
+
+ appAgg =
+ logAggregationService.getAppLogAggregators().get(appId);
+ Assert.assertFalse("Aggregation should be enabled",
+ appAgg.isAggregationEnabled());
+
+ // Check disabled apps are cleared
+ Assert.assertEquals(0, logAggregationService.getInvalidTokenApps().size());
+
logAggregationService.handle(new LogHandlerAppFinishedEvent(
BuilderUtils.newApplicationId(1, 5)));
dispatcher.await();
logAggregationService.stop();
assertEquals(0, logAggregationService.getNumAggregators());
- // local log dir shouldn't be deleted given log aggregation cannot
- // continue due to aggregated log dir creation failure on remoteFS.
- FileDeletionTask deletionTask = new FileDeletionTask(spyDelSrvc, user,
- null, null);
- verify(spyDelSrvc, never()).delete(deletionTask);
+ verify(spyDelSrvc).delete(any(FileDeletionTask.class));
verify(logAggregationService).closeFileSystems(
any(UserGroupInformation.class));
- // make sure local log dir is not deleted in case log aggregation
- // service cannot be initiated.
- assertTrue(appLogDir.exists());
}
private void writeContainerLogs(File appLogDir, ContainerId containerId,
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org
[6/7] hadoop git commit: YARN-8508. Release GPU resource for killed
container. Contributed by Chandni Singh
Posted by wa...@apache.org.
YARN-8508. Release GPU resource for killed container.
Contributed by Chandni Singh
(cherry picked from commit ed9d60e888d0acfd748fda7f66249f5b79a3ed6d)
(cherry picked from commit c2c3eee69c8b389525fbde800e057dbcb2fc643e)
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/17625e40
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/17625e40
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/17625e40
Branch: refs/heads/branch-3.1.1
Commit: 17625e40f61ac31fc87f4a74d66388dbe508d33d
Parents: 096c139
Author: Eric Yang <ey...@apache.org>
Authored: Fri Jul 27 19:33:58 2018 -0400
Committer: Wangda Tan <wa...@apache.org>
Committed: Tue Jul 31 12:06:54 2018 -0700
----------------------------------------------------------------------
.../nodemanager/LinuxContainerExecutor.java | 34 ++++++++++----------
.../nodemanager/TestLinuxContainerExecutor.java | 9 +++++-
2 files changed, 25 insertions(+), 18 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/17625e40/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
index 03b88a4..4253f2f 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
@@ -573,15 +573,7 @@ public class LinuxContainerExecutor extends ContainerExecutor {
return handleExitCode(e, container, containerId);
} finally {
resourcesHandler.postExecute(containerId);
-
- try {
- if (resourceHandlerChain != null) {
- resourceHandlerChain.postComplete(containerId);
- }
- } catch (ResourceHandlerException e) {
- LOG.warn("ResourceHandlerChain.postComplete failed for " +
- "containerId: " + containerId + ". Exception: " + e);
- }
+ postComplete(containerId);
}
return 0;
@@ -721,14 +713,7 @@ public class LinuxContainerExecutor extends ContainerExecutor {
return super.reacquireContainer(ctx);
} finally {
resourcesHandler.postExecute(containerId);
- if (resourceHandlerChain != null) {
- try {
- resourceHandlerChain.postComplete(containerId);
- } catch (ResourceHandlerException e) {
- LOG.warn("ResourceHandlerChain.postComplete failed for " +
- "containerId: " + containerId + " Exception: " + e);
- }
- }
+ postComplete(containerId);
}
}
@@ -798,6 +783,8 @@ public class LinuxContainerExecutor extends ContainerExecutor {
logOutput(e.getOutput());
throw new IOException("Error in reaping container "
+ container.getContainerId().toString() + " exit = " + retCode, e);
+ } finally {
+ postComplete(container.getContainerId());
}
return true;
}
@@ -968,4 +955,17 @@ public class LinuxContainerExecutor extends ContainerExecutor {
LOG.warn("Unable to remove docker container: " + containerId);
}
}
+
+ @VisibleForTesting
+ void postComplete(final ContainerId containerId) {
+ try {
+ if (resourceHandlerChain != null) {
+ LOG.debug("{} post complete", containerId);
+ resourceHandlerChain.postComplete(containerId);
+ }
+ } catch (ResourceHandlerException e) {
+ LOG.warn("ResourceHandlerChain.postComplete failed for " +
+ "containerId: {}. Exception: ", containerId, e);
+ }
+ }
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/17625e40/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutor.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutor.java
index ddbf3b9..6d77fc4 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutor.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutor.java
@@ -25,11 +25,14 @@ import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
+import static org.mockito.Matchers.anyObject;
import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.spy;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.LinuxContainerRuntime;
import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerReapContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -40,6 +43,7 @@ import java.io.IOException;
import java.io.PrintWriter;
import java.net.InetSocketAddress;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
@@ -667,12 +671,15 @@ public class TestLinuxContainerExecutor {
@Test
public void testReapContainer() throws Exception {
Container container = mock(Container.class);
- LinuxContainerExecutor lce = mock(LinuxContainerExecutor.class);
+ LinuxContainerRuntime containerRuntime = mock(LinuxContainerRuntime.class);
+ LinuxContainerExecutor lce = spy(new LinuxContainerExecutor(
+ containerRuntime));
ContainerReapContext.Builder builder = new ContainerReapContext.Builder();
builder.setContainer(container).setUser("foo");
ContainerReapContext ctx = builder.build();
lce.reapContainer(ctx);
verify(lce, times(1)).reapContainer(ctx);
+ verify(lce, times(1)).postComplete(anyObject());
}
@Test
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org
[4/7] hadoop git commit: YARN-8545. Return allocated resource to RM
for failed container. Contributed by Chandni Singh
Posted by wa...@apache.org.
YARN-8545. Return allocated resource to RM for failed container.
Contributed by Chandni Singh
(cherry picked from commit 40fad32824d2f8f960c779d78357e62103453da0)
(cherry picked from commit 177f6045ac4ae6e2dbae2e04da8c9cebb5da8748)
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/bae40602
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/bae40602
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/bae40602
Branch: refs/heads/branch-3.1.1
Commit: bae40602fa7aa0c30e7305626acf6c01cde0a129
Parents: 49795e9
Author: Eric Yang <ey...@apache.org>
Authored: Thu Jul 26 18:22:57 2018 -0400
Committer: Wangda Tan <wa...@apache.org>
Committed: Tue Jul 31 12:06:39 2018 -0700
----------------------------------------------------------------------
.../hadoop/yarn/service/ServiceScheduler.java | 3 +-
.../yarn/service/component/Component.java | 42 +++++++++++---------
.../component/instance/ComponentInstance.java | 21 +++++++---
.../instance/ComponentInstanceEvent.java | 2 +
.../containerlaunch/ContainerLaunchService.java | 12 ++++--
.../hadoop/yarn/service/MockServiceAM.java | 34 +++++++++++++++-
.../hadoop/yarn/service/TestServiceAM.java | 35 ++++++++++++++++
.../yarn/service/component/TestComponent.java | 3 +-
.../instance/TestComponentInstance.java | 26 ++++++------
9 files changed, 135 insertions(+), 43 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/bae40602/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceScheduler.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceScheduler.java
index d3e8e4f..cfaf356 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceScheduler.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceScheduler.java
@@ -687,7 +687,8 @@ public class ServiceScheduler extends CompositeService {
}
ComponentEvent event =
new ComponentEvent(instance.getCompName(), CONTAINER_COMPLETED)
- .setStatus(status).setInstance(instance);
+ .setStatus(status).setInstance(instance)
+ .setContainerId(containerId);
dispatcher.getEventHandler().handle(event);
}
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/bae40602/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/Component.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/Component.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/Component.java
index a1ee796..aaa23da 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/Component.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/Component.java
@@ -19,6 +19,7 @@
package org.apache.hadoop.yarn.service.component;
import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.ExecutionType;
@@ -518,10 +519,10 @@ public class Component implements EventHandler<ComponentEvent> {
private static class ContainerCompletedTransition extends BaseTransition {
@Override
public void transition(Component component, ComponentEvent event) {
-
+ Preconditions.checkNotNull(event.getContainerId());
component.updateMetrics(event.getStatus());
component.dispatcher.getEventHandler().handle(
- new ComponentInstanceEvent(event.getStatus().getContainerId(), STOP)
+ new ComponentInstanceEvent(event.getContainerId(), STOP)
.setStatus(event.getStatus()));
ComponentRestartPolicy restartPolicy =
@@ -784,28 +785,33 @@ public class Component implements EventHandler<ComponentEvent> {
}
private void updateMetrics(ContainerStatus status) {
- switch (status.getExitStatus()) {
- case SUCCESS:
- componentMetrics.containersSucceeded.incr();
- scheduler.getServiceMetrics().containersSucceeded.incr();
- return;
- case PREEMPTED:
- componentMetrics.containersPreempted.incr();
- scheduler.getServiceMetrics().containersPreempted.incr();
- break;
- case DISKS_FAILED:
- componentMetrics.containersDiskFailure.incr();
- scheduler.getServiceMetrics().containersDiskFailure.incr();
- break;
- default:
- break;
+ //when a container preparation fails while building launch context, then
+ //the container status may not exist.
+ if (status != null) {
+ switch (status.getExitStatus()) {
+ case SUCCESS:
+ componentMetrics.containersSucceeded.incr();
+ scheduler.getServiceMetrics().containersSucceeded.incr();
+ return;
+ case PREEMPTED:
+ componentMetrics.containersPreempted.incr();
+ scheduler.getServiceMetrics().containersPreempted.incr();
+ break;
+ case DISKS_FAILED:
+ componentMetrics.containersDiskFailure.incr();
+ scheduler.getServiceMetrics().containersDiskFailure.incr();
+ break;
+ default:
+ break;
+ }
}
// containersFailed include preempted, disks_failed etc.
componentMetrics.containersFailed.incr();
scheduler.getServiceMetrics().containersFailed.incr();
- if (Apps.shouldCountTowardsNodeBlacklisting(status.getExitStatus())) {
+ if (status != null && Apps.shouldCountTowardsNodeBlacklisting(
+ status.getExitStatus())) {
String host = scheduler.getLiveInstances().get(status.getContainerId())
.getNodeId().getHost();
failureTracker.incNodeFailure(host);
http://git-wip-us.apache.org/repos/asf/hadoop/blob/bae40602/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/instance/ComponentInstance.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/instance/ComponentInstance.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/instance/ComponentInstance.java
index 529596d..44ae1e7 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/instance/ComponentInstance.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/instance/ComponentInstance.java
@@ -76,6 +76,8 @@ public class ComponentInstance implements EventHandler<ComponentInstanceEvent>,
Comparable<ComponentInstance> {
private static final Logger LOG =
LoggerFactory.getLogger(ComponentInstance.class);
+ private static final String FAILED_BEFORE_LAUNCH_DIAG =
+ "failed before launch";
private StateMachine<ComponentInstanceState, ComponentInstanceEventType,
ComponentInstanceEvent> stateMachine;
@@ -236,7 +238,8 @@ public class ComponentInstance implements EventHandler<ComponentInstanceEvent>,
@VisibleForTesting
static void handleComponentInstanceRelaunch(
- ComponentInstance compInstance, ComponentInstanceEvent event) {
+ ComponentInstance compInstance, ComponentInstanceEvent event,
+ boolean failureBeforeLaunch) {
Component comp = compInstance.getComponent();
// Do we need to relaunch the service?
@@ -252,8 +255,10 @@ public class ComponentInstance implements EventHandler<ComponentInstanceEvent>,
+ ": {} completed. Reinsert back to pending list and requested " +
"a new container." + System.lineSeparator() +
" exitStatus={}, diagnostics={}.",
- event.getContainerId(), event.getStatus().getExitStatus(),
- event.getStatus().getDiagnostics());
+ event.getContainerId(), failureBeforeLaunch ? null :
+ event.getStatus().getExitStatus(),
+ failureBeforeLaunch ? FAILED_BEFORE_LAUNCH_DIAG :
+ event.getStatus().getDiagnostics());
} else {
// When no relaunch, update component's #succeeded/#failed
// instances.
@@ -292,8 +297,8 @@ public class ComponentInstance implements EventHandler<ComponentInstanceEvent>,
Component comp = compInstance.component;
String containerDiag =
- compInstance.getCompInstanceId() + ": " + event.getStatus()
- .getDiagnostics();
+ compInstance.getCompInstanceId() + ": " + (failedBeforeLaunching ?
+ FAILED_BEFORE_LAUNCH_DIAG : event.getStatus().getDiagnostics());
compInstance.diagnostics.append(containerDiag + System.lineSeparator());
compInstance.cancelContainerStatusRetriever();
if (compInstance.getState().equals(ComponentInstanceState.UPGRADING)) {
@@ -307,6 +312,9 @@ public class ComponentInstance implements EventHandler<ComponentInstanceEvent>,
boolean shouldFailService = false;
final ServiceScheduler scheduler = comp.getScheduler();
+ scheduler.getAmRMClient().releaseAssignedContainer(
+ event.getContainerId());
+
// Check if it exceeds the failure threshold, but only if health threshold
// monitor is not enabled
if (!comp.isHealthThresholdMonitorEnabled()
@@ -347,7 +355,8 @@ public class ComponentInstance implements EventHandler<ComponentInstanceEvent>,
// According to component restart policy, handle container restart
// or finish the service (if all components finished)
- handleComponentInstanceRelaunch(compInstance, event);
+ handleComponentInstanceRelaunch(compInstance, event,
+ failedBeforeLaunching);
if (shouldFailService) {
scheduler.getTerminationHandler().terminate(-1);
http://git-wip-us.apache.org/repos/asf/hadoop/blob/bae40602/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/instance/ComponentInstanceEvent.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/instance/ComponentInstanceEvent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/instance/ComponentInstanceEvent.java
index 707b034..889da6e 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/instance/ComponentInstanceEvent.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/instance/ComponentInstanceEvent.java
@@ -18,6 +18,7 @@
package org.apache.hadoop.yarn.service.component.instance;
+import com.google.common.base.Preconditions;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.event.AbstractEvent;
@@ -32,6 +33,7 @@ public class ComponentInstanceEvent
public ComponentInstanceEvent(ContainerId containerId,
ComponentInstanceEventType componentInstanceEventType) {
super(componentInstanceEventType);
+ Preconditions.checkNotNull(containerId);
this.id = containerId;
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/bae40602/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/containerlaunch/ContainerLaunchService.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/containerlaunch/ContainerLaunchService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/containerlaunch/ContainerLaunchService.java
index 084c721..f674e0d 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/containerlaunch/ContainerLaunchService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/containerlaunch/ContainerLaunchService.java
@@ -22,8 +22,11 @@ import com.google.common.base.Preconditions;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.service.AbstractService;
import org.apache.hadoop.yarn.api.records.Container;
+import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.service.ServiceContext;
import org.apache.hadoop.yarn.service.api.records.Artifact;
+import org.apache.hadoop.yarn.service.component.ComponentEvent;
+import org.apache.hadoop.yarn.service.component.ComponentEventType;
import org.apache.hadoop.yarn.service.component.instance.ComponentInstance;
import org.apache.hadoop.yarn.service.provider.ProviderService;
import org.apache.hadoop.yarn.service.provider.ProviderFactory;
@@ -116,9 +119,12 @@ public class ContainerLaunchService extends AbstractService{
launcher.completeContainerLaunch(), true);
}
} catch (Exception e) {
- LOG.error(instance.getCompInstanceId()
- + ": Failed to launch container. ", e);
-
+ LOG.error("{}: Failed to launch container.",
+ instance.getCompInstanceId(), e);
+ ComponentEvent event = new ComponentEvent(instance.getCompName(),
+ ComponentEventType.CONTAINER_COMPLETED)
+ .setInstance(instance).setContainerId(container.getId());
+ context.scheduler.getDispatcher().getEventHandler().handle(event);
}
}
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/bae40602/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/MockServiceAM.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/MockServiceAM.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/MockServiceAM.java
index 4a75aef..729287c 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/MockServiceAM.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/MockServiceAM.java
@@ -68,6 +68,7 @@ import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
+import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeoutException;
@@ -99,6 +100,8 @@ public class MockServiceAM extends ServiceMaster {
private Map<ContainerId, ContainerStatus> containerStatuses =
new ConcurrentHashMap<>();
+ private Set<ContainerId> releasedContainers = ConcurrentHashMap.newKeySet();
+
private Credentials amCreds;
public MockServiceAM(Service service) {
@@ -223,6 +226,13 @@ public class MockServiceAM extends ServiceMaster {
return response;
}
+ @Override
+ public synchronized void releaseAssignedContainer(
+ ContainerId containerId) {
+ releasedContainers.add(containerId);
+ super.releaseAssignedContainer(containerId);
+ }
+
@Override public void unregisterApplicationMaster(
FinalApplicationStatus appStatus, String appMessage,
String appTrackingUrl) {
@@ -288,7 +298,7 @@ public class MockServiceAM extends ServiceMaster {
}
/**
- *
+ * Creates a mock container and container ID and feeds to the component.
* @param service The service for the component
* @param id The id for the container
* @param compName The component to which the container is fed
@@ -297,6 +307,18 @@ public class MockServiceAM extends ServiceMaster {
public Container feedContainerToComp(Service service, int id,
String compName) {
ContainerId containerId = createContainerId(id);
+ return feedContainerToComp(service, containerId, compName);
+ }
+
+ /**
+ * Feeds the container to the component.
+ * @param service The service for the component
+ * @param containerId container id
+ * @param compName The component to which the container is fed
+ * @return
+ */
+ public Container feedContainerToComp(Service service, ContainerId containerId,
+ String compName) {
Container container = createContainer(containerId, compName);
synchronized (feedContainers) {
feedContainers.add(container);
@@ -423,4 +445,14 @@ public class MockServiceAM extends ServiceMaster {
}
return ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
}
+
+ /**
+ * Waits for the container to get released.
+ * @param containerId ContainerId
+ */
+ public void waitForContainerToRelease(ContainerId containerId)
+ throws TimeoutException, InterruptedException {
+ GenericTestUtils.waitFor(() -> releasedContainers.contains(containerId),
+ 1000, 9990000);
+ }
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/bae40602/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/TestServiceAM.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/TestServiceAM.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/TestServiceAM.java
index e9478f0..21e93fa 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/TestServiceAM.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/TestServiceAM.java
@@ -18,6 +18,7 @@
package org.apache.hadoop.yarn.service;
+import com.google.common.base.Supplier;
import com.google.common.collect.ImmutableMap;
import org.apache.commons.io.FileUtils;
import org.apache.curator.test.TestingCluster;
@@ -391,4 +392,38 @@ public class TestServiceAM extends ServiceTestUtils{
.equals("newer.host"), 2000, 200000);
am.stop();
}
+
+ // Test to verify that the containers are released and the
+ // component instance is added to the pending queue when building the launch
+ // context fails.
+ @Test(timeout = 9990000)
+ public void testContainersReleasedWhenPreLaunchFails()
+ throws Exception {
+ ApplicationId applicationId = ApplicationId.newInstance(
+ System.currentTimeMillis(), 1);
+ Service exampleApp = new Service();
+ exampleApp.setId(applicationId.toString());
+ exampleApp.setVersion("v1");
+ exampleApp.setName("testContainersReleasedWhenPreLaunchFails");
+
+ Component compA = createComponent("compa", 1, "pwd");
+ Artifact artifact = new Artifact();
+ artifact.setType(Artifact.TypeEnum.TARBALL);
+ compA.artifact(artifact);
+ exampleApp.addComponent(compA);
+
+ MockServiceAM am = new MockServiceAM(exampleApp);
+ am.init(conf);
+ am.start();
+
+ ContainerId containerId = am.createContainerId(1);
+
+ // allocate a container
+ am.feedContainerToComp(exampleApp, containerId, "compa");
+ am.waitForContainerToRelease(containerId);
+
+ Assert.assertEquals(1,
+ am.getComponent("compa").getPendingInstances().size());
+ am.stop();
+ }
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/bae40602/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/component/TestComponent.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/component/TestComponent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/component/TestComponent.java
index d7c15ec..a3b86a7 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/component/TestComponent.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/component/TestComponent.java
@@ -196,7 +196,8 @@ public class TestComponent {
org.apache.hadoop.yarn.api.records.ContainerState.COMPLETE,
"successful", 0);
comp.handle(new ComponentEvent(comp.getName(),
- ComponentEventType.CONTAINER_COMPLETED).setStatus(containerStatus));
+ ComponentEventType.CONTAINER_COMPLETED).setStatus(containerStatus)
+ .setContainerId(instanceContainer.getId()));
componentInstance.handle(
new ComponentInstanceEvent(componentInstance.getContainer().getId(),
ComponentInstanceEventType.STOP).setStatus(containerStatus));
http://git-wip-us.apache.org/repos/asf/hadoop/blob/bae40602/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/component/instance/TestComponentInstance.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/component/instance/TestComponentInstance.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/component/instance/TestComponentInstance.java
index 26e8c93..95d774a 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/component/instance/TestComponentInstance.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/component/instance/TestComponentInstance.java
@@ -245,7 +245,7 @@ public class TestComponentInstance {
comp.getAllComponentInstances().iterator().next();
ComponentInstance.handleComponentInstanceRelaunch(componentInstance,
- componentInstanceEvent);
+ componentInstanceEvent, false);
verify(comp, never()).markAsSucceeded(any(ComponentInstance.class));
verify(comp, never()).markAsFailed(any(ComponentInstance.class));
@@ -262,7 +262,7 @@ public class TestComponentInstance {
componentInstance = comp.getAllComponentInstances().iterator().next();
containerStatus.setExitStatus(1);
ComponentInstance.handleComponentInstanceRelaunch(componentInstance,
- componentInstanceEvent);
+ componentInstanceEvent, false);
verify(comp, never()).markAsSucceeded(any(ComponentInstance.class));
verify(comp, never()).markAsFailed(any(ComponentInstance.class));
verify(comp, times(1)).reInsertPendingInstance(
@@ -286,7 +286,7 @@ public class TestComponentInstance {
when(comp.getNumSucceededInstances()).thenReturn(new Long(1));
ComponentInstance.handleComponentInstanceRelaunch(componentInstance,
- componentInstanceEvent);
+ componentInstanceEvent, false);
verify(comp, times(1)).markAsSucceeded(any(ComponentInstance.class));
verify(comp, never()).markAsFailed(any(ComponentInstance.class));
verify(comp, times(0)).reInsertPendingInstance(
@@ -304,7 +304,7 @@ public class TestComponentInstance {
when(comp.getNumFailedInstances()).thenReturn(new Long(1));
ComponentInstance.handleComponentInstanceRelaunch(componentInstance,
- componentInstanceEvent);
+ componentInstanceEvent, false);
verify(comp, never()).markAsSucceeded(any(ComponentInstance.class));
verify(comp, times(1)).markAsFailed(any(ComponentInstance.class));
@@ -323,7 +323,7 @@ public class TestComponentInstance {
componentInstance = comp.getAllComponentInstances().iterator().next();
containerStatus.setExitStatus(1);
ComponentInstance.handleComponentInstanceRelaunch(componentInstance,
- componentInstanceEvent);
+ componentInstanceEvent, false);
verify(comp, never()).markAsSucceeded(any(ComponentInstance.class));
verify(comp, never()).markAsFailed(any(ComponentInstance.class));
verify(comp, times(1)).reInsertPendingInstance(
@@ -340,7 +340,7 @@ public class TestComponentInstance {
componentInstance = comp.getAllComponentInstances().iterator().next();
containerStatus.setExitStatus(1);
ComponentInstance.handleComponentInstanceRelaunch(componentInstance,
- componentInstanceEvent);
+ componentInstanceEvent, false);
verify(comp, never()).markAsSucceeded(any(ComponentInstance.class));
verify(comp, times(1)).markAsFailed(any(ComponentInstance.class));
verify(comp, times(0)).reInsertPendingInstance(
@@ -363,7 +363,7 @@ public class TestComponentInstance {
containerStatus.setExitStatus(1);
ComponentInstance commponentInstance = iter.next();
ComponentInstance.handleComponentInstanceRelaunch(commponentInstance,
- componentInstanceEvent);
+ componentInstanceEvent, false);
verify(comp, never()).markAsSucceeded(any(ComponentInstance.class));
verify(comp, never()).markAsFailed(any(ComponentInstance.class));
@@ -404,7 +404,7 @@ public class TestComponentInstance {
when(component2Instance.getComponent().getNumFailedInstances())
.thenReturn(new Long(failed2Instances.size()));
ComponentInstance.handleComponentInstanceRelaunch(component2Instance,
- componentInstanceEvent);
+ componentInstanceEvent, false);
}
Map<String, ComponentInstance> failed1Instances = new HashMap<>();
@@ -418,7 +418,7 @@ public class TestComponentInstance {
when(component1Instance.getComponent().getNumFailedInstances())
.thenReturn(new Long(failed1Instances.size()));
ComponentInstance.handleComponentInstanceRelaunch(component1Instance,
- componentInstanceEvent);
+ componentInstanceEvent, false);
}
verify(comp, never()).markAsSucceeded(any(ComponentInstance.class));
@@ -458,7 +458,7 @@ public class TestComponentInstance {
when(component2Instance.getComponent().getNumSucceededInstances())
.thenReturn(new Long(succeeded2Instances.size()));
ComponentInstance.handleComponentInstanceRelaunch(component2Instance,
- componentInstanceEvent);
+ componentInstanceEvent, false);
}
Map<String, ComponentInstance> succeeded1Instances = new HashMap<>();
@@ -471,7 +471,7 @@ public class TestComponentInstance {
when(component1Instance.getComponent().getNumSucceededInstances())
.thenReturn(new Long(succeeded1Instances.size()));
ComponentInstance.handleComponentInstanceRelaunch(component1Instance,
- componentInstanceEvent);
+ componentInstanceEvent, false);
}
verify(comp, times(2)).markAsSucceeded(any(ComponentInstance.class));
@@ -500,7 +500,7 @@ public class TestComponentInstance {
for (ComponentInstance component2Instance : component2Instances) {
ComponentInstance.handleComponentInstanceRelaunch(component2Instance,
- componentInstanceEvent);
+ componentInstanceEvent, false);
}
succeeded1Instances = new HashMap<>();
@@ -511,7 +511,7 @@ public class TestComponentInstance {
when(component1Instance.getComponent().getSucceededInstances())
.thenReturn(succeeded1Instances.values());
ComponentInstance.handleComponentInstanceRelaunch(component1Instance,
- componentInstanceEvent);
+ componentInstanceEvent, false);
}
verify(comp, times(2)).markAsSucceeded(any(ComponentInstance.class));
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org
[2/7] hadoop git commit: YARN-8301. Added YARN service upgrade
instructions. Contributed by Chandni Singh
Posted by wa...@apache.org.
YARN-8301. Added YARN service upgrade instructions.
Contributed by Chandni Singh
(cherry picked from commit 10014a4d88f239d3c072e51bc0739cba1fca9406)
(cherry picked from commit 4f2a129f2e2326ca28659d93b412cf8649ed5025)
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/7cb37ed6
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/7cb37ed6
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/7cb37ed6
Branch: refs/heads/branch-3.1.1
Commit: 7cb37ed66bc830af41b8fb87207023e74ac5386f
Parents: 0c3d3bc
Author: Eric Yang <ey...@apache.org>
Authored: Fri Jul 20 19:46:35 2018 -0400
Committer: Wangda Tan <wa...@apache.org>
Committed: Tue Jul 31 12:06:23 2018 -0700
----------------------------------------------------------------------
.../src/site/markdown/yarn-service/Overview.md | 4 +-
.../markdown/yarn-service/ServiceUpgrade.md | 197 +++++++++++++++++++
2 files changed, 198 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/7cb37ed6/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/yarn-service/Overview.md
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/yarn-service/Overview.md b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/yarn-service/Overview.md
index 8e2bf9a..041b0ee 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/yarn-service/Overview.md
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/yarn-service/Overview.md
@@ -56,6 +56,4 @@ The benefits of combining these workloads are two-fold:
* [Registry DNS](RegistryDNS.html): Deep dives into the Registry DNS internals.
* [Examples](Examples.html): List some example service definitions (`Yarnfile`).
* [Configurations](Configurations.html): Describes how to configure the custom services on YARN.
-
-
-
+* [Service Upgrade](ServiceUpgrade.html): Describes how to upgrade a YARN service which is an experimental feature.
http://git-wip-us.apache.org/repos/asf/hadoop/blob/7cb37ed6/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/yarn-service/ServiceUpgrade.md
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/yarn-service/ServiceUpgrade.md b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/yarn-service/ServiceUpgrade.md
new file mode 100644
index 0000000..839be22
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/yarn-service/ServiceUpgrade.md
@@ -0,0 +1,197 @@
+<!---
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. See accompanying LICENSE file.
+-->
+
+# Service Upgrade (Experimental Feature - Tech Preview)
+
+Yarn service provides a way of upgrading/downgrading long running applications without
+shutting down the application to minimize the downtime during this process. This is
+an experimental feature which is currently not enabled by default.
+
+## Overview
+
+Upgrading a Yarn Service is a 3-step (or 2-step when auto-finalization of
+upgrade is chosen) process:
+
+1. Initiate service upgrade.\
+This step involves providing the service spec of the newer version of the service.
+Once the service upgrade is initiated, the state of the service is changed to
+`UPGRADING`.
+
+2. Upgrade component instances.\
+This step involves triggering upgrade of individual component instances.
+By providing an API to upgrade at instance level, users can orchestrate upgrade
+of the entire service in any order which is relevant for the service.\
+In addition, there are APIs to upgrade multiple instances, all instances of a
+component, and all instances of multiple components.
+
+3. Finalize upgrade.\
+This step involves finalization of upgrade. With an explicit step to finalize the
+upgrade, users have a chance to cancel the current upgrade in progress. When the
+user chooses to cancel, the service will make the best effort to revert to the
+previous version.\
+\
+When the upgrade is finalized, the old service definition is
+overwritten by the new service definition and the service state changes to `STABLE`.\
+A service can be auto-finalized when the upgrade is initiated with
+`-autoFinalize` option. With auto-finalization, when all the component-instances of
+the service have been upgraded, finalization will be performed automatically by the
+service framework.\
+\
+**NOTE**: Cancel of upgrade is not implemented yet.
+
+## Upgrade Example
+This example shows the upgrade of a sleeper service. Below is the sleeper service
+definition:
+
+```
+{
+ "name": "sleeper-service",
+ "components" :
+ [
+ {
+ "name": "sleeper",
+ "version": "1.0.0",
+ "number_of_containers": 1,
+ "launch_command": "sleep 900000",
+ "resource": {
+ "cpus": 1,
+ "memory": "256"
+ }
+ }
+ ]
+}
+```
+Assuming the user launched an instance of the sleeper service named `my-sleeper`:
+```
+{
+ "components":
+ [
+ {
+ "configuration": {...},
+ "containers":
+ [
+ {
+ "bare_host": "0.0.0.0",
+ "component_instance_name": "sleeper-0",
+ "hostname": "example.local",
+ "id": "container_1531508836237_0002_01_000002",
+ "ip": "0.0.0.0",
+ "launch_time": 1531941023675,
+ "state": "READY"
+ },
+ {
+ "bare_host": "0.0.0.0",
+ "component_instance_name": "sleeper-1",
+ "hostname": "example.local",
+ "id": "container_1531508836237_0002_01_000003",
+ "ip": "0.0.0.0",
+ "launch_time": 1531941024680,
+ "state": "READY"
+ }
+ ],
+ "dependencies": [],
+ "launch_command": "sleep 900000",
+ "name": "sleeper",
+ "number_of_containers": 2,
+ "quicklinks": [],
+ "resource": {...},
+ "restart_policy": "ALWAYS",
+ "run_privileged_container": false,
+ "state": "STABLE"
+ }
+ ],
+ "configuration": {...},
+ "id": "application_1531508836237_0002",
+ "kerberos_principal": {},
+ "lifetime": -1,
+ "name": "my-sleeper",
+ "quicklinks": {},
+ "state": "STABLE",
+ "version": "1.0.0"
+}
+```
+
+### Enable Service Upgrade
+Below is the configuration in `yarn-site.xml` required for enabling service
+upgrade.
+
+```
+ <property>
+ <name>yarn.service.upgrade.enabled</name>
+ <value>true</value>
+ </property>
+```
+
+### Initiate Upgrade
+User can initiate upgrade using the below command:
+```
+yarn app -upgrade ${service_name} -initiate ${path_to_new_service_def_file} [-autoFinalize]
+```
+
+e.g. To upgrade `my-sleeper` to sleep for *1200000* instead of *900000*, the user
+can upgrade the service to version 1.0.1. Below is the service definition for
+version 1.0.1 of sleeper-service:
+
+```
+{
+ "components" :
+ [
+ {
+ "name": "sleeper",
+ "version": "1.0.1",
+ "number_of_containers": 1,
+ "launch_command": "sleep 1200000",
+ "resource": {
+ "cpus": 1,
+ "memory": "256"
+ }
+ }
+ ]
+}
+```
+The command below initiates the upgrade to version 1.0.1.
+```
+yarn app -upgrade my-sleeper -initiate sleeper_v101.json
+```
+
+### Upgrade Instance
+User can upgrade a component instance using the below command:
+```
+yarn app -upgrade ${service_name} -instances ${comma_separated_list_of_instance_names}
+```
+e.g. The command below upgrades `sleeper-0` and `sleeper-1` instances of `my-sleeper`:
+```
+yarn app -upgrade my-sleeper -instances sleeper-0,sleeper-1
+```
+
+### Upgrade Component
+User can upgrade a component, that is, all the instances of a component with
+one command:
+```
+yarn app -upgrade ${service_name} -components ${comma_separated_list_of_component_names}
+```
+e.g. The command below upgrades all the instances of `sleeper` component of `my-sleeper`:
+```
+yarn app -upgrade my-sleeper -components sleeper
+```
+
+### Finalize Upgrade
+User must finalize the upgrade using the below command (since autoFinalize was not specified during initiate):
+```
+yarn app -upgrade ${service_name} -finalize
+```
+e.g. The command below finalizes the upgrade of `my-sleeper`:
+```
+yarn app -upgrade my-sleeper -finalize
+```
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org