You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@flink.apache.org by tr...@apache.org on 2016/09/21 09:52:34 UTC

[01/50] [abbrv] flink git commit: [FLINK-4610] [core] Replace keySet/getValue with entrySet in UdfAnalyzerUtils [Forced Update!]

Repository: flink
Updated Branches:
  refs/heads/flip-6 11cc6b95b -> a04c11c87 (forced update)


[FLINK-4610] [core] Replace keySet/getValue with entrySet in UdfAnalyzerUtils

This closes #2491


Project: http://git-wip-us.apache.org/repos/asf/flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/5901bf33
Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/5901bf33
Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/5901bf33

Branch: refs/heads/flip-6
Commit: 5901bf33bc38b2d0317d3322e26dd427c3319bef
Parents: 5066125
Author: Alexander Pivovarov <ap...@gmail.com>
Authored: Fri Sep 9 23:10:12 2016 -0700
Committer: Stephan Ewen <se...@apache.org>
Committed: Mon Sep 19 22:26:20 2016 +0200

----------------------------------------------------------------------
 .../java/org/apache/flink/api/java/sca/UdfAnalyzerUtils.java | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/flink/blob/5901bf33/flink-java/src/main/java/org/apache/flink/api/java/sca/UdfAnalyzerUtils.java
----------------------------------------------------------------------
diff --git a/flink-java/src/main/java/org/apache/flink/api/java/sca/UdfAnalyzerUtils.java b/flink-java/src/main/java/org/apache/flink/api/java/sca/UdfAnalyzerUtils.java
index ca00d54..a477a03 100644
--- a/flink-java/src/main/java/org/apache/flink/api/java/sca/UdfAnalyzerUtils.java
+++ b/flink-java/src/main/java/org/apache/flink/api/java/sca/UdfAnalyzerUtils.java
@@ -271,15 +271,15 @@ public final class UdfAnalyzerUtils {
 		}
 
 		// recursively merge contained mappings
-		Iterator<String> it = resultMapping.keySet().iterator();
+		Iterator<Map.Entry<String, TaggedValue>> it = resultMapping.entrySet().iterator();
 		while (it.hasNext()) {
-			String key = it.next();
-			TaggedValue value = mergeReturnValues(Collections.singletonList(resultMapping.get(key)));
+			Map.Entry<String, TaggedValue> entry = it.next();
+			TaggedValue value = mergeReturnValues(Collections.singletonList(entry.getValue()));
 			if (value == null) {
 				it.remove();
 			}
 			else {
-				resultMapping.put(key, value);
+				entry.setValue(value);
 			}
 		}
 


[34/50] [abbrv] flink git commit: [FLINK-4529] [flip-6] Move TaskExecutor, JobMaster and ResourceManager out of the rpc package

Posted by tr...@apache.org.
http://git-wip-us.apache.org/repos/asf/flink/blob/2f12ba3e/flink-runtime/src/test/java/org/apache/flink/runtime/clusterframework/SlotManagerTest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/clusterframework/SlotManagerTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/clusterframework/SlotManagerTest.java
deleted file mode 100644
index 2ee280f..0000000
--- a/flink-runtime/src/test/java/org/apache/flink/runtime/clusterframework/SlotManagerTest.java
+++ /dev/null
@@ -1,540 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.flink.runtime.clusterframework;
-
-import org.apache.flink.api.common.JobID;
-import org.apache.flink.runtime.clusterframework.types.AllocationID;
-import org.apache.flink.runtime.clusterframework.types.ResourceID;
-import org.apache.flink.runtime.clusterframework.types.ResourceProfile;
-import org.apache.flink.runtime.clusterframework.types.ResourceSlot;
-import org.apache.flink.runtime.clusterframework.types.SlotID;
-import org.apache.flink.runtime.rpc.resourcemanager.ResourceManagerGateway;
-import org.apache.flink.runtime.rpc.resourcemanager.SlotRequest;
-import org.apache.flink.runtime.rpc.taskexecutor.SlotStatus;
-import org.junit.Before;
-import org.junit.Test;
-
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
-import static org.mockito.Mockito.mock;
-
-public class SlotManagerTest {
-
-	private static final double DEFAULT_TESTING_CPU_CORES = 1.0;
-
-	private static final long DEFAULT_TESTING_MEMORY = 512;
-
-	private static final ResourceProfile DEFAULT_TESTING_PROFILE =
-		new ResourceProfile(DEFAULT_TESTING_CPU_CORES, DEFAULT_TESTING_MEMORY);
-
-	private static final ResourceProfile DEFAULT_TESTING_BIG_PROFILE =
-		new ResourceProfile(DEFAULT_TESTING_CPU_CORES * 2, DEFAULT_TESTING_MEMORY * 2);
-
-	private ResourceManagerGateway resourceManagerGateway;
-
-	@Before
-	public void setUp() {
-		resourceManagerGateway = mock(ResourceManagerGateway.class);
-	}
-
-	/**
-	 * Tests that there are no free slots when we request, need to allocate from cluster manager master
-	 */
-	@Test
-	public void testRequestSlotWithoutFreeSlot() {
-		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
-		slotManager.requestSlot(new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE));
-
-		assertEquals(0, slotManager.getAllocatedSlotCount());
-		assertEquals(0, slotManager.getFreeSlotCount());
-		assertEquals(1, slotManager.getPendingRequestCount());
-		assertEquals(1, slotManager.getAllocatedContainers().size());
-		assertEquals(DEFAULT_TESTING_PROFILE, slotManager.getAllocatedContainers().get(0));
-	}
-
-	/**
-	 * Tests that there are some free slots when we request, and the request is fulfilled immediately
-	 */
-	@Test
-	public void testRequestSlotWithFreeSlot() {
-		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
-
-		directlyProvideFreeSlots(slotManager, DEFAULT_TESTING_PROFILE, 1);
-		assertEquals(1, slotManager.getFreeSlotCount());
-
-		slotManager.requestSlot(new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE));
-		assertEquals(1, slotManager.getAllocatedSlotCount());
-		assertEquals(0, slotManager.getFreeSlotCount());
-		assertEquals(0, slotManager.getPendingRequestCount());
-		assertEquals(0, slotManager.getAllocatedContainers().size());
-	}
-
-	/**
-	 * Tests that there are some free slots when we request, but none of them are suitable
-	 */
-	@Test
-	public void testRequestSlotWithoutSuitableSlot() {
-		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
-
-		directlyProvideFreeSlots(slotManager, DEFAULT_TESTING_PROFILE, 2);
-		assertEquals(2, slotManager.getFreeSlotCount());
-
-		slotManager.requestSlot(new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_BIG_PROFILE));
-		assertEquals(0, slotManager.getAllocatedSlotCount());
-		assertEquals(2, slotManager.getFreeSlotCount());
-		assertEquals(1, slotManager.getPendingRequestCount());
-		assertEquals(1, slotManager.getAllocatedContainers().size());
-		assertEquals(DEFAULT_TESTING_BIG_PROFILE, slotManager.getAllocatedContainers().get(0));
-	}
-
-	/**
-	 * Tests that we send duplicated slot request
-	 */
-	@Test
-	public void testDuplicatedSlotRequest() {
-		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
-		directlyProvideFreeSlots(slotManager, DEFAULT_TESTING_PROFILE, 1);
-
-		SlotRequest request1 = new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE);
-		SlotRequest request2 = new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_BIG_PROFILE);
-
-		slotManager.requestSlot(request1);
-		slotManager.requestSlot(request2);
-		slotManager.requestSlot(request2);
-		slotManager.requestSlot(request1);
-
-		assertEquals(1, slotManager.getAllocatedSlotCount());
-		assertEquals(0, slotManager.getFreeSlotCount());
-		assertEquals(1, slotManager.getPendingRequestCount());
-		assertEquals(1, slotManager.getAllocatedContainers().size());
-		assertEquals(DEFAULT_TESTING_BIG_PROFILE, slotManager.getAllocatedContainers().get(0));
-	}
-
-	/**
-	 * Tests that we send multiple slot requests
-	 */
-	@Test
-	public void testRequestMultipleSlots() {
-		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
-		directlyProvideFreeSlots(slotManager, DEFAULT_TESTING_PROFILE, 5);
-
-		// request 3 normal slots
-		for (int i = 0; i < 3; ++i) {
-			slotManager.requestSlot(new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE));
-		}
-
-		// request 2 big slots
-		for (int i = 0; i < 2; ++i) {
-			slotManager.requestSlot(new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_BIG_PROFILE));
-		}
-
-		// request 1 normal slot again
-		slotManager.requestSlot(new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE));
-
-		assertEquals(4, slotManager.getAllocatedSlotCount());
-		assertEquals(1, slotManager.getFreeSlotCount());
-		assertEquals(2, slotManager.getPendingRequestCount());
-		assertEquals(2, slotManager.getAllocatedContainers().size());
-		assertEquals(DEFAULT_TESTING_BIG_PROFILE, slotManager.getAllocatedContainers().get(0));
-		assertEquals(DEFAULT_TESTING_BIG_PROFILE, slotManager.getAllocatedContainers().get(1));
-	}
-
-	/**
-	 * Tests that a new slot appeared in SlotReport, and we used it to fulfill a pending request
-	 */
-	@Test
-	public void testNewlyAppearedFreeSlotFulfillPendingRequest() {
-		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
-		slotManager.requestSlot(new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE));
-		assertEquals(1, slotManager.getPendingRequestCount());
-
-		SlotID slotId = SlotID.generate();
-		SlotStatus slotStatus = new SlotStatus(slotId, DEFAULT_TESTING_PROFILE);
-		slotManager.updateSlotStatus(slotStatus);
-
-		assertEquals(1, slotManager.getAllocatedSlotCount());
-		assertEquals(0, slotManager.getFreeSlotCount());
-		assertEquals(0, slotManager.getPendingRequestCount());
-		assertTrue(slotManager.isAllocated(slotId));
-	}
-
-	/**
-	 * Tests that a new slot appeared in SlotReport, but we have no pending request
-	 */
-	@Test
-	public void testNewlyAppearedFreeSlot() {
-		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
-		SlotID slotId = SlotID.generate();
-		SlotStatus slotStatus = new SlotStatus(slotId, DEFAULT_TESTING_PROFILE);
-		slotManager.updateSlotStatus(slotStatus);
-
-		assertEquals(0, slotManager.getAllocatedSlotCount());
-		assertEquals(1, slotManager.getFreeSlotCount());
-	}
-
-	/**
-	 * Tests that a new slot appeared in SlotReport, but it't not suitable for all the pending requests
-	 */
-	@Test
-	public void testNewlyAppearedFreeSlotNotMatchPendingRequests() {
-		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
-		slotManager.requestSlot(new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_BIG_PROFILE));
-		assertEquals(1, slotManager.getPendingRequestCount());
-
-		SlotID slotId = SlotID.generate();
-		SlotStatus slotStatus = new SlotStatus(slotId, DEFAULT_TESTING_PROFILE);
-		slotManager.updateSlotStatus(slotStatus);
-
-		assertEquals(0, slotManager.getAllocatedSlotCount());
-		assertEquals(1, slotManager.getFreeSlotCount());
-		assertEquals(1, slotManager.getPendingRequestCount());
-		assertFalse(slotManager.isAllocated(slotId));
-	}
-
-	/**
-	 * Tests that a new slot appeared in SlotReport, and it's been reported using by some job
-	 */
-	@Test
-	public void testNewlyAppearedInUseSlot() {
-		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
-
-		SlotID slotId = SlotID.generate();
-		SlotStatus slotStatus = new SlotStatus(slotId, DEFAULT_TESTING_PROFILE, new AllocationID(), new JobID());
-		slotManager.updateSlotStatus(slotStatus);
-
-		assertEquals(1, slotManager.getAllocatedSlotCount());
-		assertEquals(0, slotManager.getFreeSlotCount());
-		assertTrue(slotManager.isAllocated(slotId));
-	}
-
-	/**
-	 * Tests that we had a slot in-use, and it's confirmed by SlotReport
-	 */
-	@Test
-	public void testExistingInUseSlotUpdateStatus() {
-		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
-		SlotRequest request = new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE);
-		slotManager.requestSlot(request);
-
-		// make this slot in use
-		SlotID slotId = SlotID.generate();
-		SlotStatus slotStatus = new SlotStatus(slotId, DEFAULT_TESTING_PROFILE);
-		slotManager.updateSlotStatus(slotStatus);
-
-		assertEquals(1, slotManager.getAllocatedSlotCount());
-		assertEquals(0, slotManager.getFreeSlotCount());
-		assertTrue(slotManager.isAllocated(slotId));
-
-		// slot status is confirmed
-		SlotStatus slotStatus2 = new SlotStatus(slotId, DEFAULT_TESTING_PROFILE,
-			request.getAllocationId(), request.getJobId());
-		slotManager.updateSlotStatus(slotStatus2);
-
-		assertEquals(1, slotManager.getAllocatedSlotCount());
-		assertEquals(0, slotManager.getFreeSlotCount());
-		assertTrue(slotManager.isAllocated(slotId));
-	}
-
-	/**
-	 * Tests that we had a slot in-use, but it's empty according to the SlotReport
-	 */
-	@Test
-	public void testExistingInUseSlotAdjustedToEmpty() {
-		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
-		SlotRequest request1 = new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE);
-		slotManager.requestSlot(request1);
-
-		// make this slot in use
-		SlotID slotId = SlotID.generate();
-		SlotStatus slotStatus = new SlotStatus(slotId, DEFAULT_TESTING_PROFILE);
-		slotManager.updateSlotStatus(slotStatus);
-
-		// another request pending
-		SlotRequest request2 = new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE);
-		slotManager.requestSlot(request2);
-
-		assertEquals(1, slotManager.getAllocatedSlotCount());
-		assertEquals(0, slotManager.getFreeSlotCount());
-		assertEquals(1, slotManager.getPendingRequestCount());
-		assertTrue(slotManager.isAllocated(slotId));
-		assertTrue(slotManager.isAllocated(request1.getAllocationId()));
-
-
-		// but slot is reported empty again, request2 will be fulfilled, request1 will be missing
-		slotManager.updateSlotStatus(slotStatus);
-
-		assertEquals(1, slotManager.getAllocatedSlotCount());
-		assertEquals(0, slotManager.getFreeSlotCount());
-		assertEquals(0, slotManager.getPendingRequestCount());
-		assertTrue(slotManager.isAllocated(slotId));
-		assertTrue(slotManager.isAllocated(request2.getAllocationId()));
-	}
-
-	/**
-	 * Tests that we had a slot in use, and it's also reported in use by TaskManager, but the allocation
-	 * information didn't match.
-	 */
-	@Test
-	public void testExistingInUseSlotWithDifferentAllocationInfo() {
-		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
-		SlotRequest request = new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE);
-		slotManager.requestSlot(request);
-
-		// make this slot in use
-		SlotID slotId = SlotID.generate();
-		SlotStatus slotStatus = new SlotStatus(slotId, DEFAULT_TESTING_PROFILE);
-		slotManager.updateSlotStatus(slotStatus);
-
-		assertEquals(1, slotManager.getAllocatedSlotCount());
-		assertEquals(0, slotManager.getFreeSlotCount());
-		assertEquals(0, slotManager.getPendingRequestCount());
-		assertTrue(slotManager.isAllocated(slotId));
-		assertTrue(slotManager.isAllocated(request.getAllocationId()));
-
-		SlotStatus slotStatus2 = new SlotStatus(slotId, DEFAULT_TESTING_PROFILE, new AllocationID(), new JobID());
-		// update slot status with different allocation info
-		slotManager.updateSlotStatus(slotStatus2);
-
-		// original request is missing and won't be allocated
-		assertEquals(1, slotManager.getAllocatedSlotCount());
-		assertEquals(0, slotManager.getFreeSlotCount());
-		assertEquals(0, slotManager.getPendingRequestCount());
-		assertTrue(slotManager.isAllocated(slotId));
-		assertFalse(slotManager.isAllocated(request.getAllocationId()));
-		assertTrue(slotManager.isAllocated(slotStatus2.getAllocationID()));
-	}
-
-	/**
-	 * Tests that we had a free slot, and it's confirmed by SlotReport
-	 */
-	@Test
-	public void testExistingEmptySlotUpdateStatus() {
-		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
-		ResourceSlot slot = new ResourceSlot(SlotID.generate(), DEFAULT_TESTING_PROFILE);
-		slotManager.addFreeSlot(slot);
-
-		SlotStatus slotStatus = new SlotStatus(slot.getSlotId(), DEFAULT_TESTING_PROFILE);
-		slotManager.updateSlotStatus(slotStatus);
-
-		assertEquals(0, slotManager.getAllocatedSlotCount());
-		assertEquals(1, slotManager.getFreeSlotCount());
-		assertEquals(0, slotManager.getPendingRequestCount());
-	}
-
-	/**
-	 * Tests that we had a free slot, and it's reported in-use by TaskManager
-	 */
-	@Test
-	public void testExistingEmptySlotAdjustedToInUse() {
-		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
-		ResourceSlot slot = new ResourceSlot(SlotID.generate(), DEFAULT_TESTING_PROFILE);
-		slotManager.addFreeSlot(slot);
-
-		SlotStatus slotStatus = new SlotStatus(slot.getSlotId(), DEFAULT_TESTING_PROFILE,
-			new AllocationID(), new JobID());
-		slotManager.updateSlotStatus(slotStatus);
-
-		assertEquals(1, slotManager.getAllocatedSlotCount());
-		assertEquals(0, slotManager.getFreeSlotCount());
-		assertEquals(0, slotManager.getPendingRequestCount());
-		assertTrue(slotManager.isAllocated(slot.getSlotId()));
-	}
-
-	/**
-	 * Tests that we did some allocation but failed / rejected by TaskManager, request will retry
-	 */
-	@Test
-	public void testSlotAllocationFailedAtTaskManager() {
-		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
-		ResourceSlot slot = new ResourceSlot(SlotID.generate(), DEFAULT_TESTING_PROFILE);
-		slotManager.addFreeSlot(slot);
-
-		SlotRequest request = new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE);
-		slotManager.requestSlot(request);
-
-		assertEquals(1, slotManager.getAllocatedSlotCount());
-		assertEquals(0, slotManager.getFreeSlotCount());
-		assertEquals(0, slotManager.getPendingRequestCount());
-		assertTrue(slotManager.isAllocated(slot.getSlotId()));
-
-		slotManager.handleSlotRequestFailedAtTaskManager(request, slot.getSlotId());
-
-		assertEquals(1, slotManager.getAllocatedSlotCount());
-		assertEquals(0, slotManager.getFreeSlotCount());
-		assertEquals(0, slotManager.getPendingRequestCount());
-	}
-
-
-	/**
-	 * Tests that we did some allocation but failed / rejected by TaskManager, and slot is occupied by another request
-	 */
-	@Test
-	public void testSlotAllocationFailedAtTaskManagerOccupiedByOther() {
-		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
-		ResourceSlot slot = new ResourceSlot(SlotID.generate(), DEFAULT_TESTING_PROFILE);
-		slotManager.addFreeSlot(slot);
-
-		SlotRequest request = new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE);
-		slotManager.requestSlot(request);
-
-		// slot is set empty by heartbeat
-		SlotStatus slotStatus = new SlotStatus(slot.getSlotId(), slot.getResourceProfile());
-		slotManager.updateSlotStatus(slotStatus);
-
-		// another request took this slot
-		SlotRequest request2 = new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE);
-		slotManager.requestSlot(request2);
-
-		assertEquals(1, slotManager.getAllocatedSlotCount());
-		assertEquals(0, slotManager.getFreeSlotCount());
-		assertEquals(0, slotManager.getPendingRequestCount());
-		assertFalse(slotManager.isAllocated(request.getAllocationId()));
-		assertTrue(slotManager.isAllocated(request2.getAllocationId()));
-
-		// original request should be pended
-		slotManager.handleSlotRequestFailedAtTaskManager(request, slot.getSlotId());
-
-		assertEquals(1, slotManager.getAllocatedSlotCount());
-		assertEquals(0, slotManager.getFreeSlotCount());
-		assertEquals(1, slotManager.getPendingRequestCount());
-		assertFalse(slotManager.isAllocated(request.getAllocationId()));
-		assertTrue(slotManager.isAllocated(request2.getAllocationId()));
-	}
-
-	@Test
-	public void testNotifyTaskManagerFailure() {
-		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
-
-		ResourceID resource1 = ResourceID.generate();
-		ResourceID resource2 = ResourceID.generate();
-
-		ResourceSlot slot11 = new ResourceSlot(new SlotID(resource1, 1), DEFAULT_TESTING_PROFILE);
-		ResourceSlot slot12 = new ResourceSlot(new SlotID(resource1, 2), DEFAULT_TESTING_PROFILE);
-		ResourceSlot slot21 = new ResourceSlot(new SlotID(resource2, 1), DEFAULT_TESTING_PROFILE);
-		ResourceSlot slot22 = new ResourceSlot(new SlotID(resource2, 2), DEFAULT_TESTING_PROFILE);
-
-		slotManager.addFreeSlot(slot11);
-		slotManager.addFreeSlot(slot21);
-
-		slotManager.requestSlot(new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE));
-		slotManager.requestSlot(new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE));
-
-		assertEquals(2, slotManager.getAllocatedSlotCount());
-		assertEquals(0, slotManager.getFreeSlotCount());
-		assertEquals(0, slotManager.getPendingRequestCount());
-
-		slotManager.addFreeSlot(slot12);
-		slotManager.addFreeSlot(slot22);
-
-		assertEquals(2, slotManager.getAllocatedSlotCount());
-		assertEquals(2, slotManager.getFreeSlotCount());
-		assertEquals(0, slotManager.getPendingRequestCount());
-
-		slotManager.notifyTaskManagerFailure(resource2);
-
-		assertEquals(1, slotManager.getAllocatedSlotCount());
-		assertEquals(1, slotManager.getFreeSlotCount());
-		assertEquals(0, slotManager.getPendingRequestCount());
-
-		// notify an not exist resource failure
-		slotManager.notifyTaskManagerFailure(ResourceID.generate());
-
-		assertEquals(1, slotManager.getAllocatedSlotCount());
-		assertEquals(1, slotManager.getFreeSlotCount());
-		assertEquals(0, slotManager.getPendingRequestCount());
-	}
-
-	// ------------------------------------------------------------------------
-	//  testing utilities
-	// ------------------------------------------------------------------------
-
-	private void directlyProvideFreeSlots(
-		final SlotManager slotManager,
-		final ResourceProfile resourceProfile,
-		final int freeSlotNum)
-	{
-		for (int i = 0; i < freeSlotNum; ++i) {
-			slotManager.addFreeSlot(new ResourceSlot(SlotID.generate(), new ResourceProfile(resourceProfile)));
-		}
-	}
-
-	// ------------------------------------------------------------------------
-	//  testing classes
-	// ------------------------------------------------------------------------
-
-	private static class TestingSlotManager extends SlotManager {
-
-		private final List<ResourceProfile> allocatedContainers;
-
-		TestingSlotManager(ResourceManagerGateway resourceManagerGateway) {
-			super(resourceManagerGateway);
-			this.allocatedContainers = new LinkedList<>();
-		}
-
-		/**
-		 * Choose slot randomly if it matches requirement
-		 *
-		 * @param request   The slot request
-		 * @param freeSlots All slots which can be used
-		 * @return The chosen slot or null if cannot find a match
-		 */
-		@Override
-		protected ResourceSlot chooseSlotToUse(SlotRequest request, Map<SlotID, ResourceSlot> freeSlots) {
-			for (ResourceSlot slot : freeSlots.values()) {
-				if (slot.isMatchingRequirement(request.getResourceProfile())) {
-					return slot;
-				}
-			}
-			return null;
-		}
-
-		/**
-		 * Choose request randomly if offered slot can match its requirement
-		 *
-		 * @param offeredSlot     The free slot
-		 * @param pendingRequests All the pending slot requests
-		 * @return The chosen request's AllocationID or null if cannot find a match
-		 */
-		@Override
-		protected SlotRequest chooseRequestToFulfill(ResourceSlot offeredSlot,
-			Map<AllocationID, SlotRequest> pendingRequests)
-		{
-			for (Map.Entry<AllocationID, SlotRequest> pendingRequest : pendingRequests.entrySet()) {
-				if (offeredSlot.isMatchingRequirement(pendingRequest.getValue().getResourceProfile())) {
-					return pendingRequest.getValue();
-				}
-			}
-			return null;
-		}
-
-		@Override
-		protected void allocateContainer(ResourceProfile resourceProfile) {
-			allocatedContainers.add(resourceProfile);
-		}
-
-		List<ResourceProfile> getAllocatedContainers() {
-			return allocatedContainers;
-		}
-	}
-}

http://git-wip-us.apache.org/repos/asf/flink/blob/2f12ba3e/flink-runtime/src/test/java/org/apache/flink/runtime/registration/RetryingRegistrationTest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/registration/RetryingRegistrationTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/registration/RetryingRegistrationTest.java
new file mode 100644
index 0000000..80fa19c
--- /dev/null
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/registration/RetryingRegistrationTest.java
@@ -0,0 +1,336 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.registration;
+
+import akka.dispatch.Futures;
+
+import org.apache.flink.api.java.tuple.Tuple2;
+import org.apache.flink.runtime.rpc.RpcService;
+import org.apache.flink.runtime.rpc.TestingRpcService;
+import org.apache.flink.util.TestLogger;
+
+import org.junit.Test;
+
+import org.slf4j.LoggerFactory;
+
+import scala.concurrent.Await;
+import scala.concurrent.ExecutionContext$;
+import scala.concurrent.Future;
+import scala.concurrent.Promise;
+import scala.concurrent.duration.FiniteDuration;
+
+import java.util.UUID;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeoutException;
+
+import static java.util.concurrent.TimeUnit.SECONDS;
+import static org.junit.Assert.*;
+import static org.mockito.Mockito.*;
+
+/**
+ * Tests for the generic retrying registration class, validating the failure, retry, and back-off behavior.
+ */
+public class RetryingRegistrationTest extends TestLogger {
+
+	@Test
+	public void testSimpleSuccessfulRegistration() throws Exception {
+		final String testId = "laissez les bon temps roulez";
+		final String testEndpointAddress = "<test-address>";
+		final UUID leaderId = UUID.randomUUID();
+
+		// an endpoint that immediately returns success
+		TestRegistrationGateway testGateway = new TestRegistrationGateway(new TestRegistrationSuccess(testId));
+		TestingRpcService rpc = new TestingRpcService();
+
+		try {
+			rpc.registerGateway(testEndpointAddress, testGateway);
+
+			TestRetryingRegistration registration = new TestRetryingRegistration(rpc, testEndpointAddress, leaderId);
+			registration.startRegistration();
+
+			Future<Tuple2<TestRegistrationGateway, TestRegistrationSuccess>> future = registration.getFuture();
+			assertNotNull(future);
+
+			// multiple accesses return the same future
+			assertEquals(future, registration.getFuture());
+
+			Tuple2<TestRegistrationGateway, TestRegistrationSuccess> success = 
+					Await.result(future, new FiniteDuration(10, SECONDS));
+
+			// validate correct invocation and result
+			assertEquals(testId, success.f1.getCorrelationId());
+			assertEquals(leaderId, testGateway.getInvocations().take().leaderId());
+		}
+		finally {
+			testGateway.stop();
+			rpc.stopService();
+		}
+	}
+	
+	@Test
+	public void testPropagateFailures() throws Exception {
+		final String testExceptionMessage = "testExceptionMessage";
+
+		// RPC service that fails with exception upon the connection
+		RpcService rpc = mock(RpcService.class);
+		when(rpc.connect(anyString(), any(Class.class))).thenThrow(new RuntimeException(testExceptionMessage));
+
+		TestRetryingRegistration registration = new TestRetryingRegistration(rpc, "testaddress", UUID.randomUUID());
+		registration.startRegistration();
+
+		Future<?> future = registration.getFuture();
+		assertTrue(future.failed().isCompleted());
+
+		assertEquals(testExceptionMessage, future.failed().value().get().get().getMessage());
+	}
+
+	@Test
+	public void testRetryConnectOnFailure() throws Exception {
+		final String testId = "laissez les bon temps roulez";
+		final UUID leaderId = UUID.randomUUID();
+
+		ExecutorService executor = Executors.newCachedThreadPool();
+		TestRegistrationGateway testGateway = new TestRegistrationGateway(new TestRegistrationSuccess(testId));
+
+		try {
+			// RPC service that fails upon the first connection, but succeeds on the second
+			RpcService rpc = mock(RpcService.class);
+			when(rpc.connect(anyString(), any(Class.class))).thenReturn(
+					Futures.failed(new Exception("test connect failure")),  // first connection attempt fails
+					Futures.successful(testGateway)                         // second connection attempt succeeds
+			);
+			when(rpc.getExecutionContext()).thenReturn(ExecutionContext$.MODULE$.fromExecutor(executor));
+
+			TestRetryingRegistration registration = new TestRetryingRegistration(rpc, "foobar address", leaderId);
+			registration.startRegistration();
+
+			Tuple2<TestRegistrationGateway, TestRegistrationSuccess> success =
+					Await.result(registration.getFuture(), new FiniteDuration(10, SECONDS));
+
+			// validate correct invocation and result
+			assertEquals(testId, success.f1.getCorrelationId());
+			assertEquals(leaderId, testGateway.getInvocations().take().leaderId());
+		}
+		finally {
+			testGateway.stop();
+			executor.shutdown();
+		}
+	}
+
+	@Test
+	public void testRetriesOnTimeouts() throws Exception {
+		final String testId = "rien ne va plus";
+		final String testEndpointAddress = "<test-address>";
+		final UUID leaderId = UUID.randomUUID();
+
+		// an endpoint that immediately returns futures with timeouts before returning a successful future
+		TestRegistrationGateway testGateway = new TestRegistrationGateway(
+				null, // timeout
+				null, // timeout
+				new TestRegistrationSuccess(testId) // success
+		);
+
+		TestingRpcService rpc = new TestingRpcService();
+
+		try {
+			rpc.registerGateway(testEndpointAddress, testGateway);
+	
+			TestRetryingRegistration registration = new TestRetryingRegistration(rpc, testEndpointAddress, leaderId);
+	
+			long started = System.nanoTime();
+			registration.startRegistration();
+	
+			Future<Tuple2<TestRegistrationGateway, TestRegistrationSuccess>> future = registration.getFuture();
+			Tuple2<TestRegistrationGateway, TestRegistrationSuccess> success =
+					Await.result(future, new FiniteDuration(10, SECONDS));
+	
+			long finished = System.nanoTime();
+			long elapsedMillis = (finished - started) / 1000000;
+	
+			// validate correct invocation and result
+			assertEquals(testId, success.f1.getCorrelationId());
+			assertEquals(leaderId, testGateway.getInvocations().take().leaderId());
+	
+			// validate that some retry-delay / back-off behavior happened
+			assertTrue("retries did not properly back off", elapsedMillis >= 3 * TestRetryingRegistration.INITIAL_TIMEOUT);
+		}
+		finally {
+			rpc.stopService();
+			testGateway.stop();
+		}
+	}
+
+	@Test
+	public void testDecline() throws Exception {
+		final String testId = "qui a coupe le fromage";
+		final String testEndpointAddress = "<test-address>";
+		final UUID leaderId = UUID.randomUUID();
+
+		TestingRpcService rpc = new TestingRpcService();
+
+		TestRegistrationGateway testGateway = new TestRegistrationGateway(
+				null, // timeout
+				new RegistrationResponse.Decline("no reason "),
+				null, // timeout
+				new TestRegistrationSuccess(testId) // success
+		);
+
+		try {
+			rpc.registerGateway(testEndpointAddress, testGateway);
+
+			TestRetryingRegistration registration = new TestRetryingRegistration(rpc, testEndpointAddress, leaderId);
+
+			long started = System.nanoTime();
+			registration.startRegistration();
+	
+			Future<Tuple2<TestRegistrationGateway, TestRegistrationSuccess>> future = registration.getFuture();
+			Tuple2<TestRegistrationGateway, TestRegistrationSuccess> success =
+					Await.result(future, new FiniteDuration(10, SECONDS));
+
+			long finished = System.nanoTime();
+			long elapsedMillis = (finished - started) / 1000000;
+
+			// validate correct invocation and result
+			assertEquals(testId, success.f1.getCorrelationId());
+			assertEquals(leaderId, testGateway.getInvocations().take().leaderId());
+
+			// validate that some retry-delay / back-off behavior happened
+			assertTrue("retries did not properly back off", elapsedMillis >= 
+					2 * TestRetryingRegistration.INITIAL_TIMEOUT + TestRetryingRegistration.DELAY_ON_DECLINE);
+		}
+		finally {
+			testGateway.stop();
+			rpc.stopService();
+		}
+	}
+	
+	@Test
+	@SuppressWarnings("unchecked")
+	public void testRetryOnError() throws Exception {
+		final String testId = "Petit a petit, l'oiseau fait son nid";
+		final String testEndpointAddress = "<test-address>";
+		final UUID leaderId = UUID.randomUUID();
+
+		TestingRpcService rpc = new TestingRpcService();
+
+		try {
+			// gateway that upon calls first responds with a failure, then with a success
+			TestRegistrationGateway testGateway = mock(TestRegistrationGateway.class);
+
+			when(testGateway.registrationCall(any(UUID.class), anyLong())).thenReturn(
+					Futures.<RegistrationResponse>failed(new Exception("test exception")),
+					Futures.<RegistrationResponse>successful(new TestRegistrationSuccess(testId)));
+			
+			rpc.registerGateway(testEndpointAddress, testGateway);
+
+			TestRetryingRegistration registration = new TestRetryingRegistration(rpc, testEndpointAddress, leaderId);
+
+			long started = System.nanoTime();
+			registration.startRegistration();
+
+			Future<Tuple2<TestRegistrationGateway, TestRegistrationSuccess>> future = registration.getFuture();
+			Tuple2<TestRegistrationGateway, TestRegistrationSuccess> success =
+					Await.result(future, new FiniteDuration(10, SECONDS));
+
+			long finished = System.nanoTime();
+			long elapsedMillis = (finished - started) / 1000000;
+			
+			assertEquals(testId, success.f1.getCorrelationId());
+
+			// validate that some retry-delay / back-off behavior happened
+			assertTrue("retries did not properly back off",
+					elapsedMillis >= TestRetryingRegistration.DELAY_ON_ERROR);
+		}
+		finally {
+			rpc.stopService();
+		}
+	}
+
+	@Test
+	public void testCancellation() throws Exception {
+		final String testEndpointAddress = "my-test-address";
+		final UUID leaderId = UUID.randomUUID();
+
+		TestingRpcService rpc = new TestingRpcService();
+
+		try {
+			Promise<RegistrationResponse> result = Futures.promise();
+
+			TestRegistrationGateway testGateway = mock(TestRegistrationGateway.class);
+			when(testGateway.registrationCall(any(UUID.class), anyLong())).thenReturn(result.future());
+
+			rpc.registerGateway(testEndpointAddress, testGateway);
+
+			TestRetryingRegistration registration = new TestRetryingRegistration(rpc, testEndpointAddress, leaderId);
+			registration.startRegistration();
+
+			// cancel and fail the current registration attempt
+			registration.cancel();
+			result.failure(new TimeoutException());
+
+			// there should not be a second registration attempt
+			verify(testGateway, atMost(1)).registrationCall(any(UUID.class), anyLong());
+		}
+		finally {
+			rpc.stopService();
+		}
+	}
+
+	// ------------------------------------------------------------------------
+	//  test registration
+	// ------------------------------------------------------------------------
+
+	private static class TestRegistrationSuccess extends RegistrationResponse.Success {
+		private static final long serialVersionUID = 5542698790917150604L;
+
+		private final String correlationId;
+
+		private TestRegistrationSuccess(String correlationId) {
+			this.correlationId = correlationId;
+		}
+
+		public String getCorrelationId() {
+			return correlationId;
+		}
+	}
+
+	private static class TestRetryingRegistration extends RetryingRegistration<TestRegistrationGateway, TestRegistrationSuccess> {
+
+		// we use shorter timeouts here to speed up the tests
+		static final long INITIAL_TIMEOUT = 20;
+		static final long MAX_TIMEOUT = 200;
+		static final long DELAY_ON_ERROR = 200;
+		static final long DELAY_ON_DECLINE = 200;
+
+		public TestRetryingRegistration(RpcService rpc, String targetAddress, UUID leaderId) {
+			super(LoggerFactory.getLogger(RetryingRegistrationTest.class),
+					rpc, "TestEndpoint",
+					TestRegistrationGateway.class,
+					targetAddress, leaderId,
+					INITIAL_TIMEOUT, MAX_TIMEOUT, DELAY_ON_ERROR, DELAY_ON_DECLINE);
+		}
+
+		@Override
+		protected Future<RegistrationResponse> invokeRegistration(
+				TestRegistrationGateway gateway, UUID leaderId, long timeoutMillis) {
+			return gateway.registrationCall(leaderId, timeoutMillis);
+		}
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/2f12ba3e/flink-runtime/src/test/java/org/apache/flink/runtime/registration/TestRegistrationGateway.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/registration/TestRegistrationGateway.java b/flink-runtime/src/test/java/org/apache/flink/runtime/registration/TestRegistrationGateway.java
new file mode 100644
index 0000000..431fbe8
--- /dev/null
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/registration/TestRegistrationGateway.java
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.registration;
+
+import akka.dispatch.Futures;
+
+import org.apache.flink.runtime.rpc.TestingGatewayBase;
+import org.apache.flink.util.Preconditions;
+
+import scala.concurrent.Future;
+
+import java.util.UUID;
+import java.util.concurrent.BlockingQueue;
+import java.util.concurrent.LinkedBlockingQueue;
+
+public class TestRegistrationGateway extends TestingGatewayBase {
+
+	private final BlockingQueue<RegistrationCall> invocations;
+
+	private final RegistrationResponse[] responses;
+
+	private int pos;
+
+	public TestRegistrationGateway(RegistrationResponse... responses) {
+		Preconditions.checkArgument(responses != null && responses.length > 0);
+
+		this.invocations = new LinkedBlockingQueue<>();
+		this.responses = responses;
+		
+	}
+
+	// ------------------------------------------------------------------------
+
+	public Future<RegistrationResponse> registrationCall(UUID leaderId, long timeout) {
+		invocations.add(new RegistrationCall(leaderId, timeout));
+
+		RegistrationResponse response = responses[pos];
+		if (pos < responses.length - 1) {
+			pos++;
+		}
+
+		// return a completed future (for a proper value), or one that never completes and will time out (for null)
+		return response != null ? Futures.successful(response) : this.<RegistrationResponse>futureWithTimeout(timeout);
+	}
+
+	public BlockingQueue<RegistrationCall> getInvocations() {
+		return invocations;
+	}
+
+	// ------------------------------------------------------------------------
+
+	public static class RegistrationCall {
+		private final UUID leaderId;
+		private final long timeout;
+
+		public RegistrationCall(UUID leaderId, long timeout) {
+			this.leaderId = leaderId;
+			this.timeout = timeout;
+		}
+
+		public UUID leaderId() {
+			return leaderId;
+		}
+
+		public long timeout() {
+			return timeout;
+		}
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/2f12ba3e/flink-runtime/src/test/java/org/apache/flink/runtime/resourcemanager/ClusterShutdownITCase.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/resourcemanager/ClusterShutdownITCase.java b/flink-runtime/src/test/java/org/apache/flink/runtime/resourcemanager/ClusterShutdownITCase.java
deleted file mode 100644
index 32c6cac..0000000
--- a/flink-runtime/src/test/java/org/apache/flink/runtime/resourcemanager/ClusterShutdownITCase.java
+++ /dev/null
@@ -1,156 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.flink.runtime.resourcemanager;
-
-import akka.actor.ActorSystem;
-import akka.testkit.JavaTestKit;
-import org.apache.flink.configuration.Configuration;
-import org.apache.flink.runtime.akka.AkkaUtils;
-import org.apache.flink.runtime.clusterframework.ApplicationStatus;
-import org.apache.flink.runtime.clusterframework.messages.StopCluster;
-import org.apache.flink.runtime.clusterframework.messages.StopClusterSuccessful;
-import org.apache.flink.runtime.instance.ActorGateway;
-import org.apache.flink.runtime.messages.Messages;
-import org.apache.flink.runtime.testingUtils.TestingMessages;
-import org.apache.flink.runtime.testingUtils.TestingUtils;
-import org.apache.flink.runtime.testutils.TestingResourceManager;
-import org.apache.flink.util.TestLogger;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-import org.junit.Test;
-import scala.Option;
-
-
-/**
- * Runs tests to ensure that a cluster is shutdown properly.
- */
-public class ClusterShutdownITCase extends TestLogger {
-
-	private static ActorSystem system;
-
-	private static Configuration config = new Configuration();
-
-	@BeforeClass
-	public static void setup() {
-		system = AkkaUtils.createActorSystem(AkkaUtils.getDefaultAkkaConfig());
-	}
-
-	@AfterClass
-	public static void teardown() {
-		JavaTestKit.shutdownActorSystem(system);
-	}
-
-	/**
-	 * Tests a faked cluster shutdown procedure without the ResourceManager.
-	 */
-	@Test
-	public void testClusterShutdownWithoutResourceManager() {
-
-		new JavaTestKit(system){{
-		new Within(duration("30 seconds")) {
-		@Override
-		protected void run() {
-
-			ActorGateway me =
-				TestingUtils.createForwardingActor(system, getTestActor(), Option.<String>empty());
-
-			// start job manager which doesn't shutdown the actor system
-			ActorGateway jobManager =
-				TestingUtils.createJobManager(system, config, "jobmanager1");
-
-			// Tell the JobManager to inform us of shutdown actions
-			jobManager.tell(TestingMessages.getNotifyOfComponentShutdown(), me);
-
-			// Register a TaskManager
-			ActorGateway taskManager =
-				TestingUtils.createTaskManager(system, jobManager, config, true, true);
-
-			// Tell the TaskManager to inform us of TaskManager shutdowns
-			taskManager.tell(TestingMessages.getNotifyOfComponentShutdown(), me);
-
-
-			// No resource manager connected
-			jobManager.tell(new StopCluster(ApplicationStatus.SUCCEEDED, "Shutting down."), me);
-
-			expectMsgAllOf(
-				new TestingMessages.ComponentShutdown(taskManager.actor()),
-				new TestingMessages.ComponentShutdown(jobManager.actor()),
-				StopClusterSuccessful.getInstance()
-			);
-
-		}};
-		}};
-	}
-
-	/**
-	 * Tests a faked cluster shutdown procedure with the ResourceManager.
-	 */
-	@Test
-	public void testClusterShutdownWithResourceManager() {
-
-		new JavaTestKit(system){{
-		new Within(duration("30 seconds")) {
-		@Override
-		protected void run() {
-
-			ActorGateway me =
-				TestingUtils.createForwardingActor(system, getTestActor(), Option.<String>empty());
-
-			// start job manager which doesn't shutdown the actor system
-			ActorGateway jobManager =
-				TestingUtils.createJobManager(system, config, "jobmanager2");
-
-			// Tell the JobManager to inform us of shutdown actions
-			jobManager.tell(TestingMessages.getNotifyOfComponentShutdown(), me);
-
-			// Register a TaskManager
-			ActorGateway taskManager =
-				TestingUtils.createTaskManager(system, jobManager, config, true, true);
-
-			// Tell the TaskManager to inform us of TaskManager shutdowns
-			taskManager.tell(TestingMessages.getNotifyOfComponentShutdown(), me);
-
-			// Start resource manager and let it register
-			ActorGateway resourceManager =
-				TestingUtils.createResourceManager(system, jobManager.actor(), config);
-
-			// Tell the ResourceManager to inform us of ResourceManager shutdowns
-			resourceManager.tell(TestingMessages.getNotifyOfComponentShutdown(), me);
-
-			// notify about a resource manager registration at the job manager
-			resourceManager.tell(new TestingResourceManager.NotifyWhenResourceManagerConnected(), me);
-
-			// Wait for resource manager
-			expectMsgEquals(Messages.getAcknowledge());
-
-
-			// Shutdown cluster with resource manager connected
-			jobManager.tell(new StopCluster(ApplicationStatus.SUCCEEDED, "Shutting down."), me);
-
-			expectMsgAllOf(
-				new TestingMessages.ComponentShutdown(taskManager.actor()),
-				new TestingMessages.ComponentShutdown(jobManager.actor()),
-				new TestingMessages.ComponentShutdown(resourceManager.actor()),
-				StopClusterSuccessful.getInstance()
-			);
-
-		}};
-		}};
-	}
-}

http://git-wip-us.apache.org/repos/asf/flink/blob/2f12ba3e/flink-runtime/src/test/java/org/apache/flink/runtime/resourcemanager/ResourceManagerHATest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/resourcemanager/ResourceManagerHATest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/resourcemanager/ResourceManagerHATest.java
new file mode 100644
index 0000000..5799e62
--- /dev/null
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/resourcemanager/ResourceManagerHATest.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.resourcemanager;
+
+import org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices;
+import org.apache.flink.runtime.leaderelection.TestingLeaderElectionService;
+import org.apache.flink.runtime.rpc.MainThreadExecutor;
+import org.apache.flink.runtime.rpc.RpcEndpoint;
+import org.apache.flink.runtime.rpc.RpcGateway;
+import org.apache.flink.runtime.rpc.RpcService;
+import org.apache.flink.runtime.rpc.StartStoppable;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.UUID;
+
+import static org.mockito.Matchers.any;
+import static org.mockito.Mockito.doCallRealMethod;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+/**
+ * resourceManager HA test, including grant leadership and revoke leadership
+ */
+public class ResourceManagerHATest {
+
+	@Test
+	public void testGrantAndRevokeLeadership() throws Exception {
+		// mock a RpcService which will return a special RpcGateway when call its startServer method, the returned RpcGateway directly execute runAsync call
+		TestingResourceManagerGatewayProxy gateway = mock(TestingResourceManagerGatewayProxy.class);
+		doCallRealMethod().when(gateway).runAsync(any(Runnable.class));
+
+		RpcService rpcService = mock(RpcService.class);
+		when(rpcService.startServer(any(RpcEndpoint.class))).thenReturn(gateway);
+
+		TestingLeaderElectionService leaderElectionService = new TestingLeaderElectionService();
+		TestingHighAvailabilityServices highAvailabilityServices = new TestingHighAvailabilityServices();
+		highAvailabilityServices.setResourceManagerLeaderElectionService(leaderElectionService);
+
+		final ResourceManager resourceManager = new ResourceManager(rpcService, highAvailabilityServices);
+		resourceManager.start();
+		// before grant leadership, resourceManager's leaderId is null
+		Assert.assertNull(resourceManager.getLeaderSessionID());
+		final UUID leaderId = UUID.randomUUID();
+		leaderElectionService.isLeader(leaderId);
+		// after grant leadership, resourceManager's leaderId has value
+		Assert.assertEquals(leaderId, resourceManager.getLeaderSessionID());
+		// then revoke leadership, resourceManager's leaderId is null again
+		leaderElectionService.notLeader();
+		Assert.assertNull(resourceManager.getLeaderSessionID());
+	}
+
+	private static abstract class TestingResourceManagerGatewayProxy implements MainThreadExecutor, StartStoppable, RpcGateway {
+		@Override
+		public void runAsync(Runnable runnable) {
+			runnable.run();
+		}
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/2f12ba3e/flink-runtime/src/test/java/org/apache/flink/runtime/resourcemanager/ResourceManagerITCase.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/resourcemanager/ResourceManagerITCase.java b/flink-runtime/src/test/java/org/apache/flink/runtime/resourcemanager/ResourceManagerITCase.java
deleted file mode 100644
index 0c2ca1a..0000000
--- a/flink-runtime/src/test/java/org/apache/flink/runtime/resourcemanager/ResourceManagerITCase.java
+++ /dev/null
@@ -1,162 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.flink.runtime.resourcemanager;
-
-import akka.actor.ActorSystem;
-import akka.testkit.JavaTestKit;
-import org.apache.flink.configuration.Configuration;
-import org.apache.flink.runtime.akka.AkkaUtils;
-import org.apache.flink.runtime.clusterframework.types.ResourceID;
-import org.apache.flink.runtime.instance.ActorGateway;
-import org.apache.flink.runtime.instance.HardwareDescription;
-import org.apache.flink.runtime.taskmanager.TaskManagerLocation;
-import org.apache.flink.runtime.messages.Messages;
-import org.apache.flink.runtime.messages.RegistrationMessages;
-import org.apache.flink.runtime.testingUtils.TestingUtils;
-import org.apache.flink.runtime.testutils.TestingResourceManager;
-import org.apache.flink.util.TestLogger;
-
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-import org.junit.Test;
-
-import scala.Option;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.when;
-
-/**
- * It cases which test the interaction of the resource manager with job manager and task managers.
- * Runs all tests in one Actor system.
- */
-public class ResourceManagerITCase extends TestLogger {
-
-	private static ActorSystem system;
-
-	private static Configuration config = new Configuration();
-
-	@BeforeClass
-	public static void setup() {
-		system = AkkaUtils.createActorSystem(AkkaUtils.getDefaultAkkaConfig());
-	}
-
-	@AfterClass
-	public static void teardown() {
-		JavaTestKit.shutdownActorSystem(system);
-	}
-
-	/**
-	 * Tests whether the resource manager connects and reconciles existing task managers.
-	 */
-	@Test
-	public void testResourceManagerReconciliation() {
-
-		new JavaTestKit(system){{
-		new Within(duration("10 seconds")) {
-		@Override
-		protected void run() {
-
-			ActorGateway jobManager =
-				TestingUtils.createJobManager(system, config, "ReconciliationTest");
-			ActorGateway me =
-				TestingUtils.createForwardingActor(system, getTestActor(), Option.<String>empty());
-
-			// !! no resource manager started !!
-
-			ResourceID resourceID = ResourceID.generate();
-
-			TaskManagerLocation location = mock(TaskManagerLocation.class);
-			when(location.getResourceID()).thenReturn(resourceID);
-
-			HardwareDescription resourceProfile = HardwareDescription.extractFromSystem(1_000_000);
-
-			jobManager.tell(
-				new RegistrationMessages.RegisterTaskManager(resourceID, location, resourceProfile, 1),
-				me);
-
-			expectMsgClass(RegistrationMessages.AcknowledgeRegistration.class);
-
-			// now start the resource manager
-			ActorGateway resourceManager =
-				TestingUtils.createResourceManager(system, jobManager.actor(), config);
-
-			// register at testing job manager to receive a message once a resource manager registers
-			resourceManager.tell(new TestingResourceManager.NotifyWhenResourceManagerConnected(), me);
-
-			// Wait for resource manager
-			expectMsgEquals(Messages.getAcknowledge());
-
-			// check if we registered the task manager resource
-			resourceManager.tell(new TestingResourceManager.GetRegisteredResources(), me);
-
-			TestingResourceManager.GetRegisteredResourcesReply reply =
-				expectMsgClass(TestingResourceManager.GetRegisteredResourcesReply.class);
-
-			assertEquals(1, reply.resources.size());
-			assertTrue(reply.resources.contains(resourceID));
-
-		}};
-		}};
-	}
-
-	/**
-	 * Tests whether the resource manager gets informed upon TaskManager registration.
-	 */
-	@Test
-	public void testResourceManagerTaskManagerRegistration() {
-
-		new JavaTestKit(system){{
-		new Within(duration("30 seconds")) {
-		@Override
-		protected void run() {
-
-			ActorGateway jobManager =
-				TestingUtils.createJobManager(system, config, "RegTest");
-			ActorGateway me =
-				TestingUtils.createForwardingActor(system, getTestActor(), Option.<String>empty());
-
-			// start the resource manager
-			ActorGateway resourceManager =
-				TestingUtils.createResourceManager(system, jobManager.actor(), config);
-
-			// notify about a resource manager registration at the job manager
-			resourceManager.tell(new TestingResourceManager.NotifyWhenResourceManagerConnected(), me);
-
-			// Wait for resource manager
-			expectMsgEquals(Messages.getAcknowledge());
-
-			// start task manager and wait for registration
-			ActorGateway taskManager =
-				TestingUtils.createTaskManager(system, jobManager.actor(), config, true, true);
-
-			// check if we registered the task manager resource
-			resourceManager.tell(new TestingResourceManager.GetRegisteredResources(), me);
-
-			TestingResourceManager.GetRegisteredResourcesReply reply =
-				expectMsgClass(TestingResourceManager.GetRegisteredResourcesReply.class);
-
-			assertEquals(1, reply.resources.size());
-
-		}};
-		}};
-	}
-
-}

http://git-wip-us.apache.org/repos/asf/flink/blob/2f12ba3e/flink-runtime/src/test/java/org/apache/flink/runtime/resourcemanager/ResourceManagerTest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/resourcemanager/ResourceManagerTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/resourcemanager/ResourceManagerTest.java
deleted file mode 100644
index 043c81c..0000000
--- a/flink-runtime/src/test/java/org/apache/flink/runtime/resourcemanager/ResourceManagerTest.java
+++ /dev/null
@@ -1,338 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.flink.runtime.resourcemanager;
-
-import akka.actor.ActorSystem;
-import akka.testkit.JavaTestKit;
-import org.apache.flink.configuration.ConfigConstants;
-import org.apache.flink.configuration.Configuration;
-import org.apache.flink.runtime.akka.AkkaUtils;
-import org.apache.flink.runtime.clusterframework.messages.NotifyResourceStarted;
-import org.apache.flink.runtime.clusterframework.messages.RegisterResourceManager;
-import org.apache.flink.runtime.clusterframework.messages.RegisterResourceManagerSuccessful;
-import org.apache.flink.runtime.clusterframework.messages.RemoveResource;
-import org.apache.flink.runtime.clusterframework.messages.ResourceRemoved;
-import org.apache.flink.runtime.clusterframework.messages.TriggerRegistrationAtJobManager;
-import org.apache.flink.runtime.clusterframework.types.ResourceID;
-import org.apache.flink.runtime.instance.ActorGateway;
-import org.apache.flink.runtime.messages.Acknowledge;
-import org.apache.flink.runtime.messages.JobManagerMessages;
-import org.apache.flink.runtime.testingUtils.TestingUtils;
-import org.apache.flink.runtime.testutils.TestingResourceManager;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-import org.junit.Test;
-import scala.Option;
-
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-
-import static org.junit.Assert.*;
-
-/**
- * General tests for the resource manager component.
- */
-public class ResourceManagerTest {
-
-	private static ActorSystem system;
-
-	private static ActorGateway fakeJobManager;
-	private static ActorGateway resourceManager;
-
-	private static Configuration config = new Configuration();
-
-	@BeforeClass
-	public static void setup() {
-		system = AkkaUtils.createLocalActorSystem(config);
-	}
-
-	@AfterClass
-	public static void teardown() {
-		JavaTestKit.shutdownActorSystem(system);
-	}
-
-	/**
-	 * Tests the registration and reconciliation of the ResourceManager with the JobManager
-	 */
-	@Test
-	public void testJobManagerRegistrationAndReconciliation() {
-		new JavaTestKit(system){{
-		new Within(duration("10 seconds")) {
-		@Override
-		protected void run() {
-			fakeJobManager = TestingUtils.createForwardingActor(system, getTestActor(), Option.<String>empty());
-			resourceManager = TestingUtils.createResourceManager(system, fakeJobManager.actor(), config);
-
-			expectMsgClass(RegisterResourceManager.class);
-
-			List<ResourceID> resourceList = new ArrayList<>();
-			resourceList.add(ResourceID.generate());
-			resourceList.add(ResourceID.generate());
-			resourceList.add(ResourceID.generate());
-
-			resourceManager.tell(
-				new RegisterResourceManagerSuccessful(fakeJobManager.actor(), resourceList),
-				fakeJobManager);
-
-			resourceManager.tell(new TestingResourceManager.GetRegisteredResources(), fakeJobManager);
-			TestingResourceManager.GetRegisteredResourcesReply reply =
-				expectMsgClass(TestingResourceManager.GetRegisteredResourcesReply.class);
-
-			for (ResourceID id : resourceList) {
-				if (!reply.resources.contains(id)) {
-					fail("Expected to find all resources that were provided during registration.");
-				}
-			}
-		}};
-		}};
-	}
-
-	/**
-	 * Tests delayed or erroneous registration of the ResourceManager with the JobManager
-	 */
-	@Test
-	public void testDelayedJobManagerRegistration() {
-		new JavaTestKit(system){{
-		new Within(duration("10 seconds")) {
-		@Override
-		protected void run() {
-
-			// set a short timeout for lookups
-			Configuration shortTimeoutConfig = config.clone();
-			shortTimeoutConfig.setString(ConfigConstants.AKKA_LOOKUP_TIMEOUT, "1 s");
-
-			fakeJobManager = TestingUtils.createForwardingActor(system, getTestActor(), Option.<String>empty());
-			resourceManager = TestingUtils.createResourceManager(system, fakeJobManager.actor(), shortTimeoutConfig);
-
-			// wait for registration message
-			RegisterResourceManager msg = expectMsgClass(RegisterResourceManager.class);
-			// give wrong response
-			getLastSender().tell(new JobManagerMessages.LeaderSessionMessage(null, new Object()),
-				fakeJobManager.actor());
-
-			// expect another retry and let it time out
-			expectMsgClass(RegisterResourceManager.class);
-
-			// wait for next try after timeout
-			expectMsgClass(RegisterResourceManager.class);
-
-		}};
-		}};
-	}
-
-	@Test
-	public void testTriggerReconnect() {
-		new JavaTestKit(system){{
-		new Within(duration("10 seconds")) {
-		@Override
-		protected void run() {
-
-			// set a long timeout for lookups such that the test fails in case of timeouts
-			Configuration shortTimeoutConfig = config.clone();
-			shortTimeoutConfig.setString(ConfigConstants.AKKA_LOOKUP_TIMEOUT, "99999 s");
-
-			fakeJobManager = TestingUtils.createForwardingActor(system, getTestActor(), Option.<String>empty());
-			resourceManager = TestingUtils.createResourceManager(system, fakeJobManager.actor(), shortTimeoutConfig);
-
-			// wait for registration message
-			RegisterResourceManager msg = expectMsgClass(RegisterResourceManager.class);
-			// all went well
-			resourceManager.tell(
-				new RegisterResourceManagerSuccessful(fakeJobManager.actor(), Collections.<ResourceID>emptyList()),
-				fakeJobManager);
-
-			// force a reconnect
-			resourceManager.tell(
-				new TriggerRegistrationAtJobManager(fakeJobManager.actor()),
-				fakeJobManager);
-
-			// new registration attempt should come in
-			expectMsgClass(RegisterResourceManager.class);
-
-		}};
-		}};
-	}
-
-	/**
-	 * Tests the registration and accounting of resources at the ResourceManager.
-	 */
-	@Test
-	public void testTaskManagerRegistration() {
-		new JavaTestKit(system){{
-		new Within(duration("10 seconds")) {
-		@Override
-		protected void run() {
-
-			fakeJobManager = TestingUtils.createForwardingActor(system, getTestActor(), Option.<String>empty());
-			resourceManager = TestingUtils.createResourceManager(system, fakeJobManager.actor(), config);
-
-			// register with JM
-			expectMsgClass(RegisterResourceManager.class);
-			resourceManager.tell(
-				new RegisterResourceManagerSuccessful(fakeJobManager.actor(), Collections.<ResourceID>emptyList()),
-				fakeJobManager);
-
-			ResourceID resourceID = ResourceID.generate();
-
-			// Send task manager registration
-			resourceManager.tell(new NotifyResourceStarted(resourceID),
-				fakeJobManager);
-
-			expectMsgClass(Acknowledge.class);
-
-			// check for number registration of registered resources
-			resourceManager.tell(new TestingResourceManager.GetRegisteredResources(), fakeJobManager);
-			TestingResourceManager.GetRegisteredResourcesReply reply =
-				expectMsgClass(TestingResourceManager.GetRegisteredResourcesReply.class);
-
-			assertEquals(1, reply.resources.size());
-
-			// Send task manager registration again
-			resourceManager.tell(new NotifyResourceStarted(resourceID),
-				fakeJobManager);
-
-			expectMsgClass(Acknowledge.class);
-
-			// check for number registration of registered resources
-			resourceManager.tell(new TestingResourceManager.GetRegisteredResources(), fakeJobManager);
-			reply = expectMsgClass(TestingResourceManager.GetRegisteredResourcesReply.class);
-
-			assertEquals(1, reply.resources.size());
-
-			// Send invalid null resource id to throw an exception during resource registration
-			resourceManager.tell(new NotifyResourceStarted(null),
-				fakeJobManager);
-
-			expectMsgClass(Acknowledge.class);
-
-			// check for number registration of registered resources
-			resourceManager.tell(new TestingResourceManager.GetRegisteredResources(), fakeJobManager);
-			reply = expectMsgClass(TestingResourceManager.GetRegisteredResourcesReply.class);
-
-			assertEquals(1, reply.resources.size());
-		}};
-		}};
-	}
-
-	@Test
-	public void testResourceRemoval() {
-		new JavaTestKit(system){{
-		new Within(duration("10 seconds")) {
-		@Override
-		protected void run() {
-
-			fakeJobManager = TestingUtils.createForwardingActor(system, getTestActor(), Option.<String>empty());
-			resourceManager = TestingUtils.createResourceManager(system, fakeJobManager.actor(), config);
-
-			// register with JM
-			expectMsgClass(RegisterResourceManager.class);
-			resourceManager.tell(
-				new RegisterResourceManagerSuccessful(fakeJobManager.actor(), Collections.<ResourceID>emptyList()),
-				fakeJobManager);
-
-			ResourceID resourceID = ResourceID.generate();
-
-			// remove unknown resource
-			resourceManager.tell(new RemoveResource(resourceID), fakeJobManager);
-
-			// Send task manager registration
-			resourceManager.tell(new NotifyResourceStarted(resourceID),
-				fakeJobManager);
-
-			expectMsgClass(Acknowledge.class);
-
-			// check for number registration of registered resources
-			resourceManager.tell(new TestingResourceManager.GetRegisteredResources(), fakeJobManager);
-			TestingResourceManager.GetRegisteredResourcesReply reply =
-				expectMsgClass(TestingResourceManager.GetRegisteredResourcesReply.class);
-
-			assertEquals(1, reply.resources.size());
-			assertTrue(reply.resources.contains(resourceID));
-
-			// remove resource
-			resourceManager.tell(new RemoveResource(resourceID), fakeJobManager);
-
-			// check for number registration of registered resources
-			resourceManager.tell(new TestingResourceManager.GetRegisteredResources(), fakeJobManager);
-			reply =	expectMsgClass(TestingResourceManager.GetRegisteredResourcesReply.class);
-
-			assertEquals(0, reply.resources.size());
-
-		}};
-		}};
-	}
-
-	/**
-	 * Tests notification of JobManager about a failed resource.
-	 */
-	@Test
-	public void testResourceFailureNotification() {
-		new JavaTestKit(system){{
-		new Within(duration("10 seconds")) {
-		@Override
-		protected void run() {
-
-			fakeJobManager = TestingUtils.createForwardingActor(system, getTestActor(), Option.<String>empty());
-			resourceManager = TestingUtils.createResourceManager(system, fakeJobManager.actor(), config);
-
-			// register with JM
-			expectMsgClass(RegisterResourceManager.class);
-			resourceManager.tell(
-				new RegisterResourceManagerSuccessful(fakeJobManager.actor(), Collections.<ResourceID>emptyList()),
-				fakeJobManager);
-
-			ResourceID resourceID1 = ResourceID.generate();
-			ResourceID resourceID2 = ResourceID.generate();
-
-			// Send task manager registration
-			resourceManager.tell(new NotifyResourceStarted(resourceID1),
-				fakeJobManager);
-
-			expectMsgClass(Acknowledge.class);
-
-			// Send task manager registration
-			resourceManager.tell(new NotifyResourceStarted(resourceID2),
-				fakeJobManager);
-
-			expectMsgClass(Acknowledge.class);
-
-			// check for number registration of registered resources
-			resourceManager.tell(new TestingResourceManager.GetRegisteredResources(), fakeJobManager);
-			TestingResourceManager.GetRegisteredResourcesReply reply =
-				expectMsgClass(TestingResourceManager.GetRegisteredResourcesReply.class);
-
-			assertEquals(2, reply.resources.size());
-			assertTrue(reply.resources.contains(resourceID1));
-			assertTrue(reply.resources.contains(resourceID2));
-
-			// fail resources
-			resourceManager.tell(new TestingResourceManager.FailResource(resourceID1), fakeJobManager);
-			resourceManager.tell(new TestingResourceManager.FailResource(resourceID2), fakeJobManager);
-
-			ResourceRemoved answer = expectMsgClass(ResourceRemoved.class);
-			ResourceRemoved answer2 = expectMsgClass(ResourceRemoved.class);
-
-			assertEquals(resourceID1, answer.resourceId());
-			assertEquals(resourceID2, answer2.resourceId());
-
-		}};
-		}};
-	}
-}


[11/50] [abbrv] flink git commit: [FLINK-3929] Added Keytab based Kerberos support to enable secure Flink cluster deployment(addresses HDHS, Kafka and ZK services)

Posted by tr...@apache.org.
http://git-wip-us.apache.org/repos/asf/flink/blob/25a622fd/flink-yarn/src/main/java/org/apache/flink/yarn/AbstractYarnClusterDescriptor.java
----------------------------------------------------------------------
diff --git a/flink-yarn/src/main/java/org/apache/flink/yarn/AbstractYarnClusterDescriptor.java b/flink-yarn/src/main/java/org/apache/flink/yarn/AbstractYarnClusterDescriptor.java
index f4c2032..848013c 100644
--- a/flink-yarn/src/main/java/org/apache/flink/yarn/AbstractYarnClusterDescriptor.java
+++ b/flink-yarn/src/main/java/org/apache/flink/yarn/AbstractYarnClusterDescriptor.java
@@ -24,6 +24,7 @@ import org.apache.flink.configuration.ConfigConstants;
 import org.apache.flink.configuration.GlobalConfiguration;
 import org.apache.flink.runtime.akka.AkkaUtils;
 import org.apache.flink.runtime.jobmanager.HighAvailabilityMode;
+import org.apache.flink.runtime.security.SecurityContext;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -59,7 +60,6 @@ import java.io.IOException;
 import java.io.PrintStream;
 import java.lang.reflect.InvocationTargetException;
 import java.lang.reflect.Method;
-import java.security.PrivilegedExceptionAction;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
@@ -341,26 +341,8 @@ public abstract class AbstractYarnClusterDescriptor implements ClusterDescriptor
 
 	@Override
 	public YarnClusterClient deploy() {
-
 		try {
-
-			UserGroupInformation.setConfiguration(conf);
-			UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
-
-			if (UserGroupInformation.isSecurityEnabled()) {
-				if (!ugi.hasKerberosCredentials()) {
-					throw new YarnDeploymentException("In secure mode. Please provide Kerberos credentials in order to authenticate. " +
-						"You may use kinit to authenticate and request a TGT from the Kerberos server.");
-				}
-				return ugi.doAs(new PrivilegedExceptionAction<YarnClusterClient>() {
-					@Override
-					public YarnClusterClient run() throws Exception {
-						return deployInternal();
-					}
-				});
-			} else {
-				return deployInternal();
-			}
+			return deployInternal();
 		} catch (Exception e) {
 			throw new RuntimeException("Couldn't deploy Yarn cluster", e);
 		}
@@ -539,9 +521,13 @@ public abstract class AbstractYarnClusterDescriptor implements ClusterDescriptor
 			}
 		}
 
-		addLibFolderToShipFiles(effectiveShipFiles);
+		//check if there is a JAAS config file
+		File jaasConfigFile = new File(configurationDirectory + File.separator + SecurityContext.JAAS_CONF_FILENAME);
+		if (jaasConfigFile.exists() && jaasConfigFile.isFile()) {
+			effectiveShipFiles.add(jaasConfigFile);
+		}
 
-		final ContainerLaunchContext amContainer = setupApplicationMasterContainer(hasLogback, hasLog4j);
+		addLibFolderToShipFiles(effectiveShipFiles);
 
 		// Set-up ApplicationSubmissionContext for the application
 		ApplicationSubmissionContext appContext = yarnApplication.getApplicationSubmissionContext();
@@ -626,8 +612,53 @@ public abstract class AbstractYarnClusterDescriptor implements ClusterDescriptor
 		FsPermission permission = new FsPermission(FsAction.ALL, FsAction.NONE, FsAction.NONE);
 		fs.setPermission(sessionFilesDir, permission); // set permission for path.
 
+		//To support Yarn Secure Integration Test Scenario
+		//In Integration test setup, the Yarn containers created by YarnMiniCluster does not have the Yarn site XML
+		//and KRB5 configuration files. We are adding these files as container local resources for the container
+		//applications (JM/TMs) to have proper secure cluster setup
+		Path remoteKrb5Path = null;
+		Path remoteYarnSiteXmlPath = null;
+		boolean hasKrb5 = false;
+		if(System.getenv("IN_TESTS") != null) {
+			String krb5Config = System.getProperty("java.security.krb5.conf");
+			if(krb5Config != null && krb5Config.length() != 0) {
+				File krb5 = new File(krb5Config);
+				LOG.info("Adding KRB5 configuration {} to the AM container local resource bucket", krb5.getAbsolutePath());
+				LocalResource krb5ConfResource = Records.newRecord(LocalResource.class);
+				Path krb5ConfPath = new Path(krb5.getAbsolutePath());
+				remoteKrb5Path = Utils.setupLocalResource(fs, appId.toString(), krb5ConfPath, krb5ConfResource, fs.getHomeDirectory());
+				localResources.put(Utils.KRB5_FILE_NAME, krb5ConfResource);
+
+				File f = new File(System.getenv("YARN_CONF_DIR"),Utils.YARN_SITE_FILE_NAME);
+				LOG.info("Adding Yarn configuration {} to the AM container local resource bucket", f.getAbsolutePath());
+				LocalResource yarnConfResource = Records.newRecord(LocalResource.class);
+				Path yarnSitePath = new Path(f.getAbsolutePath());
+				remoteYarnSiteXmlPath = Utils.setupLocalResource(fs, appId.toString(), yarnSitePath, yarnConfResource, fs.getHomeDirectory());
+				localResources.put(Utils.YARN_SITE_FILE_NAME, yarnConfResource);
+
+				hasKrb5 = true;
+			}
+		}
+
 		// setup security tokens
-		Utils.setTokensFor(amContainer, paths, conf);
+		LocalResource keytabResource = null;
+		Path remotePathKeytab = null;
+		String keytab = flinkConfiguration.getString(ConfigConstants.SECURITY_KEYTAB_KEY, null);
+		if(keytab != null) {
+			LOG.info("Adding keytab {} to the AM container local resource bucket", keytab);
+			keytabResource = Records.newRecord(LocalResource.class);
+			Path keytabPath = new Path(keytab);
+			remotePathKeytab = Utils.setupLocalResource(fs, appId.toString(), keytabPath, keytabResource, fs.getHomeDirectory());
+			localResources.put(Utils.KEYTAB_FILE_NAME, keytabResource);
+		}
+
+		final ContainerLaunchContext amContainer = setupApplicationMasterContainer(hasLogback, hasLog4j, hasKrb5);
+
+		if ( UserGroupInformation.isSecurityEnabled() && keytab == null ) {
+			//set tokens only when keytab is not provided
+			LOG.info("Adding delegation token to the AM container..");
+			Utils.setTokensFor(amContainer, paths, conf);
+		}
 
 		amContainer.setLocalResources(localResources);
 		fs.close();
@@ -646,11 +677,25 @@ public abstract class AbstractYarnClusterDescriptor implements ClusterDescriptor
 		appMasterEnv.put(YarnConfigKeys.ENV_APP_ID, appId.toString());
 		appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_HOME_DIR, fs.getHomeDirectory().toString());
 		appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_SHIP_FILES, envShipFileList.toString());
-		appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_USERNAME, UserGroupInformation.getCurrentUser().getShortUserName());
 		appMasterEnv.put(YarnConfigKeys.ENV_SLOTS, String.valueOf(slots));
 		appMasterEnv.put(YarnConfigKeys.ENV_DETACHED, String.valueOf(detached));
 		appMasterEnv.put(YarnConfigKeys.ENV_ZOOKEEPER_NAMESPACE, getZookeeperNamespace());
 
+		// https://github.com/apache/hadoop/blob/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/YarnApplicationSecurity.md#identity-on-an-insecure-cluster-hadoop_user_name
+		appMasterEnv.put(YarnConfigKeys.ENV_HADOOP_USER_NAME, UserGroupInformation.getCurrentUser().getUserName());
+
+		if(keytabResource != null) {
+			appMasterEnv.put(YarnConfigKeys.KEYTAB_PATH, remotePathKeytab.toString() );
+			String principal = flinkConfiguration.getString(ConfigConstants.SECURITY_PRINCIPAL_KEY, null);
+			appMasterEnv.put(YarnConfigKeys.KEYTAB_PRINCIPAL, principal );
+		}
+
+		//To support Yarn Secure Integration Test Scenario
+		if(remoteYarnSiteXmlPath != null && remoteKrb5Path != null) {
+			appMasterEnv.put(YarnConfigKeys.ENV_YARN_SITE_XML_PATH, remoteYarnSiteXmlPath.toString());
+			appMasterEnv.put(YarnConfigKeys.ENV_KRB5_PATH, remoteKrb5Path.toString() );
+		}
+
 		if(dynamicPropertiesEncoded != null) {
 			appMasterEnv.put(YarnConfigKeys.ENV_DYNAMIC_PROPERTIES, dynamicPropertiesEncoded);
 		}
@@ -700,6 +745,7 @@ public abstract class AbstractYarnClusterDescriptor implements ClusterDescriptor
 				throw new YarnDeploymentException("Failed to deploy the cluster: " + e.getMessage());
 			}
 			YarnApplicationState appState = report.getYarnApplicationState();
+			LOG.debug("Application State: {}", appState);
 			switch(appState) {
 				case FAILED:
 				case FINISHED:
@@ -996,7 +1042,9 @@ public abstract class AbstractYarnClusterDescriptor implements ClusterDescriptor
 		}
 	}
 
-	protected ContainerLaunchContext setupApplicationMasterContainer(boolean hasLogback, boolean hasLog4j) {
+	protected ContainerLaunchContext setupApplicationMasterContainer(boolean hasLogback,
+																	boolean hasLog4j,
+																	boolean hasKrb5) {
 		// ------------------ Prepare Application Master Container  ------------------------------
 
 		// respect custom JVM options in the YAML file
@@ -1021,6 +1069,12 @@ public abstract class AbstractYarnClusterDescriptor implements ClusterDescriptor
 			}
 		}
 
+		//applicable only for YarnMiniCluster secure test run
+		//krb5.conf file will be available as local resource in JM/TM container
+		if(hasKrb5) {
+			amCommand += " -Djava.security.krb5.conf=krb5.conf";
+		}
+
 		amCommand += " " + getApplicationMasterClass().getName() + " "
 			+ " 1>"
 			+ ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/jobmanager.out"

http://git-wip-us.apache.org/repos/asf/flink/blob/25a622fd/flink-yarn/src/main/java/org/apache/flink/yarn/Utils.java
----------------------------------------------------------------------
diff --git a/flink-yarn/src/main/java/org/apache/flink/yarn/Utils.java b/flink-yarn/src/main/java/org/apache/flink/yarn/Utils.java
index 1496d61..94d4582 100644
--- a/flink-yarn/src/main/java/org/apache/flink/yarn/Utils.java
+++ b/flink-yarn/src/main/java/org/apache/flink/yarn/Utils.java
@@ -60,6 +60,14 @@ public final class Utils {
 	
 	private static final Logger LOG = LoggerFactory.getLogger(Utils.class);
 
+	/** Keytab file name populated in YARN container */
+	public static final String KEYTAB_FILE_NAME = "krb5.keytab";
+
+	/** KRB5 file name populated in YARN container for secure IT run */
+	public static final String KRB5_FILE_NAME = "krb5.conf";
+
+	/** Yarn site xml file name populated in YARN container for secure IT run */
+	public static final String YARN_SITE_FILE_NAME = "yarn-site.xml";
 
 	/**
 	 * See documentation

http://git-wip-us.apache.org/repos/asf/flink/blob/25a622fd/flink-yarn/src/main/java/org/apache/flink/yarn/YarnApplicationMasterRunner.java
----------------------------------------------------------------------
diff --git a/flink-yarn/src/main/java/org/apache/flink/yarn/YarnApplicationMasterRunner.java b/flink-yarn/src/main/java/org/apache/flink/yarn/YarnApplicationMasterRunner.java
index 6619633..efb658a 100644
--- a/flink-yarn/src/main/java/org/apache/flink/yarn/YarnApplicationMasterRunner.java
+++ b/flink-yarn/src/main/java/org/apache/flink/yarn/YarnApplicationMasterRunner.java
@@ -32,6 +32,7 @@ import org.apache.flink.runtime.jobmanager.JobManager;
 import org.apache.flink.runtime.jobmanager.MemoryArchivist;
 import org.apache.flink.runtime.leaderretrieval.LeaderRetrievalService;
 import org.apache.flink.runtime.process.ProcessReaper;
+import org.apache.flink.runtime.security.SecurityContext;
 import org.apache.flink.runtime.taskmanager.TaskManager;
 import org.apache.flink.runtime.util.EnvironmentInformation;
 import org.apache.flink.runtime.util.JvmShutdownSafeguard;
@@ -40,11 +41,11 @@ import org.apache.flink.runtime.util.SignalHandler;
 import org.apache.flink.runtime.webmonitor.WebMonitor;
 
 import org.apache.flink.yarn.cli.FlinkYarnSessionCli;
+import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.DataOutputBuffer;
 import org.apache.hadoop.security.Credentials;
 import org.apache.hadoop.security.UserGroupInformation;
-import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.yarn.api.ApplicationConstants;
 import org.apache.hadoop.yarn.api.ApplicationConstants.Environment;
 import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
@@ -60,11 +61,10 @@ import scala.concurrent.duration.FiniteDuration;
 import java.io.File;
 import java.io.IOException;
 import java.nio.ByteBuffer;
-import java.security.PrivilegedAction;
-import java.util.Collections;
-import java.util.HashMap;
 import java.util.Map;
+import java.util.HashMap;
 import java.util.UUID;
+import java.util.Collections;
 import java.util.concurrent.TimeUnit;
 
 import static org.apache.flink.yarn.YarnConfigKeys.ENV_FLINK_CLASSPATH;
@@ -117,7 +117,7 @@ public class YarnApplicationMasterRunner {
 
 	/**
 	 * The instance entry point for the YARN application master. Obtains user group
-	 * information and calls the main work method {@link #runApplicationMaster()} as a
+	 * information and calls the main work method {@link #runApplicationMaster(Configuration)} as a
 	 * privileged action.
 	 *
 	 * @param args The command line arguments.
@@ -127,34 +127,66 @@ public class YarnApplicationMasterRunner {
 		try {
 			LOG.debug("All environment variables: {}", ENV);
 
-			final String yarnClientUsername = ENV.get(YarnConfigKeys.ENV_CLIENT_USERNAME);
+			final String yarnClientUsername = ENV.get(YarnConfigKeys.ENV_HADOOP_USER_NAME);
 			require(yarnClientUsername != null, "YARN client user name environment variable {} not set",
-				YarnConfigKeys.ENV_CLIENT_USERNAME);
+				YarnConfigKeys.ENV_HADOOP_USER_NAME);
 
-			final UserGroupInformation currentUser;
-			try {
-				currentUser = UserGroupInformation.getCurrentUser();
-			} catch (Throwable t) {
-				throw new Exception("Cannot access UserGroupInformation information for current user", t);
+			final String currDir = ENV.get(Environment.PWD.key());
+			require(currDir != null, "Current working directory variable (%s) not set", Environment.PWD.key());
+			LOG.debug("Current working Directory: {}", currDir);
+
+			final String remoteKeytabPath = ENV.get(YarnConfigKeys.KEYTAB_PATH);
+			LOG.debug("remoteKeytabPath obtained {}", remoteKeytabPath);
+
+			final String remoteKeytabPrincipal = ENV.get(YarnConfigKeys.KEYTAB_PRINCIPAL);
+			LOG.info("remoteKeytabPrincipal obtained {}", remoteKeytabPrincipal);
+
+			String keytabPath = null;
+			if(remoteKeytabPath != null) {
+				File f = new File(currDir, Utils.KEYTAB_FILE_NAME);
+				keytabPath = f.getAbsolutePath();
+				LOG.debug("keytabPath: {}", keytabPath);
 			}
 
-			LOG.info("YARN daemon runs as user {}. Running Flink Application Master/JobManager as user {}",
-				currentUser.getShortUserName(), yarnClientUsername);
+			UserGroupInformation currentUser = UserGroupInformation.getCurrentUser();
+
+			LOG.info("YARN daemon is running as: {} Yarn client user obtainer: {}",
+					currentUser.getShortUserName(), yarnClientUsername );
+
+			SecurityContext.SecurityConfiguration sc = new SecurityContext.SecurityConfiguration();
+
+			//To support Yarn Secure Integration Test Scenario
+			File krb5Conf = new File(currDir, Utils.KRB5_FILE_NAME);
+			if(krb5Conf.exists() && krb5Conf.canRead()) {
+				String krb5Path = krb5Conf.getAbsolutePath();
+				LOG.info("KRB5 Conf: {}", krb5Path);
+				org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
+				conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, "kerberos");
+				conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHORIZATION, "true");
+				sc.setHadoopConfiguration(conf);
+			}
 
-			UserGroupInformation ugi = UserGroupInformation.createRemoteUser(yarnClientUsername);
+			// Flink configuration
+			final Map<String, String> dynamicProperties =
+					FlinkYarnSessionCli.getDynamicProperties(ENV.get(YarnConfigKeys.ENV_DYNAMIC_PROPERTIES));
+			LOG.debug("YARN dynamic properties: {}", dynamicProperties);
 
-			// transfer all security tokens, for example for authenticated HDFS and HBase access
-			for (Token<?> token : currentUser.getTokens()) {
-				ugi.addToken(token);
+			final Configuration flinkConfig = createConfiguration(currDir, dynamicProperties);
+			if(keytabPath != null && remoteKeytabPrincipal != null) {
+				flinkConfig.setString(ConfigConstants.SECURITY_KEYTAB_KEY, keytabPath);
+				flinkConfig.setString(ConfigConstants.SECURITY_PRINCIPAL_KEY, remoteKeytabPrincipal);
 			}
+			flinkConfig.setString(ConfigConstants.FLINK_BASE_DIR_PATH_KEY, currDir);
 
-			// run the actual work in a secured privileged action
-			return ugi.doAs(new PrivilegedAction<Integer>() {
+			SecurityContext.install(sc.setFlinkConfiguration(flinkConfig));
+
+			return SecurityContext.getInstalled().runSecured(new SecurityContext.FlinkSecuredRunner<Integer>() {
 				@Override
 				public Integer run() {
-					return runApplicationMaster();
+					return runApplicationMaster(flinkConfig);
 				}
 			});
+
 		}
 		catch (Throwable t) {
 			// make sure that everything whatever ends up in the log
@@ -172,7 +204,7 @@ public class YarnApplicationMasterRunner {
 	 *
 	 * @return The return code for the Java process.
 	 */
-	protected int runApplicationMaster() {
+	protected int runApplicationMaster(Configuration config) {
 		ActorSystem actorSystem = null;
 		WebMonitor webMonitor = null;
 
@@ -194,12 +226,21 @@ public class YarnApplicationMasterRunner {
 
 			LOG.info("YARN assigned hostname for application master: {}", appMasterHostname);
 
-			// Flink configuration
-			final Map<String, String> dynamicProperties =
-				FlinkYarnSessionCli.getDynamicProperties(ENV.get(YarnConfigKeys.ENV_DYNAMIC_PROPERTIES));
-			LOG.debug("YARN dynamic properties: {}", dynamicProperties);
+			//Update keytab and principal path to reflect YARN container path location
+			final String remoteKeytabPath = ENV.get(YarnConfigKeys.KEYTAB_PATH);
 
-			final Configuration config = createConfiguration(currDir, dynamicProperties);
+			final String remoteKeytabPrincipal = ENV.get(YarnConfigKeys.KEYTAB_PRINCIPAL);
+
+			String keytabPath = null;
+			if(remoteKeytabPath != null) {
+				File f = new File(currDir, Utils.KEYTAB_FILE_NAME);
+				keytabPath = f.getAbsolutePath();
+				LOG.info("keytabPath: {}", keytabPath);
+			}
+			if(keytabPath != null && remoteKeytabPrincipal != null) {
+				config.setString(ConfigConstants.SECURITY_KEYTAB_KEY, keytabPath);
+				config.setString(ConfigConstants.SECURITY_PRINCIPAL_KEY, remoteKeytabPrincipal);
+			}
 
 			// Hadoop/Yarn configuration (loads config data automatically from classpath files)
 			final YarnConfiguration yarnConfig = new YarnConfiguration();
@@ -523,8 +564,20 @@ public class YarnApplicationMasterRunner {
 		String shipListString = env.get(YarnConfigKeys.ENV_CLIENT_SHIP_FILES);
 		require(shipListString != null, "Environment variable %s not set", YarnConfigKeys.ENV_CLIENT_SHIP_FILES);
 
-		String yarnClientUsername = env.get(YarnConfigKeys.ENV_CLIENT_USERNAME);
-		require(yarnClientUsername != null, "Environment variable %s not set", YarnConfigKeys.ENV_CLIENT_USERNAME);
+		String yarnClientUsername = env.get(YarnConfigKeys.ENV_HADOOP_USER_NAME);
+		require(yarnClientUsername != null, "Environment variable %s not set", YarnConfigKeys.ENV_HADOOP_USER_NAME);
+
+		final String remoteKeytabPath = env.get(YarnConfigKeys.KEYTAB_PATH);
+		LOG.info("TM:remoteKeytabPath obtained {}", remoteKeytabPath);
+
+		final String remoteKeytabPrincipal = env.get(YarnConfigKeys.KEYTAB_PRINCIPAL);
+		LOG.info("TM:remoteKeytabPrincipal obtained {}", remoteKeytabPrincipal);
+
+		final String remoteYarnConfPath = env.get(YarnConfigKeys.ENV_YARN_SITE_XML_PATH);
+		LOG.info("TM:remoteYarnConfPath obtained {}", remoteYarnConfPath);
+
+		final String remoteKrb5Path = env.get(YarnConfigKeys.ENV_KRB5_PATH);
+		LOG.info("TM:remotekrb5Path obtained {}", remoteKrb5Path);
 
 		String classPathString = env.get(YarnConfigKeys.ENV_FLINK_CLASSPATH);
 		require(classPathString != null, "Environment variable %s not set", YarnConfigKeys.ENV_FLINK_CLASSPATH);
@@ -537,6 +590,33 @@ public class YarnApplicationMasterRunner {
 			throw new Exception("Could not access YARN's default file system", e);
 		}
 
+		//register keytab
+		LocalResource keytabResource = null;
+		if(remoteKeytabPath != null) {
+			LOG.info("Adding keytab {} to the AM container local resource bucket", remoteKeytabPath);
+			keytabResource = Records.newRecord(LocalResource.class);
+			Path keytabPath = new Path(remoteKeytabPath);
+			Utils.registerLocalResource(yarnFileSystem, keytabPath, keytabResource);
+		}
+
+		//To support Yarn Secure Integration Test Scenario
+		LocalResource yarnConfResource = null;
+		LocalResource krb5ConfResource = null;
+		boolean hasKrb5 = false;
+		if(remoteYarnConfPath != null && remoteKrb5Path != null) {
+			LOG.info("TM:Adding remoteYarnConfPath {} to the container local resource bucket", remoteYarnConfPath);
+			yarnConfResource = Records.newRecord(LocalResource.class);
+			Path yarnConfPath = new Path(remoteYarnConfPath);
+			Utils.registerLocalResource(yarnFileSystem, yarnConfPath, yarnConfResource);
+
+			LOG.info("TM:Adding remoteKrb5Path {} to the container local resource bucket", remoteKrb5Path);
+			krb5ConfResource = Records.newRecord(LocalResource.class);
+			Path krb5ConfPath = new Path(remoteKrb5Path);
+			Utils.registerLocalResource(yarnFileSystem, krb5ConfPath, krb5ConfResource);
+
+			hasKrb5 = true;
+		}
+
 		// register Flink Jar with remote HDFS
 		LocalResource flinkJar = Records.newRecord(LocalResource.class);
 		{
@@ -563,6 +643,16 @@ public class YarnApplicationMasterRunner {
 		taskManagerLocalResources.put("flink.jar", flinkJar);
 		taskManagerLocalResources.put("flink-conf.yaml", flinkConf);
 
+		//To support Yarn Secure Integration Test Scenario
+		if(yarnConfResource != null && krb5ConfResource != null) {
+			taskManagerLocalResources.put(Utils.YARN_SITE_FILE_NAME, yarnConfResource);
+			taskManagerLocalResources.put(Utils.KRB5_FILE_NAME, krb5ConfResource);
+		}
+
+		if(keytabResource != null) {
+			taskManagerLocalResources.put(Utils.KEYTAB_FILE_NAME, keytabResource);
+		}
+
 		// prepare additional files to be shipped
 		for (String pathStr : shipListString.split(",")) {
 			if (!pathStr.isEmpty()) {
@@ -582,7 +672,7 @@ public class YarnApplicationMasterRunner {
 
 		String launchCommand = BootstrapTools.getTaskManagerShellCommand(
 			flinkConfig, tmParams, ".", ApplicationConstants.LOG_DIR_EXPANSION_VAR,
-			hasLogback, hasLog4j, taskManagerMainClass);
+			hasLogback, hasLog4j, hasKrb5, taskManagerMainClass);
 
 		log.info("Starting TaskManagers with command: " + launchCommand);
 
@@ -597,11 +687,17 @@ public class YarnApplicationMasterRunner {
 		containerEnv.put(ENV_FLINK_CLASSPATH, classPathString);
 		Utils.setupYarnClassPath(yarnConfig, containerEnv);
 
-		containerEnv.put(YarnConfigKeys.ENV_CLIENT_USERNAME, yarnClientUsername);
+		containerEnv.put(YarnConfigKeys.ENV_HADOOP_USER_NAME, UserGroupInformation.getCurrentUser().getUserName());
+
+		if(remoteKeytabPath != null && remoteKeytabPrincipal != null) {
+			containerEnv.put(YarnConfigKeys.KEYTAB_PATH, remoteKeytabPath);
+			containerEnv.put(YarnConfigKeys.KEYTAB_PRINCIPAL, remoteKeytabPrincipal);
+		}
 
 		ctx.setEnvironment(containerEnv);
 
 		try (DataOutputBuffer dob = new DataOutputBuffer()) {
+			LOG.debug("Adding security tokens to Task Manager Container launch Context....");
 			UserGroupInformation user = UserGroupInformation.getCurrentUser();
 			Credentials credentials = user.getCredentials();
 			credentials.writeTokenStorageToStream(dob);

http://git-wip-us.apache.org/repos/asf/flink/blob/25a622fd/flink-yarn/src/main/java/org/apache/flink/yarn/YarnConfigKeys.java
----------------------------------------------------------------------
diff --git a/flink-yarn/src/main/java/org/apache/flink/yarn/YarnConfigKeys.java b/flink-yarn/src/main/java/org/apache/flink/yarn/YarnConfigKeys.java
index b14d7b7..ada241c 100644
--- a/flink-yarn/src/main/java/org/apache/flink/yarn/YarnConfigKeys.java
+++ b/flink-yarn/src/main/java/org/apache/flink/yarn/YarnConfigKeys.java
@@ -32,7 +32,6 @@ public class YarnConfigKeys {
 	public final static String ENV_APP_ID = "_APP_ID";
 	public static final String ENV_CLIENT_HOME_DIR = "_CLIENT_HOME_DIR";
 	public static final String ENV_CLIENT_SHIP_FILES = "_CLIENT_SHIP_FILES";
-	public static final String ENV_CLIENT_USERNAME = "_CLIENT_USERNAME";
 	public static final String ENV_SLOTS = "_SLOTS";
 	public static final String ENV_DETACHED = "_DETACHED";
 	public static final String ENV_DYNAMIC_PROPERTIES = "_DYNAMIC_PROPERTIES";
@@ -41,8 +40,13 @@ public class YarnConfigKeys {
 
 	public final static String FLINK_JAR_PATH = "_FLINK_JAR_PATH"; // the Flink jar resource location (in HDFS).
 
+	public final static String KEYTAB_PATH = "_KEYTAB_PATH";
+	public final static String KEYTAB_PRINCIPAL = "_KEYTAB_PRINCIPAL";
+	public final static String ENV_HADOOP_USER_NAME = "HADOOP_USER_NAME";
 	public static final String ENV_ZOOKEEPER_NAMESPACE = "_ZOOKEEPER_NAMESPACE";
 
+	public static final String ENV_KRB5_PATH = "_KRB5_PATH";
+	public static final String ENV_YARN_SITE_XML_PATH = "_YARN_SITE_XML_PATH";
 
 	// ------------------------------------------------------------------------
 

http://git-wip-us.apache.org/repos/asf/flink/blob/25a622fd/flink-yarn/src/main/java/org/apache/flink/yarn/YarnTaskManagerRunner.java
----------------------------------------------------------------------
diff --git a/flink-yarn/src/main/java/org/apache/flink/yarn/YarnTaskManagerRunner.java b/flink-yarn/src/main/java/org/apache/flink/yarn/YarnTaskManagerRunner.java
index 9638137..c70a30b 100644
--- a/flink-yarn/src/main/java/org/apache/flink/yarn/YarnTaskManagerRunner.java
+++ b/flink-yarn/src/main/java/org/apache/flink/yarn/YarnTaskManagerRunner.java
@@ -18,22 +18,22 @@
 
 package org.apache.flink.yarn;
 
+import java.io.File;
 import java.io.IOException;
-import java.security.PrivilegedAction;
 import java.util.Map;
 
 import org.apache.flink.configuration.ConfigConstants;
 import org.apache.flink.configuration.Configuration;
 import org.apache.flink.runtime.clusterframework.types.ResourceID;
+import org.apache.flink.runtime.security.SecurityContext;
 import org.apache.flink.runtime.taskmanager.TaskManager;
 import org.apache.flink.runtime.util.EnvironmentInformation;
 
 import org.apache.flink.runtime.util.JvmShutdownSafeguard;
 import org.apache.flink.runtime.util.SignalHandler;
 import org.apache.flink.util.Preconditions;
+import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
 import org.apache.hadoop.security.UserGroupInformation;
-import org.apache.hadoop.security.token.Token;
-import org.apache.hadoop.security.token.TokenIdentifier;
 import org.apache.hadoop.yarn.api.ApplicationConstants.Environment;
 
 import org.slf4j.Logger;
@@ -64,8 +64,18 @@ public class YarnTaskManagerRunner {
 
 		// read the environment variables for YARN
 		final Map<String, String> envs = System.getenv();
-		final String yarnClientUsername = envs.get(YarnConfigKeys.ENV_CLIENT_USERNAME);
+		final String yarnClientUsername = envs.get(YarnConfigKeys.ENV_HADOOP_USER_NAME);
 		final String localDirs = envs.get(Environment.LOCAL_DIRS.key());
+		LOG.info("Current working/local Directory: {}", localDirs);
+
+		final String currDir = envs.get(Environment.PWD.key());
+		LOG.info("Current working Directory: {}", currDir);
+
+		final String remoteKeytabPath = envs.get(YarnConfigKeys.KEYTAB_PATH);
+		LOG.info("TM: remoteKeytabPath obtained {}", remoteKeytabPath);
+
+		final String remoteKeytabPrincipal = envs.get(YarnConfigKeys.KEYTAB_PRINCIPAL);
+		LOG.info("TM: remoteKeytabPrincipal obtained {}", remoteKeytabPrincipal);
 
 		// configure local directory
 		String flinkTempDirs = configuration.getString(ConfigConstants.TASK_MANAGER_TMP_DIR_KEY, null);
@@ -78,34 +88,66 @@ public class YarnTaskManagerRunner {
 				"specified in the Flink config: " + flinkTempDirs);
 		}
 
-		LOG.info("YARN daemon runs as '" + UserGroupInformation.getCurrentUser().getShortUserName() +
-			"' setting user to execute Flink TaskManager to '" + yarnClientUsername + "'");
-
 		// tell akka to die in case of an error
 		configuration.setBoolean(ConfigConstants.AKKA_JVM_EXIT_ON_FATAL_ERROR, true);
 
-		UserGroupInformation ugi = UserGroupInformation.createRemoteUser(yarnClientUsername);
-		for (Token<? extends TokenIdentifier> toks : UserGroupInformation.getCurrentUser().getTokens()) {
-			ugi.addToken(toks);
+		String keytabPath = null;
+		if(remoteKeytabPath != null) {
+			File f = new File(currDir, Utils.KEYTAB_FILE_NAME);
+			keytabPath = f.getAbsolutePath();
+			LOG.info("keytabPath: {}", keytabPath);
 		}
 
+		UserGroupInformation currentUser = UserGroupInformation.getCurrentUser();
+
+		LOG.info("YARN daemon is running as: {} Yarn client user obtainer: {}",
+				currentUser.getShortUserName(), yarnClientUsername );
+
 		// Infer the resource identifier from the environment variable
 		String containerID = Preconditions.checkNotNull(envs.get(YarnFlinkResourceManager.ENV_FLINK_CONTAINER_ID));
 		final ResourceID resourceId = new ResourceID(containerID);
 		LOG.info("ResourceID assigned for this container: {}", resourceId);
 
-		ugi.doAs(new PrivilegedAction<Object>() {
-			@Override
-			public Object run() {
-				try {
-					TaskManager.selectNetworkInterfaceAndRunTaskManager(configuration, resourceId, taskManager);
-				}
-				catch (Throwable t) {
-					LOG.error("Error while starting the TaskManager", t);
-					System.exit(TaskManager.STARTUP_FAILURE_RETURN_CODE());
-				}
-				return null;
+		try {
+
+			SecurityContext.SecurityConfiguration sc = new SecurityContext.SecurityConfiguration();
+
+			//To support Yarn Secure Integration Test Scenario
+			File krb5Conf = new File(currDir, Utils.KRB5_FILE_NAME);
+			if(krb5Conf.exists() && krb5Conf.canRead()) {
+				String krb5Path = krb5Conf.getAbsolutePath();
+				LOG.info("KRB5 Conf: {}", krb5Path);
+				org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
+				conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, "kerberos");
+				conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHORIZATION, "true");
+				sc.setHadoopConfiguration(conf);
+			}
+
+			if(keytabPath != null && remoteKeytabPrincipal != null) {
+				configuration.setString(ConfigConstants.SECURITY_KEYTAB_KEY, keytabPath);
+				configuration.setString(ConfigConstants.SECURITY_PRINCIPAL_KEY, remoteKeytabPrincipal);
 			}
-		});
+			configuration.setString(ConfigConstants.FLINK_BASE_DIR_PATH_KEY, currDir);
+
+			SecurityContext.install(sc.setFlinkConfiguration(configuration));
+
+			SecurityContext.getInstalled().runSecured(new SecurityContext.FlinkSecuredRunner<Integer>() {
+				@Override
+				public Integer run() {
+					try {
+						TaskManager.selectNetworkInterfaceAndRunTaskManager(configuration, resourceId, taskManager);
+					}
+					catch (Throwable t) {
+						LOG.error("Error while starting the TaskManager", t);
+						System.exit(TaskManager.STARTUP_FAILURE_RETURN_CODE());
+					}
+					return null;
+				}
+			});
+		} catch(Exception e) {
+			LOG.error("Exception occurred while launching Task Manager. Reason: {}", e);
+			throw new RuntimeException(e);
+		}
+
 	}
-}
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/flink/blob/25a622fd/flink-yarn/src/main/java/org/apache/flink/yarn/cli/FlinkYarnSessionCli.java
----------------------------------------------------------------------
diff --git a/flink-yarn/src/main/java/org/apache/flink/yarn/cli/FlinkYarnSessionCli.java b/flink-yarn/src/main/java/org/apache/flink/yarn/cli/FlinkYarnSessionCli.java
index 8f02a1c..b5364f0 100644
--- a/flink-yarn/src/main/java/org/apache/flink/yarn/cli/FlinkYarnSessionCli.java
+++ b/flink-yarn/src/main/java/org/apache/flink/yarn/cli/FlinkYarnSessionCli.java
@@ -24,11 +24,14 @@ import org.apache.commons.cli.Option;
 import org.apache.commons.cli.Options;
 import org.apache.commons.cli.PosixParser;
 import org.apache.commons.lang3.StringUtils;
+import org.apache.flink.client.CliFrontend;
 import org.apache.flink.client.cli.CliFrontendParser;
 import org.apache.flink.client.cli.CustomCommandLine;
 import org.apache.flink.configuration.ConfigConstants;
 import org.apache.flink.configuration.Configuration;
+import org.apache.flink.configuration.GlobalConfiguration;
 import org.apache.flink.configuration.IllegalConfigurationException;
+import org.apache.flink.runtime.security.SecurityContext;
 import org.apache.flink.yarn.AbstractYarnClusterDescriptor;
 import org.apache.flink.yarn.YarnClusterDescriptor;
 import org.apache.flink.yarn.YarnClusterClient;
@@ -460,9 +463,27 @@ public class FlinkYarnSessionCli implements CustomCommandLine<YarnClusterClient>
 		}
 	}
 
-	public static void main(String[] args) {
-		FlinkYarnSessionCli cli = new FlinkYarnSessionCli("", ""); // no prefix for the YARN session
-		System.exit(cli.run(args));
+	public static void main(final String[] args) {
+		final FlinkYarnSessionCli cli = new FlinkYarnSessionCli("", "", true); // no prefix for the YARN session
+
+		String confDirPath = CliFrontend.getConfigurationDirectoryFromEnv();
+		GlobalConfiguration.loadConfiguration(confDirPath);
+		Configuration flinkConfiguration = GlobalConfiguration.loadConfiguration();
+		flinkConfiguration.setString(ConfigConstants.FLINK_BASE_DIR_PATH_KEY, confDirPath);
+		try {
+			SecurityContext.install(new SecurityContext.SecurityConfiguration().setFlinkConfiguration(flinkConfiguration));
+			int retCode = SecurityContext.getInstalled().runSecured(new SecurityContext.FlinkSecuredRunner<Integer>() {
+				@Override
+				public Integer run() {
+					return cli.run(args);
+				}
+			});
+			System.exit(retCode);
+		} catch(Exception e) {
+			e.printStackTrace();
+			LOG.error("Exception Occured. Reason: {}", e);
+			return;
+		}
 	}
 
 	@Override
@@ -523,6 +544,7 @@ public class FlinkYarnSessionCli implements CustomCommandLine<YarnClusterClient>
 		try {
 			return yarnClusterDescriptor.deploy();
 		} catch (Exception e) {
+			LOG.error("Error while deploying YARN cluster: "+e.getMessage(), e);
 			throw new RuntimeException("Error deploying the YARN cluster", e);
 		}
 

http://git-wip-us.apache.org/repos/asf/flink/blob/25a622fd/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 02e868e..5b3148a 100644
--- a/pom.xml
+++ b/pom.xml
@@ -107,6 +107,13 @@ under the License.
 		<jackson.version>2.7.4</jackson.version>
 		<metrics.version>3.1.0</metrics.version>
 		<junit.version>4.11</junit.version>
+		<!--
+			Keeping the MiniKDC version fixed instead of taking hadoop version dependency
+			to support testing Kafka, ZK etc., modules that does not have Hadoop dependency
+			Starting Hadoop 3, org.apache.kerby will be used instead of MiniKDC. We may have
+			to revisit the impact at that time.
+		-->
+		<minikdc.version>2.7.2</minikdc.version>
 	</properties>
 
 	<dependencies>

http://git-wip-us.apache.org/repos/asf/flink/blob/25a622fd/tools/log4j-travis.properties
----------------------------------------------------------------------
diff --git a/tools/log4j-travis.properties b/tools/log4j-travis.properties
index 53379b4..476cee3 100644
--- a/tools/log4j-travis.properties
+++ b/tools/log4j-travis.properties
@@ -45,3 +45,6 @@ log4j.logger.org.apache.flink.runtime.leaderretrieval=DEBUG
 # the tests
 log4j.logger.org.apache.flink.yarn.YARNSessionFIFOITCase=INFO, console
 log4j.logger.org.apache.flink.yarn.YARNSessionCapacitySchedulerITCase=INFO, console
+log4j.logger.org.apache.flink.streaming.connectors.kafka=INFO, console
+log4j.logger.org.I0Itec.zkclient=INFO, console
+log4j.logger.org.apache.flink.streaming.connectors.kafka.internals.SimpleConsumerThread=OFF
\ No newline at end of file


[35/50] [abbrv] flink git commit: [FLINK-4529] [flip-6] Move TaskExecutor, JobMaster and ResourceManager out of the rpc package

Posted by tr...@apache.org.
http://git-wip-us.apache.org/repos/asf/flink/blob/2f12ba3e/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/TaskExecutor.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/TaskExecutor.java b/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/TaskExecutor.java
new file mode 100644
index 0000000..4871b96
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/TaskExecutor.java
@@ -0,0 +1,827 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.taskexecutor;
+
+import akka.actor.ActorSystem;
+import akka.dispatch.ExecutionContexts$;
+import akka.util.Timeout;
+import com.typesafe.config.Config;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.flink.api.common.JobID;
+import org.apache.flink.configuration.ConfigConstants;
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.configuration.IllegalConfigurationException;
+import org.apache.flink.core.memory.HeapMemorySegment;
+import org.apache.flink.core.memory.HybridMemorySegment;
+import org.apache.flink.core.memory.MemorySegmentFactory;
+import org.apache.flink.core.memory.MemoryType;
+import org.apache.flink.runtime.akka.AkkaUtils;
+import org.apache.flink.annotation.VisibleForTesting;
+import org.apache.flink.runtime.clusterframework.types.ResourceID;
+import org.apache.flink.runtime.highavailability.HighAvailabilityServices;
+import org.apache.flink.runtime.instance.InstanceConnectionInfo;
+import org.apache.flink.runtime.io.disk.iomanager.IOManager;
+import org.apache.flink.runtime.io.disk.iomanager.IOManager.IOMode;
+import org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync;
+import org.apache.flink.runtime.io.network.NetworkEnvironment;
+import org.apache.flink.runtime.io.network.netty.NettyConfig;
+import org.apache.flink.runtime.leaderelection.LeaderElectionService;
+import org.apache.flink.runtime.leaderretrieval.LeaderRetrievalListener;
+import org.apache.flink.runtime.leaderretrieval.LeaderRetrievalService;
+import org.apache.flink.runtime.memory.MemoryManager;
+import org.apache.flink.runtime.rpc.RpcEndpoint;
+import org.apache.flink.runtime.rpc.RpcMethod;
+import org.apache.flink.runtime.rpc.RpcService;
+import org.apache.flink.runtime.rpc.akka.AkkaRpcService;
+import org.apache.flink.runtime.taskmanager.MemoryLogger;
+import org.apache.flink.runtime.util.EnvironmentInformation;
+import org.apache.flink.runtime.util.LeaderRetrievalUtils;
+import org.apache.flink.runtime.taskmanager.NetworkEnvironmentConfiguration;
+import org.apache.flink.util.MathUtils;
+import org.apache.flink.util.NetUtils;
+
+import scala.Tuple2;
+import scala.Option;
+import scala.Some;
+import scala.concurrent.ExecutionContext;
+import scala.concurrent.duration.Duration;
+import scala.concurrent.duration.FiniteDuration;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.InetAddress;
+import java.net.InetSocketAddress;
+import java.util.UUID;
+import java.util.concurrent.ForkJoinPool;
+import java.util.concurrent.TimeUnit;
+
+import static org.apache.flink.util.Preconditions.checkNotNull;
+
+/**
+ * TaskExecutor implementation. The task executor is responsible for the execution of multiple
+ * {@link org.apache.flink.runtime.taskmanager.Task}.
+ */
+public class TaskExecutor extends RpcEndpoint<TaskExecutorGateway> {
+
+	private static final Logger LOG = LoggerFactory.getLogger(TaskExecutor.class);
+
+	/** The unique resource ID of this TaskExecutor */
+	private final ResourceID resourceID;
+
+	/** The access to the leader election and metadata storage services */
+	private final HighAvailabilityServices haServices;
+
+	/** The task manager configuration */
+	private final TaskExecutorConfiguration taskExecutorConfig;
+
+	/** The I/O manager component in the task manager */
+	private final IOManager ioManager;
+
+	/** The memory manager component in the task manager */
+	private final MemoryManager memoryManager;
+
+	/** The network component in the task manager */
+	private final NetworkEnvironment networkEnvironment;
+
+	/** The number of slots in the task manager, should be 1 for YARN */
+	private final int numberOfSlots;
+
+	// --------- resource manager --------
+
+	private TaskExecutorToResourceManagerConnection resourceManagerConnection;
+
+	// ------------------------------------------------------------------------
+
+	public TaskExecutor(
+			TaskExecutorConfiguration taskExecutorConfig,
+			ResourceID resourceID,
+			MemoryManager memoryManager,
+			IOManager ioManager,
+			NetworkEnvironment networkEnvironment,
+			int numberOfSlots,
+			RpcService rpcService,
+			HighAvailabilityServices haServices) {
+
+		super(rpcService);
+
+		this.taskExecutorConfig = checkNotNull(taskExecutorConfig);
+		this.resourceID = checkNotNull(resourceID);
+		this.memoryManager = checkNotNull(memoryManager);
+		this.ioManager = checkNotNull(ioManager);
+		this.networkEnvironment = checkNotNull(networkEnvironment);
+		this.numberOfSlots = checkNotNull(numberOfSlots);
+		this.haServices = checkNotNull(haServices);
+	}
+
+	// ------------------------------------------------------------------------
+	//  Life cycle
+	// ------------------------------------------------------------------------
+
+	@Override
+	public void start() {
+		super.start();
+
+		// start by connecting to the ResourceManager
+		try {
+			haServices.getResourceManagerLeaderRetriever().start(new ResourceManagerLeaderListener());
+		} catch (Exception e) {
+			onFatalErrorAsync(e);
+		}
+	}
+
+	// ------------------------------------------------------------------------
+	//  RPC methods - ResourceManager related
+	// ------------------------------------------------------------------------
+
+	@RpcMethod
+	public void notifyOfNewResourceManagerLeader(String newLeaderAddress, UUID newLeaderId) {
+		if (resourceManagerConnection != null) {
+			if (newLeaderAddress != null) {
+				// the resource manager switched to a new leader
+				log.info("ResourceManager leader changed from {} to {}. Registering at new leader.",
+					resourceManagerConnection.getResourceManagerAddress(), newLeaderAddress);
+			}
+			else {
+				// address null means that the current leader is lost without a new leader being there, yet
+				log.info("Current ResourceManager {} lost leader status. Waiting for new ResourceManager leader.",
+					resourceManagerConnection.getResourceManagerAddress());
+			}
+
+			// drop the current connection or connection attempt
+			if (resourceManagerConnection != null) {
+				resourceManagerConnection.close();
+				resourceManagerConnection = null;
+			}
+		}
+
+		// establish a connection to the new leader
+		if (newLeaderAddress != null) {
+			log.info("Attempting to register at ResourceManager {}", newLeaderAddress);
+			resourceManagerConnection =
+				new TaskExecutorToResourceManagerConnection(log, this, newLeaderAddress, newLeaderId);
+			resourceManagerConnection.start();
+		}
+	}
+
+	/**
+	 * Starts and runs the TaskManager.
+	 * <p/>
+	 * This method first tries to select the network interface to use for the TaskManager
+	 * communication. The network interface is used both for the actor communication
+	 * (coordination) as well as for the data exchange between task managers. Unless
+	 * the hostname/interface is explicitly configured in the configuration, this
+	 * method will try out various interfaces and methods to connect to the JobManager
+	 * and select the one where the connection attempt is successful.
+	 * <p/>
+	 * After selecting the network interface, this method brings up an actor system
+	 * for the TaskManager and its actors, starts the TaskManager's services
+	 * (library cache, shuffle network stack, ...), and starts the TaskManager itself.
+	 *
+	 * @param configuration    The configuration for the TaskManager.
+	 * @param resourceID       The id of the resource which the task manager will run on.
+	 */
+	public static void selectNetworkInterfaceAndRunTaskManager(
+		Configuration configuration,
+		ResourceID resourceID) throws Exception {
+
+		final InetSocketAddress taskManagerAddress = selectNetworkInterfaceAndPort(configuration);
+
+		runTaskManager(taskManagerAddress.getHostName(), resourceID, taskManagerAddress.getPort(), configuration);
+	}
+
+	private static InetSocketAddress selectNetworkInterfaceAndPort(Configuration configuration)
+		throws Exception {
+		String taskManagerHostname = configuration.getString(ConfigConstants.TASK_MANAGER_HOSTNAME_KEY, null);
+		if (taskManagerHostname != null) {
+			LOG.info("Using configured hostname/address for TaskManager: " + taskManagerHostname);
+		} else {
+			LeaderRetrievalService leaderRetrievalService = LeaderRetrievalUtils.createLeaderRetrievalService(configuration);
+			FiniteDuration lookupTimeout = AkkaUtils.getLookupTimeout(configuration);
+
+			InetAddress taskManagerAddress = LeaderRetrievalUtils.findConnectingAddress(leaderRetrievalService, lookupTimeout);
+			taskManagerHostname = taskManagerAddress.getHostName();
+			LOG.info("TaskManager will use hostname/address '{}' ({}) for communication.",
+				taskManagerHostname, taskManagerAddress.getHostAddress());
+		}
+
+		// if no task manager port has been configured, use 0 (system will pick any free port)
+		final int actorSystemPort = configuration.getInteger(ConfigConstants.TASK_MANAGER_IPC_PORT_KEY, 0);
+		if (actorSystemPort < 0 || actorSystemPort > 65535) {
+			throw new IllegalConfigurationException("Invalid value for '" +
+				ConfigConstants.TASK_MANAGER_IPC_PORT_KEY +
+				"' (port for the TaskManager actor system) : " + actorSystemPort +
+				" - Leave config parameter empty or use 0 to let the system choose a port automatically.");
+		}
+
+		return new InetSocketAddress(taskManagerHostname, actorSystemPort);
+	}
+
+	/**
+	 * Starts and runs the TaskManager. Brings up an actor system for the TaskManager and its
+	 * actors, starts the TaskManager's services (library cache, shuffle network stack, ...),
+	 * and starts the TaskManager itself.
+	 * <p/>
+	 * This method will also spawn a process reaper for the TaskManager (kill the process if
+	 * the actor fails) and optionally start the JVM memory logging thread.
+	 *
+	 * @param taskManagerHostname The hostname/address of the interface where the actor system
+	 *                            will communicate.
+	 * @param resourceID          The id of the resource which the task manager will run on.
+	 * @param actorSystemPort   The port at which the actor system will communicate.
+	 * @param configuration       The configuration for the TaskManager.
+	 */
+	private static void runTaskManager(
+		String taskManagerHostname,
+		ResourceID resourceID,
+		int actorSystemPort,
+		final Configuration configuration) throws Exception {
+
+		LOG.info("Starting TaskManager");
+
+		// Bring up the TaskManager actor system first, bind it to the given address.
+
+		LOG.info("Starting TaskManager actor system at " +
+			NetUtils.hostAndPortToUrlString(taskManagerHostname, actorSystemPort));
+
+		final ActorSystem taskManagerSystem;
+		try {
+			Tuple2<String, Object> address = new Tuple2<String, Object>(taskManagerHostname, actorSystemPort);
+			Config akkaConfig = AkkaUtils.getAkkaConfig(configuration, new Some<>(address));
+			LOG.debug("Using akka configuration\n " + akkaConfig);
+			taskManagerSystem = AkkaUtils.createActorSystem(akkaConfig);
+		} catch (Throwable t) {
+			if (t instanceof org.jboss.netty.channel.ChannelException) {
+				Throwable cause = t.getCause();
+				if (cause != null && t.getCause() instanceof java.net.BindException) {
+					String address = NetUtils.hostAndPortToUrlString(taskManagerHostname, actorSystemPort);
+					throw new IOException("Unable to bind TaskManager actor system to address " +
+						address + " - " + cause.getMessage(), t);
+				}
+			}
+			throw new Exception("Could not create TaskManager actor system", t);
+		}
+
+		// start akka rpc service based on actor system
+		final Timeout timeout = new Timeout(AkkaUtils.getTimeout(configuration).toMillis(), TimeUnit.MILLISECONDS);
+		final AkkaRpcService akkaRpcService = new AkkaRpcService(taskManagerSystem, timeout);
+
+		// start high availability service to implement getResourceManagerLeaderRetriever method only
+		final HighAvailabilityServices haServices = new HighAvailabilityServices() {
+			@Override
+			public LeaderRetrievalService getResourceManagerLeaderRetriever() throws Exception {
+				return LeaderRetrievalUtils.createLeaderRetrievalService(configuration);
+			}
+
+			@Override
+			public LeaderElectionService getResourceManagerLeaderElectionService() throws Exception {
+				return null;
+			}
+
+			@Override
+			public LeaderElectionService getJobMasterLeaderElectionService(JobID jobID) throws Exception {
+				return null;
+			}
+		};
+
+		// start all the TaskManager services (network stack,  library cache, ...)
+		// and the TaskManager actor
+		try {
+			LOG.info("Starting TaskManager actor");
+			TaskExecutor taskExecutor = startTaskManagerComponentsAndActor(
+				configuration,
+				resourceID,
+				akkaRpcService,
+				taskManagerHostname,
+				haServices,
+				false);
+
+			taskExecutor.start();
+
+			// if desired, start the logging daemon that periodically logs the memory usage information
+			if (LOG.isInfoEnabled() && configuration.getBoolean(
+				ConfigConstants.TASK_MANAGER_DEBUG_MEMORY_USAGE_START_LOG_THREAD,
+				ConfigConstants.DEFAULT_TASK_MANAGER_DEBUG_MEMORY_USAGE_START_LOG_THREAD)) {
+				LOG.info("Starting periodic memory usage logger");
+
+				long interval = configuration.getLong(
+					ConfigConstants.TASK_MANAGER_DEBUG_MEMORY_USAGE_LOG_INTERVAL_MS,
+					ConfigConstants.DEFAULT_TASK_MANAGER_DEBUG_MEMORY_USAGE_LOG_INTERVAL_MS);
+
+				MemoryLogger logger = new MemoryLogger(LOG, interval, taskManagerSystem);
+				logger.start();
+			}
+
+			// block until everything is done
+			taskManagerSystem.awaitTermination();
+		} catch (Throwable t) {
+			LOG.error("Error while starting up taskManager", t);
+			try {
+				taskManagerSystem.shutdown();
+			} catch (Throwable tt) {
+				LOG.warn("Could not cleanly shut down actor system", tt);
+			}
+			throw t;
+		}
+	}
+
+	// --------------------------------------------------------------------------
+	//  Starting and running the TaskManager
+	// --------------------------------------------------------------------------
+
+	/**
+	 * @param configuration                 The configuration for the TaskManager.
+	 * @param resourceID                    The id of the resource which the task manager will run on.
+	 * @param rpcService                  The rpc service which is used to start and connect to the TaskManager RpcEndpoint .
+	 * @param taskManagerHostname       The hostname/address that describes the TaskManager's data location.
+	 * @param haServices        Optionally, a high availability service can be provided. If none is given,
+	 *                                      then a HighAvailabilityServices is constructed from the configuration.
+	 * @param localTaskManagerCommunication     If true, the TaskManager will not initiate the TCP network stack.
+	 * @return An ActorRef to the TaskManager actor.
+	 * @throws org.apache.flink.configuration.IllegalConfigurationException     Thrown, if the given config contains illegal values.
+	 * @throws java.io.IOException      Thrown, if any of the I/O components (such as buffer pools,
+	 *                                       I/O manager, ...) cannot be properly started.
+	 * @throws java.lang.Exception      Thrown is some other error occurs while parsing the configuration
+	 *                                      or starting the TaskManager components.
+	 */
+	public static TaskExecutor startTaskManagerComponentsAndActor(
+		Configuration configuration,
+		ResourceID resourceID,
+		RpcService rpcService,
+		String taskManagerHostname,
+		HighAvailabilityServices haServices,
+		boolean localTaskManagerCommunication) throws Exception {
+
+		final TaskExecutorConfiguration taskExecutorConfig = parseTaskManagerConfiguration(
+			configuration, taskManagerHostname, localTaskManagerCommunication);
+
+		MemoryType memType = taskExecutorConfig.getNetworkConfig().memoryType();
+
+		// pre-start checks
+		checkTempDirs(taskExecutorConfig.getTmpDirPaths());
+
+		ExecutionContext executionContext = ExecutionContexts$.MODULE$.fromExecutor(new ForkJoinPool());
+
+		// we start the network first, to make sure it can allocate its buffers first
+		final NetworkEnvironment network = new NetworkEnvironment(
+			executionContext,
+			taskExecutorConfig.getTimeout(),
+			taskExecutorConfig.getNetworkConfig(),
+			taskExecutorConfig.getConnectionInfo());
+
+		// computing the amount of memory to use depends on how much memory is available
+		// it strictly needs to happen AFTER the network stack has been initialized
+
+		// check if a value has been configured
+		long configuredMemory = configuration.getLong(ConfigConstants.TASK_MANAGER_MEMORY_SIZE_KEY, -1L);
+		checkConfigParameter(configuredMemory == -1 || configuredMemory > 0, configuredMemory,
+			ConfigConstants.TASK_MANAGER_MEMORY_SIZE_KEY,
+			"MemoryManager needs at least one MB of memory. " +
+				"If you leave this config parameter empty, the system automatically " +
+				"pick a fraction of the available memory.");
+
+		final long memorySize;
+		boolean preAllocateMemory = configuration.getBoolean(
+			ConfigConstants.TASK_MANAGER_MEMORY_PRE_ALLOCATE_KEY,
+			ConfigConstants.DEFAULT_TASK_MANAGER_MEMORY_PRE_ALLOCATE);
+		if (configuredMemory > 0) {
+			if (preAllocateMemory) {
+				LOG.info("Using {} MB for managed memory." , configuredMemory);
+			} else {
+				LOG.info("Limiting managed memory to {} MB, memory will be allocated lazily." , configuredMemory);
+			}
+			memorySize = configuredMemory << 20; // megabytes to bytes
+		} else {
+			float fraction = configuration.getFloat(
+				ConfigConstants.TASK_MANAGER_MEMORY_FRACTION_KEY,
+				ConfigConstants.DEFAULT_MEMORY_MANAGER_MEMORY_FRACTION);
+			checkConfigParameter(fraction > 0.0f && fraction < 1.0f, fraction,
+				ConfigConstants.TASK_MANAGER_MEMORY_FRACTION_KEY,
+				"MemoryManager fraction of the free memory must be between 0.0 and 1.0");
+
+			if (memType == MemoryType.HEAP) {
+				long relativeMemSize = (long) (EnvironmentInformation.getSizeOfFreeHeapMemoryWithDefrag() * fraction);
+				if (preAllocateMemory) {
+					LOG.info("Using {} of the currently free heap space for managed heap memory ({} MB)." ,
+						fraction , relativeMemSize >> 20);
+				} else {
+					LOG.info("Limiting managed memory to {} of the currently free heap space ({} MB), " +
+						"memory will be allocated lazily." , fraction , relativeMemSize >> 20);
+				}
+				memorySize = relativeMemSize;
+			} else if (memType == MemoryType.OFF_HEAP) {
+				// The maximum heap memory has been adjusted according to the fraction
+				long maxMemory = EnvironmentInformation.getMaxJvmHeapMemory();
+				long directMemorySize = (long) (maxMemory / (1.0 - fraction) * fraction);
+				if (preAllocateMemory) {
+					LOG.info("Using {} of the maximum memory size for managed off-heap memory ({} MB)." ,
+						fraction, directMemorySize >> 20);
+				} else {
+					LOG.info("Limiting managed memory to {} of the maximum memory size ({} MB)," +
+						" memory will be allocated lazily.", fraction, directMemorySize >> 20);
+				}
+				memorySize = directMemorySize;
+			} else {
+				throw new RuntimeException("No supported memory type detected.");
+			}
+		}
+
+		// now start the memory manager
+		final MemoryManager memoryManager;
+		try {
+			memoryManager = new MemoryManager(
+				memorySize,
+				taskExecutorConfig.getNumberOfSlots(),
+				taskExecutorConfig.getNetworkConfig().networkBufferSize(),
+				memType,
+				preAllocateMemory);
+		} catch (OutOfMemoryError e) {
+			if (memType == MemoryType.HEAP) {
+				throw new Exception("OutOfMemory error (" + e.getMessage() +
+					") while allocating the TaskManager heap memory (" + memorySize + " bytes).", e);
+			} else if (memType == MemoryType.OFF_HEAP) {
+				throw new Exception("OutOfMemory error (" + e.getMessage() +
+					") while allocating the TaskManager off-heap memory (" + memorySize +
+					" bytes).Try increasing the maximum direct memory (-XX:MaxDirectMemorySize)", e);
+			} else {
+				throw e;
+			}
+		}
+
+		// start the I/O manager, it will create some temp directories.
+		final IOManager ioManager = new IOManagerAsync(taskExecutorConfig.getTmpDirPaths());
+
+		final TaskExecutor taskExecutor = new TaskExecutor(
+			taskExecutorConfig,
+			resourceID,
+			memoryManager,
+			ioManager,
+			network,
+			taskExecutorConfig.getNumberOfSlots(),
+			rpcService,
+			haServices);
+
+		return taskExecutor;
+	}
+
+	// --------------------------------------------------------------------------
+	//  Parsing and checking the TaskManager Configuration
+	// --------------------------------------------------------------------------
+
+	/**
+	 * Utility method to extract TaskManager config parameters from the configuration and to
+	 * sanity check them.
+	 *
+	 * @param configuration                 The configuration.
+	 * @param taskManagerHostname           The host name under which the TaskManager communicates.
+	 * @param localTaskManagerCommunication             True, to skip initializing the network stack.
+	 *                                      Use only in cases where only one task manager runs.
+	 * @return TaskExecutorConfiguration that wrappers InstanceConnectionInfo, NetworkEnvironmentConfiguration, etc.
+	 */
+	private static TaskExecutorConfiguration parseTaskManagerConfiguration(
+		Configuration configuration,
+		String taskManagerHostname,
+		boolean localTaskManagerCommunication) throws Exception {
+
+		// ------- read values from the config and check them ---------
+		//                      (a lot of them)
+
+		// ----> hosts / ports for communication and data exchange
+
+		int dataport = configuration.getInteger(ConfigConstants.TASK_MANAGER_DATA_PORT_KEY,
+			ConfigConstants.DEFAULT_TASK_MANAGER_DATA_PORT);
+		if (dataport == 0) {
+			dataport = NetUtils.getAvailablePort();
+		}
+		checkConfigParameter(dataport > 0, dataport, ConfigConstants.TASK_MANAGER_DATA_PORT_KEY,
+			"Leave config parameter empty or use 0 to let the system choose a port automatically.");
+
+		InetAddress taskManagerAddress = InetAddress.getByName(taskManagerHostname);
+		final InstanceConnectionInfo connectionInfo = new InstanceConnectionInfo(taskManagerAddress, dataport);
+
+		// ----> memory / network stack (shuffles/broadcasts), task slots, temp directories
+
+		// we need this because many configs have been written with a "-1" entry
+		int slots = configuration.getInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, 1);
+		if (slots == -1) {
+			slots = 1;
+		}
+		checkConfigParameter(slots >= 1, slots, ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS,
+			"Number of task slots must be at least one.");
+
+		final int numNetworkBuffers = configuration.getInteger(
+			ConfigConstants.TASK_MANAGER_NETWORK_NUM_BUFFERS_KEY,
+			ConfigConstants.DEFAULT_TASK_MANAGER_NETWORK_NUM_BUFFERS);
+		checkConfigParameter(numNetworkBuffers > 0, numNetworkBuffers,
+			ConfigConstants.TASK_MANAGER_NETWORK_NUM_BUFFERS_KEY, "");
+
+		final int pageSize = configuration.getInteger(
+			ConfigConstants.TASK_MANAGER_MEMORY_SEGMENT_SIZE_KEY,
+			ConfigConstants.DEFAULT_TASK_MANAGER_MEMORY_SEGMENT_SIZE);
+		// check page size of for minimum size
+		checkConfigParameter(pageSize >= MemoryManager.MIN_PAGE_SIZE, pageSize,
+			ConfigConstants.TASK_MANAGER_MEMORY_SEGMENT_SIZE_KEY,
+			"Minimum memory segment size is " + MemoryManager.MIN_PAGE_SIZE);
+		// check page size for power of two
+		checkConfigParameter(MathUtils.isPowerOf2(pageSize), pageSize,
+			ConfigConstants.TASK_MANAGER_MEMORY_SEGMENT_SIZE_KEY,
+			"Memory segment size must be a power of 2.");
+
+		// check whether we use heap or off-heap memory
+		final MemoryType memType;
+		if (configuration.getBoolean(ConfigConstants.TASK_MANAGER_MEMORY_OFF_HEAP_KEY, false)) {
+			memType = MemoryType.OFF_HEAP;
+		} else {
+			memType = MemoryType.HEAP;
+		}
+
+		// initialize the memory segment factory accordingly
+		if (memType == MemoryType.HEAP) {
+			if (!MemorySegmentFactory.initializeIfNotInitialized(HeapMemorySegment.FACTORY)) {
+				throw new Exception("Memory type is set to heap memory, but memory segment " +
+					"factory has been initialized for off-heap memory segments");
+			}
+		} else {
+			if (!MemorySegmentFactory.initializeIfNotInitialized(HybridMemorySegment.FACTORY)) {
+				throw new Exception("Memory type is set to off-heap memory, but memory segment " +
+					"factory has been initialized for heap memory segments");
+			}
+		}
+
+		final String[] tmpDirs = configuration.getString(
+			ConfigConstants.TASK_MANAGER_TMP_DIR_KEY,
+			ConfigConstants.DEFAULT_TASK_MANAGER_TMP_PATH).split(",|" + File.pathSeparator);
+
+		final NettyConfig nettyConfig;
+		if (!localTaskManagerCommunication) {
+			nettyConfig = new NettyConfig(connectionInfo.address(), connectionInfo.dataPort(), pageSize, slots, configuration);
+		} else {
+			nettyConfig = null;
+		}
+
+		// Default spill I/O mode for intermediate results
+		final String syncOrAsync = configuration.getString(
+			ConfigConstants.TASK_MANAGER_NETWORK_DEFAULT_IO_MODE,
+			ConfigConstants.DEFAULT_TASK_MANAGER_NETWORK_DEFAULT_IO_MODE);
+
+		final IOMode ioMode;
+		if (syncOrAsync.equals("async")) {
+			ioMode = IOManager.IOMode.ASYNC;
+		} else {
+			ioMode = IOManager.IOMode.SYNC;
+		}
+
+		final int queryServerPort =  configuration.getInteger(
+			ConfigConstants.QUERYABLE_STATE_SERVER_PORT,
+			ConfigConstants.DEFAULT_QUERYABLE_STATE_SERVER_PORT);
+
+		final int queryServerNetworkThreads =  configuration.getInteger(
+			ConfigConstants.QUERYABLE_STATE_SERVER_NETWORK_THREADS,
+			ConfigConstants.DEFAULT_QUERYABLE_STATE_SERVER_NETWORK_THREADS);
+
+		final int queryServerQueryThreads =  configuration.getInteger(
+			ConfigConstants.QUERYABLE_STATE_SERVER_QUERY_THREADS,
+			ConfigConstants.DEFAULT_QUERYABLE_STATE_SERVER_QUERY_THREADS);
+
+		final NetworkEnvironmentConfiguration networkConfig = new NetworkEnvironmentConfiguration(
+			numNetworkBuffers,
+			pageSize,
+			memType,
+			ioMode,
+			queryServerPort,
+			queryServerNetworkThreads,
+			queryServerQueryThreads,
+			localTaskManagerCommunication ? Option.<NettyConfig>empty() : new Some<>(nettyConfig),
+			new Tuple2<>(500, 3000));
+
+		// ----> timeouts, library caching, profiling
+
+		final FiniteDuration timeout;
+		try {
+			timeout = AkkaUtils.getTimeout(configuration);
+		} catch (Exception e) {
+			throw new IllegalArgumentException(
+				"Invalid format for '" + ConfigConstants.AKKA_ASK_TIMEOUT +
+					"'.Use formats like '50 s' or '1 min' to specify the timeout.");
+		}
+		LOG.info("Messages between TaskManager and JobManager have a max timeout of " + timeout);
+
+		final long cleanupInterval = configuration.getLong(
+			ConfigConstants.LIBRARY_CACHE_MANAGER_CLEANUP_INTERVAL,
+			ConfigConstants.DEFAULT_LIBRARY_CACHE_MANAGER_CLEANUP_INTERVAL) * 1000;
+
+		final FiniteDuration finiteRegistrationDuration;
+		try {
+			Duration maxRegistrationDuration = Duration.create(configuration.getString(
+				ConfigConstants.TASK_MANAGER_MAX_REGISTRATION_DURATION,
+				ConfigConstants.DEFAULT_TASK_MANAGER_MAX_REGISTRATION_DURATION));
+			if (maxRegistrationDuration.isFinite()) {
+				finiteRegistrationDuration = new FiniteDuration(maxRegistrationDuration.toSeconds(), TimeUnit.SECONDS);
+			} else {
+				finiteRegistrationDuration = null;
+			}
+		} catch (NumberFormatException e) {
+			throw new IllegalArgumentException("Invalid format for parameter " +
+				ConfigConstants.TASK_MANAGER_MAX_REGISTRATION_DURATION, e);
+		}
+
+		final FiniteDuration initialRegistrationPause;
+		try {
+			Duration pause = Duration.create(configuration.getString(
+				ConfigConstants.TASK_MANAGER_INITIAL_REGISTRATION_PAUSE,
+				ConfigConstants.DEFAULT_TASK_MANAGER_INITIAL_REGISTRATION_PAUSE));
+			if (pause.isFinite()) {
+				initialRegistrationPause = new FiniteDuration(pause.toSeconds(), TimeUnit.SECONDS);
+			} else {
+				throw new IllegalArgumentException("The initial registration pause must be finite: " + pause);
+			}
+		} catch (NumberFormatException e) {
+			throw new IllegalArgumentException("Invalid format for parameter " +
+				ConfigConstants.TASK_MANAGER_INITIAL_REGISTRATION_PAUSE, e);
+		}
+
+		final FiniteDuration maxRegistrationPause;
+		try {
+			Duration pause = Duration.create(configuration.getString(
+				ConfigConstants.TASK_MANAGER_MAX_REGISTARTION_PAUSE,
+				ConfigConstants.DEFAULT_TASK_MANAGER_MAX_REGISTRATION_PAUSE));
+			if (pause.isFinite()) {
+				maxRegistrationPause = new FiniteDuration(pause.toSeconds(), TimeUnit.SECONDS);
+			} else {
+				throw new IllegalArgumentException("The maximum registration pause must be finite: " + pause);
+			}
+		} catch (NumberFormatException e) {
+			throw new IllegalArgumentException("Invalid format for parameter " +
+				ConfigConstants.TASK_MANAGER_INITIAL_REGISTRATION_PAUSE, e);
+		}
+
+		final FiniteDuration refusedRegistrationPause;
+		try {
+			Duration pause = Duration.create(configuration.getString(
+				ConfigConstants.TASK_MANAGER_REFUSED_REGISTRATION_PAUSE,
+				ConfigConstants.DEFAULT_TASK_MANAGER_REFUSED_REGISTRATION_PAUSE));
+			if (pause.isFinite()) {
+				refusedRegistrationPause = new FiniteDuration(pause.toSeconds(), TimeUnit.SECONDS);
+			} else {
+				throw new IllegalArgumentException("The refused registration pause must be finite: " + pause);
+			}
+		} catch (NumberFormatException e) {
+			throw new IllegalArgumentException("Invalid format for parameter " +
+				ConfigConstants.TASK_MANAGER_INITIAL_REGISTRATION_PAUSE, e);
+		}
+
+		return new TaskExecutorConfiguration(
+			tmpDirs,
+			cleanupInterval,
+			connectionInfo,
+			networkConfig,
+			timeout,
+			finiteRegistrationDuration,
+			slots,
+			configuration,
+			initialRegistrationPause,
+			maxRegistrationPause,
+			refusedRegistrationPause);
+	}
+
+	/**
+	 * Validates a condition for a config parameter and displays a standard exception, if the
+	 * the condition does not hold.
+	 *
+	 * @param condition    The condition that must hold. If the condition is false, an exception is thrown.
+	 * @param parameter    The parameter value. Will be shown in the exception message.
+	 * @param name         The name of the config parameter. Will be shown in the exception message.
+	 * @param errorMessage The optional custom error message to append to the exception message.
+	 */
+	private static void checkConfigParameter(
+		boolean condition,
+		Object parameter,
+		String name,
+		String errorMessage) {
+		if (!condition) {
+			throw new IllegalConfigurationException("Invalid configuration value for " + name + " : " + parameter + " - " + errorMessage);
+		}
+	}
+
+	/**
+	 * Validates that all the directories denoted by the strings do actually exist, are proper
+	 * directories (not files), and are writable.
+	 *
+	 * @param tmpDirs The array of directory paths to check.
+	 * @throws Exception Thrown if any of the directories does not exist or is not writable
+	 *                   or is a file, rather than a directory.
+	 */
+	private static void checkTempDirs(String[] tmpDirs) throws IOException {
+		for (String dir : tmpDirs) {
+			if (dir != null && !dir.equals("")) {
+				File file = new File(dir);
+				if (!file.exists()) {
+					throw new IOException("Temporary file directory " + file.getAbsolutePath() + " does not exist.");
+				}
+				if (!file.isDirectory()) {
+					throw new IOException("Temporary file directory " + file.getAbsolutePath() + " is not a directory.");
+				}
+				if (!file.canWrite()) {
+					throw new IOException("Temporary file directory " + file.getAbsolutePath() + " is not writable.");
+				}
+
+				if (LOG.isInfoEnabled()) {
+					long totalSpaceGb = file.getTotalSpace() >> 30;
+					long usableSpaceGb = file.getUsableSpace() >> 30;
+					double usablePercentage = (double)usableSpaceGb / totalSpaceGb * 100;
+					String path = file.getAbsolutePath();
+					LOG.info(String.format("Temporary file directory '%s': total %d GB, " + "usable %d GB (%.2f%% usable)",
+						path, totalSpaceGb, usableSpaceGb, usablePercentage));
+				}
+			} else {
+				throw new IllegalArgumentException("Temporary file directory #$id is null.");
+			}
+		}
+	}
+
+	// ------------------------------------------------------------------------
+	//  Properties
+	// ------------------------------------------------------------------------
+
+	public ResourceID getResourceID() {
+		return resourceID;
+	}
+
+	// ------------------------------------------------------------------------
+	//  Error Handling
+	// ------------------------------------------------------------------------
+
+	/**
+	 * Notifies the TaskExecutor that a fatal error has occurred and it cannot proceed.
+	 * This method should be used when asynchronous threads want to notify the
+	 * TaskExecutor of a fatal error.
+	 *
+	 * @param t The exception describing the fatal error
+	 */
+	void onFatalErrorAsync(final Throwable t) {
+		runAsync(new Runnable() {
+			@Override
+			public void run() {
+				onFatalError(t);
+			}
+		});
+	}
+
+	/**
+	 * Notifies the TaskExecutor that a fatal error has occurred and it cannot proceed.
+	 * This method must only be called from within the TaskExecutor's main thread.
+	 *
+	 * @param t The exception describing the fatal error
+	 */
+	void onFatalError(Throwable t) {
+		// to be determined, probably delegate to a fatal error handler that 
+		// would either log (mini cluster) ot kill the process (yarn, mesos, ...)
+		log.error("FATAL ERROR", t);
+	}
+
+	// ------------------------------------------------------------------------
+	//  Access to fields for testing
+	// ------------------------------------------------------------------------
+
+	@VisibleForTesting
+	TaskExecutorToResourceManagerConnection getResourceManagerConnection() {
+		return resourceManagerConnection;
+	}
+
+	// ------------------------------------------------------------------------
+	//  Utility classes
+	// ------------------------------------------------------------------------
+
+	/**
+	 * The listener for leader changes of the resource manager
+	 */
+	private class ResourceManagerLeaderListener implements LeaderRetrievalListener {
+
+		@Override
+		public void notifyLeaderAddress(String leaderAddress, UUID leaderSessionID) {
+			getSelf().notifyOfNewResourceManagerLeader(leaderAddress, leaderSessionID);
+		}
+
+		@Override
+		public void handleError(Exception exception) {
+			onFatalErrorAsync(exception);
+		}
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/2f12ba3e/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/TaskExecutorConfiguration.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/TaskExecutorConfiguration.java b/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/TaskExecutorConfiguration.java
new file mode 100644
index 0000000..3707a47
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/TaskExecutorConfiguration.java
@@ -0,0 +1,151 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.taskexecutor;
+
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.runtime.instance.InstanceConnectionInfo;
+import org.apache.flink.runtime.taskmanager.NetworkEnvironmentConfiguration;
+
+import scala.concurrent.duration.FiniteDuration;
+
+import java.io.Serializable;
+import java.util.concurrent.TimeUnit;
+
+import static org.apache.flink.util.Preconditions.checkNotNull;
+
+/**
+ * {@link TaskExecutor} Configuration
+ */
+public class TaskExecutorConfiguration implements Serializable {
+
+	private static final long serialVersionUID = 1L;
+
+	private final String[] tmpDirPaths;
+
+	private final long cleanupInterval;
+
+	private final int numberOfSlots;
+
+	private final Configuration configuration;
+
+	private final FiniteDuration timeout;
+	private final FiniteDuration maxRegistrationDuration;
+	private final FiniteDuration initialRegistrationPause;
+	private final FiniteDuration maxRegistrationPause;
+	private final FiniteDuration refusedRegistrationPause;
+
+	private final NetworkEnvironmentConfiguration networkConfig;
+
+	private final InstanceConnectionInfo connectionInfo;
+
+	public TaskExecutorConfiguration(
+			String[] tmpDirPaths,
+			long cleanupInterval,
+			InstanceConnectionInfo connectionInfo,
+			NetworkEnvironmentConfiguration networkConfig,
+			FiniteDuration timeout,
+			FiniteDuration maxRegistrationDuration,
+			int numberOfSlots,
+			Configuration configuration) {
+
+		this (tmpDirPaths,
+			cleanupInterval,
+			connectionInfo,
+			networkConfig,
+			timeout,
+			maxRegistrationDuration,
+			numberOfSlots,
+			configuration,
+			new FiniteDuration(500, TimeUnit.MILLISECONDS),
+			new FiniteDuration(30, TimeUnit.SECONDS),
+			new FiniteDuration(10, TimeUnit.SECONDS));
+	}
+
+	public TaskExecutorConfiguration(
+			String[] tmpDirPaths,
+			long cleanupInterval,
+			InstanceConnectionInfo connectionInfo,
+			NetworkEnvironmentConfiguration networkConfig,
+			FiniteDuration timeout,
+			FiniteDuration maxRegistrationDuration,
+			int numberOfSlots,
+			Configuration configuration,
+			FiniteDuration initialRegistrationPause,
+			FiniteDuration maxRegistrationPause,
+			FiniteDuration refusedRegistrationPause) {
+
+		this.tmpDirPaths = checkNotNull(tmpDirPaths);
+		this.cleanupInterval = checkNotNull(cleanupInterval);
+		this.connectionInfo = checkNotNull(connectionInfo);
+		this.networkConfig = checkNotNull(networkConfig);
+		this.timeout = checkNotNull(timeout);
+		this.maxRegistrationDuration = maxRegistrationDuration;
+		this.numberOfSlots = checkNotNull(numberOfSlots);
+		this.configuration = checkNotNull(configuration);
+		this.initialRegistrationPause = checkNotNull(initialRegistrationPause);
+		this.maxRegistrationPause = checkNotNull(maxRegistrationPause);
+		this.refusedRegistrationPause = checkNotNull(refusedRegistrationPause);
+	}
+
+	// --------------------------------------------------------------------------------------------
+	//  Properties
+	// --------------------------------------------------------------------------------------------
+
+	public String[] getTmpDirPaths() {
+		return tmpDirPaths;
+	}
+
+	public long getCleanupInterval() {
+		return cleanupInterval;
+	}
+
+	public InstanceConnectionInfo getConnectionInfo() { return connectionInfo; }
+
+	public NetworkEnvironmentConfiguration getNetworkConfig() { return networkConfig; }
+
+	public FiniteDuration getTimeout() {
+		return timeout;
+	}
+
+	public FiniteDuration getMaxRegistrationDuration() {
+		return maxRegistrationDuration;
+	}
+
+	public int getNumberOfSlots() {
+		return numberOfSlots;
+	}
+
+	public Configuration getConfiguration() {
+		return configuration;
+	}
+
+	public FiniteDuration getInitialRegistrationPause() {
+		return initialRegistrationPause;
+	}
+
+	public FiniteDuration getMaxRegistrationPause() {
+		return maxRegistrationPause;
+	}
+
+	public FiniteDuration getRefusedRegistrationPause() {
+		return refusedRegistrationPause;
+	}
+
+}
+

http://git-wip-us.apache.org/repos/asf/flink/blob/2f12ba3e/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/TaskExecutorGateway.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/TaskExecutorGateway.java b/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/TaskExecutorGateway.java
new file mode 100644
index 0000000..6c99706
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/TaskExecutorGateway.java
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.taskexecutor;
+
+import org.apache.flink.runtime.rpc.RpcGateway;
+
+import java.util.UUID;
+
+/**
+ * {@link TaskExecutor} RPC gateway interface
+ */
+public interface TaskExecutorGateway extends RpcGateway {
+
+	// ------------------------------------------------------------------------
+	//  ResourceManager handlers
+	// ------------------------------------------------------------------------
+
+	void notifyOfNewResourceManagerLeader(String address, UUID resourceManagerLeaderId);
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/2f12ba3e/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/TaskExecutorRegistrationSuccess.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/TaskExecutorRegistrationSuccess.java b/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/TaskExecutorRegistrationSuccess.java
new file mode 100644
index 0000000..b357f52
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/TaskExecutorRegistrationSuccess.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.taskexecutor;
+
+import org.apache.flink.runtime.instance.InstanceID;
+import org.apache.flink.runtime.registration.RegistrationResponse;
+
+import java.io.Serializable;
+
+/**
+ * Base class for responses from the ResourceManager to a registration attempt by a
+ * TaskExecutor.
+ */
+public final class TaskExecutorRegistrationSuccess extends RegistrationResponse.Success implements Serializable {
+
+	private static final long serialVersionUID = 1L;
+
+	private final InstanceID registrationId;
+
+	private final long heartbeatInterval;
+
+	/**
+	 * Create a new {@code TaskExecutorRegistrationSuccess} message.
+	 * 
+	 * @param registrationId     The ID that the ResourceManager assigned the registration.
+	 * @param heartbeatInterval  The interval in which the ResourceManager will heartbeat the TaskExecutor.
+	 */
+	public TaskExecutorRegistrationSuccess(InstanceID registrationId, long heartbeatInterval) {
+		this.registrationId = registrationId;
+		this.heartbeatInterval = heartbeatInterval;
+	}
+
+	/**
+	 * Gets the ID that the ResourceManager assigned the registration.
+	 */
+	public InstanceID getRegistrationId() {
+		return registrationId;
+	}
+
+	/**
+	 * Gets the interval in which the ResourceManager will heartbeat the TaskExecutor.
+	 */
+	public long getHeartbeatInterval() {
+		return heartbeatInterval;
+	}
+
+	@Override
+	public String toString() {
+		return "TaskExecutorRegistrationSuccess (" + registrationId + " / " + heartbeatInterval + ')';
+	}
+
+}
+
+
+
+
+
+
+

http://git-wip-us.apache.org/repos/asf/flink/blob/2f12ba3e/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/TaskExecutorToResourceManagerConnection.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/TaskExecutorToResourceManagerConnection.java b/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/TaskExecutorToResourceManagerConnection.java
new file mode 100644
index 0000000..25332a0
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/TaskExecutorToResourceManagerConnection.java
@@ -0,0 +1,198 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.taskexecutor;
+
+import akka.dispatch.OnFailure;
+import akka.dispatch.OnSuccess;
+
+import org.apache.flink.api.java.tuple.Tuple2;
+import org.apache.flink.runtime.clusterframework.types.ResourceID;
+import org.apache.flink.runtime.instance.InstanceID;
+import org.apache.flink.runtime.rpc.RpcService;
+import org.apache.flink.runtime.registration.RegistrationResponse;
+import org.apache.flink.runtime.registration.RetryingRegistration;
+import org.apache.flink.runtime.resourcemanager.ResourceManagerGateway;
+
+import org.slf4j.Logger;
+
+import scala.concurrent.Future;
+import scala.concurrent.duration.FiniteDuration;
+
+import java.util.UUID;
+import java.util.concurrent.TimeUnit;
+
+import static org.apache.flink.util.Preconditions.checkNotNull;
+import static org.apache.flink.util.Preconditions.checkState;
+
+/**
+ * The connection between a TaskExecutor and the ResourceManager.
+ */
+public class TaskExecutorToResourceManagerConnection {
+
+	/** the logger for all log messages of this class */
+	private final Logger log;
+
+	/** the TaskExecutor whose connection to the ResourceManager this represents */
+	private final TaskExecutor taskExecutor;
+
+	private final UUID resourceManagerLeaderId;
+
+	private final String resourceManagerAddress;
+
+	private TaskExecutorToResourceManagerConnection.ResourceManagerRegistration pendingRegistration;
+
+	private ResourceManagerGateway registeredResourceManager;
+
+	private InstanceID registrationId;
+
+	/** flag indicating that the connection is closed */
+	private volatile boolean closed;
+
+
+	public TaskExecutorToResourceManagerConnection(
+			Logger log,
+			TaskExecutor taskExecutor,
+			String resourceManagerAddress,
+			UUID resourceManagerLeaderId) {
+
+		this.log = checkNotNull(log);
+		this.taskExecutor = checkNotNull(taskExecutor);
+		this.resourceManagerAddress = checkNotNull(resourceManagerAddress);
+		this.resourceManagerLeaderId = checkNotNull(resourceManagerLeaderId);
+	}
+
+	// ------------------------------------------------------------------------
+	//  Life cycle
+	// ------------------------------------------------------------------------
+
+	@SuppressWarnings("unchecked")
+	public void start() {
+		checkState(!closed, "The connection is already closed");
+		checkState(!isRegistered() && pendingRegistration == null, "The connection is already started");
+
+		pendingRegistration = new TaskExecutorToResourceManagerConnection.ResourceManagerRegistration(
+				log, taskExecutor.getRpcService(),
+				resourceManagerAddress, resourceManagerLeaderId,
+				taskExecutor.getAddress(), taskExecutor.getResourceID());
+		pendingRegistration.startRegistration();
+
+		Future<Tuple2<ResourceManagerGateway, TaskExecutorRegistrationSuccess>> future = pendingRegistration.getFuture();
+		
+		future.onSuccess(new OnSuccess<Tuple2<ResourceManagerGateway, TaskExecutorRegistrationSuccess>>() {
+			@Override
+			public void onSuccess(Tuple2<ResourceManagerGateway, TaskExecutorRegistrationSuccess> result) {
+				registeredResourceManager = result.f0;
+				registrationId = result.f1.getRegistrationId();
+			}
+		}, taskExecutor.getMainThreadExecutionContext());
+		
+		// this future should only ever fail if there is a bug, not if the registration is declined
+		future.onFailure(new OnFailure() {
+			@Override
+			public void onFailure(Throwable failure) {
+				taskExecutor.onFatalError(failure);
+			}
+		}, taskExecutor.getMainThreadExecutionContext());
+	}
+
+	public void close() {
+		closed = true;
+
+		// make sure we do not keep re-trying forever
+		if (pendingRegistration != null) {
+			pendingRegistration.cancel();
+		}
+	}
+
+	public boolean isClosed() {
+		return closed;
+	}
+
+	// ------------------------------------------------------------------------
+	//  Properties
+	// ------------------------------------------------------------------------
+
+	public UUID getResourceManagerLeaderId() {
+		return resourceManagerLeaderId;
+	}
+
+	public String getResourceManagerAddress() {
+		return resourceManagerAddress;
+	}
+
+	/**
+	 * Gets the ResourceManagerGateway. This returns null until the registration is completed.
+	 */
+	public ResourceManagerGateway getResourceManager() {
+		return registeredResourceManager;
+	}
+
+	/**
+	 * Gets the ID under which the TaskExecutor is registered at the ResourceManager.
+	 * This returns null until the registration is completed.
+	 */
+	public InstanceID getRegistrationId() {
+		return registrationId;
+	}
+
+	public boolean isRegistered() {
+		return registeredResourceManager != null;
+	}
+
+	// ------------------------------------------------------------------------
+
+	@Override
+	public String toString() {
+		return String.format("Connection to ResourceManager %s (leaderId=%s)",
+				resourceManagerAddress, resourceManagerLeaderId); 
+	}
+
+	// ------------------------------------------------------------------------
+	//  Utilities
+	// ------------------------------------------------------------------------
+
+	private static class ResourceManagerRegistration
+			extends RetryingRegistration<ResourceManagerGateway, TaskExecutorRegistrationSuccess> {
+
+		private final String taskExecutorAddress;
+		
+		private final ResourceID resourceID;
+
+		ResourceManagerRegistration(
+				Logger log,
+				RpcService rpcService,
+				String targetAddress,
+				UUID leaderId,
+				String taskExecutorAddress,
+				ResourceID resourceID) {
+
+			super(log, rpcService, "ResourceManager", ResourceManagerGateway.class, targetAddress, leaderId);
+			this.taskExecutorAddress = checkNotNull(taskExecutorAddress);
+			this.resourceID = checkNotNull(resourceID);
+		}
+
+		@Override
+		protected Future<RegistrationResponse> invokeRegistration(
+				ResourceManagerGateway resourceManager, UUID leaderId, long timeoutMillis) throws Exception {
+
+			FiniteDuration timeout = new FiniteDuration(timeoutMillis, TimeUnit.MILLISECONDS);
+			return resourceManager.registerTaskExecutor(leaderId, taskExecutorAddress, resourceID, timeout);
+		}
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/2f12ba3e/flink-runtime/src/test/java/org/apache/flink/runtime/clusterframework/ClusterShutdownITCase.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/clusterframework/ClusterShutdownITCase.java b/flink-runtime/src/test/java/org/apache/flink/runtime/clusterframework/ClusterShutdownITCase.java
new file mode 100644
index 0000000..744308c
--- /dev/null
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/clusterframework/ClusterShutdownITCase.java
@@ -0,0 +1,156 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.clusterframework;
+
+import akka.actor.ActorSystem;
+import akka.testkit.JavaTestKit;
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.runtime.akka.AkkaUtils;
+import org.apache.flink.runtime.clusterframework.ApplicationStatus;
+import org.apache.flink.runtime.clusterframework.messages.StopCluster;
+import org.apache.flink.runtime.clusterframework.messages.StopClusterSuccessful;
+import org.apache.flink.runtime.instance.ActorGateway;
+import org.apache.flink.runtime.messages.Messages;
+import org.apache.flink.runtime.testingUtils.TestingMessages;
+import org.apache.flink.runtime.testingUtils.TestingUtils;
+import org.apache.flink.runtime.testutils.TestingResourceManager;
+import org.apache.flink.util.TestLogger;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import scala.Option;
+
+
+/**
+ * Runs tests to ensure that a cluster is shutdown properly.
+ */
+public class ClusterShutdownITCase extends TestLogger {
+
+	private static ActorSystem system;
+
+	private static Configuration config = new Configuration();
+
+	@BeforeClass
+	public static void setup() {
+		system = AkkaUtils.createActorSystem(AkkaUtils.getDefaultAkkaConfig());
+	}
+
+	@AfterClass
+	public static void teardown() {
+		JavaTestKit.shutdownActorSystem(system);
+	}
+
+	/**
+	 * Tests a faked cluster shutdown procedure without the ResourceManager.
+	 */
+	@Test
+	public void testClusterShutdownWithoutResourceManager() {
+
+		new JavaTestKit(system){{
+		new Within(duration("30 seconds")) {
+		@Override
+		protected void run() {
+
+			ActorGateway me =
+				TestingUtils.createForwardingActor(system, getTestActor(), Option.<String>empty());
+
+			// start job manager which doesn't shutdown the actor system
+			ActorGateway jobManager =
+				TestingUtils.createJobManager(system, config, "jobmanager1");
+
+			// Tell the JobManager to inform us of shutdown actions
+			jobManager.tell(TestingMessages.getNotifyOfComponentShutdown(), me);
+
+			// Register a TaskManager
+			ActorGateway taskManager =
+				TestingUtils.createTaskManager(system, jobManager, config, true, true);
+
+			// Tell the TaskManager to inform us of TaskManager shutdowns
+			taskManager.tell(TestingMessages.getNotifyOfComponentShutdown(), me);
+
+
+			// No resource manager connected
+			jobManager.tell(new StopCluster(ApplicationStatus.SUCCEEDED, "Shutting down."), me);
+
+			expectMsgAllOf(
+				new TestingMessages.ComponentShutdown(taskManager.actor()),
+				new TestingMessages.ComponentShutdown(jobManager.actor()),
+				StopClusterSuccessful.getInstance()
+			);
+
+		}};
+		}};
+	}
+
+	/**
+	 * Tests a faked cluster shutdown procedure with the ResourceManager.
+	 */
+	@Test
+	public void testClusterShutdownWithResourceManager() {
+
+		new JavaTestKit(system){{
+		new Within(duration("30 seconds")) {
+		@Override
+		protected void run() {
+
+			ActorGateway me =
+				TestingUtils.createForwardingActor(system, getTestActor(), Option.<String>empty());
+
+			// start job manager which doesn't shutdown the actor system
+			ActorGateway jobManager =
+				TestingUtils.createJobManager(system, config, "jobmanager2");
+
+			// Tell the JobManager to inform us of shutdown actions
+			jobManager.tell(TestingMessages.getNotifyOfComponentShutdown(), me);
+
+			// Register a TaskManager
+			ActorGateway taskManager =
+				TestingUtils.createTaskManager(system, jobManager, config, true, true);
+
+			// Tell the TaskManager to inform us of TaskManager shutdowns
+			taskManager.tell(TestingMessages.getNotifyOfComponentShutdown(), me);
+
+			// Start resource manager and let it register
+			ActorGateway resourceManager =
+				TestingUtils.createResourceManager(system, jobManager.actor(), config);
+
+			// Tell the ResourceManager to inform us of ResourceManager shutdowns
+			resourceManager.tell(TestingMessages.getNotifyOfComponentShutdown(), me);
+
+			// notify about a resource manager registration at the job manager
+			resourceManager.tell(new TestingResourceManager.NotifyWhenResourceManagerConnected(), me);
+
+			// Wait for resource manager
+			expectMsgEquals(Messages.getAcknowledge());
+
+
+			// Shutdown cluster with resource manager connected
+			jobManager.tell(new StopCluster(ApplicationStatus.SUCCEEDED, "Shutting down."), me);
+
+			expectMsgAllOf(
+				new TestingMessages.ComponentShutdown(taskManager.actor()),
+				new TestingMessages.ComponentShutdown(jobManager.actor()),
+				new TestingMessages.ComponentShutdown(resourceManager.actor()),
+				StopClusterSuccessful.getInstance()
+			);
+
+		}};
+		}};
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/2f12ba3e/flink-runtime/src/test/java/org/apache/flink/runtime/clusterframework/ResourceManagerITCase.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/clusterframework/ResourceManagerITCase.java b/flink-runtime/src/test/java/org/apache/flink/runtime/clusterframework/ResourceManagerITCase.java
new file mode 100644
index 0000000..1565dc3
--- /dev/null
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/clusterframework/ResourceManagerITCase.java
@@ -0,0 +1,162 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.clusterframework;
+
+import akka.actor.ActorSystem;
+import akka.testkit.JavaTestKit;
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.runtime.akka.AkkaUtils;
+import org.apache.flink.runtime.clusterframework.types.ResourceID;
+import org.apache.flink.runtime.instance.ActorGateway;
+import org.apache.flink.runtime.instance.HardwareDescription;
+import org.apache.flink.runtime.taskmanager.TaskManagerLocation;
+import org.apache.flink.runtime.messages.Messages;
+import org.apache.flink.runtime.messages.RegistrationMessages;
+import org.apache.flink.runtime.testingUtils.TestingUtils;
+import org.apache.flink.runtime.testutils.TestingResourceManager;
+import org.apache.flink.util.TestLogger;
+
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import scala.Option;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+/**
+ * It cases which test the interaction of the resource manager with job manager and task managers.
+ * Runs all tests in one Actor system.
+ */
+public class ResourceManagerITCase extends TestLogger {
+
+	private static ActorSystem system;
+
+	private static Configuration config = new Configuration();
+
+	@BeforeClass
+	public static void setup() {
+		system = AkkaUtils.createActorSystem(AkkaUtils.getDefaultAkkaConfig());
+	}
+
+	@AfterClass
+	public static void teardown() {
+		JavaTestKit.shutdownActorSystem(system);
+	}
+
+	/**
+	 * Tests whether the resource manager connects and reconciles existing task managers.
+	 */
+	@Test
+	public void testResourceManagerReconciliation() {
+
+		new JavaTestKit(system){{
+		new Within(duration("10 seconds")) {
+		@Override
+		protected void run() {
+
+			ActorGateway jobManager =
+				TestingUtils.createJobManager(system, config, "ReconciliationTest");
+			ActorGateway me =
+				TestingUtils.createForwardingActor(system, getTestActor(), Option.<String>empty());
+
+			// !! no resource manager started !!
+
+			ResourceID resourceID = ResourceID.generate();
+
+			TaskManagerLocation location = mock(TaskManagerLocation.class);
+			when(location.getResourceID()).thenReturn(resourceID);
+
+			HardwareDescription resourceProfile = HardwareDescription.extractFromSystem(1_000_000);
+
+			jobManager.tell(
+				new RegistrationMessages.RegisterTaskManager(resourceID, location, resourceProfile, 1),
+				me);
+
+			expectMsgClass(RegistrationMessages.AcknowledgeRegistration.class);
+
+			// now start the resource manager
+			ActorGateway resourceManager =
+				TestingUtils.createResourceManager(system, jobManager.actor(), config);
+
+			// register at testing job manager to receive a message once a resource manager registers
+			resourceManager.tell(new TestingResourceManager.NotifyWhenResourceManagerConnected(), me);
+
+			// Wait for resource manager
+			expectMsgEquals(Messages.getAcknowledge());
+
+			// check if we registered the task manager resource
+			resourceManager.tell(new TestingResourceManager.GetRegisteredResources(), me);
+
+			TestingResourceManager.GetRegisteredResourcesReply reply =
+				expectMsgClass(TestingResourceManager.GetRegisteredResourcesReply.class);
+
+			assertEquals(1, reply.resources.size());
+			assertTrue(reply.resources.contains(resourceID));
+
+		}};
+		}};
+	}
+
+	/**
+	 * Tests whether the resource manager gets informed upon TaskManager registration.
+	 */
+	@Test
+	public void testResourceManagerTaskManagerRegistration() {
+
+		new JavaTestKit(system){{
+		new Within(duration("30 seconds")) {
+		@Override
+		protected void run() {
+
+			ActorGateway jobManager =
+				TestingUtils.createJobManager(system, config, "RegTest");
+			ActorGateway me =
+				TestingUtils.createForwardingActor(system, getTestActor(), Option.<String>empty());
+
+			// start the resource manager
+			ActorGateway resourceManager =
+				TestingUtils.createResourceManager(system, jobManager.actor(), config);
+
+			// notify about a resource manager registration at the job manager
+			resourceManager.tell(new TestingResourceManager.NotifyWhenResourceManagerConnected(), me);
+
+			// Wait for resource manager
+			expectMsgEquals(Messages.getAcknowledge());
+
+			// start task manager and wait for registration
+			ActorGateway taskManager =
+				TestingUtils.createTaskManager(system, jobManager.actor(), config, true, true);
+
+			// check if we registered the task manager resource
+			resourceManager.tell(new TestingResourceManager.GetRegisteredResources(), me);
+
+			TestingResourceManager.GetRegisteredResourcesReply reply =
+				expectMsgClass(TestingResourceManager.GetRegisteredResourcesReply.class);
+
+			assertEquals(1, reply.resources.size());
+
+		}};
+		}};
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/2f12ba3e/flink-runtime/src/test/java/org/apache/flink/runtime/clusterframework/ResourceManagerTest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/clusterframework/ResourceManagerTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/clusterframework/ResourceManagerTest.java
new file mode 100644
index 0000000..ca8a07a
--- /dev/null
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/clusterframework/ResourceManagerTest.java
@@ -0,0 +1,338 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.clusterframework;
+
+import akka.actor.ActorSystem;
+import akka.testkit.JavaTestKit;
+import org.apache.flink.configuration.ConfigConstants;
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.runtime.akka.AkkaUtils;
+import org.apache.flink.runtime.clusterframework.messages.NotifyResourceStarted;
+import org.apache.flink.runtime.clusterframework.messages.RegisterResourceManager;
+import org.apache.flink.runtime.clusterframework.messages.RegisterResourceManagerSuccessful;
+import org.apache.flink.runtime.clusterframework.messages.RemoveResource;
+import org.apache.flink.runtime.clusterframework.messages.ResourceRemoved;
+import org.apache.flink.runtime.clusterframework.messages.TriggerRegistrationAtJobManager;
+import org.apache.flink.runtime.clusterframework.types.ResourceID;
+import org.apache.flink.runtime.instance.ActorGateway;
+import org.apache.flink.runtime.messages.Acknowledge;
+import org.apache.flink.runtime.messages.JobManagerMessages;
+import org.apache.flink.runtime.testingUtils.TestingUtils;
+import org.apache.flink.runtime.testutils.TestingResourceManager;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import scala.Option;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import static org.junit.Assert.*;
+
+/**
+ * General tests for the resource manager component.
+ */
+public class ResourceManagerTest {
+
+	private static ActorSystem system;
+
+	private static ActorGateway fakeJobManager;
+	private static ActorGateway resourceManager;
+
+	private static Configuration config = new Configuration();
+
+	@BeforeClass
+	public static void setup() {
+		system = AkkaUtils.createLocalActorSystem(config);
+	}
+
+	@AfterClass
+	public static void teardown() {
+		JavaTestKit.shutdownActorSystem(system);
+	}
+
+	/**
+	 * Tests the registration and reconciliation of the ResourceManager with the JobManager
+	 */
+	@Test
+	public void testJobManagerRegistrationAndReconciliation() {
+		new JavaTestKit(system){{
+		new Within(duration("10 seconds")) {
+		@Override
+		protected void run() {
+			fakeJobManager = TestingUtils.createForwardingActor(system, getTestActor(), Option.<String>empty());
+			resourceManager = TestingUtils.createResourceManager(system, fakeJobManager.actor(), config);
+
+			expectMsgClass(RegisterResourceManager.class);
+
+			List<ResourceID> resourceList = new ArrayList<>();
+			resourceList.add(ResourceID.generate());
+			resourceList.add(ResourceID.generate());
+			resourceList.add(ResourceID.generate());
+
+			resourceManager.tell(
+				new RegisterResourceManagerSuccessful(fakeJobManager.actor(), resourceList),
+				fakeJobManager);
+
+			resourceManager.tell(new TestingResourceManager.GetRegisteredResources(), fakeJobManager);
+			TestingResourceManager.GetRegisteredResourcesReply reply =
+				expectMsgClass(TestingResourceManager.GetRegisteredResourcesReply.class);
+
+			for (ResourceID id : resourceList) {
+				if (!reply.resources.contains(id)) {
+					fail("Expected to find all resources that were provided during registration.");
+				}
+			}
+		}};
+		}};
+	}
+
+	/**
+	 * Tests delayed or erroneous registration of the ResourceManager with the JobManager
+	 */
+	@Test
+	public void testDelayedJobManagerRegistration() {
+		new JavaTestKit(system){{
+		new Within(duration("10 seconds")) {
+		@Override
+		protected void run() {
+
+			// set a short timeout for lookups
+			Configuration shortTimeoutConfig = config.clone();
+			shortTimeoutConfig.setString(ConfigConstants.AKKA_LOOKUP_TIMEOUT, "1 s");
+
+			fakeJobManager = TestingUtils.createForwardingActor(system, getTestActor(), Option.<String>empty());
+			resourceManager = TestingUtils.createResourceManager(system, fakeJobManager.actor(), shortTimeoutConfig);
+
+			// wait for registration message
+			RegisterResourceManager msg = expectMsgClass(RegisterResourceManager.class);
+			// give wrong response
+			getLastSender().tell(new JobManagerMessages.LeaderSessionMessage(null, new Object()),
+				fakeJobManager.actor());
+
+			// expect another retry and let it time out
+			expectMsgClass(RegisterResourceManager.class);
+
+			// wait for next try after timeout
+			expectMsgClass(RegisterResourceManager.class);
+
+		}};
+		}};
+	}
+
+	@Test
+	public void testTriggerReconnect() {
+		new JavaTestKit(system){{
+		new Within(duration("10 seconds")) {
+		@Override
+		protected void run() {
+
+			// set a long timeout for lookups such that the test fails in case of timeouts
+			Configuration shortTimeoutConfig = config.clone();
+			shortTimeoutConfig.setString(ConfigConstants.AKKA_LOOKUP_TIMEOUT, "99999 s");
+
+			fakeJobManager = TestingUtils.createForwardingActor(system, getTestActor(), Option.<String>empty());
+			resourceManager = TestingUtils.createResourceManager(system, fakeJobManager.actor(), shortTimeoutConfig);
+
+			// wait for registration message
+			RegisterResourceManager msg = expectMsgClass(RegisterResourceManager.class);
+			// all went well
+			resourceManager.tell(
+				new RegisterResourceManagerSuccessful(fakeJobManager.actor(), Collections.<ResourceID>emptyList()),
+				fakeJobManager);
+
+			// force a reconnect
+			resourceManager.tell(
+				new TriggerRegistrationAtJobManager(fakeJobManager.actor()),
+				fakeJobManager);
+
+			// new registration attempt should come in
+			expectMsgClass(RegisterResourceManager.class);
+
+		}};
+		}};
+	}
+
+	/**
+	 * Tests the registration and accounting of resources at the ResourceManager.
+	 */
+	@Test
+	public void testTaskManagerRegistration() {
+		new JavaTestKit(system){{
+		new Within(duration("10 seconds")) {
+		@Override
+		protected void run() {
+
+			fakeJobManager = TestingUtils.createForwardingActor(system, getTestActor(), Option.<String>empty());
+			resourceManager = TestingUtils.createResourceManager(system, fakeJobManager.actor(), config);
+
+			// register with JM
+			expectMsgClass(RegisterResourceManager.class);
+			resourceManager.tell(
+				new RegisterResourceManagerSuccessful(fakeJobManager.actor(), Collections.<ResourceID>emptyList()),
+				fakeJobManager);
+
+			ResourceID resourceID = ResourceID.generate();
+
+			// Send task manager registration
+			resourceManager.tell(new NotifyResourceStarted(resourceID),
+				fakeJobManager);
+
+			expectMsgClass(Acknowledge.class);
+
+			// check for number registration of registered resources
+			resourceManager.tell(new TestingResourceManager.GetRegisteredResources(), fakeJobManager);
+			TestingResourceManager.GetRegisteredResourcesReply reply =
+				expectMsgClass(TestingResourceManager.GetRegisteredResourcesReply.class);
+
+			assertEquals(1, reply.resources.size());
+
+			// Send task manager registration again
+			resourceManager.tell(new NotifyResourceStarted(resourceID),
+				fakeJobManager);
+
+			expectMsgClass(Acknowledge.class);
+
+			// check for number registration of registered resources
+			resourceManager.tell(new TestingResourceManager.GetRegisteredResources(), fakeJobManager);
+			reply = expectMsgClass(TestingResourceManager.GetRegisteredResourcesReply.class);
+
+			assertEquals(1, reply.resources.size());
+
+			// Send invalid null resource id to throw an exception during resource registration
+			resourceManager.tell(new NotifyResourceStarted(null),
+				fakeJobManager);
+
+			expectMsgClass(Acknowledge.class);
+
+			// check for number registration of registered resources
+			resourceManager.tell(new TestingResourceManager.GetRegisteredResources(), fakeJobManager);
+			reply = expectMsgClass(TestingResourceManager.GetRegisteredResourcesReply.class);
+
+			assertEquals(1, reply.resources.size());
+		}};
+		}};
+	}
+
+	@Test
+	public void testResourceRemoval() {
+		new JavaTestKit(system){{
+		new Within(duration("10 seconds")) {
+		@Override
+		protected void run() {
+
+			fakeJobManager = TestingUtils.createForwardingActor(system, getTestActor(), Option.<String>empty());
+			resourceManager = TestingUtils.createResourceManager(system, fakeJobManager.actor(), config);
+
+			// register with JM
+			expectMsgClass(RegisterResourceManager.class);
+			resourceManager.tell(
+				new RegisterResourceManagerSuccessful(fakeJobManager.actor(), Collections.<ResourceID>emptyList()),
+				fakeJobManager);
+
+			ResourceID resourceID = ResourceID.generate();
+
+			// remove unknown resource
+			resourceManager.tell(new RemoveResource(resourceID), fakeJobManager);
+
+			// Send task manager registration
+			resourceManager.tell(new NotifyResourceStarted(resourceID),
+				fakeJobManager);
+
+			expectMsgClass(Acknowledge.class);
+
+			// check for number registration of registered resources
+			resourceManager.tell(new TestingResourceManager.GetRegisteredResources(), fakeJobManager);
+			TestingResourceManager.GetRegisteredResourcesReply reply =
+				expectMsgClass(TestingResourceManager.GetRegisteredResourcesReply.class);
+
+			assertEquals(1, reply.resources.size());
+			assertTrue(reply.resources.contains(resourceID));
+
+			// remove resource
+			resourceManager.tell(new RemoveResource(resourceID), fakeJobManager);
+
+			// check for number registration of registered resources
+			resourceManager.tell(new TestingResourceManager.GetRegisteredResources(), fakeJobManager);
+			reply =	expectMsgClass(TestingResourceManager.GetRegisteredResourcesReply.class);
+
+			assertEquals(0, reply.resources.size());
+
+		}};
+		}};
+	}
+
+	/**
+	 * Tests notification of JobManager about a failed resource.
+	 */
+	@Test
+	public void testResourceFailureNotification() {
+		new JavaTestKit(system){{
+		new Within(duration("10 seconds")) {
+		@Override
+		protected void run() {
+
+			fakeJobManager = TestingUtils.createForwardingActor(system, getTestActor(), Option.<String>empty());
+			resourceManager = TestingUtils.createResourceManager(system, fakeJobManager.actor(), config);
+
+			// register with JM
+			expectMsgClass(RegisterResourceManager.class);
+			resourceManager.tell(
+				new RegisterResourceManagerSuccessful(fakeJobManager.actor(), Collections.<ResourceID>emptyList()),
+				fakeJobManager);
+
+			ResourceID resourceID1 = ResourceID.generate();
+			ResourceID resourceID2 = ResourceID.generate();
+
+			// Send task manager registration
+			resourceManager.tell(new NotifyResourceStarted(resourceID1),
+				fakeJobManager);
+
+			expectMsgClass(Acknowledge.class);
+
+			// Send task manager registration
+			resourceManager.tell(new NotifyResourceStarted(resourceID2),
+				fakeJobManager);
+
+			expectMsgClass(Acknowledge.class);
+
+			// check for number registration of registered resources
+			resourceManager.tell(new TestingResourceManager.GetRegisteredResources(), fakeJobManager);
+			TestingResourceManager.GetRegisteredResourcesReply reply =
+				expectMsgClass(TestingResourceManager.GetRegisteredResourcesReply.class);
+
+			assertEquals(2, reply.resources.size());
+			assertTrue(reply.resources.contains(resourceID1));
+			assertTrue(reply.resources.contains(resourceID2));
+
+			// fail resources
+			resourceManager.tell(new TestingResourceManager.FailResource(resourceID1), fakeJobManager);
+			resourceManager.tell(new TestingResourceManager.FailResource(resourceID2), fakeJobManager);
+
+			ResourceRemoved answer = expectMsgClass(ResourceRemoved.class);
+			ResourceRemoved answer2 = expectMsgClass(ResourceRemoved.class);
+
+			assertEquals(resourceID1, answer.resourceId());
+			assertEquals(resourceID2, answer2.resourceId());
+
+		}};
+		}};
+	}
+}


[50/50] [abbrv] flink git commit: [hotfix] Remove unused imports from SlotRequestRegistered/Rejected and ResourceSlot

Posted by tr...@apache.org.
[hotfix] Remove unused imports from SlotRequestRegistered/Rejected and ResourceSlot


Project: http://git-wip-us.apache.org/repos/asf/flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/a04c11c8
Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/a04c11c8
Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/a04c11c8

Branch: refs/heads/flip-6
Commit: a04c11c87ec45fd4c9df083124e2f029d728ebf5
Parents: f766f12
Author: Till Rohrmann <tr...@apache.org>
Authored: Wed Sep 21 11:47:53 2016 +0200
Committer: Till Rohrmann <tr...@apache.org>
Committed: Wed Sep 21 11:47:53 2016 +0200

----------------------------------------------------------------------
 .../flink/runtime/clusterframework/types/ResourceSlot.java       | 4 ----
 .../flink/runtime/resourcemanager/SlotRequestRegistered.java     | 4 ++--
 .../flink/runtime/resourcemanager/SlotRequestRejected.java       | 4 ++--
 3 files changed, 4 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/flink/blob/a04c11c8/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/types/ResourceSlot.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/types/ResourceSlot.java b/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/types/ResourceSlot.java
index 5fb8aee..4a91a79 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/types/ResourceSlot.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/types/ResourceSlot.java
@@ -20,8 +20,6 @@ package org.apache.flink.runtime.clusterframework.types;
 
 import org.apache.flink.runtime.taskexecutor.TaskExecutorGateway;
 
-import java.io.Serializable;
-
 import static org.apache.flink.util.Preconditions.checkNotNull;
 
 /**
@@ -30,8 +28,6 @@ import static org.apache.flink.util.Preconditions.checkNotNull;
  */
 public class ResourceSlot implements ResourceIDRetrievable {
 
-	private static final long serialVersionUID = -5853720153136840674L;
-
 	/** The unique identification of this slot */
 	private final SlotID slotId;
 

http://git-wip-us.apache.org/repos/asf/flink/blob/a04c11c8/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/SlotRequestRegistered.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/SlotRequestRegistered.java b/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/SlotRequestRegistered.java
index 6b7f6dc..f719dce 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/SlotRequestRegistered.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/SlotRequestRegistered.java
@@ -20,13 +20,13 @@ package org.apache.flink.runtime.resourcemanager;
 
 import org.apache.flink.runtime.clusterframework.types.AllocationID;
 
-import java.io.Serializable;
-
 /**
  * Acknowledgment by the ResourceManager for a SlotRequest from the JobManager
  */
 public class SlotRequestRegistered extends SlotRequestReply {
 
+	private static final long serialVersionUID = 4760320859275256855L;
+
 	public SlotRequestRegistered(AllocationID allocationID) {
 		super(allocationID);
 	}

http://git-wip-us.apache.org/repos/asf/flink/blob/a04c11c8/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/SlotRequestRejected.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/SlotRequestRejected.java b/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/SlotRequestRejected.java
index cb3ec72..282a7d5 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/SlotRequestRejected.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/SlotRequestRejected.java
@@ -20,13 +20,13 @@ package org.apache.flink.runtime.resourcemanager;
 
 import org.apache.flink.runtime.clusterframework.types.AllocationID;
 
-import java.io.Serializable;
-
 /**
  * Rejection message by the ResourceManager for a SlotRequest from the JobManager
  */
 public class SlotRequestRejected extends SlotRequestReply {
 
+	private static final long serialVersionUID = 9049346740895325144L;
+
 	public SlotRequestRejected(AllocationID allocationID) {
 		super(allocationID);
 	}


[21/50] [abbrv] flink git commit: [FLINK-4384] [rpc] Add "scheduleRunAsync()" to the RpcEndpoint

Posted by tr...@apache.org.
[FLINK-4384] [rpc] Add "scheduleRunAsync()" to the RpcEndpoint

This closes #2360


Project: http://git-wip-us.apache.org/repos/asf/flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/f5614a4c
Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/f5614a4c
Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/f5614a4c

Branch: refs/heads/flip-6
Commit: f5614a4c352692e39218de866ad68331b28fb7fe
Parents: f5cf6b5
Author: Stephan Ewen <se...@apache.org>
Authored: Thu Aug 11 19:10:48 2016 +0200
Committer: Till Rohrmann <tr...@apache.org>
Committed: Wed Sep 21 11:39:13 2016 +0200

----------------------------------------------------------------------
 .../flink/runtime/rpc/MainThreadExecutor.java   |   9 +
 .../apache/flink/runtime/rpc/RpcEndpoint.java   |  12 ++
 .../runtime/rpc/akka/AkkaInvocationHandler.java |  13 +-
 .../flink/runtime/rpc/akka/AkkaRpcActor.java    |  15 +-
 .../runtime/rpc/akka/messages/RunAsync.java     |  24 ++-
 .../runtime/rpc/akka/AkkaRpcServiceTest.java    |   3 +
 .../flink/runtime/rpc/akka/AsyncCallsTest.java  | 216 +++++++++++++++++++
 7 files changed, 286 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/flink/blob/f5614a4c/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/MainThreadExecutor.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/MainThreadExecutor.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/MainThreadExecutor.java
index 882c1b7..4efb382 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/MainThreadExecutor.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/MainThreadExecutor.java
@@ -52,4 +52,13 @@ public interface MainThreadExecutor {
 	 * @return Future of the callable result
 	 */
 	<V> Future<V> callAsync(Callable<V> callable, Timeout callTimeout);
+
+	/**
+	 * Execute the runnable in the main thread of the underlying RPC endpoint, with
+	 * a delay of the given number of milliseconds.
+	 *
+	 * @param runnable Runnable to be executed
+	 * @param delay    The delay, in milliseconds, after which the runnable will be executed
+	 */
+	void scheduleRunAsync(Runnable runnable, long delay);
 }

http://git-wip-us.apache.org/repos/asf/flink/blob/f5614a4c/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcEndpoint.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcEndpoint.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcEndpoint.java
index aef0803..44933d5 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcEndpoint.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcEndpoint.java
@@ -28,6 +28,7 @@ import scala.concurrent.ExecutionContext;
 import scala.concurrent.Future;
 
 import java.util.concurrent.Callable;
+import java.util.concurrent.TimeUnit;
 
 import static org.apache.flink.util.Preconditions.checkNotNull;
 
@@ -168,6 +169,17 @@ public abstract class RpcEndpoint<C extends RpcGateway> {
 	}
 
 	/**
+	 * Execute the runnable in the main thread of the underlying RPC endpoint, with
+	 * a delay of the given number of milliseconds.
+	 *
+	 * @param runnable Runnable to be executed
+	 * @param delay    The delay after which the runnable will be executed
+	 */
+	public void scheduleRunAsync(Runnable runnable, long delay, TimeUnit unit) {
+		((MainThreadExecutor) self).scheduleRunAsync(runnable, unit.toMillis(delay));
+	}
+
+	/**
 	 * Execute the callable in the main thread of the underlying RPC service, returning a future for
 	 * the result of the callable. If the callable is not completed within the given timeout, then
 	 * the future will be failed with a {@link java.util.concurrent.TimeoutException}.

http://git-wip-us.apache.org/repos/asf/flink/blob/f5614a4c/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaInvocationHandler.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaInvocationHandler.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaInvocationHandler.java
index e8e383a..580b161 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaInvocationHandler.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaInvocationHandler.java
@@ -38,6 +38,9 @@ import java.lang.reflect.Method;
 import java.util.BitSet;
 import java.util.concurrent.Callable;
 
+import static org.apache.flink.util.Preconditions.checkNotNull;
+import static org.apache.flink.util.Preconditions.checkArgument;
+
 /**
  * Invocation handler to be used with a {@link AkkaRpcActor}. The invocation handler wraps the
  * rpc in a {@link RpcInvocation} message and then sends it to the {@link AkkaRpcActor} where it is
@@ -106,9 +109,17 @@ class AkkaInvocationHandler implements InvocationHandler, AkkaGateway, MainThrea
 
 	@Override
 	public void runAsync(Runnable runnable) {
+		scheduleRunAsync(runnable, 0);
+	}
+
+	@Override
+	public void scheduleRunAsync(Runnable runnable, long delay) {
+		checkNotNull(runnable, "runnable");
+		checkArgument(delay >= 0, "delay must be zero or greater");
+		
 		// Unfortunately I couldn't find a way to allow only local communication. Therefore, the
 		// runnable field is transient transient
-		rpcServer.tell(new RunAsync(runnable), ActorRef.noSender());
+		rpcServer.tell(new RunAsync(runnable, delay), ActorRef.noSender());
 	}
 
 	@Override

http://git-wip-us.apache.org/repos/asf/flink/blob/f5614a4c/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcActor.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcActor.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcActor.java
index 57da38a..18ccf1b 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcActor.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcActor.java
@@ -18,6 +18,7 @@
 
 package org.apache.flink.runtime.rpc.akka;
 
+import akka.actor.ActorRef;
 import akka.actor.Status;
 import akka.actor.UntypedActor;
 import akka.pattern.Patterns;
@@ -30,9 +31,11 @@ import org.apache.flink.util.Preconditions;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import scala.concurrent.Future;
+import scala.concurrent.duration.FiniteDuration;
 
 import java.lang.reflect.Method;
 import java.util.concurrent.Callable;
+import java.util.concurrent.TimeUnit;
 
 /**
  * Akka rpc actor which receives {@link RpcInvocation}, {@link RunAsync} and {@link CallAsync}
@@ -152,13 +155,23 @@ class AkkaRpcActor<C extends RpcGateway, T extends RpcEndpoint<C>> extends Untyp
 				"{} is only supported with local communication.",
 				runAsync.getClass().getName(),
 				runAsync.getClass().getName());
-		} else {
+		}
+		else if (runAsync.getDelay() == 0) {
+			// run immediately
 			try {
 				runAsync.getRunnable().run();
 			} catch (final Throwable e) {
 				LOG.error("Caught exception while executing runnable in main thread.", e);
 			}
 		}
+		else {
+			// schedule for later. send a new message after the delay, which will then be immediately executed 
+			FiniteDuration delay = new FiniteDuration(runAsync.getDelay(), TimeUnit.MILLISECONDS);
+			RunAsync message = new RunAsync(runAsync.getRunnable(), 0);
+
+			getContext().system().scheduler().scheduleOnce(delay, getSelf(), message,
+					getContext().dispatcher(), ActorRef.noSender());
+		}
 	}
 
 	/**

http://git-wip-us.apache.org/repos/asf/flink/blob/f5614a4c/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RunAsync.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RunAsync.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RunAsync.java
index fb95852..c18906c 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RunAsync.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RunAsync.java
@@ -18,23 +18,39 @@
 
 package org.apache.flink.runtime.rpc.akka.messages;
 
-import org.apache.flink.util.Preconditions;
-
 import java.io.Serializable;
 
+import static org.apache.flink.util.Preconditions.checkNotNull;
+import static org.apache.flink.util.Preconditions.checkArgument;
+
 /**
  * Message for asynchronous runnable invocations
  */
 public final class RunAsync implements Serializable {
 	private static final long serialVersionUID = -3080595100695371036L;
 
+	/** The runnable to be executed. Transient, so it gets lost upon serialization */ 
 	private final transient Runnable runnable;
 
-	public RunAsync(Runnable runnable) {
-		this.runnable = Preconditions.checkNotNull(runnable);
+	/** The delay after which the runnable should be called */
+	private final long delay;
+
+	/**
+	 * 
+	 * @param runnable  The Runnable to run.
+	 * @param delay     The delay in milliseconds. Zero indicates immediate execution.
+	 */
+	public RunAsync(Runnable runnable, long delay) {
+		checkArgument(delay >= 0);
+		this.runnable = checkNotNull(runnable);
+		this.delay = delay;
 	}
 
 	public Runnable getRunnable() {
 		return runnable;
 	}
+
+	public long getDelay() {
+		return delay;
+	}
 }

http://git-wip-us.apache.org/repos/asf/flink/blob/f5614a4c/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcServiceTest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcServiceTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcServiceTest.java
index a4e1d7f..5e37e10 100644
--- a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcServiceTest.java
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcServiceTest.java
@@ -21,6 +21,9 @@ package org.apache.flink.runtime.rpc.akka;
 import akka.actor.ActorSystem;
 import akka.util.Timeout;
 import org.apache.flink.runtime.akka.AkkaUtils;
+import org.apache.flink.runtime.rpc.RpcEndpoint;
+import org.apache.flink.runtime.rpc.RpcGateway;
+import org.apache.flink.runtime.rpc.RpcService;
 import org.apache.flink.runtime.rpc.jobmaster.JobMaster;
 import org.apache.flink.runtime.rpc.resourcemanager.ResourceManagerGateway;
 import org.apache.flink.runtime.rpc.resourcemanager.ResourceManager;

http://git-wip-us.apache.org/repos/asf/flink/blob/f5614a4c/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AsyncCallsTest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AsyncCallsTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AsyncCallsTest.java
new file mode 100644
index 0000000..f2ce52d
--- /dev/null
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AsyncCallsTest.java
@@ -0,0 +1,216 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc.akka;
+
+import akka.actor.ActorSystem;
+import akka.util.Timeout;
+
+import org.apache.flink.core.testutils.OneShotLatch;
+import org.apache.flink.runtime.akka.AkkaUtils;
+import org.apache.flink.runtime.rpc.RpcEndpoint;
+import org.apache.flink.runtime.rpc.RpcGateway;
+import org.apache.flink.runtime.rpc.RpcMethod;
+import org.apache.flink.runtime.rpc.RpcService;
+
+import org.junit.AfterClass;
+import org.junit.Test;
+
+import scala.concurrent.Await;
+import scala.concurrent.Future;
+import scala.concurrent.duration.FiniteDuration;
+
+import java.util.concurrent.Callable;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.locks.ReentrantLock;
+
+import static org.junit.Assert.*;
+
+public class AsyncCallsTest {
+
+	// ------------------------------------------------------------------------
+	//  shared test members
+	// ------------------------------------------------------------------------
+
+	private static ActorSystem actorSystem = AkkaUtils.createDefaultActorSystem();
+
+	private static AkkaRpcService akkaRpcService = 
+			new AkkaRpcService(actorSystem, new Timeout(10000, TimeUnit.MILLISECONDS));
+
+	@AfterClass
+	public static void shutdown() {
+		akkaRpcService.stopService();
+		actorSystem.shutdown();
+	}
+
+
+	// ------------------------------------------------------------------------
+	//  tests
+	// ------------------------------------------------------------------------
+
+	@Test
+	public void testScheduleWithNoDelay() throws Exception {
+
+		// to collect all the thread references
+		final ReentrantLock lock = new ReentrantLock();
+		final AtomicBoolean concurrentAccess = new AtomicBoolean(false);
+
+		TestEndpoint testEndpoint = new TestEndpoint(akkaRpcService, lock);
+		TestGateway gateway = testEndpoint.getSelf();
+
+		// a bunch of gateway calls
+		gateway.someCall();
+		gateway.anotherCall();
+		gateway.someCall();
+
+		// run something asynchronously
+		for (int i = 0; i < 10000; i++) {
+			testEndpoint.runAsync(new Runnable() {
+				@Override
+				public void run() {
+					boolean holdsLock = lock.tryLock();
+					if (holdsLock) {
+						lock.unlock();
+					} else {
+						concurrentAccess.set(true);
+					}
+				}
+			});
+		}
+	
+		Future<String> result = testEndpoint.callAsync(new Callable<String>() {
+			@Override
+			public String call() throws Exception {
+				boolean holdsLock = lock.tryLock();
+				if (holdsLock) {
+					lock.unlock();
+				} else {
+					concurrentAccess.set(true);
+				}
+				return "test";
+			}
+		}, new Timeout(30, TimeUnit.SECONDS));
+		String str = Await.result(result, new FiniteDuration(30, TimeUnit.SECONDS));
+		assertEquals("test", str);
+
+		// validate that no concurrent access happened
+		assertFalse("Rpc Endpoint had concurrent access", testEndpoint.hasConcurrentAccess());
+		assertFalse("Rpc Endpoint had concurrent access", concurrentAccess.get());
+
+		akkaRpcService.stopServer(testEndpoint.getSelf());
+	}
+
+	@Test
+	public void testScheduleWithDelay() throws Exception {
+
+		// to collect all the thread references
+		final ReentrantLock lock = new ReentrantLock();
+		final AtomicBoolean concurrentAccess = new AtomicBoolean(false);
+		final OneShotLatch latch = new OneShotLatch();
+
+		final long delay = 200;
+
+		TestEndpoint testEndpoint = new TestEndpoint(akkaRpcService, lock);
+
+		// run something asynchronously
+		testEndpoint.runAsync(new Runnable() {
+			@Override
+			public void run() {
+				boolean holdsLock = lock.tryLock();
+				if (holdsLock) {
+					lock.unlock();
+				} else {
+					concurrentAccess.set(true);
+				}
+			}
+		});
+
+		final long start = System.nanoTime();
+
+		testEndpoint.scheduleRunAsync(new Runnable() {
+			@Override
+			public void run() {
+				boolean holdsLock = lock.tryLock();
+				if (holdsLock) {
+					lock.unlock();
+				} else {
+					concurrentAccess.set(true);
+				}
+				latch.trigger();
+			}
+		}, delay, TimeUnit.MILLISECONDS);
+
+		latch.await();
+		final long stop = System.nanoTime();
+
+		// validate that no concurrent access happened
+		assertFalse("Rpc Endpoint had concurrent access", testEndpoint.hasConcurrentAccess());
+		assertFalse("Rpc Endpoint had concurrent access", concurrentAccess.get());
+
+		assertTrue("call was not properly delayed", ((stop - start) / 1000000) >= delay);
+	}
+
+	// ------------------------------------------------------------------------
+	//  test RPC endpoint
+	// ------------------------------------------------------------------------
+	
+	interface TestGateway extends RpcGateway {
+
+		void someCall();
+
+		void anotherCall();
+	}
+
+	@SuppressWarnings("unused")
+	public static class TestEndpoint extends RpcEndpoint<TestGateway> {
+
+		private final ReentrantLock lock;
+
+		private volatile boolean concurrentAccess;
+
+		public TestEndpoint(RpcService rpcService, ReentrantLock lock) {
+			super(rpcService);
+			this.lock = lock;
+		}
+
+		@RpcMethod
+		public void someCall() {
+			boolean holdsLock = lock.tryLock();
+			if (holdsLock) {
+				lock.unlock();
+			} else {
+				concurrentAccess = true;
+			}
+		}
+
+		@RpcMethod
+		public void anotherCall() {
+			boolean holdsLock = lock.tryLock();
+			if (holdsLock) {
+				lock.unlock();
+			} else {
+				concurrentAccess = true;
+			}
+		}
+
+		public boolean hasConcurrentAccess() {
+			return concurrentAccess;
+		}
+	}
+}


[27/50] [abbrv] flink git commit: [FLINK-4414] [cluster] Add getAddress method to RpcGateway

Posted by tr...@apache.org.
[FLINK-4414] [cluster] Add getAddress method to RpcGateway

The RpcGateway.getAddress method allows to retrieve the fully qualified address of the
associated RpcEndpoint.

This closes #2392.


Project: http://git-wip-us.apache.org/repos/asf/flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/d9baa587
Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/d9baa587
Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/d9baa587

Branch: refs/heads/flip-6
Commit: d9baa58754ca2fdb9c69dd1c0fc6502ed59013ee
Parents: 6899837
Author: Till Rohrmann <tr...@apache.org>
Authored: Thu Aug 18 16:34:47 2016 +0200
Committer: Till Rohrmann <tr...@apache.org>
Committed: Wed Sep 21 11:39:14 2016 +0200

----------------------------------------------------------------------
 .../apache/flink/runtime/rpc/RpcEndpoint.java   |  6 +-----
 .../apache/flink/runtime/rpc/RpcGateway.java    |  7 +++++++
 .../apache/flink/runtime/rpc/RpcService.java    | 11 ----------
 .../runtime/rpc/akka/AkkaInvocationHandler.java | 14 +++++++++++--
 .../flink/runtime/rpc/akka/AkkaRpcService.java  | 21 ++++++--------------
 .../runtime/rpc/akka/AkkaRpcActorTest.java      | 16 +++++++++++++++
 6 files changed, 42 insertions(+), 33 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/flink/blob/d9baa587/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcEndpoint.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcEndpoint.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcEndpoint.java
index a28bc14..7b3f8a1 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcEndpoint.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcEndpoint.java
@@ -69,9 +69,6 @@ public abstract class RpcEndpoint<C extends RpcGateway> {
 	/** Self gateway which can be used to schedule asynchronous calls on yourself */
 	private final C self;
 
-	/** the fully qualified address of the this RPC endpoint */
-	private final String selfAddress;
-
 	/** The main thread execution context to be used to execute future callbacks in the main thread
 	 * of the executing rpc server. */
 	private final ExecutionContext mainThreadExecutionContext;
@@ -92,7 +89,6 @@ public abstract class RpcEndpoint<C extends RpcGateway> {
 		this.selfGatewayType = ReflectionUtil.getTemplateType1(getClass());
 		this.self = rpcService.startServer(this);
 		
-		this.selfAddress = rpcService.getAddress(self);
 		this.mainThreadExecutionContext = new MainThreadExecutionContext((MainThreadExecutor) self);
 	}
 
@@ -156,7 +152,7 @@ public abstract class RpcEndpoint<C extends RpcGateway> {
 	 * @return Fully qualified address of the underlying RPC endpoint
 	 */
 	public String getAddress() {
-		return selfAddress;
+		return self.getAddress();
 	}
 
 	/**

http://git-wip-us.apache.org/repos/asf/flink/blob/d9baa587/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcGateway.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcGateway.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcGateway.java
index e3a16b4..81075ee 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcGateway.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcGateway.java
@@ -22,4 +22,11 @@ package org.apache.flink.runtime.rpc;
  * Rpc gateway interface which has to be implemented by Rpc gateways.
  */
 public interface RpcGateway {
+
+	/**
+	 * Returns the fully qualified address under which the associated rpc endpoint is reachable.
+	 *
+	 * @return Fully qualified address under which the associated rpc endpoint is reachable
+	 */
+	String getAddress();
 }

http://git-wip-us.apache.org/repos/asf/flink/blob/d9baa587/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcService.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcService.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcService.java
index fabdb05..bc0f5cb 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcService.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcService.java
@@ -65,17 +65,6 @@ public interface RpcService {
 	void stopService();
 
 	/**
-	 * Get the fully qualified address of the underlying rpc server represented by the self gateway.
-	 * It must be possible to connect from a remote host to the rpc server via the returned fully
-	 * qualified address.
-	 *
-	 * @param selfGateway Self gateway associated with the underlying rpc server
-	 * @param <C> Type of the rpc gateway
-	 * @return Fully qualified address
-	 */
-	<C extends RpcGateway> String getAddress(C selfGateway);
-
-	/**
 	 * Gets the execution context, provided by this RPC service. This execution
 	 * context can be used for example for the {@code onComplete(...)} or {@code onSuccess(...)}
 	 * methods of Futures.

http://git-wip-us.apache.org/repos/asf/flink/blob/d9baa587/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaInvocationHandler.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaInvocationHandler.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaInvocationHandler.java
index 524bf74..bfa04f6 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaInvocationHandler.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaInvocationHandler.java
@@ -23,6 +23,7 @@ import akka.pattern.Patterns;
 import akka.util.Timeout;
 import org.apache.flink.api.java.tuple.Tuple2;
 import org.apache.flink.runtime.rpc.MainThreadExecutor;
+import org.apache.flink.runtime.rpc.RpcGateway;
 import org.apache.flink.runtime.rpc.RpcTimeout;
 import org.apache.flink.runtime.rpc.StartStoppable;
 import org.apache.flink.runtime.rpc.akka.messages.CallAsync;
@@ -55,6 +56,8 @@ import static org.apache.flink.util.Preconditions.checkArgument;
 class AkkaInvocationHandler implements InvocationHandler, AkkaGateway, MainThreadExecutor, StartStoppable {
 	private static final Logger LOG = Logger.getLogger(AkkaInvocationHandler.class);
 
+	private final String address;
+
 	private final ActorRef rpcEndpoint;
 
 	// whether the actor ref is local and thus no message serialization is needed
@@ -65,7 +68,8 @@ class AkkaInvocationHandler implements InvocationHandler, AkkaGateway, MainThrea
 
 	private final long maximumFramesize;
 
-	AkkaInvocationHandler(ActorRef rpcEndpoint, Timeout timeout, long maximumFramesize) {
+	AkkaInvocationHandler(String address, ActorRef rpcEndpoint, Timeout timeout, long maximumFramesize) {
+		this.address = Preconditions.checkNotNull(address);
 		this.rpcEndpoint = Preconditions.checkNotNull(rpcEndpoint);
 		this.isLocal = this.rpcEndpoint.path().address().hasLocalScope();
 		this.timeout = Preconditions.checkNotNull(timeout);
@@ -79,7 +83,8 @@ class AkkaInvocationHandler implements InvocationHandler, AkkaGateway, MainThrea
 		Object result;
 
 		if (declaringClass.equals(AkkaGateway.class) || declaringClass.equals(MainThreadExecutor.class) ||
-			declaringClass.equals(Object.class) || declaringClass.equals(StartStoppable.class)) {
+			declaringClass.equals(Object.class) || declaringClass.equals(StartStoppable.class) ||
+			declaringClass.equals(RpcGateway.class)) {
 			result = method.invoke(this, args);
 		} else {
 			String methodName = method.getName();
@@ -290,4 +295,9 @@ class AkkaInvocationHandler implements InvocationHandler, AkkaGateway, MainThrea
 
 		return false;
 	}
+
+	@Override
+	public String getAddress() {
+		return address;
+	}
 }

http://git-wip-us.apache.org/repos/asf/flink/blob/d9baa587/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcService.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcService.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcService.java
index d987c2f..00a6932 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcService.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcService.java
@@ -102,7 +102,9 @@ public class AkkaRpcService implements RpcService {
 			public C apply(Object obj) {
 				ActorRef actorRef = ((ActorIdentity) obj).getRef();
 
-				InvocationHandler akkaInvocationHandler = new AkkaInvocationHandler(actorRef, timeout, maximumFramesize);
+				final String address = AkkaUtils.getAkkaURL(actorSystem, actorRef);
+
+				InvocationHandler akkaInvocationHandler = new AkkaInvocationHandler(address, actorRef, timeout, maximumFramesize);
 
 				// Rather than using the System ClassLoader directly, we derive the ClassLoader
 				// from this class . That works better in cases where Flink runs embedded and all Flink
@@ -135,7 +137,9 @@ public class AkkaRpcService implements RpcService {
 
 		LOG.info("Starting RPC endpoint for {} at {} .", rpcEndpoint.getClass().getName(), actorRef.path());
 
-		InvocationHandler akkaInvocationHandler = new AkkaInvocationHandler(actorRef, timeout, maximumFramesize);
+		final String address = AkkaUtils.getAkkaURL(actorSystem, actorRef);
+
+		InvocationHandler akkaInvocationHandler = new AkkaInvocationHandler(address, actorRef, timeout, maximumFramesize);
 
 		// Rather than using the System ClassLoader directly, we derive the ClassLoader
 		// from this class . That works better in cases where Flink runs embedded and all Flink
@@ -197,19 +201,6 @@ public class AkkaRpcService implements RpcService {
 	}
 
 	@Override
-	public String getAddress(RpcGateway selfGateway) {
-		checkState(!stopped, "RpcService is stopped");
-
-		if (selfGateway instanceof AkkaGateway) {
-			ActorRef actorRef = ((AkkaGateway) selfGateway).getRpcEndpoint();
-			return AkkaUtils.getAkkaURL(actorSystem, actorRef);
-		} else {
-			String className = AkkaGateway.class.getName();
-			throw new IllegalArgumentException("Cannot get address for non " + className + '.');
-		}
-	}
-
-	@Override
 	public ExecutionContext getExecutionContext() {
 		return actorSystem.dispatcher();
 	}

http://git-wip-us.apache.org/repos/asf/flink/blob/d9baa587/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcActorTest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcActorTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcActorTest.java
index 1653fac..82d13f0 100644
--- a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcActorTest.java
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcActorTest.java
@@ -34,6 +34,7 @@ import scala.concurrent.Future;
 
 import java.util.concurrent.TimeUnit;
 
+import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertThat;
 
 public class AkkaRpcActorTest extends TestLogger {
@@ -57,6 +58,21 @@ public class AkkaRpcActorTest extends TestLogger {
 	}
 
 	/**
+	 * Tests that the rpc endpoint and the associated rpc gateway have the same addresses.
+	 * @throws Exception
+	 */
+	@Test
+	public void testAddressResolution() throws Exception {
+		DummyRpcEndpoint rpcEndpoint = new DummyRpcEndpoint(akkaRpcService);
+
+		Future<DummyRpcGateway> futureRpcGateway = akkaRpcService.connect(rpcEndpoint.getAddress(), DummyRpcGateway.class);
+
+		DummyRpcGateway rpcGateway = Await.result(futureRpcGateway, timeout.duration());
+
+		assertEquals(rpcEndpoint.getAddress(), rpcGateway.getAddress());
+	}
+
+	/**
 	 * Tests that the {@link AkkaRpcActor} stashes messages until the corresponding
 	 * {@link RpcEndpoint} has been started.
 	 */


[32/50] [abbrv] flink git commit: [hotfix] Remove RecoveryMode from JobMaster

Posted by tr...@apache.org.
[hotfix] Remove RecoveryMode from JobMaster

The recovery mode is not used any more by the latest CheckpointCoordinator.

All difference in recovery logic between high-availability and non-high-availability
is encapsulated in the HighAvailabilityServices.


Project: http://git-wip-us.apache.org/repos/asf/flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/9e90412c
Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/9e90412c
Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/9e90412c

Branch: refs/heads/flip-6
Commit: 9e90412c6edf98f3cd8420072cf991faed607b4d
Parents: 762b5d4
Author: Stephan Ewen <se...@apache.org>
Authored: Thu Aug 25 20:37:15 2016 +0200
Committer: Till Rohrmann <tr...@apache.org>
Committed: Wed Sep 21 11:39:15 2016 +0200

----------------------------------------------------------------------
 .../java/org/apache/flink/runtime/rpc/jobmaster/JobMaster.java    | 3 ---
 1 file changed, 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/flink/blob/9e90412c/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/jobmaster/JobMaster.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/jobmaster/JobMaster.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/jobmaster/JobMaster.java
index 49b200b..a046cb8 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/jobmaster/JobMaster.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/jobmaster/JobMaster.java
@@ -22,7 +22,6 @@ import org.apache.flink.api.common.JobID;
 import org.apache.flink.configuration.Configuration;
 import org.apache.flink.runtime.highavailability.HighAvailabilityServices;
 import org.apache.flink.runtime.jobgraph.JobGraph;
-import org.apache.flink.runtime.jobmanager.RecoveryMode;
 import org.apache.flink.runtime.leaderelection.LeaderContender;
 import org.apache.flink.runtime.leaderelection.LeaderElectionService;
 import org.apache.flink.runtime.messages.Acknowledge;
@@ -57,7 +56,6 @@ public class JobMaster extends RpcEndpoint<JobMasterGateway> {
 
 	/** Configuration of the job */
 	private final Configuration configuration;
-	private final RecoveryMode recoveryMode;
 
 	/** Service to contend for and retrieve the leadership of JM and RM */
 	private final HighAvailabilityServices highAvailabilityServices;
@@ -86,7 +84,6 @@ public class JobMaster extends RpcEndpoint<JobMasterGateway> {
 		this.jobID = Preconditions.checkNotNull(jobGraph.getJobID());
 
 		this.configuration = Preconditions.checkNotNull(configuration);
-		this.recoveryMode = RecoveryMode.fromConfig(configuration);
 
 		this.highAvailabilityServices = Preconditions.checkNotNull(highAvailabilityService);
 	}


[39/50] [abbrv] flink git commit: [hotfix] [clustermgnt] Set pending registration properly in TaskExecutorToResourceManagerConnection

Posted by tr...@apache.org.
[hotfix] [clustermgnt] Set pending registration properly in TaskExecutorToResourceManagerConnection


Project: http://git-wip-us.apache.org/repos/asf/flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/ffd20e98
Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/ffd20e98
Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/ffd20e98

Branch: refs/heads/flip-6
Commit: ffd20e981a4c534d9110cc0227ae44fa03e33db0
Parents: 9e90412
Author: Till Rohrmann <tr...@apache.org>
Authored: Mon Aug 29 17:40:57 2016 +0200
Committer: Till Rohrmann <tr...@apache.org>
Committed: Wed Sep 21 11:39:16 2016 +0200

----------------------------------------------------------------------
 .../TaskExecutorToResourceManagerConnection.java        | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/flink/blob/ffd20e98/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorToResourceManagerConnection.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorToResourceManagerConnection.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorToResourceManagerConnection.java
index f398b7d..7ccc879 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorToResourceManagerConnection.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorToResourceManagerConnection.java
@@ -55,7 +55,7 @@ public class TaskExecutorToResourceManagerConnection {
 
 	private final String resourceManagerAddress;
 
-	private ResourceManagerRegistration pendingRegistration;
+	private TaskExecutorToResourceManagerConnection.ResourceManagerRegistration pendingRegistration;
 
 	private ResourceManagerGateway registeredResourceManager;
 
@@ -86,13 +86,13 @@ public class TaskExecutorToResourceManagerConnection {
 		checkState(!closed, "The connection is already closed");
 		checkState(!isRegistered() && pendingRegistration == null, "The connection is already started");
 
-		ResourceManagerRegistration registration = new ResourceManagerRegistration(
+		pendingRegistration = new TaskExecutorToResourceManagerConnection.ResourceManagerRegistration(
 				log, taskExecutor.getRpcService(),
 				resourceManagerAddress, resourceManagerLeaderId,
 				taskExecutor.getAddress(), taskExecutor.getResourceID());
-		registration.startRegistration();
+		pendingRegistration.startRegistration();
 
-		Future<Tuple2<ResourceManagerGateway, TaskExecutorRegistrationSuccess>> future = registration.getFuture();
+		Future<Tuple2<ResourceManagerGateway, TaskExecutorRegistrationSuccess>> future = pendingRegistration.getFuture();
 		
 		future.onSuccess(new OnSuccess<Tuple2<ResourceManagerGateway, TaskExecutorRegistrationSuccess>>() {
 			@Override
@@ -167,14 +167,14 @@ public class TaskExecutorToResourceManagerConnection {
 	//  Utilities
 	// ------------------------------------------------------------------------
 
-	static class ResourceManagerRegistration
+	private static class ResourceManagerRegistration
 			extends RetryingRegistration<ResourceManagerGateway, TaskExecutorRegistrationSuccess> {
 
 		private final String taskExecutorAddress;
 		
 		private final ResourceID resourceID;
 
-		public ResourceManagerRegistration(
+		ResourceManagerRegistration(
 				Logger log,
 				RpcService rpcService,
 				String targetAddress,


[29/50] [abbrv] flink git commit: [FLINK-4443] [rpc] Add support for rpc gateway and rpc endpoint inheritance

Posted by tr...@apache.org.
[FLINK-4443] [rpc] Add support for rpc gateway and rpc endpoint inheritance

This commit extends the RpcCompletenessTest such that it can now check for inherited
remote procedure calls. All methods defined at the RpcGateway are considered native.
This means that they need no RpcEndpoint counterpart because they are implemented by
the RpcGateway implementation.

This closes #2401.

update comments

remove native method annotation

add line break


Project: http://git-wip-us.apache.org/repos/asf/flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/762b5d41
Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/762b5d41
Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/762b5d41

Branch: refs/heads/flip-6
Commit: 762b5d414ddbbc983e5c0954ee2d521146248b31
Parents: 8fd8c99
Author: wenlong.lwl <we...@alibaba-inc.com>
Authored: Sun Aug 21 00:46:51 2016 +0800
Committer: Till Rohrmann <tr...@apache.org>
Committed: Wed Sep 21 11:39:15 2016 +0200

----------------------------------------------------------------------
 .../org/apache/flink/runtime/rpc/RpcMethod.java |  2 ++
 .../TestingHighAvailabilityServices.java        | 19 +++++++++++
 .../flink/runtime/rpc/RpcCompletenessTest.java  | 33 ++++++++++++++++++--
 3 files changed, 52 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/flink/blob/762b5d41/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcMethod.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcMethod.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcMethod.java
index 875e557..e4b0e94 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcMethod.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcMethod.java
@@ -19,6 +19,7 @@
 package org.apache.flink.runtime.rpc;
 
 import java.lang.annotation.ElementType;
+import java.lang.annotation.Inherited;
 import java.lang.annotation.Retention;
 import java.lang.annotation.RetentionPolicy;
 import java.lang.annotation.Target;
@@ -29,6 +30,7 @@ import java.lang.annotation.Target;
  * RpcCompletenessTest makes sure that the set of rpc methods in a rpc server and the set of
  * gateway methods in the corresponding gateway implementation are identical.
  */
+@Inherited
 @Target(ElementType.METHOD)
 @Retention(RetentionPolicy.RUNTIME)
 public @interface RpcMethod {

http://git-wip-us.apache.org/repos/asf/flink/blob/762b5d41/flink-runtime/src/test/java/org/apache/flink/runtime/highavailability/TestingHighAvailabilityServices.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/highavailability/TestingHighAvailabilityServices.java b/flink-runtime/src/test/java/org/apache/flink/runtime/highavailability/TestingHighAvailabilityServices.java
index 3a9f943..4d654a3 100644
--- a/flink-runtime/src/test/java/org/apache/flink/runtime/highavailability/TestingHighAvailabilityServices.java
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/highavailability/TestingHighAvailabilityServices.java
@@ -18,6 +18,8 @@
 
 package org.apache.flink.runtime.highavailability;
 
+import org.apache.flink.api.common.JobID;
+import org.apache.flink.runtime.leaderelection.LeaderElectionService;
 import org.apache.flink.runtime.leaderretrieval.LeaderRetrievalService;
 
 /**
@@ -28,6 +30,8 @@ public class TestingHighAvailabilityServices implements HighAvailabilityServices
 
 	private volatile LeaderRetrievalService resourceManagerLeaderRetriever;
 
+	private volatile LeaderElectionService jobMasterLeaderElectionService;
+
 
 	// ------------------------------------------------------------------------
 	//  Setters for mock / testing implementations
@@ -36,6 +40,10 @@ public class TestingHighAvailabilityServices implements HighAvailabilityServices
 	public void setResourceManagerLeaderRetriever(LeaderRetrievalService resourceManagerLeaderRetriever) {
 		this.resourceManagerLeaderRetriever = resourceManagerLeaderRetriever;
 	}
+
+	public void setJobMasterLeaderElectionService(LeaderElectionService leaderElectionService) {
+		this.jobMasterLeaderElectionService = leaderElectionService;
+	}
 	
 	// ------------------------------------------------------------------------
 	//  HA Services Methods
@@ -50,4 +58,15 @@ public class TestingHighAvailabilityServices implements HighAvailabilityServices
 			throw new IllegalStateException("ResourceManagerLeaderRetriever has not been set");
 		}
 	}
+
+	@Override
+	public LeaderElectionService getJobMasterLeaderElectionService(JobID jobID) throws Exception {
+		LeaderElectionService service = jobMasterLeaderElectionService;
+
+		if (service != null) {
+			return service;
+		} else {
+			throw new IllegalStateException("JobMasterLeaderElectionService has not been set");
+		}
+	}
 }

http://git-wip-us.apache.org/repos/asf/flink/blob/762b5d41/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/RpcCompletenessTest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/RpcCompletenessTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/RpcCompletenessTest.java
index b8aad62..b431eb9 100644
--- a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/RpcCompletenessTest.java
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/RpcCompletenessTest.java
@@ -68,8 +68,8 @@ public class RpcCompletenessTest extends TestLogger {
 
 	@SuppressWarnings("rawtypes")
 	private void checkCompleteness(Class<? extends RpcEndpoint> rpcEndpoint, Class<? extends RpcGateway> rpcGateway) {
-		Method[] gatewayMethods = rpcGateway.getDeclaredMethods();
-		Method[] serverMethods = rpcEndpoint.getDeclaredMethods();
+		Method[] gatewayMethods = getRpcMethodsFromGateway(rpcGateway).toArray(new Method[0]);
+		Method[] serverMethods = rpcEndpoint.getMethods();
 
 		Map<String, Set<Method>> rpcMethods = new HashMap<>();
 		Set<Method> unmatchedRpcMethods = new HashSet<>();
@@ -340,4 +340,33 @@ public class RpcCompletenessTest extends TestLogger {
 			throw new RuntimeException("Could not retrive basic type information for primitive type " + primitveType + '.');
 		}
 	}
+
+	/**
+	 * Extract all rpc methods defined by the gateway interface
+	 *
+	 * @param interfaceClass the given rpc gateway interface
+	 * @return all methods defined by the given interface
+	 */
+	private List<Method> getRpcMethodsFromGateway(Class<? extends RpcGateway> interfaceClass) {
+		if(!interfaceClass.isInterface()) {
+			fail(interfaceClass.getName() + "is not a interface");
+		}
+
+		ArrayList<Method> allMethods = new ArrayList<>();
+		// Methods defined in RpcGateway are native method
+		if(interfaceClass.equals(RpcGateway.class)) {
+			return allMethods;
+		}
+
+		// Get all methods declared in current interface
+		for(Method method : interfaceClass.getDeclaredMethods()) {
+			allMethods.add(method);
+		}
+
+		// Get all method inherited from super interface
+		for(Class superClass : interfaceClass.getInterfaces()) {
+			allMethods.addAll(getRpcMethodsFromGateway(superClass));
+		}
+		return allMethods;
+	}
 }


[07/50] [abbrv] flink git commit: [FLINK-4268] [core] Add a parsers for BigDecimal/BigInteger

Posted by tr...@apache.org.
[FLINK-4268] [core] Add a parsers for BigDecimal/BigInteger

This closes #2304.


Project: http://git-wip-us.apache.org/repos/asf/flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/5c02988b
Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/5c02988b
Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/5c02988b

Branch: refs/heads/flip-6
Commit: 5c02988b05c56f524fc8c65b15e16b0c24278a5e
Parents: e58fd6e
Author: twalthr <tw...@apache.org>
Authored: Wed Jul 27 15:48:48 2016 +0200
Committer: twalthr <tw...@apache.org>
Committed: Tue Sep 20 16:43:59 2016 +0200

----------------------------------------------------------------------
 .../apache/flink/types/parser/BigDecParser.java | 145 +++++++++++++++++++
 .../apache/flink/types/parser/BigIntParser.java | 126 ++++++++++++++++
 .../apache/flink/types/parser/FieldParser.java  |   4 +
 .../flink/types/parser/BigDecParserTest.java    |  79 ++++++++++
 .../flink/types/parser/BigIntParserTest.java    |  68 +++++++++
 5 files changed, 422 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/flink/blob/5c02988b/flink-core/src/main/java/org/apache/flink/types/parser/BigDecParser.java
----------------------------------------------------------------------
diff --git a/flink-core/src/main/java/org/apache/flink/types/parser/BigDecParser.java b/flink-core/src/main/java/org/apache/flink/types/parser/BigDecParser.java
new file mode 100644
index 0000000..46a07fa
--- /dev/null
+++ b/flink-core/src/main/java/org/apache/flink/types/parser/BigDecParser.java
@@ -0,0 +1,145 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.flink.types.parser;
+
+import java.math.BigDecimal;
+import org.apache.flink.annotation.PublicEvolving;
+
+/**
+ * Parses a text field into a {@link java.math.BigDecimal}.
+ */
+@PublicEvolving
+public class BigDecParser extends FieldParser<BigDecimal> {
+
+	private static final BigDecimal BIG_DECIMAL_INSTANCE = BigDecimal.ZERO;
+
+	private BigDecimal result;
+	private char[] reuse = null;
+
+	@Override
+	public int parseField(byte[] bytes, int startPos, int limit, byte[] delimiter, BigDecimal reusable) {
+		int i = startPos;
+
+		final int delimLimit = limit - delimiter.length + 1;
+
+		while (i < limit) {
+			if (i < delimLimit && delimiterNext(bytes, i, delimiter)) {
+				if (i == startPos) {
+					setErrorState(ParseErrorState.EMPTY_COLUMN);
+					return -1;
+				}
+				break;
+			}
+			i++;
+		}
+
+		if (i > startPos &&
+				(Character.isWhitespace(bytes[startPos]) || Character.isWhitespace(bytes[(i - 1)]))) {
+			setErrorState(ParseErrorState.NUMERIC_VALUE_ILLEGAL_CHARACTER);
+			return -1;
+		}
+
+		try {
+			final int length = i - startPos;
+			if (reuse == null || reuse.length < length) {
+				reuse = new char[length];
+			}
+			for (int j = 0; j < length; j++) {
+				final byte b = bytes[startPos + j];
+				if ((b < '0' || b > '9') && b != '-' && b != '+' && b != '.' && b != 'E' && b != 'e') {
+					throw new NumberFormatException();
+				}
+				reuse[j] = (char) bytes[startPos + j];
+			}
+
+			this.result = new BigDecimal(reuse, 0, length);
+			return (i == limit) ? limit : i + delimiter.length;
+		} catch (NumberFormatException e) {
+			setErrorState(ParseErrorState.NUMERIC_VALUE_FORMAT_ERROR);
+			return -1;
+		}
+	}
+
+	@Override
+	public BigDecimal createValue() {
+		return BIG_DECIMAL_INSTANCE;
+	}
+
+	@Override
+	public BigDecimal getLastResult() {
+		return this.result;
+	}
+
+	/**
+	 * Static utility to parse a field of type BigDecimal from a byte sequence that represents text
+	 * characters
+	 * (such as when read from a file stream).
+	 *
+	 * @param bytes    The bytes containing the text data that should be parsed.
+	 * @param startPos The offset to start the parsing.
+	 * @param length   The length of the byte sequence (counting from the offset).
+	 * @return The parsed value.
+	 * @throws NumberFormatException Thrown when the value cannot be parsed because the text 
+	 * represents not a correct number.
+	 */
+	public static final BigDecimal parseField(byte[] bytes, int startPos, int length) {
+		return parseField(bytes, startPos, length, (char) 0xffff);
+	}
+
+	/**
+	 * Static utility to parse a field of type BigDecimal from a byte sequence that represents text
+	 * characters
+	 * (such as when read from a file stream).
+	 *
+	 * @param bytes     The bytes containing the text data that should be parsed.
+	 * @param startPos  The offset to start the parsing.
+	 * @param length    The length of the byte sequence (counting from the offset).
+	 * @param delimiter The delimiter that terminates the field.
+	 * @return The parsed value.
+	 * @throws NumberFormatException Thrown when the value cannot be parsed because the text 
+	 * represents not a correct number.
+	 */
+	public static final BigDecimal parseField(byte[] bytes, int startPos, int length, char delimiter) {
+		if (length <= 0) {
+			throw new NumberFormatException("Invalid input: Empty string");
+		}
+		int i = 0;
+		final byte delByte = (byte) delimiter;
+
+		while (i < length && bytes[startPos + i] != delByte) {
+			i++;
+		}
+
+		if (i > 0 &&
+				(Character.isWhitespace(bytes[startPos]) || Character.isWhitespace(bytes[startPos + i - 1]))) {
+			throw new NumberFormatException("There is leading or trailing whitespace in the numeric field.");
+		}
+
+		final char[] chars = new char[i];
+		for (int j = 0; j < i; j++) {
+			final byte b = bytes[startPos + j];
+			if ((b < '0' || b > '9') && b != '-' && b != '+' && b != '.' && b != 'E' && b != 'e') {
+				throw new NumberFormatException();
+			}
+			chars[j] = (char) bytes[startPos + j];
+		}
+		return new BigDecimal(chars);
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/5c02988b/flink-core/src/main/java/org/apache/flink/types/parser/BigIntParser.java
----------------------------------------------------------------------
diff --git a/flink-core/src/main/java/org/apache/flink/types/parser/BigIntParser.java b/flink-core/src/main/java/org/apache/flink/types/parser/BigIntParser.java
new file mode 100644
index 0000000..13361c1
--- /dev/null
+++ b/flink-core/src/main/java/org/apache/flink/types/parser/BigIntParser.java
@@ -0,0 +1,126 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.flink.types.parser;
+
+import java.math.BigInteger;
+import org.apache.flink.annotation.PublicEvolving;
+
+/**
+ * Parses a text field into a {@link java.math.BigInteger}.
+ */
+@PublicEvolving
+public class BigIntParser extends FieldParser<BigInteger> {
+
+	private static final BigInteger BIG_INTEGER_INSTANCE = BigInteger.ZERO;
+
+	private BigInteger result;
+
+	@Override
+	public int parseField(byte[] bytes, int startPos, int limit, byte[] delimiter, BigInteger reusable) {
+		int i = startPos;
+
+		final int delimLimit = limit - delimiter.length + 1;
+
+		while (i < limit) {
+			if (i < delimLimit && delimiterNext(bytes, i, delimiter)) {
+				if (i == startPos) {
+					setErrorState(ParseErrorState.EMPTY_COLUMN);
+					return -1;
+				}
+				break;
+			}
+			i++;
+		}
+
+		if (i > startPos &&
+				(Character.isWhitespace(bytes[startPos]) || Character.isWhitespace(bytes[(i - 1)]))) {
+			setErrorState(ParseErrorState.NUMERIC_VALUE_ILLEGAL_CHARACTER);
+			return -1;
+		}
+
+		String str = new String(bytes, startPos, i - startPos);
+		try {
+			this.result = new BigInteger(str);
+			return (i == limit) ? limit : i + delimiter.length;
+		} catch (NumberFormatException e) {
+			setErrorState(ParseErrorState.NUMERIC_VALUE_FORMAT_ERROR);
+			return -1;
+		}
+	}
+
+	@Override
+	public BigInteger createValue() {
+		return BIG_INTEGER_INSTANCE;
+	}
+
+	@Override
+	public BigInteger getLastResult() {
+		return this.result;
+	}
+
+	/**
+	 * Static utility to parse a field of type BigInteger from a byte sequence that represents text
+	 * characters
+	 * (such as when read from a file stream).
+	 *
+	 * @param bytes    The bytes containing the text data that should be parsed.
+	 * @param startPos The offset to start the parsing.
+	 * @param length   The length of the byte sequence (counting from the offset).
+	 * @return The parsed value.
+	 * @throws NumberFormatException Thrown when the value cannot be parsed because the text 
+	 * represents not a correct number.
+	 */
+	public static final BigInteger parseField(byte[] bytes, int startPos, int length) {
+		return parseField(bytes, startPos, length, (char) 0xffff);
+	}
+
+	/**
+	 * Static utility to parse a field of type BigInteger from a byte sequence that represents text
+	 * characters
+	 * (such as when read from a file stream).
+	 *
+	 * @param bytes     The bytes containing the text data that should be parsed.
+	 * @param startPos  The offset to start the parsing.
+	 * @param length    The length of the byte sequence (counting from the offset).
+	 * @param delimiter The delimiter that terminates the field.
+	 * @return The parsed value.
+	 * @throws NumberFormatException Thrown when the value cannot be parsed because the text 
+	 * represents not a correct number.
+	 */
+	public static final BigInteger parseField(byte[] bytes, int startPos, int length, char delimiter) {
+		if (length <= 0) {
+			throw new NumberFormatException("Invalid input: Empty string");
+		}
+		int i = 0;
+		final byte delByte = (byte) delimiter;
+
+		while (i < length && bytes[startPos + i] != delByte) {
+			i++;
+		}
+
+		if (i > 0 &&
+				(Character.isWhitespace(bytes[startPos]) || Character.isWhitespace(bytes[startPos + i - 1]))) {
+			throw new NumberFormatException("There is leading or trailing whitespace in the numeric field.");
+		}
+
+		String str = new String(bytes, startPos, i);
+		return new BigInteger(str);
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/5c02988b/flink-core/src/main/java/org/apache/flink/types/parser/FieldParser.java
----------------------------------------------------------------------
diff --git a/flink-core/src/main/java/org/apache/flink/types/parser/FieldParser.java b/flink-core/src/main/java/org/apache/flink/types/parser/FieldParser.java
index 5f17840..a1b9c5f 100644
--- a/flink-core/src/main/java/org/apache/flink/types/parser/FieldParser.java
+++ b/flink-core/src/main/java/org/apache/flink/types/parser/FieldParser.java
@@ -19,6 +19,8 @@
 
 package org.apache.flink.types.parser;
 
+import java.math.BigDecimal;
+import java.math.BigInteger;
 import java.util.HashMap;
 import java.util.Map;
 
@@ -208,6 +210,8 @@ public abstract class FieldParser<T> {
 		PARSERS.put(Float.class, FloatParser.class);
 		PARSERS.put(Double.class, DoubleParser.class);
 		PARSERS.put(Boolean.class, BooleanParser.class);
+		PARSERS.put(BigDecimal.class, BigDecParser.class);
+		PARSERS.put(BigInteger.class, BigIntParser.class);
 
 		// value types
 		PARSERS.put(ByteValue.class, ByteValueParser.class);

http://git-wip-us.apache.org/repos/asf/flink/blob/5c02988b/flink-core/src/test/java/org/apache/flink/types/parser/BigDecParserTest.java
----------------------------------------------------------------------
diff --git a/flink-core/src/test/java/org/apache/flink/types/parser/BigDecParserTest.java b/flink-core/src/test/java/org/apache/flink/types/parser/BigDecParserTest.java
new file mode 100644
index 0000000..9e17565
--- /dev/null
+++ b/flink-core/src/test/java/org/apache/flink/types/parser/BigDecParserTest.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.flink.types.parser;
+
+
+import java.math.BigDecimal;
+
+public class BigDecParserTest extends ParserTestBase<BigDecimal> {
+
+	@Override
+	public String[] getValidTestValues() {
+		return new String[] {
+			"-12.5E1000", "-12.5E100", "-10000", "-1.1", "-1", "-0.44",
+			"0", "0000000", "0e0", "0.000000000000000000000000001", "0.0000001",
+			"0.1234123413478523984729447", "1", "10000", "10E100000", "10E1000000000"
+		};
+	}
+
+	@Override
+	public BigDecimal[] getValidTestResults() {
+		return new BigDecimal[] {
+			new BigDecimal("-12.5E1000"),
+			new BigDecimal("-12.5E100"),
+			new BigDecimal("-10000"),
+			new BigDecimal("-1.1"),
+			new BigDecimal("-1"),
+			new BigDecimal("-0.44"),
+			new BigDecimal("0"),
+			new BigDecimal("0"),
+			new BigDecimal("0e0"),
+			new BigDecimal("0.000000000000000000000000001"),
+			new BigDecimal("0.0000001"),
+			new BigDecimal("0.1234123413478523984729447"),
+			new BigDecimal("1"),
+			new BigDecimal("10000"),
+			new BigDecimal("10E100000"),
+			new BigDecimal("10E1000000000"),
+		};
+	}
+
+	@Override
+	public String[] getInvalidTestValues() {
+		return new String[] {
+			"a", "123abc4", "-57-6", "7-877678", " 1", "2 ", " ", "\t"
+		};
+	}
+
+	@Override
+	public boolean allowsEmptyField() {
+		return false;
+	}
+
+	@Override
+	public FieldParser<BigDecimal> getParser() {
+		return new BigDecParser();
+	}
+
+	@Override
+	public Class<BigDecimal> getTypeClass() {
+		return BigDecimal.class;
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/5c02988b/flink-core/src/test/java/org/apache/flink/types/parser/BigIntParserTest.java
----------------------------------------------------------------------
diff --git a/flink-core/src/test/java/org/apache/flink/types/parser/BigIntParserTest.java b/flink-core/src/test/java/org/apache/flink/types/parser/BigIntParserTest.java
new file mode 100644
index 0000000..473cb55
--- /dev/null
+++ b/flink-core/src/test/java/org/apache/flink/types/parser/BigIntParserTest.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.flink.types.parser;
+
+
+import java.math.BigInteger;
+
+public class BigIntParserTest extends ParserTestBase<BigInteger> {
+
+	@Override
+	public String[] getValidTestValues() {
+		return new String[] {
+			"-8745979691234123413478523984729447", "-10000", "-1", "0",
+			"0000000", "8745979691234123413478523984729447"
+		};
+	}
+
+	@Override
+	public BigInteger[] getValidTestResults() {
+		return new BigInteger[] {
+			new BigInteger("-8745979691234123413478523984729447"),
+			new BigInteger("-10000"),
+			new BigInteger("-1"),
+			new BigInteger("0"),
+			new BigInteger("0"),
+			new BigInteger("8745979691234123413478523984729447")
+		};
+	}
+
+	@Override
+	public String[] getInvalidTestValues() {
+		return new String[] {
+			"1.1" ,"a", "123abc4", "-57-6", "7-877678", " 1", "2 ", " ", "\t"
+		};
+	}
+
+	@Override
+	public boolean allowsEmptyField() {
+		return false;
+	}
+
+	@Override
+	public FieldParser<BigInteger> getParser() {
+		return new BigIntParser();
+	}
+
+	@Override
+	public Class<BigInteger> getTypeClass() {
+		return BigInteger.class;
+	}
+}


[28/50] [abbrv] flink git commit: [FLINK-4400] [cluster mngmt] Implement leadership election among JobMasters

Posted by tr...@apache.org.
[FLINK-4400] [cluster mngmt] Implement leadership election among JobMasters

Adapt related components to the changes in HighAvailabilityServices

Add comments for getJobMasterElectionService in HighAvailabilityServices

This closes #2377.


Project: http://git-wip-us.apache.org/repos/asf/flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/8fd8c998
Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/8fd8c998
Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/8fd8c998

Branch: refs/heads/flip-6
Commit: 8fd8c998b39c313c532b5816213b79770173450b
Parents: 5ea97a1
Author: xiaogang.sxg <xi...@alibaba-inc.com>
Authored: Wed Aug 17 13:46:00 2016 +0800
Committer: Till Rohrmann <tr...@apache.org>
Committed: Wed Sep 21 11:39:15 2016 +0200

----------------------------------------------------------------------
 .../HighAvailabilityServices.java               |   9 +
 .../runtime/highavailability/NonHaServices.java |   8 +
 .../flink/runtime/rpc/jobmaster/JobMaster.java  | 318 +++++++++----------
 .../runtime/rpc/akka/AkkaRpcServiceTest.java    |  53 +---
 4 files changed, 179 insertions(+), 209 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/flink/blob/8fd8c998/flink-runtime/src/main/java/org/apache/flink/runtime/highavailability/HighAvailabilityServices.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/highavailability/HighAvailabilityServices.java b/flink-runtime/src/main/java/org/apache/flink/runtime/highavailability/HighAvailabilityServices.java
index 094d36f..73e4f1f 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/highavailability/HighAvailabilityServices.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/highavailability/HighAvailabilityServices.java
@@ -18,6 +18,8 @@
 
 package org.apache.flink.runtime.highavailability;
 
+import org.apache.flink.api.common.JobID;
+import org.apache.flink.runtime.leaderelection.LeaderElectionService;
 import org.apache.flink.runtime.leaderretrieval.LeaderRetrievalService;
 
 /**
@@ -36,4 +38,11 @@ public interface HighAvailabilityServices {
 	 * Gets the leader retriever for the cluster's resource manager.
 	 */
 	LeaderRetrievalService getResourceManagerLeaderRetriever() throws Exception;
+
+	/**
+	 * Gets the leader election service for the given job.
+	 *
+	 * @param jobID The identifier of the job running the election.
+	 */
+	LeaderElectionService getJobMasterLeaderElectionService(JobID jobID) throws Exception;
 }

http://git-wip-us.apache.org/repos/asf/flink/blob/8fd8c998/flink-runtime/src/main/java/org/apache/flink/runtime/highavailability/NonHaServices.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/highavailability/NonHaServices.java b/flink-runtime/src/main/java/org/apache/flink/runtime/highavailability/NonHaServices.java
index b8c2ed8..3d2769b 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/highavailability/NonHaServices.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/highavailability/NonHaServices.java
@@ -18,6 +18,9 @@
 
 package org.apache.flink.runtime.highavailability;
 
+import org.apache.flink.api.common.JobID;
+import org.apache.flink.runtime.leaderelection.LeaderElectionService;
+import org.apache.flink.runtime.leaderelection.StandaloneLeaderElectionService;
 import org.apache.flink.runtime.leaderretrieval.LeaderRetrievalService;
 import org.apache.flink.runtime.leaderretrieval.StandaloneLeaderRetrievalService;
 
@@ -56,4 +59,9 @@ public class NonHaServices implements HighAvailabilityServices {
 	public LeaderRetrievalService getResourceManagerLeaderRetriever() throws Exception {
 		return new StandaloneLeaderRetrievalService(resourceManagerAddress, new UUID(0, 0));
 	}
+
+	@Override
+	public LeaderElectionService getJobMasterLeaderElectionService(JobID jobID) throws Exception {
+		return new StandaloneLeaderElectionService();
+	}
 }

http://git-wip-us.apache.org/repos/asf/flink/blob/8fd8c998/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/jobmaster/JobMaster.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/jobmaster/JobMaster.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/jobmaster/JobMaster.java
index e53cd68..49b200b 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/jobmaster/JobMaster.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/jobmaster/JobMaster.java
@@ -18,68 +18,77 @@
 
 package org.apache.flink.runtime.rpc.jobmaster;
 
-import akka.dispatch.Futures;
-import akka.dispatch.Mapper;
-import akka.dispatch.OnComplete;
-import org.apache.flink.runtime.instance.InstanceID;
+import org.apache.flink.api.common.JobID;
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.runtime.highavailability.HighAvailabilityServices;
+import org.apache.flink.runtime.jobgraph.JobGraph;
+import org.apache.flink.runtime.jobmanager.RecoveryMode;
+import org.apache.flink.runtime.leaderelection.LeaderContender;
+import org.apache.flink.runtime.leaderelection.LeaderElectionService;
 import org.apache.flink.runtime.messages.Acknowledge;
 import org.apache.flink.runtime.rpc.RpcMethod;
-import org.apache.flink.runtime.rpc.resourcemanager.JobMasterRegistration;
-import org.apache.flink.runtime.rpc.resourcemanager.RegistrationResponse;
 import org.apache.flink.runtime.rpc.resourcemanager.ResourceManagerGateway;
 import org.apache.flink.runtime.rpc.RpcEndpoint;
 import org.apache.flink.runtime.rpc.RpcService;
 import org.apache.flink.runtime.taskmanager.TaskExecutionState;
 import org.apache.flink.util.Preconditions;
-import scala.Tuple2;
-import scala.concurrent.ExecutionContext;
-import scala.concurrent.ExecutionContext$;
-import scala.concurrent.Future;
-import scala.concurrent.duration.Deadline;
-import scala.concurrent.duration.FiniteDuration;
 
 import java.util.UUID;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.ScheduledExecutorService;
-import java.util.concurrent.ScheduledThreadPoolExecutor;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.TimeoutException;
 
 /**
  * JobMaster implementation. The job master is responsible for the execution of a single
  * {@link org.apache.flink.runtime.jobgraph.JobGraph}.
- *
+ * <p>
  * It offers the following methods as part of its rpc interface to interact with the JobMaster
  * remotely:
  * <ul>
- *     <li>{@link #registerAtResourceManager(String)} triggers the registration at the resource manager</li>
  *     <li>{@link #updateTaskExecutionState(TaskExecutionState)} updates the task execution state for
  * given task</li>
  * </ul>
  */
 public class JobMaster extends RpcEndpoint<JobMasterGateway> {
-	/** Execution context for future callbacks */
-	private final ExecutionContext executionContext;
-
-	/** Execution context for scheduled runnables */
-	private final ScheduledExecutorService scheduledExecutorService;
-
-	private final FiniteDuration initialRegistrationTimeout = new FiniteDuration(500, TimeUnit.MILLISECONDS);
-	private final FiniteDuration maxRegistrationTimeout = new FiniteDuration(30, TimeUnit.SECONDS);
-	private final FiniteDuration registrationDuration = new FiniteDuration(365, TimeUnit.DAYS);
-	private final long failedRegistrationDelay = 10000;
 
 	/** Gateway to connected resource manager, null iff not connected */
 	private ResourceManagerGateway resourceManager = null;
 
-	/** UUID to filter out old registration runs */
-	private UUID currentRegistrationRun;
+	/** Logical representation of the job */
+	private final JobGraph jobGraph;
+	private final JobID jobID;
+
+	/** Configuration of the job */
+	private final Configuration configuration;
+	private final RecoveryMode recoveryMode;
+
+	/** Service to contend for and retrieve the leadership of JM and RM */
+	private final HighAvailabilityServices highAvailabilityServices;
+
+	/** Leader Management */
+	private LeaderElectionService leaderElectionService = null;
+	private UUID leaderSessionID;
+
+	/**
+	 * The JM's Constructor
+	 *
+	 * @param jobGraph The representation of the job's execution plan
+	 * @param configuration The job's configuration
+	 * @param rpcService The RPC service at which the JM serves
+	 * @param highAvailabilityService The cluster's HA service from the JM can elect and retrieve leaders.
+	 */
+	public JobMaster(
+		JobGraph jobGraph,
+		Configuration configuration,
+		RpcService rpcService,
+		HighAvailabilityServices highAvailabilityService) {
 
-	public JobMaster(RpcService rpcService, ExecutorService executorService) {
 		super(rpcService);
-		executionContext = ExecutionContext$.MODULE$.fromExecutor(
-			Preconditions.checkNotNull(executorService));
-		scheduledExecutorService = new ScheduledThreadPoolExecutor(1);
+
+		this.jobGraph = Preconditions.checkNotNull(jobGraph);
+		this.jobID = Preconditions.checkNotNull(jobGraph.getJobID());
+
+		this.configuration = Preconditions.checkNotNull(configuration);
+		this.recoveryMode = RecoveryMode.fromConfig(configuration);
+
+		this.highAvailabilityServices = Preconditions.checkNotNull(highAvailabilityService);
 	}
 
 	public ResourceManagerGateway getResourceManager() {
@@ -87,6 +96,91 @@ public class JobMaster extends RpcEndpoint<JobMasterGateway> {
 	}
 
 	//----------------------------------------------------------------------------------------------
+	// Initialization methods
+	//----------------------------------------------------------------------------------------------
+	public void start() {
+		super.start();
+
+		// register at the election once the JM starts
+		registerAtElectionService();
+	}
+
+
+	//----------------------------------------------------------------------------------------------
+	// JobMaster Leadership methods
+	//----------------------------------------------------------------------------------------------
+
+	/**
+	 * Retrieves the election service and contend for the leadership.
+	 */
+	private void registerAtElectionService() {
+		try {
+			leaderElectionService = highAvailabilityServices.getJobMasterLeaderElectionService(jobID);
+			leaderElectionService.start(new JobMasterLeaderContender());
+		} catch (Exception e) {
+			throw new RuntimeException("Fail to register at the election of JobMaster", e);
+		}
+	}
+
+	/**
+	 * Start the execution when the leadership is granted.
+	 *
+	 * @param newLeaderSessionID The identifier of the new leadership session
+	 */
+	public void grantJobMasterLeadership(final UUID newLeaderSessionID) {
+		runAsync(new Runnable() {
+			@Override
+			public void run() {
+				log.info("JobManager {} grants leadership with session id {}.", getAddress(), newLeaderSessionID);
+
+				// The operation may be blocking, but since JM is idle before it grants the leadership, it's okay that
+				// JM waits here for the operation's completeness.
+				leaderSessionID = newLeaderSessionID;
+				leaderElectionService.confirmLeaderSessionID(newLeaderSessionID);
+
+				// TODO:: execute the job when the leadership is granted.
+			}
+		});
+	}
+
+	/**
+	 * Stop the execution when the leadership is revoked.
+	 */
+	public void revokeJobMasterLeadership() {
+		runAsync(new Runnable() {
+			@Override
+			public void run() {
+				log.info("JobManager {} was revoked leadership.", getAddress());
+
+				// TODO:: cancel the job's execution and notify all listeners
+				cancelAndClearEverything(new Exception("JobManager is no longer the leader."));
+
+				leaderSessionID = null;
+			}
+		});
+	}
+
+	/**
+	 * Handles error occurring in the leader election service
+	 *
+	 * @param exception Exception thrown in the leader election service
+	 */
+	public void onJobMasterElectionError(final Exception exception) {
+		runAsync(new Runnable() {
+			@Override
+			public void run() {
+				log.error("Received an error from the LeaderElectionService.", exception);
+
+				// TODO:: cancel the job's execution and shutdown the JM
+				cancelAndClearEverything(exception);
+
+				leaderSessionID = null;
+			}
+		});
+
+	}
+
+	//----------------------------------------------------------------------------------------------
 	// RPC methods
 	//----------------------------------------------------------------------------------------------
 
@@ -109,18 +203,7 @@ public class JobMaster extends RpcEndpoint<JobMasterGateway> {
 	 */
 	@RpcMethod
 	public void registerAtResourceManager(final String address) {
-		currentRegistrationRun = UUID.randomUUID();
-
-		Future<ResourceManagerGateway> resourceManagerFuture = getRpcService().connect(address, ResourceManagerGateway.class);
-
-		handleResourceManagerRegistration(
-			new JobMasterRegistration(getAddress()),
-			1,
-			resourceManagerFuture,
-			currentRegistrationRun,
-			initialRegistrationTimeout,
-			maxRegistrationTimeout,
-			registrationDuration.fromNow());
+		//TODO:: register at the RM
 	}
 
 	//----------------------------------------------------------------------------------------------
@@ -128,124 +211,37 @@ public class JobMaster extends RpcEndpoint<JobMasterGateway> {
 	//----------------------------------------------------------------------------------------------
 
 	/**
-	 * Helper method to handle the resource manager registration process. If a registration attempt
-	 * times out, then a new attempt with the doubled time out is initiated. The whole registration
-	 * process has a deadline. Once this deadline is overdue without successful registration, the
-	 * job master shuts down.
+	 * Cancel the current job and notify all listeners the job's cancellation.
 	 *
-	 * @param jobMasterRegistration Job master registration info which is sent to the resource
-	 *                              manager
-	 * @param attemptNumber Registration attempt number
-	 * @param resourceManagerFuture Future of the resource manager gateway
-	 * @param registrationRun UUID describing the current registration run
-	 * @param timeout Timeout of the last registration attempt
-	 * @param maxTimeout Maximum timeout between registration attempts
-	 * @param deadline Deadline for the registration
+	 * @param cause Cause for the cancelling.
 	 */
-	void handleResourceManagerRegistration(
-		final JobMasterRegistration jobMasterRegistration,
-		final int attemptNumber,
-		final Future<ResourceManagerGateway> resourceManagerFuture,
-		final UUID registrationRun,
-		final FiniteDuration timeout,
-		final FiniteDuration maxTimeout,
-		final Deadline deadline) {
-
-		// filter out concurrent registration runs
-		if (registrationRun.equals(currentRegistrationRun)) {
-
-			log.info("Start registration attempt #{}.", attemptNumber);
-
-			if (deadline.isOverdue()) {
-				// we've exceeded our registration deadline. This means that we have to shutdown the JobMaster
-				log.error("Exceeded registration deadline without successfully registering at the ResourceManager.");
-				shutDown();
-			} else {
-				Future<Tuple2<RegistrationResponse, ResourceManagerGateway>> registrationResponseFuture = resourceManagerFuture.flatMap(new Mapper<ResourceManagerGateway, Future<Tuple2<RegistrationResponse, ResourceManagerGateway>>>() {
-					@Override
-					public Future<Tuple2<RegistrationResponse, ResourceManagerGateway>> apply(ResourceManagerGateway resourceManagerGateway) {
-						return resourceManagerGateway.registerJobMaster(jobMasterRegistration, timeout).zip(Futures.successful(resourceManagerGateway));
-					}
-				}, executionContext);
-
-				registrationResponseFuture.onComplete(new OnComplete<Tuple2<RegistrationResponse, ResourceManagerGateway>>() {
-					@Override
-					public void onComplete(Throwable failure, Tuple2<RegistrationResponse, ResourceManagerGateway> tuple) throws Throwable {
-						if (failure != null) {
-							if (failure instanceof TimeoutException) {
-								// we haven't received an answer in the given timeout interval,
-								// so increase it and try again.
-								final FiniteDuration newTimeout = timeout.$times(2L).min(maxTimeout);
-
-								handleResourceManagerRegistration(
-									jobMasterRegistration,
-									attemptNumber + 1,
-									resourceManagerFuture,
-									registrationRun,
-									newTimeout,
-									maxTimeout,
-									deadline);
-							} else {
-								log.error("Received unknown error while registering at the ResourceManager.", failure);
-								shutDown();
-							}
-						} else {
-							final RegistrationResponse response = tuple._1();
-							final ResourceManagerGateway gateway = tuple._2();
-
-							if (response.isSuccess()) {
-								finishResourceManagerRegistration(gateway, response.getInstanceID());
-							} else {
-								log.info("The registration was refused. Try again.");
-
-								scheduledExecutorService.schedule(new Runnable() {
-									@Override
-									public void run() {
-										// we have to execute scheduled runnable in the main thread
-										// because we need consistency wrt currentRegistrationRun
-										runAsync(new Runnable() {
-											@Override
-											public void run() {
-												// our registration attempt was refused. Start over.
-												handleResourceManagerRegistration(
-													jobMasterRegistration,
-													1,
-													resourceManagerFuture,
-													registrationRun,
-													initialRegistrationTimeout,
-													maxTimeout,
-													deadline);
-											}
-										});
-									}
-								}, failedRegistrationDelay, TimeUnit.MILLISECONDS);
-							}
-						}
-					}
-				}, getMainThreadExecutionContext()); // use the main thread execution context to execute the call back in the main thread
-			}
-		} else {
-			log.info("Discard out-dated registration run.");
-		}
+	private void cancelAndClearEverything(Throwable cause) {
+		// currently, nothing to do here
 	}
 
-	/**
-	 * Finish the resource manager registration by setting the new resource manager gateway.
-	 *
-	 * @param resourceManager New resource manager gateway
-	 * @param instanceID Instance id assigned by the resource manager
-	 */
-	void finishResourceManagerRegistration(ResourceManagerGateway resourceManager, InstanceID instanceID) {
-		log.info("Successfully registered at the ResourceManager under instance id {}.", instanceID);
-		this.resourceManager = resourceManager;
-	}
+	// ------------------------------------------------------------------------
+	//  Utility classes
+	// ------------------------------------------------------------------------
+	private class JobMasterLeaderContender implements LeaderContender {
 
-	/**
-	 * Return if the job master is connected to a resource manager.
-	 *
-	 * @return true if the job master is connected to the resource manager
-	 */
-	public boolean isConnected() {
-		return resourceManager != null;
+		@Override
+		public void grantLeadership(UUID leaderSessionID) {
+			JobMaster.this.grantJobMasterLeadership(leaderSessionID);
+		}
+
+		@Override
+		public void revokeLeadership() {
+			JobMaster.this.revokeJobMasterLeadership();
+		}
+
+		@Override
+		public String getAddress() {
+			return JobMaster.this.getAddress();
+		}
+
+		@Override
+		public void handleError(Exception exception) {
+			onJobMasterElectionError(exception);
+		}
 	}
 }

http://git-wip-us.apache.org/repos/asf/flink/blob/8fd8c998/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcServiceTest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcServiceTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcServiceTest.java
index 7b4ab89..2790cf8 100644
--- a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcServiceTest.java
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcServiceTest.java
@@ -20,9 +20,12 @@ package org.apache.flink.runtime.rpc.akka;
 
 import akka.actor.ActorSystem;
 import akka.util.Timeout;
-
 import org.apache.flink.core.testutils.OneShotLatch;
+import org.apache.flink.configuration.Configuration;
 import org.apache.flink.runtime.akka.AkkaUtils;
+import org.apache.flink.runtime.highavailability.HighAvailabilityServices;
+import org.apache.flink.runtime.highavailability.NonHaServices;
+import org.apache.flink.runtime.jobgraph.JobGraph;
 import org.apache.flink.runtime.rpc.jobmaster.JobMaster;
 import org.apache.flink.runtime.rpc.resourcemanager.ResourceManagerGateway;
 import org.apache.flink.runtime.rpc.resourcemanager.ResourceManager;
@@ -31,6 +34,7 @@ import org.apache.flink.util.TestLogger;
 import org.junit.AfterClass;
 import org.junit.Test;
 
+import org.mockito.Mockito;
 import scala.concurrent.duration.Deadline;
 import scala.concurrent.duration.FiniteDuration;
 
@@ -80,51 +84,4 @@ public class AkkaRpcServiceTest extends TestLogger {
 
 		assertTrue("call was not properly delayed", ((stop - start) / 1000000) >= delay);
 	}
-
-	// ------------------------------------------------------------------------
-	//  specific component tests - should be moved to the test classes
-	//  for those components
-	// ------------------------------------------------------------------------
-
-	/**
-	 * Tests that the {@link JobMaster} can connect to the {@link ResourceManager} using the
-	 * {@link AkkaRpcService}.
-	 */
-	@Test
-	public void testJobMasterResourceManagerRegistration() throws Exception {
-		Timeout akkaTimeout = new Timeout(10, TimeUnit.SECONDS);
-		ActorSystem actorSystem = AkkaUtils.createDefaultActorSystem();
-		ActorSystem actorSystem2 = AkkaUtils.createDefaultActorSystem();
-		AkkaRpcService akkaRpcService = new AkkaRpcService(actorSystem, akkaTimeout);
-		AkkaRpcService akkaRpcService2 = new AkkaRpcService(actorSystem2, akkaTimeout);
-		ExecutorService executorService = new ForkJoinPool();
-
-		ResourceManager resourceManager = new ResourceManager(akkaRpcService, executorService);
-		JobMaster jobMaster = new JobMaster(akkaRpcService2, executorService);
-
-		resourceManager.start();
-		jobMaster.start();
-
-		ResourceManagerGateway rm = resourceManager.getSelf();
-
-		assertTrue(rm instanceof AkkaGateway);
-
-		AkkaGateway akkaClient = (AkkaGateway) rm;
-
-		
-		jobMaster.registerAtResourceManager(AkkaUtils.getAkkaURL(actorSystem, akkaClient.getRpcEndpoint()));
-
-		// wait for successful registration
-		FiniteDuration timeout = new FiniteDuration(200, TimeUnit.SECONDS);
-		Deadline deadline = timeout.fromNow();
-
-		while (deadline.hasTimeLeft() && !jobMaster.isConnected()) {
-			Thread.sleep(100);
-		}
-
-		assertFalse(deadline.isOverdue());
-
-		jobMaster.shutDown();
-		resourceManager.shutDown();
-	}
 }


[16/50] [abbrv] flink git commit: [FLINK-4368] [distributed runtime] Eagerly initialize the RPC endpoint members

Posted by tr...@apache.org.
[FLINK-4368] [distributed runtime] Eagerly initialize the RPC endpoint members

This closes #2351


Project: http://git-wip-us.apache.org/repos/asf/flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/94e00927
Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/94e00927
Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/94e00927

Branch: refs/heads/flip-6
Commit: 94e009270386421865f360a3a90b1e5ab9a84c6a
Parents: 04bcb71
Author: Stephan Ewen <se...@apache.org>
Authored: Wed Aug 10 18:27:21 2016 +0200
Committer: Till Rohrmann <tr...@apache.org>
Committed: Wed Sep 21 11:39:12 2016 +0200

----------------------------------------------------------------------
 .../flink/runtime/rpc/MainThreadExecutor.java   |   9 +-
 .../apache/flink/runtime/rpc/RpcEndpoint.java   | 156 +++++++++++--------
 .../runtime/rpc/akka/AkkaRpcServiceTest.java    |   4 +-
 3 files changed, 99 insertions(+), 70 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/flink/blob/94e00927/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/MainThreadExecutor.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/MainThreadExecutor.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/MainThreadExecutor.java
index e06711e..14b2997 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/MainThreadExecutor.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/MainThreadExecutor.java
@@ -26,22 +26,23 @@ import java.util.concurrent.TimeoutException;
 
 /**
  * Interface to execute {@link Runnable} and {@link Callable} in the main thread of the underlying
- * rpc server.
+ * RPC endpoint.
  *
- * This interface is intended to be implemented by the self gateway in a {@link RpcEndpoint}
+ * <p>This interface is intended to be implemented by the self gateway in a {@link RpcEndpoint}
  * implementation which allows to dispatch local procedures to the main thread of the underlying
  * rpc server.
  */
 public interface MainThreadExecutor {
+
 	/**
-	 * Execute the runnable in the main thread of the underlying rpc server.
+	 * Execute the runnable in the main thread of the underlying RPC endpoint.
 	 *
 	 * @param runnable Runnable to be executed
 	 */
 	void runAsync(Runnable runnable);
 
 	/**
-	 * Execute the callable in the main thread of the underlying rpc server and return a future for
+	 * Execute the callable in the main thread of the underlying RPC endpoint and return a future for
 	 * the callable result. If the future is not completed within the given timeout, the returned
 	 * future will throw a {@link TimeoutException}.
 	 *

http://git-wip-us.apache.org/repos/asf/flink/blob/94e00927/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcEndpoint.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcEndpoint.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcEndpoint.java
index 3d8757f..0d928a8 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcEndpoint.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcEndpoint.java
@@ -19,85 +19,116 @@
 package org.apache.flink.runtime.rpc;
 
 import akka.util.Timeout;
+
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+
 import scala.concurrent.ExecutionContext;
 import scala.concurrent.Future;
 
 import java.util.concurrent.Callable;
 
+import static org.apache.flink.util.Preconditions.checkNotNull;
+
 /**
- * Base class for rpc endpoints. Distributed components which offer remote procedure calls have to
- * extend the rpc endpoint base class.
+ * Base class for RPC endpoints. Distributed components which offer remote procedure calls have to
+ * extend the RPC endpoint base class. An RPC endpoint is backed by an {@link RpcService}. 
+ * 
+ * <h1>Endpoint and Gateway</h1>
+ * 
+ * To be done...
+ * 
+ * <h1>Single Threaded Endpoint Execution </h1>
+ * 
+ * <p>All RPC calls on the same endpoint are called by the same thread
+ * (referred to as the endpoint's <i>main thread</i>).
+ * Thus, by executing all state changing operations within the main 
+ * thread, we don't have to reason about concurrent accesses, in the same way in the Actor Model
+ * of Erlang or Akka.
  *
- * The main idea is that a rpc endpoint is backed by a rpc server which has a single thread
- * processing the rpc calls. Thus, by executing all state changing operations within the main
- * thread, we don't have to reason about concurrent accesses. The rpc provides provides
- * {@link #runAsync(Runnable)}, {@link #callAsync(Callable, Timeout)} and the
- * {@link #getMainThreadExecutionContext()} to execute code in the rpc server's main thread.
+ * <p>The RPC endpoint provides provides {@link #runAsync(Runnable)}, {@link #callAsync(Callable, Timeout)}
+  * and the {@link #getMainThreadExecutionContext()} to execute code in the RPC endoint's main thread.
  *
- * @param <C> Rpc gateway counterpart for the implementing rpc endpoint
+ * @param <C> The RPC gateway counterpart for the implementing RPC endpoint
  */
 public abstract class RpcEndpoint<C extends RpcGateway> {
 
 	protected final Logger log = LoggerFactory.getLogger(getClass());
 
-	/** Rpc service to be used to start the rpc server and to obtain rpc gateways */
+	// ------------------------------------------------------------------------
+
+	/** RPC service to be used to start the RPC server and to obtain rpc gateways */
 	private final RpcService rpcService;
 
 	/** Self gateway which can be used to schedule asynchronous calls on yourself */
-	private C self;
+	private final C self;
+
+	/** the fully qualified address of the this RPC endpoint */
+	private final String selfAddress;
+
+	/** The main thread execution context to be used to execute future callbacks in the main thread
+	 * of the executing rpc server. */
+	private final MainThreadExecutionContext mainThreadExecutionContext;
+
 
 	/**
-	 * The main thread execution context to be used to execute future callbacks in the main thread
-	 * of the executing rpc server.
-	 *
-	 * IMPORTANT: The main thread context is only available after the rpc server has been started.
+	 * Initializes the RPC endpoint.
+	 * 
+	 * @param rpcService The RPC server that dispatches calls to this RPC endpoint. 
 	 */
-	private MainThreadExecutionContext mainThreadExecutionContext;
-
 	public RpcEndpoint(RpcService rpcService) {
-		this.rpcService = rpcService;
+		this.rpcService = checkNotNull(rpcService, "rpcService");
+		this.self = rpcService.startServer(this);
+		this.selfAddress = rpcService.getAddress(self);
+		this.mainThreadExecutionContext = new MainThreadExecutionContext((MainThreadExecutor) self);
 	}
 
+	// ------------------------------------------------------------------------
+	//  Shutdown
+	// ------------------------------------------------------------------------
+
 	/**
-	 * Get self-gateway which should be used to run asynchronous rpc calls on this endpoint.
-	 *
-	 * IMPORTANT: Always issue local method calls via the self-gateway if the current thread
-	 * is not the main thread of the underlying rpc server, e.g. from within a future callback.
-	 *
-	 * @return Self gateway
+	 * Shuts down the underlying RPC endpoint via the RPC service.
+	 * After this method was called, the RPC endpoint will no longer be reachable, neither remotely,
+	 * not via its {@link #getSelf() self gateway}. It will also not accepts executions in main thread
+	 * any more (via {@link #callAsync(Callable, Timeout)} and {@link #runAsync(Runnable)}).
+	 * 
+	 * <p>This method can be overridden to add RPC endpoint specific shut down code.
+	 * The overridden method should always call the parent shut down method.
 	 */
-	public C getSelf() {
-		return self;
+	public void shutDown() {
+		rpcService.stopServer(self);
 	}
 
+	// ------------------------------------------------------------------------
+	//  Basic RPC endpoint properties
+	// ------------------------------------------------------------------------
+
 	/**
-	 * Execute the runnable in the main thread of the underlying rpc server.
+	 * Get self-gateway which should be used to run asynchronous RPC calls on this endpoint.
+	 *
+	 * <p><b>IMPORTANT</b>: Always issue local method calls via the self-gateway if the current thread
+	 * is not the main thread of the underlying rpc server, e.g. from within a future callback.
 	 *
-	 * @param runnable Runnable to be executed in the main thread of the underlying rpc server
+	 * @return The self gateway
 	 */
-	public void runAsync(Runnable runnable) {
-		((MainThreadExecutor) self).runAsync(runnable);
+	public C getSelf() {
+		return self;
 	}
 
 	/**
-	 * Execute the callable in the main thread of the underlying rpc server returning a future for
-	 * the result of the callable. If the callable is not completed within the given timeout, then
-	 * the future will be failed with a {@link java.util.concurrent.TimeoutException}.
+	 * Gets the address of the underlying RPC endpoint. The address should be fully qualified so that
+	 * a remote system can connect to this RPC endpoint via this address.
 	 *
-	 * @param callable Callable to be executed in the main thread of the underlying rpc server
-	 * @param timeout Timeout for the callable to be completed
-	 * @param <V> Return type of the callable
-	 * @return Future for the result of the callable.
+	 * @return Fully qualified address of the underlying RPC endpoint
 	 */
-	public <V> Future<V> callAsync(Callable<V> callable, Timeout timeout) {
-		return ((MainThreadExecutor) self).callAsync(callable, timeout);
+	public String getAddress() {
+		return selfAddress;
 	}
 
 	/**
 	 * Gets the main thread execution context. The main thread execution context can be used to
-	 * execute tasks in the main thread of the underlying rpc server.
+	 * execute tasks in the main thread of the underlying RPC endpoint.
 	 *
 	 * @return Main thread execution context
 	 */
@@ -106,52 +137,51 @@ public abstract class RpcEndpoint<C extends RpcGateway> {
 	}
 
 	/**
-	 * Gets the used rpc service.
+	 * Gets the endpoint's RPC service.
 	 *
-	 * @return Rpc service
+	 * @return The endpoint's RPC service
 	 */
 	public RpcService getRpcService() {
 		return rpcService;
 	}
 
-	/**
-	 * Starts the underlying rpc server via the rpc service and creates the main thread execution
-	 * context. This makes the rpc endpoint effectively reachable from the outside.
-	 *
-	 * Can be overriden to add rpc endpoint specific start up code. Should always call the parent
-	 * start method.
-	 */
-	public void start() {
-		self = rpcService.startServer(this);
-		mainThreadExecutionContext = new MainThreadExecutionContext((MainThreadExecutor) self);
-	}
-
+	// ------------------------------------------------------------------------
+	//  Asynchronous executions
+	// ------------------------------------------------------------------------
 
 	/**
-	 * Shuts down the underlying rpc server via the rpc service.
+	 * Execute the runnable in the main thread of the underlying RPC endpoint.
 	 *
-	 * Can be overriden to add rpc endpoint specific shut down code. Should always call the parent
-	 * shut down method.
+	 * @param runnable Runnable to be executed in the main thread of the underlying RPC endpoint
 	 */
-	public void shutDown() {
-		rpcService.stopServer(self);
+	public void runAsync(Runnable runnable) {
+		((MainThreadExecutor) self).runAsync(runnable);
 	}
 
 	/**
-	 * Gets the address of the underlying rpc server. The address should be fully qualified so that
-	 * a remote system can connect to this rpc server via this address.
+	 * Execute the callable in the main thread of the underlying RPC service, returning a future for
+	 * the result of the callable. If the callable is not completed within the given timeout, then
+	 * the future will be failed with a {@link java.util.concurrent.TimeoutException}.
 	 *
-	 * @return Fully qualified address of the underlying rpc server
+	 * @param callable Callable to be executed in the main thread of the underlying rpc server
+	 * @param timeout Timeout for the callable to be completed
+	 * @param <V> Return type of the callable
+	 * @return Future for the result of the callable.
 	 */
-	public String getAddress() {
-		return rpcService.getAddress(self);
+	public <V> Future<V> callAsync(Callable<V> callable, Timeout timeout) {
+		return ((MainThreadExecutor) self).callAsync(callable, timeout);
 	}
 
+	// ------------------------------------------------------------------------
+	//  Utilities
+	// ------------------------------------------------------------------------
+	
 	/**
 	 * Execution context which executes runnables in the main thread context. A reported failure
 	 * will cause the underlying rpc server to shut down.
 	 */
 	private class MainThreadExecutionContext implements ExecutionContext {
+
 		private final MainThreadExecutor gateway;
 
 		MainThreadExecutionContext(MainThreadExecutor gateway) {

http://git-wip-us.apache.org/repos/asf/flink/blob/94e00927/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcServiceTest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcServiceTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcServiceTest.java
index c5bac94..642a380 100644
--- a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcServiceTest.java
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcServiceTest.java
@@ -54,15 +54,13 @@ public class AkkaRpcServiceTest extends TestLogger {
 		ResourceManager resourceManager = new ResourceManager(akkaRpcService, executorService);
 		JobMaster jobMaster = new JobMaster(akkaRpcService2, executorService);
 
-		resourceManager.start();
-
 		ResourceManagerGateway rm = resourceManager.getSelf();
 
 		assertTrue(rm instanceof AkkaGateway);
 
 		AkkaGateway akkaClient = (AkkaGateway) rm;
 
-		jobMaster.start();
+		
 		jobMaster.registerAtResourceManager(AkkaUtils.getAkkaURL(actorSystem, akkaClient.getActorRef()));
 
 		// wait for successful registration


[47/50] [abbrv] flink git commit: [FLINK-4538][FLINK-4348] ResourceManager slot allocation protcol

Posted by tr...@apache.org.
http://git-wip-us.apache.org/repos/asf/flink/blob/0a134a32/flink-runtime/src/test/java/org/apache/flink/runtime/resourcemanager/SlotManagerTest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/resourcemanager/SlotManagerTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/resourcemanager/SlotManagerTest.java
deleted file mode 100644
index 52d9d06..0000000
--- a/flink-runtime/src/test/java/org/apache/flink/runtime/resourcemanager/SlotManagerTest.java
+++ /dev/null
@@ -1,538 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.flink.runtime.resourcemanager;
-
-import org.apache.flink.api.common.JobID;
-import org.apache.flink.runtime.clusterframework.types.AllocationID;
-import org.apache.flink.runtime.clusterframework.types.ResourceID;
-import org.apache.flink.runtime.clusterframework.types.ResourceProfile;
-import org.apache.flink.runtime.clusterframework.types.ResourceSlot;
-import org.apache.flink.runtime.clusterframework.types.SlotID;
-import org.apache.flink.runtime.taskexecutor.SlotStatus;
-import org.junit.Before;
-import org.junit.Test;
-
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
-import static org.mockito.Mockito.mock;
-
-public class SlotManagerTest {
-
-	private static final double DEFAULT_TESTING_CPU_CORES = 1.0;
-
-	private static final long DEFAULT_TESTING_MEMORY = 512;
-
-	private static final ResourceProfile DEFAULT_TESTING_PROFILE =
-		new ResourceProfile(DEFAULT_TESTING_CPU_CORES, DEFAULT_TESTING_MEMORY);
-
-	private static final ResourceProfile DEFAULT_TESTING_BIG_PROFILE =
-		new ResourceProfile(DEFAULT_TESTING_CPU_CORES * 2, DEFAULT_TESTING_MEMORY * 2);
-
-	private ResourceManagerGateway resourceManagerGateway;
-
-	@Before
-	public void setUp() {
-		resourceManagerGateway = mock(ResourceManagerGateway.class);
-	}
-
-	/**
-	 * Tests that there are no free slots when we request, need to allocate from cluster manager master
-	 */
-	@Test
-	public void testRequestSlotWithoutFreeSlot() {
-		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
-		slotManager.requestSlot(new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE));
-
-		assertEquals(0, slotManager.getAllocatedSlotCount());
-		assertEquals(0, slotManager.getFreeSlotCount());
-		assertEquals(1, slotManager.getPendingRequestCount());
-		assertEquals(1, slotManager.getAllocatedContainers().size());
-		assertEquals(DEFAULT_TESTING_PROFILE, slotManager.getAllocatedContainers().get(0));
-	}
-
-	/**
-	 * Tests that there are some free slots when we request, and the request is fulfilled immediately
-	 */
-	@Test
-	public void testRequestSlotWithFreeSlot() {
-		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
-
-		directlyProvideFreeSlots(slotManager, DEFAULT_TESTING_PROFILE, 1);
-		assertEquals(1, slotManager.getFreeSlotCount());
-
-		slotManager.requestSlot(new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE));
-		assertEquals(1, slotManager.getAllocatedSlotCount());
-		assertEquals(0, slotManager.getFreeSlotCount());
-		assertEquals(0, slotManager.getPendingRequestCount());
-		assertEquals(0, slotManager.getAllocatedContainers().size());
-	}
-
-	/**
-	 * Tests that there are some free slots when we request, but none of them are suitable
-	 */
-	@Test
-	public void testRequestSlotWithoutSuitableSlot() {
-		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
-
-		directlyProvideFreeSlots(slotManager, DEFAULT_TESTING_PROFILE, 2);
-		assertEquals(2, slotManager.getFreeSlotCount());
-
-		slotManager.requestSlot(new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_BIG_PROFILE));
-		assertEquals(0, slotManager.getAllocatedSlotCount());
-		assertEquals(2, slotManager.getFreeSlotCount());
-		assertEquals(1, slotManager.getPendingRequestCount());
-		assertEquals(1, slotManager.getAllocatedContainers().size());
-		assertEquals(DEFAULT_TESTING_BIG_PROFILE, slotManager.getAllocatedContainers().get(0));
-	}
-
-	/**
-	 * Tests that we send duplicated slot request
-	 */
-	@Test
-	public void testDuplicatedSlotRequest() {
-		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
-		directlyProvideFreeSlots(slotManager, DEFAULT_TESTING_PROFILE, 1);
-
-		SlotRequest request1 = new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE);
-		SlotRequest request2 = new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_BIG_PROFILE);
-
-		slotManager.requestSlot(request1);
-		slotManager.requestSlot(request2);
-		slotManager.requestSlot(request2);
-		slotManager.requestSlot(request1);
-
-		assertEquals(1, slotManager.getAllocatedSlotCount());
-		assertEquals(0, slotManager.getFreeSlotCount());
-		assertEquals(1, slotManager.getPendingRequestCount());
-		assertEquals(1, slotManager.getAllocatedContainers().size());
-		assertEquals(DEFAULT_TESTING_BIG_PROFILE, slotManager.getAllocatedContainers().get(0));
-	}
-
-	/**
-	 * Tests that we send multiple slot requests
-	 */
-	@Test
-	public void testRequestMultipleSlots() {
-		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
-		directlyProvideFreeSlots(slotManager, DEFAULT_TESTING_PROFILE, 5);
-
-		// request 3 normal slots
-		for (int i = 0; i < 3; ++i) {
-			slotManager.requestSlot(new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE));
-		}
-
-		// request 2 big slots
-		for (int i = 0; i < 2; ++i) {
-			slotManager.requestSlot(new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_BIG_PROFILE));
-		}
-
-		// request 1 normal slot again
-		slotManager.requestSlot(new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE));
-
-		assertEquals(4, slotManager.getAllocatedSlotCount());
-		assertEquals(1, slotManager.getFreeSlotCount());
-		assertEquals(2, slotManager.getPendingRequestCount());
-		assertEquals(2, slotManager.getAllocatedContainers().size());
-		assertEquals(DEFAULT_TESTING_BIG_PROFILE, slotManager.getAllocatedContainers().get(0));
-		assertEquals(DEFAULT_TESTING_BIG_PROFILE, slotManager.getAllocatedContainers().get(1));
-	}
-
-	/**
-	 * Tests that a new slot appeared in SlotReport, and we used it to fulfill a pending request
-	 */
-	@Test
-	public void testNewlyAppearedFreeSlotFulfillPendingRequest() {
-		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
-		slotManager.requestSlot(new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE));
-		assertEquals(1, slotManager.getPendingRequestCount());
-
-		SlotID slotId = SlotID.generate();
-		SlotStatus slotStatus = new SlotStatus(slotId, DEFAULT_TESTING_PROFILE);
-		slotManager.updateSlotStatus(slotStatus);
-
-		assertEquals(1, slotManager.getAllocatedSlotCount());
-		assertEquals(0, slotManager.getFreeSlotCount());
-		assertEquals(0, slotManager.getPendingRequestCount());
-		assertTrue(slotManager.isAllocated(slotId));
-	}
-
-	/**
-	 * Tests that a new slot appeared in SlotReport, but we have no pending request
-	 */
-	@Test
-	public void testNewlyAppearedFreeSlot() {
-		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
-		SlotID slotId = SlotID.generate();
-		SlotStatus slotStatus = new SlotStatus(slotId, DEFAULT_TESTING_PROFILE);
-		slotManager.updateSlotStatus(slotStatus);
-
-		assertEquals(0, slotManager.getAllocatedSlotCount());
-		assertEquals(1, slotManager.getFreeSlotCount());
-	}
-
-	/**
-	 * Tests that a new slot appeared in SlotReport, but it't not suitable for all the pending requests
-	 */
-	@Test
-	public void testNewlyAppearedFreeSlotNotMatchPendingRequests() {
-		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
-		slotManager.requestSlot(new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_BIG_PROFILE));
-		assertEquals(1, slotManager.getPendingRequestCount());
-
-		SlotID slotId = SlotID.generate();
-		SlotStatus slotStatus = new SlotStatus(slotId, DEFAULT_TESTING_PROFILE);
-		slotManager.updateSlotStatus(slotStatus);
-
-		assertEquals(0, slotManager.getAllocatedSlotCount());
-		assertEquals(1, slotManager.getFreeSlotCount());
-		assertEquals(1, slotManager.getPendingRequestCount());
-		assertFalse(slotManager.isAllocated(slotId));
-	}
-
-	/**
-	 * Tests that a new slot appeared in SlotReport, and it's been reported using by some job
-	 */
-	@Test
-	public void testNewlyAppearedInUseSlot() {
-		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
-
-		SlotID slotId = SlotID.generate();
-		SlotStatus slotStatus = new SlotStatus(slotId, DEFAULT_TESTING_PROFILE, new AllocationID(), new JobID());
-		slotManager.updateSlotStatus(slotStatus);
-
-		assertEquals(1, slotManager.getAllocatedSlotCount());
-		assertEquals(0, slotManager.getFreeSlotCount());
-		assertTrue(slotManager.isAllocated(slotId));
-	}
-
-	/**
-	 * Tests that we had a slot in-use, and it's confirmed by SlotReport
-	 */
-	@Test
-	public void testExistingInUseSlotUpdateStatus() {
-		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
-		SlotRequest request = new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE);
-		slotManager.requestSlot(request);
-
-		// make this slot in use
-		SlotID slotId = SlotID.generate();
-		SlotStatus slotStatus = new SlotStatus(slotId, DEFAULT_TESTING_PROFILE);
-		slotManager.updateSlotStatus(slotStatus);
-
-		assertEquals(1, slotManager.getAllocatedSlotCount());
-		assertEquals(0, slotManager.getFreeSlotCount());
-		assertTrue(slotManager.isAllocated(slotId));
-
-		// slot status is confirmed
-		SlotStatus slotStatus2 = new SlotStatus(slotId, DEFAULT_TESTING_PROFILE,
-			request.getAllocationId(), request.getJobId());
-		slotManager.updateSlotStatus(slotStatus2);
-
-		assertEquals(1, slotManager.getAllocatedSlotCount());
-		assertEquals(0, slotManager.getFreeSlotCount());
-		assertTrue(slotManager.isAllocated(slotId));
-	}
-
-	/**
-	 * Tests that we had a slot in-use, but it's empty according to the SlotReport
-	 */
-	@Test
-	public void testExistingInUseSlotAdjustedToEmpty() {
-		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
-		SlotRequest request1 = new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE);
-		slotManager.requestSlot(request1);
-
-		// make this slot in use
-		SlotID slotId = SlotID.generate();
-		SlotStatus slotStatus = new SlotStatus(slotId, DEFAULT_TESTING_PROFILE);
-		slotManager.updateSlotStatus(slotStatus);
-
-		// another request pending
-		SlotRequest request2 = new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE);
-		slotManager.requestSlot(request2);
-
-		assertEquals(1, slotManager.getAllocatedSlotCount());
-		assertEquals(0, slotManager.getFreeSlotCount());
-		assertEquals(1, slotManager.getPendingRequestCount());
-		assertTrue(slotManager.isAllocated(slotId));
-		assertTrue(slotManager.isAllocated(request1.getAllocationId()));
-
-
-		// but slot is reported empty again, request2 will be fulfilled, request1 will be missing
-		slotManager.updateSlotStatus(slotStatus);
-
-		assertEquals(1, slotManager.getAllocatedSlotCount());
-		assertEquals(0, slotManager.getFreeSlotCount());
-		assertEquals(0, slotManager.getPendingRequestCount());
-		assertTrue(slotManager.isAllocated(slotId));
-		assertTrue(slotManager.isAllocated(request2.getAllocationId()));
-	}
-
-	/**
-	 * Tests that we had a slot in use, and it's also reported in use by TaskManager, but the allocation
-	 * information didn't match.
-	 */
-	@Test
-	public void testExistingInUseSlotWithDifferentAllocationInfo() {
-		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
-		SlotRequest request = new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE);
-		slotManager.requestSlot(request);
-
-		// make this slot in use
-		SlotID slotId = SlotID.generate();
-		SlotStatus slotStatus = new SlotStatus(slotId, DEFAULT_TESTING_PROFILE);
-		slotManager.updateSlotStatus(slotStatus);
-
-		assertEquals(1, slotManager.getAllocatedSlotCount());
-		assertEquals(0, slotManager.getFreeSlotCount());
-		assertEquals(0, slotManager.getPendingRequestCount());
-		assertTrue(slotManager.isAllocated(slotId));
-		assertTrue(slotManager.isAllocated(request.getAllocationId()));
-
-		SlotStatus slotStatus2 = new SlotStatus(slotId, DEFAULT_TESTING_PROFILE, new AllocationID(), new JobID());
-		// update slot status with different allocation info
-		slotManager.updateSlotStatus(slotStatus2);
-
-		// original request is missing and won't be allocated
-		assertEquals(1, slotManager.getAllocatedSlotCount());
-		assertEquals(0, slotManager.getFreeSlotCount());
-		assertEquals(0, slotManager.getPendingRequestCount());
-		assertTrue(slotManager.isAllocated(slotId));
-		assertFalse(slotManager.isAllocated(request.getAllocationId()));
-		assertTrue(slotManager.isAllocated(slotStatus2.getAllocationID()));
-	}
-
-	/**
-	 * Tests that we had a free slot, and it's confirmed by SlotReport
-	 */
-	@Test
-	public void testExistingEmptySlotUpdateStatus() {
-		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
-		ResourceSlot slot = new ResourceSlot(SlotID.generate(), DEFAULT_TESTING_PROFILE);
-		slotManager.addFreeSlot(slot);
-
-		SlotStatus slotStatus = new SlotStatus(slot.getSlotId(), DEFAULT_TESTING_PROFILE);
-		slotManager.updateSlotStatus(slotStatus);
-
-		assertEquals(0, slotManager.getAllocatedSlotCount());
-		assertEquals(1, slotManager.getFreeSlotCount());
-		assertEquals(0, slotManager.getPendingRequestCount());
-	}
-
-	/**
-	 * Tests that we had a free slot, and it's reported in-use by TaskManager
-	 */
-	@Test
-	public void testExistingEmptySlotAdjustedToInUse() {
-		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
-		ResourceSlot slot = new ResourceSlot(SlotID.generate(), DEFAULT_TESTING_PROFILE);
-		slotManager.addFreeSlot(slot);
-
-		SlotStatus slotStatus = new SlotStatus(slot.getSlotId(), DEFAULT_TESTING_PROFILE,
-			new AllocationID(), new JobID());
-		slotManager.updateSlotStatus(slotStatus);
-
-		assertEquals(1, slotManager.getAllocatedSlotCount());
-		assertEquals(0, slotManager.getFreeSlotCount());
-		assertEquals(0, slotManager.getPendingRequestCount());
-		assertTrue(slotManager.isAllocated(slot.getSlotId()));
-	}
-
-	/**
-	 * Tests that we did some allocation but failed / rejected by TaskManager, request will retry
-	 */
-	@Test
-	public void testSlotAllocationFailedAtTaskManager() {
-		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
-		ResourceSlot slot = new ResourceSlot(SlotID.generate(), DEFAULT_TESTING_PROFILE);
-		slotManager.addFreeSlot(slot);
-
-		SlotRequest request = new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE);
-		slotManager.requestSlot(request);
-
-		assertEquals(1, slotManager.getAllocatedSlotCount());
-		assertEquals(0, slotManager.getFreeSlotCount());
-		assertEquals(0, slotManager.getPendingRequestCount());
-		assertTrue(slotManager.isAllocated(slot.getSlotId()));
-
-		slotManager.handleSlotRequestFailedAtTaskManager(request, slot.getSlotId());
-
-		assertEquals(1, slotManager.getAllocatedSlotCount());
-		assertEquals(0, slotManager.getFreeSlotCount());
-		assertEquals(0, slotManager.getPendingRequestCount());
-	}
-
-
-	/**
-	 * Tests that we did some allocation but failed / rejected by TaskManager, and slot is occupied by another request
-	 */
-	@Test
-	public void testSlotAllocationFailedAtTaskManagerOccupiedByOther() {
-		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
-		ResourceSlot slot = new ResourceSlot(SlotID.generate(), DEFAULT_TESTING_PROFILE);
-		slotManager.addFreeSlot(slot);
-
-		SlotRequest request = new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE);
-		slotManager.requestSlot(request);
-
-		// slot is set empty by heartbeat
-		SlotStatus slotStatus = new SlotStatus(slot.getSlotId(), slot.getResourceProfile());
-		slotManager.updateSlotStatus(slotStatus);
-
-		// another request took this slot
-		SlotRequest request2 = new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE);
-		slotManager.requestSlot(request2);
-
-		assertEquals(1, slotManager.getAllocatedSlotCount());
-		assertEquals(0, slotManager.getFreeSlotCount());
-		assertEquals(0, slotManager.getPendingRequestCount());
-		assertFalse(slotManager.isAllocated(request.getAllocationId()));
-		assertTrue(slotManager.isAllocated(request2.getAllocationId()));
-
-		// original request should be pended
-		slotManager.handleSlotRequestFailedAtTaskManager(request, slot.getSlotId());
-
-		assertEquals(1, slotManager.getAllocatedSlotCount());
-		assertEquals(0, slotManager.getFreeSlotCount());
-		assertEquals(1, slotManager.getPendingRequestCount());
-		assertFalse(slotManager.isAllocated(request.getAllocationId()));
-		assertTrue(slotManager.isAllocated(request2.getAllocationId()));
-	}
-
-	@Test
-	public void testNotifyTaskManagerFailure() {
-		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
-
-		ResourceID resource1 = ResourceID.generate();
-		ResourceID resource2 = ResourceID.generate();
-
-		ResourceSlot slot11 = new ResourceSlot(new SlotID(resource1, 1), DEFAULT_TESTING_PROFILE);
-		ResourceSlot slot12 = new ResourceSlot(new SlotID(resource1, 2), DEFAULT_TESTING_PROFILE);
-		ResourceSlot slot21 = new ResourceSlot(new SlotID(resource2, 1), DEFAULT_TESTING_PROFILE);
-		ResourceSlot slot22 = new ResourceSlot(new SlotID(resource2, 2), DEFAULT_TESTING_PROFILE);
-
-		slotManager.addFreeSlot(slot11);
-		slotManager.addFreeSlot(slot21);
-
-		slotManager.requestSlot(new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE));
-		slotManager.requestSlot(new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE));
-
-		assertEquals(2, slotManager.getAllocatedSlotCount());
-		assertEquals(0, slotManager.getFreeSlotCount());
-		assertEquals(0, slotManager.getPendingRequestCount());
-
-		slotManager.addFreeSlot(slot12);
-		slotManager.addFreeSlot(slot22);
-
-		assertEquals(2, slotManager.getAllocatedSlotCount());
-		assertEquals(2, slotManager.getFreeSlotCount());
-		assertEquals(0, slotManager.getPendingRequestCount());
-
-		slotManager.notifyTaskManagerFailure(resource2);
-
-		assertEquals(1, slotManager.getAllocatedSlotCount());
-		assertEquals(1, slotManager.getFreeSlotCount());
-		assertEquals(0, slotManager.getPendingRequestCount());
-
-		// notify an not exist resource failure
-		slotManager.notifyTaskManagerFailure(ResourceID.generate());
-
-		assertEquals(1, slotManager.getAllocatedSlotCount());
-		assertEquals(1, slotManager.getFreeSlotCount());
-		assertEquals(0, slotManager.getPendingRequestCount());
-	}
-
-	// ------------------------------------------------------------------------
-	//  testing utilities
-	// ------------------------------------------------------------------------
-
-	private void directlyProvideFreeSlots(
-		final SlotManager slotManager,
-		final ResourceProfile resourceProfile,
-		final int freeSlotNum)
-	{
-		for (int i = 0; i < freeSlotNum; ++i) {
-			slotManager.addFreeSlot(new ResourceSlot(SlotID.generate(), new ResourceProfile(resourceProfile)));
-		}
-	}
-
-	// ------------------------------------------------------------------------
-	//  testing classes
-	// ------------------------------------------------------------------------
-
-	private static class TestingSlotManager extends SlotManager {
-
-		private final List<ResourceProfile> allocatedContainers;
-
-		TestingSlotManager(ResourceManagerGateway resourceManagerGateway) {
-			super(resourceManagerGateway);
-			this.allocatedContainers = new LinkedList<>();
-		}
-
-		/**
-		 * Choose slot randomly if it matches requirement
-		 *
-		 * @param request   The slot request
-		 * @param freeSlots All slots which can be used
-		 * @return The chosen slot or null if cannot find a match
-		 */
-		@Override
-		protected ResourceSlot chooseSlotToUse(SlotRequest request, Map<SlotID, ResourceSlot> freeSlots) {
-			for (ResourceSlot slot : freeSlots.values()) {
-				if (slot.isMatchingRequirement(request.getResourceProfile())) {
-					return slot;
-				}
-			}
-			return null;
-		}
-
-		/**
-		 * Choose request randomly if offered slot can match its requirement
-		 *
-		 * @param offeredSlot     The free slot
-		 * @param pendingRequests All the pending slot requests
-		 * @return The chosen request's AllocationID or null if cannot find a match
-		 */
-		@Override
-		protected SlotRequest chooseRequestToFulfill(ResourceSlot offeredSlot,
-			Map<AllocationID, SlotRequest> pendingRequests)
-		{
-			for (Map.Entry<AllocationID, SlotRequest> pendingRequest : pendingRequests.entrySet()) {
-				if (offeredSlot.isMatchingRequirement(pendingRequest.getValue().getResourceProfile())) {
-					return pendingRequest.getValue();
-				}
-			}
-			return null;
-		}
-
-		@Override
-		protected void allocateContainer(ResourceProfile resourceProfile) {
-			allocatedContainers.add(resourceProfile);
-		}
-
-		List<ResourceProfile> getAllocatedContainers() {
-			return allocatedContainers;
-		}
-	}
-}

http://git-wip-us.apache.org/repos/asf/flink/blob/0a134a32/flink-runtime/src/test/java/org/apache/flink/runtime/resourcemanager/slotmanager/SlotManagerTest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/resourcemanager/slotmanager/SlotManagerTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/resourcemanager/slotmanager/SlotManagerTest.java
new file mode 100644
index 0000000..9ee9690
--- /dev/null
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/resourcemanager/slotmanager/SlotManagerTest.java
@@ -0,0 +1,554 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.resourcemanager.slotmanager;
+
+import org.apache.flink.api.common.JobID;
+import org.apache.flink.runtime.clusterframework.types.AllocationID;
+import org.apache.flink.runtime.clusterframework.types.ResourceID;
+import org.apache.flink.runtime.clusterframework.types.ResourceProfile;
+import org.apache.flink.runtime.clusterframework.types.ResourceSlot;
+import org.apache.flink.runtime.clusterframework.types.SlotID;
+import org.apache.flink.runtime.resourcemanager.SlotRequest;
+import org.apache.flink.runtime.taskexecutor.SlotStatus;
+import org.apache.flink.runtime.taskexecutor.TaskExecutorGateway;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.mockito.Mockito;
+
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Mockito.mock;
+
+public class SlotManagerTest {
+
+	private static final double DEFAULT_TESTING_CPU_CORES = 1.0;
+
+	private static final long DEFAULT_TESTING_MEMORY = 512;
+
+	private static final ResourceProfile DEFAULT_TESTING_PROFILE =
+		new ResourceProfile(DEFAULT_TESTING_CPU_CORES, DEFAULT_TESTING_MEMORY);
+
+	private static final ResourceProfile DEFAULT_TESTING_BIG_PROFILE =
+		new ResourceProfile(DEFAULT_TESTING_CPU_CORES * 2, DEFAULT_TESTING_MEMORY * 2);
+
+	private static TaskExecutorGateway taskExecutorGateway;
+
+	@BeforeClass
+	public static void setUp() {
+		taskExecutorGateway = Mockito.mock(TaskExecutorGateway.class);
+	}
+
+	/**
+	 * Tests that there are no free slots when we request, need to allocate from cluster manager master
+	 */
+	@Test
+	public void testRequestSlotWithoutFreeSlot() {
+		TestingSlotManager slotManager = new TestingSlotManager();
+		slotManager.requestSlot(new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE));
+
+		assertEquals(0, slotManager.getAllocatedSlotCount());
+		assertEquals(0, slotManager.getFreeSlotCount());
+		assertEquals(1, slotManager.getPendingRequestCount());
+		assertEquals(1, slotManager.getAllocatedContainers().size());
+		assertEquals(DEFAULT_TESTING_PROFILE, slotManager.getAllocatedContainers().get(0));
+	}
+
+	/**
+	 * Tests that there are some free slots when we request, and the request is fulfilled immediately
+	 */
+	@Test
+	public void testRequestSlotWithFreeSlot() {
+		TestingSlotManager slotManager = new TestingSlotManager();
+
+		directlyProvideFreeSlots(slotManager, DEFAULT_TESTING_PROFILE, 1);
+		assertEquals(1, slotManager.getFreeSlotCount());
+
+		slotManager.requestSlot(new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE));
+		assertEquals(1, slotManager.getAllocatedSlotCount());
+		assertEquals(0, slotManager.getFreeSlotCount());
+		assertEquals(0, slotManager.getPendingRequestCount());
+		assertEquals(0, slotManager.getAllocatedContainers().size());
+	}
+
+	/**
+	 * Tests that there are some free slots when we request, but none of them are suitable
+	 */
+	@Test
+	public void testRequestSlotWithoutSuitableSlot() {
+		TestingSlotManager slotManager = new TestingSlotManager();
+
+		directlyProvideFreeSlots(slotManager, DEFAULT_TESTING_PROFILE, 2);
+		assertEquals(2, slotManager.getFreeSlotCount());
+
+		slotManager.requestSlot(new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_BIG_PROFILE));
+		assertEquals(0, slotManager.getAllocatedSlotCount());
+		assertEquals(2, slotManager.getFreeSlotCount());
+		assertEquals(1, slotManager.getPendingRequestCount());
+		assertEquals(1, slotManager.getAllocatedContainers().size());
+		assertEquals(DEFAULT_TESTING_BIG_PROFILE, slotManager.getAllocatedContainers().get(0));
+	}
+
+	/**
+	 * Tests that we send duplicated slot request
+	 */
+	@Test
+	public void testDuplicatedSlotRequest() {
+		TestingSlotManager slotManager = new TestingSlotManager();
+		directlyProvideFreeSlots(slotManager, DEFAULT_TESTING_PROFILE, 1);
+
+		SlotRequest request1 = new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE);
+		SlotRequest request2 = new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_BIG_PROFILE);
+
+		slotManager.requestSlot(request1);
+		slotManager.requestSlot(request2);
+		slotManager.requestSlot(request2);
+		slotManager.requestSlot(request1);
+
+		assertEquals(1, slotManager.getAllocatedSlotCount());
+		assertEquals(0, slotManager.getFreeSlotCount());
+		assertEquals(1, slotManager.getPendingRequestCount());
+		assertEquals(1, slotManager.getAllocatedContainers().size());
+		assertEquals(DEFAULT_TESTING_BIG_PROFILE, slotManager.getAllocatedContainers().get(0));
+	}
+
+	/**
+	 * Tests that we send multiple slot requests
+	 */
+	@Test
+	public void testRequestMultipleSlots() {
+		TestingSlotManager slotManager = new TestingSlotManager();
+		directlyProvideFreeSlots(slotManager, DEFAULT_TESTING_PROFILE, 5);
+
+		// request 3 normal slots
+		for (int i = 0; i < 3; ++i) {
+			slotManager.requestSlot(new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE));
+		}
+
+		// request 2 big slots
+		for (int i = 0; i < 2; ++i) {
+			slotManager.requestSlot(new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_BIG_PROFILE));
+		}
+
+		// request 1 normal slot again
+		slotManager.requestSlot(new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE));
+
+		assertEquals(4, slotManager.getAllocatedSlotCount());
+		assertEquals(1, slotManager.getFreeSlotCount());
+		assertEquals(2, slotManager.getPendingRequestCount());
+		assertEquals(2, slotManager.getAllocatedContainers().size());
+		assertEquals(DEFAULT_TESTING_BIG_PROFILE, slotManager.getAllocatedContainers().get(0));
+		assertEquals(DEFAULT_TESTING_BIG_PROFILE, slotManager.getAllocatedContainers().get(1));
+	}
+
+	/**
+	 * Tests that a new slot appeared in SlotReport, and we used it to fulfill a pending request
+	 */
+	@Test
+	public void testNewlyAppearedFreeSlotFulfillPendingRequest() {
+		TestingSlotManager slotManager = new TestingSlotManager();
+		slotManager.requestSlot(new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE));
+		assertEquals(1, slotManager.getPendingRequestCount());
+
+		SlotID slotId = SlotID.generate();
+		slotManager.registerTaskExecutor(slotId.getResourceID(), taskExecutorGateway);
+		SlotStatus slotStatus = new SlotStatus(slotId, DEFAULT_TESTING_PROFILE);
+		slotManager.updateSlotStatus(slotStatus);
+
+		assertEquals(1, slotManager.getAllocatedSlotCount());
+		assertEquals(0, slotManager.getFreeSlotCount());
+		assertEquals(0, slotManager.getPendingRequestCount());
+		assertTrue(slotManager.isAllocated(slotId));
+	}
+
+	/**
+	 * Tests that a new slot appeared in SlotReport, but we have no pending request
+	 */
+	@Test
+	public void testNewlyAppearedFreeSlot() {
+		TestingSlotManager slotManager = new TestingSlotManager();
+
+		SlotID slotId = SlotID.generate();
+		slotManager.registerTaskExecutor(slotId.getResourceID(), taskExecutorGateway);
+		SlotStatus slotStatus = new SlotStatus(slotId, DEFAULT_TESTING_PROFILE);
+		slotManager.updateSlotStatus(slotStatus);
+
+		assertEquals(0, slotManager.getAllocatedSlotCount());
+		assertEquals(1, slotManager.getFreeSlotCount());
+	}
+
+	/**
+	 * Tests that a new slot appeared in SlotReport, but it't not suitable for all the pending requests
+	 */
+	@Test
+	public void testNewlyAppearedFreeSlotNotMatchPendingRequests() {
+		TestingSlotManager slotManager = new TestingSlotManager();
+		slotManager.requestSlot(new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_BIG_PROFILE));
+		assertEquals(1, slotManager.getPendingRequestCount());
+
+		SlotID slotId = SlotID.generate();
+		slotManager.registerTaskExecutor(slotId.getResourceID(), taskExecutorGateway);
+		SlotStatus slotStatus = new SlotStatus(slotId, DEFAULT_TESTING_PROFILE);
+		slotManager.updateSlotStatus(slotStatus);
+
+		assertEquals(0, slotManager.getAllocatedSlotCount());
+		assertEquals(1, slotManager.getFreeSlotCount());
+		assertEquals(1, slotManager.getPendingRequestCount());
+		assertFalse(slotManager.isAllocated(slotId));
+	}
+
+	/**
+	 * Tests that a new slot appeared in SlotReport, and it's been reported using by some job
+	 */
+	@Test
+	public void testNewlyAppearedInUseSlot() {
+		TestingSlotManager slotManager = new TestingSlotManager();
+
+		SlotID slotId = SlotID.generate();
+		slotManager.registerTaskExecutor(slotId.getResourceID(), taskExecutorGateway);
+		SlotStatus slotStatus = new SlotStatus(slotId, DEFAULT_TESTING_PROFILE, new JobID(), new AllocationID());
+		slotManager.updateSlotStatus(slotStatus);
+
+		assertEquals(1, slotManager.getAllocatedSlotCount());
+		assertEquals(0, slotManager.getFreeSlotCount());
+		assertTrue(slotManager.isAllocated(slotId));
+	}
+
+	/**
+	 * Tests that we had a slot in-use, and it's confirmed by SlotReport
+	 */
+	@Test
+	public void testExistingInUseSlotUpdateStatus() {
+		TestingSlotManager slotManager = new TestingSlotManager();
+		SlotRequest request = new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE);
+		slotManager.requestSlot(request);
+
+		// make this slot in use
+		SlotID slotId = SlotID.generate();
+		slotManager.registerTaskExecutor(slotId.getResourceID(), taskExecutorGateway);
+		SlotStatus slotStatus = new SlotStatus(slotId, DEFAULT_TESTING_PROFILE);
+		slotManager.updateSlotStatus(slotStatus);
+
+		assertEquals(1, slotManager.getAllocatedSlotCount());
+		assertEquals(0, slotManager.getFreeSlotCount());
+		assertTrue(slotManager.isAllocated(slotId));
+
+		// slot status is confirmed
+		SlotStatus slotStatus2 = new SlotStatus(slotId, DEFAULT_TESTING_PROFILE,
+			request.getJobId(), request.getAllocationId());
+		slotManager.updateSlotStatus(slotStatus2);
+
+		assertEquals(1, slotManager.getAllocatedSlotCount());
+		assertEquals(0, slotManager.getFreeSlotCount());
+		assertTrue(slotManager.isAllocated(slotId));
+	}
+
+	/**
+	 * Tests that we had a slot in-use, but it's empty according to the SlotReport
+	 */
+	@Test
+	public void testExistingInUseSlotAdjustedToEmpty() {
+		TestingSlotManager slotManager = new TestingSlotManager();
+		SlotRequest request1 = new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE);
+		slotManager.requestSlot(request1);
+
+		// make this slot in use
+		SlotID slotId = SlotID.generate();
+		slotManager.registerTaskExecutor(slotId.getResourceID(), taskExecutorGateway);
+		SlotStatus slotStatus = new SlotStatus(slotId, DEFAULT_TESTING_PROFILE);
+		slotManager.updateSlotStatus(slotStatus);
+
+		// another request pending
+		SlotRequest request2 = new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE);
+		slotManager.requestSlot(request2);
+
+		assertEquals(1, slotManager.getAllocatedSlotCount());
+		assertEquals(0, slotManager.getFreeSlotCount());
+		assertEquals(1, slotManager.getPendingRequestCount());
+		assertTrue(slotManager.isAllocated(slotId));
+		assertTrue(slotManager.isAllocated(request1.getAllocationId()));
+
+
+		// but slot is reported empty again, request2 will be fulfilled, request1 will be missing
+		slotManager.updateSlotStatus(slotStatus);
+
+		assertEquals(1, slotManager.getAllocatedSlotCount());
+		assertEquals(0, slotManager.getFreeSlotCount());
+		assertEquals(0, slotManager.getPendingRequestCount());
+		assertTrue(slotManager.isAllocated(slotId));
+		assertTrue(slotManager.isAllocated(request2.getAllocationId()));
+	}
+
+	/**
+	 * Tests that we had a slot in use, and it's also reported in use by TaskManager, but the allocation
+	 * information didn't match.
+	 */
+	@Test
+	public void testExistingInUseSlotWithDifferentAllocationInfo() {
+		TestingSlotManager slotManager = new TestingSlotManager();
+		SlotRequest request = new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE);
+		slotManager.requestSlot(request);
+
+		// make this slot in use
+		SlotID slotId = SlotID.generate();
+		slotManager.registerTaskExecutor(slotId.getResourceID(), taskExecutorGateway);
+		SlotStatus slotStatus = new SlotStatus(slotId, DEFAULT_TESTING_PROFILE);
+		slotManager.updateSlotStatus(slotStatus);
+
+		assertEquals(1, slotManager.getAllocatedSlotCount());
+		assertEquals(0, slotManager.getFreeSlotCount());
+		assertEquals(0, slotManager.getPendingRequestCount());
+		assertTrue(slotManager.isAllocated(slotId));
+		assertTrue(slotManager.isAllocated(request.getAllocationId()));
+
+		SlotStatus slotStatus2 = new SlotStatus(slotId, DEFAULT_TESTING_PROFILE, new JobID(), new AllocationID());
+		// update slot status with different allocation info
+		slotManager.updateSlotStatus(slotStatus2);
+
+		// original request is missing and won't be allocated
+		assertEquals(1, slotManager.getAllocatedSlotCount());
+		assertEquals(0, slotManager.getFreeSlotCount());
+		assertEquals(0, slotManager.getPendingRequestCount());
+		assertTrue(slotManager.isAllocated(slotId));
+		assertFalse(slotManager.isAllocated(request.getAllocationId()));
+		assertTrue(slotManager.isAllocated(slotStatus2.getAllocationID()));
+	}
+
+	/**
+	 * Tests that we had a free slot, and it's confirmed by SlotReport
+	 */
+	@Test
+	public void testExistingEmptySlotUpdateStatus() {
+		TestingSlotManager slotManager = new TestingSlotManager();
+		ResourceSlot slot = new ResourceSlot(SlotID.generate(), DEFAULT_TESTING_PROFILE, taskExecutorGateway);
+		slotManager.addFreeSlot(slot);
+
+		SlotStatus slotStatus = new SlotStatus(slot.getSlotId(), DEFAULT_TESTING_PROFILE);
+		slotManager.updateSlotStatus(slotStatus);
+
+		assertEquals(0, slotManager.getAllocatedSlotCount());
+		assertEquals(1, slotManager.getFreeSlotCount());
+		assertEquals(0, slotManager.getPendingRequestCount());
+	}
+
+	/**
+	 * Tests that we had a free slot, and it's reported in-use by TaskManager
+	 */
+	@Test
+	public void testExistingEmptySlotAdjustedToInUse() {
+		TestingSlotManager slotManager = new TestingSlotManager();
+		final SlotID slotID = SlotID.generate();
+		slotManager.registerTaskExecutor(slotID.getResourceID(), taskExecutorGateway);
+
+		ResourceSlot slot = new ResourceSlot(slotID, DEFAULT_TESTING_PROFILE, taskExecutorGateway);
+		slotManager.addFreeSlot(slot);
+
+		SlotStatus slotStatus = new SlotStatus(slot.getSlotId(), DEFAULT_TESTING_PROFILE,
+			new JobID(), new AllocationID());
+		slotManager.updateSlotStatus(slotStatus);
+
+		assertEquals(1, slotManager.getAllocatedSlotCount());
+		assertEquals(0, slotManager.getFreeSlotCount());
+		assertEquals(0, slotManager.getPendingRequestCount());
+		assertTrue(slotManager.isAllocated(slot.getSlotId()));
+	}
+
+	/**
+	 * Tests that we did some allocation but failed / rejected by TaskManager, request will retry
+	 */
+	@Test
+	public void testSlotAllocationFailedAtTaskManager() {
+		TestingSlotManager slotManager = new TestingSlotManager();
+		ResourceSlot slot = new ResourceSlot(SlotID.generate(), DEFAULT_TESTING_PROFILE, taskExecutorGateway);
+		slotManager.addFreeSlot(slot);
+
+		SlotRequest request = new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE);
+		slotManager.requestSlot(request);
+
+		assertEquals(1, slotManager.getAllocatedSlotCount());
+		assertEquals(0, slotManager.getFreeSlotCount());
+		assertEquals(0, slotManager.getPendingRequestCount());
+		assertTrue(slotManager.isAllocated(slot.getSlotId()));
+
+		slotManager.handleSlotRequestFailedAtTaskManager(request, slot.getSlotId());
+
+		assertEquals(1, slotManager.getAllocatedSlotCount());
+		assertEquals(0, slotManager.getFreeSlotCount());
+		assertEquals(0, slotManager.getPendingRequestCount());
+	}
+
+
+	/**
+	 * Tests that we did some allocation but failed / rejected by TaskManager, and slot is occupied by another request
+	 */
+	@Test
+	public void testSlotAllocationFailedAtTaskManagerOccupiedByOther() {
+		TestingSlotManager slotManager = new TestingSlotManager();
+		final SlotID slotID = SlotID.generate();
+		slotManager.registerTaskExecutor(slotID.getResourceID(), taskExecutorGateway);
+
+		ResourceSlot slot = new ResourceSlot(slotID, DEFAULT_TESTING_PROFILE, taskExecutorGateway);
+		slotManager.addFreeSlot(slot);
+
+		SlotRequest request = new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE);
+		slotManager.requestSlot(request);
+
+		// slot is set empty by heartbeat
+		SlotStatus slotStatus = new SlotStatus(slot.getSlotId(), slot.getResourceProfile());
+		slotManager.updateSlotStatus(slotStatus);
+
+		// another request took this slot
+		SlotRequest request2 = new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE);
+		slotManager.requestSlot(request2);
+
+		assertEquals(1, slotManager.getAllocatedSlotCount());
+		assertEquals(0, slotManager.getFreeSlotCount());
+		assertEquals(0, slotManager.getPendingRequestCount());
+		assertFalse(slotManager.isAllocated(request.getAllocationId()));
+		assertTrue(slotManager.isAllocated(request2.getAllocationId()));
+
+		// original request should be pended
+		slotManager.handleSlotRequestFailedAtTaskManager(request, slot.getSlotId());
+
+		assertEquals(1, slotManager.getAllocatedSlotCount());
+		assertEquals(0, slotManager.getFreeSlotCount());
+		assertEquals(1, slotManager.getPendingRequestCount());
+		assertFalse(slotManager.isAllocated(request.getAllocationId()));
+		assertTrue(slotManager.isAllocated(request2.getAllocationId()));
+	}
+
+	@Test
+	public void testNotifyTaskManagerFailure() {
+		TestingSlotManager slotManager = new TestingSlotManager();
+
+		ResourceID resource1 = ResourceID.generate();
+		ResourceID resource2 = ResourceID.generate();
+
+		ResourceSlot slot11 = new ResourceSlot(new SlotID(resource1, 1), DEFAULT_TESTING_PROFILE, taskExecutorGateway);
+		ResourceSlot slot12 = new ResourceSlot(new SlotID(resource1, 2), DEFAULT_TESTING_PROFILE, taskExecutorGateway);
+		ResourceSlot slot21 = new ResourceSlot(new SlotID(resource2, 1), DEFAULT_TESTING_PROFILE, taskExecutorGateway);
+		ResourceSlot slot22 = new ResourceSlot(new SlotID(resource2, 2), DEFAULT_TESTING_PROFILE, taskExecutorGateway);
+
+		slotManager.addFreeSlot(slot11);
+		slotManager.addFreeSlot(slot21);
+
+		slotManager.requestSlot(new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE));
+		slotManager.requestSlot(new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE));
+
+		assertEquals(2, slotManager.getAllocatedSlotCount());
+		assertEquals(0, slotManager.getFreeSlotCount());
+		assertEquals(0, slotManager.getPendingRequestCount());
+
+		slotManager.addFreeSlot(slot12);
+		slotManager.addFreeSlot(slot22);
+
+		assertEquals(2, slotManager.getAllocatedSlotCount());
+		assertEquals(2, slotManager.getFreeSlotCount());
+		assertEquals(0, slotManager.getPendingRequestCount());
+
+		slotManager.notifyTaskManagerFailure(resource2);
+
+		assertEquals(1, slotManager.getAllocatedSlotCount());
+		assertEquals(1, slotManager.getFreeSlotCount());
+		assertEquals(0, slotManager.getPendingRequestCount());
+
+		// notify an not exist resource failure
+		slotManager.notifyTaskManagerFailure(ResourceID.generate());
+
+		assertEquals(1, slotManager.getAllocatedSlotCount());
+		assertEquals(1, slotManager.getFreeSlotCount());
+		assertEquals(0, slotManager.getPendingRequestCount());
+	}
+
+	// ------------------------------------------------------------------------
+	//  testing utilities
+	// ------------------------------------------------------------------------
+
+	private void directlyProvideFreeSlots(
+		final SlotManager slotManager,
+		final ResourceProfile resourceProfile,
+		final int freeSlotNum)
+	{
+		for (int i = 0; i < freeSlotNum; ++i) {
+			slotManager.addFreeSlot(new ResourceSlot(SlotID.generate(), new ResourceProfile(resourceProfile), taskExecutorGateway));
+		}
+	}
+
+	// ------------------------------------------------------------------------
+	//  testing classes
+	// ------------------------------------------------------------------------
+
+	private static class TestingSlotManager extends SlotManager {
+
+		private final List<ResourceProfile> allocatedContainers;
+
+		TestingSlotManager() {
+			this.allocatedContainers = new LinkedList<>();
+		}
+
+		/**
+		 * Choose slot randomly if it matches requirement
+		 *
+		 * @param request   The slot request
+		 * @param freeSlots All slots which can be used
+		 * @return The chosen slot or null if cannot find a match
+		 */
+		@Override
+		protected ResourceSlot chooseSlotToUse(SlotRequest request, Map<SlotID, ResourceSlot> freeSlots) {
+			for (ResourceSlot slot : freeSlots.values()) {
+				if (slot.isMatchingRequirement(request.getResourceProfile())) {
+					return slot;
+				}
+			}
+			return null;
+		}
+
+		/**
+		 * Choose request randomly if offered slot can match its requirement
+		 *
+		 * @param offeredSlot     The free slot
+		 * @param pendingRequests All the pending slot requests
+		 * @return The chosen request's AllocationID or null if cannot find a match
+		 */
+		@Override
+		protected SlotRequest chooseRequestToFulfill(ResourceSlot offeredSlot,
+			Map<AllocationID, SlotRequest> pendingRequests)
+		{
+			for (Map.Entry<AllocationID, SlotRequest> pendingRequest : pendingRequests.entrySet()) {
+				if (offeredSlot.isMatchingRequirement(pendingRequest.getValue().getResourceProfile())) {
+					return pendingRequest.getValue();
+				}
+			}
+			return null;
+		}
+
+		@Override
+		protected void allocateContainer(ResourceProfile resourceProfile) {
+			allocatedContainers.add(resourceProfile);
+		}
+
+		List<ResourceProfile> getAllocatedContainers() {
+			return allocatedContainers;
+		}
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/0a134a32/flink-runtime/src/test/java/org/apache/flink/runtime/resourcemanager/slotmanager/SlotProtocolTest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/resourcemanager/slotmanager/SlotProtocolTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/resourcemanager/slotmanager/SlotProtocolTest.java
new file mode 100644
index 0000000..85d2880
--- /dev/null
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/resourcemanager/slotmanager/SlotProtocolTest.java
@@ -0,0 +1,225 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.flink.runtime.resourcemanager.slotmanager;
+
+import org.apache.flink.api.common.JobID;
+import org.apache.flink.runtime.clusterframework.types.AllocationID;
+import org.apache.flink.runtime.clusterframework.types.ResourceID;
+import org.apache.flink.runtime.clusterframework.types.ResourceProfile;
+import org.apache.flink.runtime.clusterframework.types.SlotID;
+import org.apache.flink.runtime.highavailability.NonHaServices;
+import org.apache.flink.runtime.jobmaster.JobMasterGateway;
+import org.apache.flink.runtime.resourcemanager.JobMasterRegistration;
+import org.apache.flink.runtime.resourcemanager.RegistrationResponse;
+import org.apache.flink.runtime.resourcemanager.ResourceManager;
+import org.apache.flink.runtime.resourcemanager.SlotRequest;
+import org.apache.flink.runtime.resourcemanager.SlotRequestReply;
+import org.apache.flink.runtime.rpc.TestingSerialRpcService;
+import org.apache.flink.runtime.taskexecutor.SlotReport;
+import org.apache.flink.runtime.taskexecutor.SlotStatus;
+import org.apache.flink.runtime.taskexecutor.TaskExecutorGateway;
+import org.apache.flink.util.TestLogger;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.mockito.Mockito;
+import scala.concurrent.Await;
+import scala.concurrent.Future;
+import scala.concurrent.duration.Duration;
+import scala.concurrent.duration.FiniteDuration;
+
+import java.util.Collections;
+import java.util.UUID;
+import java.util.concurrent.TimeUnit;
+
+import static org.mockito.Matchers.any;
+import static org.mockito.Matchers.eq;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.timeout;
+import static org.mockito.Mockito.verify;
+
+public class SlotProtocolTest extends TestLogger {
+
+	private static TestingSerialRpcService testRpcService;
+
+	@BeforeClass
+	public static void beforeClass() {
+		testRpcService = new TestingSerialRpcService();
+	}
+
+	@AfterClass
+	public static void afterClass() {
+		testRpcService.stopService();
+		testRpcService = null;
+	}
+
+	@Before
+	public void beforeTest(){
+		testRpcService.clearGateways();
+	}
+
+	/**
+	 * Tests whether
+	 * 1) SlotRequest is routed to the SlotManager
+	 * 2) SlotRequest is confirmed
+	 * 3) SlotRequest leads to a container allocation
+	 * 4) Slot becomes available and TaskExecutor gets a SlotRequest
+	 */
+	@Test
+	public void testSlotsUnavailableRequest() throws Exception {
+		final String rmAddress = "/rm1";
+		final String jmAddress = "/jm1";
+		final JobID jobID = new JobID();
+
+		testRpcService.registerGateway(jmAddress, mock(JobMasterGateway.class));
+
+
+		TestingSlotManager slotManager = Mockito.spy(new TestingSlotManager());
+		ResourceManager resourceManager =
+			new ResourceManager(testRpcService, new NonHaServices(rmAddress), slotManager);
+		resourceManager.start();
+
+		Future<RegistrationResponse> registrationFuture =
+			resourceManager.registerJobMaster(new JobMasterRegistration(jmAddress, jobID));
+		try {
+			Await.ready(registrationFuture, Duration.create(5, TimeUnit.SECONDS));
+		} catch (Exception e) {
+			Assert.fail("JobManager registration Future didn't become ready.");
+		}
+
+		final AllocationID allocationID = new AllocationID();
+		final ResourceProfile resourceProfile = new ResourceProfile(1.0, 100);
+
+		SlotRequest slotRequest = new SlotRequest(jobID, allocationID, resourceProfile);
+		SlotRequestReply slotRequestReply =
+			resourceManager.requestSlot(slotRequest);
+
+		// 1) SlotRequest is routed to the SlotManager
+		verify(slotManager).requestSlot(slotRequest);
+
+		// 2) SlotRequest is confirmed
+		Assert.assertEquals(
+			slotRequestReply.getAllocationID(),
+			allocationID);
+
+		// 3) SlotRequest leads to a container allocation
+		verify(slotManager, timeout(5000)).allocateContainer(resourceProfile);
+
+		Assert.assertFalse(slotManager.isAllocated(allocationID));
+
+		// slot becomes available
+		final String tmAddress = "/tm1";
+		TaskExecutorGateway taskExecutorGateway = mock(TaskExecutorGateway.class);
+		testRpcService.registerGateway(tmAddress, taskExecutorGateway);
+
+		final ResourceID resourceID = ResourceID.generate();
+		final SlotID slotID = new SlotID(resourceID, 0);
+
+		final SlotStatus slotStatus =
+			new SlotStatus(slotID, resourceProfile);
+		final SlotReport slotReport =
+			new SlotReport(Collections.singletonList(slotStatus), resourceID);
+		// register slot at SlotManager
+		slotManager.registerTaskExecutor(resourceID, taskExecutorGateway);
+		slotManager.updateSlotStatus(slotReport);
+
+		// 4) Slot becomes available and TaskExecutor gets a SlotRequest
+		verify(taskExecutorGateway, timeout(5000)).requestSlot(eq(allocationID), any(UUID.class), any(FiniteDuration.class));
+	}
+
+	/**
+	 * Tests whether
+	 * 1) a SlotRequest is routed to the SlotManager
+	 * 2) a SlotRequest is confirmed
+	 * 3) a SlotRequest leads to an allocation of a registered slot
+	 * 4) a SlotRequest is routed to the TaskExecutor
+	 */
+	@Test
+	public void testSlotAvailableRequest() throws Exception {
+		final String rmAddress = "/rm1";
+		final String jmAddress = "/jm1";
+		final String tmAddress = "/tm1";
+		final JobID jobID = new JobID();
+
+		testRpcService.registerGateway(jmAddress, mock(JobMasterGateway.class));
+
+		TaskExecutorGateway taskExecutorGateway = mock(TaskExecutorGateway.class);
+		testRpcService.registerGateway(tmAddress, taskExecutorGateway);
+
+		TestingSlotManager slotManager = Mockito.spy(new TestingSlotManager());
+		ResourceManager resourceManager =
+			new ResourceManager(testRpcService, new NonHaServices(rmAddress), slotManager);
+		resourceManager.start();
+
+		Future<RegistrationResponse> registrationFuture =
+			resourceManager.registerJobMaster(new JobMasterRegistration(jmAddress, jobID));
+		try {
+			Await.ready(registrationFuture, Duration.create(5, TimeUnit.SECONDS));
+		} catch (Exception e) {
+			Assert.fail("JobManager registration Future didn't become ready.");
+		}
+
+		final ResourceID resourceID = ResourceID.generate();
+		final AllocationID allocationID = new AllocationID();
+		final ResourceProfile resourceProfile = new ResourceProfile(1.0, 100);
+		final SlotID slotID = new SlotID(resourceID, 0);
+
+		final SlotStatus slotStatus =
+			new SlotStatus(slotID, resourceProfile);
+		final SlotReport slotReport =
+			new SlotReport(Collections.singletonList(slotStatus), resourceID);
+		// register slot at SlotManager
+		slotManager.registerTaskExecutor(resourceID, taskExecutorGateway);
+		slotManager.updateSlotStatus(slotReport);
+
+		SlotRequest slotRequest = new SlotRequest(jobID, allocationID, resourceProfile);
+		SlotRequestReply slotRequestReply =
+			resourceManager.requestSlot(slotRequest);
+
+		// 1) a SlotRequest is routed to the SlotManager
+		verify(slotManager).requestSlot(slotRequest);
+
+		// 2) a SlotRequest is confirmed
+		Assert.assertEquals(
+			slotRequestReply.getAllocationID(),
+			allocationID);
+
+		// 3) a SlotRequest leads to an allocation of a registered slot
+		Assert.assertTrue(slotManager.isAllocated(slotID));
+		Assert.assertTrue(slotManager.isAllocated(allocationID));
+
+
+		// 4) a SlotRequest is routed to the TaskExecutor
+		verify(taskExecutorGateway, timeout(5000)).requestSlot(eq(allocationID), any(UUID.class), any(FiniteDuration.class));
+	}
+
+
+	private static class TestingSlotManager extends SimpleSlotManager {
+
+		// change visibility of function to public for testing
+		@Override
+		public void allocateContainer(ResourceProfile resourceProfile) {
+			super.allocateContainer(resourceProfile);
+		}
+
+
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/0a134a32/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/TestingRpcService.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/TestingRpcService.java b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/TestingRpcService.java
index 7e92e8d..2212680 100644
--- a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/TestingRpcService.java
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/TestingRpcService.java
@@ -112,4 +112,8 @@ public class TestingRpcService extends AkkaRpcService {
 			return Futures.failed(new Exception("No gateway registered under that name"));
 		}
 	}
-}
\ No newline at end of file
+
+	public void clearGateways() {
+		registeredConnections.clear();
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/0a134a32/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/TestingSerialRpcService.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/TestingSerialRpcService.java b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/TestingSerialRpcService.java
index 955edcc..01776ed 100644
--- a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/TestingSerialRpcService.java
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/TestingSerialRpcService.java
@@ -137,6 +137,10 @@ public class TestingSerialRpcService implements RpcService {
 		}
 	}
 
+	public void clearGateways() {
+		registeredConnections.clear();
+	}
+
 	private static final class TestingSerialInvocationHandler<C extends RpcGateway, T extends RpcEndpoint<C>> implements InvocationHandler, RpcGateway, MainThreadExecutor, StartStoppable {
 
 		private final T rpcEndpoint;


[38/50] [abbrv] flink git commit: [FLINK-4529] [flip-6] Move TaskExecutor, JobMaster and ResourceManager out of the rpc package

Posted by tr...@apache.org.
[FLINK-4529] [flip-6] Move TaskExecutor, JobMaster and ResourceManager out of the rpc package

The TaskExecutor, the JobMaster and the ResourceManager were still contained in the rpc
package. With this commit, they will be moved out of this package. Now they are contained
in dedicated packages on the o.a.f.runtime level.

This closes #2438.


Project: http://git-wip-us.apache.org/repos/asf/flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/2f12ba3e
Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/2f12ba3e
Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/2f12ba3e

Branch: refs/heads/flip-6
Commit: 2f12ba3e91b99cb4916ecbc733a1c3b53a304016
Parents: ce92a75
Author: Till Rohrmann <tr...@apache.org>
Authored: Mon Aug 29 16:35:29 2016 +0200
Committer: Till Rohrmann <tr...@apache.org>
Committed: Wed Sep 21 11:39:16 2016 +0200

----------------------------------------------------------------------
 .../runtime/clusterframework/SlotManager.java   | 525 ------------
 .../flink/runtime/jobmaster/JobMaster.java      | 244 ++++++
 .../runtime/jobmaster/JobMasterGateway.java     |  45 +
 .../registration/RegistrationResponse.java      |  84 ++
 .../registration/RetryingRegistration.java      | 296 +++++++
 .../resourcemanager/JobMasterRegistration.java  |  35 +
 .../resourcemanager/RegistrationResponse.java   |  43 +
 .../resourcemanager/ResourceManager.java        | 214 +++++
 .../resourcemanager/ResourceManagerGateway.java |  77 ++
 .../runtime/resourcemanager/SlotAssignment.java |  25 +
 .../runtime/resourcemanager/SlotManager.java    | 523 ++++++++++++
 .../runtime/resourcemanager/SlotRequest.java    |  74 ++
 .../flink/runtime/rpc/jobmaster/JobMaster.java  | 244 ------
 .../runtime/rpc/jobmaster/JobMasterGateway.java |  45 -
 .../rpc/registration/RegistrationResponse.java  |  84 --
 .../rpc/registration/RetryingRegistration.java  | 296 -------
 .../resourcemanager/JobMasterRegistration.java  |  35 -
 .../resourcemanager/RegistrationResponse.java   |  43 -
 .../rpc/resourcemanager/ResourceManager.java    | 214 -----
 .../resourcemanager/ResourceManagerGateway.java |  77 --
 .../rpc/resourcemanager/SlotAssignment.java     |  25 -
 .../rpc/resourcemanager/SlotRequest.java        |  74 --
 .../runtime/rpc/taskexecutor/SlotReport.java    |  56 --
 .../runtime/rpc/taskexecutor/SlotStatus.java    | 129 ---
 .../runtime/rpc/taskexecutor/TaskExecutor.java  | 827 -------------------
 .../taskexecutor/TaskExecutorConfiguration.java | 151 ----
 .../rpc/taskexecutor/TaskExecutorGateway.java   |  35 -
 .../TaskExecutorRegistrationSuccess.java        |  75 --
 ...TaskExecutorToResourceManagerConnection.java | 198 -----
 .../flink/runtime/taskexecutor/SlotReport.java  |  56 ++
 .../flink/runtime/taskexecutor/SlotStatus.java  | 129 +++
 .../runtime/taskexecutor/TaskExecutor.java      | 827 +++++++++++++++++++
 .../taskexecutor/TaskExecutorConfiguration.java | 151 ++++
 .../taskexecutor/TaskExecutorGateway.java       |  35 +
 .../TaskExecutorRegistrationSuccess.java        |  75 ++
 ...TaskExecutorToResourceManagerConnection.java | 198 +++++
 .../clusterframework/ClusterShutdownITCase.java | 156 ++++
 .../clusterframework/ResourceManagerITCase.java | 162 ++++
 .../clusterframework/ResourceManagerTest.java   | 338 ++++++++
 .../clusterframework/SlotManagerTest.java       | 540 ------------
 .../registration/RetryingRegistrationTest.java  | 336 ++++++++
 .../registration/TestRegistrationGateway.java   |  85 ++
 .../resourcemanager/ClusterShutdownITCase.java  | 156 ----
 .../resourcemanager/ResourceManagerHATest.java  |  76 ++
 .../resourcemanager/ResourceManagerITCase.java  | 162 ----
 .../resourcemanager/ResourceManagerTest.java    | 338 --------
 .../resourcemanager/SlotManagerTest.java        | 538 ++++++++++++
 .../runtime/rpc/akka/AkkaRpcServiceTest.java    |  14 -
 .../registration/RetryingRegistrationTest.java  | 336 --------
 .../registration/TestRegistrationGateway.java   |  85 --
 .../resourcemanager/ResourceManagerHATest.java  |  76 --
 .../rpc/taskexecutor/TaskExecutorTest.java      | 117 ---
 .../runtime/taskexecutor/TaskExecutorTest.java  | 117 +++
 53 files changed, 4939 insertions(+), 4957 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/flink/blob/2f12ba3e/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/SlotManager.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/SlotManager.java b/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/SlotManager.java
deleted file mode 100644
index cc140a1..0000000
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/SlotManager.java
+++ /dev/null
@@ -1,525 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.flink.runtime.clusterframework;
-
-import org.apache.flink.annotation.VisibleForTesting;
-import org.apache.flink.runtime.clusterframework.types.AllocationID;
-import org.apache.flink.runtime.clusterframework.types.ResourceID;
-import org.apache.flink.runtime.clusterframework.types.ResourceProfile;
-import org.apache.flink.runtime.clusterframework.types.ResourceSlot;
-import org.apache.flink.runtime.clusterframework.types.SlotID;
-import org.apache.flink.runtime.rpc.resourcemanager.ResourceManagerGateway;
-import org.apache.flink.runtime.rpc.resourcemanager.SlotRequest;
-import org.apache.flink.runtime.rpc.taskexecutor.SlotReport;
-import org.apache.flink.runtime.rpc.taskexecutor.SlotStatus;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.util.HashMap;
-import java.util.LinkedHashMap;
-import java.util.Map;
-
-import static org.apache.flink.util.Preconditions.checkNotNull;
-
-/**
- * SlotManager is responsible for receiving slot requests and do slot allocations. It allows to request
- * slots from registered TaskManagers and issues container allocation requests in case of there are not
- * enough available slots. Besides, it should sync its slot allocation with TaskManager's heartbeat.
- * <p>
- * The main operation principle of SlotManager is:
- * <ul>
- * <li>1. All slot allocation status should be synced with TaskManager, which is the ground truth.</li>
- * <li>2. All slots that have registered must be tracked, either by free pool or allocated pool.</li>
- * <li>3. All slot requests will be handled by best efforts, there is no guarantee that one request will be
- * fulfilled in time or correctly allocated. Conflicts or timeout or some special error will happen, it should
- * be handled outside SlotManager. SlotManager will make each decision based on the information it currently
- * holds.</li>
- * </ul>
- * <b>IMPORTANT:</b> This class is <b>Not Thread-safe</b>.
- */
-public abstract class SlotManager {
-
-	private static final Logger LOG = LoggerFactory.getLogger(SlotManager.class);
-
-	/** Gateway to communicate with ResourceManager */
-	private final ResourceManagerGateway resourceManagerGateway;
-
-	/** All registered slots, including free and allocated slots */
-	private final Map<ResourceID, Map<SlotID, ResourceSlot>> registeredSlots;
-
-	/** All pending slot requests, waiting available slots to fulfil */
-	private final Map<AllocationID, SlotRequest> pendingSlotRequests;
-
-	/** All free slots that can be used to be allocated */
-	private final Map<SlotID, ResourceSlot> freeSlots;
-
-	/** All allocations, we can lookup allocations either by SlotID or AllocationID */
-	private final AllocationMap allocationMap;
-
-	public SlotManager(ResourceManagerGateway resourceManagerGateway) {
-		this.resourceManagerGateway = checkNotNull(resourceManagerGateway);
-		this.registeredSlots = new HashMap<>(16);
-		this.pendingSlotRequests = new LinkedHashMap<>(16);
-		this.freeSlots = new HashMap<>(16);
-		this.allocationMap = new AllocationMap();
-	}
-
-	// ------------------------------------------------------------------------
-	//  slot managements
-	// ------------------------------------------------------------------------
-
-	/**
-	 * Request a slot with requirements, we may either fulfill the request or pending it. Trigger container
-	 * allocation if we don't have enough resource. If we have free slot which can match the request, record
-	 * this allocation and forward the request to TaskManager through ResourceManager (we want this done by
-	 * RPC's main thread to avoid race condition).
-	 *
-	 * @param request The detailed request of the slot
-	 */
-	public void requestSlot(final SlotRequest request) {
-		if (isRequestDuplicated(request)) {
-			LOG.warn("Duplicated slot request, AllocationID:{}", request.getAllocationId());
-			return;
-		}
-
-		// try to fulfil the request with current free slots
-		ResourceSlot slot = chooseSlotToUse(request, freeSlots);
-		if (slot != null) {
-			LOG.info("Assigning SlotID({}) to AllocationID({}), JobID:{}", slot.getSlotId(),
-				request.getAllocationId(), request.getJobId());
-
-			// record this allocation in bookkeeping
-			allocationMap.addAllocation(slot.getSlotId(), request.getAllocationId());
-
-			// remove selected slot from free pool
-			freeSlots.remove(slot.getSlotId());
-
-			// TODO: send slot request to TaskManager
-		} else {
-			LOG.info("Cannot fulfil slot request, try to allocate a new container for it, " +
-				"AllocationID:{}, JobID:{}", request.getAllocationId(), request.getJobId());
-			allocateContainer(request.getResourceProfile());
-			pendingSlotRequests.put(request.getAllocationId(), request);
-		}
-	}
-
-	/**
-	 * Sync slot status with TaskManager's SlotReport.
-	 */
-	public void updateSlotStatus(final SlotReport slotReport) {
-		for (SlotStatus slotStatus : slotReport.getSlotsStatus()) {
-			updateSlotStatus(slotStatus);
-		}
-	}
-
-	/**
-	 * The slot request to TaskManager may be either failed by rpc communication (timeout, network error, etc.)
-	 * or really rejected by TaskManager. We shall retry this request by:
-	 * <ul>
-	 * <li>1. verify and clear all the previous allocate information for this request
-	 * <li>2. try to request slot again
-	 * </ul>
-	 * <p>
-	 * This may cause some duplicate allocation, e.g. the slot request to TaskManager is successful but the response
-	 * is lost somehow, so we may request a slot in another TaskManager, this causes two slots assigned to one request,
-	 * but it can be taken care of by rejecting registration at JobManager.
-	 *
-	 * @param originalRequest The original slot request
-	 * @param slotId          The target SlotID
-	 */
-	public void handleSlotRequestFailedAtTaskManager(final SlotRequest originalRequest, final SlotID slotId) {
-		final AllocationID originalAllocationId = originalRequest.getAllocationId();
-		LOG.info("Slot request failed at TaskManager, SlotID:{}, AllocationID:{}, JobID:{}",
-			slotId, originalAllocationId, originalRequest.getJobId());
-
-		// verify the allocation info before we do anything
-		if (freeSlots.containsKey(slotId)) {
-			// this slot is currently empty, no need to de-allocate it from our allocations
-			LOG.info("Original slot is somehow empty, retrying this request");
-
-			// before retry, we should double check whether this request was allocated by some other ways
-			if (!allocationMap.isAllocated(originalAllocationId)) {
-				requestSlot(originalRequest);
-			} else {
-				LOG.info("The failed request has somehow been allocated, SlotID:{}",
-					allocationMap.getSlotID(originalAllocationId));
-			}
-		} else if (allocationMap.isAllocated(slotId)) {
-			final AllocationID currentAllocationId = allocationMap.getAllocationID(slotId);
-
-			// check whether we have an agreement on whom this slot belongs to
-			if (originalAllocationId.equals(currentAllocationId)) {
-				LOG.info("De-allocate this request and retry");
-				allocationMap.removeAllocation(currentAllocationId);
-
-				// put this slot back to free pool
-				ResourceSlot slot = checkNotNull(getRegisteredSlot(slotId));
-				freeSlots.put(slotId, slot);
-
-				// retry the request
-				requestSlot(originalRequest);
-			} else {
-				// the slot is taken by someone else, no need to de-allocate it from our allocations
-				LOG.info("Original slot is taken by someone else, current AllocationID:{}", currentAllocationId);
-
-				// before retry, we should double check whether this request was allocated by some other ways
-				if (!allocationMap.isAllocated(originalAllocationId)) {
-					requestSlot(originalRequest);
-				} else {
-					LOG.info("The failed request is somehow been allocated, SlotID:{}",
-						allocationMap.getSlotID(originalAllocationId));
-				}
-			}
-		} else {
-			LOG.error("BUG! {} is neither in free pool nor in allocated pool", slotId);
-		}
-	}
-
-	/**
-	 * Callback for TaskManager failures. In case that a TaskManager fails, we have to clean up all its slots.
-	 *
-	 * @param resourceId The ResourceID of the TaskManager
-	 */
-	public void notifyTaskManagerFailure(final ResourceID resourceId) {
-		LOG.info("Resource:{} been notified failure", resourceId);
-		final Map<SlotID, ResourceSlot> slotIdsToRemove = registeredSlots.remove(resourceId);
-		if (slotIdsToRemove != null) {
-			for (SlotID slotId : slotIdsToRemove.keySet()) {
-				LOG.info("Removing Slot:{} upon resource failure", slotId);
-				if (freeSlots.containsKey(slotId)) {
-					freeSlots.remove(slotId);
-				} else if (allocationMap.isAllocated(slotId)) {
-					allocationMap.removeAllocation(slotId);
-				} else {
-					LOG.error("BUG! {} is neither in free pool nor in allocated pool", slotId);
-				}
-			}
-		}
-	}
-
-	// ------------------------------------------------------------------------
-	//  internal behaviors
-	// ------------------------------------------------------------------------
-
-	/**
-	 * Update slot status based on TaskManager's report. There are mainly two situations when we receive the report:
-	 * <ul>
-	 * <li>1. The slot is newly registered.</li>
-	 * <li>2. The slot has registered, it contains its current status.</li>
-	 * </ul>
-	 * <p>
-	 * Regarding 1: It's fairly simple, we just record this slot's status, and trigger schedule if slot is empty.
-	 * <p>
-	 * Regarding 2: It will cause some weird situation since we may have some time-gap on how the slot's status really
-	 * is. We may have some updates on the slot's allocation, but it doesn't reflected by TaskManager's heartbeat yet,
-	 * and we may make some wrong decision if we cannot guarantee we have the exact status about all the slots. So
-	 * the principle here is: We always trust TaskManager's heartbeat, we will correct our information based on that
-	 * and take next action based on the diff between our information and heartbeat status.
-	 *
-	 * @param reportedStatus Reported slot status
-	 */
-	void updateSlotStatus(final SlotStatus reportedStatus) {
-		final SlotID slotId = reportedStatus.getSlotID();
-		final ResourceSlot slot = new ResourceSlot(slotId, reportedStatus.getProfiler());
-
-		if (registerNewSlot(slot)) {
-			// we have a newly registered slot
-			LOG.info("New slot appeared, SlotID:{}, AllocationID:{}", slotId, reportedStatus.getAllocationID());
-
-			if (reportedStatus.getAllocationID() != null) {
-				// slot in use, record this in bookkeeping
-				allocationMap.addAllocation(slotId, reportedStatus.getAllocationID());
-			} else {
-				handleFreeSlot(new ResourceSlot(slotId, reportedStatus.getProfiler()));
-			}
-		} else {
-			// slot exists, update current information
-			if (reportedStatus.getAllocationID() != null) {
-				// slot is reported in use
-				final AllocationID reportedAllocationId = reportedStatus.getAllocationID();
-
-				// check whether we also thought this slot is in use
-				if (allocationMap.isAllocated(slotId)) {
-					// we also think that slot is in use, check whether the AllocationID matches
-					final AllocationID currentAllocationId = allocationMap.getAllocationID(slotId);
-
-					if (!reportedAllocationId.equals(currentAllocationId)) {
-						LOG.info("Slot allocation info mismatch! SlotID:{}, current:{}, reported:{}",
-							slotId, currentAllocationId, reportedAllocationId);
-
-						// seems we have a disagreement about the slot assignments, need to correct it
-						allocationMap.removeAllocation(slotId);
-						allocationMap.addAllocation(slotId, reportedAllocationId);
-					}
-				} else {
-					LOG.info("Slot allocation info mismatch! SlotID:{}, current:null, reported:{}",
-						slotId, reportedAllocationId);
-
-					// we thought the slot is free, should correct this information
-					allocationMap.addAllocation(slotId, reportedStatus.getAllocationID());
-
-					// remove this slot from free slots pool
-					freeSlots.remove(slotId);
-				}
-			} else {
-				// slot is reported empty
-
-				// check whether we also thought this slot is empty
-				if (allocationMap.isAllocated(slotId)) {
-					LOG.info("Slot allocation info mismatch! SlotID:{}, current:{}, reported:null",
-						slotId, allocationMap.getAllocationID(slotId));
-
-					// we thought the slot is in use, correct it
-					allocationMap.removeAllocation(slotId);
-
-					// we have a free slot!
-					handleFreeSlot(new ResourceSlot(slotId, reportedStatus.getProfiler()));
-				}
-			}
-		}
-	}
-
-	/**
-	 * When we have a free slot, try to fulfill the pending request first. If any request can be fulfilled,
-	 * record this allocation in bookkeeping and send slot request to TaskManager, else we just add this slot
-	 * to the free pool.
-	 *
-	 * @param freeSlot The free slot
-	 */
-	private void handleFreeSlot(final ResourceSlot freeSlot) {
-		SlotRequest chosenRequest = chooseRequestToFulfill(freeSlot, pendingSlotRequests);
-
-		if (chosenRequest != null) {
-			pendingSlotRequests.remove(chosenRequest.getAllocationId());
-
-			LOG.info("Assigning SlotID({}) to AllocationID({}), JobID:{}", freeSlot.getSlotId(),
-				chosenRequest.getAllocationId(), chosenRequest.getJobId());
-			allocationMap.addAllocation(freeSlot.getSlotId(), chosenRequest.getAllocationId());
-
-			// TODO: send slot request to TaskManager
-		} else {
-			freeSlots.put(freeSlot.getSlotId(), freeSlot);
-		}
-	}
-
-	/**
-	 * Check whether the request is duplicated. We use AllocationID to identify slot request, for each
-	 * formerly received slot request, it is either in pending list or already been allocated.
-	 *
-	 * @param request The slot request
-	 * @return <tt>true</tt> if the request is duplicated
-	 */
-	private boolean isRequestDuplicated(final SlotRequest request) {
-		final AllocationID allocationId = request.getAllocationId();
-		return pendingSlotRequests.containsKey(allocationId)
-			|| allocationMap.isAllocated(allocationId);
-	}
-
-	/**
-	 * Try to register slot, and tell if this slot is newly registered.
-	 *
-	 * @param slot The ResourceSlot which will be checked and registered
-	 * @return <tt>true</tt> if we meet a new slot
-	 */
-	private boolean registerNewSlot(final ResourceSlot slot) {
-		final SlotID slotId = slot.getSlotId();
-		final ResourceID resourceId = slotId.getResourceID();
-		if (!registeredSlots.containsKey(resourceId)) {
-			registeredSlots.put(resourceId, new HashMap<SlotID, ResourceSlot>());
-		}
-		return registeredSlots.get(resourceId).put(slotId, slot) == null;
-	}
-
-	private ResourceSlot getRegisteredSlot(final SlotID slotId) {
-		final ResourceID resourceId = slotId.getResourceID();
-		if (!registeredSlots.containsKey(resourceId)) {
-			return null;
-		}
-		return registeredSlots.get(resourceId).get(slotId);
-	}
-
-	// ------------------------------------------------------------------------
-	//  Framework specific behavior
-	// ------------------------------------------------------------------------
-
-	/**
-	 * Choose a slot to use among all free slots, the behavior is framework specified.
-	 *
-	 * @param request   The slot request
-	 * @param freeSlots All slots which can be used
-	 * @return The slot we choose to use, <tt>null</tt> if we did not find a match
-	 */
-	protected abstract ResourceSlot chooseSlotToUse(final SlotRequest request,
-		final Map<SlotID, ResourceSlot> freeSlots);
-
-	/**
-	 * Choose a pending request to fulfill when we have a free slot, the behavior is framework specified.
-	 *
-	 * @param offeredSlot     The free slot
-	 * @param pendingRequests All the pending slot requests
-	 * @return The chosen SlotRequest, <tt>null</tt> if we did not find a match
-	 */
-	protected abstract SlotRequest chooseRequestToFulfill(final ResourceSlot offeredSlot,
-		final Map<AllocationID, SlotRequest> pendingRequests);
-
-	/**
-	 * The framework specific code for allocating a container for specified resource profile.
-	 *
-	 * @param resourceProfile The resource profile
-	 */
-	protected abstract void allocateContainer(final ResourceProfile resourceProfile);
-
-
-	// ------------------------------------------------------------------------
-	//  Helper classes
-	// ------------------------------------------------------------------------
-
-	/**
-	 * We maintain all the allocations with SlotID and AllocationID. We are able to get or remove the allocation info
-	 * either by SlotID or AllocationID.
-	 */
-	private static class AllocationMap {
-
-		/** All allocated slots (by SlotID) */
-		private final Map<SlotID, AllocationID> allocatedSlots;
-
-		/** All allocated slots (by AllocationID), it'a a inverse view of allocatedSlots */
-		private final Map<AllocationID, SlotID> allocatedSlotsByAllocationId;
-
-		AllocationMap() {
-			this.allocatedSlots = new HashMap<>(16);
-			this.allocatedSlotsByAllocationId = new HashMap<>(16);
-		}
-
-		/**
-		 * Add a allocation
-		 *
-		 * @param slotId       The slot id
-		 * @param allocationId The allocation id
-		 */
-		void addAllocation(final SlotID slotId, final AllocationID allocationId) {
-			allocatedSlots.put(slotId, allocationId);
-			allocatedSlotsByAllocationId.put(allocationId, slotId);
-		}
-
-		/**
-		 * De-allocation with slot id
-		 *
-		 * @param slotId The slot id
-		 */
-		void removeAllocation(final SlotID slotId) {
-			if (allocatedSlots.containsKey(slotId)) {
-				final AllocationID allocationId = allocatedSlots.get(slotId);
-				allocatedSlots.remove(slotId);
-				allocatedSlotsByAllocationId.remove(allocationId);
-			}
-		}
-
-		/**
-		 * De-allocation with allocation id
-		 *
-		 * @param allocationId The allocation id
-		 */
-		void removeAllocation(final AllocationID allocationId) {
-			if (allocatedSlotsByAllocationId.containsKey(allocationId)) {
-				SlotID slotId = allocatedSlotsByAllocationId.get(allocationId);
-				allocatedSlotsByAllocationId.remove(allocationId);
-				allocatedSlots.remove(slotId);
-			}
-		}
-
-		/**
-		 * Check whether allocation exists by slot id
-		 *
-		 * @param slotId The slot id
-		 * @return true if the allocation exists
-		 */
-		boolean isAllocated(final SlotID slotId) {
-			return allocatedSlots.containsKey(slotId);
-		}
-
-		/**
-		 * Check whether allocation exists by allocation id
-		 *
-		 * @param allocationId The allocation id
-		 * @return true if the allocation exists
-		 */
-		boolean isAllocated(final AllocationID allocationId) {
-			return allocatedSlotsByAllocationId.containsKey(allocationId);
-		}
-
-		AllocationID getAllocationID(final SlotID slotId) {
-			return allocatedSlots.get(slotId);
-		}
-
-		SlotID getSlotID(final AllocationID allocationId) {
-			return allocatedSlotsByAllocationId.get(allocationId);
-		}
-
-		public int size() {
-			return allocatedSlots.size();
-		}
-	}
-
-	// ------------------------------------------------------------------------
-	//  Testing utilities
-	// ------------------------------------------------------------------------
-
-	@VisibleForTesting
-	boolean isAllocated(final SlotID slotId) {
-		return allocationMap.isAllocated(slotId);
-	}
-
-	@VisibleForTesting
-	boolean isAllocated(final AllocationID allocationId) {
-		return allocationMap.isAllocated(allocationId);
-	}
-
-	/**
-	 * Add free slots directly to the free pool, this will not trigger pending requests allocation
-	 *
-	 * @param slot The resource slot
-	 */
-	@VisibleForTesting
-	void addFreeSlot(final ResourceSlot slot) {
-		final ResourceID resourceId = slot.getResourceID();
-		final SlotID slotId = slot.getSlotId();
-
-		if (!registeredSlots.containsKey(resourceId)) {
-			registeredSlots.put(resourceId, new HashMap<SlotID, ResourceSlot>());
-		}
-		registeredSlots.get(resourceId).put(slot.getSlotId(), slot);
-		freeSlots.put(slotId, slot);
-	}
-
-	@VisibleForTesting
-	int getAllocatedSlotCount() {
-		return allocationMap.size();
-	}
-
-	@VisibleForTesting
-	int getFreeSlotCount() {
-		return freeSlots.size();
-	}
-
-	@VisibleForTesting
-	int getPendingRequestCount() {
-		return pendingSlotRequests.size();
-	}
-}

http://git-wip-us.apache.org/repos/asf/flink/blob/2f12ba3e/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/JobMaster.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/JobMaster.java b/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/JobMaster.java
new file mode 100644
index 0000000..0a6a7ef
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/JobMaster.java
@@ -0,0 +1,244 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.jobmaster;
+
+import org.apache.flink.api.common.JobID;
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.runtime.highavailability.HighAvailabilityServices;
+import org.apache.flink.runtime.jobgraph.JobGraph;
+import org.apache.flink.runtime.leaderelection.LeaderContender;
+import org.apache.flink.runtime.leaderelection.LeaderElectionService;
+import org.apache.flink.runtime.messages.Acknowledge;
+import org.apache.flink.runtime.rpc.RpcMethod;
+import org.apache.flink.runtime.resourcemanager.ResourceManagerGateway;
+import org.apache.flink.runtime.rpc.RpcEndpoint;
+import org.apache.flink.runtime.rpc.RpcService;
+import org.apache.flink.runtime.taskmanager.TaskExecutionState;
+import org.apache.flink.util.Preconditions;
+
+import java.util.UUID;
+
+/**
+ * JobMaster implementation. The job master is responsible for the execution of a single
+ * {@link org.apache.flink.runtime.jobgraph.JobGraph}.
+ * <p>
+ * It offers the following methods as part of its rpc interface to interact with the JobMaster
+ * remotely:
+ * <ul>
+ *     <li>{@link #updateTaskExecutionState(TaskExecutionState)} updates the task execution state for
+ * given task</li>
+ * </ul>
+ */
+public class JobMaster extends RpcEndpoint<JobMasterGateway> {
+
+	/** Gateway to connected resource manager, null iff not connected */
+	private ResourceManagerGateway resourceManager = null;
+
+	/** Logical representation of the job */
+	private final JobGraph jobGraph;
+	private final JobID jobID;
+
+	/** Configuration of the job */
+	private final Configuration configuration;
+
+	/** Service to contend for and retrieve the leadership of JM and RM */
+	private final HighAvailabilityServices highAvailabilityServices;
+
+	/** Leader Management */
+	private LeaderElectionService leaderElectionService = null;
+	private UUID leaderSessionID;
+
+	/**
+	 * The JM's Constructor
+	 *
+	 * @param jobGraph The representation of the job's execution plan
+	 * @param configuration The job's configuration
+	 * @param rpcService The RPC service at which the JM serves
+	 * @param highAvailabilityService The cluster's HA service from the JM can elect and retrieve leaders.
+	 */
+	public JobMaster(
+		JobGraph jobGraph,
+		Configuration configuration,
+		RpcService rpcService,
+		HighAvailabilityServices highAvailabilityService) {
+
+		super(rpcService);
+
+		this.jobGraph = Preconditions.checkNotNull(jobGraph);
+		this.jobID = Preconditions.checkNotNull(jobGraph.getJobID());
+
+		this.configuration = Preconditions.checkNotNull(configuration);
+
+		this.highAvailabilityServices = Preconditions.checkNotNull(highAvailabilityService);
+	}
+
+	public ResourceManagerGateway getResourceManager() {
+		return resourceManager;
+	}
+
+	//----------------------------------------------------------------------------------------------
+	// Initialization methods
+	//----------------------------------------------------------------------------------------------
+	public void start() {
+		super.start();
+
+		// register at the election once the JM starts
+		registerAtElectionService();
+	}
+
+
+	//----------------------------------------------------------------------------------------------
+	// JobMaster Leadership methods
+	//----------------------------------------------------------------------------------------------
+
+	/**
+	 * Retrieves the election service and contend for the leadership.
+	 */
+	private void registerAtElectionService() {
+		try {
+			leaderElectionService = highAvailabilityServices.getJobMasterLeaderElectionService(jobID);
+			leaderElectionService.start(new JobMasterLeaderContender());
+		} catch (Exception e) {
+			throw new RuntimeException("Fail to register at the election of JobMaster", e);
+		}
+	}
+
+	/**
+	 * Start the execution when the leadership is granted.
+	 *
+	 * @param newLeaderSessionID The identifier of the new leadership session
+	 */
+	public void grantJobMasterLeadership(final UUID newLeaderSessionID) {
+		runAsync(new Runnable() {
+			@Override
+			public void run() {
+				log.info("JobManager {} grants leadership with session id {}.", getAddress(), newLeaderSessionID);
+
+				// The operation may be blocking, but since JM is idle before it grants the leadership, it's okay that
+				// JM waits here for the operation's completeness.
+				leaderSessionID = newLeaderSessionID;
+				leaderElectionService.confirmLeaderSessionID(newLeaderSessionID);
+
+				// TODO:: execute the job when the leadership is granted.
+			}
+		});
+	}
+
+	/**
+	 * Stop the execution when the leadership is revoked.
+	 */
+	public void revokeJobMasterLeadership() {
+		runAsync(new Runnable() {
+			@Override
+			public void run() {
+				log.info("JobManager {} was revoked leadership.", getAddress());
+
+				// TODO:: cancel the job's execution and notify all listeners
+				cancelAndClearEverything(new Exception("JobManager is no longer the leader."));
+
+				leaderSessionID = null;
+			}
+		});
+	}
+
+	/**
+	 * Handles error occurring in the leader election service
+	 *
+	 * @param exception Exception thrown in the leader election service
+	 */
+	public void onJobMasterElectionError(final Exception exception) {
+		runAsync(new Runnable() {
+			@Override
+			public void run() {
+				log.error("Received an error from the LeaderElectionService.", exception);
+
+				// TODO:: cancel the job's execution and shutdown the JM
+				cancelAndClearEverything(exception);
+
+				leaderSessionID = null;
+			}
+		});
+
+	}
+
+	//----------------------------------------------------------------------------------------------
+	// RPC methods
+	//----------------------------------------------------------------------------------------------
+
+	/**
+	 * Updates the task execution state for a given task.
+	 *
+	 * @param taskExecutionState New task execution state for a given task
+	 * @return Acknowledge the task execution state update
+	 */
+	@RpcMethod
+	public Acknowledge updateTaskExecutionState(TaskExecutionState taskExecutionState) {
+		System.out.println("TaskExecutionState: " + taskExecutionState);
+		return Acknowledge.get();
+	}
+
+	/**
+	 * Triggers the registration of the job master at the resource manager.
+	 *
+	 * @param address Address of the resource manager
+	 */
+	@RpcMethod
+	public void registerAtResourceManager(final String address) {
+		//TODO:: register at the RM
+	}
+
+	//----------------------------------------------------------------------------------------------
+	// Helper methods
+	//----------------------------------------------------------------------------------------------
+
+	/**
+	 * Cancel the current job and notify all listeners the job's cancellation.
+	 *
+	 * @param cause Cause for the cancelling.
+	 */
+	private void cancelAndClearEverything(Throwable cause) {
+		// currently, nothing to do here
+	}
+
+	// ------------------------------------------------------------------------
+	//  Utility classes
+	// ------------------------------------------------------------------------
+	private class JobMasterLeaderContender implements LeaderContender {
+
+		@Override
+		public void grantLeadership(UUID leaderSessionID) {
+			JobMaster.this.grantJobMasterLeadership(leaderSessionID);
+		}
+
+		@Override
+		public void revokeLeadership() {
+			JobMaster.this.revokeJobMasterLeadership();
+		}
+
+		@Override
+		public String getAddress() {
+			return JobMaster.this.getAddress();
+		}
+
+		@Override
+		public void handleError(Exception exception) {
+			onJobMasterElectionError(exception);
+		}
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/2f12ba3e/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/JobMasterGateway.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/JobMasterGateway.java b/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/JobMasterGateway.java
new file mode 100644
index 0000000..a53e383
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/JobMasterGateway.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.jobmaster;
+
+import org.apache.flink.runtime.messages.Acknowledge;
+import org.apache.flink.runtime.rpc.RpcGateway;
+import org.apache.flink.runtime.taskmanager.TaskExecutionState;
+import scala.concurrent.Future;
+
+/**
+ * {@link JobMaster} rpc gateway interface
+ */
+public interface JobMasterGateway extends RpcGateway {
+
+	/**
+	 * Updates the task execution state for a given task.
+	 *
+	 * @param taskExecutionState New task execution state for a given task
+	 * @return Future acknowledge of the task execution state update
+	 */
+	Future<Acknowledge> updateTaskExecutionState(TaskExecutionState taskExecutionState);
+
+	/**
+	 * Triggers the registration of the job master at the resource manager.
+	 *
+	 * @param address Address of the resource manager
+	 */
+	void registerAtResourceManager(final String address);
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/2f12ba3e/flink-runtime/src/main/java/org/apache/flink/runtime/registration/RegistrationResponse.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/registration/RegistrationResponse.java b/flink-runtime/src/main/java/org/apache/flink/runtime/registration/RegistrationResponse.java
new file mode 100644
index 0000000..fefcc78
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/registration/RegistrationResponse.java
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.registration;
+
+import java.io.Serializable;
+
+/**
+ * Base class for responses given to registration attempts from {@link RetryingRegistration}.
+ */
+public abstract class RegistrationResponse implements Serializable {
+
+	private static final long serialVersionUID = 1L;
+
+	// ----------------------------------------------------------------------------
+	
+	/**
+	 * Base class for a successful registration. Concrete registration implementations
+	 * will typically extend this class to attach more information.
+	 */
+	public static class Success extends RegistrationResponse {
+		private static final long serialVersionUID = 1L;
+		
+		@Override
+		public String toString() {
+			return "Registration Successful";
+		}
+	}
+
+	// ----------------------------------------------------------------------------
+
+	/**
+	 * A rejected (declined) registration.
+	 */
+	public static final class Decline extends RegistrationResponse {
+		private static final long serialVersionUID = 1L;
+
+		/** the rejection reason */
+		private final String reason;
+
+		/**
+		 * Creates a new rejection message.
+		 * 
+		 * @param reason The reason for the rejection.
+		 */
+		public Decline(String reason) {
+			this.reason = reason != null ? reason : "(unknown)";
+		}
+
+		/**
+		 * Gets the reason for the rejection.
+		 */
+		public String getReason() {
+			return reason;
+		}
+
+		@Override
+		public String toString() {
+			return "Registration Declined (" + reason + ')';
+		}
+	}
+}
+
+
+
+
+
+
+

http://git-wip-us.apache.org/repos/asf/flink/blob/2f12ba3e/flink-runtime/src/main/java/org/apache/flink/runtime/registration/RetryingRegistration.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/registration/RetryingRegistration.java b/flink-runtime/src/main/java/org/apache/flink/runtime/registration/RetryingRegistration.java
new file mode 100644
index 0000000..88fe9b5
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/registration/RetryingRegistration.java
@@ -0,0 +1,296 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.registration;
+
+import akka.dispatch.OnFailure;
+import akka.dispatch.OnSuccess;
+
+import org.apache.flink.api.java.tuple.Tuple2;
+import org.apache.flink.runtime.rpc.RpcGateway;
+import org.apache.flink.runtime.rpc.RpcService;
+
+import org.slf4j.Logger;
+
+import scala.concurrent.Future;
+import scala.concurrent.Promise;
+import scala.concurrent.impl.Promise.DefaultPromise;
+
+import java.util.UUID;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
+
+import static org.apache.flink.util.Preconditions.checkArgument;
+import static org.apache.flink.util.Preconditions.checkNotNull;
+
+
+/**
+ * This utility class implements the basis of registering one component at another component,
+ * for example registering the TaskExecutor at the ResourceManager.
+ * This {@code RetryingRegistration} implements both the initial address resolution
+ * and the retries-with-backoff strategy.
+ * 
+ * <p>The registration gives access to a future that is completed upon successful registration.
+ * The registration can be canceled, for example when the target where it tries to register
+ * at looses leader status.
+ * 
+ * @param <Gateway> The type of the gateway to connect to.
+ * @param <Success> The type of the successful registration responses.
+ */
+public abstract class RetryingRegistration<Gateway extends RpcGateway, Success extends RegistrationResponse.Success> {
+
+	// ------------------------------------------------------------------------
+	//  default configuration values
+	// ------------------------------------------------------------------------
+
+	/** default value for the initial registration timeout (milliseconds) */
+	private static final long INITIAL_REGISTRATION_TIMEOUT_MILLIS = 100;
+
+	/** default value for the maximum registration timeout, after exponential back-off (milliseconds) */
+	private static final long MAX_REGISTRATION_TIMEOUT_MILLIS = 30000;
+
+	/** The pause (milliseconds) made after an registration attempt caused an exception (other than timeout) */
+	private static final long ERROR_REGISTRATION_DELAY_MILLIS = 10000;
+
+	/** The pause (milliseconds) made after the registration attempt was refused */
+	private static final long REFUSED_REGISTRATION_DELAY_MILLIS = 30000;
+
+	// ------------------------------------------------------------------------
+	// Fields
+	// ------------------------------------------------------------------------
+
+	private final Logger log;
+
+	private final RpcService rpcService;
+
+	private final String targetName;
+
+	private final Class<Gateway> targetType;
+
+	private final String targetAddress;
+
+	private final UUID leaderId;
+
+	private final Promise<Tuple2<Gateway, Success>> completionPromise;
+
+	private final long initialRegistrationTimeout;
+
+	private final long maxRegistrationTimeout;
+
+	private final long delayOnError;
+
+	private final long delayOnRefusedRegistration;
+
+	private volatile boolean canceled;
+
+	// ------------------------------------------------------------------------
+
+	public RetryingRegistration(
+			Logger log,
+			RpcService rpcService,
+			String targetName,
+			Class<Gateway> targetType,
+			String targetAddress,
+			UUID leaderId) {
+		this(log, rpcService, targetName, targetType, targetAddress, leaderId,
+				INITIAL_REGISTRATION_TIMEOUT_MILLIS, MAX_REGISTRATION_TIMEOUT_MILLIS,
+				ERROR_REGISTRATION_DELAY_MILLIS, REFUSED_REGISTRATION_DELAY_MILLIS);
+	}
+
+	public RetryingRegistration(
+			Logger log,
+			RpcService rpcService,
+			String targetName, 
+			Class<Gateway> targetType,
+			String targetAddress,
+			UUID leaderId,
+			long initialRegistrationTimeout,
+			long maxRegistrationTimeout,
+			long delayOnError,
+			long delayOnRefusedRegistration) {
+
+		checkArgument(initialRegistrationTimeout > 0, "initial registration timeout must be greater than zero");
+		checkArgument(maxRegistrationTimeout > 0, "maximum registration timeout must be greater than zero");
+		checkArgument(delayOnError >= 0, "delay on error must be non-negative");
+		checkArgument(delayOnRefusedRegistration >= 0, "delay on refused registration must be non-negative");
+
+		this.log = checkNotNull(log);
+		this.rpcService = checkNotNull(rpcService);
+		this.targetName = checkNotNull(targetName);
+		this.targetType = checkNotNull(targetType);
+		this.targetAddress = checkNotNull(targetAddress);
+		this.leaderId = checkNotNull(leaderId);
+		this.initialRegistrationTimeout = initialRegistrationTimeout;
+		this.maxRegistrationTimeout = maxRegistrationTimeout;
+		this.delayOnError = delayOnError;
+		this.delayOnRefusedRegistration = delayOnRefusedRegistration;
+
+		this.completionPromise = new DefaultPromise<>();
+	}
+
+	// ------------------------------------------------------------------------
+	//  completion and cancellation
+	// ------------------------------------------------------------------------
+
+	public Future<Tuple2<Gateway, Success>> getFuture() {
+		return completionPromise.future();
+	}
+
+	/**
+	 * Cancels the registration procedure.
+	 */
+	public void cancel() {
+		canceled = true;
+	}
+
+	/**
+	 * Checks if the registration was canceled.
+	 * @return True if the registration was canceled, false otherwise.
+	 */
+	public boolean isCanceled() {
+		return canceled;
+	}
+
+	// ------------------------------------------------------------------------
+	//  registration
+	// ------------------------------------------------------------------------
+
+	protected abstract Future<RegistrationResponse> invokeRegistration(
+			Gateway gateway, UUID leaderId, long timeoutMillis) throws Exception;
+
+	/**
+	 * This method resolves the target address to a callable gateway and starts the
+	 * registration after that.
+	 */
+	@SuppressWarnings("unchecked")
+	public void startRegistration() {
+		try {
+			// trigger resolution of the resource manager address to a callable gateway
+			Future<Gateway> resourceManagerFuture = rpcService.connect(targetAddress, targetType);
+	
+			// upon success, start the registration attempts
+			resourceManagerFuture.onSuccess(new OnSuccess<Gateway>() {
+				@Override
+				public void onSuccess(Gateway result) {
+					log.info("Resolved {} address, beginning registration", targetName);
+					register(result, 1, initialRegistrationTimeout);
+				}
+			}, rpcService.getExecutionContext());
+	
+			// upon failure, retry, unless this is cancelled
+			resourceManagerFuture.onFailure(new OnFailure() {
+				@Override
+				public void onFailure(Throwable failure) {
+					if (!isCanceled()) {
+						log.warn("Could not resolve {} address {}, retrying...", targetName, targetAddress);
+						startRegistration();
+					}
+				}
+			}, rpcService.getExecutionContext());
+		}
+		catch (Throwable t) {
+			cancel();
+			completionPromise.tryFailure(t);
+		}
+	}
+
+	/**
+	 * This method performs a registration attempt and triggers either a success notification or a retry,
+	 * depending on the result.
+	 */
+	@SuppressWarnings("unchecked")
+	private void register(final Gateway gateway, final int attempt, final long timeoutMillis) {
+		// eager check for canceling to avoid some unnecessary work
+		if (canceled) {
+			return;
+		}
+
+		try {
+			log.info("Registration at {} attempt {} (timeout={}ms)", targetName, attempt, timeoutMillis);
+			Future<RegistrationResponse> registrationFuture = invokeRegistration(gateway, leaderId, timeoutMillis);
+	
+			// if the registration was successful, let the TaskExecutor know
+			registrationFuture.onSuccess(new OnSuccess<RegistrationResponse>() {
+				
+				@Override
+				public void onSuccess(RegistrationResponse result) throws Throwable {
+					if (!isCanceled()) {
+						if (result instanceof RegistrationResponse.Success) {
+							// registration successful!
+							Success success = (Success) result;
+							completionPromise.success(new Tuple2<>(gateway, success));
+						}
+						else {
+							// registration refused or unknown
+							if (result instanceof RegistrationResponse.Decline) {
+								RegistrationResponse.Decline decline = (RegistrationResponse.Decline) result;
+								log.info("Registration at {} was declined: {}", targetName, decline.getReason());
+							} else {
+								log.error("Received unknown response to registration attempt: " + result);
+							}
+
+							log.info("Pausing and re-attempting registration in {} ms", delayOnRefusedRegistration);
+							registerLater(gateway, 1, initialRegistrationTimeout, delayOnRefusedRegistration);
+						}
+					}
+				}
+			}, rpcService.getExecutionContext());
+	
+			// upon failure, retry
+			registrationFuture.onFailure(new OnFailure() {
+				@Override
+				public void onFailure(Throwable failure) {
+					if (!isCanceled()) {
+						if (failure instanceof TimeoutException) {
+							// we simply have not received a response in time. maybe the timeout was
+							// very low (initial fast registration attempts), maybe the target endpoint is
+							// currently down.
+							if (log.isDebugEnabled()) {
+								log.debug("Registration at {} ({}) attempt {} timed out after {} ms",
+										targetName, targetAddress, attempt, timeoutMillis);
+							}
+	
+							long newTimeoutMillis = Math.min(2 * timeoutMillis, maxRegistrationTimeout);
+							register(gateway, attempt + 1, newTimeoutMillis);
+						}
+						else {
+							// a serious failure occurred. we still should not give up, but keep trying
+							log.error("Registration at " + targetName + " failed due to an error", failure);
+							log.info("Pausing and re-attempting registration in {} ms", delayOnError);
+	
+							registerLater(gateway, 1, initialRegistrationTimeout, delayOnError);
+						}
+					}
+				}
+			}, rpcService.getExecutionContext());
+		}
+		catch (Throwable t) {
+			cancel();
+			completionPromise.tryFailure(t);
+		}
+	}
+
+	private void registerLater(final Gateway gateway, final int attempt, final long timeoutMillis, long delay) {
+		rpcService.scheduleRunnable(new Runnable() {
+			@Override
+			public void run() {
+				register(gateway, attempt, timeoutMillis);
+			}
+		}, delay, TimeUnit.MILLISECONDS);
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/2f12ba3e/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/JobMasterRegistration.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/JobMasterRegistration.java b/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/JobMasterRegistration.java
new file mode 100644
index 0000000..309dcc1
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/JobMasterRegistration.java
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.resourcemanager;
+
+import java.io.Serializable;
+
+public class JobMasterRegistration implements Serializable {
+	private static final long serialVersionUID = 8411214999193765202L;
+
+	private final String address;
+
+	public JobMasterRegistration(String address) {
+		this.address = address;
+	}
+
+	public String getAddress() {
+		return address;
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/2f12ba3e/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/RegistrationResponse.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/RegistrationResponse.java b/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/RegistrationResponse.java
new file mode 100644
index 0000000..fb6c401
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/RegistrationResponse.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.resourcemanager;
+
+import org.apache.flink.runtime.instance.InstanceID;
+
+import java.io.Serializable;
+
+public class RegistrationResponse implements Serializable {
+	private static final long serialVersionUID = -2379003255993119993L;
+
+	private final boolean isSuccess;
+	private final InstanceID instanceID;
+
+	public RegistrationResponse(boolean isSuccess, InstanceID instanceID) {
+		this.isSuccess = isSuccess;
+		this.instanceID = instanceID;
+	}
+
+	public boolean isSuccess() {
+		return isSuccess;
+	}
+
+	public InstanceID getInstanceID() {
+		return instanceID;
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/2f12ba3e/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/ResourceManager.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/ResourceManager.java b/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/ResourceManager.java
new file mode 100644
index 0000000..44c022b
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/ResourceManager.java
@@ -0,0 +1,214 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.resourcemanager;
+
+import akka.dispatch.Mapper;
+
+import org.apache.flink.annotation.VisibleForTesting;
+import org.apache.flink.runtime.clusterframework.types.ResourceID;
+import org.apache.flink.runtime.highavailability.HighAvailabilityServices;
+import org.apache.flink.runtime.instance.InstanceID;
+import org.apache.flink.runtime.leaderelection.LeaderContender;
+import org.apache.flink.runtime.leaderelection.LeaderElectionService;
+import org.apache.flink.runtime.rpc.RpcMethod;
+import org.apache.flink.runtime.rpc.RpcEndpoint;
+import org.apache.flink.runtime.rpc.RpcService;
+import org.apache.flink.runtime.jobmaster.JobMaster;
+import org.apache.flink.runtime.jobmaster.JobMasterGateway;
+import org.apache.flink.runtime.taskexecutor.TaskExecutorRegistrationSuccess;
+
+import scala.concurrent.Future;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.UUID;
+
+import static org.apache.flink.util.Preconditions.checkNotNull;
+
+/**
+ * ResourceManager implementation. The resource manager is responsible for resource de-/allocation
+ * and bookkeeping.
+ *
+ * It offers the following methods as part of its rpc interface to interact with the him remotely:
+ * <ul>
+ *     <li>{@link #registerJobMaster(JobMasterRegistration)} registers a {@link JobMaster} at the resource manager</li>
+ *     <li>{@link #requestSlot(SlotRequest)} requests a slot from the resource manager</li>
+ * </ul>
+ */
+public class ResourceManager extends RpcEndpoint<ResourceManagerGateway> {
+	private final Map<JobMasterGateway, InstanceID> jobMasterGateways;
+	private final HighAvailabilityServices highAvailabilityServices;
+	private LeaderElectionService leaderElectionService = null;
+	private UUID leaderSessionID = null;
+
+	public ResourceManager(RpcService rpcService, HighAvailabilityServices highAvailabilityServices) {
+		super(rpcService);
+		this.highAvailabilityServices = checkNotNull(highAvailabilityServices);
+		this.jobMasterGateways = new HashMap<>();
+	}
+
+	@Override
+	public void start() {
+		// start a leader
+		try {
+			super.start();
+			leaderElectionService = highAvailabilityServices.getResourceManagerLeaderElectionService();
+			leaderElectionService.start(new ResourceManagerLeaderContender());
+		} catch (Throwable e) {
+			log.error("A fatal error happened when starting the ResourceManager", e);
+			throw new RuntimeException("A fatal error happened when starting the ResourceManager", e);
+		}
+	}
+
+	@Override
+	public void shutDown() {
+		try {
+			leaderElectionService.stop();
+			super.shutDown();
+		} catch(Throwable e) {
+			log.error("A fatal error happened when shutdown the ResourceManager", e);
+			throw new RuntimeException("A fatal error happened when shutdown the ResourceManager", e);
+		}
+	}
+
+	/**
+	 * Gets the leader session id of current resourceManager.
+	 *
+	 * @return return the leaderSessionId of current resourceManager, this returns null until the current resourceManager is granted leadership.
+	 */
+	@VisibleForTesting
+	UUID getLeaderSessionID() {
+		return leaderSessionID;
+	}
+
+	/**
+	 * Register a {@link JobMaster} at the resource manager.
+	 *
+	 * @param jobMasterRegistration Job master registration information
+	 * @return Future registration response
+	 */
+	@RpcMethod
+	public Future<RegistrationResponse> registerJobMaster(JobMasterRegistration jobMasterRegistration) {
+		Future<JobMasterGateway> jobMasterFuture = getRpcService().connect(jobMasterRegistration.getAddress(), JobMasterGateway.class);
+
+		return jobMasterFuture.map(new Mapper<JobMasterGateway, RegistrationResponse>() {
+			@Override
+			public RegistrationResponse apply(final JobMasterGateway jobMasterGateway) {
+				InstanceID instanceID;
+
+				if (jobMasterGateways.containsKey(jobMasterGateway)) {
+					instanceID = jobMasterGateways.get(jobMasterGateway);
+				} else {
+					instanceID = new InstanceID();
+					jobMasterGateways.put(jobMasterGateway, instanceID);
+				}
+
+				return new RegistrationResponse(true, instanceID);
+			}
+		}, getMainThreadExecutionContext());
+	}
+
+	/**
+	 * Requests a slot from the resource manager.
+	 *
+	 * @param slotRequest Slot request
+	 * @return Slot assignment
+	 */
+	@RpcMethod
+	public SlotAssignment requestSlot(SlotRequest slotRequest) {
+		System.out.println("SlotRequest: " + slotRequest);
+		return new SlotAssignment();
+	}
+
+
+	/**
+	 *
+	 * @param resourceManagerLeaderId  The fencing token for the ResourceManager leader 
+	 * @param taskExecutorAddress      The address of the TaskExecutor that registers
+	 * @param resourceID               The resource ID of the TaskExecutor that registers
+	 *
+	 * @return The response by the ResourceManager.
+	 */
+	@RpcMethod
+	public org.apache.flink.runtime.registration.RegistrationResponse registerTaskExecutor(
+			UUID resourceManagerLeaderId,
+			String taskExecutorAddress,
+			ResourceID resourceID) {
+
+		return new TaskExecutorRegistrationSuccess(new InstanceID(), 5000);
+	}
+
+	private class ResourceManagerLeaderContender implements LeaderContender {
+
+		/**
+		 * Callback method when current resourceManager is granted leadership
+		 *
+		 * @param leaderSessionID unique leadershipID
+		 */
+		@Override
+		public void grantLeadership(final UUID leaderSessionID) {
+			runAsync(new Runnable() {
+				@Override
+				public void run() {
+					log.info("ResourceManager {} was granted leadership with leader session ID {}", getAddress(), leaderSessionID);
+					ResourceManager.this.leaderSessionID = leaderSessionID;
+					// confirming the leader session ID might be blocking,
+					leaderElectionService.confirmLeaderSessionID(leaderSessionID);
+				}
+			});
+		}
+
+		/**
+		 * Callback method when current resourceManager lose leadership.
+		 */
+		@Override
+		public void revokeLeadership() {
+			runAsync(new Runnable() {
+				@Override
+				public void run() {
+					log.info("ResourceManager {} was revoked leadership.", getAddress());
+					jobMasterGateways.clear();
+					leaderSessionID = null;
+				}
+			});
+		}
+
+		@Override
+		public String getAddress() {
+			return ResourceManager.this.getAddress();
+		}
+
+		/**
+		 * Handles error occurring in the leader election service
+		 *
+		 * @param exception Exception being thrown in the leader election service
+		 */
+		@Override
+		public void handleError(final Exception exception) {
+			runAsync(new Runnable() {
+				@Override
+				public void run() {
+					log.error("ResourceManager received an error from the LeaderElectionService.", exception);
+					// terminate ResourceManager in case of an error
+					shutDown();
+				}
+			});
+		}
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/2f12ba3e/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/ResourceManagerGateway.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/ResourceManagerGateway.java b/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/ResourceManagerGateway.java
new file mode 100644
index 0000000..b5782b0
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/ResourceManagerGateway.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.resourcemanager;
+
+import org.apache.flink.runtime.clusterframework.types.ResourceID;
+import org.apache.flink.runtime.rpc.RpcGateway;
+import org.apache.flink.runtime.rpc.RpcTimeout;
+import org.apache.flink.runtime.jobmaster.JobMaster;
+
+import scala.concurrent.Future;
+import scala.concurrent.duration.FiniteDuration;
+
+import java.util.UUID;
+
+/**
+ * The {@link ResourceManager}'s RPC gateway interface.
+ */
+public interface ResourceManagerGateway extends RpcGateway {
+
+	/**
+	 * Register a {@link JobMaster} at the resource manager.
+	 *
+	 * @param jobMasterRegistration Job master registration information
+	 * @param timeout Timeout for the future to complete
+	 * @return Future registration response
+	 */
+	Future<RegistrationResponse> registerJobMaster(
+		JobMasterRegistration jobMasterRegistration,
+		@RpcTimeout FiniteDuration timeout);
+
+	/**
+	 * Register a {@link JobMaster} at the resource manager.
+	 *
+	 * @param jobMasterRegistration Job master registration information
+	 * @return Future registration response
+	 */
+	Future<RegistrationResponse> registerJobMaster(JobMasterRegistration jobMasterRegistration);
+
+	/**
+	 * Requests a slot from the resource manager.
+	 *
+	 * @param slotRequest Slot request
+	 * @return Future slot assignment
+	 */
+	Future<SlotAssignment> requestSlot(SlotRequest slotRequest);
+
+	/**
+	 * 
+	 * @param resourceManagerLeaderId  The fencing token for the ResourceManager leader 
+	 * @param taskExecutorAddress      The address of the TaskExecutor that registers
+	 * @param resourceID               The resource ID of the TaskExecutor that registers
+	 * @param timeout                  The timeout for the response.
+	 * 
+	 * @return The future to the response by the ResourceManager.
+	 */
+	Future<org.apache.flink.runtime.registration.RegistrationResponse> registerTaskExecutor(
+			UUID resourceManagerLeaderId,
+			String taskExecutorAddress,
+			ResourceID resourceID,
+			@RpcTimeout FiniteDuration timeout);
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/2f12ba3e/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/SlotAssignment.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/SlotAssignment.java b/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/SlotAssignment.java
new file mode 100644
index 0000000..695204d
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/SlotAssignment.java
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.resourcemanager;
+
+import java.io.Serializable;
+
+public class SlotAssignment implements Serializable{
+	private static final long serialVersionUID = -6990813455942742322L;
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/2f12ba3e/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/SlotManager.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/SlotManager.java b/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/SlotManager.java
new file mode 100644
index 0000000..5c06648
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/SlotManager.java
@@ -0,0 +1,523 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.resourcemanager;
+
+import org.apache.flink.annotation.VisibleForTesting;
+import org.apache.flink.runtime.clusterframework.types.AllocationID;
+import org.apache.flink.runtime.clusterframework.types.ResourceID;
+import org.apache.flink.runtime.clusterframework.types.ResourceProfile;
+import org.apache.flink.runtime.clusterframework.types.ResourceSlot;
+import org.apache.flink.runtime.clusterframework.types.SlotID;
+import org.apache.flink.runtime.taskexecutor.SlotReport;
+import org.apache.flink.runtime.taskexecutor.SlotStatus;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+import static org.apache.flink.util.Preconditions.checkNotNull;
+
+/**
+ * SlotManager is responsible for receiving slot requests and do slot allocations. It allows to request
+ * slots from registered TaskManagers and issues container allocation requests in case of there are not
+ * enough available slots. Besides, it should sync its slot allocation with TaskManager's heartbeat.
+ * <p>
+ * The main operation principle of SlotManager is:
+ * <ul>
+ * <li>1. All slot allocation status should be synced with TaskManager, which is the ground truth.</li>
+ * <li>2. All slots that have registered must be tracked, either by free pool or allocated pool.</li>
+ * <li>3. All slot requests will be handled by best efforts, there is no guarantee that one request will be
+ * fulfilled in time or correctly allocated. Conflicts or timeout or some special error will happen, it should
+ * be handled outside SlotManager. SlotManager will make each decision based on the information it currently
+ * holds.</li>
+ * </ul>
+ * <b>IMPORTANT:</b> This class is <b>Not Thread-safe</b>.
+ */
+public abstract class SlotManager {
+
+	private static final Logger LOG = LoggerFactory.getLogger(SlotManager.class);
+
+	/** Gateway to communicate with ResourceManager */
+	private final ResourceManagerGateway resourceManagerGateway;
+
+	/** All registered slots, including free and allocated slots */
+	private final Map<ResourceID, Map<SlotID, ResourceSlot>> registeredSlots;
+
+	/** All pending slot requests, waiting available slots to fulfil */
+	private final Map<AllocationID, SlotRequest> pendingSlotRequests;
+
+	/** All free slots that can be used to be allocated */
+	private final Map<SlotID, ResourceSlot> freeSlots;
+
+	/** All allocations, we can lookup allocations either by SlotID or AllocationID */
+	private final AllocationMap allocationMap;
+
+	public SlotManager(ResourceManagerGateway resourceManagerGateway) {
+		this.resourceManagerGateway = checkNotNull(resourceManagerGateway);
+		this.registeredSlots = new HashMap<>(16);
+		this.pendingSlotRequests = new LinkedHashMap<>(16);
+		this.freeSlots = new HashMap<>(16);
+		this.allocationMap = new AllocationMap();
+	}
+
+	// ------------------------------------------------------------------------
+	//  slot managements
+	// ------------------------------------------------------------------------
+
+	/**
+	 * Request a slot with requirements, we may either fulfill the request or pending it. Trigger container
+	 * allocation if we don't have enough resource. If we have free slot which can match the request, record
+	 * this allocation and forward the request to TaskManager through ResourceManager (we want this done by
+	 * RPC's main thread to avoid race condition).
+	 *
+	 * @param request The detailed request of the slot
+	 */
+	public void requestSlot(final SlotRequest request) {
+		if (isRequestDuplicated(request)) {
+			LOG.warn("Duplicated slot request, AllocationID:{}", request.getAllocationId());
+			return;
+		}
+
+		// try to fulfil the request with current free slots
+		ResourceSlot slot = chooseSlotToUse(request, freeSlots);
+		if (slot != null) {
+			LOG.info("Assigning SlotID({}) to AllocationID({}), JobID:{}", slot.getSlotId(),
+				request.getAllocationId(), request.getJobId());
+
+			// record this allocation in bookkeeping
+			allocationMap.addAllocation(slot.getSlotId(), request.getAllocationId());
+
+			// remove selected slot from free pool
+			freeSlots.remove(slot.getSlotId());
+
+			// TODO: send slot request to TaskManager
+		} else {
+			LOG.info("Cannot fulfil slot request, try to allocate a new container for it, " +
+				"AllocationID:{}, JobID:{}", request.getAllocationId(), request.getJobId());
+			allocateContainer(request.getResourceProfile());
+			pendingSlotRequests.put(request.getAllocationId(), request);
+		}
+	}
+
+	/**
+	 * Sync slot status with TaskManager's SlotReport.
+	 */
+	public void updateSlotStatus(final SlotReport slotReport) {
+		for (SlotStatus slotStatus : slotReport.getSlotsStatus()) {
+			updateSlotStatus(slotStatus);
+		}
+	}
+
+	/**
+	 * The slot request to TaskManager may be either failed by rpc communication (timeout, network error, etc.)
+	 * or really rejected by TaskManager. We shall retry this request by:
+	 * <ul>
+	 * <li>1. verify and clear all the previous allocate information for this request
+	 * <li>2. try to request slot again
+	 * </ul>
+	 * <p>
+	 * This may cause some duplicate allocation, e.g. the slot request to TaskManager is successful but the response
+	 * is lost somehow, so we may request a slot in another TaskManager, this causes two slots assigned to one request,
+	 * but it can be taken care of by rejecting registration at JobManager.
+	 *
+	 * @param originalRequest The original slot request
+	 * @param slotId          The target SlotID
+	 */
+	public void handleSlotRequestFailedAtTaskManager(final SlotRequest originalRequest, final SlotID slotId) {
+		final AllocationID originalAllocationId = originalRequest.getAllocationId();
+		LOG.info("Slot request failed at TaskManager, SlotID:{}, AllocationID:{}, JobID:{}",
+			slotId, originalAllocationId, originalRequest.getJobId());
+
+		// verify the allocation info before we do anything
+		if (freeSlots.containsKey(slotId)) {
+			// this slot is currently empty, no need to de-allocate it from our allocations
+			LOG.info("Original slot is somehow empty, retrying this request");
+
+			// before retry, we should double check whether this request was allocated by some other ways
+			if (!allocationMap.isAllocated(originalAllocationId)) {
+				requestSlot(originalRequest);
+			} else {
+				LOG.info("The failed request has somehow been allocated, SlotID:{}",
+					allocationMap.getSlotID(originalAllocationId));
+			}
+		} else if (allocationMap.isAllocated(slotId)) {
+			final AllocationID currentAllocationId = allocationMap.getAllocationID(slotId);
+
+			// check whether we have an agreement on whom this slot belongs to
+			if (originalAllocationId.equals(currentAllocationId)) {
+				LOG.info("De-allocate this request and retry");
+				allocationMap.removeAllocation(currentAllocationId);
+
+				// put this slot back to free pool
+				ResourceSlot slot = checkNotNull(getRegisteredSlot(slotId));
+				freeSlots.put(slotId, slot);
+
+				// retry the request
+				requestSlot(originalRequest);
+			} else {
+				// the slot is taken by someone else, no need to de-allocate it from our allocations
+				LOG.info("Original slot is taken by someone else, current AllocationID:{}", currentAllocationId);
+
+				// before retry, we should double check whether this request was allocated by some other ways
+				if (!allocationMap.isAllocated(originalAllocationId)) {
+					requestSlot(originalRequest);
+				} else {
+					LOG.info("The failed request is somehow been allocated, SlotID:{}",
+						allocationMap.getSlotID(originalAllocationId));
+				}
+			}
+		} else {
+			LOG.error("BUG! {} is neither in free pool nor in allocated pool", slotId);
+		}
+	}
+
+	/**
+	 * Callback for TaskManager failures. In case that a TaskManager fails, we have to clean up all its slots.
+	 *
+	 * @param resourceId The ResourceID of the TaskManager
+	 */
+	public void notifyTaskManagerFailure(final ResourceID resourceId) {
+		LOG.info("Resource:{} been notified failure", resourceId);
+		final Map<SlotID, ResourceSlot> slotIdsToRemove = registeredSlots.remove(resourceId);
+		if (slotIdsToRemove != null) {
+			for (SlotID slotId : slotIdsToRemove.keySet()) {
+				LOG.info("Removing Slot:{} upon resource failure", slotId);
+				if (freeSlots.containsKey(slotId)) {
+					freeSlots.remove(slotId);
+				} else if (allocationMap.isAllocated(slotId)) {
+					allocationMap.removeAllocation(slotId);
+				} else {
+					LOG.error("BUG! {} is neither in free pool nor in allocated pool", slotId);
+				}
+			}
+		}
+	}
+
+	// ------------------------------------------------------------------------
+	//  internal behaviors
+	// ------------------------------------------------------------------------
+
+	/**
+	 * Update slot status based on TaskManager's report. There are mainly two situations when we receive the report:
+	 * <ul>
+	 * <li>1. The slot is newly registered.</li>
+	 * <li>2. The slot has registered, it contains its current status.</li>
+	 * </ul>
+	 * <p>
+	 * Regarding 1: It's fairly simple, we just record this slot's status, and trigger schedule if slot is empty.
+	 * <p>
+	 * Regarding 2: It will cause some weird situation since we may have some time-gap on how the slot's status really
+	 * is. We may have some updates on the slot's allocation, but it doesn't reflected by TaskManager's heartbeat yet,
+	 * and we may make some wrong decision if we cannot guarantee we have the exact status about all the slots. So
+	 * the principle here is: We always trust TaskManager's heartbeat, we will correct our information based on that
+	 * and take next action based on the diff between our information and heartbeat status.
+	 *
+	 * @param reportedStatus Reported slot status
+	 */
+	void updateSlotStatus(final SlotStatus reportedStatus) {
+		final SlotID slotId = reportedStatus.getSlotID();
+		final ResourceSlot slot = new ResourceSlot(slotId, reportedStatus.getProfiler());
+
+		if (registerNewSlot(slot)) {
+			// we have a newly registered slot
+			LOG.info("New slot appeared, SlotID:{}, AllocationID:{}", slotId, reportedStatus.getAllocationID());
+
+			if (reportedStatus.getAllocationID() != null) {
+				// slot in use, record this in bookkeeping
+				allocationMap.addAllocation(slotId, reportedStatus.getAllocationID());
+			} else {
+				handleFreeSlot(new ResourceSlot(slotId, reportedStatus.getProfiler()));
+			}
+		} else {
+			// slot exists, update current information
+			if (reportedStatus.getAllocationID() != null) {
+				// slot is reported in use
+				final AllocationID reportedAllocationId = reportedStatus.getAllocationID();
+
+				// check whether we also thought this slot is in use
+				if (allocationMap.isAllocated(slotId)) {
+					// we also think that slot is in use, check whether the AllocationID matches
+					final AllocationID currentAllocationId = allocationMap.getAllocationID(slotId);
+
+					if (!reportedAllocationId.equals(currentAllocationId)) {
+						LOG.info("Slot allocation info mismatch! SlotID:{}, current:{}, reported:{}",
+							slotId, currentAllocationId, reportedAllocationId);
+
+						// seems we have a disagreement about the slot assignments, need to correct it
+						allocationMap.removeAllocation(slotId);
+						allocationMap.addAllocation(slotId, reportedAllocationId);
+					}
+				} else {
+					LOG.info("Slot allocation info mismatch! SlotID:{}, current:null, reported:{}",
+						slotId, reportedAllocationId);
+
+					// we thought the slot is free, should correct this information
+					allocationMap.addAllocation(slotId, reportedStatus.getAllocationID());
+
+					// remove this slot from free slots pool
+					freeSlots.remove(slotId);
+				}
+			} else {
+				// slot is reported empty
+
+				// check whether we also thought this slot is empty
+				if (allocationMap.isAllocated(slotId)) {
+					LOG.info("Slot allocation info mismatch! SlotID:{}, current:{}, reported:null",
+						slotId, allocationMap.getAllocationID(slotId));
+
+					// we thought the slot is in use, correct it
+					allocationMap.removeAllocation(slotId);
+
+					// we have a free slot!
+					handleFreeSlot(new ResourceSlot(slotId, reportedStatus.getProfiler()));
+				}
+			}
+		}
+	}
+
+	/**
+	 * When we have a free slot, try to fulfill the pending request first. If any request can be fulfilled,
+	 * record this allocation in bookkeeping and send slot request to TaskManager, else we just add this slot
+	 * to the free pool.
+	 *
+	 * @param freeSlot The free slot
+	 */
+	private void handleFreeSlot(final ResourceSlot freeSlot) {
+		SlotRequest chosenRequest = chooseRequestToFulfill(freeSlot, pendingSlotRequests);
+
+		if (chosenRequest != null) {
+			pendingSlotRequests.remove(chosenRequest.getAllocationId());
+
+			LOG.info("Assigning SlotID({}) to AllocationID({}), JobID:{}", freeSlot.getSlotId(),
+				chosenRequest.getAllocationId(), chosenRequest.getJobId());
+			allocationMap.addAllocation(freeSlot.getSlotId(), chosenRequest.getAllocationId());
+
+			// TODO: send slot request to TaskManager
+		} else {
+			freeSlots.put(freeSlot.getSlotId(), freeSlot);
+		}
+	}
+
+	/**
+	 * Check whether the request is duplicated. We use AllocationID to identify slot request, for each
+	 * formerly received slot request, it is either in pending list or already been allocated.
+	 *
+	 * @param request The slot request
+	 * @return <tt>true</tt> if the request is duplicated
+	 */
+	private boolean isRequestDuplicated(final SlotRequest request) {
+		final AllocationID allocationId = request.getAllocationId();
+		return pendingSlotRequests.containsKey(allocationId)
+			|| allocationMap.isAllocated(allocationId);
+	}
+
+	/**
+	 * Try to register slot, and tell if this slot is newly registered.
+	 *
+	 * @param slot The ResourceSlot which will be checked and registered
+	 * @return <tt>true</tt> if we meet a new slot
+	 */
+	private boolean registerNewSlot(final ResourceSlot slot) {
+		final SlotID slotId = slot.getSlotId();
+		final ResourceID resourceId = slotId.getResourceID();
+		if (!registeredSlots.containsKey(resourceId)) {
+			registeredSlots.put(resourceId, new HashMap<SlotID, ResourceSlot>());
+		}
+		return registeredSlots.get(resourceId).put(slotId, slot) == null;
+	}
+
+	private ResourceSlot getRegisteredSlot(final SlotID slotId) {
+		final ResourceID resourceId = slotId.getResourceID();
+		if (!registeredSlots.containsKey(resourceId)) {
+			return null;
+		}
+		return registeredSlots.get(resourceId).get(slotId);
+	}
+
+	// ------------------------------------------------------------------------
+	//  Framework specific behavior
+	// ------------------------------------------------------------------------
+
+	/**
+	 * Choose a slot to use among all free slots, the behavior is framework specified.
+	 *
+	 * @param request   The slot request
+	 * @param freeSlots All slots which can be used
+	 * @return The slot we choose to use, <tt>null</tt> if we did not find a match
+	 */
+	protected abstract ResourceSlot chooseSlotToUse(final SlotRequest request,
+		final Map<SlotID, ResourceSlot> freeSlots);
+
+	/**
+	 * Choose a pending request to fulfill when we have a free slot, the behavior is framework specified.
+	 *
+	 * @param offeredSlot     The free slot
+	 * @param pendingRequests All the pending slot requests
+	 * @return The chosen SlotRequest, <tt>null</tt> if we did not find a match
+	 */
+	protected abstract SlotRequest chooseRequestToFulfill(final ResourceSlot offeredSlot,
+		final Map<AllocationID, SlotRequest> pendingRequests);
+
+	/**
+	 * The framework specific code for allocating a container for specified resource profile.
+	 *
+	 * @param resourceProfile The resource profile
+	 */
+	protected abstract void allocateContainer(final ResourceProfile resourceProfile);
+
+
+	// ------------------------------------------------------------------------
+	//  Helper classes
+	// ------------------------------------------------------------------------
+
+	/**
+	 * We maintain all the allocations with SlotID and AllocationID. We are able to get or remove the allocation info
+	 * either by SlotID or AllocationID.
+	 */
+	private static class AllocationMap {
+
+		/** All allocated slots (by SlotID) */
+		private final Map<SlotID, AllocationID> allocatedSlots;
+
+		/** All allocated slots (by AllocationID), it'a a inverse view of allocatedSlots */
+		private final Map<AllocationID, SlotID> allocatedSlotsByAllocationId;
+
+		AllocationMap() {
+			this.allocatedSlots = new HashMap<>(16);
+			this.allocatedSlotsByAllocationId = new HashMap<>(16);
+		}
+
+		/**
+		 * Add a allocation
+		 *
+		 * @param slotId       The slot id
+		 * @param allocationId The allocation id
+		 */
+		void addAllocation(final SlotID slotId, final AllocationID allocationId) {
+			allocatedSlots.put(slotId, allocationId);
+			allocatedSlotsByAllocationId.put(allocationId, slotId);
+		}
+
+		/**
+		 * De-allocation with slot id
+		 *
+		 * @param slotId The slot id
+		 */
+		void removeAllocation(final SlotID slotId) {
+			if (allocatedSlots.containsKey(slotId)) {
+				final AllocationID allocationId = allocatedSlots.get(slotId);
+				allocatedSlots.remove(slotId);
+				allocatedSlotsByAllocationId.remove(allocationId);
+			}
+		}
+
+		/**
+		 * De-allocation with allocation id
+		 *
+		 * @param allocationId The allocation id
+		 */
+		void removeAllocation(final AllocationID allocationId) {
+			if (allocatedSlotsByAllocationId.containsKey(allocationId)) {
+				SlotID slotId = allocatedSlotsByAllocationId.get(allocationId);
+				allocatedSlotsByAllocationId.remove(allocationId);
+				allocatedSlots.remove(slotId);
+			}
+		}
+
+		/**
+		 * Check whether allocation exists by slot id
+		 *
+		 * @param slotId The slot id
+		 * @return true if the allocation exists
+		 */
+		boolean isAllocated(final SlotID slotId) {
+			return allocatedSlots.containsKey(slotId);
+		}
+
+		/**
+		 * Check whether allocation exists by allocation id
+		 *
+		 * @param allocationId The allocation id
+		 * @return true if the allocation exists
+		 */
+		boolean isAllocated(final AllocationID allocationId) {
+			return allocatedSlotsByAllocationId.containsKey(allocationId);
+		}
+
+		AllocationID getAllocationID(final SlotID slotId) {
+			return allocatedSlots.get(slotId);
+		}
+
+		SlotID getSlotID(final AllocationID allocationId) {
+			return allocatedSlotsByAllocationId.get(allocationId);
+		}
+
+		public int size() {
+			return allocatedSlots.size();
+		}
+	}
+
+	// ------------------------------------------------------------------------
+	//  Testing utilities
+	// ------------------------------------------------------------------------
+
+	@VisibleForTesting
+	boolean isAllocated(final SlotID slotId) {
+		return allocationMap.isAllocated(slotId);
+	}
+
+	@VisibleForTesting
+	boolean isAllocated(final AllocationID allocationId) {
+		return allocationMap.isAllocated(allocationId);
+	}
+
+	/**
+	 * Add free slots directly to the free pool, this will not trigger pending requests allocation
+	 *
+	 * @param slot The resource slot
+	 */
+	@VisibleForTesting
+	void addFreeSlot(final ResourceSlot slot) {
+		final ResourceID resourceId = slot.getResourceID();
+		final SlotID slotId = slot.getSlotId();
+
+		if (!registeredSlots.containsKey(resourceId)) {
+			registeredSlots.put(resourceId, new HashMap<SlotID, ResourceSlot>());
+		}
+		registeredSlots.get(resourceId).put(slot.getSlotId(), slot);
+		freeSlots.put(slotId, slot);
+	}
+
+	@VisibleForTesting
+	int getAllocatedSlotCount() {
+		return allocationMap.size();
+	}
+
+	@VisibleForTesting
+	int getFreeSlotCount() {
+		return freeSlots.size();
+	}
+
+	@VisibleForTesting
+	int getPendingRequestCount() {
+		return pendingSlotRequests.size();
+	}
+}


[17/50] [abbrv] flink git commit: [FLINK-4346] [rpc] Add new RPC abstraction

Posted by tr...@apache.org.
http://git-wip-us.apache.org/repos/asf/flink/blob/04bcb715/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/ResourceManagerGateway.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/ResourceManagerGateway.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/ResourceManagerGateway.java
new file mode 100644
index 0000000..464a261
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/ResourceManagerGateway.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc.resourcemanager;
+
+import org.apache.flink.runtime.rpc.RpcGateway;
+import org.apache.flink.runtime.rpc.RpcTimeout;
+import org.apache.flink.runtime.rpc.jobmaster.JobMaster;
+import scala.concurrent.Future;
+import scala.concurrent.duration.FiniteDuration;
+
+/**
+ * {@link ResourceManager} rpc gateway interface.
+ */
+public interface ResourceManagerGateway extends RpcGateway {
+
+	/**
+	 * Register a {@link JobMaster} at the resource manager.
+	 *
+	 * @param jobMasterRegistration Job master registration information
+	 * @param timeout Timeout for the future to complete
+	 * @return Future registration response
+	 */
+	Future<RegistrationResponse> registerJobMaster(
+		JobMasterRegistration jobMasterRegistration,
+		@RpcTimeout FiniteDuration timeout);
+
+	/**
+	 * Register a {@link JobMaster} at the resource manager.
+	 *
+	 * @param jobMasterRegistration Job master registration information
+	 * @return Future registration response
+	 */
+	Future<RegistrationResponse> registerJobMaster(JobMasterRegistration jobMasterRegistration);
+
+	/**
+	 * Requests a slot from the resource manager.
+	 *
+	 * @param slotRequest Slot request
+	 * @return Future slot assignment
+	 */
+	Future<SlotAssignment> requestSlot(SlotRequest slotRequest);
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/04bcb715/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/SlotAssignment.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/SlotAssignment.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/SlotAssignment.java
new file mode 100644
index 0000000..86cd8b7
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/SlotAssignment.java
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc.resourcemanager;
+
+import java.io.Serializable;
+
+public class SlotAssignment implements Serializable{
+	private static final long serialVersionUID = -6990813455942742322L;
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/04bcb715/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/SlotRequest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/SlotRequest.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/SlotRequest.java
new file mode 100644
index 0000000..d8fe268
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/SlotRequest.java
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc.resourcemanager;
+
+import java.io.Serializable;
+
+public class SlotRequest implements Serializable{
+	private static final long serialVersionUID = -6586877187990445986L;
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/04bcb715/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutor.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutor.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutor.java
new file mode 100644
index 0000000..cdfc3bd
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutor.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc.taskexecutor;
+
+import akka.dispatch.ExecutionContexts$;
+import org.apache.flink.runtime.deployment.TaskDeploymentDescriptor;
+import org.apache.flink.runtime.executiongraph.ExecutionAttemptID;
+import org.apache.flink.runtime.messages.Acknowledge;
+import org.apache.flink.runtime.rpc.RpcMethod;
+import org.apache.flink.runtime.rpc.RpcEndpoint;
+import org.apache.flink.runtime.rpc.RpcService;
+import scala.concurrent.ExecutionContext;
+
+import java.util.HashSet;
+import java.util.Set;
+import java.util.concurrent.ExecutorService;
+
+/**
+ * TaskExecutor implementation. The task executor is responsible for the execution of multiple
+ * {@link org.apache.flink.runtime.taskmanager.Task}.
+ *
+ * It offers the following methods as part of its rpc interface to interact with him remotely:
+ * <ul>
+ *     <li>{@link #executeTask(TaskDeploymentDescriptor)} executes a given task on the TaskExecutor</li>
+ *     <li>{@link #cancelTask(ExecutionAttemptID)} cancels a given task identified by the {@link ExecutionAttemptID}</li>
+ * </ul>
+ */
+public class TaskExecutor extends RpcEndpoint<TaskExecutorGateway> {
+	private final ExecutionContext executionContext;
+	private final Set<ExecutionAttemptID> tasks = new HashSet<>();
+
+	public TaskExecutor(RpcService rpcService, ExecutorService executorService) {
+		super(rpcService);
+		this.executionContext = ExecutionContexts$.MODULE$.fromExecutor(executorService);
+	}
+
+	/**
+	 * Execute the given task on the task executor. The task is described by the provided
+	 * {@link TaskDeploymentDescriptor}.
+	 *
+	 * @param taskDeploymentDescriptor Descriptor for the task to be executed
+	 * @return Acknowledge the start of the task execution
+	 */
+	@RpcMethod
+	public Acknowledge executeTask(TaskDeploymentDescriptor taskDeploymentDescriptor) {
+		tasks.add(taskDeploymentDescriptor.getExecutionId());
+		return Acknowledge.get();
+	}
+
+	/**
+	 * Cancel a task identified by it {@link ExecutionAttemptID}. If the task cannot be found, then
+	 * the method throws an {@link Exception}.
+	 *
+	 * @param executionAttemptId Execution attempt ID identifying the task to be canceled.
+	 * @return Acknowledge the task canceling
+	 * @throws Exception if the task with the given execution attempt id could not be found
+	 */
+	@RpcMethod
+	public Acknowledge cancelTask(ExecutionAttemptID executionAttemptId) throws Exception {
+		if (tasks.contains(executionAttemptId)) {
+			return Acknowledge.get();
+		} else {
+			throw new Exception("Could not find task.");
+		}
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/04bcb715/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorGateway.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorGateway.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorGateway.java
new file mode 100644
index 0000000..450423e
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorGateway.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc.taskexecutor;
+
+import org.apache.flink.runtime.deployment.TaskDeploymentDescriptor;
+import org.apache.flink.runtime.executiongraph.ExecutionAttemptID;
+import org.apache.flink.runtime.messages.Acknowledge;
+import org.apache.flink.runtime.rpc.RpcGateway;
+import scala.concurrent.Future;
+
+/**
+ * {@link TaskExecutor} rpc gateway interface
+ */
+public interface TaskExecutorGateway extends RpcGateway {
+	/**
+	 * Execute the given task on the task executor. The task is described by the provided
+	 * {@link TaskDeploymentDescriptor}.
+	 *
+	 * @param taskDeploymentDescriptor Descriptor for the task to be executed
+	 * @return Future acknowledge of the start of the task execution
+	 */
+	Future<Acknowledge> executeTask(TaskDeploymentDescriptor taskDeploymentDescriptor);
+
+	/**
+	 * Cancel a task identified by it {@link ExecutionAttemptID}. If the task cannot be found, then
+	 * the method throws an {@link Exception}.
+	 *
+	 * @param executionAttemptId Execution attempt ID identifying the task to be canceled.
+	 * @return Future acknowledge of the task canceling
+	 */
+	Future<Acknowledge> cancelTask(ExecutionAttemptID executionAttemptId);
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/04bcb715/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/RpcCompletenessTest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/RpcCompletenessTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/RpcCompletenessTest.java
new file mode 100644
index 0000000..0ded25e
--- /dev/null
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/RpcCompletenessTest.java
@@ -0,0 +1,327 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc;
+
+import org.apache.flink.util.TestLogger;
+import org.junit.Test;
+import org.reflections.Reflections;
+import scala.concurrent.Future;
+
+import java.lang.annotation.Annotation;
+import java.lang.reflect.Method;
+import java.lang.reflect.ParameterizedType;
+import java.lang.reflect.Type;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+public class RpcCompletenessTest extends TestLogger {
+	private static final Class<?> futureClass = Future.class;
+
+	@Test
+	public void testRpcCompleteness() {
+		Reflections reflections = new Reflections("org.apache.flink");
+
+		Set<Class<? extends RpcEndpoint>> classes = reflections.getSubTypesOf(RpcEndpoint.class);
+
+		Class<? extends RpcEndpoint> c;
+
+		for (Class<? extends RpcEndpoint> rpcEndpoint :classes){
+			c = rpcEndpoint;
+			Type superClass = c.getGenericSuperclass();
+
+			Class<?> rpcGatewayType = extractTypeParameter(superClass, 0);
+
+			if (rpcGatewayType != null) {
+				checkCompleteness(rpcEndpoint, (Class<? extends RpcGateway>) rpcGatewayType);
+			} else {
+				fail("Could not retrieve the rpc gateway class for the given rpc endpoint class " + rpcEndpoint.getName());
+			}
+		}
+	}
+
+	private void checkCompleteness(Class<? extends RpcEndpoint> rpcEndpoint, Class<? extends RpcGateway> rpcGateway) {
+		Method[] gatewayMethods = rpcGateway.getDeclaredMethods();
+		Method[] serverMethods = rpcEndpoint.getDeclaredMethods();
+
+		Map<String, Set<Method>> rpcMethods = new HashMap<>();
+		Set<Method> unmatchedRpcMethods = new HashSet<>();
+
+		for (Method serverMethod : serverMethods) {
+			if (serverMethod.isAnnotationPresent(RpcMethod.class)) {
+				if (rpcMethods.containsKey(serverMethod.getName())) {
+					Set<Method> methods = rpcMethods.get(serverMethod.getName());
+					methods.add(serverMethod);
+
+					rpcMethods.put(serverMethod.getName(), methods);
+				} else {
+					Set<Method> methods = new HashSet<>();
+					methods.add(serverMethod);
+
+					rpcMethods.put(serverMethod.getName(), methods);
+				}
+
+				unmatchedRpcMethods.add(serverMethod);
+			}
+		}
+
+		for (Method gatewayMethod : gatewayMethods) {
+			assertTrue(
+				"The rpc endpoint " + rpcEndpoint.getName() + " does not contain a RpcMethod " +
+					"annotated method with the same name and signature " +
+					generateEndpointMethodSignature(gatewayMethod) + ".",
+				rpcMethods.containsKey(gatewayMethod.getName()));
+
+			checkGatewayMethod(gatewayMethod);
+
+			if (!matchGatewayMethodWithEndpoint(gatewayMethod, rpcMethods.get(gatewayMethod.getName()), unmatchedRpcMethods)) {
+				fail("Could not find a RpcMethod annotated method in rpc endpoint " +
+					rpcEndpoint.getName() + " matching the rpc gateway method " +
+					generateEndpointMethodSignature(gatewayMethod) + " defined in the rpc gateway " +
+					rpcGateway.getName() + ".");
+			}
+		}
+
+		if (!unmatchedRpcMethods.isEmpty()) {
+			StringBuilder builder = new StringBuilder();
+
+			for (Method unmatchedRpcMethod : unmatchedRpcMethods) {
+				builder.append(unmatchedRpcMethod).append("\n");
+			}
+
+			fail("The rpc endpoint " + rpcEndpoint.getName() + " contains rpc methods which " +
+				"are not matched to gateway methods of " + rpcGateway.getName() + ":\n" +
+				builder.toString());
+		}
+	}
+
+	/**
+	 * Checks whether the gateway method fulfills the gateway method requirements.
+	 * <ul>
+	 *     <li>It checks whether the return type is void or a {@link Future} wrapping the actual result. </li>
+	 *     <li>It checks that the method's parameter list contains at most one parameter annotated with {@link RpcTimeout}.</li>
+	 * </ul>
+	 *
+	 * @param gatewayMethod Gateway method to check
+	 */
+	private void checkGatewayMethod(Method gatewayMethod) {
+		if (!gatewayMethod.getReturnType().equals(Void.TYPE)) {
+			assertTrue(
+				"The return type of method " + gatewayMethod.getName() + " in the rpc gateway " +
+					gatewayMethod.getDeclaringClass().getName() + " is non void and not a " +
+					"future. Non-void return types have to be returned as a future.",
+				gatewayMethod.getReturnType().equals(futureClass));
+		}
+
+		Annotation[][] parameterAnnotations = gatewayMethod.getParameterAnnotations();
+		int rpcTimeoutParameters = 0;
+
+		for (Annotation[] parameterAnnotation : parameterAnnotations) {
+			for (Annotation annotation : parameterAnnotation) {
+				if (annotation.equals(RpcTimeout.class)) {
+					rpcTimeoutParameters++;
+				}
+			}
+		}
+
+		assertTrue("The gateway method " + gatewayMethod + " must have at most one RpcTimeout " +
+			"annotated parameter.", rpcTimeoutParameters <= 1);
+	}
+
+	/**
+	 * Checks whether we find a matching overloaded version for the gateway method among the methods
+	 * with the same name in the rpc endpoint.
+	 *
+	 * @param gatewayMethod Gateway method
+	 * @param endpointMethods Set of rpc methods on the rpc endpoint with the same name as the gateway
+	 *                   method
+	 * @param unmatchedRpcMethods Set of unmatched rpc methods on the endpoint side (so far)
+	 */
+	private boolean matchGatewayMethodWithEndpoint(Method gatewayMethod, Set<Method> endpointMethods, Set<Method> unmatchedRpcMethods) {
+		for (Method endpointMethod : endpointMethods) {
+			if (checkMethod(gatewayMethod, endpointMethod)) {
+				unmatchedRpcMethods.remove(endpointMethod);
+				return true;
+			}
+		}
+
+		return false;
+	}
+
+	private boolean checkMethod(Method gatewayMethod, Method endpointMethod) {
+		Class<?>[] gatewayParameterTypes = gatewayMethod.getParameterTypes();
+		Annotation[][] gatewayParameterAnnotations = gatewayMethod.getParameterAnnotations();
+
+		Class<?>[] endpointParameterTypes = endpointMethod.getParameterTypes();
+
+		List<Class<?>> filteredGatewayParameterTypes = new ArrayList<>();
+
+		assertEquals(gatewayParameterTypes.length, gatewayParameterAnnotations.length);
+
+		// filter out the RpcTimeout parameters
+		for (int i = 0; i < gatewayParameterTypes.length; i++) {
+			if (!isRpcTimeout(gatewayParameterAnnotations[i])) {
+				filteredGatewayParameterTypes.add(gatewayParameterTypes[i]);
+			}
+		}
+
+		if (filteredGatewayParameterTypes.size() != endpointParameterTypes.length) {
+			return false;
+		} else {
+			// check the parameter types
+			for (int i = 0; i < filteredGatewayParameterTypes.size(); i++) {
+				if (!checkType(filteredGatewayParameterTypes.get(i), endpointParameterTypes[i])) {
+					return false;
+				}
+			}
+
+			// check the return types
+			if (endpointMethod.getReturnType() == void.class) {
+				if (gatewayMethod.getReturnType() != void.class) {
+					return false;
+				}
+			} else {
+				// has return value. The gateway method should be wrapped in a future
+				Class<?> futureClass = gatewayMethod.getReturnType();
+
+				// sanity check that the return type of a gateway method must be void or a future
+				if (!futureClass.equals(RpcCompletenessTest.futureClass)) {
+					return false;
+				} else {
+					Class<?> valueClass = extractTypeParameter(futureClass, 0);
+
+					if (endpointMethod.getReturnType().equals(futureClass)) {
+						Class<?> rpcEndpointValueClass = extractTypeParameter(endpointMethod.getReturnType(), 0);
+
+						// check if we have the same future value types
+						if (valueClass != null && rpcEndpointValueClass != null && !checkType(valueClass, rpcEndpointValueClass)) {
+							return false;
+						}
+					} else {
+						if (valueClass != null && !checkType(valueClass, endpointMethod.getReturnType())) {
+							return false;
+						}
+					}
+				}
+			}
+
+			return gatewayMethod.getName().equals(endpointMethod.getName());
+		}
+	}
+
+	private boolean checkType(Class<?> firstType, Class<?> secondType) {
+		return firstType.equals(secondType);
+	}
+
+	/**
+	 * Generates from a gateway rpc method signature the corresponding rpc endpoint signature.
+	 *
+	 * For example the {@link RpcTimeout} annotation adds an additional parameter to the gateway
+	 * signature which is not relevant on the server side.
+	 *
+	 * @param method Method to generate the signature string for
+	 * @return String of the respective server side rpc method signature
+	 */
+	private String generateEndpointMethodSignature(Method method) {
+		StringBuilder builder = new StringBuilder();
+
+		if (method.getReturnType().equals(Void.TYPE)) {
+			builder.append("void").append(" ");
+		} else if (method.getReturnType().equals(futureClass)) {
+			Class<?> valueClass = extractTypeParameter(method.getGenericReturnType(), 0);
+
+			builder
+				.append(futureClass.getSimpleName())
+				.append("<")
+				.append(valueClass != null ? valueClass.getSimpleName() : "")
+				.append(">");
+
+			if (valueClass != null) {
+				builder.append("/").append(valueClass.getSimpleName());
+			}
+
+			builder.append(" ");
+		} else {
+			return "Invalid rpc method signature.";
+		}
+
+		builder.append(method.getName()).append("(");
+
+		Class<?>[] parameterTypes = method.getParameterTypes();
+		Annotation[][] parameterAnnotations = method.getParameterAnnotations();
+
+		assertEquals(parameterTypes.length, parameterAnnotations.length);
+
+		for (int i = 0; i < parameterTypes.length; i++) {
+			// filter out the RpcTimeout parameters
+			if (!isRpcTimeout(parameterAnnotations[i])) {
+				builder.append(parameterTypes[i].getName());
+
+				if (i < parameterTypes.length -1) {
+					builder.append(", ");
+				}
+			}
+		}
+
+		builder.append(")");
+
+		return builder.toString();
+	}
+
+	private Class<?> extractTypeParameter(Type genericType, int position) {
+		if (genericType instanceof ParameterizedType) {
+			ParameterizedType parameterizedType = (ParameterizedType) genericType;
+
+			Type[] typeArguments = parameterizedType.getActualTypeArguments();
+
+			if (position < 0 || position >= typeArguments.length) {
+				throw new IndexOutOfBoundsException("The generic type " +
+					parameterizedType.getRawType() + " only has " + typeArguments.length +
+					" type arguments.");
+			} else {
+				Type typeArgument = typeArguments[position];
+
+				if (typeArgument instanceof Class<?>) {
+					return (Class<?>) typeArgument;
+				} else {
+					return null;
+				}
+			}
+		} else {
+			return null;
+		}
+	}
+
+	private boolean isRpcTimeout(Annotation[] annotations) {
+		for (Annotation annotation : annotations) {
+			if (annotation.annotationType().equals(RpcTimeout.class)) {
+				return true;
+			}
+		}
+
+		return false;
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/04bcb715/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcServiceTest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcServiceTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcServiceTest.java
new file mode 100644
index 0000000..c5bac94
--- /dev/null
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcServiceTest.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc.akka;
+
+import akka.actor.ActorSystem;
+import akka.util.Timeout;
+import org.apache.flink.runtime.akka.AkkaUtils;
+import org.apache.flink.runtime.rpc.jobmaster.JobMaster;
+import org.apache.flink.runtime.rpc.resourcemanager.ResourceManagerGateway;
+import org.apache.flink.runtime.rpc.resourcemanager.ResourceManager;
+import org.apache.flink.util.TestLogger;
+import org.junit.Test;
+import scala.concurrent.duration.Deadline;
+import scala.concurrent.duration.FiniteDuration;
+
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.ForkJoinPool;
+import java.util.concurrent.TimeUnit;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+public class AkkaRpcServiceTest extends TestLogger {
+
+	/**
+	 * Tests that the {@link JobMaster} can connect to the {@link ResourceManager} using the
+	 * {@link AkkaRpcService}.
+	 */
+	@Test
+	public void testJobMasterResourceManagerRegistration() throws Exception {
+		Timeout akkaTimeout = new Timeout(10, TimeUnit.SECONDS);
+		ActorSystem actorSystem = AkkaUtils.createDefaultActorSystem();
+		ActorSystem actorSystem2 = AkkaUtils.createDefaultActorSystem();
+		AkkaRpcService akkaRpcService = new AkkaRpcService(actorSystem, akkaTimeout);
+		AkkaRpcService akkaRpcService2 = new AkkaRpcService(actorSystem2, akkaTimeout);
+		ExecutorService executorService = new ForkJoinPool();
+
+		ResourceManager resourceManager = new ResourceManager(akkaRpcService, executorService);
+		JobMaster jobMaster = new JobMaster(akkaRpcService2, executorService);
+
+		resourceManager.start();
+
+		ResourceManagerGateway rm = resourceManager.getSelf();
+
+		assertTrue(rm instanceof AkkaGateway);
+
+		AkkaGateway akkaClient = (AkkaGateway) rm;
+
+		jobMaster.start();
+		jobMaster.registerAtResourceManager(AkkaUtils.getAkkaURL(actorSystem, akkaClient.getActorRef()));
+
+		// wait for successful registration
+		FiniteDuration timeout = new FiniteDuration(20, TimeUnit.SECONDS);
+		Deadline deadline = timeout.fromNow();
+
+		while (deadline.hasTimeLeft() && !jobMaster.isConnected()) {
+			Thread.sleep(100);
+		}
+
+		assertFalse(deadline.isOverdue());
+
+		jobMaster.shutDown();
+		resourceManager.shutDown();
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/04bcb715/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorTest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorTest.java
new file mode 100644
index 0000000..c143527
--- /dev/null
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorTest.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc.taskexecutor;
+
+import org.apache.flink.api.common.ExecutionConfig;
+import org.apache.flink.api.common.JobID;
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.runtime.blob.BlobKey;
+import org.apache.flink.runtime.deployment.InputGateDeploymentDescriptor;
+import org.apache.flink.runtime.deployment.ResultPartitionDeploymentDescriptor;
+import org.apache.flink.runtime.deployment.TaskDeploymentDescriptor;
+import org.apache.flink.runtime.executiongraph.ExecutionAttemptID;
+import org.apache.flink.runtime.jobgraph.JobVertexID;
+import org.apache.flink.runtime.messages.Acknowledge;
+import org.apache.flink.runtime.rpc.RpcService;
+import org.apache.flink.runtime.util.DirectExecutorService;
+import org.apache.flink.util.SerializedValue;
+import org.apache.flink.util.TestLogger;
+import org.junit.Test;
+
+import java.net.URL;
+import java.util.Collections;
+
+import static org.junit.Assert.fail;
+import static org.mockito.Mockito.mock;
+
+public class TaskExecutorTest extends TestLogger {
+
+	/**
+	 * Tests that we can deploy and cancel a task on the TaskExecutor without exceptions
+	 */
+	@Test
+	public void testTaskExecution() throws Exception {
+		RpcService testingRpcService = mock(RpcService.class);
+		DirectExecutorService directExecutorService = null;
+		TaskExecutor taskExecutor = new TaskExecutor(testingRpcService, directExecutorService);
+
+		TaskDeploymentDescriptor tdd = new TaskDeploymentDescriptor(
+			new JobID(),
+			"Test job",
+			new JobVertexID(),
+			new ExecutionAttemptID(),
+			new SerializedValue<ExecutionConfig>(null),
+			"Test task",
+			0,
+			1,
+			0,
+			new Configuration(),
+			new Configuration(),
+			"Invokable",
+			Collections.<ResultPartitionDeploymentDescriptor>emptyList(),
+			Collections.<InputGateDeploymentDescriptor>emptyList(),
+			Collections.<BlobKey>emptyList(),
+			Collections.<URL>emptyList(),
+			0
+		);
+
+		Acknowledge ack = taskExecutor.executeTask(tdd);
+
+		ack = taskExecutor.cancelTask(tdd.getExecutionId());
+	}
+
+	/**
+	 * Tests that cancelling a non-existing task will return an exception
+	 */
+	@Test(expected=Exception.class)
+	public void testWrongTaskCancellation() throws Exception {
+		RpcService testingRpcService = mock(RpcService.class);
+		DirectExecutorService directExecutorService = null;
+		TaskExecutor taskExecutor = new TaskExecutor(testingRpcService, directExecutorService);
+
+		taskExecutor.cancelTask(new ExecutionAttemptID());
+
+		fail("The cancellation should have thrown an exception.");
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/04bcb715/flink-runtime/src/test/java/org/apache/flink/runtime/util/DirectExecutorService.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/util/DirectExecutorService.java b/flink-runtime/src/test/java/org/apache/flink/runtime/util/DirectExecutorService.java
new file mode 100644
index 0000000..1d7c971
--- /dev/null
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/util/DirectExecutorService.java
@@ -0,0 +1,234 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.util;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.concurrent.Callable;
+import java.util.concurrent.CancellationException;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
+
+public class DirectExecutorService implements ExecutorService {
+	private boolean _shutdown = false;
+
+	@Override
+	public void shutdown() {
+		_shutdown = true;
+	}
+
+	@Override
+	public List<Runnable> shutdownNow() {
+		_shutdown = true;
+		return Collections.emptyList();
+	}
+
+	@Override
+	public boolean isShutdown() {
+		return _shutdown;
+	}
+
+	@Override
+	public boolean isTerminated() {
+		return _shutdown;
+	}
+
+	@Override
+	public boolean awaitTermination(long timeout, TimeUnit unit) throws InterruptedException {
+		return _shutdown;
+	}
+
+	@Override
+	public <T> Future<T> submit(Callable<T> task) {
+		try {
+			T result = task.call();
+
+			return new CompletedFuture<>(result, null);
+		} catch (Exception e) {
+			return new CompletedFuture<>(null, e);
+		}
+	}
+
+	@Override
+	public <T> Future<T> submit(Runnable task, T result) {
+		task.run();
+
+		return new CompletedFuture<>(result, null);
+	}
+
+	@Override
+	public Future<?> submit(Runnable task) {
+		task.run();
+		return new CompletedFuture<>(null, null);
+	}
+
+	@Override
+	public <T> List<Future<T>> invokeAll(Collection<? extends Callable<T>> tasks) throws InterruptedException {
+		ArrayList<Future<T>> result = new ArrayList<>();
+
+		for (Callable<T> task : tasks) {
+			try {
+				result.add(new CompletedFuture<T>(task.call(), null));
+			} catch (Exception e) {
+				result.add(new CompletedFuture<T>(null, e));
+			}
+		}
+		return result;
+	}
+
+	@Override
+	public <T> List<Future<T>> invokeAll(Collection<? extends Callable<T>> tasks, long timeout, TimeUnit unit) throws InterruptedException {
+		long end = System.currentTimeMillis() + unit.toMillis(timeout);
+		Iterator<? extends Callable<T>> iterator = tasks.iterator();
+		ArrayList<Future<T>> result = new ArrayList<>();
+
+		while (end > System.currentTimeMillis() && iterator.hasNext()) {
+			Callable<T> callable = iterator.next();
+
+			try {
+				result.add(new CompletedFuture<T>(callable.call(), null));
+			} catch (Exception e) {
+				result.add(new CompletedFuture<T>(null, e));
+			}
+		}
+
+		while(iterator.hasNext()) {
+			iterator.next();
+			result.add(new Future<T>() {
+				@Override
+				public boolean cancel(boolean mayInterruptIfRunning) {
+					return false;
+				}
+
+				@Override
+				public boolean isCancelled() {
+					return true;
+				}
+
+				@Override
+				public boolean isDone() {
+					return false;
+				}
+
+				@Override
+				public T get() throws InterruptedException, ExecutionException {
+					throw new CancellationException("Task has been cancelled.");
+				}
+
+				@Override
+				public T get(long timeout, TimeUnit unit) throws InterruptedException, ExecutionException, TimeoutException {
+					throw new CancellationException("Task has been cancelled.");
+				}
+			});
+		}
+
+		return result;
+	}
+
+	@Override
+	public <T> T invokeAny(Collection<? extends Callable<T>> tasks) throws InterruptedException, ExecutionException {
+		Exception exception = null;
+
+		for (Callable<T> task : tasks) {
+			try {
+				return task.call();
+			} catch (Exception e) {
+				// try next task
+				exception = e;
+			}
+		}
+
+		throw new ExecutionException("No tasks finished successfully.", exception);
+	}
+
+	@Override
+	public <T> T invokeAny(Collection<? extends Callable<T>> tasks, long timeout, TimeUnit unit) throws InterruptedException, ExecutionException, TimeoutException {
+		long end = System.currentTimeMillis() + unit.toMillis(timeout);
+		Exception exception = null;
+
+		Iterator<? extends Callable<T>> iterator = tasks.iterator();
+
+		while (end > System.currentTimeMillis() && iterator.hasNext()) {
+			Callable<T> callable = iterator.next();
+
+			try {
+				return callable.call();
+			} catch (Exception e) {
+				// ignore exception and try next
+				exception = e;
+			}
+		}
+
+		if (iterator.hasNext()) {
+			throw new TimeoutException("Could not finish execution of tasks within time.");
+		} else {
+			throw new ExecutionException("No tasks finished successfully.", exception);
+		}
+	}
+
+	@Override
+	public void execute(Runnable command) {
+		command.run();
+	}
+
+	public static class CompletedFuture<V> implements Future<V> {
+		private final V value;
+		private final Exception exception;
+
+		public CompletedFuture(V value, Exception exception) {
+			this.value = value;
+			this.exception = exception;
+		}
+
+		@Override
+		public boolean cancel(boolean mayInterruptIfRunning) {
+			return false;
+		}
+
+		@Override
+		public boolean isCancelled() {
+			return false;
+		}
+
+		@Override
+		public boolean isDone() {
+			return true;
+		}
+
+		@Override
+		public V get() throws InterruptedException, ExecutionException {
+			if (exception != null) {
+				throw new ExecutionException(exception);
+			} else {
+				return value;
+			}
+		}
+
+		@Override
+		public V get(long timeout, TimeUnit unit) throws InterruptedException, ExecutionException, TimeoutException {
+			return get();
+		}
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/04bcb715/flink-tests/pom.xml
----------------------------------------------------------------------
diff --git a/flink-tests/pom.xml b/flink-tests/pom.xml
index b09db1f..3202a9f 100644
--- a/flink-tests/pom.xml
+++ b/flink-tests/pom.xml
@@ -202,7 +202,6 @@ under the License.
 		<dependency>
 			<groupId>org.reflections</groupId>
 			<artifactId>reflections</artifactId>
-			<version>0.9.10</version>
 		</dependency>
 
 	</dependencies>

http://git-wip-us.apache.org/repos/asf/flink/blob/04bcb715/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 5b3148a..5d7cf91 100644
--- a/pom.xml
+++ b/pom.xml
@@ -413,6 +413,13 @@ under the License.
 				<artifactId>jackson-annotations</artifactId>
 				<version>${jackson.version}</version>
 			</dependency>
+
+			<dependency>
+				<groupId>org.reflections</groupId>
+				<artifactId>reflections</artifactId>
+				<version>0.9.10</version>
+				<scope>test</scope>
+			</dependency>
 		</dependencies>
 	</dependencyManagement>
 


[12/50] [abbrv] flink git commit: [FLINK-3929] Added Keytab based Kerberos support to enable secure Flink cluster deployment(addresses HDHS, Kafka and ZK services)

Posted by tr...@apache.org.
http://git-wip-us.apache.org/repos/asf/flink/blob/25a622fd/flink-streaming-connectors/flink-connector-kafka-0.9/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaTestEnvironmentImpl.java
----------------------------------------------------------------------
diff --git a/flink-streaming-connectors/flink-connector-kafka-0.9/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaTestEnvironmentImpl.java b/flink-streaming-connectors/flink-connector-kafka-0.9/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaTestEnvironmentImpl.java
index 0dbe865..213ba4a 100644
--- a/flink-streaming-connectors/flink-connector-kafka-0.9/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaTestEnvironmentImpl.java
+++ b/flink-streaming-connectors/flink-connector-kafka-0.9/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaTestEnvironmentImpl.java
@@ -65,6 +65,9 @@ public class KafkaTestEnvironmentImpl extends KafkaTestEnvironment {
 	private String brokerConnectionString = "";
 	private Properties standardProps;
 	private Properties additionalServerProperties;
+	private boolean secureMode = false;
+	// 6 seconds is default. Seems to be too small for travis. 30 seconds
+	private String zkTimeout = "30000";
 
 	public String getBrokerConnectionString() {
 		return brokerConnectionString;
@@ -131,8 +134,22 @@ public class KafkaTestEnvironmentImpl extends KafkaTestEnvironment {
 	}
 
 	@Override
-	public void prepare(int numKafkaServers, Properties additionalServerProperties) {
+	public boolean isSecureRunSupported() {
+		return true;
+	}
+
+	@Override
+	public void prepare(int numKafkaServers, Properties additionalServerProperties, boolean secureMode) {
+
+		//increase the timeout since in Travis ZK connection takes long time for secure connection.
+		if(secureMode) {
+			//run only one kafka server to avoid multiple ZK connections from many instances - Travis timeout
+			numKafkaServers = 1;
+			zkTimeout = String.valueOf(Integer.parseInt(zkTimeout) * 15);
+		}
+
 		this.additionalServerProperties = additionalServerProperties;
+		this.secureMode = secureMode;
 		File tempDir = new File(System.getProperty("java.io.tmpdir"));
 
 		tmpZkDir = new File(tempDir, "kafkaITcase-zk-dir-" + (UUID.randomUUID().toString()));
@@ -155,6 +172,7 @@ public class KafkaTestEnvironmentImpl extends KafkaTestEnvironment {
 			LOG.info("Starting Zookeeper");
 			zookeeper = new TestingServer(-1, tmpZkDir);
 			zookeeperConnectionString = zookeeper.getConnectString();
+			LOG.info("zookeeperConnectionString: {}", zookeeperConnectionString);
 
 			LOG.info("Starting KafkaServer");
 			brokers = new ArrayList<>(numKafkaServers);
@@ -163,7 +181,11 @@ public class KafkaTestEnvironmentImpl extends KafkaTestEnvironment {
 				brokers.add(getKafkaServer(i, tmpKafkaDirs.get(i)));
 
 				SocketServer socketServer = brokers.get(i).socketServer();
-				brokerConnectionString += hostAndPortToUrlString(KafkaTestEnvironment.KAFKA_HOST, brokers.get(i).socketServer().boundPort(SecurityProtocol.PLAINTEXT)) + ",";
+				if(secureMode) {
+					brokerConnectionString += hostAndPortToUrlString(KafkaTestEnvironment.KAFKA_HOST, brokers.get(i).socketServer().boundPort(SecurityProtocol.SASL_PLAINTEXT)) + ",";
+				} else {
+					brokerConnectionString += hostAndPortToUrlString(KafkaTestEnvironment.KAFKA_HOST, brokers.get(i).socketServer().boundPort(SecurityProtocol.PLAINTEXT)) + ",";
+				}
 			}
 
 			LOG.info("ZK and KafkaServer started.");
@@ -173,15 +195,18 @@ public class KafkaTestEnvironmentImpl extends KafkaTestEnvironment {
 			fail("Test setup failed: " + t.getMessage());
 		}
 
+		LOG.info("brokerConnectionString --> {}", brokerConnectionString);
+
 		standardProps = new Properties();
 		standardProps.setProperty("zookeeper.connect", zookeeperConnectionString);
 		standardProps.setProperty("bootstrap.servers", brokerConnectionString);
 		standardProps.setProperty("group.id", "flink-tests");
 		standardProps.setProperty("auto.commit.enable", "false");
-		standardProps.setProperty("zookeeper.session.timeout.ms", "30000"); // 6 seconds is default. Seems to be too small for travis.
-		standardProps.setProperty("zookeeper.connection.timeout.ms", "30000");
+		standardProps.setProperty("zookeeper.session.timeout.ms", zkTimeout);
+		standardProps.setProperty("zookeeper.connection.timeout.ms", zkTimeout);
 		standardProps.setProperty("auto.offset.reset", "earliest"); // read from the beginning. (earliest is kafka 0.9 value)
 		standardProps.setProperty("fetch.message.max.bytes", "256"); // make a lot of fetches (MESSAGES MUST BE SMALLER!)
+
 	}
 
 	@Override
@@ -196,6 +221,7 @@ public class KafkaTestEnvironmentImpl extends KafkaTestEnvironment {
 		if (zookeeper != null) {
 			try {
 				zookeeper.stop();
+				zookeeper.close();
 			}
 			catch (Exception e) {
 				LOG.warn("ZK.stop() failed", e);
@@ -224,6 +250,7 @@ public class KafkaTestEnvironmentImpl extends KafkaTestEnvironment {
 	}
 
 	public ZkUtils getZkUtils() {
+		LOG.info("In getZKUtils:: zookeeperConnectionString = {}", zookeeperConnectionString);
 		ZkClient creator = new ZkClient(zookeeperConnectionString, Integer.valueOf(standardProps.getProperty("zookeeper.session.timeout.ms")),
 				Integer.valueOf(standardProps.getProperty("zookeeper.connection.timeout.ms")), new ZooKeeperStringSerializer());
 		return ZkUtils.apply(creator, false);
@@ -241,23 +268,37 @@ public class KafkaTestEnvironmentImpl extends KafkaTestEnvironment {
 			zkUtils.close();
 		}
 
+		LOG.info("Topic {} create request is successfully posted", topic);
+
 		// validate that the topic has been created
-		final long deadline = System.currentTimeMillis() + 30000;
+		final long deadline = System.currentTimeMillis() + Integer.parseInt(zkTimeout);
 		do {
 			try {
-				Thread.sleep(100);
+				if(secureMode) {
+					//increase wait time since in Travis ZK timeout occurs frequently
+					int wait = Integer.parseInt(zkTimeout) / 100;
+					LOG.info("waiting for {} msecs before the topic {} can be checked", wait, topic);
+					Thread.sleep(wait);
+				} else {
+					Thread.sleep(100);
+				}
+
 			} catch (InterruptedException e) {
 				// restore interrupted state
 			}
 			// we could use AdminUtils.topicExists(zkUtils, topic) here, but it's results are
 			// not always correct.
 
+			LOG.info("Validating if the topic {} has been created or not", topic);
+
 			// create a new ZK utils connection
 			ZkUtils checkZKConn = getZkUtils();
 			if(AdminUtils.topicExists(checkZKConn, topic)) {
+				LOG.info("topic {} has been created successfully", topic);
 				checkZKConn.close();
 				return;
 			}
+			LOG.info("topic {} has not been created yet. Will check again...", topic);
 			checkZKConn.close();
 		}
 		while (System.currentTimeMillis() < deadline);
@@ -296,8 +337,8 @@ public class KafkaTestEnvironmentImpl extends KafkaTestEnvironment {
 		kafkaProperties.put("replica.fetch.max.bytes", String.valueOf(50 * 1024 * 1024));
 
 		// for CI stability, increase zookeeper session timeout
-		kafkaProperties.put("zookeeper.session.timeout.ms", "30000");
-		kafkaProperties.put("zookeeper.connection.timeout.ms", "30000");
+		kafkaProperties.put("zookeeper.session.timeout.ms", zkTimeout);
+		kafkaProperties.put("zookeeper.connection.timeout.ms", zkTimeout);
 		if(additionalServerProperties != null) {
 			kafkaProperties.putAll(additionalServerProperties);
 		}
@@ -307,6 +348,15 @@ public class KafkaTestEnvironmentImpl extends KafkaTestEnvironment {
 		for (int i = 1; i <= numTries; i++) {
 			int kafkaPort = NetUtils.getAvailablePort();
 			kafkaProperties.put("port", Integer.toString(kafkaPort));
+
+			//to support secure kafka cluster
+			if(secureMode) {
+				LOG.info("Adding Kafka secure configurations");
+				kafkaProperties.put("listeners", "SASL_PLAINTEXT://" + KAFKA_HOST + ":" + kafkaPort);
+				kafkaProperties.put("advertised.listeners", "SASL_PLAINTEXT://" + KAFKA_HOST + ":" + kafkaPort);
+				kafkaProperties.putAll(getSecureProperties());
+			}
+
 			KafkaConfig kafkaConfig = new KafkaConfig(kafkaProperties);
 
 			try {
@@ -329,4 +379,19 @@ public class KafkaTestEnvironmentImpl extends KafkaTestEnvironment {
 		throw new Exception("Could not start Kafka after " + numTries + " retries due to port conflicts.");
 	}
 
+	public Properties getSecureProperties() {
+		Properties prop = new Properties();
+		if(secureMode) {
+			prop.put("security.inter.broker.protocol", "SASL_PLAINTEXT");
+			prop.put("security.protocol", "SASL_PLAINTEXT");
+			prop.put("sasl.kerberos.service.name", "kafka");
+
+			//add special timeout for Travis
+			prop.setProperty("zookeeper.session.timeout.ms", zkTimeout);
+			prop.setProperty("zookeeper.connection.timeout.ms", zkTimeout);
+			prop.setProperty("metadata.fetch.timeout.ms","120000");
+		}
+		return prop;
+	}
+
 }

http://git-wip-us.apache.org/repos/asf/flink/blob/25a622fd/flink-streaming-connectors/flink-connector-kafka-0.9/src/test/resources/log4j-test.properties
----------------------------------------------------------------------
diff --git a/flink-streaming-connectors/flink-connector-kafka-0.9/src/test/resources/log4j-test.properties b/flink-streaming-connectors/flink-connector-kafka-0.9/src/test/resources/log4j-test.properties
index fbeb110..4ac1773 100644
--- a/flink-streaming-connectors/flink-connector-kafka-0.9/src/test/resources/log4j-test.properties
+++ b/flink-streaming-connectors/flink-connector-kafka-0.9/src/test/resources/log4j-test.properties
@@ -28,3 +28,5 @@ log4j.logger.org.jboss.netty.channel.DefaultChannelPipeline=ERROR, testlogger
 log4j.logger.org.apache.zookeeper=OFF, testlogger
 log4j.logger.state.change.logger=OFF, testlogger
 log4j.logger.kafka=OFF, testlogger
+
+log4j.logger.org.apache.directory=OFF, testlogger
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/flink/blob/25a622fd/flink-streaming-connectors/flink-connector-kafka-base/pom.xml
----------------------------------------------------------------------
diff --git a/flink-streaming-connectors/flink-connector-kafka-base/pom.xml b/flink-streaming-connectors/flink-connector-kafka-base/pom.xml
index 49d630f..ef71bde 100644
--- a/flink-streaming-connectors/flink-connector-kafka-base/pom.xml
+++ b/flink-streaming-connectors/flink-connector-kafka-base/pom.xml
@@ -161,6 +161,14 @@ under the License.
 			<type>test-jar</type>
 			<scope>test</scope>
 		</dependency>
+
+		<dependency>
+			<groupId>org.apache.hadoop</groupId>
+			<artifactId>hadoop-minikdc</artifactId>
+			<version>${minikdc.version}</version>
+			<scope>test</scope>
+		</dependency>
+
 	</dependencies>
 
 	<dependencyManagement>
@@ -187,6 +195,17 @@ under the License.
 					</execution>
 				</executions>
 			</plugin>
+			<!--
+            https://issues.apache.org/jira/browse/DIRSHARED-134
+            Required to pull the Mini-KDC transitive dependency
+            -->
+			<plugin>
+				<groupId>org.apache.felix</groupId>
+				<artifactId>maven-bundle-plugin</artifactId>
+				<version>3.0.1</version>
+				<inherited>true</inherited>
+				<extensions>true</extensions>
+			</plugin>
 		</plugins>
 	</build>
 	

http://git-wip-us.apache.org/repos/asf/flink/blob/25a622fd/flink-streaming-connectors/flink-connector-kafka-base/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaConsumerTestBase.java
----------------------------------------------------------------------
diff --git a/flink-streaming-connectors/flink-connector-kafka-base/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaConsumerTestBase.java b/flink-streaming-connectors/flink-connector-kafka-base/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaConsumerTestBase.java
index 920f15b..a87ff8a 100644
--- a/flink-streaming-connectors/flink-connector-kafka-base/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaConsumerTestBase.java
+++ b/flink-streaming-connectors/flink-connector-kafka-base/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaConsumerTestBase.java
@@ -181,6 +181,7 @@ public abstract class KafkaConsumerTestBase extends KafkaTestBase {
 			properties.setProperty("session.timeout.ms", "2000");
 			properties.setProperty("fetch.max.wait.ms", "2000");
 			properties.setProperty("heartbeat.interval.ms", "1000");
+			properties.putAll(secureProps);
 			FlinkKafkaConsumerBase<String> source = kafkaServer.getConsumer("doesntexist", new SimpleStringSchema(), properties);
 			DataStream<String> stream = see.addSource(source);
 			stream.print();
@@ -275,6 +276,7 @@ public abstract class KafkaConsumerTestBase extends KafkaTestBase {
 		});
 		Properties producerProperties = FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings);
 		producerProperties.setProperty("retries", "3");
+		producerProperties.putAll(secureProps);
 		FlinkKafkaProducerBase<Tuple2<Long, String>> prod = kafkaServer.getProducer(topic, new KeyedSerializationSchemaWrapper<>(sinkSchema), producerProperties, null);
 		stream.addSink(prod);
 
@@ -283,7 +285,11 @@ public abstract class KafkaConsumerTestBase extends KafkaTestBase {
 		List<String> topics = new ArrayList<>();
 		topics.add(topic);
 		topics.add(additionalEmptyTopic);
-		FlinkKafkaConsumerBase<Tuple2<Long, String>> source = kafkaServer.getConsumer(topics, sourceSchema, standardProps);
+
+		Properties props = new Properties();
+		props.putAll(standardProps);
+		props.putAll(secureProps);
+		FlinkKafkaConsumerBase<Tuple2<Long, String>> source = kafkaServer.getConsumer(topics, sourceSchema, props);
 
 		DataStreamSource<Tuple2<Long, String>> consuming = env.addSource(source).setParallelism(parallelism);
 
@@ -371,7 +377,11 @@ public abstract class KafkaConsumerTestBase extends KafkaTestBase {
 		env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));
 		env.getConfig().disableSysoutLogging();
 
-		FlinkKafkaConsumerBase<Integer> kafkaSource = kafkaServer.getConsumer(topic, schema, standardProps);
+		Properties props = new Properties();
+		props.putAll(standardProps);
+		props.putAll(secureProps);
+
+		FlinkKafkaConsumerBase<Integer> kafkaSource = kafkaServer.getConsumer(topic, schema, props);
 
 		env
 				.addSource(kafkaSource)
@@ -416,7 +426,10 @@ public abstract class KafkaConsumerTestBase extends KafkaTestBase {
 		env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));
 		env.getConfig().disableSysoutLogging();
 
-		FlinkKafkaConsumerBase<Integer> kafkaSource = kafkaServer.getConsumer(topic, schema, standardProps);
+		Properties props = new Properties();
+		props.putAll(standardProps);
+		props.putAll(secureProps);
+		FlinkKafkaConsumerBase<Integer> kafkaSource = kafkaServer.getConsumer(topic, schema, props);
 
 		env
 				.addSource(kafkaSource)
@@ -463,7 +476,10 @@ public abstract class KafkaConsumerTestBase extends KafkaTestBase {
 		env.getConfig().disableSysoutLogging();
 		env.setBufferTimeout(0);
 
-		FlinkKafkaConsumerBase<Integer> kafkaSource = kafkaServer.getConsumer(topic, schema, standardProps);
+		Properties props = new Properties();
+		props.putAll(standardProps);
+		props.putAll(secureProps);
+		FlinkKafkaConsumerBase<Integer> kafkaSource = kafkaServer.getConsumer(topic, schema, props);
 
 		env
 			.addSource(kafkaSource)
@@ -506,7 +522,10 @@ public abstract class KafkaConsumerTestBase extends KafkaTestBase {
 					env.enableCheckpointing(100);
 					env.getConfig().disableSysoutLogging();
 
-					FlinkKafkaConsumerBase<String> source = kafkaServer.getConsumer(topic, new SimpleStringSchema(), standardProps);
+					Properties props = new Properties();
+					props.putAll(standardProps);
+					props.putAll(secureProps);
+					FlinkKafkaConsumerBase<String> source = kafkaServer.getConsumer(topic, new SimpleStringSchema(), props);
 
 					env.addSource(source).addSink(new DiscardingSink<String>());
 
@@ -577,7 +596,10 @@ public abstract class KafkaConsumerTestBase extends KafkaTestBase {
 					env.enableCheckpointing(100);
 					env.getConfig().disableSysoutLogging();
 
-					FlinkKafkaConsumerBase<String> source = kafkaServer.getConsumer(topic, new SimpleStringSchema(), standardProps);
+					Properties props = new Properties();
+					props.putAll(standardProps);
+					props.putAll(secureProps);
+					FlinkKafkaConsumerBase<String> source = kafkaServer.getConsumer(topic, new SimpleStringSchema(), props);
 
 					env.addSource(source).addSink(new DiscardingSink<String>());
 
@@ -629,7 +651,10 @@ public abstract class KafkaConsumerTestBase extends KafkaTestBase {
 		env.setParallelism(12); // needs to be more that the mini cluster has slots
 		env.getConfig().disableSysoutLogging();
 
-		FlinkKafkaConsumerBase<Integer> kafkaSource = kafkaServer.getConsumer(topic, schema, standardProps);
+		Properties props = new Properties();
+		props.putAll(standardProps);
+		props.putAll(secureProps);
+		FlinkKafkaConsumerBase<Integer> kafkaSource = kafkaServer.getConsumer(topic, schema, props);
 
 		env
 				.addSource(kafkaSource)
@@ -700,15 +725,19 @@ public abstract class KafkaConsumerTestBase extends KafkaTestBase {
 
 		Tuple2WithTopicSchema schema = new Tuple2WithTopicSchema(env.getConfig());
 
-		stream.addSink(kafkaServer.getProducer("dummy", schema, standardProps, null));
+		Properties props = new Properties();
+		props.putAll(standardProps);
+		props.putAll(secureProps);
+
+		stream.addSink(kafkaServer.getProducer("dummy", schema, props, null));
 
 		env.execute("Write to topics");
 
 		// run second job consuming from multiple topics
 		env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
 		env.getConfig().disableSysoutLogging();
-		
-		stream = env.addSource(kafkaServer.getConsumer(topics, schema, standardProps));
+
+		stream = env.addSource(kafkaServer.getConsumer(topics, schema, props));
 
 		stream.flatMap(new FlatMapFunction<Tuple3<Integer, Integer, String>, Integer>() {
 			Map<String, Integer> countPerTopic = new HashMap<>(NUM_TOPICS);
@@ -787,6 +816,10 @@ public abstract class KafkaConsumerTestBase extends KafkaTestBase {
 		// Produce serialized JSON data
 		createTestTopic(topic, 1, 1);
 
+		Properties props = new Properties();
+		props.putAll(standardProps);
+		props.putAll(secureProps);
+
 		StreamExecutionEnvironment env = StreamExecutionEnvironment
 				.createRemoteEnvironment("localhost", flinkPort);
 		env.getConfig().disableSysoutLogging();
@@ -805,7 +838,7 @@ public abstract class KafkaConsumerTestBase extends KafkaTestBase {
 		}).addSink(kafkaServer.getProducer(
 				topic,
 				new ByteArraySerializationSchema(),
-				standardProps,
+				props,
 				null));
 
 		// Execute blocks
@@ -940,6 +973,7 @@ public abstract class KafkaConsumerTestBase extends KafkaTestBase {
 		consumerProps.setProperty("fetch.message.max.bytes", Integer.toString(1024 * 1024 * 14));
 		consumerProps.setProperty("max.partition.fetch.bytes", Integer.toString(1024 * 1024 * 14)); // for the new fetcher
 		consumerProps.setProperty("queued.max.message.chunks", "1");
+		consumerProps.putAll(secureProps);
 
 		FlinkKafkaConsumerBase<Tuple2<Long, byte[]>> source = kafkaServer.getConsumer(topic, serSchema, consumerProps);
 		DataStreamSource<Tuple2<Long, byte[]>> consuming = env.addSource(source);
@@ -969,6 +1003,7 @@ public abstract class KafkaConsumerTestBase extends KafkaTestBase {
 		Properties producerProps = new Properties();
 		producerProps.setProperty("max.request.size", Integer.toString(1024 * 1024 * 15));
 		producerProps.setProperty("retries", "3");
+		producerProps.putAll(secureProps);
 		producerProps.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokerConnectionStrings);
 
 		DataStream<Tuple2<Long, byte[]>> stream = env.addSource(new RichSourceFunction<Tuple2<Long, byte[]>>() {
@@ -1047,8 +1082,10 @@ public abstract class KafkaConsumerTestBase extends KafkaTestBase {
 		env.setRestartStrategy(RestartStrategies.noRestart());
 		env.getConfig().disableSysoutLogging();
 
-
-		FlinkKafkaConsumerBase<Integer> kafkaSource = kafkaServer.getConsumer(topic, schema, standardProps);
+		Properties props = new Properties();
+		props.putAll(standardProps);
+		props.putAll(secureProps);
+		FlinkKafkaConsumerBase<Integer> kafkaSource = kafkaServer.getConsumer(topic, schema, props);
 
 		env
 				.addSource(kafkaSource)
@@ -1097,6 +1134,7 @@ public abstract class KafkaConsumerTestBase extends KafkaTestBase {
 		KeyedSerializationSchema<Tuple2<Long, PojoValue>> schema = new TypeInformationKeyValueSerializationSchema<>(Long.class, PojoValue.class, env.getConfig());
 		Properties producerProperties = FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings);
 		producerProperties.setProperty("retries", "3");
+		producerProperties.putAll(secureProps);
 		kvStream.addSink(kafkaServer.getProducer(topic, schema, producerProperties, null));
 		env.execute("Write KV to Kafka");
 
@@ -1110,7 +1148,10 @@ public abstract class KafkaConsumerTestBase extends KafkaTestBase {
 
 		KeyedDeserializationSchema<Tuple2<Long, PojoValue>> readSchema = new TypeInformationKeyValueSerializationSchema<>(Long.class, PojoValue.class, env.getConfig());
 
-		DataStream<Tuple2<Long, PojoValue>> fromKafka = env.addSource(kafkaServer.getConsumer(topic, readSchema, standardProps));
+		Properties props = new Properties();
+		props.putAll(standardProps);
+		props.putAll(secureProps);
+		DataStream<Tuple2<Long, PojoValue>> fromKafka = env.addSource(kafkaServer.getConsumer(topic, readSchema, props));
 		fromKafka.flatMap(new RichFlatMapFunction<Tuple2<Long,PojoValue>, Object>() {
 			long counter = 0;
 			@Override
@@ -1178,6 +1219,8 @@ public abstract class KafkaConsumerTestBase extends KafkaTestBase {
 
 		Properties producerProperties = FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings);
 		producerProperties.setProperty("retries", "3");
+		producerProperties.putAll(secureProps);
+
 		kvStream.addSink(kafkaServer.getProducer(topic, schema, producerProperties, null));
 
 		env.execute("Write deletes to Kafka");
@@ -1189,7 +1232,10 @@ public abstract class KafkaConsumerTestBase extends KafkaTestBase {
 		env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
 		env.getConfig().disableSysoutLogging();
 
-		DataStream<Tuple2<byte[], PojoValue>> fromKafka = env.addSource(kafkaServer.getConsumer(topic, schema, standardProps));
+		Properties props = new Properties();
+		props.putAll(standardProps);
+		props.putAll(secureProps);
+		DataStream<Tuple2<byte[], PojoValue>> fromKafka = env.addSource(kafkaServer.getConsumer(topic, schema, props));
 
 		fromKafka.flatMap(new RichFlatMapFunction<Tuple2<byte[], PojoValue>, Object>() {
 			long counter = 0;
@@ -1226,7 +1272,11 @@ public abstract class KafkaConsumerTestBase extends KafkaTestBase {
 		env1.getConfig().setRestartStrategy(RestartStrategies.noRestart());
 		env1.getConfig().disableSysoutLogging();
 
-		DataStream<Tuple2<Integer, Integer>> fromKafka = env1.addSource(kafkaServer.getConsumer(topic, new FixedNumberDeserializationSchema(ELEMENT_COUNT), standardProps));
+		Properties props = new Properties();
+		props.putAll(standardProps);
+		props.putAll(secureProps);
+
+		DataStream<Tuple2<Integer, Integer>> fromKafka = env1.addSource(kafkaServer.getConsumer(topic, new FixedNumberDeserializationSchema(ELEMENT_COUNT), props));
 		fromKafka.flatMap(new FlatMapFunction<Tuple2<Integer,Integer>, Void>() {
 			@Override
 			public void flatMap(Tuple2<Integer, Integer> value, Collector<Void> out) throws Exception {
@@ -1262,8 +1312,12 @@ public abstract class KafkaConsumerTestBase extends KafkaTestBase {
 					env1.getConfig().disableSysoutLogging();
 					env1.disableOperatorChaining(); // let the source read everything into the network buffers
 
+					Properties props = new Properties();
+					props.putAll(standardProps);
+					props.putAll(secureProps);
+
 					TypeInformationSerializationSchema<Tuple2<Integer, Integer>> schema = new TypeInformationSerializationSchema<>(TypeInfoParser.<Tuple2<Integer, Integer>>parse("Tuple2<Integer, Integer>"), env1.getConfig());
-					DataStream<Tuple2<Integer, Integer>> fromKafka = env1.addSource(kafkaServer.getConsumer(topic, schema, standardProps));
+					DataStream<Tuple2<Integer, Integer>> fromKafka = env1.addSource(kafkaServer.getConsumer(topic, schema, props));
 					fromKafka.flatMap(new FlatMapFunction<Tuple2<Integer, Integer>, Void>() {
 						@Override
 						public void flatMap(Tuple2<Integer, Integer> value, Collector<Void> out) throws Exception {// no op
@@ -1288,7 +1342,7 @@ public abstract class KafkaConsumerTestBase extends KafkaTestBase {
 						}
 					});
 
-					fromGen.addSink(kafkaServer.getProducer(topic, new KeyedSerializationSchemaWrapper<>(schema), standardProps, null));
+					fromGen.addSink(kafkaServer.getProducer(topic, new KeyedSerializationSchemaWrapper<>(schema), props, null));
 
 					env1.execute("Metrics test job");
 				} catch(Throwable t) {
@@ -1403,6 +1457,7 @@ public abstract class KafkaConsumerTestBase extends KafkaTestBase {
 		final TypeInformationSerializationSchema<Tuple2<Integer, Integer>> deser =
 				new TypeInformationSerializationSchema<>(intIntTupleType, env.getConfig());
 
+		cc.putAll(secureProps);
 		// create the consumer
 		FlinkKafkaConsumerBase<Tuple2<Integer, Integer>> consumer = kafkaServer.getConsumer(topicName, deser, cc);
 
@@ -1505,6 +1560,7 @@ public abstract class KafkaConsumerTestBase extends KafkaTestBase {
 			// the producer must not produce duplicates
 			Properties producerProperties = FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings);
 			producerProperties.setProperty("retries", "0");
+			producerProperties.putAll(secureProps);
 			
 			stream.addSink(kafkaServer.getProducer(
 							topicName, serSchema, producerProperties,
@@ -1537,7 +1593,7 @@ public abstract class KafkaConsumerTestBase extends KafkaTestBase {
 			
 			Properties readProps = (Properties) standardProps.clone();
 			readProps.setProperty("group.id", "flink-tests-validator");
-			
+			readProps.putAll(secureProps);
 			FlinkKafkaConsumerBase<Tuple2<Integer, Integer>> consumer = kafkaServer.getConsumer(topicName, deserSchema, readProps);
 
 			readEnv
@@ -1672,6 +1728,7 @@ public abstract class KafkaConsumerTestBase extends KafkaTestBase {
 		newProps.setProperty("group.id", "topic-printer"+ UUID.randomUUID().toString());
 		newProps.setProperty("auto.offset.reset", "smallest");
 		newProps.setProperty("zookeeper.connect", standardProps.getProperty("zookeeper.connect"));
+		newProps.putAll(secureProps);
 
 		ConsumerConfig printerConfig = new ConsumerConfig(newProps);
 		printTopic(topicName, printerConfig, deserializer, elements);
@@ -1893,8 +1950,11 @@ public abstract class KafkaConsumerTestBase extends KafkaTestBase {
 		TypeInformationSerializationSchema<Tuple2<Long, Integer>> sourceSchema =
 			new TypeInformationSerializationSchema<>(inputTypeInfo, env.getConfig());
 
+		Properties props = new Properties();
+		props.putAll(standardProps);
+		props.putAll(secureProps);
 		FlinkKafkaConsumerBase<Tuple2<Long, Integer>> source = kafkaServer
-			.getConsumer(topics, sourceSchema, standardProps)
+			.getConsumer(topics, sourceSchema, props)
 			.assignTimestampsAndWatermarks(new TestPunctuatedTSExtractor());
 
 		DataStreamSource<Tuple2<Long, Integer>> consuming = env.setParallelism(1).addSource(source);

http://git-wip-us.apache.org/repos/asf/flink/blob/25a622fd/flink-streaming-connectors/flink-connector-kafka-base/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaProducerTestBase.java
----------------------------------------------------------------------
diff --git a/flink-streaming-connectors/flink-connector-kafka-base/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaProducerTestBase.java b/flink-streaming-connectors/flink-connector-kafka-base/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaProducerTestBase.java
index 14e74f1..5bcf406 100644
--- a/flink-streaming-connectors/flink-connector-kafka-base/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaProducerTestBase.java
+++ b/flink-streaming-connectors/flink-connector-kafka-base/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaProducerTestBase.java
@@ -34,6 +34,7 @@ import org.apache.flink.test.util.SuccessException;
 
 
 import java.io.Serializable;
+import java.util.Properties;
 
 import static org.apache.flink.test.util.TestUtils.tryExecute;
 import static org.junit.Assert.assertEquals;
@@ -102,17 +103,24 @@ public abstract class KafkaProducerTestBase extends KafkaTestBase {
 				}
 			})
 			.setParallelism(1);
+
+			Properties props = new Properties();
+			props.putAll(FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings));
+			props.putAll(secureProps);
 			
 			// sink partitions into 
 			stream.addSink(kafkaServer.getProducer(topic,
 					new KeyedSerializationSchemaWrapper<>(serSchema),
-					FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings),
+					props,
 					new CustomPartitioner(parallelism)))
 			.setParallelism(parallelism);
 
 			// ------ consuming topology ---------
-			
-			FlinkKafkaConsumerBase<Tuple2<Long, String>> source = kafkaServer.getConsumer(topic, deserSchema, standardProps);
+
+			Properties consumerProps = new Properties();
+			consumerProps.putAll(standardProps);
+			consumerProps.putAll(secureProps);
+			FlinkKafkaConsumerBase<Tuple2<Long, String>> source = kafkaServer.getConsumer(topic, deserSchema, consumerProps);
 			
 			env.addSource(source).setParallelism(parallelism)
 

http://git-wip-us.apache.org/repos/asf/flink/blob/25a622fd/flink-streaming-connectors/flink-connector-kafka-base/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaShortRetentionTestBase.java
----------------------------------------------------------------------
diff --git a/flink-streaming-connectors/flink-connector-kafka-base/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaShortRetentionTestBase.java b/flink-streaming-connectors/flink-connector-kafka-base/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaShortRetentionTestBase.java
index c4949ff..9236e78 100644
--- a/flink-streaming-connectors/flink-connector-kafka-base/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaShortRetentionTestBase.java
+++ b/flink-streaming-connectors/flink-connector-kafka-base/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaShortRetentionTestBase.java
@@ -36,6 +36,8 @@ import org.apache.flink.util.InstantiationUtil;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
 
+import org.junit.ClassRule;
+import org.junit.rules.TemporaryFolder;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -60,29 +62,39 @@ public class KafkaShortRetentionTestBase implements Serializable {
 	private static Properties standardProps;
 	private static LocalFlinkMiniCluster flink;
 
+	@ClassRule
+	public static TemporaryFolder tempFolder = new TemporaryFolder();
+
+	protected static Properties secureProps = new Properties();
+
 	@BeforeClass
 	public static void prepare() throws IOException, ClassNotFoundException {
 		LOG.info("-------------------------------------------------------------------------");
 		LOG.info("    Starting KafkaShortRetentionTestBase ");
 		LOG.info("-------------------------------------------------------------------------");
 
+		Configuration flinkConfig = new Configuration();
+
 		// dynamically load the implementation for the test
 		Class<?> clazz = Class.forName("org.apache.flink.streaming.connectors.kafka.KafkaTestEnvironmentImpl");
 		kafkaServer = (KafkaTestEnvironment) InstantiationUtil.instantiate(clazz);
 
 		LOG.info("Starting KafkaTestBase.prepare() for Kafka " + kafkaServer.getVersion());
 
+		if(kafkaServer.isSecureRunSupported()) {
+			secureProps = kafkaServer.getSecureProperties();
+		}
+
 		Properties specificProperties = new Properties();
 		specificProperties.setProperty("log.retention.hours", "0");
 		specificProperties.setProperty("log.retention.minutes", "0");
 		specificProperties.setProperty("log.retention.ms", "250");
 		specificProperties.setProperty("log.retention.check.interval.ms", "100");
-		kafkaServer.prepare(1, specificProperties);
+		kafkaServer.prepare(1, specificProperties, false);
 
 		standardProps = kafkaServer.getStandardProperties();
 
 		// start also a re-usable Flink mini cluster
-		Configuration flinkConfig = new Configuration();
 		flinkConfig.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, 1);
 		flinkConfig.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, 8);
 		flinkConfig.setInteger(ConfigConstants.TASK_MANAGER_MEMORY_SIZE_KEY, 16);
@@ -98,6 +110,8 @@ public class KafkaShortRetentionTestBase implements Serializable {
 			flink.shutdown();
 		}
 		kafkaServer.shutdown();
+
+		secureProps.clear();
 	}
 
 	/**
@@ -151,12 +165,17 @@ public class KafkaShortRetentionTestBase implements Serializable {
 				running = false;
 			}
 		});
-		stream.addSink(kafkaServer.getProducer(topic, new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), standardProps, null));
+
+		Properties props = new Properties();
+		props.putAll(standardProps);
+		props.putAll(secureProps);
+
+		stream.addSink(kafkaServer.getProducer(topic, new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), props, null));
 
 		// ----------- add consumer dataflow ----------
 
 		NonContinousOffsetsDeserializationSchema deserSchema = new NonContinousOffsetsDeserializationSchema();
-		FlinkKafkaConsumerBase<String> source = kafkaServer.getConsumer(topic, deserSchema, standardProps);
+		FlinkKafkaConsumerBase<String> source = kafkaServer.getConsumer(topic, deserSchema, props);
 
 		DataStreamSource<String> consuming = env.addSource(source);
 		consuming.addSink(new DiscardingSink<String>());
@@ -224,6 +243,7 @@ public class KafkaShortRetentionTestBase implements Serializable {
 
 		Properties customProps = new Properties();
 		customProps.putAll(standardProps);
+		customProps.putAll(secureProps);
 		customProps.setProperty("auto.offset.reset", "none"); // test that "none" leads to an exception
 		FlinkKafkaConsumerBase<String> source = kafkaServer.getConsumer(topic, new SimpleStringSchema(), customProps);
 
@@ -255,6 +275,7 @@ public class KafkaShortRetentionTestBase implements Serializable {
 
 		Properties customProps = new Properties();
 		customProps.putAll(standardProps);
+		customProps.putAll(secureProps);
 		customProps.setProperty("auto.offset.reset", "none"); // test that "none" leads to an exception
 		
 		try {

http://git-wip-us.apache.org/repos/asf/flink/blob/25a622fd/flink-streaming-connectors/flink-connector-kafka-base/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaTestBase.java
----------------------------------------------------------------------
diff --git a/flink-streaming-connectors/flink-connector-kafka-base/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaTestBase.java b/flink-streaming-connectors/flink-connector-kafka-base/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaTestBase.java
index 771db17..afdd158 100644
--- a/flink-streaming-connectors/flink-connector-kafka-base/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaTestBase.java
+++ b/flink-streaming-connectors/flink-connector-kafka-base/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaTestBase.java
@@ -31,6 +31,8 @@ import org.apache.flink.util.TestLogger;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
 
+import org.junit.ClassRule;
+import org.junit.rules.TemporaryFolder;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -73,61 +75,90 @@ public abstract class KafkaTestBase extends TestLogger {
 
 	protected static KafkaTestEnvironment kafkaServer;
 
+	@ClassRule
+	public static TemporaryFolder tempFolder = new TemporaryFolder();
+
+	protected static Properties secureProps = new Properties();
+
 	// ------------------------------------------------------------------------
 	//  Setup and teardown of the mini clusters
 	// ------------------------------------------------------------------------
 	
 	@BeforeClass
 	public static void prepare() throws IOException, ClassNotFoundException {
+
 		LOG.info("-------------------------------------------------------------------------");
 		LOG.info("    Starting KafkaTestBase ");
 		LOG.info("-------------------------------------------------------------------------");
-		
 
+		startClusters(false);
 
-		// dynamically load the implementation for the test
-		Class<?> clazz = Class.forName("org.apache.flink.streaming.connectors.kafka.KafkaTestEnvironmentImpl");
-		kafkaServer = (KafkaTestEnvironment) InstantiationUtil.instantiate(clazz);
+	}
 
-		LOG.info("Starting KafkaTestBase.prepare() for Kafka " + kafkaServer.getVersion());
+	@AfterClass
+	public static void shutDownServices() {
 
-		kafkaServer.prepare(NUMBER_OF_KAFKA_SERVERS);
+		LOG.info("-------------------------------------------------------------------------");
+		LOG.info("    Shut down KafkaTestBase ");
+		LOG.info("-------------------------------------------------------------------------");
 
-		standardProps = kafkaServer.getStandardProperties();
-		brokerConnectionStrings = kafkaServer.getBrokerConnectionString();
+		shutdownClusters();
 
-		// start also a re-usable Flink mini cluster
-		Configuration flinkConfig = new Configuration();
+		LOG.info("-------------------------------------------------------------------------");
+		LOG.info("    KafkaTestBase finished");
+		LOG.info("-------------------------------------------------------------------------");
+	}
+
+	protected static Configuration getFlinkConfiguration() {
+		Configuration flinkConfig = new Configuration();;
 		flinkConfig.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, 1);
 		flinkConfig.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, 8);
 		flinkConfig.setInteger(ConfigConstants.TASK_MANAGER_MEMORY_SIZE_KEY, 16);
 		flinkConfig.setString(ConfigConstants.RESTART_STRATEGY_FIXED_DELAY_DELAY, "0 s");
 		flinkConfig.setString(ConfigConstants.METRICS_REPORTERS_LIST, "my_reporter");
 		flinkConfig.setString(ConfigConstants.METRICS_REPORTER_PREFIX + "my_reporter." + ConfigConstants.METRICS_REPORTER_CLASS_SUFFIX, JMXReporter.class.getName());
+		return flinkConfig;
+	}
+
+	protected static void startClusters(boolean secureMode) throws ClassNotFoundException {
+
+		// dynamically load the implementation for the test
+		Class<?> clazz = Class.forName("org.apache.flink.streaming.connectors.kafka.KafkaTestEnvironmentImpl");
+		kafkaServer = (KafkaTestEnvironment) InstantiationUtil.instantiate(clazz);
+
+		LOG.info("Starting KafkaTestBase.prepare() for Kafka " + kafkaServer.getVersion());
+
+		kafkaServer.prepare(NUMBER_OF_KAFKA_SERVERS, secureMode);
+
+		standardProps = kafkaServer.getStandardProperties();
 
-		flink = new LocalFlinkMiniCluster(flinkConfig, false);
+		brokerConnectionStrings = kafkaServer.getBrokerConnectionString();
+
+		if(kafkaServer.isSecureRunSupported() && secureMode) {
+			secureProps = kafkaServer.getSecureProperties();
+		}
+
+		// start also a re-usable Flink mini cluster
+		flink = new LocalFlinkMiniCluster(getFlinkConfiguration(), false);
 		flink.start();
 
 		flinkPort = flink.getLeaderRPCPort();
-	}
 
-	@AfterClass
-	public static void shutDownServices() {
+	}
 
-		LOG.info("-------------------------------------------------------------------------");
-		LOG.info("    Shut down KafkaTestBase ");
-		LOG.info("-------------------------------------------------------------------------");
+	protected static void shutdownClusters() {
 
 		flinkPort = -1;
 		if (flink != null) {
 			flink.shutdown();
 		}
 
+		if(secureProps != null) {
+			secureProps.clear();
+		}
+
 		kafkaServer.shutdown();
 
-		LOG.info("-------------------------------------------------------------------------");
-		LOG.info("    KafkaTestBase finished");
-		LOG.info("-------------------------------------------------------------------------");
 	}
 
 
@@ -164,4 +195,5 @@ public abstract class KafkaTestBase extends TestLogger {
 	protected static void deleteTestTopic(String topic) {
 		kafkaServer.deleteTestTopic(topic);
 	}
+
 }

http://git-wip-us.apache.org/repos/asf/flink/blob/25a622fd/flink-streaming-connectors/flink-connector-kafka-base/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaTestEnvironment.java
----------------------------------------------------------------------
diff --git a/flink-streaming-connectors/flink-connector-kafka-base/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaTestEnvironment.java b/flink-streaming-connectors/flink-connector-kafka-base/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaTestEnvironment.java
index 0b1d51d..6ecde71 100644
--- a/flink-streaming-connectors/flink-connector-kafka-base/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaTestEnvironment.java
+++ b/flink-streaming-connectors/flink-connector-kafka-base/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaTestEnvironment.java
@@ -35,10 +35,10 @@ public abstract class KafkaTestEnvironment {
 
 	protected static final String KAFKA_HOST = "localhost";
 
-	public abstract void prepare(int numKafkaServers, Properties kafkaServerProperties);
+	public abstract void prepare(int numKafkaServers, Properties kafkaServerProperties, boolean secureMode);
 
-	public void prepare(int numberOfKafkaServers) {
-		this.prepare(numberOfKafkaServers, null);
+	public void prepare(int numberOfKafkaServers, boolean secureMode) {
+		this.prepare(numberOfKafkaServers, null, secureMode);
 	}
 
 	public abstract void shutdown();
@@ -51,9 +51,10 @@ public abstract class KafkaTestEnvironment {
 		this.createTestTopic(topic, numberOfPartitions, replicationFactor, new Properties());
 	}
 
-
 	public abstract Properties getStandardProperties();
 
+	public abstract Properties getSecureProperties();
+
 	public abstract String getBrokerConnectionString();
 
 	public abstract String getVersion();
@@ -86,4 +87,6 @@ public abstract class KafkaTestEnvironment {
 
 	public abstract int getBrokerId(KafkaServer server);
 
+	public abstract boolean isSecureRunSupported();
+
 }

http://git-wip-us.apache.org/repos/asf/flink/blob/25a622fd/flink-streaming-connectors/flink-connector-kafka-base/src/test/java/org/apache/flink/streaming/connectors/kafka/testutils/DataGenerators.java
----------------------------------------------------------------------
diff --git a/flink-streaming-connectors/flink-connector-kafka-base/src/test/java/org/apache/flink/streaming/connectors/kafka/testutils/DataGenerators.java b/flink-streaming-connectors/flink-connector-kafka-base/src/test/java/org/apache/flink/streaming/connectors/kafka/testutils/DataGenerators.java
index 5a38e56..58a5cc3 100644
--- a/flink-streaming-connectors/flink-connector-kafka-base/src/test/java/org/apache/flink/streaming/connectors/kafka/testutils/DataGenerators.java
+++ b/flink-streaming-connectors/flink-connector-kafka-base/src/test/java/org/apache/flink/streaming/connectors/kafka/testutils/DataGenerators.java
@@ -135,11 +135,18 @@ public class DataGenerators {
 					}
 				});
 
+		Properties props = new Properties();
+		props.putAll(FlinkKafkaProducerBase.getPropertiesFromBrokerList(testServer.getBrokerConnectionString()));
+		Properties secureProps = testServer.getSecureProperties();
+		if(secureProps != null) {
+			props.putAll(testServer.getSecureProperties());
+		}
+
 		stream
 				.rebalance()
 				.addSink(testServer.getProducer(topic,
 						new KeyedSerializationSchemaWrapper<>(new TypeInformationSerializationSchema<>(BasicTypeInfo.INT_TYPE_INFO, env.getConfig())),
-						FlinkKafkaProducerBase.getPropertiesFromBrokerList(testServer.getBrokerConnectionString()),
+						props,
 						new KafkaPartitioner<Integer>() {
 							@Override
 							public int partition(Integer next, byte[] serializedKey, byte[] serializedValue, int numPartitions) {

http://git-wip-us.apache.org/repos/asf/flink/blob/25a622fd/flink-test-utils-parent/flink-test-utils/pom.xml
----------------------------------------------------------------------
diff --git a/flink-test-utils-parent/flink-test-utils/pom.xml b/flink-test-utils-parent/flink-test-utils/pom.xml
index 18ecfde..5c99ef6 100644
--- a/flink-test-utils-parent/flink-test-utils/pom.xml
+++ b/flink-test-utils-parent/flink-test-utils/pom.xml
@@ -78,5 +78,30 @@ under the License.
 			<scope>compile</scope>
 		</dependency>
 
+		<dependency>
+			<groupId>org.apache.hadoop</groupId>
+			<artifactId>hadoop-minikdc</artifactId>
+			<version>${minikdc.version}</version>
+		</dependency>
+
 	</dependencies>
+
+	<build>
+		<plugins>
+
+			<!--
+            https://issues.apache.org/jira/browse/DIRSHARED-134
+            Required to pull the Mini-KDC transitive dependency
+            -->
+			<plugin>
+				<groupId>org.apache.felix</groupId>
+				<artifactId>maven-bundle-plugin</artifactId>
+				<version>3.0.1</version>
+				<inherited>true</inherited>
+				<extensions>true</extensions>
+			</plugin>
+
+		</plugins>
+	</build>
+
 </project>

http://git-wip-us.apache.org/repos/asf/flink/blob/25a622fd/flink-test-utils-parent/flink-test-utils/src/main/java/org/apache/flink/streaming/util/StreamingMultipleProgramsTestBase.java
----------------------------------------------------------------------
diff --git a/flink-test-utils-parent/flink-test-utils/src/main/java/org/apache/flink/streaming/util/StreamingMultipleProgramsTestBase.java b/flink-test-utils-parent/flink-test-utils/src/main/java/org/apache/flink/streaming/util/StreamingMultipleProgramsTestBase.java
index a478908..6ec6c2c 100644
--- a/flink-test-utils-parent/flink-test-utils/src/main/java/org/apache/flink/streaming/util/StreamingMultipleProgramsTestBase.java
+++ b/flink-test-utils-parent/flink-test-utils/src/main/java/org/apache/flink/streaming/util/StreamingMultipleProgramsTestBase.java
@@ -25,6 +25,8 @@ import org.apache.flink.test.util.TestBaseUtils;
 
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * Base class for streaming unit tests that run multiple tests and want to reuse the same
@@ -67,18 +69,22 @@ public class StreamingMultipleProgramsTestBase extends AbstractTestBase {
 		super(new Configuration());
 	}
 
+	protected static final Logger LOG = LoggerFactory.getLogger(StreamingMultipleProgramsTestBase.class);
+
 	// ------------------------------------------------------------------------
 	//  Cluster setup & teardown
 	// ------------------------------------------------------------------------
 
 	@BeforeClass
 	public static void setup() throws Exception {
+		LOG.info("In StreamingMultipleProgramsTestBase: Starting FlinkMiniCluster ");
 		cluster = TestBaseUtils.startCluster(1, DEFAULT_PARALLELISM, false, false, true);
 		TestStreamEnvironment.setAsContext(cluster, DEFAULT_PARALLELISM);
 	}
 
 	@AfterClass
 	public static void teardown() throws Exception {
+		LOG.info("In StreamingMultipleProgramsTestBase: Closing FlinkMiniCluster ");
 		TestStreamEnvironment.unsetAsContext();
 		stopCluster(cluster, TestBaseUtils.DEFAULT_TIMEOUT);
 	}

http://git-wip-us.apache.org/repos/asf/flink/blob/25a622fd/flink-test-utils-parent/flink-test-utils/src/main/java/org/apache/flink/test/util/SecureTestEnvironment.java
----------------------------------------------------------------------
diff --git a/flink-test-utils-parent/flink-test-utils/src/main/java/org/apache/flink/test/util/SecureTestEnvironment.java b/flink-test-utils-parent/flink-test-utils/src/main/java/org/apache/flink/test/util/SecureTestEnvironment.java
new file mode 100644
index 0000000..00b19f1
--- /dev/null
+++ b/flink-test-utils-parent/flink-test-utils/src/main/java/org/apache/flink/test/util/SecureTestEnvironment.java
@@ -0,0 +1,249 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.test.util;
+
+import org.apache.flink.configuration.ConfigConstants;
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.runtime.security.SecurityContext;
+import org.apache.hadoop.minikdc.MiniKdc;
+import org.junit.rules.TemporaryFolder;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import javax.annotation.Nullable;
+import java.io.File;
+import java.io.FileWriter;
+import java.io.BufferedWriter;
+import java.io.PrintWriter;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Properties;
+
+/**
+ * Helper {@link SecureTestEnvironment} to handle MiniKDC lifecycle.
+ * This class can be used to start/stop MiniKDC and create secure configurations for MiniDFSCluster
+ * and MiniYarn
+ */
+
+public class SecureTestEnvironment {
+
+	protected static final Logger LOG = LoggerFactory.getLogger(SecureTestEnvironment.class);
+
+	private static MiniKdc kdc;
+
+	private static String testKeytab = null;
+
+	private static String testPrincipal = null;
+
+	private static String testZkServerPrincipal = null;
+
+	private static String testZkClientPrincipal = null;
+
+	private static String testKafkaServerPrincipal = null;
+
+	private static String hadoopServicePrincipal = null;
+
+	private static File baseDirForSecureRun = null;
+
+	public static void prepare(TemporaryFolder tempFolder) {
+
+		try {
+			baseDirForSecureRun = tempFolder.newFolder();
+			LOG.info("Base Directory for Secure Environment: {}", baseDirForSecureRun);
+
+			String hostName = "localhost";
+			Properties kdcConf = MiniKdc.createConf();
+			if(LOG.isDebugEnabled()) {
+				kdcConf.setProperty(MiniKdc.DEBUG, "true");
+			}
+			kdcConf.setProperty(MiniKdc.KDC_BIND_ADDRESS, hostName);
+			kdc = new MiniKdc(kdcConf, baseDirForSecureRun);
+			kdc.start();
+			LOG.info("Started Mini KDC");
+
+			File keytabFile = new File(baseDirForSecureRun, "test-users.keytab");
+			testKeytab = keytabFile.getAbsolutePath();
+			testZkServerPrincipal = "zookeeper/127.0.0.1";
+			testZkClientPrincipal = "zk-client/127.0.0.1";
+			testKafkaServerPrincipal = "kafka/" + hostName;
+			hadoopServicePrincipal = "hadoop/" + hostName;
+			testPrincipal = "client/" + hostName;
+
+			kdc.createPrincipal(keytabFile, testPrincipal, testZkServerPrincipal,
+					hadoopServicePrincipal,
+					testZkClientPrincipal,
+					testKafkaServerPrincipal);
+
+			testPrincipal = testPrincipal + "@" + kdc.getRealm();
+			testZkServerPrincipal = testZkServerPrincipal + "@" + kdc.getRealm();
+			testZkClientPrincipal = testZkClientPrincipal + "@" + kdc.getRealm();
+			testKafkaServerPrincipal = testKafkaServerPrincipal + "@" + kdc.getRealm();
+			hadoopServicePrincipal = hadoopServicePrincipal + "@" + kdc.getRealm();
+
+			LOG.info("-------------------------------------------------------------------");
+			LOG.info("Test Principal: {}", testPrincipal);
+			LOG.info("Test ZK Server Principal: {}", testZkServerPrincipal);
+			LOG.info("Test ZK Client Principal: {}", testZkClientPrincipal);
+			LOG.info("Test Kafka Server Principal: {}", testKafkaServerPrincipal);
+			LOG.info("Test Hadoop Service Principal: {}", hadoopServicePrincipal);
+			LOG.info("Test Keytab: {}", testKeytab);
+			LOG.info("-------------------------------------------------------------------");
+
+			//Security Context is established to allow non hadoop applications that requires JAAS
+			//based SASL/Kerberos authentication to work. However, for Hadoop specific applications
+			//the context can be reinitialized with Hadoop configuration by calling
+			//ctx.setHadoopConfiguration() for the UGI implementation to work properly.
+			//See Yarn test case module for reference
+			createJaasConfig(baseDirForSecureRun);
+			SecurityContext.SecurityConfiguration ctx = new SecurityContext.SecurityConfiguration();
+			Configuration flinkConfig = new Configuration();
+			flinkConfig.setString(ConfigConstants.SECURITY_KEYTAB_KEY, testKeytab);
+			flinkConfig.setString(ConfigConstants.SECURITY_PRINCIPAL_KEY, testPrincipal);
+			flinkConfig.setBoolean(ConfigConstants.ZOOKEEPER_SASL_DISABLE, false);
+			flinkConfig.setString(ConfigConstants.FLINK_BASE_DIR_PATH_KEY, baseDirForSecureRun.getAbsolutePath());
+			ctx.setFlinkConfiguration(flinkConfig);
+			TestingSecurityContext.install(ctx, getClientSecurityConfigurationMap());
+
+			populateSystemEnvVariables();
+
+		} catch(Exception e) {
+			LOG.error("Exception occured while preparing secure environment. Reason: {}", e);
+			throw new RuntimeException(e);
+		}
+
+	}
+
+	public static void cleanup() {
+
+		LOG.info("Cleaning up Secure Environment");
+
+		if( kdc != null) {
+			kdc.stop();
+			LOG.info("Stopped KDC server");
+		}
+
+		resetSystemEnvVariables();
+
+		testKeytab = null;
+		testPrincipal = null;
+		testZkServerPrincipal = null;
+		hadoopServicePrincipal = null;
+		baseDirForSecureRun = null;
+
+	}
+
+	private static void populateSystemEnvVariables() {
+
+		if(LOG.isDebugEnabled()) {
+			System.setProperty("FLINK_JAAS_DEBUG", "true");
+			System.setProperty("sun.security.krb5.debug", "true");
+		}
+
+		System.setProperty("java.security.krb5.conf", kdc.getKrb5conf().getAbsolutePath());
+
+		System.setProperty("zookeeper.authProvider.1", "org.apache.zookeeper.server.auth.SASLAuthenticationProvider");
+		System.setProperty("zookeeper.kerberos.removeHostFromPrincipal", "true");
+		System.setProperty("zookeeper.kerberos.removeRealmFromPrincipal", "true");
+	}
+
+	private static void resetSystemEnvVariables() {
+		System.clearProperty("java.security.krb5.conf");
+		System.clearProperty("FLINK_JAAS_DEBUG");
+		System.clearProperty("sun.security.krb5.debug");
+
+		System.clearProperty("zookeeper.authProvider.1");
+		System.clearProperty("zookeeper.kerberos.removeHostFromPrincipal");
+		System.clearProperty("zookeeper.kerberos.removeRealmFromPrincipal");
+	}
+
+	public static org.apache.flink.configuration.Configuration populateFlinkSecureConfigurations(
+			@Nullable org.apache.flink.configuration.Configuration flinkConf) {
+
+		org.apache.flink.configuration.Configuration conf;
+
+		if(flinkConf== null) {
+			conf = new org.apache.flink.configuration.Configuration();
+		} else {
+			conf = flinkConf;
+		}
+
+		conf.setString(ConfigConstants.SECURITY_KEYTAB_KEY , testKeytab);
+		conf.setString(ConfigConstants.SECURITY_PRINCIPAL_KEY , testPrincipal);
+
+		return conf;
+	}
+
+	public static Map<String, TestingSecurityContext.ClientSecurityConfiguration> getClientSecurityConfigurationMap() {
+
+		Map<String, TestingSecurityContext.ClientSecurityConfiguration> clientSecurityConfigurationMap = new HashMap<>();
+
+		if(testZkServerPrincipal != null ) {
+			TestingSecurityContext.ClientSecurityConfiguration zkServer =
+					new TestingSecurityContext.ClientSecurityConfiguration(testZkServerPrincipal, testKeytab,
+							"Server", "zk-server");
+			clientSecurityConfigurationMap.put("Server",zkServer);
+		}
+
+		if(testZkClientPrincipal != null ) {
+			TestingSecurityContext.ClientSecurityConfiguration zkClient =
+					new TestingSecurityContext.ClientSecurityConfiguration(testZkClientPrincipal, testKeytab,
+							"Client", "zk-client");
+			clientSecurityConfigurationMap.put("Client",zkClient);
+		}
+
+		if(testKafkaServerPrincipal != null ) {
+			TestingSecurityContext.ClientSecurityConfiguration kafkaServer =
+					new TestingSecurityContext.ClientSecurityConfiguration(testKafkaServerPrincipal, testKeytab,
+							"KafkaServer", "kafka-server");
+			clientSecurityConfigurationMap.put("KafkaServer",kafkaServer);
+		}
+
+		return clientSecurityConfigurationMap;
+	}
+
+	public static String getTestKeytab() {
+		return testKeytab;
+	}
+
+	public static String getHadoopServicePrincipal() {
+		return hadoopServicePrincipal;
+	}
+
+	/*
+	 * Helper method to create a temporary JAAS configuration file to ger around the Kafka and ZK SASL
+	 * implementation lookup java.security.auth.login.config
+	 */
+	private static void  createJaasConfig(File baseDirForSecureRun) {
+
+		try(FileWriter fw = new FileWriter(new File(baseDirForSecureRun,SecurityContext.JAAS_CONF_FILENAME), true);
+			BufferedWriter bw = new BufferedWriter(fw);
+			PrintWriter out = new PrintWriter(bw))
+		{
+			out.println("sample {");
+			out.println("useKeyTab=false");
+			out.println("useTicketCache=true;");
+			out.println("};");
+		} catch (IOException e) {
+			LOG.error("Exception occured while trying to create JAAS config. Reason: {}", e.getMessage());
+			throw new RuntimeException(e);
+		}
+
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/25a622fd/flink-test-utils-parent/flink-test-utils/src/main/java/org/apache/flink/test/util/TestingJaasConfiguration.java
----------------------------------------------------------------------
diff --git a/flink-test-utils-parent/flink-test-utils/src/main/java/org/apache/flink/test/util/TestingJaasConfiguration.java b/flink-test-utils-parent/flink-test-utils/src/main/java/org/apache/flink/test/util/TestingJaasConfiguration.java
new file mode 100644
index 0000000..25b2362
--- /dev/null
+++ b/flink-test-utils-parent/flink-test-utils/src/main/java/org/apache/flink/test/util/TestingJaasConfiguration.java
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.test.util;
+
+import org.apache.flink.annotation.Internal;
+import org.apache.flink.runtime.security.JaasConfiguration;
+import org.apache.hadoop.security.authentication.util.KerberosUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import javax.security.auth.login.AppConfigurationEntry;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * {@link TestingJaasConfiguration} for handling the integration test case since it requires to manage
+ * client principal as well as server principals of Hadoop/ZK which expects the host name to be populated
+ * in specific way (localhost vs 127.0.0.1). This provides an abstraction to handle more than one Login Module
+ * since the default {@link JaasConfiguration} behavior only supports global/unique principal identifier
+ */
+
+@Internal
+public class TestingJaasConfiguration extends JaasConfiguration {
+
+	private static final Logger LOG = LoggerFactory.getLogger(TestingJaasConfiguration.class);
+
+	public Map<String, TestingSecurityContext.ClientSecurityConfiguration> clientSecurityConfigurationMap;
+
+	TestingJaasConfiguration(String keytab, String principal, Map<String,
+			TestingSecurityContext.ClientSecurityConfiguration> clientSecurityConfigurationMap) {
+		super(keytab, principal);
+		this.clientSecurityConfigurationMap = clientSecurityConfigurationMap;
+	}
+
+	@Override
+	public AppConfigurationEntry[] getAppConfigurationEntry(String applicationName) {
+
+		LOG.debug("In TestingJaasConfiguration - Application Requested: {}", applicationName);
+
+		AppConfigurationEntry[] appConfigurationEntry = super.getAppConfigurationEntry(applicationName);
+
+		if(clientSecurityConfigurationMap != null && clientSecurityConfigurationMap.size() > 0) {
+
+			if(clientSecurityConfigurationMap.containsKey(applicationName)) {
+
+				LOG.debug("In TestingJaasConfiguration - Application: {} found in the supplied context", applicationName);
+
+				TestingSecurityContext.ClientSecurityConfiguration conf = clientSecurityConfigurationMap.get(applicationName);
+
+				if(appConfigurationEntry != null && appConfigurationEntry.length > 0) {
+
+					for(int count=0; count < appConfigurationEntry.length; count++) {
+
+						AppConfigurationEntry ace = appConfigurationEntry[count];
+
+						if (ace.getOptions().containsKey("keyTab")) {
+
+							String keyTab = conf.getKeytab();
+							String principal = conf.getPrincipal();
+
+							LOG.debug("In TestingJaasConfiguration - Application: {} from the supplied context will " +
+									"use Client Specific Keytab: {} and Principal: {}", applicationName, keyTab, principal);
+
+							Map<String, String> newKeytabKerberosOptions = new HashMap<>();
+							newKeytabKerberosOptions.putAll(getKeytabKerberosOptions());
+
+							newKeytabKerberosOptions.put("keyTab", keyTab);
+							newKeytabKerberosOptions.put("principal", principal);
+
+							AppConfigurationEntry keytabKerberosAce = new AppConfigurationEntry(
+									KerberosUtil.getKrb5LoginModuleName(),
+									AppConfigurationEntry.LoginModuleControlFlag.REQUIRED,
+									newKeytabKerberosOptions);
+							appConfigurationEntry = new AppConfigurationEntry[] {keytabKerberosAce};
+
+							LOG.debug("---->Login Module is using Keytab based configuration<------");
+							LOG.debug("Login Module Name: " + keytabKerberosAce.getLoginModuleName());
+							LOG.debug("Control Flag: " + keytabKerberosAce.getControlFlag());
+							LOG.debug("Options: " + keytabKerberosAce.getOptions());
+						}
+					}
+				}
+			}
+
+		}
+
+		return appConfigurationEntry;
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/25a622fd/flink-test-utils-parent/flink-test-utils/src/main/java/org/apache/flink/test/util/TestingSecurityContext.java
----------------------------------------------------------------------
diff --git a/flink-test-utils-parent/flink-test-utils/src/main/java/org/apache/flink/test/util/TestingSecurityContext.java b/flink-test-utils-parent/flink-test-utils/src/main/java/org/apache/flink/test/util/TestingSecurityContext.java
new file mode 100644
index 0000000..5e84c7e
--- /dev/null
+++ b/flink-test-utils-parent/flink-test-utils/src/main/java/org/apache/flink/test/util/TestingSecurityContext.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.test.util;
+
+import org.apache.flink.annotation.Internal;
+import org.apache.flink.runtime.security.SecurityContext;
+
+import java.util.Map;
+
+/*
+ * Test security context to support handling both client and server principals in MiniKDC
+ * This class is used only in integration test code for connectors like Kafka, HDFS etc.,
+ */
+@Internal
+public class TestingSecurityContext {
+
+	public static void install(SecurityContext.SecurityConfiguration config,
+						Map<String, ClientSecurityConfiguration> clientSecurityConfigurationMap)
+			throws Exception {
+
+		SecurityContext.install(config);
+
+		// establish the JAAS config for Test environment
+		TestingJaasConfiguration jaasConfig = new TestingJaasConfiguration(config.getKeytab(),
+				config.getPrincipal(), clientSecurityConfigurationMap);
+		javax.security.auth.login.Configuration.setConfiguration(jaasConfig);
+	}
+
+	public static class ClientSecurityConfiguration {
+
+		private String principal;
+
+		private String keytab;
+
+		private String moduleName;
+
+		private String jaasServiceName;
+
+		public String getPrincipal() {
+			return principal;
+		}
+
+		public String getKeytab() {
+			return keytab;
+		}
+
+		public String getModuleName() {
+			return moduleName;
+		}
+
+		public String getJaasServiceName() {
+			return jaasServiceName;
+		}
+
+		public ClientSecurityConfiguration(String principal, String keytab, String moduleName, String jaasServiceName) {
+			this.principal = principal;
+			this.keytab = keytab;
+			this.moduleName = moduleName;
+			this.jaasServiceName = jaasServiceName;
+		}
+
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/flink/blob/25a622fd/flink-yarn-tests/pom.xml
----------------------------------------------------------------------
diff --git a/flink-yarn-tests/pom.xml b/flink-yarn-tests/pom.xml
index ffdca36..68e4752 100644
--- a/flink-yarn-tests/pom.xml
+++ b/flink-yarn-tests/pom.xml
@@ -103,6 +103,13 @@ under the License.
 			<artifactId>akka-testkit_${scala.binary.version}</artifactId>
 			<scope>test</scope>
 		</dependency>
+
+		<dependency>
+			<groupId>org.apache.hadoop</groupId>
+			<artifactId>hadoop-minikdc</artifactId>
+			<version>${minikdc.version}</version>
+		</dependency>
+
 	</dependencies>
 
 	<build>
@@ -298,6 +305,19 @@ under the License.
 					<skip>true</skip>
 				</configuration>
 			</plugin>
+
+			<!--
+            https://issues.apache.org/jira/browse/DIRSHARED-134
+            Required to pull the Mini-KDC transitive dependency
+            -->
+			<plugin>
+				<groupId>org.apache.felix</groupId>
+				<artifactId>maven-bundle-plugin</artifactId>
+				<version>3.0.1</version>
+				<inherited>true</inherited>
+				<extensions>true</extensions>
+			</plugin>
+			
 		</plugins>
 	</build>
 </project>

http://git-wip-us.apache.org/repos/asf/flink/blob/25a622fd/flink-yarn-tests/src/test/java/org/apache/flink/yarn/FlinkYarnSessionCliTest.java
----------------------------------------------------------------------
diff --git a/flink-yarn-tests/src/test/java/org/apache/flink/yarn/FlinkYarnSessionCliTest.java b/flink-yarn-tests/src/test/java/org/apache/flink/yarn/FlinkYarnSessionCliTest.java
index d03d9eb..a503115 100644
--- a/flink-yarn-tests/src/test/java/org/apache/flink/yarn/FlinkYarnSessionCliTest.java
+++ b/flink-yarn-tests/src/test/java/org/apache/flink/yarn/FlinkYarnSessionCliTest.java
@@ -180,7 +180,7 @@ public class FlinkYarnSessionCliTest {
 				Mockito.mock(YarnClient.class),
 				Mockito.mock(ApplicationReport.class),
 				config,
-				new Path("/tmp"), false);
+				new Path("/temp"), false);
 		}
 	}
 }

http://git-wip-us.apache.org/repos/asf/flink/blob/25a622fd/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YARNHighAvailabilityITCase.java
----------------------------------------------------------------------
diff --git a/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YARNHighAvailabilityITCase.java b/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YARNHighAvailabilityITCase.java
index a293348..9d6ff85 100644
--- a/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YARNHighAvailabilityITCase.java
+++ b/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YARNHighAvailabilityITCase.java
@@ -50,14 +50,14 @@ import java.util.concurrent.TimeUnit;
 
 public class YARNHighAvailabilityITCase extends YarnTestBase {
 
-	private static TestingServer zkServer;
+	protected static TestingServer zkServer;
 
-	private static ActorSystem actorSystem;
+	protected static ActorSystem actorSystem;
 
-	private static final int numberApplicationAttempts = 10;
+	protected static final int numberApplicationAttempts = 10;
 
 	@Rule
-	public TemporaryFolder tmp = new TemporaryFolder();
+	public TemporaryFolder temp = new TemporaryFolder();
 
 	@BeforeClass
 	public static void setup() {
@@ -108,7 +108,11 @@ public class YARNHighAvailabilityITCase extends YarnTestBase {
 		String confDirPath = System.getenv(ConfigConstants.ENV_FLINK_CONF_DIR);
 		flinkYarnClient.setConfigurationDirectory(confDirPath);
 
-		String fsStateHandlePath = tmp.getRoot().getPath();
+		String fsStateHandlePath = temp.getRoot().getPath();
+
+		// load the configuration
+		File configDirectory = new File(confDirPath);
+		GlobalConfiguration.loadConfiguration(configDirectory.getAbsolutePath());
 
 		flinkYarnClient.setFlinkConfiguration(GlobalConfiguration.loadConfiguration());
 		flinkYarnClient.setDynamicPropertiesEncoded("recovery.mode=zookeeper@@recovery.zookeeper.quorum=" +

http://git-wip-us.apache.org/repos/asf/flink/blob/25a622fd/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YARNSessionCapacitySchedulerITCase.java
----------------------------------------------------------------------
diff --git a/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YARNSessionCapacitySchedulerITCase.java b/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YARNSessionCapacitySchedulerITCase.java
index ddea4dd..650397d 100644
--- a/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YARNSessionCapacitySchedulerITCase.java
+++ b/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YARNSessionCapacitySchedulerITCase.java
@@ -23,10 +23,12 @@ import com.fasterxml.jackson.databind.node.ArrayNode;
 import com.google.common.base.Joiner;
 import org.apache.commons.io.FileUtils;
 import org.apache.flink.configuration.ConfigConstants;
+import org.apache.flink.configuration.GlobalConfiguration;
 import org.apache.flink.runtime.client.JobClient;
 import org.apache.flink.runtime.webmonitor.WebMonitorUtils;
 import org.apache.flink.test.testdata.WordCountData;
 import org.apache.flink.test.util.TestBaseUtils;
+import org.apache.flink.yarn.cli.FlinkYarnSessionCli;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.yarn.api.protocolrecords.StopContainersRequest;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
@@ -516,6 +518,27 @@ public class YARNSessionCapacitySchedulerITCase extends YarnTestBase {
 		} catch(Throwable t) {
 			LOG.warn("Error while detached yarn session was running", t);
 			Assert.fail(t.getMessage());
+		} finally {
+
+			//cleanup the yarn-properties file
+			String confDirPath = System.getenv("FLINK_CONF_DIR");
+			File configDirectory = new File(confDirPath);
+			LOG.info("testDetachedPerJobYarnClusterInternal: Using configuration directory " + configDirectory.getAbsolutePath());
+
+			// load the configuration
+			LOG.info("testDetachedPerJobYarnClusterInternal: Trying to load configuration file");
+			GlobalConfiguration.loadConfiguration(configDirectory.getAbsolutePath());
+
+			try {
+				File yarnPropertiesFile = FlinkYarnSessionCli.getYarnPropertiesLocation(GlobalConfiguration.loadConfiguration());
+				if(yarnPropertiesFile.exists()) {
+					LOG.info("testDetachedPerJobYarnClusterInternal: Cleaning up temporary Yarn address reference: {}", yarnPropertiesFile.getAbsolutePath());
+					yarnPropertiesFile.delete();
+				}
+			} catch (Exception e) {
+				LOG.warn("testDetachedPerJobYarnClusterInternal: Exception while deleting the JobManager address file", e);
+			}
+
 		}
 	}
 

http://git-wip-us.apache.org/repos/asf/flink/blob/25a622fd/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YARNSessionFIFOITCase.java
----------------------------------------------------------------------
diff --git a/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YARNSessionFIFOITCase.java b/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YARNSessionFIFOITCase.java
index 3caa0ee..ca696f9 100644
--- a/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YARNSessionFIFOITCase.java
+++ b/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YARNSessionFIFOITCase.java
@@ -100,6 +100,9 @@ public class YARNSessionFIFOITCase extends YarnTestBase {
 		while(getRunningContainers() < 2) {
 			sleep(500);
 		}
+
+		//additional sleep for the JM/TM to start and establish connection
+		sleep(2000);
 		LOG.info("Two containers are running. Killing the application");
 
 		// kill application "externally".
@@ -121,6 +124,27 @@ public class YARNSessionFIFOITCase extends YarnTestBase {
 		} catch(Throwable t) {
 			LOG.warn("Killing failed", t);
 			Assert.fail();
+		} finally {
+
+			//cleanup the yarn-properties file
+			String confDirPath = System.getenv("FLINK_CONF_DIR");
+			File configDirectory = new File(confDirPath);
+			LOG.info("testDetachedPerJobYarnClusterInternal: Using configuration directory " + configDirectory.getAbsolutePath());
+
+			// load the configuration
+			LOG.info("testDetachedPerJobYarnClusterInternal: Trying to load configuration file");
+			GlobalConfiguration.loadConfiguration(configDirectory.getAbsolutePath());
+
+			try {
+				File yarnPropertiesFile = FlinkYarnSessionCli.getYarnPropertiesLocation(GlobalConfiguration.loadConfiguration());
+				if(yarnPropertiesFile.exists()) {
+					LOG.info("testDetachedPerJobYarnClusterInternal: Cleaning up temporary Yarn address reference: {}", yarnPropertiesFile.getAbsolutePath());
+					yarnPropertiesFile.delete();
+				}
+			} catch (Exception e) {
+				LOG.warn("testDetachedPerJobYarnClusterInternal: Exception while deleting the JobManager address file", e);
+			}
+
 		}
 
 		LOG.info("Finished testDetachedMode()");

http://git-wip-us.apache.org/repos/asf/flink/blob/25a622fd/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YARNSessionFIFOSecuredITCase.java
----------------------------------------------------------------------
diff --git a/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YARNSessionFIFOSecuredITCase.java b/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YARNSessionFIFOSecuredITCase.java
new file mode 100644
index 0000000..0b7c230
--- /dev/null
+++ b/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YARNSessionFIFOSecuredITCase.java
@@ -0,0 +1,103 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.yarn;
+
+import org.apache.flink.configuration.ConfigConstants;
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.runtime.security.SecurityContext;
+import org.apache.flink.test.util.SecureTestEnvironment;
+import org.apache.flink.test.util.TestingSecurityContext;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class YARNSessionFIFOSecuredITCase extends YARNSessionFIFOITCase {
+
+	protected static final Logger LOG = LoggerFactory.getLogger(YARNSessionFIFOSecuredITCase.class);
+
+	@BeforeClass
+	public static void setup() {
+
+		LOG.info("starting secure cluster environment for testing");
+
+		yarnConfiguration.setClass(YarnConfiguration.RM_SCHEDULER, FifoScheduler.class, ResourceScheduler.class);
+		yarnConfiguration.setInt(YarnConfiguration.NM_PMEM_MB, 768);
+		yarnConfiguration.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 512);
+		yarnConfiguration.set(YarnTestBase.TEST_CLUSTER_NAME_KEY, "flink-yarn-tests-fifo-secured");
+
+		SecureTestEnvironment.prepare(tmp);
+
+		populateYarnSecureConfigurations(yarnConfiguration,SecureTestEnvironment.getHadoopServicePrincipal(),
+				SecureTestEnvironment.getTestKeytab());
+
+		Configuration flinkConfig = new Configuration();
+		flinkConfig.setString(ConfigConstants.SECURITY_KEYTAB_KEY,
+				SecureTestEnvironment.getTestKeytab());
+		flinkConfig.setString(ConfigConstants.SECURITY_PRINCIPAL_KEY,
+				SecureTestEnvironment.getHadoopServicePrincipal());
+
+		SecurityContext.SecurityConfiguration ctx = new SecurityContext.SecurityConfiguration();
+		ctx.setFlinkConfiguration(flinkConfig);
+		ctx.setHadoopConfiguration(yarnConfiguration);
+		try {
+			TestingSecurityContext.install(ctx, SecureTestEnvironment.getClientSecurityConfigurationMap());
+
+			SecurityContext.getInstalled().runSecured(new SecurityContext.FlinkSecuredRunner<Integer>() {
+				@Override
+				public Integer run() {
+					startYARNSecureMode(yarnConfiguration, SecureTestEnvironment.getHadoopServicePrincipal(),
+							SecureTestEnvironment.getTestKeytab());
+					return null;
+				}
+			});
+
+		} catch(Exception e) {
+			throw new RuntimeException("Exception occurred while setting up secure test context. Reason: {}", e);
+		}
+
+	}
+
+	@AfterClass
+	public static void teardownSecureCluster() throws Exception {
+		LOG.info("tearing down secure cluster environment");
+		SecureTestEnvironment.cleanup();
+	}
+
+	/* For secure cluster testing, it is enough to run only one test and override below test methods
+	 * to keep the overall build time minimal
+	 */
+	@Override
+	public void testQueryCluster() {}
+
+	@Override
+	public void testNonexistingQueue() {}
+
+	@Override
+	public void testResourceComputation() {}
+
+	@Override
+	public void testfullAlloc() {}
+
+	@Override
+	public void testJavaAPI() {}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/25a622fd/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YarnTestBase.java
----------------------------------------------------------------------
diff --git a/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YarnTestBase.java b/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YarnTestBase.java
index 6270010..605aa44 100644
--- a/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YarnTestBase.java
+++ b/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YarnTestBase.java
@@ -20,6 +20,7 @@ package org.apache.flink.yarn;
 
 import akka.actor.Identify;
 import org.apache.commons.io.FileUtils;
+import org.apache.commons.lang3.StringUtils;
 import org.apache.flink.client.CliFrontend;
 import org.apache.flink.client.cli.CommandLineOptions;
 import org.apache.flink.client.program.ClusterClient;
@@ -29,6 +30,8 @@ import org.apache.flink.yarn.cli.FlinkYarnSessionCli;
 import org.apache.flink.test.util.TestBaseUtils;
 import org.apache.flink.util.TestLogger;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
+import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.service.Service;
 import org.apache.hadoop.yarn.api.records.ApplicationReport;
 import org.apache.hadoop.yarn.api.records.ContainerId;
@@ -62,6 +65,8 @@ import java.io.FileWriter;
 import java.io.FilenameFilter;
 import java.io.IOException;
 import java.io.PrintStream;
+import java.io.BufferedWriter;
+import java.io.PrintWriter;
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.List;
@@ -123,6 +128,11 @@ public abstract class YarnTestBase extends TestLogger {
 	 */
 	protected static File flinkLibFolder;
 
+	/**
+	 * Temporary folder where Flink configurations will be kept for secure run
+	 */
+	protected static File tempConfPathForSecureRun = null;
+
 	static {
 		yarnConfiguration = new YarnConfiguration();
 		yarnConfiguration.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 512);
@@ -140,6 +150,23 @@ public abstract class YarnTestBase extends TestLogger {
 	}
 
 
+	public static void populateYarnSecureConfigurations(Configuration conf, String principal, String keytab) {
+
+		conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, "kerberos");
+		conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHORIZATION, "true");
+
+		conf.set(YarnConfiguration.RM_KEYTAB, keytab);
+		conf.set(YarnConfiguration.RM_PRINCIPAL, principal);
+		conf.set(YarnConfiguration.NM_KEYTAB, keytab);
+		conf.set(YarnConfiguration.NM_PRINCIPAL, principal);
+
+		conf.set(YarnConfiguration.RM_WEBAPP_SPNEGO_USER_NAME_KEY, principal);
+		conf.set(YarnConfiguration.RM_WEBAPP_SPNEGO_KEYTAB_FILE_KEY,keytab);
+		conf.set(YarnConfiguration.NM_WEBAPP_SPNEGO_USER_NAME_KEY, principal);
+		conf.set(YarnConfiguration.NM_WEBAPP_SPNEGO_KEYTAB_FILE_KEY,keytab);
+
+		conf.set("hadoop.security.auth_to_local","RULE:[1:$1] RULE:[2:$1]");
+	}
 
 	/**
 	 * Sleep a bit between the tests (we are re-using the YARN cluster for the tests)
@@ -336,8 +363,16 @@ public abstract class YarnTestBase extends TestLogger {
 		return count;
 	}
 
+	public static void startYARNSecureMode(Configuration conf, String principal, String keytab) {
+		start(conf, principal, keytab);
+	}
+
 	public static void startYARNWithConfig(Configuration conf) {
-		// set the home directory to a tmp directory. Flink on YARN is using the home dir to distribute the file
+		start(conf,null,null);
+	}
+
+	private static void start(Configuration conf, String principal, String keytab) {
+		// set the home directory to a temp directory. Flink on YARN is using the home dir to distribute the file
 		File homeDir = null;
 		try {
 			homeDir = tmp.newFolder();
@@ -374,7 +409,39 @@ public abstract class YarnTestBase extends TestLogger {
 			File flinkConfDirPath = findFile(flinkDistRootDir, new ContainsName(new String[]{"flink-conf.yaml"}));
 			Assert.assertNotNull(flinkConfDirPath);
 
-			map.put(ConfigConstants.ENV_FLINK_CONF_DIR, flinkConfDirPath.getParent());
+			if(!StringUtils.isBlank(principal) && !StringUtils.isBlank(keytab)) {
+				//copy conf dir to test temporary workspace location
+				tempConfPathForSecureRun = tmp.newFolder("conf");
+
+				String confDirPath = flinkConfDirPath.getParentFile().getAbsolutePath();
+				FileUtils.copyDirectory(new File(confDirPath), tempConfPathForSecureRun);
+
+				try(FileWriter fw = new FileWriter(new File(tempConfPathForSecureRun,"flink-conf.yaml"), true);
+					BufferedWriter bw = new BufferedWriter(fw);
+					PrintWriter out = new PrintWriter(bw))
+				{
+					LOG.info("writing keytab: " + keytab + " and principal: " + principal + " to config file");
+					out.println("");
+					out.println("#Security Configurations Auto Populated ");
+					out.println(ConfigConstants.SECURITY_KEYTAB_KEY + ": " + keytab);
+					out.println(ConfigConstants.SECURITY_PRINCIPAL_KEY + ": " + principal);
+					out.println("");
+				} catch (IOException e) {
+					LOG.error("Exception occured while trying to append the security configurations. Reason: {}", e.getMessage());
+					throw new RuntimeException(e);
+				}
+
+				String configDir = tempConfPathForSecureRun.getAbsolutePath();
+
+				LOG.info("Temporary Flink configuration directory to be used for secure test: {}", configDir);
+
+				Assert.assertNotNull(configDir);
+
+				map.put(ConfigConstants.ENV_FLINK_CONF_DIR, configDir);
+
+			} else {
+				map.put(ConfigConstants.ENV_FLINK_CONF_DIR, flinkConfDirPath.getParent());
+			}
 
 			File yarnConfFile = writeYarnSiteConfigXML(conf);
 			map.put("YARN_CONF_DIR", yarnConfFile.getParentFile().getAbsolutePath());
@@ -392,6 +459,7 @@ public abstract class YarnTestBase extends TestLogger {
 			LOG.error("setup failure", ex);
 			Assert.fail();
 		}
+
 	}
 
 	/**
@@ -421,7 +489,6 @@ public abstract class YarnTestBase extends TestLogger {
 		System.setOut(new PrintStream(outContent));
 		System.setErr(new PrintStream(errContent));
 
-
 		final int START_TIMEOUT_SECONDS = 60;
 
 		Runner runner = new Runner(args, type);
@@ -624,12 +691,23 @@ public abstract class YarnTestBase extends TestLogger {
 
 	@AfterClass
 	public static void teardown() throws Exception {
+
+		LOG.info("Stopping MiniYarn Cluster");
+		yarnCluster.stop();
+
 		// Unset FLINK_CONF_DIR, as it might change the behavior of other tests
 		Map<String, String> map = new HashMap<>(System.getenv());
 		map.remove(ConfigConstants.ENV_FLINK_CONF_DIR);
+		map.remove("YARN_CONF_DIR");
+		map.remove("IN_TESTS");
 		TestBaseUtils.setEnv(map);
 
-		// When we are on travis, we copy the tmp files of JUnit (containing the MiniYARNCluster log files)
+		if(tempConfPathForSecureRun != null) {
+			FileUtil.fullyDelete(tempConfPathForSecureRun);
+			tempConfPathForSecureRun = null;
+		}
+
+		// When we are on travis, we copy the temp files of JUnit (containing the MiniYARNCluster log files)
 		// to <flinkRoot>/target/flink-yarn-tests-*.
 		// The files from there are picked up by the ./tools/travis_watchdog.sh script
 		// to upload them to Amazon S3.
@@ -646,6 +724,7 @@ public abstract class YarnTestBase extends TestLogger {
 				LOG.warn("Error copying the final files from {} to {}: msg: {}", src.getAbsolutePath(), target.getAbsolutePath(), e.getMessage(), e);
 			}
 		}
+
 	}
 
 	public static boolean isOnTravis() {

http://git-wip-us.apache.org/repos/asf/flink/blob/25a622fd/flink-yarn-tests/src/test/resources/log4j-test.properties
----------------------------------------------------------------------
diff --git a/flink-yarn-tests/src/test/resources/log4j-test.properties b/flink-yarn-tests/src/test/resources/log4j-test.properties
index e94ca26..8f56c1f 100644
--- a/flink-yarn-tests/src/test/resources/log4j-test.properties
+++ b/flink-yarn-tests/src/test/resources/log4j-test.properties
@@ -34,3 +34,8 @@ log4j.logger.org.apache.hadoop=OFF
 log4j.logger.org.apache.flink.runtime.leaderelection=INFO
 log4j.logger.org.apache.flink.runtime.leaderretrieval=INFO
 
+log4j.logger.org.apache.directory=OFF
+log4j.logger.org.mortbay.log=OFF, testlogger
+log4j.logger.net.sf.ehcache=OFF
+log4j.logger.org.apache.hadoop.metrics2=OFF
+log4j.logger.org.apache.hadoop.conf.Configuration=OFF


[36/50] [abbrv] flink git commit: [FLINK-4529] [flip-6] Move TaskExecutor, JobMaster and ResourceManager out of the rpc package

Posted by tr...@apache.org.
http://git-wip-us.apache.org/repos/asf/flink/blob/2f12ba3e/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutor.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutor.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutor.java
deleted file mode 100644
index 36d6310..0000000
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutor.java
+++ /dev/null
@@ -1,827 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.flink.runtime.rpc.taskexecutor;
-
-import akka.actor.ActorSystem;
-import akka.dispatch.ExecutionContexts$;
-import akka.util.Timeout;
-import com.typesafe.config.Config;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import org.apache.flink.api.common.JobID;
-import org.apache.flink.configuration.ConfigConstants;
-import org.apache.flink.configuration.Configuration;
-import org.apache.flink.configuration.IllegalConfigurationException;
-import org.apache.flink.core.memory.HeapMemorySegment;
-import org.apache.flink.core.memory.HybridMemorySegment;
-import org.apache.flink.core.memory.MemorySegmentFactory;
-import org.apache.flink.core.memory.MemoryType;
-import org.apache.flink.runtime.akka.AkkaUtils;
-import org.apache.flink.annotation.VisibleForTesting;
-import org.apache.flink.runtime.clusterframework.types.ResourceID;
-import org.apache.flink.runtime.highavailability.HighAvailabilityServices;
-import org.apache.flink.runtime.instance.InstanceConnectionInfo;
-import org.apache.flink.runtime.io.disk.iomanager.IOManager;
-import org.apache.flink.runtime.io.disk.iomanager.IOManager.IOMode;
-import org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync;
-import org.apache.flink.runtime.io.network.NetworkEnvironment;
-import org.apache.flink.runtime.io.network.netty.NettyConfig;
-import org.apache.flink.runtime.leaderelection.LeaderElectionService;
-import org.apache.flink.runtime.leaderretrieval.LeaderRetrievalListener;
-import org.apache.flink.runtime.leaderretrieval.LeaderRetrievalService;
-import org.apache.flink.runtime.memory.MemoryManager;
-import org.apache.flink.runtime.rpc.RpcEndpoint;
-import org.apache.flink.runtime.rpc.RpcMethod;
-import org.apache.flink.runtime.rpc.RpcService;
-import org.apache.flink.runtime.rpc.akka.AkkaRpcService;
-import org.apache.flink.runtime.taskmanager.MemoryLogger;
-import org.apache.flink.runtime.util.EnvironmentInformation;
-import org.apache.flink.runtime.util.LeaderRetrievalUtils;
-import org.apache.flink.runtime.taskmanager.NetworkEnvironmentConfiguration;
-import org.apache.flink.util.MathUtils;
-import org.apache.flink.util.NetUtils;
-
-import scala.Tuple2;
-import scala.Option;
-import scala.Some;
-import scala.concurrent.ExecutionContext;
-import scala.concurrent.duration.Duration;
-import scala.concurrent.duration.FiniteDuration;
-
-import java.io.File;
-import java.io.IOException;
-import java.net.InetAddress;
-import java.net.InetSocketAddress;
-import java.util.UUID;
-import java.util.concurrent.ForkJoinPool;
-import java.util.concurrent.TimeUnit;
-
-import static org.apache.flink.util.Preconditions.checkNotNull;
-
-/**
- * TaskExecutor implementation. The task executor is responsible for the execution of multiple
- * {@link org.apache.flink.runtime.taskmanager.Task}.
- */
-public class TaskExecutor extends RpcEndpoint<TaskExecutorGateway> {
-
-	private static final Logger LOG = LoggerFactory.getLogger(TaskExecutor.class);
-
-	/** The unique resource ID of this TaskExecutor */
-	private final ResourceID resourceID;
-
-	/** The access to the leader election and metadata storage services */
-	private final HighAvailabilityServices haServices;
-
-	/** The task manager configuration */
-	private final TaskExecutorConfiguration taskExecutorConfig;
-
-	/** The I/O manager component in the task manager */
-	private final IOManager ioManager;
-
-	/** The memory manager component in the task manager */
-	private final MemoryManager memoryManager;
-
-	/** The network component in the task manager */
-	private final NetworkEnvironment networkEnvironment;
-
-	/** The number of slots in the task manager, should be 1 for YARN */
-	private final int numberOfSlots;
-
-	// --------- resource manager --------
-
-	private TaskExecutorToResourceManagerConnection resourceManagerConnection;
-
-	// ------------------------------------------------------------------------
-
-	public TaskExecutor(
-			TaskExecutorConfiguration taskExecutorConfig,
-			ResourceID resourceID,
-			MemoryManager memoryManager,
-			IOManager ioManager,
-			NetworkEnvironment networkEnvironment,
-			int numberOfSlots,
-			RpcService rpcService,
-			HighAvailabilityServices haServices) {
-
-		super(rpcService);
-
-		this.taskExecutorConfig = checkNotNull(taskExecutorConfig);
-		this.resourceID = checkNotNull(resourceID);
-		this.memoryManager = checkNotNull(memoryManager);
-		this.ioManager = checkNotNull(ioManager);
-		this.networkEnvironment = checkNotNull(networkEnvironment);
-		this.numberOfSlots = checkNotNull(numberOfSlots);
-		this.haServices = checkNotNull(haServices);
-	}
-
-	// ------------------------------------------------------------------------
-	//  Life cycle
-	// ------------------------------------------------------------------------
-
-	@Override
-	public void start() {
-		super.start();
-
-		// start by connecting to the ResourceManager
-		try {
-			haServices.getResourceManagerLeaderRetriever().start(new ResourceManagerLeaderListener());
-		} catch (Exception e) {
-			onFatalErrorAsync(e);
-		}
-	}
-
-	// ------------------------------------------------------------------------
-	//  RPC methods - ResourceManager related
-	// ------------------------------------------------------------------------
-
-	@RpcMethod
-	public void notifyOfNewResourceManagerLeader(String newLeaderAddress, UUID newLeaderId) {
-		if (resourceManagerConnection != null) {
-			if (newLeaderAddress != null) {
-				// the resource manager switched to a new leader
-				log.info("ResourceManager leader changed from {} to {}. Registering at new leader.",
-					resourceManagerConnection.getResourceManagerAddress(), newLeaderAddress);
-			}
-			else {
-				// address null means that the current leader is lost without a new leader being there, yet
-				log.info("Current ResourceManager {} lost leader status. Waiting for new ResourceManager leader.",
-					resourceManagerConnection.getResourceManagerAddress());
-			}
-
-			// drop the current connection or connection attempt
-			if (resourceManagerConnection != null) {
-				resourceManagerConnection.close();
-				resourceManagerConnection = null;
-			}
-		}
-
-		// establish a connection to the new leader
-		if (newLeaderAddress != null) {
-			log.info("Attempting to register at ResourceManager {}", newLeaderAddress);
-			resourceManagerConnection =
-				new TaskExecutorToResourceManagerConnection(log, this, newLeaderAddress, newLeaderId);
-			resourceManagerConnection.start();
-		}
-	}
-
-	/**
-	 * Starts and runs the TaskManager.
-	 * <p/>
-	 * This method first tries to select the network interface to use for the TaskManager
-	 * communication. The network interface is used both for the actor communication
-	 * (coordination) as well as for the data exchange between task managers. Unless
-	 * the hostname/interface is explicitly configured in the configuration, this
-	 * method will try out various interfaces and methods to connect to the JobManager
-	 * and select the one where the connection attempt is successful.
-	 * <p/>
-	 * After selecting the network interface, this method brings up an actor system
-	 * for the TaskManager and its actors, starts the TaskManager's services
-	 * (library cache, shuffle network stack, ...), and starts the TaskManager itself.
-	 *
-	 * @param configuration    The configuration for the TaskManager.
-	 * @param resourceID       The id of the resource which the task manager will run on.
-	 */
-	public static void selectNetworkInterfaceAndRunTaskManager(
-		Configuration configuration,
-		ResourceID resourceID) throws Exception {
-
-		final InetSocketAddress taskManagerAddress = selectNetworkInterfaceAndPort(configuration);
-
-		runTaskManager(taskManagerAddress.getHostName(), resourceID, taskManagerAddress.getPort(), configuration);
-	}
-
-	private static InetSocketAddress selectNetworkInterfaceAndPort(Configuration configuration)
-		throws Exception {
-		String taskManagerHostname = configuration.getString(ConfigConstants.TASK_MANAGER_HOSTNAME_KEY, null);
-		if (taskManagerHostname != null) {
-			LOG.info("Using configured hostname/address for TaskManager: " + taskManagerHostname);
-		} else {
-			LeaderRetrievalService leaderRetrievalService = LeaderRetrievalUtils.createLeaderRetrievalService(configuration);
-			FiniteDuration lookupTimeout = AkkaUtils.getLookupTimeout(configuration);
-
-			InetAddress taskManagerAddress = LeaderRetrievalUtils.findConnectingAddress(leaderRetrievalService, lookupTimeout);
-			taskManagerHostname = taskManagerAddress.getHostName();
-			LOG.info("TaskManager will use hostname/address '{}' ({}) for communication.",
-				taskManagerHostname, taskManagerAddress.getHostAddress());
-		}
-
-		// if no task manager port has been configured, use 0 (system will pick any free port)
-		final int actorSystemPort = configuration.getInteger(ConfigConstants.TASK_MANAGER_IPC_PORT_KEY, 0);
-		if (actorSystemPort < 0 || actorSystemPort > 65535) {
-			throw new IllegalConfigurationException("Invalid value for '" +
-				ConfigConstants.TASK_MANAGER_IPC_PORT_KEY +
-				"' (port for the TaskManager actor system) : " + actorSystemPort +
-				" - Leave config parameter empty or use 0 to let the system choose a port automatically.");
-		}
-
-		return new InetSocketAddress(taskManagerHostname, actorSystemPort);
-	}
-
-	/**
-	 * Starts and runs the TaskManager. Brings up an actor system for the TaskManager and its
-	 * actors, starts the TaskManager's services (library cache, shuffle network stack, ...),
-	 * and starts the TaskManager itself.
-	 * <p/>
-	 * This method will also spawn a process reaper for the TaskManager (kill the process if
-	 * the actor fails) and optionally start the JVM memory logging thread.
-	 *
-	 * @param taskManagerHostname The hostname/address of the interface where the actor system
-	 *                            will communicate.
-	 * @param resourceID          The id of the resource which the task manager will run on.
-	 * @param actorSystemPort   The port at which the actor system will communicate.
-	 * @param configuration       The configuration for the TaskManager.
-	 */
-	private static void runTaskManager(
-		String taskManagerHostname,
-		ResourceID resourceID,
-		int actorSystemPort,
-		final Configuration configuration) throws Exception {
-
-		LOG.info("Starting TaskManager");
-
-		// Bring up the TaskManager actor system first, bind it to the given address.
-
-		LOG.info("Starting TaskManager actor system at " +
-			NetUtils.hostAndPortToUrlString(taskManagerHostname, actorSystemPort));
-
-		final ActorSystem taskManagerSystem;
-		try {
-			Tuple2<String, Object> address = new Tuple2<String, Object>(taskManagerHostname, actorSystemPort);
-			Config akkaConfig = AkkaUtils.getAkkaConfig(configuration, new Some<>(address));
-			LOG.debug("Using akka configuration\n " + akkaConfig);
-			taskManagerSystem = AkkaUtils.createActorSystem(akkaConfig);
-		} catch (Throwable t) {
-			if (t instanceof org.jboss.netty.channel.ChannelException) {
-				Throwable cause = t.getCause();
-				if (cause != null && t.getCause() instanceof java.net.BindException) {
-					String address = NetUtils.hostAndPortToUrlString(taskManagerHostname, actorSystemPort);
-					throw new IOException("Unable to bind TaskManager actor system to address " +
-						address + " - " + cause.getMessage(), t);
-				}
-			}
-			throw new Exception("Could not create TaskManager actor system", t);
-		}
-
-		// start akka rpc service based on actor system
-		final Timeout timeout = new Timeout(AkkaUtils.getTimeout(configuration).toMillis(), TimeUnit.MILLISECONDS);
-		final AkkaRpcService akkaRpcService = new AkkaRpcService(taskManagerSystem, timeout);
-
-		// start high availability service to implement getResourceManagerLeaderRetriever method only
-		final HighAvailabilityServices haServices = new HighAvailabilityServices() {
-			@Override
-			public LeaderRetrievalService getResourceManagerLeaderRetriever() throws Exception {
-				return LeaderRetrievalUtils.createLeaderRetrievalService(configuration);
-			}
-
-			@Override
-			public LeaderElectionService getResourceManagerLeaderElectionService() throws Exception {
-				return null;
-			}
-
-			@Override
-			public LeaderElectionService getJobMasterLeaderElectionService(JobID jobID) throws Exception {
-				return null;
-			}
-		};
-
-		// start all the TaskManager services (network stack,  library cache, ...)
-		// and the TaskManager actor
-		try {
-			LOG.info("Starting TaskManager actor");
-			TaskExecutor taskExecutor = startTaskManagerComponentsAndActor(
-				configuration,
-				resourceID,
-				akkaRpcService,
-				taskManagerHostname,
-				haServices,
-				false);
-
-			taskExecutor.start();
-
-			// if desired, start the logging daemon that periodically logs the memory usage information
-			if (LOG.isInfoEnabled() && configuration.getBoolean(
-				ConfigConstants.TASK_MANAGER_DEBUG_MEMORY_USAGE_START_LOG_THREAD,
-				ConfigConstants.DEFAULT_TASK_MANAGER_DEBUG_MEMORY_USAGE_START_LOG_THREAD)) {
-				LOG.info("Starting periodic memory usage logger");
-
-				long interval = configuration.getLong(
-					ConfigConstants.TASK_MANAGER_DEBUG_MEMORY_USAGE_LOG_INTERVAL_MS,
-					ConfigConstants.DEFAULT_TASK_MANAGER_DEBUG_MEMORY_USAGE_LOG_INTERVAL_MS);
-
-				MemoryLogger logger = new MemoryLogger(LOG, interval, taskManagerSystem);
-				logger.start();
-			}
-
-			// block until everything is done
-			taskManagerSystem.awaitTermination();
-		} catch (Throwable t) {
-			LOG.error("Error while starting up taskManager", t);
-			try {
-				taskManagerSystem.shutdown();
-			} catch (Throwable tt) {
-				LOG.warn("Could not cleanly shut down actor system", tt);
-			}
-			throw t;
-		}
-	}
-
-	// --------------------------------------------------------------------------
-	//  Starting and running the TaskManager
-	// --------------------------------------------------------------------------
-
-	/**
-	 * @param configuration                 The configuration for the TaskManager.
-	 * @param resourceID                    The id of the resource which the task manager will run on.
-	 * @param rpcService                  The rpc service which is used to start and connect to the TaskManager RpcEndpoint .
-	 * @param taskManagerHostname       The hostname/address that describes the TaskManager's data location.
-	 * @param haServices        Optionally, a high availability service can be provided. If none is given,
-	 *                                      then a HighAvailabilityServices is constructed from the configuration.
-	 * @param localTaskManagerCommunication     If true, the TaskManager will not initiate the TCP network stack.
-	 * @return An ActorRef to the TaskManager actor.
-	 * @throws org.apache.flink.configuration.IllegalConfigurationException     Thrown, if the given config contains illegal values.
-	 * @throws java.io.IOException      Thrown, if any of the I/O components (such as buffer pools,
-	 *                                       I/O manager, ...) cannot be properly started.
-	 * @throws java.lang.Exception      Thrown is some other error occurs while parsing the configuration
-	 *                                      or starting the TaskManager components.
-	 */
-	public static TaskExecutor startTaskManagerComponentsAndActor(
-		Configuration configuration,
-		ResourceID resourceID,
-		RpcService rpcService,
-		String taskManagerHostname,
-		HighAvailabilityServices haServices,
-		boolean localTaskManagerCommunication) throws Exception {
-
-		final TaskExecutorConfiguration taskExecutorConfig = parseTaskManagerConfiguration(
-			configuration, taskManagerHostname, localTaskManagerCommunication);
-
-		MemoryType memType = taskExecutorConfig.getNetworkConfig().memoryType();
-
-		// pre-start checks
-		checkTempDirs(taskExecutorConfig.getTmpDirPaths());
-
-		ExecutionContext executionContext = ExecutionContexts$.MODULE$.fromExecutor(new ForkJoinPool());
-
-		// we start the network first, to make sure it can allocate its buffers first
-		final NetworkEnvironment network = new NetworkEnvironment(
-			executionContext,
-			taskExecutorConfig.getTimeout(),
-			taskExecutorConfig.getNetworkConfig(),
-			taskExecutorConfig.getConnectionInfo());
-
-		// computing the amount of memory to use depends on how much memory is available
-		// it strictly needs to happen AFTER the network stack has been initialized
-
-		// check if a value has been configured
-		long configuredMemory = configuration.getLong(ConfigConstants.TASK_MANAGER_MEMORY_SIZE_KEY, -1L);
-		checkConfigParameter(configuredMemory == -1 || configuredMemory > 0, configuredMemory,
-			ConfigConstants.TASK_MANAGER_MEMORY_SIZE_KEY,
-			"MemoryManager needs at least one MB of memory. " +
-				"If you leave this config parameter empty, the system automatically " +
-				"pick a fraction of the available memory.");
-
-		final long memorySize;
-		boolean preAllocateMemory = configuration.getBoolean(
-			ConfigConstants.TASK_MANAGER_MEMORY_PRE_ALLOCATE_KEY,
-			ConfigConstants.DEFAULT_TASK_MANAGER_MEMORY_PRE_ALLOCATE);
-		if (configuredMemory > 0) {
-			if (preAllocateMemory) {
-				LOG.info("Using {} MB for managed memory." , configuredMemory);
-			} else {
-				LOG.info("Limiting managed memory to {} MB, memory will be allocated lazily." , configuredMemory);
-			}
-			memorySize = configuredMemory << 20; // megabytes to bytes
-		} else {
-			float fraction = configuration.getFloat(
-				ConfigConstants.TASK_MANAGER_MEMORY_FRACTION_KEY,
-				ConfigConstants.DEFAULT_MEMORY_MANAGER_MEMORY_FRACTION);
-			checkConfigParameter(fraction > 0.0f && fraction < 1.0f, fraction,
-				ConfigConstants.TASK_MANAGER_MEMORY_FRACTION_KEY,
-				"MemoryManager fraction of the free memory must be between 0.0 and 1.0");
-
-			if (memType == MemoryType.HEAP) {
-				long relativeMemSize = (long) (EnvironmentInformation.getSizeOfFreeHeapMemoryWithDefrag() * fraction);
-				if (preAllocateMemory) {
-					LOG.info("Using {} of the currently free heap space for managed heap memory ({} MB)." ,
-						fraction , relativeMemSize >> 20);
-				} else {
-					LOG.info("Limiting managed memory to {} of the currently free heap space ({} MB), " +
-						"memory will be allocated lazily." , fraction , relativeMemSize >> 20);
-				}
-				memorySize = relativeMemSize;
-			} else if (memType == MemoryType.OFF_HEAP) {
-				// The maximum heap memory has been adjusted according to the fraction
-				long maxMemory = EnvironmentInformation.getMaxJvmHeapMemory();
-				long directMemorySize = (long) (maxMemory / (1.0 - fraction) * fraction);
-				if (preAllocateMemory) {
-					LOG.info("Using {} of the maximum memory size for managed off-heap memory ({} MB)." ,
-						fraction, directMemorySize >> 20);
-				} else {
-					LOG.info("Limiting managed memory to {} of the maximum memory size ({} MB)," +
-						" memory will be allocated lazily.", fraction, directMemorySize >> 20);
-				}
-				memorySize = directMemorySize;
-			} else {
-				throw new RuntimeException("No supported memory type detected.");
-			}
-		}
-
-		// now start the memory manager
-		final MemoryManager memoryManager;
-		try {
-			memoryManager = new MemoryManager(
-				memorySize,
-				taskExecutorConfig.getNumberOfSlots(),
-				taskExecutorConfig.getNetworkConfig().networkBufferSize(),
-				memType,
-				preAllocateMemory);
-		} catch (OutOfMemoryError e) {
-			if (memType == MemoryType.HEAP) {
-				throw new Exception("OutOfMemory error (" + e.getMessage() +
-					") while allocating the TaskManager heap memory (" + memorySize + " bytes).", e);
-			} else if (memType == MemoryType.OFF_HEAP) {
-				throw new Exception("OutOfMemory error (" + e.getMessage() +
-					") while allocating the TaskManager off-heap memory (" + memorySize +
-					" bytes).Try increasing the maximum direct memory (-XX:MaxDirectMemorySize)", e);
-			} else {
-				throw e;
-			}
-		}
-
-		// start the I/O manager, it will create some temp directories.
-		final IOManager ioManager = new IOManagerAsync(taskExecutorConfig.getTmpDirPaths());
-
-		final TaskExecutor taskExecutor = new TaskExecutor(
-			taskExecutorConfig,
-			resourceID,
-			memoryManager,
-			ioManager,
-			network,
-			taskExecutorConfig.getNumberOfSlots(),
-			rpcService,
-			haServices);
-
-		return taskExecutor;
-	}
-
-	// --------------------------------------------------------------------------
-	//  Parsing and checking the TaskManager Configuration
-	// --------------------------------------------------------------------------
-
-	/**
-	 * Utility method to extract TaskManager config parameters from the configuration and to
-	 * sanity check them.
-	 *
-	 * @param configuration                 The configuration.
-	 * @param taskManagerHostname           The host name under which the TaskManager communicates.
-	 * @param localTaskManagerCommunication             True, to skip initializing the network stack.
-	 *                                      Use only in cases where only one task manager runs.
-	 * @return TaskExecutorConfiguration that wrappers InstanceConnectionInfo, NetworkEnvironmentConfiguration, etc.
-	 */
-	private static TaskExecutorConfiguration parseTaskManagerConfiguration(
-		Configuration configuration,
-		String taskManagerHostname,
-		boolean localTaskManagerCommunication) throws Exception {
-
-		// ------- read values from the config and check them ---------
-		//                      (a lot of them)
-
-		// ----> hosts / ports for communication and data exchange
-
-		int dataport = configuration.getInteger(ConfigConstants.TASK_MANAGER_DATA_PORT_KEY,
-			ConfigConstants.DEFAULT_TASK_MANAGER_DATA_PORT);
-		if (dataport == 0) {
-			dataport = NetUtils.getAvailablePort();
-		}
-		checkConfigParameter(dataport > 0, dataport, ConfigConstants.TASK_MANAGER_DATA_PORT_KEY,
-			"Leave config parameter empty or use 0 to let the system choose a port automatically.");
-
-		InetAddress taskManagerAddress = InetAddress.getByName(taskManagerHostname);
-		final InstanceConnectionInfo connectionInfo = new InstanceConnectionInfo(taskManagerAddress, dataport);
-
-		// ----> memory / network stack (shuffles/broadcasts), task slots, temp directories
-
-		// we need this because many configs have been written with a "-1" entry
-		int slots = configuration.getInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, 1);
-		if (slots == -1) {
-			slots = 1;
-		}
-		checkConfigParameter(slots >= 1, slots, ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS,
-			"Number of task slots must be at least one.");
-
-		final int numNetworkBuffers = configuration.getInteger(
-			ConfigConstants.TASK_MANAGER_NETWORK_NUM_BUFFERS_KEY,
-			ConfigConstants.DEFAULT_TASK_MANAGER_NETWORK_NUM_BUFFERS);
-		checkConfigParameter(numNetworkBuffers > 0, numNetworkBuffers,
-			ConfigConstants.TASK_MANAGER_NETWORK_NUM_BUFFERS_KEY, "");
-
-		final int pageSize = configuration.getInteger(
-			ConfigConstants.TASK_MANAGER_MEMORY_SEGMENT_SIZE_KEY,
-			ConfigConstants.DEFAULT_TASK_MANAGER_MEMORY_SEGMENT_SIZE);
-		// check page size of for minimum size
-		checkConfigParameter(pageSize >= MemoryManager.MIN_PAGE_SIZE, pageSize,
-			ConfigConstants.TASK_MANAGER_MEMORY_SEGMENT_SIZE_KEY,
-			"Minimum memory segment size is " + MemoryManager.MIN_PAGE_SIZE);
-		// check page size for power of two
-		checkConfigParameter(MathUtils.isPowerOf2(pageSize), pageSize,
-			ConfigConstants.TASK_MANAGER_MEMORY_SEGMENT_SIZE_KEY,
-			"Memory segment size must be a power of 2.");
-
-		// check whether we use heap or off-heap memory
-		final MemoryType memType;
-		if (configuration.getBoolean(ConfigConstants.TASK_MANAGER_MEMORY_OFF_HEAP_KEY, false)) {
-			memType = MemoryType.OFF_HEAP;
-		} else {
-			memType = MemoryType.HEAP;
-		}
-
-		// initialize the memory segment factory accordingly
-		if (memType == MemoryType.HEAP) {
-			if (!MemorySegmentFactory.initializeIfNotInitialized(HeapMemorySegment.FACTORY)) {
-				throw new Exception("Memory type is set to heap memory, but memory segment " +
-					"factory has been initialized for off-heap memory segments");
-			}
-		} else {
-			if (!MemorySegmentFactory.initializeIfNotInitialized(HybridMemorySegment.FACTORY)) {
-				throw new Exception("Memory type is set to off-heap memory, but memory segment " +
-					"factory has been initialized for heap memory segments");
-			}
-		}
-
-		final String[] tmpDirs = configuration.getString(
-			ConfigConstants.TASK_MANAGER_TMP_DIR_KEY,
-			ConfigConstants.DEFAULT_TASK_MANAGER_TMP_PATH).split(",|" + File.pathSeparator);
-
-		final NettyConfig nettyConfig;
-		if (!localTaskManagerCommunication) {
-			nettyConfig = new NettyConfig(connectionInfo.address(), connectionInfo.dataPort(), pageSize, slots, configuration);
-		} else {
-			nettyConfig = null;
-		}
-
-		// Default spill I/O mode for intermediate results
-		final String syncOrAsync = configuration.getString(
-			ConfigConstants.TASK_MANAGER_NETWORK_DEFAULT_IO_MODE,
-			ConfigConstants.DEFAULT_TASK_MANAGER_NETWORK_DEFAULT_IO_MODE);
-
-		final IOMode ioMode;
-		if (syncOrAsync.equals("async")) {
-			ioMode = IOManager.IOMode.ASYNC;
-		} else {
-			ioMode = IOManager.IOMode.SYNC;
-		}
-
-		final int queryServerPort =  configuration.getInteger(
-			ConfigConstants.QUERYABLE_STATE_SERVER_PORT,
-			ConfigConstants.DEFAULT_QUERYABLE_STATE_SERVER_PORT);
-
-		final int queryServerNetworkThreads =  configuration.getInteger(
-			ConfigConstants.QUERYABLE_STATE_SERVER_NETWORK_THREADS,
-			ConfigConstants.DEFAULT_QUERYABLE_STATE_SERVER_NETWORK_THREADS);
-
-		final int queryServerQueryThreads =  configuration.getInteger(
-			ConfigConstants.QUERYABLE_STATE_SERVER_QUERY_THREADS,
-			ConfigConstants.DEFAULT_QUERYABLE_STATE_SERVER_QUERY_THREADS);
-
-		final NetworkEnvironmentConfiguration networkConfig = new NetworkEnvironmentConfiguration(
-			numNetworkBuffers,
-			pageSize,
-			memType,
-			ioMode,
-			queryServerPort,
-			queryServerNetworkThreads,
-			queryServerQueryThreads,
-			localTaskManagerCommunication ? Option.<NettyConfig>empty() : new Some<>(nettyConfig),
-			new Tuple2<>(500, 3000));
-
-		// ----> timeouts, library caching, profiling
-
-		final FiniteDuration timeout;
-		try {
-			timeout = AkkaUtils.getTimeout(configuration);
-		} catch (Exception e) {
-			throw new IllegalArgumentException(
-				"Invalid format for '" + ConfigConstants.AKKA_ASK_TIMEOUT +
-					"'.Use formats like '50 s' or '1 min' to specify the timeout.");
-		}
-		LOG.info("Messages between TaskManager and JobManager have a max timeout of " + timeout);
-
-		final long cleanupInterval = configuration.getLong(
-			ConfigConstants.LIBRARY_CACHE_MANAGER_CLEANUP_INTERVAL,
-			ConfigConstants.DEFAULT_LIBRARY_CACHE_MANAGER_CLEANUP_INTERVAL) * 1000;
-
-		final FiniteDuration finiteRegistrationDuration;
-		try {
-			Duration maxRegistrationDuration = Duration.create(configuration.getString(
-				ConfigConstants.TASK_MANAGER_MAX_REGISTRATION_DURATION,
-				ConfigConstants.DEFAULT_TASK_MANAGER_MAX_REGISTRATION_DURATION));
-			if (maxRegistrationDuration.isFinite()) {
-				finiteRegistrationDuration = new FiniteDuration(maxRegistrationDuration.toSeconds(), TimeUnit.SECONDS);
-			} else {
-				finiteRegistrationDuration = null;
-			}
-		} catch (NumberFormatException e) {
-			throw new IllegalArgumentException("Invalid format for parameter " +
-				ConfigConstants.TASK_MANAGER_MAX_REGISTRATION_DURATION, e);
-		}
-
-		final FiniteDuration initialRegistrationPause;
-		try {
-			Duration pause = Duration.create(configuration.getString(
-				ConfigConstants.TASK_MANAGER_INITIAL_REGISTRATION_PAUSE,
-				ConfigConstants.DEFAULT_TASK_MANAGER_INITIAL_REGISTRATION_PAUSE));
-			if (pause.isFinite()) {
-				initialRegistrationPause = new FiniteDuration(pause.toSeconds(), TimeUnit.SECONDS);
-			} else {
-				throw new IllegalArgumentException("The initial registration pause must be finite: " + pause);
-			}
-		} catch (NumberFormatException e) {
-			throw new IllegalArgumentException("Invalid format for parameter " +
-				ConfigConstants.TASK_MANAGER_INITIAL_REGISTRATION_PAUSE, e);
-		}
-
-		final FiniteDuration maxRegistrationPause;
-		try {
-			Duration pause = Duration.create(configuration.getString(
-				ConfigConstants.TASK_MANAGER_MAX_REGISTARTION_PAUSE,
-				ConfigConstants.DEFAULT_TASK_MANAGER_MAX_REGISTRATION_PAUSE));
-			if (pause.isFinite()) {
-				maxRegistrationPause = new FiniteDuration(pause.toSeconds(), TimeUnit.SECONDS);
-			} else {
-				throw new IllegalArgumentException("The maximum registration pause must be finite: " + pause);
-			}
-		} catch (NumberFormatException e) {
-			throw new IllegalArgumentException("Invalid format for parameter " +
-				ConfigConstants.TASK_MANAGER_INITIAL_REGISTRATION_PAUSE, e);
-		}
-
-		final FiniteDuration refusedRegistrationPause;
-		try {
-			Duration pause = Duration.create(configuration.getString(
-				ConfigConstants.TASK_MANAGER_REFUSED_REGISTRATION_PAUSE,
-				ConfigConstants.DEFAULT_TASK_MANAGER_REFUSED_REGISTRATION_PAUSE));
-			if (pause.isFinite()) {
-				refusedRegistrationPause = new FiniteDuration(pause.toSeconds(), TimeUnit.SECONDS);
-			} else {
-				throw new IllegalArgumentException("The refused registration pause must be finite: " + pause);
-			}
-		} catch (NumberFormatException e) {
-			throw new IllegalArgumentException("Invalid format for parameter " +
-				ConfigConstants.TASK_MANAGER_INITIAL_REGISTRATION_PAUSE, e);
-		}
-
-		return new TaskExecutorConfiguration(
-			tmpDirs,
-			cleanupInterval,
-			connectionInfo,
-			networkConfig,
-			timeout,
-			finiteRegistrationDuration,
-			slots,
-			configuration,
-			initialRegistrationPause,
-			maxRegistrationPause,
-			refusedRegistrationPause);
-	}
-
-	/**
-	 * Validates a condition for a config parameter and displays a standard exception, if the
-	 * the condition does not hold.
-	 *
-	 * @param condition    The condition that must hold. If the condition is false, an exception is thrown.
-	 * @param parameter    The parameter value. Will be shown in the exception message.
-	 * @param name         The name of the config parameter. Will be shown in the exception message.
-	 * @param errorMessage The optional custom error message to append to the exception message.
-	 */
-	private static void checkConfigParameter(
-		boolean condition,
-		Object parameter,
-		String name,
-		String errorMessage) {
-		if (!condition) {
-			throw new IllegalConfigurationException("Invalid configuration value for " + name + " : " + parameter + " - " + errorMessage);
-		}
-	}
-
-	/**
-	 * Validates that all the directories denoted by the strings do actually exist, are proper
-	 * directories (not files), and are writable.
-	 *
-	 * @param tmpDirs The array of directory paths to check.
-	 * @throws Exception Thrown if any of the directories does not exist or is not writable
-	 *                   or is a file, rather than a directory.
-	 */
-	private static void checkTempDirs(String[] tmpDirs) throws IOException {
-		for (String dir : tmpDirs) {
-			if (dir != null && !dir.equals("")) {
-				File file = new File(dir);
-				if (!file.exists()) {
-					throw new IOException("Temporary file directory " + file.getAbsolutePath() + " does not exist.");
-				}
-				if (!file.isDirectory()) {
-					throw new IOException("Temporary file directory " + file.getAbsolutePath() + " is not a directory.");
-				}
-				if (!file.canWrite()) {
-					throw new IOException("Temporary file directory " + file.getAbsolutePath() + " is not writable.");
-				}
-
-				if (LOG.isInfoEnabled()) {
-					long totalSpaceGb = file.getTotalSpace() >> 30;
-					long usableSpaceGb = file.getUsableSpace() >> 30;
-					double usablePercentage = (double)usableSpaceGb / totalSpaceGb * 100;
-					String path = file.getAbsolutePath();
-					LOG.info(String.format("Temporary file directory '%s': total %d GB, " + "usable %d GB (%.2f%% usable)",
-						path, totalSpaceGb, usableSpaceGb, usablePercentage));
-				}
-			} else {
-				throw new IllegalArgumentException("Temporary file directory #$id is null.");
-			}
-		}
-	}
-
-	// ------------------------------------------------------------------------
-	//  Properties
-	// ------------------------------------------------------------------------
-
-	public ResourceID getResourceID() {
-		return resourceID;
-	}
-
-	// ------------------------------------------------------------------------
-	//  Error Handling
-	// ------------------------------------------------------------------------
-
-	/**
-	 * Notifies the TaskExecutor that a fatal error has occurred and it cannot proceed.
-	 * This method should be used when asynchronous threads want to notify the
-	 * TaskExecutor of a fatal error.
-	 *
-	 * @param t The exception describing the fatal error
-	 */
-	void onFatalErrorAsync(final Throwable t) {
-		runAsync(new Runnable() {
-			@Override
-			public void run() {
-				onFatalError(t);
-			}
-		});
-	}
-
-	/**
-	 * Notifies the TaskExecutor that a fatal error has occurred and it cannot proceed.
-	 * This method must only be called from within the TaskExecutor's main thread.
-	 *
-	 * @param t The exception describing the fatal error
-	 */
-	void onFatalError(Throwable t) {
-		// to be determined, probably delegate to a fatal error handler that 
-		// would either log (mini cluster) ot kill the process (yarn, mesos, ...)
-		log.error("FATAL ERROR", t);
-	}
-
-	// ------------------------------------------------------------------------
-	//  Access to fields for testing
-	// ------------------------------------------------------------------------
-
-	@VisibleForTesting
-	TaskExecutorToResourceManagerConnection getResourceManagerConnection() {
-		return resourceManagerConnection;
-	}
-
-	// ------------------------------------------------------------------------
-	//  Utility classes
-	// ------------------------------------------------------------------------
-
-	/**
-	 * The listener for leader changes of the resource manager
-	 */
-	private class ResourceManagerLeaderListener implements LeaderRetrievalListener {
-
-		@Override
-		public void notifyLeaderAddress(String leaderAddress, UUID leaderSessionID) {
-			getSelf().notifyOfNewResourceManagerLeader(leaderAddress, leaderSessionID);
-		}
-
-		@Override
-		public void handleError(Exception exception) {
-			onFatalErrorAsync(exception);
-		}
-	}
-}

http://git-wip-us.apache.org/repos/asf/flink/blob/2f12ba3e/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorConfiguration.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorConfiguration.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorConfiguration.java
deleted file mode 100644
index 32484e1..0000000
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorConfiguration.java
+++ /dev/null
@@ -1,151 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.flink.runtime.rpc.taskexecutor;
-
-import org.apache.flink.configuration.Configuration;
-import org.apache.flink.runtime.instance.InstanceConnectionInfo;
-import org.apache.flink.runtime.taskmanager.NetworkEnvironmentConfiguration;
-
-import scala.concurrent.duration.FiniteDuration;
-
-import java.io.Serializable;
-import java.util.concurrent.TimeUnit;
-
-import static org.apache.flink.util.Preconditions.checkNotNull;
-
-/**
- * {@link TaskExecutor} Configuration
- */
-public class TaskExecutorConfiguration implements Serializable {
-
-	private static final long serialVersionUID = 1L;
-
-	private final String[] tmpDirPaths;
-
-	private final long cleanupInterval;
-
-	private final int numberOfSlots;
-
-	private final Configuration configuration;
-
-	private final FiniteDuration timeout;
-	private final FiniteDuration maxRegistrationDuration;
-	private final FiniteDuration initialRegistrationPause;
-	private final FiniteDuration maxRegistrationPause;
-	private final FiniteDuration refusedRegistrationPause;
-
-	private final NetworkEnvironmentConfiguration networkConfig;
-
-	private final InstanceConnectionInfo connectionInfo;
-
-	public TaskExecutorConfiguration(
-			String[] tmpDirPaths,
-			long cleanupInterval,
-			InstanceConnectionInfo connectionInfo,
-			NetworkEnvironmentConfiguration networkConfig,
-			FiniteDuration timeout,
-			FiniteDuration maxRegistrationDuration,
-			int numberOfSlots,
-			Configuration configuration) {
-
-		this (tmpDirPaths,
-			cleanupInterval,
-			connectionInfo,
-			networkConfig,
-			timeout,
-			maxRegistrationDuration,
-			numberOfSlots,
-			configuration,
-			new FiniteDuration(500, TimeUnit.MILLISECONDS),
-			new FiniteDuration(30, TimeUnit.SECONDS),
-			new FiniteDuration(10, TimeUnit.SECONDS));
-	}
-
-	public TaskExecutorConfiguration(
-			String[] tmpDirPaths,
-			long cleanupInterval,
-			InstanceConnectionInfo connectionInfo,
-			NetworkEnvironmentConfiguration networkConfig,
-			FiniteDuration timeout,
-			FiniteDuration maxRegistrationDuration,
-			int numberOfSlots,
-			Configuration configuration,
-			FiniteDuration initialRegistrationPause,
-			FiniteDuration maxRegistrationPause,
-			FiniteDuration refusedRegistrationPause) {
-
-		this.tmpDirPaths = checkNotNull(tmpDirPaths);
-		this.cleanupInterval = checkNotNull(cleanupInterval);
-		this.connectionInfo = checkNotNull(connectionInfo);
-		this.networkConfig = checkNotNull(networkConfig);
-		this.timeout = checkNotNull(timeout);
-		this.maxRegistrationDuration = maxRegistrationDuration;
-		this.numberOfSlots = checkNotNull(numberOfSlots);
-		this.configuration = checkNotNull(configuration);
-		this.initialRegistrationPause = checkNotNull(initialRegistrationPause);
-		this.maxRegistrationPause = checkNotNull(maxRegistrationPause);
-		this.refusedRegistrationPause = checkNotNull(refusedRegistrationPause);
-	}
-
-	// --------------------------------------------------------------------------------------------
-	//  Properties
-	// --------------------------------------------------------------------------------------------
-
-	public String[] getTmpDirPaths() {
-		return tmpDirPaths;
-	}
-
-	public long getCleanupInterval() {
-		return cleanupInterval;
-	}
-
-	public InstanceConnectionInfo getConnectionInfo() { return connectionInfo; }
-
-	public NetworkEnvironmentConfiguration getNetworkConfig() { return networkConfig; }
-
-	public FiniteDuration getTimeout() {
-		return timeout;
-	}
-
-	public FiniteDuration getMaxRegistrationDuration() {
-		return maxRegistrationDuration;
-	}
-
-	public int getNumberOfSlots() {
-		return numberOfSlots;
-	}
-
-	public Configuration getConfiguration() {
-		return configuration;
-	}
-
-	public FiniteDuration getInitialRegistrationPause() {
-		return initialRegistrationPause;
-	}
-
-	public FiniteDuration getMaxRegistrationPause() {
-		return maxRegistrationPause;
-	}
-
-	public FiniteDuration getRefusedRegistrationPause() {
-		return refusedRegistrationPause;
-	}
-
-}
-

http://git-wip-us.apache.org/repos/asf/flink/blob/2f12ba3e/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorGateway.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorGateway.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorGateway.java
deleted file mode 100644
index b0b21b0..0000000
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorGateway.java
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.flink.runtime.rpc.taskexecutor;
-
-import org.apache.flink.runtime.rpc.RpcGateway;
-
-import java.util.UUID;
-
-/**
- * {@link TaskExecutor} RPC gateway interface
- */
-public interface TaskExecutorGateway extends RpcGateway {
-
-	// ------------------------------------------------------------------------
-	//  ResourceManager handlers
-	// ------------------------------------------------------------------------
-
-	void notifyOfNewResourceManagerLeader(String address, UUID resourceManagerLeaderId);
-}

http://git-wip-us.apache.org/repos/asf/flink/blob/2f12ba3e/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorRegistrationSuccess.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorRegistrationSuccess.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorRegistrationSuccess.java
deleted file mode 100644
index 641102d..0000000
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorRegistrationSuccess.java
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.flink.runtime.rpc.taskexecutor;
-
-import org.apache.flink.runtime.instance.InstanceID;
-import org.apache.flink.runtime.rpc.registration.RegistrationResponse;
-
-import java.io.Serializable;
-
-/**
- * Base class for responses from the ResourceManager to a registration attempt by a
- * TaskExecutor.
- */
-public final class TaskExecutorRegistrationSuccess extends RegistrationResponse.Success implements Serializable {
-
-	private static final long serialVersionUID = 1L;
-
-	private final InstanceID registrationId;
-
-	private final long heartbeatInterval;
-
-	/**
-	 * Create a new {@code TaskExecutorRegistrationSuccess} message.
-	 * 
-	 * @param registrationId     The ID that the ResourceManager assigned the registration.
-	 * @param heartbeatInterval  The interval in which the ResourceManager will heartbeat the TaskExecutor.
-	 */
-	public TaskExecutorRegistrationSuccess(InstanceID registrationId, long heartbeatInterval) {
-		this.registrationId = registrationId;
-		this.heartbeatInterval = heartbeatInterval;
-	}
-
-	/**
-	 * Gets the ID that the ResourceManager assigned the registration.
-	 */
-	public InstanceID getRegistrationId() {
-		return registrationId;
-	}
-
-	/**
-	 * Gets the interval in which the ResourceManager will heartbeat the TaskExecutor.
-	 */
-	public long getHeartbeatInterval() {
-		return heartbeatInterval;
-	}
-
-	@Override
-	public String toString() {
-		return "TaskExecutorRegistrationSuccess (" + registrationId + " / " + heartbeatInterval + ')';
-	}
-
-}
-
-
-
-
-
-
-

http://git-wip-us.apache.org/repos/asf/flink/blob/2f12ba3e/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorToResourceManagerConnection.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorToResourceManagerConnection.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorToResourceManagerConnection.java
deleted file mode 100644
index 7ccc879..0000000
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorToResourceManagerConnection.java
+++ /dev/null
@@ -1,198 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.flink.runtime.rpc.taskexecutor;
-
-import akka.dispatch.OnFailure;
-import akka.dispatch.OnSuccess;
-
-import org.apache.flink.api.java.tuple.Tuple2;
-import org.apache.flink.runtime.clusterframework.types.ResourceID;
-import org.apache.flink.runtime.instance.InstanceID;
-import org.apache.flink.runtime.rpc.RpcService;
-import org.apache.flink.runtime.rpc.registration.RegistrationResponse;
-import org.apache.flink.runtime.rpc.registration.RetryingRegistration;
-import org.apache.flink.runtime.rpc.resourcemanager.ResourceManagerGateway;
-
-import org.slf4j.Logger;
-
-import scala.concurrent.Future;
-import scala.concurrent.duration.FiniteDuration;
-
-import java.util.UUID;
-import java.util.concurrent.TimeUnit;
-
-import static org.apache.flink.util.Preconditions.checkNotNull;
-import static org.apache.flink.util.Preconditions.checkState;
-
-/**
- * The connection between a TaskExecutor and the ResourceManager.
- */
-public class TaskExecutorToResourceManagerConnection {
-
-	/** the logger for all log messages of this class */
-	private final Logger log;
-
-	/** the TaskExecutor whose connection to the ResourceManager this represents */
-	private final TaskExecutor taskExecutor;
-
-	private final UUID resourceManagerLeaderId;
-
-	private final String resourceManagerAddress;
-
-	private TaskExecutorToResourceManagerConnection.ResourceManagerRegistration pendingRegistration;
-
-	private ResourceManagerGateway registeredResourceManager;
-
-	private InstanceID registrationId;
-
-	/** flag indicating that the connection is closed */
-	private volatile boolean closed;
-
-
-	public TaskExecutorToResourceManagerConnection(
-			Logger log,
-			TaskExecutor taskExecutor,
-			String resourceManagerAddress,
-			UUID resourceManagerLeaderId) {
-
-		this.log = checkNotNull(log);
-		this.taskExecutor = checkNotNull(taskExecutor);
-		this.resourceManagerAddress = checkNotNull(resourceManagerAddress);
-		this.resourceManagerLeaderId = checkNotNull(resourceManagerLeaderId);
-	}
-
-	// ------------------------------------------------------------------------
-	//  Life cycle
-	// ------------------------------------------------------------------------
-
-	@SuppressWarnings("unchecked")
-	public void start() {
-		checkState(!closed, "The connection is already closed");
-		checkState(!isRegistered() && pendingRegistration == null, "The connection is already started");
-
-		pendingRegistration = new TaskExecutorToResourceManagerConnection.ResourceManagerRegistration(
-				log, taskExecutor.getRpcService(),
-				resourceManagerAddress, resourceManagerLeaderId,
-				taskExecutor.getAddress(), taskExecutor.getResourceID());
-		pendingRegistration.startRegistration();
-
-		Future<Tuple2<ResourceManagerGateway, TaskExecutorRegistrationSuccess>> future = pendingRegistration.getFuture();
-		
-		future.onSuccess(new OnSuccess<Tuple2<ResourceManagerGateway, TaskExecutorRegistrationSuccess>>() {
-			@Override
-			public void onSuccess(Tuple2<ResourceManagerGateway, TaskExecutorRegistrationSuccess> result) {
-				registeredResourceManager = result.f0;
-				registrationId = result.f1.getRegistrationId();
-			}
-		}, taskExecutor.getMainThreadExecutionContext());
-		
-		// this future should only ever fail if there is a bug, not if the registration is declined
-		future.onFailure(new OnFailure() {
-			@Override
-			public void onFailure(Throwable failure) {
-				taskExecutor.onFatalError(failure);
-			}
-		}, taskExecutor.getMainThreadExecutionContext());
-	}
-
-	public void close() {
-		closed = true;
-
-		// make sure we do not keep re-trying forever
-		if (pendingRegistration != null) {
-			pendingRegistration.cancel();
-		}
-	}
-
-	public boolean isClosed() {
-		return closed;
-	}
-
-	// ------------------------------------------------------------------------
-	//  Properties
-	// ------------------------------------------------------------------------
-
-	public UUID getResourceManagerLeaderId() {
-		return resourceManagerLeaderId;
-	}
-
-	public String getResourceManagerAddress() {
-		return resourceManagerAddress;
-	}
-
-	/**
-	 * Gets the ResourceManagerGateway. This returns null until the registration is completed.
-	 */
-	public ResourceManagerGateway getResourceManager() {
-		return registeredResourceManager;
-	}
-
-	/**
-	 * Gets the ID under which the TaskExecutor is registered at the ResourceManager.
-	 * This returns null until the registration is completed.
-	 */
-	public InstanceID getRegistrationId() {
-		return registrationId;
-	}
-
-	public boolean isRegistered() {
-		return registeredResourceManager != null;
-	}
-
-	// ------------------------------------------------------------------------
-
-	@Override
-	public String toString() {
-		return String.format("Connection to ResourceManager %s (leaderId=%s)",
-				resourceManagerAddress, resourceManagerLeaderId); 
-	}
-
-	// ------------------------------------------------------------------------
-	//  Utilities
-	// ------------------------------------------------------------------------
-
-	private static class ResourceManagerRegistration
-			extends RetryingRegistration<ResourceManagerGateway, TaskExecutorRegistrationSuccess> {
-
-		private final String taskExecutorAddress;
-		
-		private final ResourceID resourceID;
-
-		ResourceManagerRegistration(
-				Logger log,
-				RpcService rpcService,
-				String targetAddress,
-				UUID leaderId,
-				String taskExecutorAddress,
-				ResourceID resourceID) {
-
-			super(log, rpcService, "ResourceManager", ResourceManagerGateway.class, targetAddress, leaderId);
-			this.taskExecutorAddress = checkNotNull(taskExecutorAddress);
-			this.resourceID = checkNotNull(resourceID);
-		}
-
-		@Override
-		protected Future<RegistrationResponse> invokeRegistration(
-				ResourceManagerGateway resourceManager, UUID leaderId, long timeoutMillis) throws Exception {
-
-			FiniteDuration timeout = new FiniteDuration(timeoutMillis, TimeUnit.MILLISECONDS);
-			return resourceManager.registerTaskExecutor(leaderId, taskExecutorAddress, resourceID, timeout);
-		}
-	}
-}

http://git-wip-us.apache.org/repos/asf/flink/blob/2f12ba3e/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/SlotReport.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/SlotReport.java b/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/SlotReport.java
new file mode 100644
index 0000000..a5de2d5
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/SlotReport.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.taskexecutor;
+
+import org.apache.flink.runtime.clusterframework.types.ResourceID;
+
+import java.io.Serializable;
+import java.util.List;
+
+import static org.apache.flink.util.Preconditions.checkNotNull;
+
+/**
+ * A report about the current status of all slots of the TaskExecutor, describing
+ * which slots are available and allocated, and what jobs (JobManagers) the allocated slots
+ * have been allocated to.
+ */
+public class SlotReport implements Serializable {
+
+	private static final long serialVersionUID = -3150175198722481689L;
+
+	/** The slots status of the TaskManager */
+	private final List<SlotStatus> slotsStatus;
+
+	/** The resource id which identifies the TaskManager */
+	private final ResourceID resourceID;
+
+	public SlotReport(final List<SlotStatus> slotsStatus, final ResourceID resourceID) {
+		this.slotsStatus = checkNotNull(slotsStatus);
+		this.resourceID = checkNotNull(resourceID);
+	}
+
+	public List<SlotStatus> getSlotsStatus() {
+		return slotsStatus;
+	}
+
+	public ResourceID getResourceID() {
+		return resourceID;
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/2f12ba3e/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/SlotStatus.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/SlotStatus.java b/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/SlotStatus.java
new file mode 100644
index 0000000..744b674
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/SlotStatus.java
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.taskexecutor;
+
+import org.apache.flink.api.common.JobID;
+import org.apache.flink.runtime.clusterframework.types.AllocationID;
+import org.apache.flink.runtime.clusterframework.types.ResourceProfile;
+import org.apache.flink.runtime.clusterframework.types.SlotID;
+
+import java.io.Serializable;
+
+import static org.apache.flink.util.Preconditions.checkNotNull;
+
+/**
+ * This describes the slot current status which located in TaskManager.
+ */
+public class SlotStatus implements Serializable {
+
+	private static final long serialVersionUID = 5099191707339664493L;
+
+	/** slotID to identify a slot */
+	private final SlotID slotID;
+
+	/** the resource profile of the slot */
+	private final ResourceProfile profiler;
+
+	/** if the slot is allocated, allocationId identify its allocation; else, allocationId is null */
+	private final AllocationID allocationID;
+
+	/** if the slot is allocated, jobId identify which job this slot is allocated to; else, jobId is null */
+	private final JobID jobID;
+
+	public SlotStatus(SlotID slotID, ResourceProfile profiler) {
+		this(slotID, profiler, null, null);
+	}
+
+	public SlotStatus(SlotID slotID, ResourceProfile profiler, AllocationID allocationID, JobID jobID) {
+		this.slotID = checkNotNull(slotID, "slotID cannot be null");
+		this.profiler = checkNotNull(profiler, "profile cannot be null");
+		this.allocationID = allocationID;
+		this.jobID = jobID;
+	}
+
+	/**
+	 * Get the unique identification of this slot
+	 *
+	 * @return The slot id
+	 */
+	public SlotID getSlotID() {
+		return slotID;
+	}
+
+	/**
+	 * Get the resource profile of this slot
+	 *
+	 * @return The resource profile
+	 */
+	public ResourceProfile getProfiler() {
+		return profiler;
+	}
+
+	/**
+	 * Get the allocation id of this slot
+	 *
+	 * @return The allocation id if this slot is allocated, otherwise null
+	 */
+	public AllocationID getAllocationID() {
+		return allocationID;
+	}
+
+	/**
+	 * Get the job id of the slot allocated for
+	 *
+	 * @return The job id if this slot is allocated, otherwise null
+	 */
+	public JobID getJobID() {
+		return jobID;
+	}
+
+	@Override
+	public boolean equals(Object o) {
+		if (this == o) {
+			return true;
+		}
+		if (o == null || getClass() != o.getClass()) {
+			return false;
+		}
+
+		SlotStatus that = (SlotStatus) o;
+
+		if (!slotID.equals(that.slotID)) {
+			return false;
+		}
+		if (!profiler.equals(that.profiler)) {
+			return false;
+		}
+		if (allocationID != null ? !allocationID.equals(that.allocationID) : that.allocationID != null) {
+			return false;
+		}
+		return jobID != null ? jobID.equals(that.jobID) : that.jobID == null;
+
+	}
+
+	@Override
+	public int hashCode() {
+		int result = slotID.hashCode();
+		result = 31 * result + profiler.hashCode();
+		result = 31 * result + (allocationID != null ? allocationID.hashCode() : 0);
+		result = 31 * result + (jobID != null ? jobID.hashCode() : 0);
+		return result;
+	}
+
+}


[37/50] [abbrv] flink git commit: [FLINK-4529] [flip-6] Move TaskExecutor, JobMaster and ResourceManager out of the rpc package

Posted by tr...@apache.org.
http://git-wip-us.apache.org/repos/asf/flink/blob/2f12ba3e/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/SlotRequest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/SlotRequest.java b/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/SlotRequest.java
new file mode 100644
index 0000000..896421b
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/SlotRequest.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.resourcemanager;
+
+import org.apache.flink.api.common.JobID;
+import org.apache.flink.runtime.clusterframework.types.AllocationID;
+import org.apache.flink.runtime.clusterframework.types.ResourceProfile;
+
+import java.io.Serializable;
+
+import static org.apache.flink.util.Preconditions.checkNotNull;
+
+/**
+ * This describes the requirement of the slot, mainly used by JobManager requesting slot from ResourceManager.
+ */
+public class SlotRequest implements Serializable {
+
+	private static final long serialVersionUID = -6586877187990445986L;
+
+	/** The JobID of the slot requested for */
+	private final JobID jobId;
+
+	/** The unique identification of this request */
+	private final AllocationID allocationId;
+
+	/** The resource profile of the required slot */
+	private final ResourceProfile resourceProfile;
+
+	public SlotRequest(JobID jobId, AllocationID allocationId, ResourceProfile resourceProfile) {
+		this.jobId = checkNotNull(jobId);
+		this.allocationId = checkNotNull(allocationId);
+		this.resourceProfile = checkNotNull(resourceProfile);
+	}
+
+	/**
+	 * Get the JobID of the slot requested for.
+	 * @return The job id
+	 */
+	public JobID getJobId() {
+		return jobId;
+	}
+
+	/**
+	 * Get the unique identification of this request
+	 * @return the allocation id
+	 */
+	public AllocationID getAllocationId() {
+		return allocationId;
+	}
+
+	/**
+	 * Get the resource profile of the desired slot
+	 * @return The resource profile
+	 */
+	public ResourceProfile getResourceProfile() {
+		return resourceProfile;
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/2f12ba3e/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/jobmaster/JobMaster.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/jobmaster/JobMaster.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/jobmaster/JobMaster.java
deleted file mode 100644
index a046cb8..0000000
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/jobmaster/JobMaster.java
+++ /dev/null
@@ -1,244 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.flink.runtime.rpc.jobmaster;
-
-import org.apache.flink.api.common.JobID;
-import org.apache.flink.configuration.Configuration;
-import org.apache.flink.runtime.highavailability.HighAvailabilityServices;
-import org.apache.flink.runtime.jobgraph.JobGraph;
-import org.apache.flink.runtime.leaderelection.LeaderContender;
-import org.apache.flink.runtime.leaderelection.LeaderElectionService;
-import org.apache.flink.runtime.messages.Acknowledge;
-import org.apache.flink.runtime.rpc.RpcMethod;
-import org.apache.flink.runtime.rpc.resourcemanager.ResourceManagerGateway;
-import org.apache.flink.runtime.rpc.RpcEndpoint;
-import org.apache.flink.runtime.rpc.RpcService;
-import org.apache.flink.runtime.taskmanager.TaskExecutionState;
-import org.apache.flink.util.Preconditions;
-
-import java.util.UUID;
-
-/**
- * JobMaster implementation. The job master is responsible for the execution of a single
- * {@link org.apache.flink.runtime.jobgraph.JobGraph}.
- * <p>
- * It offers the following methods as part of its rpc interface to interact with the JobMaster
- * remotely:
- * <ul>
- *     <li>{@link #updateTaskExecutionState(TaskExecutionState)} updates the task execution state for
- * given task</li>
- * </ul>
- */
-public class JobMaster extends RpcEndpoint<JobMasterGateway> {
-
-	/** Gateway to connected resource manager, null iff not connected */
-	private ResourceManagerGateway resourceManager = null;
-
-	/** Logical representation of the job */
-	private final JobGraph jobGraph;
-	private final JobID jobID;
-
-	/** Configuration of the job */
-	private final Configuration configuration;
-
-	/** Service to contend for and retrieve the leadership of JM and RM */
-	private final HighAvailabilityServices highAvailabilityServices;
-
-	/** Leader Management */
-	private LeaderElectionService leaderElectionService = null;
-	private UUID leaderSessionID;
-
-	/**
-	 * The JM's Constructor
-	 *
-	 * @param jobGraph The representation of the job's execution plan
-	 * @param configuration The job's configuration
-	 * @param rpcService The RPC service at which the JM serves
-	 * @param highAvailabilityService The cluster's HA service from the JM can elect and retrieve leaders.
-	 */
-	public JobMaster(
-		JobGraph jobGraph,
-		Configuration configuration,
-		RpcService rpcService,
-		HighAvailabilityServices highAvailabilityService) {
-
-		super(rpcService);
-
-		this.jobGraph = Preconditions.checkNotNull(jobGraph);
-		this.jobID = Preconditions.checkNotNull(jobGraph.getJobID());
-
-		this.configuration = Preconditions.checkNotNull(configuration);
-
-		this.highAvailabilityServices = Preconditions.checkNotNull(highAvailabilityService);
-	}
-
-	public ResourceManagerGateway getResourceManager() {
-		return resourceManager;
-	}
-
-	//----------------------------------------------------------------------------------------------
-	// Initialization methods
-	//----------------------------------------------------------------------------------------------
-	public void start() {
-		super.start();
-
-		// register at the election once the JM starts
-		registerAtElectionService();
-	}
-
-
-	//----------------------------------------------------------------------------------------------
-	// JobMaster Leadership methods
-	//----------------------------------------------------------------------------------------------
-
-	/**
-	 * Retrieves the election service and contend for the leadership.
-	 */
-	private void registerAtElectionService() {
-		try {
-			leaderElectionService = highAvailabilityServices.getJobMasterLeaderElectionService(jobID);
-			leaderElectionService.start(new JobMasterLeaderContender());
-		} catch (Exception e) {
-			throw new RuntimeException("Fail to register at the election of JobMaster", e);
-		}
-	}
-
-	/**
-	 * Start the execution when the leadership is granted.
-	 *
-	 * @param newLeaderSessionID The identifier of the new leadership session
-	 */
-	public void grantJobMasterLeadership(final UUID newLeaderSessionID) {
-		runAsync(new Runnable() {
-			@Override
-			public void run() {
-				log.info("JobManager {} grants leadership with session id {}.", getAddress(), newLeaderSessionID);
-
-				// The operation may be blocking, but since JM is idle before it grants the leadership, it's okay that
-				// JM waits here for the operation's completeness.
-				leaderSessionID = newLeaderSessionID;
-				leaderElectionService.confirmLeaderSessionID(newLeaderSessionID);
-
-				// TODO:: execute the job when the leadership is granted.
-			}
-		});
-	}
-
-	/**
-	 * Stop the execution when the leadership is revoked.
-	 */
-	public void revokeJobMasterLeadership() {
-		runAsync(new Runnable() {
-			@Override
-			public void run() {
-				log.info("JobManager {} was revoked leadership.", getAddress());
-
-				// TODO:: cancel the job's execution and notify all listeners
-				cancelAndClearEverything(new Exception("JobManager is no longer the leader."));
-
-				leaderSessionID = null;
-			}
-		});
-	}
-
-	/**
-	 * Handles error occurring in the leader election service
-	 *
-	 * @param exception Exception thrown in the leader election service
-	 */
-	public void onJobMasterElectionError(final Exception exception) {
-		runAsync(new Runnable() {
-			@Override
-			public void run() {
-				log.error("Received an error from the LeaderElectionService.", exception);
-
-				// TODO:: cancel the job's execution and shutdown the JM
-				cancelAndClearEverything(exception);
-
-				leaderSessionID = null;
-			}
-		});
-
-	}
-
-	//----------------------------------------------------------------------------------------------
-	// RPC methods
-	//----------------------------------------------------------------------------------------------
-
-	/**
-	 * Updates the task execution state for a given task.
-	 *
-	 * @param taskExecutionState New task execution state for a given task
-	 * @return Acknowledge the task execution state update
-	 */
-	@RpcMethod
-	public Acknowledge updateTaskExecutionState(TaskExecutionState taskExecutionState) {
-		System.out.println("TaskExecutionState: " + taskExecutionState);
-		return Acknowledge.get();
-	}
-
-	/**
-	 * Triggers the registration of the job master at the resource manager.
-	 *
-	 * @param address Address of the resource manager
-	 */
-	@RpcMethod
-	public void registerAtResourceManager(final String address) {
-		//TODO:: register at the RM
-	}
-
-	//----------------------------------------------------------------------------------------------
-	// Helper methods
-	//----------------------------------------------------------------------------------------------
-
-	/**
-	 * Cancel the current job and notify all listeners the job's cancellation.
-	 *
-	 * @param cause Cause for the cancelling.
-	 */
-	private void cancelAndClearEverything(Throwable cause) {
-		// currently, nothing to do here
-	}
-
-	// ------------------------------------------------------------------------
-	//  Utility classes
-	// ------------------------------------------------------------------------
-	private class JobMasterLeaderContender implements LeaderContender {
-
-		@Override
-		public void grantLeadership(UUID leaderSessionID) {
-			JobMaster.this.grantJobMasterLeadership(leaderSessionID);
-		}
-
-		@Override
-		public void revokeLeadership() {
-			JobMaster.this.revokeJobMasterLeadership();
-		}
-
-		@Override
-		public String getAddress() {
-			return JobMaster.this.getAddress();
-		}
-
-		@Override
-		public void handleError(Exception exception) {
-			onJobMasterElectionError(exception);
-		}
-	}
-}

http://git-wip-us.apache.org/repos/asf/flink/blob/2f12ba3e/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/jobmaster/JobMasterGateway.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/jobmaster/JobMasterGateway.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/jobmaster/JobMasterGateway.java
deleted file mode 100644
index 17a4c3a..0000000
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/jobmaster/JobMasterGateway.java
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.flink.runtime.rpc.jobmaster;
-
-import org.apache.flink.runtime.messages.Acknowledge;
-import org.apache.flink.runtime.rpc.RpcGateway;
-import org.apache.flink.runtime.taskmanager.TaskExecutionState;
-import scala.concurrent.Future;
-
-/**
- * {@link JobMaster} rpc gateway interface
- */
-public interface JobMasterGateway extends RpcGateway {
-
-	/**
-	 * Updates the task execution state for a given task.
-	 *
-	 * @param taskExecutionState New task execution state for a given task
-	 * @return Future acknowledge of the task execution state update
-	 */
-	Future<Acknowledge> updateTaskExecutionState(TaskExecutionState taskExecutionState);
-
-	/**
-	 * Triggers the registration of the job master at the resource manager.
-	 *
-	 * @param address Address of the resource manager
-	 */
-	void registerAtResourceManager(final String address);
-}

http://git-wip-us.apache.org/repos/asf/flink/blob/2f12ba3e/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/registration/RegistrationResponse.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/registration/RegistrationResponse.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/registration/RegistrationResponse.java
deleted file mode 100644
index 2de560a..0000000
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/registration/RegistrationResponse.java
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.flink.runtime.rpc.registration;
-
-import java.io.Serializable;
-
-/**
- * Base class for responses given to registration attempts from {@link RetryingRegistration}.
- */
-public abstract class RegistrationResponse implements Serializable {
-
-	private static final long serialVersionUID = 1L;
-
-	// ----------------------------------------------------------------------------
-	
-	/**
-	 * Base class for a successful registration. Concrete registration implementations
-	 * will typically extend this class to attach more information.
-	 */
-	public static class Success extends RegistrationResponse {
-		private static final long serialVersionUID = 1L;
-		
-		@Override
-		public String toString() {
-			return "Registration Successful";
-		}
-	}
-
-	// ----------------------------------------------------------------------------
-
-	/**
-	 * A rejected (declined) registration.
-	 */
-	public static final class Decline extends RegistrationResponse {
-		private static final long serialVersionUID = 1L;
-
-		/** the rejection reason */
-		private final String reason;
-
-		/**
-		 * Creates a new rejection message.
-		 * 
-		 * @param reason The reason for the rejection.
-		 */
-		public Decline(String reason) {
-			this.reason = reason != null ? reason : "(unknown)";
-		}
-
-		/**
-		 * Gets the reason for the rejection.
-		 */
-		public String getReason() {
-			return reason;
-		}
-
-		@Override
-		public String toString() {
-			return "Registration Declined (" + reason + ')';
-		}
-	}
-}
-
-
-
-
-
-
-

http://git-wip-us.apache.org/repos/asf/flink/blob/2f12ba3e/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/registration/RetryingRegistration.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/registration/RetryingRegistration.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/registration/RetryingRegistration.java
deleted file mode 100644
index dcb5011..0000000
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/registration/RetryingRegistration.java
+++ /dev/null
@@ -1,296 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.flink.runtime.rpc.registration;
-
-import akka.dispatch.OnFailure;
-import akka.dispatch.OnSuccess;
-
-import org.apache.flink.api.java.tuple.Tuple2;
-import org.apache.flink.runtime.rpc.RpcGateway;
-import org.apache.flink.runtime.rpc.RpcService;
-
-import org.slf4j.Logger;
-
-import scala.concurrent.Future;
-import scala.concurrent.Promise;
-import scala.concurrent.impl.Promise.DefaultPromise;
-
-import java.util.UUID;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.TimeoutException;
-
-import static org.apache.flink.util.Preconditions.checkArgument;
-import static org.apache.flink.util.Preconditions.checkNotNull;
-
-
-/**
- * This utility class implements the basis of registering one component at another component,
- * for example registering the TaskExecutor at the ResourceManager.
- * This {@code RetryingRegistration} implements both the initial address resolution
- * and the retries-with-backoff strategy.
- * 
- * <p>The registration gives access to a future that is completed upon successful registration.
- * The registration can be canceled, for example when the target where it tries to register
- * at looses leader status.
- * 
- * @param <Gateway> The type of the gateway to connect to.
- * @param <Success> The type of the successful registration responses.
- */
-public abstract class RetryingRegistration<Gateway extends RpcGateway, Success extends RegistrationResponse.Success> {
-
-	// ------------------------------------------------------------------------
-	//  default configuration values
-	// ------------------------------------------------------------------------
-
-	/** default value for the initial registration timeout (milliseconds) */
-	private static final long INITIAL_REGISTRATION_TIMEOUT_MILLIS = 100;
-
-	/** default value for the maximum registration timeout, after exponential back-off (milliseconds) */
-	private static final long MAX_REGISTRATION_TIMEOUT_MILLIS = 30000;
-
-	/** The pause (milliseconds) made after an registration attempt caused an exception (other than timeout) */
-	private static final long ERROR_REGISTRATION_DELAY_MILLIS = 10000;
-
-	/** The pause (milliseconds) made after the registration attempt was refused */
-	private static final long REFUSED_REGISTRATION_DELAY_MILLIS = 30000;
-
-	// ------------------------------------------------------------------------
-	// Fields
-	// ------------------------------------------------------------------------
-
-	private final Logger log;
-
-	private final RpcService rpcService;
-
-	private final String targetName;
-
-	private final Class<Gateway> targetType;
-
-	private final String targetAddress;
-
-	private final UUID leaderId;
-
-	private final Promise<Tuple2<Gateway, Success>> completionPromise;
-
-	private final long initialRegistrationTimeout;
-
-	private final long maxRegistrationTimeout;
-
-	private final long delayOnError;
-
-	private final long delayOnRefusedRegistration;
-
-	private volatile boolean canceled;
-
-	// ------------------------------------------------------------------------
-
-	public RetryingRegistration(
-			Logger log,
-			RpcService rpcService,
-			String targetName,
-			Class<Gateway> targetType,
-			String targetAddress,
-			UUID leaderId) {
-		this(log, rpcService, targetName, targetType, targetAddress, leaderId,
-				INITIAL_REGISTRATION_TIMEOUT_MILLIS, MAX_REGISTRATION_TIMEOUT_MILLIS,
-				ERROR_REGISTRATION_DELAY_MILLIS, REFUSED_REGISTRATION_DELAY_MILLIS);
-	}
-
-	public RetryingRegistration(
-			Logger log,
-			RpcService rpcService,
-			String targetName, 
-			Class<Gateway> targetType,
-			String targetAddress,
-			UUID leaderId,
-			long initialRegistrationTimeout,
-			long maxRegistrationTimeout,
-			long delayOnError,
-			long delayOnRefusedRegistration) {
-
-		checkArgument(initialRegistrationTimeout > 0, "initial registration timeout must be greater than zero");
-		checkArgument(maxRegistrationTimeout > 0, "maximum registration timeout must be greater than zero");
-		checkArgument(delayOnError >= 0, "delay on error must be non-negative");
-		checkArgument(delayOnRefusedRegistration >= 0, "delay on refused registration must be non-negative");
-
-		this.log = checkNotNull(log);
-		this.rpcService = checkNotNull(rpcService);
-		this.targetName = checkNotNull(targetName);
-		this.targetType = checkNotNull(targetType);
-		this.targetAddress = checkNotNull(targetAddress);
-		this.leaderId = checkNotNull(leaderId);
-		this.initialRegistrationTimeout = initialRegistrationTimeout;
-		this.maxRegistrationTimeout = maxRegistrationTimeout;
-		this.delayOnError = delayOnError;
-		this.delayOnRefusedRegistration = delayOnRefusedRegistration;
-
-		this.completionPromise = new DefaultPromise<>();
-	}
-
-	// ------------------------------------------------------------------------
-	//  completion and cancellation
-	// ------------------------------------------------------------------------
-
-	public Future<Tuple2<Gateway, Success>> getFuture() {
-		return completionPromise.future();
-	}
-
-	/**
-	 * Cancels the registration procedure.
-	 */
-	public void cancel() {
-		canceled = true;
-	}
-
-	/**
-	 * Checks if the registration was canceled.
-	 * @return True if the registration was canceled, false otherwise.
-	 */
-	public boolean isCanceled() {
-		return canceled;
-	}
-
-	// ------------------------------------------------------------------------
-	//  registration
-	// ------------------------------------------------------------------------
-
-	protected abstract Future<RegistrationResponse> invokeRegistration(
-			Gateway gateway, UUID leaderId, long timeoutMillis) throws Exception;
-
-	/**
-	 * This method resolves the target address to a callable gateway and starts the
-	 * registration after that.
-	 */
-	@SuppressWarnings("unchecked")
-	public void startRegistration() {
-		try {
-			// trigger resolution of the resource manager address to a callable gateway
-			Future<Gateway> resourceManagerFuture = rpcService.connect(targetAddress, targetType);
-	
-			// upon success, start the registration attempts
-			resourceManagerFuture.onSuccess(new OnSuccess<Gateway>() {
-				@Override
-				public void onSuccess(Gateway result) {
-					log.info("Resolved {} address, beginning registration", targetName);
-					register(result, 1, initialRegistrationTimeout);
-				}
-			}, rpcService.getExecutionContext());
-	
-			// upon failure, retry, unless this is cancelled
-			resourceManagerFuture.onFailure(new OnFailure() {
-				@Override
-				public void onFailure(Throwable failure) {
-					if (!isCanceled()) {
-						log.warn("Could not resolve {} address {}, retrying...", targetName, targetAddress);
-						startRegistration();
-					}
-				}
-			}, rpcService.getExecutionContext());
-		}
-		catch (Throwable t) {
-			cancel();
-			completionPromise.tryFailure(t);
-		}
-	}
-
-	/**
-	 * This method performs a registration attempt and triggers either a success notification or a retry,
-	 * depending on the result.
-	 */
-	@SuppressWarnings("unchecked")
-	private void register(final Gateway gateway, final int attempt, final long timeoutMillis) {
-		// eager check for canceling to avoid some unnecessary work
-		if (canceled) {
-			return;
-		}
-
-		try {
-			log.info("Registration at {} attempt {} (timeout={}ms)", targetName, attempt, timeoutMillis);
-			Future<RegistrationResponse> registrationFuture = invokeRegistration(gateway, leaderId, timeoutMillis);
-	
-			// if the registration was successful, let the TaskExecutor know
-			registrationFuture.onSuccess(new OnSuccess<RegistrationResponse>() {
-				
-				@Override
-				public void onSuccess(RegistrationResponse result) throws Throwable {
-					if (!isCanceled()) {
-						if (result instanceof RegistrationResponse.Success) {
-							// registration successful!
-							Success success = (Success) result;
-							completionPromise.success(new Tuple2<>(gateway, success));
-						}
-						else {
-							// registration refused or unknown
-							if (result instanceof RegistrationResponse.Decline) {
-								RegistrationResponse.Decline decline = (RegistrationResponse.Decline) result;
-								log.info("Registration at {} was declined: {}", targetName, decline.getReason());
-							} else {
-								log.error("Received unknown response to registration attempt: " + result);
-							}
-
-							log.info("Pausing and re-attempting registration in {} ms", delayOnRefusedRegistration);
-							registerLater(gateway, 1, initialRegistrationTimeout, delayOnRefusedRegistration);
-						}
-					}
-				}
-			}, rpcService.getExecutionContext());
-	
-			// upon failure, retry
-			registrationFuture.onFailure(new OnFailure() {
-				@Override
-				public void onFailure(Throwable failure) {
-					if (!isCanceled()) {
-						if (failure instanceof TimeoutException) {
-							// we simply have not received a response in time. maybe the timeout was
-							// very low (initial fast registration attempts), maybe the target endpoint is
-							// currently down.
-							if (log.isDebugEnabled()) {
-								log.debug("Registration at {} ({}) attempt {} timed out after {} ms",
-										targetName, targetAddress, attempt, timeoutMillis);
-							}
-	
-							long newTimeoutMillis = Math.min(2 * timeoutMillis, maxRegistrationTimeout);
-							register(gateway, attempt + 1, newTimeoutMillis);
-						}
-						else {
-							// a serious failure occurred. we still should not give up, but keep trying
-							log.error("Registration at " + targetName + " failed due to an error", failure);
-							log.info("Pausing and re-attempting registration in {} ms", delayOnError);
-	
-							registerLater(gateway, 1, initialRegistrationTimeout, delayOnError);
-						}
-					}
-				}
-			}, rpcService.getExecutionContext());
-		}
-		catch (Throwable t) {
-			cancel();
-			completionPromise.tryFailure(t);
-		}
-	}
-
-	private void registerLater(final Gateway gateway, final int attempt, final long timeoutMillis, long delay) {
-		rpcService.scheduleRunnable(new Runnable() {
-			@Override
-			public void run() {
-				register(gateway, attempt, timeoutMillis);
-			}
-		}, delay, TimeUnit.MILLISECONDS);
-	}
-}

http://git-wip-us.apache.org/repos/asf/flink/blob/2f12ba3e/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/JobMasterRegistration.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/JobMasterRegistration.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/JobMasterRegistration.java
deleted file mode 100644
index 7a2deae..0000000
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/JobMasterRegistration.java
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.flink.runtime.rpc.resourcemanager;
-
-import java.io.Serializable;
-
-public class JobMasterRegistration implements Serializable {
-	private static final long serialVersionUID = 8411214999193765202L;
-
-	private final String address;
-
-	public JobMasterRegistration(String address) {
-		this.address = address;
-	}
-
-	public String getAddress() {
-		return address;
-	}
-}

http://git-wip-us.apache.org/repos/asf/flink/blob/2f12ba3e/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/RegistrationResponse.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/RegistrationResponse.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/RegistrationResponse.java
deleted file mode 100644
index 8ac9e49..0000000
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/RegistrationResponse.java
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.flink.runtime.rpc.resourcemanager;
-
-import org.apache.flink.runtime.instance.InstanceID;
-
-import java.io.Serializable;
-
-public class RegistrationResponse implements Serializable {
-	private static final long serialVersionUID = -2379003255993119993L;
-
-	private final boolean isSuccess;
-	private final InstanceID instanceID;
-
-	public RegistrationResponse(boolean isSuccess, InstanceID instanceID) {
-		this.isSuccess = isSuccess;
-		this.instanceID = instanceID;
-	}
-
-	public boolean isSuccess() {
-		return isSuccess;
-	}
-
-	public InstanceID getInstanceID() {
-		return instanceID;
-	}
-}

http://git-wip-us.apache.org/repos/asf/flink/blob/2f12ba3e/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/ResourceManager.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/ResourceManager.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/ResourceManager.java
deleted file mode 100644
index f7147c9..0000000
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/ResourceManager.java
+++ /dev/null
@@ -1,214 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.flink.runtime.rpc.resourcemanager;
-
-import akka.dispatch.Mapper;
-
-import org.apache.flink.annotation.VisibleForTesting;
-import org.apache.flink.runtime.clusterframework.types.ResourceID;
-import org.apache.flink.runtime.highavailability.HighAvailabilityServices;
-import org.apache.flink.runtime.instance.InstanceID;
-import org.apache.flink.runtime.leaderelection.LeaderContender;
-import org.apache.flink.runtime.leaderelection.LeaderElectionService;
-import org.apache.flink.runtime.rpc.RpcMethod;
-import org.apache.flink.runtime.rpc.RpcEndpoint;
-import org.apache.flink.runtime.rpc.RpcService;
-import org.apache.flink.runtime.rpc.jobmaster.JobMaster;
-import org.apache.flink.runtime.rpc.jobmaster.JobMasterGateway;
-import org.apache.flink.runtime.rpc.taskexecutor.TaskExecutorRegistrationSuccess;
-
-import scala.concurrent.Future;
-
-import java.util.HashMap;
-import java.util.Map;
-import java.util.UUID;
-
-import static org.apache.flink.util.Preconditions.checkNotNull;
-
-/**
- * ResourceManager implementation. The resource manager is responsible for resource de-/allocation
- * and bookkeeping.
- *
- * It offers the following methods as part of its rpc interface to interact with the him remotely:
- * <ul>
- *     <li>{@link #registerJobMaster(JobMasterRegistration)} registers a {@link JobMaster} at the resource manager</li>
- *     <li>{@link #requestSlot(SlotRequest)} requests a slot from the resource manager</li>
- * </ul>
- */
-public class ResourceManager extends RpcEndpoint<ResourceManagerGateway> {
-	private final Map<JobMasterGateway, InstanceID> jobMasterGateways;
-	private final HighAvailabilityServices highAvailabilityServices;
-	private LeaderElectionService leaderElectionService = null;
-	private UUID leaderSessionID = null;
-
-	public ResourceManager(RpcService rpcService, HighAvailabilityServices highAvailabilityServices) {
-		super(rpcService);
-		this.highAvailabilityServices = checkNotNull(highAvailabilityServices);
-		this.jobMasterGateways = new HashMap<>();
-	}
-
-	@Override
-	public void start() {
-		// start a leader
-		try {
-			super.start();
-			leaderElectionService = highAvailabilityServices.getResourceManagerLeaderElectionService();
-			leaderElectionService.start(new ResourceManagerLeaderContender());
-		} catch (Throwable e) {
-			log.error("A fatal error happened when starting the ResourceManager", e);
-			throw new RuntimeException("A fatal error happened when starting the ResourceManager", e);
-		}
-	}
-
-	@Override
-	public void shutDown() {
-		try {
-			leaderElectionService.stop();
-			super.shutDown();
-		} catch(Throwable e) {
-			log.error("A fatal error happened when shutdown the ResourceManager", e);
-			throw new RuntimeException("A fatal error happened when shutdown the ResourceManager", e);
-		}
-	}
-
-	/**
-	 * Gets the leader session id of current resourceManager.
-	 *
-	 * @return return the leaderSessionId of current resourceManager, this returns null until the current resourceManager is granted leadership.
-	 */
-	@VisibleForTesting
-	UUID getLeaderSessionID() {
-		return leaderSessionID;
-	}
-
-	/**
-	 * Register a {@link JobMaster} at the resource manager.
-	 *
-	 * @param jobMasterRegistration Job master registration information
-	 * @return Future registration response
-	 */
-	@RpcMethod
-	public Future<RegistrationResponse> registerJobMaster(JobMasterRegistration jobMasterRegistration) {
-		Future<JobMasterGateway> jobMasterFuture = getRpcService().connect(jobMasterRegistration.getAddress(), JobMasterGateway.class);
-
-		return jobMasterFuture.map(new Mapper<JobMasterGateway, RegistrationResponse>() {
-			@Override
-			public RegistrationResponse apply(final JobMasterGateway jobMasterGateway) {
-				InstanceID instanceID;
-
-				if (jobMasterGateways.containsKey(jobMasterGateway)) {
-					instanceID = jobMasterGateways.get(jobMasterGateway);
-				} else {
-					instanceID = new InstanceID();
-					jobMasterGateways.put(jobMasterGateway, instanceID);
-				}
-
-				return new RegistrationResponse(true, instanceID);
-			}
-		}, getMainThreadExecutionContext());
-	}
-
-	/**
-	 * Requests a slot from the resource manager.
-	 *
-	 * @param slotRequest Slot request
-	 * @return Slot assignment
-	 */
-	@RpcMethod
-	public SlotAssignment requestSlot(SlotRequest slotRequest) {
-		System.out.println("SlotRequest: " + slotRequest);
-		return new SlotAssignment();
-	}
-
-
-	/**
-	 *
-	 * @param resourceManagerLeaderId  The fencing token for the ResourceManager leader 
-	 * @param taskExecutorAddress      The address of the TaskExecutor that registers
-	 * @param resourceID               The resource ID of the TaskExecutor that registers
-	 *
-	 * @return The response by the ResourceManager.
-	 */
-	@RpcMethod
-	public org.apache.flink.runtime.rpc.registration.RegistrationResponse registerTaskExecutor(
-			UUID resourceManagerLeaderId,
-			String taskExecutorAddress,
-			ResourceID resourceID) {
-
-		return new TaskExecutorRegistrationSuccess(new InstanceID(), 5000);
-	}
-
-	private class ResourceManagerLeaderContender implements LeaderContender {
-
-		/**
-		 * Callback method when current resourceManager is granted leadership
-		 *
-		 * @param leaderSessionID unique leadershipID
-		 */
-		@Override
-		public void grantLeadership(final UUID leaderSessionID) {
-			runAsync(new Runnable() {
-				@Override
-				public void run() {
-					log.info("ResourceManager {} was granted leadership with leader session ID {}", getAddress(), leaderSessionID);
-					ResourceManager.this.leaderSessionID = leaderSessionID;
-					// confirming the leader session ID might be blocking,
-					leaderElectionService.confirmLeaderSessionID(leaderSessionID);
-				}
-			});
-		}
-
-		/**
-		 * Callback method when current resourceManager lose leadership.
-		 */
-		@Override
-		public void revokeLeadership() {
-			runAsync(new Runnable() {
-				@Override
-				public void run() {
-					log.info("ResourceManager {} was revoked leadership.", getAddress());
-					jobMasterGateways.clear();
-					leaderSessionID = null;
-				}
-			});
-		}
-
-		@Override
-		public String getAddress() {
-			return ResourceManager.this.getAddress();
-		}
-
-		/**
-		 * Handles error occurring in the leader election service
-		 *
-		 * @param exception Exception being thrown in the leader election service
-		 */
-		@Override
-		public void handleError(final Exception exception) {
-			runAsync(new Runnable() {
-				@Override
-				public void run() {
-					log.error("ResourceManager received an error from the LeaderElectionService.", exception);
-					// terminate ResourceManager in case of an error
-					shutDown();
-				}
-			});
-		}
-	}
-}

http://git-wip-us.apache.org/repos/asf/flink/blob/2f12ba3e/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/ResourceManagerGateway.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/ResourceManagerGateway.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/ResourceManagerGateway.java
deleted file mode 100644
index afddb01..0000000
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/ResourceManagerGateway.java
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.flink.runtime.rpc.resourcemanager;
-
-import org.apache.flink.runtime.clusterframework.types.ResourceID;
-import org.apache.flink.runtime.rpc.RpcGateway;
-import org.apache.flink.runtime.rpc.RpcTimeout;
-import org.apache.flink.runtime.rpc.jobmaster.JobMaster;
-
-import scala.concurrent.Future;
-import scala.concurrent.duration.FiniteDuration;
-
-import java.util.UUID;
-
-/**
- * The {@link ResourceManager}'s RPC gateway interface.
- */
-public interface ResourceManagerGateway extends RpcGateway {
-
-	/**
-	 * Register a {@link JobMaster} at the resource manager.
-	 *
-	 * @param jobMasterRegistration Job master registration information
-	 * @param timeout Timeout for the future to complete
-	 * @return Future registration response
-	 */
-	Future<RegistrationResponse> registerJobMaster(
-		JobMasterRegistration jobMasterRegistration,
-		@RpcTimeout FiniteDuration timeout);
-
-	/**
-	 * Register a {@link JobMaster} at the resource manager.
-	 *
-	 * @param jobMasterRegistration Job master registration information
-	 * @return Future registration response
-	 */
-	Future<RegistrationResponse> registerJobMaster(JobMasterRegistration jobMasterRegistration);
-
-	/**
-	 * Requests a slot from the resource manager.
-	 *
-	 * @param slotRequest Slot request
-	 * @return Future slot assignment
-	 */
-	Future<SlotAssignment> requestSlot(SlotRequest slotRequest);
-
-	/**
-	 * 
-	 * @param resourceManagerLeaderId  The fencing token for the ResourceManager leader 
-	 * @param taskExecutorAddress      The address of the TaskExecutor that registers
-	 * @param resourceID               The resource ID of the TaskExecutor that registers
-	 * @param timeout                  The timeout for the response.
-	 * 
-	 * @return The future to the response by the ResourceManager.
-	 */
-	Future<org.apache.flink.runtime.rpc.registration.RegistrationResponse> registerTaskExecutor(
-			UUID resourceManagerLeaderId,
-			String taskExecutorAddress,
-			ResourceID resourceID,
-			@RpcTimeout FiniteDuration timeout);
-}

http://git-wip-us.apache.org/repos/asf/flink/blob/2f12ba3e/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/SlotAssignment.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/SlotAssignment.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/SlotAssignment.java
deleted file mode 100644
index 86cd8b7..0000000
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/SlotAssignment.java
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.flink.runtime.rpc.resourcemanager;
-
-import java.io.Serializable;
-
-public class SlotAssignment implements Serializable{
-	private static final long serialVersionUID = -6990813455942742322L;
-}

http://git-wip-us.apache.org/repos/asf/flink/blob/2f12ba3e/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/SlotRequest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/SlotRequest.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/SlotRequest.java
deleted file mode 100644
index 74c7c39..0000000
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/SlotRequest.java
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.flink.runtime.rpc.resourcemanager;
-
-import org.apache.flink.api.common.JobID;
-import org.apache.flink.runtime.clusterframework.types.AllocationID;
-import org.apache.flink.runtime.clusterframework.types.ResourceProfile;
-
-import java.io.Serializable;
-
-import static org.apache.flink.util.Preconditions.checkNotNull;
-
-/**
- * This describes the requirement of the slot, mainly used by JobManager requesting slot from ResourceManager.
- */
-public class SlotRequest implements Serializable {
-
-	private static final long serialVersionUID = -6586877187990445986L;
-
-	/** The JobID of the slot requested for */
-	private final JobID jobId;
-
-	/** The unique identification of this request */
-	private final AllocationID allocationId;
-
-	/** The resource profile of the required slot */
-	private final ResourceProfile resourceProfile;
-
-	public SlotRequest(JobID jobId, AllocationID allocationId, ResourceProfile resourceProfile) {
-		this.jobId = checkNotNull(jobId);
-		this.allocationId = checkNotNull(allocationId);
-		this.resourceProfile = checkNotNull(resourceProfile);
-	}
-
-	/**
-	 * Get the JobID of the slot requested for.
-	 * @return The job id
-	 */
-	public JobID getJobId() {
-		return jobId;
-	}
-
-	/**
-	 * Get the unique identification of this request
-	 * @return the allocation id
-	 */
-	public AllocationID getAllocationId() {
-		return allocationId;
-	}
-
-	/**
-	 * Get the resource profile of the desired slot
-	 * @return The resource profile
-	 */
-	public ResourceProfile getResourceProfile() {
-		return resourceProfile;
-	}
-}

http://git-wip-us.apache.org/repos/asf/flink/blob/2f12ba3e/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/SlotReport.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/SlotReport.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/SlotReport.java
deleted file mode 100644
index c372ecb..0000000
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/SlotReport.java
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.flink.runtime.rpc.taskexecutor;
-
-import org.apache.flink.runtime.clusterframework.types.ResourceID;
-
-import java.io.Serializable;
-import java.util.List;
-
-import static org.apache.flink.util.Preconditions.checkNotNull;
-
-/**
- * A report about the current status of all slots of the TaskExecutor, describing
- * which slots are available and allocated, and what jobs (JobManagers) the allocated slots
- * have been allocated to.
- */
-public class SlotReport implements Serializable {
-
-	private static final long serialVersionUID = -3150175198722481689L;
-
-	/** The slots status of the TaskManager */
-	private final List<SlotStatus> slotsStatus;
-
-	/** The resource id which identifies the TaskManager */
-	private final ResourceID resourceID;
-
-	public SlotReport(final List<SlotStatus> slotsStatus, final ResourceID resourceID) {
-		this.slotsStatus = checkNotNull(slotsStatus);
-		this.resourceID = checkNotNull(resourceID);
-	}
-
-	public List<SlotStatus> getSlotsStatus() {
-		return slotsStatus;
-	}
-
-	public ResourceID getResourceID() {
-		return resourceID;
-	}
-
-}

http://git-wip-us.apache.org/repos/asf/flink/blob/2f12ba3e/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/SlotStatus.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/SlotStatus.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/SlotStatus.java
deleted file mode 100644
index e8e2084..0000000
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/SlotStatus.java
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.flink.runtime.rpc.taskexecutor;
-
-import org.apache.flink.api.common.JobID;
-import org.apache.flink.runtime.clusterframework.types.AllocationID;
-import org.apache.flink.runtime.clusterframework.types.ResourceProfile;
-import org.apache.flink.runtime.clusterframework.types.SlotID;
-
-import java.io.Serializable;
-
-import static org.apache.flink.util.Preconditions.checkNotNull;
-
-/**
- * This describes the slot current status which located in TaskManager.
- */
-public class SlotStatus implements Serializable {
-
-	private static final long serialVersionUID = 5099191707339664493L;
-
-	/** slotID to identify a slot */
-	private final SlotID slotID;
-
-	/** the resource profile of the slot */
-	private final ResourceProfile profiler;
-
-	/** if the slot is allocated, allocationId identify its allocation; else, allocationId is null */
-	private final AllocationID allocationID;
-
-	/** if the slot is allocated, jobId identify which job this slot is allocated to; else, jobId is null */
-	private final JobID jobID;
-
-	public SlotStatus(SlotID slotID, ResourceProfile profiler) {
-		this(slotID, profiler, null, null);
-	}
-
-	public SlotStatus(SlotID slotID, ResourceProfile profiler, AllocationID allocationID, JobID jobID) {
-		this.slotID = checkNotNull(slotID, "slotID cannot be null");
-		this.profiler = checkNotNull(profiler, "profile cannot be null");
-		this.allocationID = allocationID;
-		this.jobID = jobID;
-	}
-
-	/**
-	 * Get the unique identification of this slot
-	 *
-	 * @return The slot id
-	 */
-	public SlotID getSlotID() {
-		return slotID;
-	}
-
-	/**
-	 * Get the resource profile of this slot
-	 *
-	 * @return The resource profile
-	 */
-	public ResourceProfile getProfiler() {
-		return profiler;
-	}
-
-	/**
-	 * Get the allocation id of this slot
-	 *
-	 * @return The allocation id if this slot is allocated, otherwise null
-	 */
-	public AllocationID getAllocationID() {
-		return allocationID;
-	}
-
-	/**
-	 * Get the job id of the slot allocated for
-	 *
-	 * @return The job id if this slot is allocated, otherwise null
-	 */
-	public JobID getJobID() {
-		return jobID;
-	}
-
-	@Override
-	public boolean equals(Object o) {
-		if (this == o) {
-			return true;
-		}
-		if (o == null || getClass() != o.getClass()) {
-			return false;
-		}
-
-		SlotStatus that = (SlotStatus) o;
-
-		if (!slotID.equals(that.slotID)) {
-			return false;
-		}
-		if (!profiler.equals(that.profiler)) {
-			return false;
-		}
-		if (allocationID != null ? !allocationID.equals(that.allocationID) : that.allocationID != null) {
-			return false;
-		}
-		return jobID != null ? jobID.equals(that.jobID) : that.jobID == null;
-
-	}
-
-	@Override
-	public int hashCode() {
-		int result = slotID.hashCode();
-		result = 31 * result + profiler.hashCode();
-		result = 31 * result + (allocationID != null ? allocationID.hashCode() : 0);
-		result = 31 * result + (jobID != null ? jobID.hashCode() : 0);
-		return result;
-	}
-
-}


[48/50] [abbrv] flink git commit: [FLINK-4538][FLINK-4348] ResourceManager slot allocation protcol

Posted by tr...@apache.org.
[FLINK-4538][FLINK-4348] ResourceManager slot allocation protcol

- associates JobMasters with JobID instead of InstanceID
- adds TaskExecutorGateway to slot
- adds SlotManager as RM constructor parameter
- adds LeaderRetrievalListener to SlotManager to keep track of the leader id

- tests the interaction JM->RM requestSlot
- tests the interaction RM->TM requestSlot

This closes #2463


Project: http://git-wip-us.apache.org/repos/asf/flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/0a134a32
Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/0a134a32
Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/0a134a32

Branch: refs/heads/flip-6
Commit: 0a134a32eb32836ddfb190ed6be5fbd32134d039
Parents: 2630543
Author: Maximilian Michels <mx...@apache.org>
Authored: Thu Sep 1 16:53:31 2016 +0200
Committer: Till Rohrmann <tr...@apache.org>
Committed: Wed Sep 21 11:39:18 2016 +0200

----------------------------------------------------------------------
 .../clusterframework/types/ResourceProfile.java |   2 +-
 .../clusterframework/types/ResourceSlot.java    |  14 +-
 .../resourcemanager/JobMasterRegistration.java  |  10 +-
 .../resourcemanager/RegistrationResponse.java   |   9 +-
 .../resourcemanager/ResourceManager.java        | 167 +++---
 .../resourcemanager/ResourceManagerGateway.java |   2 +-
 .../runtime/resourcemanager/SlotAssignment.java |  25 -
 .../runtime/resourcemanager/SlotManager.java    | 523 -----------------
 .../resourcemanager/SlotRequestRegistered.java  |  33 ++
 .../resourcemanager/SlotRequestRejected.java    |  34 ++
 .../resourcemanager/SlotRequestReply.java       |  41 ++
 .../slotmanager/SimpleSlotManager.java          |  59 ++
 .../slotmanager/SlotManager.java                | 579 +++++++++++++++++++
 .../flink/runtime/taskexecutor/SlotStatus.java  |   5 +-
 .../taskexecutor/TaskExecutorGateway.java       |  17 +
 .../resourcemanager/ResourceManagerHATest.java  |   4 +-
 .../resourcemanager/SlotManagerTest.java        | 538 -----------------
 .../slotmanager/SlotManagerTest.java            | 554 ++++++++++++++++++
 .../slotmanager/SlotProtocolTest.java           | 225 +++++++
 .../flink/runtime/rpc/TestingRpcService.java    |   6 +-
 .../runtime/rpc/TestingSerialRpcService.java    |   4 +
 21 files changed, 1677 insertions(+), 1174 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/flink/blob/0a134a32/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/types/ResourceProfile.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/types/ResourceProfile.java b/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/types/ResourceProfile.java
index ff1c4bf..fa3aabc 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/types/ResourceProfile.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/types/ResourceProfile.java
@@ -68,6 +68,6 @@ public class ResourceProfile implements Serializable {
 	 * @return true if the requirement is matched, otherwise false
 	 */
 	public boolean isMatching(ResourceProfile required) {
-		return Double.compare(cpuCores, required.getCpuCores()) >= 0 && memoryInMB >= required.getMemoryInMB();
+		return cpuCores >= required.getCpuCores() && memoryInMB >= required.getMemoryInMB();
 	}
 }

http://git-wip-us.apache.org/repos/asf/flink/blob/0a134a32/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/types/ResourceSlot.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/types/ResourceSlot.java b/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/types/ResourceSlot.java
index 8a6db5f..5fb8aee 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/types/ResourceSlot.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/types/ResourceSlot.java
@@ -18,6 +18,8 @@
 
 package org.apache.flink.runtime.clusterframework.types;
 
+import org.apache.flink.runtime.taskexecutor.TaskExecutorGateway;
+
 import java.io.Serializable;
 
 import static org.apache.flink.util.Preconditions.checkNotNull;
@@ -26,7 +28,7 @@ import static org.apache.flink.util.Preconditions.checkNotNull;
  * A ResourceSlot represents a slot located in TaskManager from ResourceManager's view. It has a unique
  * identification and resource profile which we can compare to the resource request.
  */
-public class ResourceSlot implements ResourceIDRetrievable, Serializable {
+public class ResourceSlot implements ResourceIDRetrievable {
 
 	private static final long serialVersionUID = -5853720153136840674L;
 
@@ -36,9 +38,13 @@ public class ResourceSlot implements ResourceIDRetrievable, Serializable {
 	/** The resource profile of this slot */
 	private final ResourceProfile resourceProfile;
 
-	public ResourceSlot(SlotID slotId, ResourceProfile resourceProfile) {
+	/** Gateway to the TaskExecutor which owns the slot */
+	private final TaskExecutorGateway taskExecutorGateway;
+
+	public ResourceSlot(SlotID slotId, ResourceProfile resourceProfile, TaskExecutorGateway taskExecutorGateway) {
 		this.slotId = checkNotNull(slotId);
 		this.resourceProfile = checkNotNull(resourceProfile);
+		this.taskExecutorGateway = taskExecutorGateway;
 	}
 
 	@Override
@@ -54,6 +60,10 @@ public class ResourceSlot implements ResourceIDRetrievable, Serializable {
 		return resourceProfile;
 	}
 
+	public TaskExecutorGateway getTaskExecutorGateway() {
+		return taskExecutorGateway;
+	}
+
 	/**
 	 * Check whether required resource profile can be matched by this slot.
 	 *

http://git-wip-us.apache.org/repos/asf/flink/blob/0a134a32/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/JobMasterRegistration.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/JobMasterRegistration.java b/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/JobMasterRegistration.java
index 309dcc1..439e56b 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/JobMasterRegistration.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/JobMasterRegistration.java
@@ -18,18 +18,26 @@
 
 package org.apache.flink.runtime.resourcemanager;
 
+import org.apache.flink.api.common.JobID;
+
 import java.io.Serializable;
 
 public class JobMasterRegistration implements Serializable {
 	private static final long serialVersionUID = 8411214999193765202L;
 
 	private final String address;
+	private final JobID jobID;
 
-	public JobMasterRegistration(String address) {
+	public JobMasterRegistration(String address, JobID jobID) {
 		this.address = address;
+		this.jobID = jobID;
 	}
 
 	public String getAddress() {
 		return address;
 	}
+
+	public JobID getJobID() {
+		return jobID;
+	}
 }

http://git-wip-us.apache.org/repos/asf/flink/blob/0a134a32/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/RegistrationResponse.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/RegistrationResponse.java b/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/RegistrationResponse.java
index fb6c401..796e634 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/RegistrationResponse.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/RegistrationResponse.java
@@ -18,26 +18,19 @@
 
 package org.apache.flink.runtime.resourcemanager;
 
-import org.apache.flink.runtime.instance.InstanceID;
-
 import java.io.Serializable;
 
 public class RegistrationResponse implements Serializable {
 	private static final long serialVersionUID = -2379003255993119993L;
 
 	private final boolean isSuccess;
-	private final InstanceID instanceID;
 
-	public RegistrationResponse(boolean isSuccess, InstanceID instanceID) {
+	public RegistrationResponse(boolean isSuccess) {
 		this.isSuccess = isSuccess;
-		this.instanceID = instanceID;
 	}
 
 	public boolean isSuccess() {
 		return isSuccess;
 	}
 
-	public InstanceID getInstanceID() {
-		return instanceID;
-	}
 }

http://git-wip-us.apache.org/repos/asf/flink/blob/0a134a32/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/ResourceManager.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/ResourceManager.java b/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/ResourceManager.java
index 44c022b..29aba1a 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/ResourceManager.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/ResourceManager.java
@@ -21,11 +21,13 @@ package org.apache.flink.runtime.resourcemanager;
 import akka.dispatch.Mapper;
 
 import org.apache.flink.annotation.VisibleForTesting;
+import org.apache.flink.api.common.JobID;
 import org.apache.flink.runtime.clusterframework.types.ResourceID;
 import org.apache.flink.runtime.highavailability.HighAvailabilityServices;
 import org.apache.flink.runtime.instance.InstanceID;
 import org.apache.flink.runtime.leaderelection.LeaderContender;
 import org.apache.flink.runtime.leaderelection.LeaderElectionService;
+import org.apache.flink.runtime.resourcemanager.slotmanager.SlotManager;
 import org.apache.flink.runtime.rpc.RpcMethod;
 import org.apache.flink.runtime.rpc.RpcEndpoint;
 import org.apache.flink.runtime.rpc.RpcService;
@@ -33,6 +35,8 @@ import org.apache.flink.runtime.jobmaster.JobMaster;
 import org.apache.flink.runtime.jobmaster.JobMasterGateway;
 import org.apache.flink.runtime.taskexecutor.TaskExecutorRegistrationSuccess;
 
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 import scala.concurrent.Future;
 
 import java.util.HashMap;
@@ -51,16 +55,28 @@ import static org.apache.flink.util.Preconditions.checkNotNull;
  *     <li>{@link #requestSlot(SlotRequest)} requests a slot from the resource manager</li>
  * </ul>
  */
-public class ResourceManager extends RpcEndpoint<ResourceManagerGateway> {
-	private final Map<JobMasterGateway, InstanceID> jobMasterGateways;
+public class ResourceManager extends RpcEndpoint<ResourceManagerGateway> implements LeaderContender {
+
+	private final Logger LOG = LoggerFactory.getLogger(getClass());
+
+	private final Map<JobID, JobMasterGateway> jobMasterGateways;
+
 	private final HighAvailabilityServices highAvailabilityServices;
-	private LeaderElectionService leaderElectionService = null;
-	private UUID leaderSessionID = null;
 
-	public ResourceManager(RpcService rpcService, HighAvailabilityServices highAvailabilityServices) {
+	private LeaderElectionService leaderElectionService;
+
+	private final SlotManager slotManager;
+
+	private UUID leaderSessionID;
+
+	public ResourceManager(
+			RpcService rpcService,
+			HighAvailabilityServices highAvailabilityServices,
+			SlotManager slotManager) {
 		super(rpcService);
 		this.highAvailabilityServices = checkNotNull(highAvailabilityServices);
 		this.jobMasterGateways = new HashMap<>();
+		this.slotManager = slotManager;
 	}
 
 	@Override
@@ -69,7 +85,7 @@ public class ResourceManager extends RpcEndpoint<ResourceManagerGateway> {
 		try {
 			super.start();
 			leaderElectionService = highAvailabilityServices.getResourceManagerLeaderElectionService();
-			leaderElectionService.start(new ResourceManagerLeaderContender());
+			leaderElectionService.start(this);
 		} catch (Throwable e) {
 			log.error("A fatal error happened when starting the ResourceManager", e);
 			throw new RuntimeException("A fatal error happened when starting the ResourceManager", e);
@@ -94,7 +110,7 @@ public class ResourceManager extends RpcEndpoint<ResourceManagerGateway> {
 	 */
 	@VisibleForTesting
 	UUID getLeaderSessionID() {
-		return leaderSessionID;
+		return this.leaderSessionID;
 	}
 
 	/**
@@ -105,21 +121,20 @@ public class ResourceManager extends RpcEndpoint<ResourceManagerGateway> {
 	 */
 	@RpcMethod
 	public Future<RegistrationResponse> registerJobMaster(JobMasterRegistration jobMasterRegistration) {
-		Future<JobMasterGateway> jobMasterFuture = getRpcService().connect(jobMasterRegistration.getAddress(), JobMasterGateway.class);
+		final Future<JobMasterGateway> jobMasterFuture =
+			getRpcService().connect(jobMasterRegistration.getAddress(), JobMasterGateway.class);
+		final JobID jobID = jobMasterRegistration.getJobID();
 
 		return jobMasterFuture.map(new Mapper<JobMasterGateway, RegistrationResponse>() {
 			@Override
 			public RegistrationResponse apply(final JobMasterGateway jobMasterGateway) {
-				InstanceID instanceID;
 
-				if (jobMasterGateways.containsKey(jobMasterGateway)) {
-					instanceID = jobMasterGateways.get(jobMasterGateway);
-				} else {
-					instanceID = new InstanceID();
-					jobMasterGateways.put(jobMasterGateway, instanceID);
+				final JobMasterGateway existingGateway = jobMasterGateways.put(jobID, jobMasterGateway);
+				if (existingGateway != null) {
+					LOG.info("Replacing existing gateway {} for JobID {} with  {}.",
+						existingGateway, jobID, jobMasterGateway);
 				}
-
-				return new RegistrationResponse(true, instanceID);
+				return new RegistrationResponse(true);
 			}
 		}, getMainThreadExecutionContext());
 	}
@@ -131,9 +146,16 @@ public class ResourceManager extends RpcEndpoint<ResourceManagerGateway> {
 	 * @return Slot assignment
 	 */
 	@RpcMethod
-	public SlotAssignment requestSlot(SlotRequest slotRequest) {
-		System.out.println("SlotRequest: " + slotRequest);
-		return new SlotAssignment();
+	public SlotRequestReply requestSlot(SlotRequest slotRequest) {
+		final JobID jobId = slotRequest.getJobId();
+		final JobMasterGateway jobMasterGateway = jobMasterGateways.get(jobId);
+
+		if (jobMasterGateway != null) {
+			return slotManager.requestSlot(slotRequest);
+		} else {
+			LOG.info("Ignoring slot request for unknown JobMaster with JobID {}", jobId);
+			return new SlotRequestRejected(slotRequest.getAllocationId());
+		}
 	}
 
 
@@ -154,61 +176,62 @@ public class ResourceManager extends RpcEndpoint<ResourceManagerGateway> {
 		return new TaskExecutorRegistrationSuccess(new InstanceID(), 5000);
 	}
 
-	private class ResourceManagerLeaderContender implements LeaderContender {
-
-		/**
-		 * Callback method when current resourceManager is granted leadership
-		 *
-		 * @param leaderSessionID unique leadershipID
-		 */
-		@Override
-		public void grantLeadership(final UUID leaderSessionID) {
-			runAsync(new Runnable() {
-				@Override
-				public void run() {
-					log.info("ResourceManager {} was granted leadership with leader session ID {}", getAddress(), leaderSessionID);
-					ResourceManager.this.leaderSessionID = leaderSessionID;
-					// confirming the leader session ID might be blocking,
-					leaderElectionService.confirmLeaderSessionID(leaderSessionID);
-				}
-			});
-		}
 
-		/**
-		 * Callback method when current resourceManager lose leadership.
-		 */
-		@Override
-		public void revokeLeadership() {
-			runAsync(new Runnable() {
-				@Override
-				public void run() {
-					log.info("ResourceManager {} was revoked leadership.", getAddress());
-					jobMasterGateways.clear();
-					leaderSessionID = null;
-				}
-			});
-		}
+	// ------------------------------------------------------------------------
+	//  Leader Contender
+	// ------------------------------------------------------------------------
 
-		@Override
-		public String getAddress() {
-			return ResourceManager.this.getAddress();
-		}
+	/**
+	 * Callback method when current resourceManager is granted leadership
+	 *
+	 * @param leaderSessionID unique leadershipID
+	 */
+	@Override
+	public void grantLeadership(final UUID leaderSessionID) {
+		runAsync(new Runnable() {
+			@Override
+			public void run() {
+				log.info("ResourceManager {} was granted leadership with leader session ID {}", getAddress(), leaderSessionID);
+				// confirming the leader session ID might be blocking,
+				leaderElectionService.confirmLeaderSessionID(leaderSessionID);
+				// notify SlotManager
+				slotManager.notifyLeaderAddress(getAddress(), leaderSessionID);
+				ResourceManager.this.leaderSessionID = leaderSessionID;
+			}
+		});
+	}
 
-		/**
-		 * Handles error occurring in the leader election service
-		 *
-		 * @param exception Exception being thrown in the leader election service
-		 */
-		@Override
-		public void handleError(final Exception exception) {
-			runAsync(new Runnable() {
-				@Override
-				public void run() {
-					log.error("ResourceManager received an error from the LeaderElectionService.", exception);
-					// terminate ResourceManager in case of an error
-					shutDown();
-				}
-			});
-		}
+	/**
+	 * Callback method when current resourceManager lose leadership.
+	 */
+	@Override
+	public void revokeLeadership() {
+		runAsync(new Runnable() {
+			@Override
+			public void run() {
+				log.info("ResourceManager {} was revoked leadership.", getAddress());
+				jobMasterGateways.clear();
+				ResourceManager.this.leaderSessionID = null;
+			}
+		});
+	}
+
+	/**
+	 * Handles error occurring in the leader election service
+	 *
+	 * @param exception Exception being thrown in the leader election service
+	 */
+	@Override
+	public void handleError(final Exception exception) {
+		runAsync(new Runnable() {
+			@Override
+			public void run() {
+				log.error("ResourceManager received an error from the LeaderElectionService.", exception);
+				// notify SlotManager
+				slotManager.handleError(exception);
+				// terminate ResourceManager in case of an error
+				shutDown();
+			}
+		});
 	}
 }

http://git-wip-us.apache.org/repos/asf/flink/blob/0a134a32/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/ResourceManagerGateway.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/ResourceManagerGateway.java b/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/ResourceManagerGateway.java
index b5782b0..e5c8b64 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/ResourceManagerGateway.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/ResourceManagerGateway.java
@@ -58,7 +58,7 @@ public interface ResourceManagerGateway extends RpcGateway {
 	 * @param slotRequest Slot request
 	 * @return Future slot assignment
 	 */
-	Future<SlotAssignment> requestSlot(SlotRequest slotRequest);
+	Future<SlotRequestRegistered> requestSlot(SlotRequest slotRequest);
 
 	/**
 	 * 

http://git-wip-us.apache.org/repos/asf/flink/blob/0a134a32/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/SlotAssignment.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/SlotAssignment.java b/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/SlotAssignment.java
deleted file mode 100644
index 695204d..0000000
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/SlotAssignment.java
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.flink.runtime.resourcemanager;
-
-import java.io.Serializable;
-
-public class SlotAssignment implements Serializable{
-	private static final long serialVersionUID = -6990813455942742322L;
-}

http://git-wip-us.apache.org/repos/asf/flink/blob/0a134a32/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/SlotManager.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/SlotManager.java b/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/SlotManager.java
deleted file mode 100644
index 5c06648..0000000
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/SlotManager.java
+++ /dev/null
@@ -1,523 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.flink.runtime.resourcemanager;
-
-import org.apache.flink.annotation.VisibleForTesting;
-import org.apache.flink.runtime.clusterframework.types.AllocationID;
-import org.apache.flink.runtime.clusterframework.types.ResourceID;
-import org.apache.flink.runtime.clusterframework.types.ResourceProfile;
-import org.apache.flink.runtime.clusterframework.types.ResourceSlot;
-import org.apache.flink.runtime.clusterframework.types.SlotID;
-import org.apache.flink.runtime.taskexecutor.SlotReport;
-import org.apache.flink.runtime.taskexecutor.SlotStatus;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.util.HashMap;
-import java.util.LinkedHashMap;
-import java.util.Map;
-
-import static org.apache.flink.util.Preconditions.checkNotNull;
-
-/**
- * SlotManager is responsible for receiving slot requests and do slot allocations. It allows to request
- * slots from registered TaskManagers and issues container allocation requests in case of there are not
- * enough available slots. Besides, it should sync its slot allocation with TaskManager's heartbeat.
- * <p>
- * The main operation principle of SlotManager is:
- * <ul>
- * <li>1. All slot allocation status should be synced with TaskManager, which is the ground truth.</li>
- * <li>2. All slots that have registered must be tracked, either by free pool or allocated pool.</li>
- * <li>3. All slot requests will be handled by best efforts, there is no guarantee that one request will be
- * fulfilled in time or correctly allocated. Conflicts or timeout or some special error will happen, it should
- * be handled outside SlotManager. SlotManager will make each decision based on the information it currently
- * holds.</li>
- * </ul>
- * <b>IMPORTANT:</b> This class is <b>Not Thread-safe</b>.
- */
-public abstract class SlotManager {
-
-	private static final Logger LOG = LoggerFactory.getLogger(SlotManager.class);
-
-	/** Gateway to communicate with ResourceManager */
-	private final ResourceManagerGateway resourceManagerGateway;
-
-	/** All registered slots, including free and allocated slots */
-	private final Map<ResourceID, Map<SlotID, ResourceSlot>> registeredSlots;
-
-	/** All pending slot requests, waiting available slots to fulfil */
-	private final Map<AllocationID, SlotRequest> pendingSlotRequests;
-
-	/** All free slots that can be used to be allocated */
-	private final Map<SlotID, ResourceSlot> freeSlots;
-
-	/** All allocations, we can lookup allocations either by SlotID or AllocationID */
-	private final AllocationMap allocationMap;
-
-	public SlotManager(ResourceManagerGateway resourceManagerGateway) {
-		this.resourceManagerGateway = checkNotNull(resourceManagerGateway);
-		this.registeredSlots = new HashMap<>(16);
-		this.pendingSlotRequests = new LinkedHashMap<>(16);
-		this.freeSlots = new HashMap<>(16);
-		this.allocationMap = new AllocationMap();
-	}
-
-	// ------------------------------------------------------------------------
-	//  slot managements
-	// ------------------------------------------------------------------------
-
-	/**
-	 * Request a slot with requirements, we may either fulfill the request or pending it. Trigger container
-	 * allocation if we don't have enough resource. If we have free slot which can match the request, record
-	 * this allocation and forward the request to TaskManager through ResourceManager (we want this done by
-	 * RPC's main thread to avoid race condition).
-	 *
-	 * @param request The detailed request of the slot
-	 */
-	public void requestSlot(final SlotRequest request) {
-		if (isRequestDuplicated(request)) {
-			LOG.warn("Duplicated slot request, AllocationID:{}", request.getAllocationId());
-			return;
-		}
-
-		// try to fulfil the request with current free slots
-		ResourceSlot slot = chooseSlotToUse(request, freeSlots);
-		if (slot != null) {
-			LOG.info("Assigning SlotID({}) to AllocationID({}), JobID:{}", slot.getSlotId(),
-				request.getAllocationId(), request.getJobId());
-
-			// record this allocation in bookkeeping
-			allocationMap.addAllocation(slot.getSlotId(), request.getAllocationId());
-
-			// remove selected slot from free pool
-			freeSlots.remove(slot.getSlotId());
-
-			// TODO: send slot request to TaskManager
-		} else {
-			LOG.info("Cannot fulfil slot request, try to allocate a new container for it, " +
-				"AllocationID:{}, JobID:{}", request.getAllocationId(), request.getJobId());
-			allocateContainer(request.getResourceProfile());
-			pendingSlotRequests.put(request.getAllocationId(), request);
-		}
-	}
-
-	/**
-	 * Sync slot status with TaskManager's SlotReport.
-	 */
-	public void updateSlotStatus(final SlotReport slotReport) {
-		for (SlotStatus slotStatus : slotReport.getSlotsStatus()) {
-			updateSlotStatus(slotStatus);
-		}
-	}
-
-	/**
-	 * The slot request to TaskManager may be either failed by rpc communication (timeout, network error, etc.)
-	 * or really rejected by TaskManager. We shall retry this request by:
-	 * <ul>
-	 * <li>1. verify and clear all the previous allocate information for this request
-	 * <li>2. try to request slot again
-	 * </ul>
-	 * <p>
-	 * This may cause some duplicate allocation, e.g. the slot request to TaskManager is successful but the response
-	 * is lost somehow, so we may request a slot in another TaskManager, this causes two slots assigned to one request,
-	 * but it can be taken care of by rejecting registration at JobManager.
-	 *
-	 * @param originalRequest The original slot request
-	 * @param slotId          The target SlotID
-	 */
-	public void handleSlotRequestFailedAtTaskManager(final SlotRequest originalRequest, final SlotID slotId) {
-		final AllocationID originalAllocationId = originalRequest.getAllocationId();
-		LOG.info("Slot request failed at TaskManager, SlotID:{}, AllocationID:{}, JobID:{}",
-			slotId, originalAllocationId, originalRequest.getJobId());
-
-		// verify the allocation info before we do anything
-		if (freeSlots.containsKey(slotId)) {
-			// this slot is currently empty, no need to de-allocate it from our allocations
-			LOG.info("Original slot is somehow empty, retrying this request");
-
-			// before retry, we should double check whether this request was allocated by some other ways
-			if (!allocationMap.isAllocated(originalAllocationId)) {
-				requestSlot(originalRequest);
-			} else {
-				LOG.info("The failed request has somehow been allocated, SlotID:{}",
-					allocationMap.getSlotID(originalAllocationId));
-			}
-		} else if (allocationMap.isAllocated(slotId)) {
-			final AllocationID currentAllocationId = allocationMap.getAllocationID(slotId);
-
-			// check whether we have an agreement on whom this slot belongs to
-			if (originalAllocationId.equals(currentAllocationId)) {
-				LOG.info("De-allocate this request and retry");
-				allocationMap.removeAllocation(currentAllocationId);
-
-				// put this slot back to free pool
-				ResourceSlot slot = checkNotNull(getRegisteredSlot(slotId));
-				freeSlots.put(slotId, slot);
-
-				// retry the request
-				requestSlot(originalRequest);
-			} else {
-				// the slot is taken by someone else, no need to de-allocate it from our allocations
-				LOG.info("Original slot is taken by someone else, current AllocationID:{}", currentAllocationId);
-
-				// before retry, we should double check whether this request was allocated by some other ways
-				if (!allocationMap.isAllocated(originalAllocationId)) {
-					requestSlot(originalRequest);
-				} else {
-					LOG.info("The failed request is somehow been allocated, SlotID:{}",
-						allocationMap.getSlotID(originalAllocationId));
-				}
-			}
-		} else {
-			LOG.error("BUG! {} is neither in free pool nor in allocated pool", slotId);
-		}
-	}
-
-	/**
-	 * Callback for TaskManager failures. In case that a TaskManager fails, we have to clean up all its slots.
-	 *
-	 * @param resourceId The ResourceID of the TaskManager
-	 */
-	public void notifyTaskManagerFailure(final ResourceID resourceId) {
-		LOG.info("Resource:{} been notified failure", resourceId);
-		final Map<SlotID, ResourceSlot> slotIdsToRemove = registeredSlots.remove(resourceId);
-		if (slotIdsToRemove != null) {
-			for (SlotID slotId : slotIdsToRemove.keySet()) {
-				LOG.info("Removing Slot:{} upon resource failure", slotId);
-				if (freeSlots.containsKey(slotId)) {
-					freeSlots.remove(slotId);
-				} else if (allocationMap.isAllocated(slotId)) {
-					allocationMap.removeAllocation(slotId);
-				} else {
-					LOG.error("BUG! {} is neither in free pool nor in allocated pool", slotId);
-				}
-			}
-		}
-	}
-
-	// ------------------------------------------------------------------------
-	//  internal behaviors
-	// ------------------------------------------------------------------------
-
-	/**
-	 * Update slot status based on TaskManager's report. There are mainly two situations when we receive the report:
-	 * <ul>
-	 * <li>1. The slot is newly registered.</li>
-	 * <li>2. The slot has registered, it contains its current status.</li>
-	 * </ul>
-	 * <p>
-	 * Regarding 1: It's fairly simple, we just record this slot's status, and trigger schedule if slot is empty.
-	 * <p>
-	 * Regarding 2: It will cause some weird situation since we may have some time-gap on how the slot's status really
-	 * is. We may have some updates on the slot's allocation, but it doesn't reflected by TaskManager's heartbeat yet,
-	 * and we may make some wrong decision if we cannot guarantee we have the exact status about all the slots. So
-	 * the principle here is: We always trust TaskManager's heartbeat, we will correct our information based on that
-	 * and take next action based on the diff between our information and heartbeat status.
-	 *
-	 * @param reportedStatus Reported slot status
-	 */
-	void updateSlotStatus(final SlotStatus reportedStatus) {
-		final SlotID slotId = reportedStatus.getSlotID();
-		final ResourceSlot slot = new ResourceSlot(slotId, reportedStatus.getProfiler());
-
-		if (registerNewSlot(slot)) {
-			// we have a newly registered slot
-			LOG.info("New slot appeared, SlotID:{}, AllocationID:{}", slotId, reportedStatus.getAllocationID());
-
-			if (reportedStatus.getAllocationID() != null) {
-				// slot in use, record this in bookkeeping
-				allocationMap.addAllocation(slotId, reportedStatus.getAllocationID());
-			} else {
-				handleFreeSlot(new ResourceSlot(slotId, reportedStatus.getProfiler()));
-			}
-		} else {
-			// slot exists, update current information
-			if (reportedStatus.getAllocationID() != null) {
-				// slot is reported in use
-				final AllocationID reportedAllocationId = reportedStatus.getAllocationID();
-
-				// check whether we also thought this slot is in use
-				if (allocationMap.isAllocated(slotId)) {
-					// we also think that slot is in use, check whether the AllocationID matches
-					final AllocationID currentAllocationId = allocationMap.getAllocationID(slotId);
-
-					if (!reportedAllocationId.equals(currentAllocationId)) {
-						LOG.info("Slot allocation info mismatch! SlotID:{}, current:{}, reported:{}",
-							slotId, currentAllocationId, reportedAllocationId);
-
-						// seems we have a disagreement about the slot assignments, need to correct it
-						allocationMap.removeAllocation(slotId);
-						allocationMap.addAllocation(slotId, reportedAllocationId);
-					}
-				} else {
-					LOG.info("Slot allocation info mismatch! SlotID:{}, current:null, reported:{}",
-						slotId, reportedAllocationId);
-
-					// we thought the slot is free, should correct this information
-					allocationMap.addAllocation(slotId, reportedStatus.getAllocationID());
-
-					// remove this slot from free slots pool
-					freeSlots.remove(slotId);
-				}
-			} else {
-				// slot is reported empty
-
-				// check whether we also thought this slot is empty
-				if (allocationMap.isAllocated(slotId)) {
-					LOG.info("Slot allocation info mismatch! SlotID:{}, current:{}, reported:null",
-						slotId, allocationMap.getAllocationID(slotId));
-
-					// we thought the slot is in use, correct it
-					allocationMap.removeAllocation(slotId);
-
-					// we have a free slot!
-					handleFreeSlot(new ResourceSlot(slotId, reportedStatus.getProfiler()));
-				}
-			}
-		}
-	}
-
-	/**
-	 * When we have a free slot, try to fulfill the pending request first. If any request can be fulfilled,
-	 * record this allocation in bookkeeping and send slot request to TaskManager, else we just add this slot
-	 * to the free pool.
-	 *
-	 * @param freeSlot The free slot
-	 */
-	private void handleFreeSlot(final ResourceSlot freeSlot) {
-		SlotRequest chosenRequest = chooseRequestToFulfill(freeSlot, pendingSlotRequests);
-
-		if (chosenRequest != null) {
-			pendingSlotRequests.remove(chosenRequest.getAllocationId());
-
-			LOG.info("Assigning SlotID({}) to AllocationID({}), JobID:{}", freeSlot.getSlotId(),
-				chosenRequest.getAllocationId(), chosenRequest.getJobId());
-			allocationMap.addAllocation(freeSlot.getSlotId(), chosenRequest.getAllocationId());
-
-			// TODO: send slot request to TaskManager
-		} else {
-			freeSlots.put(freeSlot.getSlotId(), freeSlot);
-		}
-	}
-
-	/**
-	 * Check whether the request is duplicated. We use AllocationID to identify slot request, for each
-	 * formerly received slot request, it is either in pending list or already been allocated.
-	 *
-	 * @param request The slot request
-	 * @return <tt>true</tt> if the request is duplicated
-	 */
-	private boolean isRequestDuplicated(final SlotRequest request) {
-		final AllocationID allocationId = request.getAllocationId();
-		return pendingSlotRequests.containsKey(allocationId)
-			|| allocationMap.isAllocated(allocationId);
-	}
-
-	/**
-	 * Try to register slot, and tell if this slot is newly registered.
-	 *
-	 * @param slot The ResourceSlot which will be checked and registered
-	 * @return <tt>true</tt> if we meet a new slot
-	 */
-	private boolean registerNewSlot(final ResourceSlot slot) {
-		final SlotID slotId = slot.getSlotId();
-		final ResourceID resourceId = slotId.getResourceID();
-		if (!registeredSlots.containsKey(resourceId)) {
-			registeredSlots.put(resourceId, new HashMap<SlotID, ResourceSlot>());
-		}
-		return registeredSlots.get(resourceId).put(slotId, slot) == null;
-	}
-
-	private ResourceSlot getRegisteredSlot(final SlotID slotId) {
-		final ResourceID resourceId = slotId.getResourceID();
-		if (!registeredSlots.containsKey(resourceId)) {
-			return null;
-		}
-		return registeredSlots.get(resourceId).get(slotId);
-	}
-
-	// ------------------------------------------------------------------------
-	//  Framework specific behavior
-	// ------------------------------------------------------------------------
-
-	/**
-	 * Choose a slot to use among all free slots, the behavior is framework specified.
-	 *
-	 * @param request   The slot request
-	 * @param freeSlots All slots which can be used
-	 * @return The slot we choose to use, <tt>null</tt> if we did not find a match
-	 */
-	protected abstract ResourceSlot chooseSlotToUse(final SlotRequest request,
-		final Map<SlotID, ResourceSlot> freeSlots);
-
-	/**
-	 * Choose a pending request to fulfill when we have a free slot, the behavior is framework specified.
-	 *
-	 * @param offeredSlot     The free slot
-	 * @param pendingRequests All the pending slot requests
-	 * @return The chosen SlotRequest, <tt>null</tt> if we did not find a match
-	 */
-	protected abstract SlotRequest chooseRequestToFulfill(final ResourceSlot offeredSlot,
-		final Map<AllocationID, SlotRequest> pendingRequests);
-
-	/**
-	 * The framework specific code for allocating a container for specified resource profile.
-	 *
-	 * @param resourceProfile The resource profile
-	 */
-	protected abstract void allocateContainer(final ResourceProfile resourceProfile);
-
-
-	// ------------------------------------------------------------------------
-	//  Helper classes
-	// ------------------------------------------------------------------------
-
-	/**
-	 * We maintain all the allocations with SlotID and AllocationID. We are able to get or remove the allocation info
-	 * either by SlotID or AllocationID.
-	 */
-	private static class AllocationMap {
-
-		/** All allocated slots (by SlotID) */
-		private final Map<SlotID, AllocationID> allocatedSlots;
-
-		/** All allocated slots (by AllocationID), it'a a inverse view of allocatedSlots */
-		private final Map<AllocationID, SlotID> allocatedSlotsByAllocationId;
-
-		AllocationMap() {
-			this.allocatedSlots = new HashMap<>(16);
-			this.allocatedSlotsByAllocationId = new HashMap<>(16);
-		}
-
-		/**
-		 * Add a allocation
-		 *
-		 * @param slotId       The slot id
-		 * @param allocationId The allocation id
-		 */
-		void addAllocation(final SlotID slotId, final AllocationID allocationId) {
-			allocatedSlots.put(slotId, allocationId);
-			allocatedSlotsByAllocationId.put(allocationId, slotId);
-		}
-
-		/**
-		 * De-allocation with slot id
-		 *
-		 * @param slotId The slot id
-		 */
-		void removeAllocation(final SlotID slotId) {
-			if (allocatedSlots.containsKey(slotId)) {
-				final AllocationID allocationId = allocatedSlots.get(slotId);
-				allocatedSlots.remove(slotId);
-				allocatedSlotsByAllocationId.remove(allocationId);
-			}
-		}
-
-		/**
-		 * De-allocation with allocation id
-		 *
-		 * @param allocationId The allocation id
-		 */
-		void removeAllocation(final AllocationID allocationId) {
-			if (allocatedSlotsByAllocationId.containsKey(allocationId)) {
-				SlotID slotId = allocatedSlotsByAllocationId.get(allocationId);
-				allocatedSlotsByAllocationId.remove(allocationId);
-				allocatedSlots.remove(slotId);
-			}
-		}
-
-		/**
-		 * Check whether allocation exists by slot id
-		 *
-		 * @param slotId The slot id
-		 * @return true if the allocation exists
-		 */
-		boolean isAllocated(final SlotID slotId) {
-			return allocatedSlots.containsKey(slotId);
-		}
-
-		/**
-		 * Check whether allocation exists by allocation id
-		 *
-		 * @param allocationId The allocation id
-		 * @return true if the allocation exists
-		 */
-		boolean isAllocated(final AllocationID allocationId) {
-			return allocatedSlotsByAllocationId.containsKey(allocationId);
-		}
-
-		AllocationID getAllocationID(final SlotID slotId) {
-			return allocatedSlots.get(slotId);
-		}
-
-		SlotID getSlotID(final AllocationID allocationId) {
-			return allocatedSlotsByAllocationId.get(allocationId);
-		}
-
-		public int size() {
-			return allocatedSlots.size();
-		}
-	}
-
-	// ------------------------------------------------------------------------
-	//  Testing utilities
-	// ------------------------------------------------------------------------
-
-	@VisibleForTesting
-	boolean isAllocated(final SlotID slotId) {
-		return allocationMap.isAllocated(slotId);
-	}
-
-	@VisibleForTesting
-	boolean isAllocated(final AllocationID allocationId) {
-		return allocationMap.isAllocated(allocationId);
-	}
-
-	/**
-	 * Add free slots directly to the free pool, this will not trigger pending requests allocation
-	 *
-	 * @param slot The resource slot
-	 */
-	@VisibleForTesting
-	void addFreeSlot(final ResourceSlot slot) {
-		final ResourceID resourceId = slot.getResourceID();
-		final SlotID slotId = slot.getSlotId();
-
-		if (!registeredSlots.containsKey(resourceId)) {
-			registeredSlots.put(resourceId, new HashMap<SlotID, ResourceSlot>());
-		}
-		registeredSlots.get(resourceId).put(slot.getSlotId(), slot);
-		freeSlots.put(slotId, slot);
-	}
-
-	@VisibleForTesting
-	int getAllocatedSlotCount() {
-		return allocationMap.size();
-	}
-
-	@VisibleForTesting
-	int getFreeSlotCount() {
-		return freeSlots.size();
-	}
-
-	@VisibleForTesting
-	int getPendingRequestCount() {
-		return pendingSlotRequests.size();
-	}
-}

http://git-wip-us.apache.org/repos/asf/flink/blob/0a134a32/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/SlotRequestRegistered.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/SlotRequestRegistered.java b/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/SlotRequestRegistered.java
new file mode 100644
index 0000000..6b7f6dc
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/SlotRequestRegistered.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.resourcemanager;
+
+import org.apache.flink.runtime.clusterframework.types.AllocationID;
+
+import java.io.Serializable;
+
+/**
+ * Acknowledgment by the ResourceManager for a SlotRequest from the JobManager
+ */
+public class SlotRequestRegistered extends SlotRequestReply {
+
+	public SlotRequestRegistered(AllocationID allocationID) {
+		super(allocationID);
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/0a134a32/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/SlotRequestRejected.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/SlotRequestRejected.java b/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/SlotRequestRejected.java
new file mode 100644
index 0000000..cb3ec72
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/SlotRequestRejected.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.resourcemanager;
+
+import org.apache.flink.runtime.clusterframework.types.AllocationID;
+
+import java.io.Serializable;
+
+/**
+ * Rejection message by the ResourceManager for a SlotRequest from the JobManager
+ */
+public class SlotRequestRejected extends SlotRequestReply {
+
+	public SlotRequestRejected(AllocationID allocationID) {
+		super(allocationID);
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/0a134a32/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/SlotRequestReply.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/SlotRequestReply.java b/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/SlotRequestReply.java
new file mode 100644
index 0000000..1b85d0c
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/SlotRequestReply.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.resourcemanager;
+
+import org.apache.flink.runtime.clusterframework.types.AllocationID;
+
+import java.io.Serializable;
+
+/**
+ * Acknowledgment by the ResourceManager for a SlotRequest from the JobManager
+ */
+public abstract class SlotRequestReply implements Serializable {
+
+	private static final long serialVersionUID = 42;
+
+	private final AllocationID allocationID;
+
+	public SlotRequestReply(AllocationID allocationID) {
+		this.allocationID = allocationID;
+	}
+
+	public AllocationID getAllocationID() {
+		return allocationID;
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/0a134a32/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/slotmanager/SimpleSlotManager.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/slotmanager/SimpleSlotManager.java b/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/slotmanager/SimpleSlotManager.java
new file mode 100644
index 0000000..ef5ce31
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/slotmanager/SimpleSlotManager.java
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.flink.runtime.resourcemanager.slotmanager;
+
+import org.apache.flink.runtime.clusterframework.types.AllocationID;
+import org.apache.flink.runtime.clusterframework.types.ResourceProfile;
+import org.apache.flink.runtime.clusterframework.types.ResourceSlot;
+import org.apache.flink.runtime.clusterframework.types.SlotID;
+import org.apache.flink.runtime.resourcemanager.SlotRequest;
+
+import java.util.Iterator;
+import java.util.Map;
+
+/**
+ * A simple SlotManager which ignores resource profiles.
+ */
+public class SimpleSlotManager extends SlotManager {
+
+	@Override
+	protected ResourceSlot chooseSlotToUse(SlotRequest request, Map<SlotID, ResourceSlot> freeSlots) {
+		final Iterator<ResourceSlot> slotIterator = freeSlots.values().iterator();
+		if (slotIterator.hasNext()) {
+			return slotIterator.next();
+		} else {
+			return null;
+		}
+	}
+
+	@Override
+	protected SlotRequest chooseRequestToFulfill(ResourceSlot offeredSlot, Map<AllocationID, SlotRequest> pendingRequests) {
+		final Iterator<SlotRequest> requestIterator = pendingRequests.values().iterator();
+		if (requestIterator.hasNext()) {
+			return requestIterator.next();
+		} else {
+			return null;
+		}
+	}
+
+	@Override
+	protected void allocateContainer(ResourceProfile resourceProfile) {
+		// TODO
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/0a134a32/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/slotmanager/SlotManager.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/slotmanager/SlotManager.java b/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/slotmanager/SlotManager.java
new file mode 100644
index 0000000..96fde7d
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/resourcemanager/slotmanager/SlotManager.java
@@ -0,0 +1,579 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.resourcemanager.slotmanager;
+
+import org.apache.flink.annotation.VisibleForTesting;
+import org.apache.flink.runtime.clusterframework.types.AllocationID;
+import org.apache.flink.runtime.clusterframework.types.ResourceID;
+import org.apache.flink.runtime.clusterframework.types.ResourceProfile;
+import org.apache.flink.runtime.clusterframework.types.ResourceSlot;
+import org.apache.flink.runtime.clusterframework.types.SlotID;
+import org.apache.flink.runtime.leaderretrieval.LeaderRetrievalListener;
+import org.apache.flink.runtime.resourcemanager.SlotRequest;
+import org.apache.flink.runtime.resourcemanager.SlotRequestRegistered;
+import org.apache.flink.runtime.resourcemanager.SlotRequestReply;
+import org.apache.flink.runtime.taskexecutor.SlotReport;
+import org.apache.flink.runtime.taskexecutor.SlotStatus;
+import org.apache.flink.runtime.taskexecutor.TaskExecutorGateway;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import scala.concurrent.Future;
+import scala.concurrent.duration.FiniteDuration;
+
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.UUID;
+import java.util.concurrent.TimeUnit;
+
+import static org.apache.flink.util.Preconditions.checkNotNull;
+
+/**
+ * SlotManager is responsible for receiving slot requests and do slot allocations. It allows to request
+ * slots from registered TaskManagers and issues container allocation requests in case of there are not
+ * enough available slots. Besides, it should sync its slot allocation with TaskManager's heartbeat.
+ * <p>
+ * The main operation principle of SlotManager is:
+ * <ul>
+ * <li>1. All slot allocation status should be synced with TaskManager, which is the ground truth.</li>
+ * <li>2. All slots that have registered must be tracked, either by free pool or allocated pool.</li>
+ * <li>3. All slot requests will be handled by best efforts, there is no guarantee that one request will be
+ * fulfilled in time or correctly allocated. Conflicts or timeout or some special error will happen, it should
+ * be handled outside SlotManager. SlotManager will make each decision based on the information it currently
+ * holds.</li>
+ * </ul>
+ * <b>IMPORTANT:</b> This class is <b>Not Thread-safe</b>.
+ */
+public abstract class SlotManager implements LeaderRetrievalListener {
+
+	protected final Logger LOG = LoggerFactory.getLogger(getClass());
+
+	/** All registered task managers with ResourceID and gateway. */
+	private final Map<ResourceID, TaskExecutorGateway> taskManagerGateways;
+
+	/** All registered slots, including free and allocated slots */
+	private final Map<ResourceID, Map<SlotID, ResourceSlot>> registeredSlots;
+
+	/** All pending slot requests, waiting available slots to fulfil */
+	private final Map<AllocationID, SlotRequest> pendingSlotRequests;
+
+	/** All free slots that can be used to be allocated */
+	private final Map<SlotID, ResourceSlot> freeSlots;
+
+	/** All allocations, we can lookup allocations either by SlotID or AllocationID */
+	private final AllocationMap allocationMap;
+
+	private final FiniteDuration timeout;
+
+	/** The current leader id set by the ResourceManager */
+	private UUID leaderID;
+
+	public SlotManager() {
+		this.registeredSlots = new HashMap<>(16);
+		this.pendingSlotRequests = new LinkedHashMap<>(16);
+		this.freeSlots = new HashMap<>(16);
+		this.allocationMap = new AllocationMap();
+		this.taskManagerGateways = new HashMap<>();
+		this.timeout = new FiniteDuration(10, TimeUnit.SECONDS);
+	}
+
+
+	// ------------------------------------------------------------------------
+	//  slot managements
+	// ------------------------------------------------------------------------
+
+	/**
+	 * Request a slot with requirements, we may either fulfill the request or pending it. Trigger container
+	 * allocation if we don't have enough resource. If we have free slot which can match the request, record
+	 * this allocation and forward the request to TaskManager through ResourceManager (we want this done by
+	 * RPC's main thread to avoid race condition).
+	 *
+	 * @param request The detailed request of the slot
+	 * @return SlotRequestRegistered The confirmation message to be send to the caller
+	 */
+	public SlotRequestRegistered requestSlot(final SlotRequest request) {
+		final AllocationID allocationId = request.getAllocationId();
+		if (isRequestDuplicated(request)) {
+			LOG.warn("Duplicated slot request, AllocationID:{}", allocationId);
+			return null;
+		}
+
+		// try to fulfil the request with current free slots
+		final ResourceSlot slot = chooseSlotToUse(request, freeSlots);
+		if (slot != null) {
+			LOG.info("Assigning SlotID({}) to AllocationID({}), JobID:{}", slot.getSlotId(),
+				allocationId, request.getJobId());
+
+			// record this allocation in bookkeeping
+			allocationMap.addAllocation(slot.getSlotId(), allocationId);
+
+			// remove selected slot from free pool
+			freeSlots.remove(slot.getSlotId());
+
+			final Future<SlotRequestReply> slotRequestReplyFuture =
+				slot.getTaskExecutorGateway().requestSlot(allocationId, leaderID, timeout);
+			// TODO handle timeouts and response
+		} else {
+			LOG.info("Cannot fulfil slot request, try to allocate a new container for it, " +
+				"AllocationID:{}, JobID:{}", allocationId, request.getJobId());
+			allocateContainer(request.getResourceProfile());
+			pendingSlotRequests.put(allocationId, request);
+		}
+
+		return new SlotRequestRegistered(allocationId);
+	}
+
+	/**
+	 * Sync slot status with TaskManager's SlotReport.
+	 */
+	public void updateSlotStatus(final SlotReport slotReport) {
+		for (SlotStatus slotStatus : slotReport.getSlotsStatus()) {
+			updateSlotStatus(slotStatus);
+		}
+	}
+
+	/**
+	 * Registers a TaskExecutor
+	 * @param resourceID TaskExecutor's ResourceID
+	 * @param gateway TaskExcutor's gateway
+	 */
+	public void registerTaskExecutor(ResourceID resourceID, TaskExecutorGateway gateway) {
+		this.taskManagerGateways.put(resourceID, gateway);
+	}
+
+	/**
+	 * The slot request to TaskManager may be either failed by rpc communication (timeout, network error, etc.)
+	 * or really rejected by TaskManager. We shall retry this request by:
+	 * <ul>
+	 * <li>1. verify and clear all the previous allocate information for this request
+	 * <li>2. try to request slot again
+	 * </ul>
+	 * <p>
+	 * This may cause some duplicate allocation, e.g. the slot request to TaskManager is successful but the response
+	 * is lost somehow, so we may request a slot in another TaskManager, this causes two slots assigned to one request,
+	 * but it can be taken care of by rejecting registration at JobManager.
+	 *
+	 * @param originalRequest The original slot request
+	 * @param slotId          The target SlotID
+	 */
+	public void handleSlotRequestFailedAtTaskManager(final SlotRequest originalRequest, final SlotID slotId) {
+		final AllocationID originalAllocationId = originalRequest.getAllocationId();
+		LOG.info("Slot request failed at TaskManager, SlotID:{}, AllocationID:{}, JobID:{}",
+			slotId, originalAllocationId, originalRequest.getJobId());
+
+		// verify the allocation info before we do anything
+		if (freeSlots.containsKey(slotId)) {
+			// this slot is currently empty, no need to de-allocate it from our allocations
+			LOG.info("Original slot is somehow empty, retrying this request");
+
+			// before retry, we should double check whether this request was allocated by some other ways
+			if (!allocationMap.isAllocated(originalAllocationId)) {
+				requestSlot(originalRequest);
+			} else {
+				LOG.info("The failed request has somehow been allocated, SlotID:{}",
+					allocationMap.getSlotID(originalAllocationId));
+			}
+		} else if (allocationMap.isAllocated(slotId)) {
+			final AllocationID currentAllocationId = allocationMap.getAllocationID(slotId);
+
+			// check whether we have an agreement on whom this slot belongs to
+			if (originalAllocationId.equals(currentAllocationId)) {
+				LOG.info("De-allocate this request and retry");
+				allocationMap.removeAllocation(currentAllocationId);
+
+				// put this slot back to free pool
+				ResourceSlot slot = checkNotNull(getRegisteredSlot(slotId));
+				freeSlots.put(slotId, slot);
+
+				// retry the request
+				requestSlot(originalRequest);
+			} else {
+				// the slot is taken by someone else, no need to de-allocate it from our allocations
+				LOG.info("Original slot is taken by someone else, current AllocationID:{}", currentAllocationId);
+
+				// before retry, we should double check whether this request was allocated by some other ways
+				if (!allocationMap.isAllocated(originalAllocationId)) {
+					requestSlot(originalRequest);
+				} else {
+					LOG.info("The failed request is somehow been allocated, SlotID:{}",
+						allocationMap.getSlotID(originalAllocationId));
+				}
+			}
+		} else {
+			LOG.error("BUG! {} is neither in free pool nor in allocated pool", slotId);
+		}
+	}
+
+	/**
+	 * Callback for TaskManager failures. In case that a TaskManager fails, we have to clean up all its slots.
+	 *
+	 * @param resourceId The ResourceID of the TaskManager
+	 */
+	public void notifyTaskManagerFailure(final ResourceID resourceId) {
+		LOG.info("Resource:{} been notified failure", resourceId);
+		taskManagerGateways.remove(resourceId);
+		final Map<SlotID, ResourceSlot> slotIdsToRemove = registeredSlots.remove(resourceId);
+		if (slotIdsToRemove != null) {
+			for (SlotID slotId : slotIdsToRemove.keySet()) {
+				LOG.info("Removing Slot: {} upon resource failure", slotId);
+				if (freeSlots.containsKey(slotId)) {
+					freeSlots.remove(slotId);
+				} else if (allocationMap.isAllocated(slotId)) {
+					allocationMap.removeAllocation(slotId);
+				} else {
+					LOG.error("BUG! {} is neither in free pool nor in allocated pool", slotId);
+				}
+			}
+		}
+	}
+
+	// ------------------------------------------------------------------------
+	//  internal behaviors
+	// ------------------------------------------------------------------------
+
+	/**
+	 * Update slot status based on TaskManager's report. There are mainly two situations when we receive the report:
+	 * <ul>
+	 * <li>1. The slot is newly registered.</li>
+	 * <li>2. The slot has registered, it contains its current status.</li>
+	 * </ul>
+	 * <p>
+	 * Regarding 1: It's fairly simple, we just record this slot's status, and trigger schedule if slot is empty.
+	 * <p>
+	 * Regarding 2: It will cause some weird situation since we may have some time-gap on how the slot's status really
+	 * is. We may have some updates on the slot's allocation, but it doesn't reflected by TaskManager's heartbeat yet,
+	 * and we may make some wrong decision if we cannot guarantee we have the exact status about all the slots. So
+	 * the principle here is: We always trust TaskManager's heartbeat, we will correct our information based on that
+	 * and take next action based on the diff between our information and heartbeat status.
+	 *
+	 * @param reportedStatus Reported slot status
+	 */
+	void updateSlotStatus(final SlotStatus reportedStatus) {
+		final SlotID slotId = reportedStatus.getSlotID();
+
+		final TaskExecutorGateway taskExecutorGateway = taskManagerGateways.get(slotId.getResourceID());
+		if (taskExecutorGateway == null) {
+			LOG.info("Received SlotStatus but ResourceID {} is unknown to the SlotManager",
+				slotId.getResourceID());
+			return;
+		}
+
+		final ResourceSlot slot = new ResourceSlot(slotId, reportedStatus.getProfiler(), taskExecutorGateway);
+
+		if (registerNewSlot(slot)) {
+			// we have a newly registered slot
+			LOG.info("New slot appeared, SlotID:{}, AllocationID:{}", slotId, reportedStatus.getAllocationID());
+
+			if (reportedStatus.getAllocationID() != null) {
+				// slot in use, record this in bookkeeping
+				allocationMap.addAllocation(slotId, reportedStatus.getAllocationID());
+			} else {
+				handleFreeSlot(slot);
+			}
+		} else {
+			// slot exists, update current information
+			if (reportedStatus.getAllocationID() != null) {
+				// slot is reported in use
+				final AllocationID reportedAllocationId = reportedStatus.getAllocationID();
+
+				// check whether we also thought this slot is in use
+				if (allocationMap.isAllocated(slotId)) {
+					// we also think that slot is in use, check whether the AllocationID matches
+					final AllocationID currentAllocationId = allocationMap.getAllocationID(slotId);
+
+					if (!reportedAllocationId.equals(currentAllocationId)) {
+						LOG.info("Slot allocation info mismatch! SlotID:{}, current:{}, reported:{}",
+							slotId, currentAllocationId, reportedAllocationId);
+
+						// seems we have a disagreement about the slot assignments, need to correct it
+						allocationMap.removeAllocation(slotId);
+						allocationMap.addAllocation(slotId, reportedAllocationId);
+					}
+				} else {
+					LOG.info("Slot allocation info mismatch! SlotID:{}, current:null, reported:{}",
+						slotId, reportedAllocationId);
+
+					// we thought the slot is free, should correct this information
+					allocationMap.addAllocation(slotId, reportedStatus.getAllocationID());
+
+					// remove this slot from free slots pool
+					freeSlots.remove(slotId);
+				}
+			} else {
+				// slot is reported empty
+
+				// check whether we also thought this slot is empty
+				if (allocationMap.isAllocated(slotId)) {
+					LOG.info("Slot allocation info mismatch! SlotID:{}, current:{}, reported:null",
+						slotId, allocationMap.getAllocationID(slotId));
+
+					// we thought the slot is in use, correct it
+					allocationMap.removeAllocation(slotId);
+
+					// we have a free slot!
+					handleFreeSlot(slot);
+				}
+			}
+		}
+	}
+
+	/**
+	 * When we have a free slot, try to fulfill the pending request first. If any request can be fulfilled,
+	 * record this allocation in bookkeeping and send slot request to TaskManager, else we just add this slot
+	 * to the free pool.
+	 *
+	 * @param freeSlot The free slot
+	 */
+	private void handleFreeSlot(final ResourceSlot freeSlot) {
+		SlotRequest chosenRequest = chooseRequestToFulfill(freeSlot, pendingSlotRequests);
+
+		if (chosenRequest != null) {
+			final AllocationID allocationId = chosenRequest.getAllocationId();
+			pendingSlotRequests.remove(allocationId);
+
+			LOG.info("Assigning SlotID({}) to AllocationID({}), JobID:{}", freeSlot.getSlotId(),
+				allocationId, chosenRequest.getJobId());
+			allocationMap.addAllocation(freeSlot.getSlotId(), allocationId);
+
+			final Future<SlotRequestReply> slotRequestReplyFuture =
+				freeSlot.getTaskExecutorGateway().requestSlot(allocationId, leaderID, timeout);
+			// TODO handle timeouts and response
+		} else {
+			freeSlots.put(freeSlot.getSlotId(), freeSlot);
+		}
+	}
+
+	/**
+	 * Check whether the request is duplicated. We use AllocationID to identify slot request, for each
+	 * formerly received slot request, it is either in pending list or already been allocated.
+	 *
+	 * @param request The slot request
+	 * @return <tt>true</tt> if the request is duplicated
+	 */
+	private boolean isRequestDuplicated(final SlotRequest request) {
+		final AllocationID allocationId = request.getAllocationId();
+		return pendingSlotRequests.containsKey(allocationId)
+			|| allocationMap.isAllocated(allocationId);
+	}
+
+	/**
+	 * Try to register slot, and tell if this slot is newly registered.
+	 *
+	 * @param slot The ResourceSlot which will be checked and registered
+	 * @return <tt>true</tt> if we meet a new slot
+	 */
+	private boolean registerNewSlot(final ResourceSlot slot) {
+		final SlotID slotId = slot.getSlotId();
+		final ResourceID resourceId = slotId.getResourceID();
+		if (!registeredSlots.containsKey(resourceId)) {
+			registeredSlots.put(resourceId, new HashMap<SlotID, ResourceSlot>());
+		}
+		return registeredSlots.get(resourceId).put(slotId, slot) == null;
+	}
+
+	private ResourceSlot getRegisteredSlot(final SlotID slotId) {
+		final ResourceID resourceId = slotId.getResourceID();
+		if (!registeredSlots.containsKey(resourceId)) {
+			return null;
+		}
+		return registeredSlots.get(resourceId).get(slotId);
+	}
+
+	// ------------------------------------------------------------------------
+	//  Framework specific behavior
+	// ------------------------------------------------------------------------
+
+	/**
+	 * Choose a slot to use among all free slots, the behavior is framework specified.
+	 *
+	 * @param request   The slot request
+	 * @param freeSlots All slots which can be used
+	 * @return The slot we choose to use, <tt>null</tt> if we did not find a match
+	 */
+	protected abstract ResourceSlot chooseSlotToUse(final SlotRequest request,
+		final Map<SlotID, ResourceSlot> freeSlots);
+
+	/**
+	 * Choose a pending request to fulfill when we have a free slot, the behavior is framework specified.
+	 *
+	 * @param offeredSlot     The free slot
+	 * @param pendingRequests All the pending slot requests
+	 * @return The chosen SlotRequest, <tt>null</tt> if we did not find a match
+	 */
+	protected abstract SlotRequest chooseRequestToFulfill(final ResourceSlot offeredSlot,
+		final Map<AllocationID, SlotRequest> pendingRequests);
+
+	/**
+	 * The framework specific code for allocating a container for specified resource profile.
+	 *
+	 * @param resourceProfile The resource profile
+	 */
+	protected abstract void allocateContainer(final ResourceProfile resourceProfile);
+
+	// ------------------------------------------------------------------------
+	//  Helper classes
+	// ------------------------------------------------------------------------
+
+	/**
+	 * We maintain all the allocations with SlotID and AllocationID. We are able to get or remove the allocation info
+	 * either by SlotID or AllocationID.
+	 */
+	private static class AllocationMap {
+
+		/** All allocated slots (by SlotID) */
+		private final Map<SlotID, AllocationID> allocatedSlots;
+
+		/** All allocated slots (by AllocationID), it'a a inverse view of allocatedSlots */
+		private final Map<AllocationID, SlotID> allocatedSlotsByAllocationId;
+
+		AllocationMap() {
+			this.allocatedSlots = new HashMap<>(16);
+			this.allocatedSlotsByAllocationId = new HashMap<>(16);
+		}
+
+		/**
+		 * Add a allocation
+		 *
+		 * @param slotId       The slot id
+		 * @param allocationId The allocation id
+		 */
+		void addAllocation(final SlotID slotId, final AllocationID allocationId) {
+			allocatedSlots.put(slotId, allocationId);
+			allocatedSlotsByAllocationId.put(allocationId, slotId);
+		}
+
+		/**
+		 * De-allocation with slot id
+		 *
+		 * @param slotId The slot id
+		 */
+		void removeAllocation(final SlotID slotId) {
+			if (allocatedSlots.containsKey(slotId)) {
+				final AllocationID allocationId = allocatedSlots.get(slotId);
+				allocatedSlots.remove(slotId);
+				allocatedSlotsByAllocationId.remove(allocationId);
+			}
+		}
+
+		/**
+		 * De-allocation with allocation id
+		 *
+		 * @param allocationId The allocation id
+		 */
+		void removeAllocation(final AllocationID allocationId) {
+			if (allocatedSlotsByAllocationId.containsKey(allocationId)) {
+				SlotID slotId = allocatedSlotsByAllocationId.get(allocationId);
+				allocatedSlotsByAllocationId.remove(allocationId);
+				allocatedSlots.remove(slotId);
+			}
+		}
+
+		/**
+		 * Check whether allocation exists by slot id
+		 *
+		 * @param slotId The slot id
+		 * @return true if the allocation exists
+		 */
+		boolean isAllocated(final SlotID slotId) {
+			return allocatedSlots.containsKey(slotId);
+		}
+
+		/**
+		 * Check whether allocation exists by allocation id
+		 *
+		 * @param allocationId The allocation id
+		 * @return true if the allocation exists
+		 */
+		boolean isAllocated(final AllocationID allocationId) {
+			return allocatedSlotsByAllocationId.containsKey(allocationId);
+		}
+
+		AllocationID getAllocationID(final SlotID slotId) {
+			return allocatedSlots.get(slotId);
+		}
+
+		SlotID getSlotID(final AllocationID allocationId) {
+			return allocatedSlotsByAllocationId.get(allocationId);
+		}
+
+		public int size() {
+			return allocatedSlots.size();
+		}
+	}
+
+	// ------------------------------------------------------------------------
+	//  High availability
+	// ------------------------------------------------------------------------
+
+	@Override
+	public void notifyLeaderAddress(String leaderAddress, UUID leaderSessionID) {
+		this.leaderID = leaderSessionID;
+	}
+
+	@Override
+	public void handleError(Exception exception) {
+		LOG.error("Slot Manager received an error from the leader service", exception);
+	}
+
+	// ------------------------------------------------------------------------
+	//  Testing utilities
+	// ------------------------------------------------------------------------
+
+	@VisibleForTesting
+	boolean isAllocated(final SlotID slotId) {
+		return allocationMap.isAllocated(slotId);
+	}
+
+	@VisibleForTesting
+	boolean isAllocated(final AllocationID allocationId) {
+		return allocationMap.isAllocated(allocationId);
+	}
+
+	/**
+	 * Add free slots directly to the free pool, this will not trigger pending requests allocation
+	 *
+	 * @param slot The resource slot
+	 */
+	@VisibleForTesting
+	void addFreeSlot(final ResourceSlot slot) {
+		final ResourceID resourceId = slot.getResourceID();
+		final SlotID slotId = slot.getSlotId();
+
+		if (!registeredSlots.containsKey(resourceId)) {
+			registeredSlots.put(resourceId, new HashMap<SlotID, ResourceSlot>());
+		}
+		registeredSlots.get(resourceId).put(slot.getSlotId(), slot);
+		freeSlots.put(slotId, slot);
+	}
+
+	@VisibleForTesting
+	int getAllocatedSlotCount() {
+		return allocationMap.size();
+	}
+
+	@VisibleForTesting
+	int getFreeSlotCount() {
+		return freeSlots.size();
+	}
+
+	@VisibleForTesting
+	int getPendingRequestCount() {
+		return pendingSlotRequests.size();
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/0a134a32/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/SlotStatus.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/SlotStatus.java b/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/SlotStatus.java
index 744b674..0f57bb1 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/SlotStatus.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/SlotStatus.java
@@ -50,7 +50,10 @@ public class SlotStatus implements Serializable {
 		this(slotID, profiler, null, null);
 	}
 
-	public SlotStatus(SlotID slotID, ResourceProfile profiler, AllocationID allocationID, JobID jobID) {
+	public SlotStatus(
+			SlotID slotID, ResourceProfile profiler,
+			JobID jobID,
+			AllocationID allocationID) {
 		this.slotID = checkNotNull(slotID, "slotID cannot be null");
 		this.profiler = checkNotNull(profiler, "profile cannot be null");
 		this.allocationID = allocationID;

http://git-wip-us.apache.org/repos/asf/flink/blob/0a134a32/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/TaskExecutorGateway.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/TaskExecutorGateway.java b/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/TaskExecutorGateway.java
index 6c99706..7257436 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/TaskExecutorGateway.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/TaskExecutorGateway.java
@@ -18,7 +18,12 @@
 
 package org.apache.flink.runtime.taskexecutor;
 
+import org.apache.flink.runtime.clusterframework.types.AllocationID;
+import org.apache.flink.runtime.resourcemanager.SlotRequestReply;
 import org.apache.flink.runtime.rpc.RpcGateway;
+import org.apache.flink.runtime.rpc.RpcTimeout;
+import scala.concurrent.Future;
+import scala.concurrent.duration.FiniteDuration;
 
 import java.util.UUID;
 
@@ -32,4 +37,16 @@ public interface TaskExecutorGateway extends RpcGateway {
 	// ------------------------------------------------------------------------
 
 	void notifyOfNewResourceManagerLeader(String address, UUID resourceManagerLeaderId);
+
+	/**
+	 * Send by the ResourceManager to the TaskExecutor
+	 * @param allocationID id for the request
+	 * @param resourceManagerLeaderID current leader id of the ResourceManager
+	 * @return SlotRequestReply Answer to the request
+	 */
+
+	Future<SlotRequestReply> requestSlot(
+		AllocationID allocationID,
+		UUID resourceManagerLeaderID,
+		@RpcTimeout FiniteDuration timeout);
 }

http://git-wip-us.apache.org/repos/asf/flink/blob/0a134a32/flink-runtime/src/test/java/org/apache/flink/runtime/resourcemanager/ResourceManagerHATest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/resourcemanager/ResourceManagerHATest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/resourcemanager/ResourceManagerHATest.java
index 5799e62..8183c0a 100644
--- a/flink-runtime/src/test/java/org/apache/flink/runtime/resourcemanager/ResourceManagerHATest.java
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/resourcemanager/ResourceManagerHATest.java
@@ -20,6 +20,7 @@ package org.apache.flink.runtime.resourcemanager;
 
 import org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices;
 import org.apache.flink.runtime.leaderelection.TestingLeaderElectionService;
+import org.apache.flink.runtime.resourcemanager.slotmanager.SlotManager;
 import org.apache.flink.runtime.rpc.MainThreadExecutor;
 import org.apache.flink.runtime.rpc.RpcEndpoint;
 import org.apache.flink.runtime.rpc.RpcGateway;
@@ -53,7 +54,8 @@ public class ResourceManagerHATest {
 		TestingHighAvailabilityServices highAvailabilityServices = new TestingHighAvailabilityServices();
 		highAvailabilityServices.setResourceManagerLeaderElectionService(leaderElectionService);
 
-		final ResourceManager resourceManager = new ResourceManager(rpcService, highAvailabilityServices);
+		SlotManager slotManager = mock(SlotManager.class);
+		final ResourceManager resourceManager = new ResourceManager(rpcService, highAvailabilityServices, slotManager);
 		resourceManager.start();
 		// before grant leadership, resourceManager's leaderId is null
 		Assert.assertNull(resourceManager.getLeaderSessionID());


[24/50] [abbrv] flink git commit: [FLINK-4355] [cluster management] Implement TaskManager side of registration at ResourceManager.

Posted by tr...@apache.org.
[FLINK-4355] [cluster management] Implement TaskManager side of registration at ResourceManager.

This closes #2353


Project: http://git-wip-us.apache.org/repos/asf/flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/fe90811a
Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/fe90811a
Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/fe90811a

Branch: refs/heads/flip-6
Commit: fe90811ad115abb0c95f47461ae6630cd994246f
Parents: 613f5a7
Author: Stephan Ewen <se...@apache.org>
Authored: Wed Aug 10 20:42:45 2016 +0200
Committer: Till Rohrmann <tr...@apache.org>
Committed: Wed Sep 21 11:39:14 2016 +0200

----------------------------------------------------------------------
 .../HighAvailabilityServices.java               |  39 +++
 .../runtime/highavailability/NonHaServices.java |  59 ++++
 .../StandaloneLeaderRetrievalService.java       |  72 +++--
 .../apache/flink/runtime/rpc/RpcEndpoint.java   |   1 -
 .../apache/flink/runtime/rpc/RpcService.java    |  27 ++
 .../flink/runtime/rpc/akka/AkkaRpcService.java  |  18 ++
 .../runtime/rpc/akka/messages/RunAsync.java     |   1 +
 .../rpc/registration/RegistrationResponse.java  |  84 ++++++
 .../rpc/registration/RetryingRegistration.java  | 292 +++++++++++++++++++
 .../rpc/resourcemanager/ResourceManager.java    |  23 ++
 .../resourcemanager/ResourceManagerGateway.java |  21 +-
 .../runtime/rpc/taskexecutor/SlotReport.java    |  38 +++
 .../runtime/rpc/taskexecutor/TaskExecutor.java  | 169 ++++++++---
 .../rpc/taskexecutor/TaskExecutorGateway.java   |  29 +-
 .../TaskExecutorRegistrationSuccess.java        |  75 +++++
 ...TaskExecutorToResourceManagerConnection.java | 194 ++++++++++++
 .../runtime/rpc/akka/AkkaRpcServiceTest.java    |  51 +++-
 .../rpc/taskexecutor/TaskExecutorTest.java      |  87 +-----
 18 files changed, 1105 insertions(+), 175 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/flink/blob/fe90811a/flink-runtime/src/main/java/org/apache/flink/runtime/highavailability/HighAvailabilityServices.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/highavailability/HighAvailabilityServices.java b/flink-runtime/src/main/java/org/apache/flink/runtime/highavailability/HighAvailabilityServices.java
new file mode 100644
index 0000000..094d36f
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/highavailability/HighAvailabilityServices.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.highavailability;
+
+import org.apache.flink.runtime.leaderretrieval.LeaderRetrievalService;
+
+/**
+ * This class gives access to all services needed for
+ *
+ * <ul>
+ *     <li>ResourceManager leader election and leader retrieval</li>
+ *     <li>JobManager leader election and leader retrieval</li>
+ *     <li>Persistence for checkpoint metadata</li>
+ *     <li>Registering the latest completed checkpoint(s)</li>
+ * </ul>
+ */
+public interface HighAvailabilityServices {
+
+	/**
+	 * Gets the leader retriever for the cluster's resource manager.
+	 */
+	LeaderRetrievalService getResourceManagerLeaderRetriever() throws Exception;
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/fe90811a/flink-runtime/src/main/java/org/apache/flink/runtime/highavailability/NonHaServices.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/highavailability/NonHaServices.java b/flink-runtime/src/main/java/org/apache/flink/runtime/highavailability/NonHaServices.java
new file mode 100644
index 0000000..b8c2ed8
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/highavailability/NonHaServices.java
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.highavailability;
+
+import org.apache.flink.runtime.leaderretrieval.LeaderRetrievalService;
+import org.apache.flink.runtime.leaderretrieval.StandaloneLeaderRetrievalService;
+
+import java.util.UUID;
+
+import static org.apache.flink.util.Preconditions.checkNotNull;
+
+/**
+ * An implementation of the {@link HighAvailabilityServices} for the non-high-availability case.
+ * This implementation can be used for testing, and for cluster setups that do not
+ * tolerate failures of the master processes (JobManager, ResourceManager).
+ * 
+ * <p>This implementation has no dependencies on any external services. It returns fix
+ * pre-configured leaders, and stores checkpoints and metadata simply on the heap and therefore
+ * in volatile memory.
+ */
+public class NonHaServices implements HighAvailabilityServices {
+
+	/** The fix address of the ResourceManager */
+	private final String resourceManagerAddress;
+
+	/**
+	 * Creates a new services class for the fix pre-defined leaders.
+	 * 
+	 * @param resourceManagerAddress    The fix address of the ResourceManager
+	 */
+	public NonHaServices(String resourceManagerAddress) {
+		this.resourceManagerAddress = checkNotNull(resourceManagerAddress);
+	}
+
+	// ------------------------------------------------------------------------
+	//  Services
+	// ------------------------------------------------------------------------
+
+	@Override
+	public LeaderRetrievalService getResourceManagerLeaderRetriever() throws Exception {
+		return new StandaloneLeaderRetrievalService(resourceManagerAddress, new UUID(0, 0));
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/fe90811a/flink-runtime/src/main/java/org/apache/flink/runtime/leaderretrieval/StandaloneLeaderRetrievalService.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/leaderretrieval/StandaloneLeaderRetrievalService.java b/flink-runtime/src/main/java/org/apache/flink/runtime/leaderretrieval/StandaloneLeaderRetrievalService.java
index 26a34aa..16b163c 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/leaderretrieval/StandaloneLeaderRetrievalService.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/leaderretrieval/StandaloneLeaderRetrievalService.java
@@ -18,44 +18,74 @@
 
 package org.apache.flink.runtime.leaderretrieval;
 
-import org.apache.flink.util.Preconditions;
+import java.util.UUID;
+
+import static org.apache.flink.util.Preconditions.checkNotNull;
+import static org.apache.flink.util.Preconditions.checkState;
 
 /**
- * Standalone implementation of the {@link LeaderRetrievalService}. The standalone implementation
- * assumes that there is only a single {@link org.apache.flink.runtime.jobmanager.JobManager} whose
- * address is given to the service when creating it. This address is directly given to the
- * {@link LeaderRetrievalListener} when the service is started.
+ * Standalone implementation of the {@link LeaderRetrievalService}. This implementation
+ * assumes that there is only a single contender for leadership
+ * (e.g., a single JobManager or ResourceManager process) and that this process is
+ * reachable under a constant address.
+ * 
+ * <p>As soon as this service is started, it immediately notifies the leader listener
+ * of the leader contender with the pre-configured address.
  */
 public class StandaloneLeaderRetrievalService implements LeaderRetrievalService {
 
-	/** Address of the only JobManager */
-	private final String jobManagerAddress;
+	private final Object startStopLock = new Object();
+	
+	/** The fix address of the leader */
+	private final String leaderAddress;
+
+	/** The fix leader ID (leader lock fencing token) */
+	private final UUID leaderId;
 
-	/** Listener which wants to be notified about the new leader */
-	private LeaderRetrievalListener leaderListener;
+	/** Flag whether this service is started */
+	private boolean started;
 
 	/**
-	 * Creates a StandaloneLeaderRetrievalService with the given JobManager address.
+	 * Creates a StandaloneLeaderRetrievalService with the given leader address.
+	 * The leaderId will be null.
 	 *
-	 * @param jobManagerAddress The JobManager's address which is returned to the
-	 * 							{@link LeaderRetrievalListener}
+	 * @param leaderAddress The leader's pre-configured address
 	 */
-	public StandaloneLeaderRetrievalService(String jobManagerAddress) {
-		this.jobManagerAddress = jobManagerAddress;
+	public StandaloneLeaderRetrievalService(String leaderAddress) {
+		this.leaderAddress = checkNotNull(leaderAddress);
+		this.leaderId = null;
 	}
 
+	/**
+	 * Creates a StandaloneLeaderRetrievalService with the given leader address.
+	 *
+	 * @param leaderAddress The leader's pre-configured address
+	 * @param leaderId      The constant leaderId.
+	 */
+	public StandaloneLeaderRetrievalService(String leaderAddress, UUID leaderId) {
+		this.leaderAddress = checkNotNull(leaderAddress);
+		this.leaderId = checkNotNull(leaderId);
+	}
+
+	// ------------------------------------------------------------------------
+
 	@Override
 	public void start(LeaderRetrievalListener listener) {
-		Preconditions.checkNotNull(listener, "Listener must not be null.");
-		Preconditions.checkState(leaderListener == null, "StandaloneLeaderRetrievalService can " +
-				"only be started once.");
+		checkNotNull(listener, "Listener must not be null.");
 
-		leaderListener = listener;
+		synchronized (startStopLock) {
+			checkState(!started, "StandaloneLeaderRetrievalService can only be started once.");
+			started = true;
 
-		// directly notify the listener, because we already know the leading JobManager's address
-		leaderListener.notifyLeaderAddress(jobManagerAddress, null);
+			// directly notify the listener, because we already know the leading JobManager's address
+			listener.notifyLeaderAddress(leaderAddress, leaderId);
+		}
 	}
 
 	@Override
-	public void stop() {}
+	public void stop() {
+		synchronized (startStopLock) {
+			started = false;
+		}
+	}
 }

http://git-wip-us.apache.org/repos/asf/flink/blob/fe90811a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcEndpoint.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcEndpoint.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcEndpoint.java
index 67ac182..a28bc14 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcEndpoint.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcEndpoint.java
@@ -237,7 +237,6 @@ public abstract class RpcEndpoint<C extends RpcGateway> {
 	 * }</pre>
 	 */
 	public void validateRunsInMainThread() {
-		// because the initialization is lazy, it can be that certain methods are
 		assert currentMainThread.get() == Thread.currentThread();
 	}
 

http://git-wip-us.apache.org/repos/asf/flink/blob/fe90811a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcService.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcService.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcService.java
index f93be83..fabdb05 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcService.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcService.java
@@ -18,8 +18,11 @@
 
 package org.apache.flink.runtime.rpc;
 
+import scala.concurrent.ExecutionContext;
 import scala.concurrent.Future;
 
+import java.util.concurrent.TimeUnit;
+
 /**
  * Interface for rpc services. An rpc service is used to start and connect to a {@link RpcEndpoint}.
  * Connecting to a rpc server will return a {@link RpcGateway} which can be used to call remote
@@ -71,4 +74,28 @@ public interface RpcService {
 	 * @return Fully qualified address
 	 */
 	<C extends RpcGateway> String getAddress(C selfGateway);
+
+	/**
+	 * Gets the execution context, provided by this RPC service. This execution
+	 * context can be used for example for the {@code onComplete(...)} or {@code onSuccess(...)}
+	 * methods of Futures.
+	 * 
+	 * <p><b>IMPORTANT:</b> This execution context does not isolate the method invocations against
+	 * any concurrent invocations and is therefore not suitable to run completion methods of futures
+	 * that modify state of an {@link RpcEndpoint}. For such operations, one needs to use the
+	 * {@link RpcEndpoint#getMainThreadExecutionContext() MainThreadExecutionContext} of that
+	 * {@code RpcEndpoint}.
+	 * 
+	 * @return The execution context provided by the RPC service
+	 */
+	ExecutionContext getExecutionContext();
+
+	/**
+	 * Execute the runnable in the execution context of this RPC Service, as returned by
+	 * {@link #getExecutionContext()}, after a scheduled delay.
+	 *
+	 * @param runnable Runnable to be executed
+	 * @param delay    The delay after which the runnable will be executed
+	 */
+	void scheduleRunnable(Runnable runnable, long delay, TimeUnit unit);
 }

http://git-wip-us.apache.org/repos/asf/flink/blob/fe90811a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcService.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcService.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcService.java
index 7b33524..b647bbd 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcService.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcService.java
@@ -38,14 +38,18 @@ import org.apache.flink.runtime.rpc.StartStoppable;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import scala.concurrent.ExecutionContext;
 import scala.concurrent.Future;
+import scala.concurrent.duration.FiniteDuration;
 
 import javax.annotation.concurrent.ThreadSafe;
 import java.lang.reflect.InvocationHandler;
 import java.lang.reflect.Proxy;
 import java.util.HashSet;
 import java.util.Set;
+import java.util.concurrent.TimeUnit;
 
+import static org.apache.flink.util.Preconditions.checkArgument;
 import static org.apache.flink.util.Preconditions.checkNotNull;
 import static org.apache.flink.util.Preconditions.checkState;
 
@@ -199,4 +203,18 @@ public class AkkaRpcService implements RpcService {
 			throw new IllegalArgumentException("Cannot get address for non " + className + '.');
 		}
 	}
+
+	@Override
+	public ExecutionContext getExecutionContext() {
+		return actorSystem.dispatcher();
+	}
+
+	@Override
+	public void scheduleRunnable(Runnable runnable, long delay, TimeUnit unit) {
+		checkNotNull(runnable, "runnable");
+		checkNotNull(unit, "unit");
+		checkArgument(delay >= 0, "delay must be zero or larger");
+
+		actorSystem.scheduler().scheduleOnce(new FiniteDuration(delay, unit), runnable, getExecutionContext());
+	}
 }

http://git-wip-us.apache.org/repos/asf/flink/blob/fe90811a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RunAsync.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RunAsync.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RunAsync.java
index c18906c..ce4f9d6 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RunAsync.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RunAsync.java
@@ -36,6 +36,7 @@ public final class RunAsync implements Serializable {
 	private final long delay;
 
 	/**
+	 * Creates a new {@code RunAsync} message.
 	 * 
 	 * @param runnable  The Runnable to run.
 	 * @param delay     The delay in milliseconds. Zero indicates immediate execution.

http://git-wip-us.apache.org/repos/asf/flink/blob/fe90811a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/registration/RegistrationResponse.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/registration/RegistrationResponse.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/registration/RegistrationResponse.java
new file mode 100644
index 0000000..2de560a
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/registration/RegistrationResponse.java
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc.registration;
+
+import java.io.Serializable;
+
+/**
+ * Base class for responses given to registration attempts from {@link RetryingRegistration}.
+ */
+public abstract class RegistrationResponse implements Serializable {
+
+	private static final long serialVersionUID = 1L;
+
+	// ----------------------------------------------------------------------------
+	
+	/**
+	 * Base class for a successful registration. Concrete registration implementations
+	 * will typically extend this class to attach more information.
+	 */
+	public static class Success extends RegistrationResponse {
+		private static final long serialVersionUID = 1L;
+		
+		@Override
+		public String toString() {
+			return "Registration Successful";
+		}
+	}
+
+	// ----------------------------------------------------------------------------
+
+	/**
+	 * A rejected (declined) registration.
+	 */
+	public static final class Decline extends RegistrationResponse {
+		private static final long serialVersionUID = 1L;
+
+		/** the rejection reason */
+		private final String reason;
+
+		/**
+		 * Creates a new rejection message.
+		 * 
+		 * @param reason The reason for the rejection.
+		 */
+		public Decline(String reason) {
+			this.reason = reason != null ? reason : "(unknown)";
+		}
+
+		/**
+		 * Gets the reason for the rejection.
+		 */
+		public String getReason() {
+			return reason;
+		}
+
+		@Override
+		public String toString() {
+			return "Registration Declined (" + reason + ')';
+		}
+	}
+}
+
+
+
+
+
+
+

http://git-wip-us.apache.org/repos/asf/flink/blob/fe90811a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/registration/RetryingRegistration.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/registration/RetryingRegistration.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/registration/RetryingRegistration.java
new file mode 100644
index 0000000..4c93684
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/registration/RetryingRegistration.java
@@ -0,0 +1,292 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc.registration;
+
+import akka.dispatch.OnFailure;
+import akka.dispatch.OnSuccess;
+
+import org.apache.flink.api.java.tuple.Tuple2;
+import org.apache.flink.runtime.rpc.RpcGateway;
+import org.apache.flink.runtime.rpc.RpcService;
+
+import org.slf4j.Logger;
+
+import scala.concurrent.Future;
+import scala.concurrent.Promise;
+import scala.concurrent.impl.Promise.DefaultPromise;
+
+import java.util.UUID;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
+
+import static org.apache.flink.util.Preconditions.checkArgument;
+import static org.apache.flink.util.Preconditions.checkNotNull;
+
+
+/**
+ * This utility class implements the basis of registering one component at another component,
+ * for example registering the TaskExecutor at the ResourceManager.
+ * This {@code RetryingRegistration} implements both the initial address resolution
+ * and the retries-with-backoff strategy.
+ * 
+ * <p>The registration gives access to a future that is completed upon successful registration.
+ * The registration can be canceled, for example when the target where it tries to register
+ * at looses leader status.
+ * 
+ * @param <Gateway> The type of the gateway to connect to.
+ * @param <Success> The type of the successful registration responses.
+ */
+public abstract class RetryingRegistration<Gateway extends RpcGateway, Success extends RegistrationResponse.Success> {
+
+	// ------------------------------------------------------------------------
+	//  default configuration values
+	// ------------------------------------------------------------------------
+
+	private static final long INITIAL_REGISTRATION_TIMEOUT_MILLIS = 100;
+
+	private static final long MAX_REGISTRATION_TIMEOUT_MILLIS = 30000;
+
+	private static final long ERROR_REGISTRATION_DELAY_MILLIS = 10000;
+
+	private static final long REFUSED_REGISTRATION_DELAY_MILLIS = 30000;
+
+	// ------------------------------------------------------------------------
+	// Fields
+	// ------------------------------------------------------------------------
+
+	private final Logger log;
+
+	private final RpcService rpcService;
+
+	private final String targetName;
+
+	private final Class<Gateway> targetType;
+
+	private final String targetAddress;
+
+	private final UUID leaderId;
+
+	private final Promise<Tuple2<Gateway, Success>> completionPromise;
+
+	private final long initialRegistrationTimeout;
+
+	private final long maxRegistrationTimeout;
+
+	private final long delayOnError;
+
+	private final long delayOnRefusedRegistration;
+
+	private volatile boolean canceled;
+
+	// ------------------------------------------------------------------------
+
+	public RetryingRegistration(
+			Logger log,
+			RpcService rpcService,
+			String targetName,
+			Class<Gateway> targetType,
+			String targetAddress,
+			UUID leaderId) {
+		this(log, rpcService, targetName, targetType, targetAddress, leaderId,
+				INITIAL_REGISTRATION_TIMEOUT_MILLIS, MAX_REGISTRATION_TIMEOUT_MILLIS,
+				ERROR_REGISTRATION_DELAY_MILLIS, REFUSED_REGISTRATION_DELAY_MILLIS);
+	}
+
+	public RetryingRegistration(
+			Logger log,
+			RpcService rpcService,
+			String targetName, 
+			Class<Gateway> targetType,
+			String targetAddress,
+			UUID leaderId,
+			long initialRegistrationTimeout,
+			long maxRegistrationTimeout,
+			long delayOnError,
+			long delayOnRefusedRegistration) {
+
+		checkArgument(initialRegistrationTimeout > 0, "initial registration timeout must be greater than zero");
+		checkArgument(maxRegistrationTimeout > 0, "maximum registration timeout must be greater than zero");
+		checkArgument(delayOnError >= 0, "delay on error must be non-negative");
+		checkArgument(delayOnRefusedRegistration >= 0, "delay on refused registration must be non-negative");
+
+		this.log = checkNotNull(log);
+		this.rpcService = checkNotNull(rpcService);
+		this.targetName = checkNotNull(targetName);
+		this.targetType = checkNotNull(targetType);
+		this.targetAddress = checkNotNull(targetAddress);
+		this.leaderId = checkNotNull(leaderId);
+		this.initialRegistrationTimeout = initialRegistrationTimeout;
+		this.maxRegistrationTimeout = maxRegistrationTimeout;
+		this.delayOnError = delayOnError;
+		this.delayOnRefusedRegistration = delayOnRefusedRegistration;
+
+		this.completionPromise = new DefaultPromise<>();
+	}
+
+	// ------------------------------------------------------------------------
+	//  completion and cancellation
+	// ------------------------------------------------------------------------
+
+	public Future<Tuple2<Gateway, Success>> getFuture() {
+		return completionPromise.future();
+	}
+
+	/**
+	 * Cancels the registration procedure.
+	 */
+	public void cancel() {
+		canceled = true;
+	}
+
+	/**
+	 * Checks if the registration was canceled.
+	 * @return True if the registration was canceled, false otherwise.
+	 */
+	public boolean isCanceled() {
+		return canceled;
+	}
+
+	// ------------------------------------------------------------------------
+	//  registration
+	// ------------------------------------------------------------------------
+
+	protected abstract Future<RegistrationResponse> invokeRegistration(
+			Gateway gateway, UUID leaderId, long timeoutMillis) throws Exception;
+
+	/**
+	 * This method resolves the target address to a callable gateway and starts the
+	 * registration after that.
+	 */
+	@SuppressWarnings("unchecked")
+	public void startRegistration() {
+		try {
+			// trigger resolution of the resource manager address to a callable gateway
+			Future<Gateway> resourceManagerFuture = rpcService.connect(targetAddress, targetType);
+	
+			// upon success, start the registration attempts
+			resourceManagerFuture.onSuccess(new OnSuccess<Gateway>() {
+				@Override
+				public void onSuccess(Gateway result) {
+					log.info("Resolved {} address, beginning registration", targetName);
+					register(result, 1, initialRegistrationTimeout);
+				}
+			}, rpcService.getExecutionContext());
+	
+			// upon failure, retry, unless this is cancelled
+			resourceManagerFuture.onFailure(new OnFailure() {
+				@Override
+				public void onFailure(Throwable failure) {
+					if (!isCanceled()) {
+						log.warn("Could not resolve {} address {}, retrying...", targetName, targetAddress);
+						startRegistration();
+					}
+				}
+			}, rpcService.getExecutionContext());
+		}
+		catch (Throwable t) {
+			cancel();
+			completionPromise.tryFailure(t);
+		}
+	}
+
+	/**
+	 * This method performs a registration attempt and triggers either a success notification or a retry,
+	 * depending on the result.
+	 */
+	@SuppressWarnings("unchecked")
+	private void register(final Gateway gateway, final int attempt, final long timeoutMillis) {
+		// eager check for canceling to avoid some unnecessary work
+		if (canceled) {
+			return;
+		}
+
+		try {
+			log.info("Registration at {} attempt {} (timeout={}ms)", targetName, attempt, timeoutMillis);
+			Future<RegistrationResponse> registrationFuture = invokeRegistration(gateway, leaderId, timeoutMillis);
+	
+			// if the registration was successful, let the TaskExecutor know
+			registrationFuture.onSuccess(new OnSuccess<RegistrationResponse>() {
+				
+				@Override
+				public void onSuccess(RegistrationResponse result) throws Throwable {
+					if (!isCanceled()) {
+						if (result instanceof RegistrationResponse.Success) {
+							// registration successful!
+							Success success = (Success) result;
+							completionPromise.success(new Tuple2<>(gateway, success));
+						}
+						else {
+							// registration refused or unknown
+							if (result instanceof RegistrationResponse.Decline) {
+								RegistrationResponse.Decline decline = (RegistrationResponse.Decline) result;
+								log.info("Registration at {} was declined: {}", targetName, decline.getReason());
+							} else {
+								log.error("Received unknown response to registration attempt: " + result);
+							}
+
+							log.info("Pausing and re-attempting registration in {} ms", delayOnRefusedRegistration);
+							registerLater(gateway, 1, initialRegistrationTimeout, delayOnRefusedRegistration);
+						}
+					}
+				}
+			}, rpcService.getExecutionContext());
+	
+			// upon failure, retry
+			registrationFuture.onFailure(new OnFailure() {
+				@Override
+				public void onFailure(Throwable failure) {
+					if (!isCanceled()) {
+						if (failure instanceof TimeoutException) {
+							// we simply have not received a response in time. maybe the timeout was
+							// very low (initial fast registration attempts), maybe the target endpoint is
+							// currently down.
+							if (log.isDebugEnabled()) {
+								log.debug("Registration at {} ({}) attempt {} timed out after {} ms",
+										targetName, targetAddress, attempt, timeoutMillis);
+							}
+	
+							long newTimeoutMillis = Math.min(2 * timeoutMillis, maxRegistrationTimeout);
+							register(gateway, attempt + 1, newTimeoutMillis);
+						}
+						else {
+							// a serious failure occurred. we still should not give up, but keep trying
+							log.error("Registration at " + targetName + " failed due to an error", failure);
+							log.info("Pausing and re-attempting registration in {} ms", delayOnError);
+	
+							registerLater(gateway, 1, initialRegistrationTimeout, delayOnError);
+						}
+					}
+				}
+			}, rpcService.getExecutionContext());
+		}
+		catch (Throwable t) {
+			cancel();
+			completionPromise.tryFailure(t);
+		}
+	}
+
+	private void registerLater(final Gateway gateway, final int attempt, final long timeoutMillis, long delay) {
+		rpcService.scheduleRunnable(new Runnable() {
+			@Override
+			public void run() {
+				register(gateway, attempt, timeoutMillis);
+			}
+		}, delay, TimeUnit.MILLISECONDS);
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/fe90811a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/ResourceManager.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/ResourceManager.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/ResourceManager.java
index 729ef0c..6f34465 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/ResourceManager.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/ResourceManager.java
@@ -19,19 +19,24 @@
 package org.apache.flink.runtime.rpc.resourcemanager;
 
 import akka.dispatch.Mapper;
+
+import org.apache.flink.runtime.clusterframework.types.ResourceID;
 import org.apache.flink.runtime.instance.InstanceID;
 import org.apache.flink.runtime.rpc.RpcMethod;
 import org.apache.flink.runtime.rpc.RpcEndpoint;
 import org.apache.flink.runtime.rpc.RpcService;
 import org.apache.flink.runtime.rpc.jobmaster.JobMaster;
 import org.apache.flink.runtime.rpc.jobmaster.JobMasterGateway;
+import org.apache.flink.runtime.rpc.taskexecutor.TaskExecutorRegistrationSuccess;
 import org.apache.flink.util.Preconditions;
+
 import scala.concurrent.ExecutionContext;
 import scala.concurrent.ExecutionContext$;
 import scala.concurrent.Future;
 
 import java.util.HashMap;
 import java.util.Map;
+import java.util.UUID;
 import java.util.concurrent.ExecutorService;
 
 /**
@@ -93,4 +98,22 @@ public class ResourceManager extends RpcEndpoint<ResourceManagerGateway> {
 		System.out.println("SlotRequest: " + slotRequest);
 		return new SlotAssignment();
 	}
+
+
+	/**
+	 *
+	 * @param resourceManagerLeaderId  The fencing token for the ResourceManager leader 
+	 * @param taskExecutorAddress      The address of the TaskExecutor that registers
+	 * @param resourceID               The resource ID of the TaskExecutor that registers
+	 *
+	 * @return The response by the ResourceManager.
+	 */
+	@RpcMethod
+	public org.apache.flink.runtime.rpc.registration.RegistrationResponse registerTaskExecutor(
+			UUID resourceManagerLeaderId,
+			String taskExecutorAddress,
+			ResourceID resourceID) {
+
+		return new TaskExecutorRegistrationSuccess(new InstanceID(), 5000);
+	}
 }

http://git-wip-us.apache.org/repos/asf/flink/blob/fe90811a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/ResourceManagerGateway.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/ResourceManagerGateway.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/ResourceManagerGateway.java
index 464a261..afddb01 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/ResourceManagerGateway.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/ResourceManagerGateway.java
@@ -18,14 +18,18 @@
 
 package org.apache.flink.runtime.rpc.resourcemanager;
 
+import org.apache.flink.runtime.clusterframework.types.ResourceID;
 import org.apache.flink.runtime.rpc.RpcGateway;
 import org.apache.flink.runtime.rpc.RpcTimeout;
 import org.apache.flink.runtime.rpc.jobmaster.JobMaster;
+
 import scala.concurrent.Future;
 import scala.concurrent.duration.FiniteDuration;
 
+import java.util.UUID;
+
 /**
- * {@link ResourceManager} rpc gateway interface.
+ * The {@link ResourceManager}'s RPC gateway interface.
  */
 public interface ResourceManagerGateway extends RpcGateway {
 
@@ -55,4 +59,19 @@ public interface ResourceManagerGateway extends RpcGateway {
 	 * @return Future slot assignment
 	 */
 	Future<SlotAssignment> requestSlot(SlotRequest slotRequest);
+
+	/**
+	 * 
+	 * @param resourceManagerLeaderId  The fencing token for the ResourceManager leader 
+	 * @param taskExecutorAddress      The address of the TaskExecutor that registers
+	 * @param resourceID               The resource ID of the TaskExecutor that registers
+	 * @param timeout                  The timeout for the response.
+	 * 
+	 * @return The future to the response by the ResourceManager.
+	 */
+	Future<org.apache.flink.runtime.rpc.registration.RegistrationResponse> registerTaskExecutor(
+			UUID resourceManagerLeaderId,
+			String taskExecutorAddress,
+			ResourceID resourceID,
+			@RpcTimeout FiniteDuration timeout);
 }

http://git-wip-us.apache.org/repos/asf/flink/blob/fe90811a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/SlotReport.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/SlotReport.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/SlotReport.java
new file mode 100644
index 0000000..e42fa4a
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/SlotReport.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc.taskexecutor;
+
+import java.io.Serializable;
+
+/**
+ * A report about the current status of all slots of the TaskExecutor, describing
+ * which slots are available and allocated, and what jobs (JobManagers) the allocated slots
+ * have been allocated to.
+ */
+public class SlotReport implements Serializable{
+
+	private static final long serialVersionUID = 1L;
+
+	// ------------------------------------------------------------------------
+	
+	@Override
+	public String toString() {
+		return "SlotReport";
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/fe90811a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutor.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutor.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutor.java
index 3a7dd9f..1a637bb 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutor.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutor.java
@@ -18,67 +18,152 @@
 
 package org.apache.flink.runtime.rpc.taskexecutor;
 
-import akka.dispatch.ExecutionContexts$;
-import org.apache.flink.runtime.deployment.TaskDeploymentDescriptor;
-import org.apache.flink.runtime.executiongraph.ExecutionAttemptID;
-import org.apache.flink.runtime.messages.Acknowledge;
-import org.apache.flink.runtime.rpc.RpcMethod;
+import org.apache.flink.runtime.clusterframework.types.ResourceID;
+import org.apache.flink.runtime.highavailability.HighAvailabilityServices;
+import org.apache.flink.runtime.leaderretrieval.LeaderRetrievalListener;
 import org.apache.flink.runtime.rpc.RpcEndpoint;
+import org.apache.flink.runtime.rpc.RpcMethod;
 import org.apache.flink.runtime.rpc.RpcService;
-import org.apache.flink.util.Preconditions;
-import scala.concurrent.ExecutionContext;
 
-import java.util.HashSet;
-import java.util.Set;
-import java.util.concurrent.ExecutorService;
+import java.util.UUID;
+
+import static org.apache.flink.util.Preconditions.checkNotNull;
 
 /**
  * TaskExecutor implementation. The task executor is responsible for the execution of multiple
  * {@link org.apache.flink.runtime.taskmanager.Task}.
- *
- * It offers the following methods as part of its rpc interface to interact with him remotely:
- * <ul>
- *     <li>{@link #executeTask(TaskDeploymentDescriptor)} executes a given task on the TaskExecutor</li>
- *     <li>{@link #cancelTask(ExecutionAttemptID)} cancels a given task identified by the {@link ExecutionAttemptID}</li>
- * </ul>
  */
 public class TaskExecutor extends RpcEndpoint<TaskExecutorGateway> {
-	private final ExecutionContext executionContext;
-	private final Set<ExecutionAttemptID> tasks = new HashSet<>();
 
-	public TaskExecutor(RpcService rpcService, ExecutorService executorService) {
+	/** The unique resource ID of this TaskExecutor */
+	private final ResourceID resourceID;
+
+	/** The access to the leader election and metadata storage services */
+	private final HighAvailabilityServices haServices;
+
+	// --------- resource manager --------
+
+	private TaskExecutorToResourceManagerConnection resourceManagerConnection;
+
+	// ------------------------------------------------------------------------
+
+	public TaskExecutor(
+			RpcService rpcService,
+			HighAvailabilityServices haServices,
+			ResourceID resourceID) {
+
 		super(rpcService);
-		this.executionContext = ExecutionContexts$.MODULE$.fromExecutor(
-			Preconditions.checkNotNull(executorService));
+
+		this.haServices = checkNotNull(haServices);
+		this.resourceID = checkNotNull(resourceID);
+	}
+
+	// ------------------------------------------------------------------------
+	//  Properties
+	// ------------------------------------------------------------------------
+
+	public ResourceID getResourceID() {
+		return resourceID;
+	}
+
+	// ------------------------------------------------------------------------
+	//  Life cycle
+	// ------------------------------------------------------------------------
+
+	@Override
+	public void start() {
+		// start by connecting to the ResourceManager
+		try {
+			haServices.getResourceManagerLeaderRetriever().start(new ResourceManagerLeaderListener());
+		} catch (Exception e) {
+			onFatalErrorAsync(e);
+		}
+	}
+
+
+	// ------------------------------------------------------------------------
+	//  RPC methods - ResourceManager related
+	// ------------------------------------------------------------------------
+
+	@RpcMethod
+	public void notifyOfNewResourceManagerLeader(String newLeaderAddress, UUID newLeaderId) {
+		if (resourceManagerConnection != null) {
+			if (newLeaderAddress != null) {
+				// the resource manager switched to a new leader
+				log.info("ResourceManager leader changed from {} to {}. Registering at new leader.",
+						resourceManagerConnection.getResourceManagerAddress(), newLeaderAddress);
+			}
+			else {
+				// address null means that the current leader is lost without a new leader being there, yet
+				log.info("Current ResourceManager {} lost leader status. Waiting for new ResourceManager leader.",
+						resourceManagerConnection.getResourceManagerAddress());
+			}
+
+			// drop the current connection or connection attempt
+			if (resourceManagerConnection != null) {
+				resourceManagerConnection.close();
+				resourceManagerConnection = null;
+			}
+		}
+
+		// establish a connection to the new leader
+		if (newLeaderAddress != null) {
+			log.info("Attempting to register at ResourceManager {}", newLeaderAddress);
+			resourceManagerConnection = 
+					new TaskExecutorToResourceManagerConnection(log, this, newLeaderAddress, newLeaderId);
+			resourceManagerConnection.start();
+		}
 	}
 
+	// ------------------------------------------------------------------------
+	//  Error handling
+	// ------------------------------------------------------------------------
+
 	/**
-	 * Execute the given task on the task executor. The task is described by the provided
-	 * {@link TaskDeploymentDescriptor}.
-	 *
-	 * @param taskDeploymentDescriptor Descriptor for the task to be executed
-	 * @return Acknowledge the start of the task execution
+	 * Notifies the TaskExecutor that a fatal error has occurred and it cannot proceed.
+	 * This method should be used when asynchronous threads want to notify the
+	 * TaskExecutor of a fatal error.
+	 * 
+	 * @param t The exception describing the fatal error
 	 */
-	@RpcMethod
-	public Acknowledge executeTask(TaskDeploymentDescriptor taskDeploymentDescriptor) {
-		tasks.add(taskDeploymentDescriptor.getExecutionId());
-		return Acknowledge.get();
+	void onFatalErrorAsync(final Throwable t) {
+		runAsync(new Runnable() {
+			@Override
+			public void run() {
+				onFatalError(t);
+			}
+		});
 	}
 
 	/**
-	 * Cancel a task identified by it {@link ExecutionAttemptID}. If the task cannot be found, then
-	 * the method throws an {@link Exception}.
-	 *
-	 * @param executionAttemptId Execution attempt ID identifying the task to be canceled.
-	 * @return Acknowledge the task canceling
-	 * @throws Exception if the task with the given execution attempt id could not be found
+	 * Notifies the TaskExecutor that a fatal error has occurred and it cannot proceed.
+	 * This method must only be called from within the TaskExecutor's main thread.
+	 * 
+	 * @param t The exception describing the fatal error
 	 */
-	@RpcMethod
-	public Acknowledge cancelTask(ExecutionAttemptID executionAttemptId) throws Exception {
-		if (tasks.contains(executionAttemptId)) {
-			return Acknowledge.get();
-		} else {
-			throw new Exception("Could not find task.");
+	void onFatalError(Throwable t) {
+		// to be determined, probably delegate to a fatal error handler that 
+		// would either log (mini cluster) ot kill the process (yarn, mesos, ...)
+		log.error("FATAL ERROR", t);
+	}
+
+	// ------------------------------------------------------------------------
+	//  Utility classes
+	// ------------------------------------------------------------------------
+
+	/**
+	 * The listener for leader changes of the resource manager
+	 */
+	private class ResourceManagerLeaderListener implements LeaderRetrievalListener {
+
+		@Override
+		public void notifyLeaderAddress(String leaderAddress, UUID leaderSessionID) {
+			getSelf().notifyOfNewResourceManagerLeader(leaderAddress, leaderSessionID);
+		}
+
+		@Override
+		public void handleError(Exception exception) {
+			onFatalErrorAsync(exception);
 		}
 	}
 }

http://git-wip-us.apache.org/repos/asf/flink/blob/fe90811a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorGateway.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorGateway.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorGateway.java
index 450423e..b0b21b0 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorGateway.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorGateway.java
@@ -18,31 +18,18 @@
 
 package org.apache.flink.runtime.rpc.taskexecutor;
 
-import org.apache.flink.runtime.deployment.TaskDeploymentDescriptor;
-import org.apache.flink.runtime.executiongraph.ExecutionAttemptID;
-import org.apache.flink.runtime.messages.Acknowledge;
 import org.apache.flink.runtime.rpc.RpcGateway;
-import scala.concurrent.Future;
+
+import java.util.UUID;
 
 /**
- * {@link TaskExecutor} rpc gateway interface
+ * {@link TaskExecutor} RPC gateway interface
  */
 public interface TaskExecutorGateway extends RpcGateway {
-	/**
-	 * Execute the given task on the task executor. The task is described by the provided
-	 * {@link TaskDeploymentDescriptor}.
-	 *
-	 * @param taskDeploymentDescriptor Descriptor for the task to be executed
-	 * @return Future acknowledge of the start of the task execution
-	 */
-	Future<Acknowledge> executeTask(TaskDeploymentDescriptor taskDeploymentDescriptor);
 
-	/**
-	 * Cancel a task identified by it {@link ExecutionAttemptID}. If the task cannot be found, then
-	 * the method throws an {@link Exception}.
-	 *
-	 * @param executionAttemptId Execution attempt ID identifying the task to be canceled.
-	 * @return Future acknowledge of the task canceling
-	 */
-	Future<Acknowledge> cancelTask(ExecutionAttemptID executionAttemptId);
+	// ------------------------------------------------------------------------
+	//  ResourceManager handlers
+	// ------------------------------------------------------------------------
+
+	void notifyOfNewResourceManagerLeader(String address, UUID resourceManagerLeaderId);
 }

http://git-wip-us.apache.org/repos/asf/flink/blob/fe90811a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorRegistrationSuccess.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorRegistrationSuccess.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorRegistrationSuccess.java
new file mode 100644
index 0000000..641102d
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorRegistrationSuccess.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc.taskexecutor;
+
+import org.apache.flink.runtime.instance.InstanceID;
+import org.apache.flink.runtime.rpc.registration.RegistrationResponse;
+
+import java.io.Serializable;
+
+/**
+ * Base class for responses from the ResourceManager to a registration attempt by a
+ * TaskExecutor.
+ */
+public final class TaskExecutorRegistrationSuccess extends RegistrationResponse.Success implements Serializable {
+
+	private static final long serialVersionUID = 1L;
+
+	private final InstanceID registrationId;
+
+	private final long heartbeatInterval;
+
+	/**
+	 * Create a new {@code TaskExecutorRegistrationSuccess} message.
+	 * 
+	 * @param registrationId     The ID that the ResourceManager assigned the registration.
+	 * @param heartbeatInterval  The interval in which the ResourceManager will heartbeat the TaskExecutor.
+	 */
+	public TaskExecutorRegistrationSuccess(InstanceID registrationId, long heartbeatInterval) {
+		this.registrationId = registrationId;
+		this.heartbeatInterval = heartbeatInterval;
+	}
+
+	/**
+	 * Gets the ID that the ResourceManager assigned the registration.
+	 */
+	public InstanceID getRegistrationId() {
+		return registrationId;
+	}
+
+	/**
+	 * Gets the interval in which the ResourceManager will heartbeat the TaskExecutor.
+	 */
+	public long getHeartbeatInterval() {
+		return heartbeatInterval;
+	}
+
+	@Override
+	public String toString() {
+		return "TaskExecutorRegistrationSuccess (" + registrationId + " / " + heartbeatInterval + ')';
+	}
+
+}
+
+
+
+
+
+
+

http://git-wip-us.apache.org/repos/asf/flink/blob/fe90811a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorToResourceManagerConnection.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorToResourceManagerConnection.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorToResourceManagerConnection.java
new file mode 100644
index 0000000..ef75862
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorToResourceManagerConnection.java
@@ -0,0 +1,194 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc.taskexecutor;
+
+import akka.dispatch.OnFailure;
+import akka.dispatch.OnSuccess;
+
+import org.apache.flink.api.java.tuple.Tuple2;
+import org.apache.flink.runtime.clusterframework.types.ResourceID;
+import org.apache.flink.runtime.instance.InstanceID;
+import org.apache.flink.runtime.rpc.RpcService;
+import org.apache.flink.runtime.rpc.registration.RegistrationResponse;
+import org.apache.flink.runtime.rpc.registration.RetryingRegistration;
+import org.apache.flink.runtime.rpc.resourcemanager.ResourceManagerGateway;
+
+import org.slf4j.Logger;
+
+import scala.concurrent.Future;
+import scala.concurrent.duration.FiniteDuration;
+
+import java.util.UUID;
+import java.util.concurrent.TimeUnit;
+
+import static org.apache.flink.util.Preconditions.checkNotNull;
+import static org.apache.flink.util.Preconditions.checkState;
+
+public class TaskExecutorToResourceManagerConnection {
+
+	/** the logger for all log messages of this class */
+	private final Logger log;
+
+	/** the TaskExecutor whose connection to the ResourceManager this represents */
+	private final TaskExecutor taskExecutor;
+
+	private final UUID resourceManagerLeaderId;
+
+	private final String resourceManagerAddress;
+
+	private ResourceManagerRegistration pendingRegistration;
+
+	private ResourceManagerGateway registeredResourceManager;
+
+	private InstanceID registrationId;
+
+	/** flag indicating that the connection is closed */
+	private volatile boolean closed;
+
+
+	public TaskExecutorToResourceManagerConnection(
+			Logger log,
+			TaskExecutor taskExecutor,
+			String resourceManagerAddress,
+			UUID resourceManagerLeaderId) {
+
+		this.log = checkNotNull(log);
+		this.taskExecutor = checkNotNull(taskExecutor);
+		this.resourceManagerAddress = checkNotNull(resourceManagerAddress);
+		this.resourceManagerLeaderId = checkNotNull(resourceManagerLeaderId);
+	}
+
+	// ------------------------------------------------------------------------
+	//  Life cycle
+	// ------------------------------------------------------------------------
+
+	@SuppressWarnings("unchecked")
+	public void start() {
+		checkState(!closed, "The connection is already closed");
+		checkState(!isRegistered() && pendingRegistration == null, "The connection is already started");
+
+		ResourceManagerRegistration registration = new ResourceManagerRegistration(
+				log, taskExecutor.getRpcService(),
+				resourceManagerAddress, resourceManagerLeaderId,
+				taskExecutor.getAddress(), taskExecutor.getResourceID());
+
+		Future<Tuple2<ResourceManagerGateway, TaskExecutorRegistrationSuccess>> future = registration.getFuture();
+		
+		future.onSuccess(new OnSuccess<Tuple2<ResourceManagerGateway, TaskExecutorRegistrationSuccess>>() {
+			@Override
+			public void onSuccess(Tuple2<ResourceManagerGateway, TaskExecutorRegistrationSuccess> result) {
+				registeredResourceManager = result.f0;
+				registrationId = result.f1.getRegistrationId();
+			}
+		}, taskExecutor.getMainThreadExecutionContext());
+		
+		// this future should only ever fail if there is a bug, not if the registration is declined
+		future.onFailure(new OnFailure() {
+			@Override
+			public void onFailure(Throwable failure) {
+				taskExecutor.onFatalError(failure);
+			}
+		}, taskExecutor.getMainThreadExecutionContext());
+	}
+
+	public void close() {
+		closed = true;
+
+		// make sure we do not keep re-trying forever
+		if (pendingRegistration != null) {
+			pendingRegistration.cancel();
+		}
+	}
+
+	public boolean isClosed() {
+		return closed;
+	}
+
+	// ------------------------------------------------------------------------
+	//  Properties
+	// ------------------------------------------------------------------------
+
+	public UUID getResourceManagerLeaderId() {
+		return resourceManagerLeaderId;
+	}
+
+	public String getResourceManagerAddress() {
+		return resourceManagerAddress;
+	}
+
+	/**
+	 * Gets the ResourceManagerGateway. This returns null until the registration is completed.
+	 */
+	public ResourceManagerGateway getResourceManager() {
+		return registeredResourceManager;
+	}
+
+	/**
+	 * Gets the ID under which the TaskExecutor is registered at the ResourceManager.
+	 * This returns null until the registration is completed.
+	 */
+	public InstanceID getRegistrationId() {
+		return registrationId;
+	}
+
+	public boolean isRegistered() {
+		return registeredResourceManager != null;
+	}
+
+	// ------------------------------------------------------------------------
+
+	@Override
+	public String toString() {
+		return String.format("Connection to ResourceManager %s (leaderId=%s)",
+				resourceManagerAddress, resourceManagerLeaderId); 
+	}
+
+	// ------------------------------------------------------------------------
+	//  Utilities
+	// ------------------------------------------------------------------------
+
+	static class ResourceManagerRegistration
+			extends RetryingRegistration<ResourceManagerGateway, TaskExecutorRegistrationSuccess> {
+
+		private final String taskExecutorAddress;
+		
+		private final ResourceID resourceID;
+
+		public ResourceManagerRegistration(
+				Logger log,
+				RpcService rpcService,
+				String targetAddress,
+				UUID leaderId,
+				String taskExecutorAddress,
+				ResourceID resourceID) {
+
+			super(log, rpcService, "ResourceManager", ResourceManagerGateway.class, targetAddress, leaderId);
+			this.taskExecutorAddress = checkNotNull(taskExecutorAddress);
+			this.resourceID = checkNotNull(resourceID);
+		}
+
+		@Override
+		protected Future<RegistrationResponse> invokeRegistration(
+				ResourceManagerGateway resourceManager, UUID leaderId, long timeoutMillis) throws Exception {
+
+			FiniteDuration timeout = new FiniteDuration(timeoutMillis, TimeUnit.MILLISECONDS);
+			return resourceManager.registerTaskExecutor(leaderId, taskExecutorAddress, resourceID, timeout);
+		}
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/fe90811a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcServiceTest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcServiceTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcServiceTest.java
index fd55904..7b4ab89 100644
--- a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcServiceTest.java
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcServiceTest.java
@@ -20,15 +20,17 @@ package org.apache.flink.runtime.rpc.akka;
 
 import akka.actor.ActorSystem;
 import akka.util.Timeout;
+
+import org.apache.flink.core.testutils.OneShotLatch;
 import org.apache.flink.runtime.akka.AkkaUtils;
-import org.apache.flink.runtime.rpc.RpcEndpoint;
-import org.apache.flink.runtime.rpc.RpcGateway;
-import org.apache.flink.runtime.rpc.RpcService;
 import org.apache.flink.runtime.rpc.jobmaster.JobMaster;
 import org.apache.flink.runtime.rpc.resourcemanager.ResourceManagerGateway;
 import org.apache.flink.runtime.rpc.resourcemanager.ResourceManager;
 import org.apache.flink.util.TestLogger;
+
+import org.junit.AfterClass;
 import org.junit.Test;
+
 import scala.concurrent.duration.Deadline;
 import scala.concurrent.duration.FiniteDuration;
 
@@ -41,6 +43,49 @@ import static org.junit.Assert.assertTrue;
 
 public class AkkaRpcServiceTest extends TestLogger {
 
+	// ------------------------------------------------------------------------
+	//  shared test members
+	// ------------------------------------------------------------------------
+
+	private static ActorSystem actorSystem = AkkaUtils.createDefaultActorSystem();
+
+	private static AkkaRpcService akkaRpcService =
+			new AkkaRpcService(actorSystem, new Timeout(10000, TimeUnit.MILLISECONDS));
+
+	@AfterClass
+	public static void shutdown() {
+		akkaRpcService.stopService();
+		actorSystem.shutdown();
+	}
+
+	// ------------------------------------------------------------------------
+	//  tests
+	// ------------------------------------------------------------------------
+
+	@Test
+	public void testScheduleRunnable() throws Exception {
+		final OneShotLatch latch = new OneShotLatch();
+		final long delay = 100;
+		final long start = System.nanoTime();
+
+		akkaRpcService.scheduleRunnable(new Runnable() {
+			@Override
+			public void run() {
+				latch.trigger();
+			}
+		}, delay, TimeUnit.MILLISECONDS);
+
+		latch.await();
+		final long stop = System.nanoTime();
+
+		assertTrue("call was not properly delayed", ((stop - start) / 1000000) >= delay);
+	}
+
+	// ------------------------------------------------------------------------
+	//  specific component tests - should be moved to the test classes
+	//  for those components
+	// ------------------------------------------------------------------------
+
 	/**
 	 * Tests that the {@link JobMaster} can connect to the {@link ResourceManager} using the
 	 * {@link AkkaRpcService}.

http://git-wip-us.apache.org/repos/asf/flink/blob/fe90811a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorTest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorTest.java
index c96f4f6..9f9bab3 100644
--- a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorTest.java
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorTest.java
@@ -18,93 +18,8 @@
 
 package org.apache.flink.runtime.rpc.taskexecutor;
 
-import org.apache.flink.api.common.ExecutionConfig;
-import org.apache.flink.api.common.JobID;
-import org.apache.flink.configuration.Configuration;
-import org.apache.flink.runtime.blob.BlobKey;
-import org.apache.flink.runtime.deployment.InputGateDeploymentDescriptor;
-import org.apache.flink.runtime.deployment.ResultPartitionDeploymentDescriptor;
-import org.apache.flink.runtime.deployment.TaskDeploymentDescriptor;
-import org.apache.flink.runtime.executiongraph.ExecutionAttemptID;
-import org.apache.flink.runtime.jobgraph.JobVertexID;
-import org.apache.flink.runtime.messages.Acknowledge;
-import org.apache.flink.runtime.rpc.MainThreadExecutor;
-import org.apache.flink.runtime.rpc.RpcEndpoint;
-import org.apache.flink.runtime.rpc.RpcGateway;
-import org.apache.flink.runtime.rpc.RpcService;
-import org.apache.flink.runtime.rpc.StartStoppable;
-import org.apache.flink.runtime.util.DirectExecutorService;
-import org.apache.flink.util.SerializedValue;
 import org.apache.flink.util.TestLogger;
-import org.junit.Test;
-import org.mockito.Matchers;
-import org.mockito.cglib.proxy.InvocationHandler;
-import org.mockito.cglib.proxy.Proxy;
-import scala.concurrent.Future;
-
-import java.net.URL;
-import java.util.Collections;
-
-import static org.junit.Assert.fail;
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.when;
 
 public class TaskExecutorTest extends TestLogger {
-
-	/**
-	 * Tests that we can deploy and cancel a task on the TaskExecutor without exceptions
-	 */
-	@Test
-	public void testTaskExecution() throws Exception {
-		RpcService testingRpcService = mock(RpcService.class);
-		InvocationHandler invocationHandler = mock(InvocationHandler.class);
-		Object selfGateway = Proxy.newProxyInstance(ClassLoader.getSystemClassLoader(), new Class<?>[] {TaskExecutorGateway.class, MainThreadExecutor.class, StartStoppable.class}, invocationHandler);
-		when(testingRpcService.startServer(Matchers.any(RpcEndpoint.class))).thenReturn((RpcGateway)selfGateway);
-
-		DirectExecutorService directExecutorService = new DirectExecutorService();
-		TaskExecutor taskExecutor = new TaskExecutor(testingRpcService, directExecutorService);
-		taskExecutor.start();
-
-		TaskDeploymentDescriptor tdd = new TaskDeploymentDescriptor(
-			new JobID(),
-			"Test job",
-			new JobVertexID(),
-			new ExecutionAttemptID(),
-			new SerializedValue<ExecutionConfig>(null),
-			"Test task",
-			0,
-			1,
-			0,
-			new Configuration(),
-			new Configuration(),
-			"Invokable",
-			Collections.<ResultPartitionDeploymentDescriptor>emptyList(),
-			Collections.<InputGateDeploymentDescriptor>emptyList(),
-			Collections.<BlobKey>emptyList(),
-			Collections.<URL>emptyList(),
-			0
-		);
-
-		Acknowledge ack = taskExecutor.executeTask(tdd);
-
-		ack = taskExecutor.cancelTask(tdd.getExecutionId());
-	}
-
-	/**
-	 * Tests that cancelling a non-existing task will return an exception
-	 */
-	@Test(expected=Exception.class)
-	public void testWrongTaskCancellation() throws Exception {
-		RpcService testingRpcService = mock(RpcService.class);
-		InvocationHandler invocationHandler = mock(InvocationHandler.class);
-		Object selfGateway = Proxy.newProxyInstance(ClassLoader.getSystemClassLoader(), new Class<?>[] {TaskExecutorGateway.class, MainThreadExecutor.class, StartStoppable.class}, invocationHandler);
-		when(testingRpcService.startServer(Matchers.any(RpcEndpoint.class))).thenReturn((RpcGateway)selfGateway);
-		DirectExecutorService directExecutorService = null;
-		TaskExecutor taskExecutor = new TaskExecutor(testingRpcService, directExecutorService);
-		taskExecutor.start();
-
-		taskExecutor.cancelTask(new ExecutionAttemptID());
-
-		fail("The cancellation should have thrown an exception.");
-	}
+	
 }


[04/50] [abbrv] flink git commit: [FLINK-4594] [core] Validate lower bound in MathUtils.checkedDownCast

Posted by tr...@apache.org.
[FLINK-4594] [core] Validate lower bound in MathUtils.checkedDownCast

This closes #2481


Project: http://git-wip-us.apache.org/repos/asf/flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/4f84a02f
Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/4f84a02f
Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/4f84a02f

Branch: refs/heads/flip-6
Commit: 4f84a02f4b8be5948b233531fbcb07dfdb5e7930
Parents: 470b752
Author: Greg Hogan <co...@greghogan.com>
Authored: Thu Sep 8 10:35:39 2016 -0400
Committer: Greg Hogan <co...@greghogan.com>
Committed: Tue Sep 20 10:10:20 2016 -0400

----------------------------------------------------------------------
 flink-core/src/main/java/org/apache/flink/util/MathUtils.java | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/flink/blob/4f84a02f/flink-core/src/main/java/org/apache/flink/util/MathUtils.java
----------------------------------------------------------------------
diff --git a/flink-core/src/main/java/org/apache/flink/util/MathUtils.java b/flink-core/src/main/java/org/apache/flink/util/MathUtils.java
index 49056cc..074e8ae 100644
--- a/flink-core/src/main/java/org/apache/flink/util/MathUtils.java
+++ b/flink-core/src/main/java/org/apache/flink/util/MathUtils.java
@@ -80,12 +80,14 @@ public final class MathUtils {
 	 * 
 	 * @param value The value to be cast to an integer.
 	 * @return The given value as an integer.
+	 * @see Math#toIntExact(long)
 	 */
 	public static int checkedDownCast(long value) {
-		if (value > Integer.MAX_VALUE) {
+		int downCast = (int) value;
+		if (downCast != value) {
 			throw new IllegalArgumentException("Cannot downcast long value " + value + " to integer.");
 		}
-		return (int) value;
+		return downCast;
 	}
 
 	/**


[25/50] [abbrv] flink git commit: [FLINK-4403] [rpc] Use relative classloader for proxies, rather than system class loader.

Posted by tr...@apache.org.
[FLINK-4403] [rpc] Use relative classloader for proxies, rather than system class loader.


Project: http://git-wip-us.apache.org/repos/asf/flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/68998378
Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/68998378
Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/68998378

Branch: refs/heads/flip-6
Commit: 689983781c30db903b52d593b2fbfebba1ab6236
Parents: fe90811
Author: Stephan Ewen <se...@apache.org>
Authored: Tue Aug 16 21:11:01 2016 +0200
Committer: Till Rohrmann <tr...@apache.org>
Committed: Wed Sep 21 11:39:14 2016 +0200

----------------------------------------------------------------------
 .../org/apache/flink/runtime/rpc/akka/AkkaRpcService.java     | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/flink/blob/68998378/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcService.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcService.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcService.java
index b647bbd..d987c2f 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcService.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcService.java
@@ -104,9 +104,14 @@ public class AkkaRpcService implements RpcService {
 
 				InvocationHandler akkaInvocationHandler = new AkkaInvocationHandler(actorRef, timeout, maximumFramesize);
 
+				// Rather than using the System ClassLoader directly, we derive the ClassLoader
+				// from this class . That works better in cases where Flink runs embedded and all Flink
+				// code is loaded dynamically (for example from an OSGI bundle) through a custom ClassLoader
+				ClassLoader classLoader = AkkaRpcService.this.getClass().getClassLoader();
+				
 				@SuppressWarnings("unchecked")
 				C proxy = (C) Proxy.newProxyInstance(
-					ClassLoader.getSystemClassLoader(),
+					classLoader,
 					new Class<?>[] {clazz},
 					akkaInvocationHandler);
 


[45/50] [abbrv] flink git commit: [FLINK-4451] [rpc] Throw RpcConnectionException when rpc endpoint is not reachable

Posted by tr...@apache.org.
[FLINK-4451] [rpc] Throw RpcConnectionException when rpc endpoint is not reachable

This PR introduces a RpcConnectionException which is thrown if the rpc endpoint
is not reachable when calling RpcService.connect.

This closes #2405.


Project: http://git-wip-us.apache.org/repos/asf/flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/9718dcd3
Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/9718dcd3
Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/9718dcd3

Branch: refs/heads/flip-6
Commit: 9718dcd39742fc542b64d5a7b97a14dc2dd21380
Parents: 12af3b1
Author: Till Rohrmann <tr...@apache.org>
Authored: Tue Aug 23 17:59:54 2016 +0200
Committer: Till Rohrmann <tr...@apache.org>
Committed: Wed Sep 21 11:39:17 2016 +0200

----------------------------------------------------------------------
 .../registration/RetryingRegistration.java      |  2 +-
 .../apache/flink/runtime/rpc/RpcService.java    |  7 +++-
 .../flink/runtime/rpc/akka/AkkaRpcService.java  | 38 +++++++++++-------
 .../rpc/exceptions/RpcConnectionException.java  | 41 ++++++++++++++++++++
 .../runtime/rpc/akka/AkkaRpcActorTest.java      | 18 +++++++++
 5 files changed, 88 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/flink/blob/9718dcd3/flink-runtime/src/main/java/org/apache/flink/runtime/registration/RetryingRegistration.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/registration/RetryingRegistration.java b/flink-runtime/src/main/java/org/apache/flink/runtime/registration/RetryingRegistration.java
index 88fe9b5..ea49e42 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/registration/RetryingRegistration.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/registration/RetryingRegistration.java
@@ -197,7 +197,7 @@ public abstract class RetryingRegistration<Gateway extends RpcGateway, Success e
 				@Override
 				public void onFailure(Throwable failure) {
 					if (!isCanceled()) {
-						log.warn("Could not resolve {} address {}, retrying...", targetName, targetAddress);
+						log.warn("Could not resolve {} address {}, retrying...", targetName, targetAddress, failure);
 						startRegistration();
 					}
 				}

http://git-wip-us.apache.org/repos/asf/flink/blob/9718dcd3/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcService.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcService.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcService.java
index bc0f5cb..78c1cec 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcService.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcService.java
@@ -18,6 +18,7 @@
 
 package org.apache.flink.runtime.rpc;
 
+import org.apache.flink.runtime.rpc.exceptions.RpcConnectionException;
 import scala.concurrent.ExecutionContext;
 import scala.concurrent.Future;
 
@@ -32,12 +33,14 @@ public interface RpcService {
 
 	/**
 	 * Connect to a remote rpc server under the provided address. Returns a rpc gateway which can
-	 * be used to communicate with the rpc server.
+	 * be used to communicate with the rpc server. If the connection failed, then the returned
+	 * future is failed with a {@link RpcConnectionException}.
 	 *
 	 * @param address Address of the remote rpc server
 	 * @param clazz Class of the rpc gateway to return
 	 * @param <C> Type of the rpc gateway to return
-	 * @return Future containing the rpc gateway
+	 * @return Future containing the rpc gateway or an {@link RpcConnectionException} if the
+	 * connection attempt failed
 	 */
 	<C extends RpcGateway> Future<C> connect(String address, Class<C> clazz);
 

http://git-wip-us.apache.org/repos/asf/flink/blob/9718dcd3/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcService.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcService.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcService.java
index 00a6932..060a1ef 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcService.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcService.java
@@ -35,6 +35,7 @@ import org.apache.flink.runtime.rpc.RpcGateway;
 import org.apache.flink.runtime.rpc.RpcEndpoint;
 import org.apache.flink.runtime.rpc.RpcService;
 import org.apache.flink.runtime.rpc.StartStoppable;
+import org.apache.flink.runtime.rpc.exceptions.RpcConnectionException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -99,25 +100,32 @@ public class AkkaRpcService implements RpcService {
 		final Future<Object> identify = asker.ask(new Identify(42), timeout);
 		return identify.map(new Mapper<Object, C>(){
 			@Override
-			public C apply(Object obj) {
-				ActorRef actorRef = ((ActorIdentity) obj).getRef();
+			public C checkedApply(Object obj) throws Exception {
 
-				final String address = AkkaUtils.getAkkaURL(actorSystem, actorRef);
+				ActorIdentity actorIdentity = (ActorIdentity) obj;
 
-				InvocationHandler akkaInvocationHandler = new AkkaInvocationHandler(address, actorRef, timeout, maximumFramesize);
+				if (actorIdentity.getRef() == null) {
+					throw new RpcConnectionException("Could not connect to rpc endpoint under address " + address + '.');
+				} else {
+					ActorRef actorRef = actorIdentity.getRef();
+
+					final String address = AkkaUtils.getAkkaURL(actorSystem, actorRef);
+
+					InvocationHandler akkaInvocationHandler = new AkkaInvocationHandler(address, actorRef, timeout, maximumFramesize);
 
-				// Rather than using the System ClassLoader directly, we derive the ClassLoader
-				// from this class . That works better in cases where Flink runs embedded and all Flink
-				// code is loaded dynamically (for example from an OSGI bundle) through a custom ClassLoader
-				ClassLoader classLoader = AkkaRpcService.this.getClass().getClassLoader();
-				
-				@SuppressWarnings("unchecked")
-				C proxy = (C) Proxy.newProxyInstance(
-					classLoader,
-					new Class<?>[] {clazz},
-					akkaInvocationHandler);
+					// Rather than using the System ClassLoader directly, we derive the ClassLoader
+					// from this class . That works better in cases where Flink runs embedded and all Flink
+					// code is loaded dynamically (for example from an OSGI bundle) through a custom ClassLoader
+					ClassLoader classLoader = AkkaRpcService.this.getClass().getClassLoader();
 
-				return proxy;
+					@SuppressWarnings("unchecked")
+					C proxy = (C) Proxy.newProxyInstance(
+						classLoader,
+						new Class<?>[]{clazz},
+						akkaInvocationHandler);
+
+					return proxy;
+				}
 			}
 		}, actorSystem.dispatcher());
 	}

http://git-wip-us.apache.org/repos/asf/flink/blob/9718dcd3/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/exceptions/RpcConnectionException.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/exceptions/RpcConnectionException.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/exceptions/RpcConnectionException.java
new file mode 100644
index 0000000..a22ebe7
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/exceptions/RpcConnectionException.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc.exceptions;
+
+import java.util.concurrent.ExecutionException;
+
+/**
+ * Exception class which is thrown if a rpc connection failed. Usually this happens if the remote
+ * host cannot be reached.
+ */
+public class RpcConnectionException extends ExecutionException {
+	private static final long serialVersionUID = -5500560405481142472L;
+
+	public RpcConnectionException(String message) {
+		super(message);
+	}
+
+	public RpcConnectionException(String message, Throwable cause) {
+		super(message, cause);
+	}
+
+	public RpcConnectionException(Throwable cause) {
+		super(cause);
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/9718dcd3/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcActorTest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcActorTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcActorTest.java
index 82d13f0..a6ceb91 100644
--- a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcActorTest.java
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcActorTest.java
@@ -25,6 +25,7 @@ import org.apache.flink.runtime.rpc.RpcEndpoint;
 import org.apache.flink.runtime.rpc.RpcGateway;
 import org.apache.flink.runtime.rpc.RpcMethod;
 import org.apache.flink.runtime.rpc.RpcService;
+import org.apache.flink.runtime.rpc.exceptions.RpcConnectionException;
 import org.apache.flink.util.TestLogger;
 import org.hamcrest.core.Is;
 import org.junit.AfterClass;
@@ -36,6 +37,7 @@ import java.util.concurrent.TimeUnit;
 
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertThat;
+import static org.junit.Assert.fail;
 
 public class AkkaRpcActorTest extends TestLogger {
 
@@ -73,6 +75,22 @@ public class AkkaRpcActorTest extends TestLogger {
 	}
 
 	/**
+	 * Tests that a {@link RpcConnectionException} is thrown if the rpc endpoint cannot be connected to.
+	 */
+	@Test
+	public void testFailingAddressResolution() throws Exception {
+		Future<DummyRpcGateway> futureRpcGateway = akkaRpcService.connect("foobar", DummyRpcGateway.class);
+
+		try {
+			DummyRpcGateway gateway = Await.result(futureRpcGateway, timeout.duration());
+
+			fail("The rpc connection resolution should have failed.");
+		} catch (RpcConnectionException exception) {
+			// we're expecting a RpcConnectionException
+		}
+	}
+
+	/**
 	 * Tests that the {@link AkkaRpcActor} stashes messages until the corresponding
 	 * {@link RpcEndpoint} has been started.
 	 */


[20/50] [abbrv] flink git commit: [FLINK-4392] [rpc] Make RPC Service thread-safe

Posted by tr...@apache.org.
[FLINK-4392] [rpc] Make RPC Service thread-safe


Project: http://git-wip-us.apache.org/repos/asf/flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/86f21bf9
Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/86f21bf9
Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/86f21bf9

Branch: refs/heads/flip-6
Commit: 86f21bf9d6daee9bdf6af1e9052faa058203133f
Parents: f5614a4
Author: Stephan Ewen <se...@apache.org>
Authored: Sat Aug 13 19:11:47 2016 +0200
Committer: Till Rohrmann <tr...@apache.org>
Committed: Wed Sep 21 11:39:13 2016 +0200

----------------------------------------------------------------------
 .../flink/runtime/rpc/akka/AkkaGateway.java     |  3 +-
 .../flink/runtime/rpc/akka/AkkaRpcService.java  | 92 +++++++++++++++-----
 2 files changed, 70 insertions(+), 25 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/flink/blob/86f21bf9/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaGateway.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaGateway.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaGateway.java
index a826e7d..ec3091c 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaGateway.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaGateway.java
@@ -19,11 +19,12 @@
 package org.apache.flink.runtime.rpc.akka;
 
 import akka.actor.ActorRef;
+import org.apache.flink.runtime.rpc.RpcGateway;
 
 /**
  * Interface for Akka based rpc gateways
  */
-interface AkkaGateway {
+interface AkkaGateway extends RpcGateway {
 
 	ActorRef getRpcServer();
 }

http://git-wip-us.apache.org/repos/asf/flink/blob/86f21bf9/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcService.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcService.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcService.java
index 17983d0..448216c 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcService.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcService.java
@@ -28,47 +28,61 @@ import akka.actor.Props;
 import akka.dispatch.Mapper;
 import akka.pattern.AskableActorSelection;
 import akka.util.Timeout;
+
 import org.apache.flink.runtime.akka.AkkaUtils;
 import org.apache.flink.runtime.rpc.MainThreadExecutor;
 import org.apache.flink.runtime.rpc.RpcGateway;
 import org.apache.flink.runtime.rpc.RpcEndpoint;
 import org.apache.flink.runtime.rpc.RpcService;
-import org.apache.flink.util.Preconditions;
+
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+
 import scala.concurrent.Future;
 
+import javax.annotation.concurrent.ThreadSafe;
 import java.lang.reflect.InvocationHandler;
 import java.lang.reflect.Proxy;
-import java.util.Collection;
 import java.util.HashSet;
+import java.util.Set;
+
+import static org.apache.flink.util.Preconditions.checkNotNull;
+import static org.apache.flink.util.Preconditions.checkState;
 
 /**
- * Akka based {@link RpcService} implementation. The rpc service starts an Akka actor to receive
- * rpcs from a {@link RpcGateway}.
+ * Akka based {@link RpcService} implementation. The RPC service starts an Akka actor to receive
+ * RPC invocations from a {@link RpcGateway}.
  */
+@ThreadSafe
 public class AkkaRpcService implements RpcService {
+
 	private static final Logger LOG = LoggerFactory.getLogger(AkkaRpcService.class);
 
+	private final Object lock = new Object();
+
 	private final ActorSystem actorSystem;
 	private final Timeout timeout;
-	private final Collection<ActorRef> actors = new HashSet<>(4);
+	private final Set<ActorRef> actors = new HashSet<>(4);
+
+	private volatile boolean stopped;
 
 	public AkkaRpcService(final ActorSystem actorSystem, final Timeout timeout) {
-		this.actorSystem = Preconditions.checkNotNull(actorSystem, "actor system");
-		this.timeout = Preconditions.checkNotNull(timeout, "timeout");
+		this.actorSystem = checkNotNull(actorSystem, "actor system");
+		this.timeout = checkNotNull(timeout, "timeout");
 	}
 
+	// this method does not mutate state and is thus thread-safe
 	@Override
 	public <C extends RpcGateway> Future<C> connect(final String address, final Class<C> clazz) {
-		LOG.info("Try to connect to remote rpc server with address {}. Returning a {} gateway.", address, clazz.getName());
+		checkState(!stopped, "RpcService is stopped");
 
-		final ActorSelection actorSel = actorSystem.actorSelection(address);
+		LOG.debug("Try to connect to remote RPC endpoint with address {}. Returning a {} gateway.",
+				address, clazz.getName());
 
+		final ActorSelection actorSel = actorSystem.actorSelection(address);
 		final AskableActorSelection asker = new AskableActorSelection(actorSel);
 
 		final Future<Object> identify = asker.ask(new Identify(42), timeout);
-
 		return identify.map(new Mapper<Object, C>(){
 			@Override
 			public C apply(Object obj) {
@@ -89,20 +103,29 @@ public class AkkaRpcService implements RpcService {
 
 	@Override
 	public <C extends RpcGateway, S extends RpcEndpoint<C>> C startServer(S rpcEndpoint) {
-		Preconditions.checkNotNull(rpcEndpoint, "rpc endpoint");
-
-		LOG.info("Start Akka rpc actor to handle rpcs for {}.", rpcEndpoint.getClass().getName());
+		checkNotNull(rpcEndpoint, "rpc endpoint");
 
 		Props akkaRpcActorProps = Props.create(AkkaRpcActor.class, rpcEndpoint);
+		ActorRef actorRef;
+
+		synchronized (lock) {
+			checkState(!stopped, "RpcService is stopped");
+			actorRef = actorSystem.actorOf(akkaRpcActorProps);
+			actors.add(actorRef);
+		}
 
-		ActorRef actorRef = actorSystem.actorOf(akkaRpcActorProps);
-		actors.add(actorRef);
+		LOG.info("Starting RPC endpoint for {} at {} .", rpcEndpoint.getClass().getName(), actorRef.path());
 
 		InvocationHandler akkaInvocationHandler = new AkkaInvocationHandler(actorRef, timeout);
 
+		// Rather than using the System ClassLoader directly, we derive the ClassLoader
+		// from this class . That works better in cases where Flink runs embedded and all Flink
+		// code is loaded dynamically (for example from an OSGI bundle) through a custom ClassLoader
+		ClassLoader classLoader = getClass().getClassLoader();
+
 		@SuppressWarnings("unchecked")
 		C self = (C) Proxy.newProxyInstance(
-			ClassLoader.getSystemClassLoader(),
+			classLoader,
 			new Class<?>[]{rpcEndpoint.getSelfGatewayType(), MainThreadExecutor.class, AkkaGateway.class},
 			akkaInvocationHandler);
 
@@ -110,35 +133,56 @@ public class AkkaRpcService implements RpcService {
 	}
 
 	@Override
-	public <C extends RpcGateway> void stopServer(C selfGateway) {
+	public void stopServer(RpcGateway selfGateway) {
 		if (selfGateway instanceof AkkaGateway) {
 			AkkaGateway akkaClient = (AkkaGateway) selfGateway;
 
-			if (actors.contains(akkaClient.getRpcServer())) {
-				ActorRef selfActorRef = akkaClient.getRpcServer();
-
-				LOG.info("Stop Akka rpc actor {}.", selfActorRef.path());
+			boolean fromThisService;
+			synchronized (lock) {
+				if (stopped) {
+					return;
+				} else {
+					fromThisService = actors.remove(akkaClient.getRpcServer());
+				}
+			}
 
+			if (fromThisService) {
+				ActorRef selfActorRef = akkaClient.getRpcServer();
+				LOG.info("Stopping RPC endpoint {}.", selfActorRef.path());
 				selfActorRef.tell(PoisonPill.getInstance(), ActorRef.noSender());
+			} else {
+				LOG.debug("RPC endpoint {} already stopped or from different RPC service");
 			}
 		}
 	}
 
 	@Override
 	public void stopService() {
-		LOG.info("Stop Akka rpc service.");
-		actorSystem.shutdown();
+		LOG.info("Stopping Akka RPC service.");
+
+		synchronized (lock) {
+			if (stopped) {
+				return;
+			}
+
+			stopped = true;
+			actorSystem.shutdown();
+			actors.clear();
+		}
+
 		actorSystem.awaitTermination();
 	}
 
 	@Override
 	public <C extends RpcGateway> String getAddress(C selfGateway) {
+		checkState(!stopped, "RpcService is stopped");
+
 		if (selfGateway instanceof AkkaGateway) {
 			ActorRef actorRef = ((AkkaGateway) selfGateway).getRpcServer();
 			return AkkaUtils.getAkkaURL(actorSystem, actorRef);
 		} else {
 			String className = AkkaGateway.class.getName();
-			throw new RuntimeException("Cannot get address for non " + className + '.');
+			throw new IllegalArgumentException("Cannot get address for non " + className + '.');
 		}
 	}
 }


[42/50] [abbrv] flink git commit: [FLINK-4516] leader election of resourcemanager

Posted by tr...@apache.org.
[FLINK-4516] leader election of resourcemanager

- add serial rpc service
- add a special rpcService implementation which directly executes the asynchronous calls serially one by one, it is just for testcase
- Change ResourceManagerLeaderContender code and TestingSerialRpcService code
- override shutdown logic to stop leadershipService
- use a mocked RpcService rather than TestingSerialRpcService for resourceManager HA test

This closes #2427


Project: http://git-wip-us.apache.org/repos/asf/flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/ce92a753
Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/ce92a753
Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/ce92a753

Branch: refs/heads/flip-6
Commit: ce92a753d406a88073541dd4a3b0396d14d131a3
Parents: ba78de8
Author: beyond1920 <be...@126.com>
Authored: Sat Aug 27 14:14:28 2016 +0800
Committer: Till Rohrmann <tr...@apache.org>
Committed: Wed Sep 21 11:39:16 2016 +0200

----------------------------------------------------------------------
 .../HighAvailabilityServices.java               |   7 +
 .../runtime/highavailability/NonHaServices.java |   5 +
 .../rpc/resourcemanager/ResourceManager.java    | 111 +++++-
 .../TestingHighAvailabilityServices.java        |  19 +-
 .../runtime/rpc/TestingSerialRpcService.java    | 369 +++++++++++++++++++
 .../resourcemanager/ResourceManagerHATest.java  |  76 ++++
 6 files changed, 578 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/flink/blob/ce92a753/flink-runtime/src/main/java/org/apache/flink/runtime/highavailability/HighAvailabilityServices.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/highavailability/HighAvailabilityServices.java b/flink-runtime/src/main/java/org/apache/flink/runtime/highavailability/HighAvailabilityServices.java
index 73e4f1f..298147c 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/highavailability/HighAvailabilityServices.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/highavailability/HighAvailabilityServices.java
@@ -40,6 +40,13 @@ public interface HighAvailabilityServices {
 	LeaderRetrievalService getResourceManagerLeaderRetriever() throws Exception;
 
 	/**
+	 * Gets the leader election service for the cluster's resource manager.
+	 * @return
+	 * @throws Exception
+	 */
+	LeaderElectionService getResourceManagerLeaderElectionService() throws Exception;
+
+	/**
 	 * Gets the leader election service for the given job.
 	 *
 	 * @param jobID The identifier of the job running the election.

http://git-wip-us.apache.org/repos/asf/flink/blob/ce92a753/flink-runtime/src/main/java/org/apache/flink/runtime/highavailability/NonHaServices.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/highavailability/NonHaServices.java b/flink-runtime/src/main/java/org/apache/flink/runtime/highavailability/NonHaServices.java
index 3d2769b..292a404 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/highavailability/NonHaServices.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/highavailability/NonHaServices.java
@@ -61,6 +61,11 @@ public class NonHaServices implements HighAvailabilityServices {
 	}
 
 	@Override
+	public LeaderElectionService getResourceManagerLeaderElectionService() throws Exception {
+		return new StandaloneLeaderElectionService();
+	}
+
+	@Override
 	public LeaderElectionService getJobMasterLeaderElectionService(JobID jobID) throws Exception {
 		return new StandaloneLeaderElectionService();
 	}

http://git-wip-us.apache.org/repos/asf/flink/blob/ce92a753/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/ResourceManager.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/ResourceManager.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/ResourceManager.java
index 6f34465..f7147c9 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/ResourceManager.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/ResourceManager.java
@@ -20,24 +20,26 @@ package org.apache.flink.runtime.rpc.resourcemanager;
 
 import akka.dispatch.Mapper;
 
+import org.apache.flink.annotation.VisibleForTesting;
 import org.apache.flink.runtime.clusterframework.types.ResourceID;
+import org.apache.flink.runtime.highavailability.HighAvailabilityServices;
 import org.apache.flink.runtime.instance.InstanceID;
+import org.apache.flink.runtime.leaderelection.LeaderContender;
+import org.apache.flink.runtime.leaderelection.LeaderElectionService;
 import org.apache.flink.runtime.rpc.RpcMethod;
 import org.apache.flink.runtime.rpc.RpcEndpoint;
 import org.apache.flink.runtime.rpc.RpcService;
 import org.apache.flink.runtime.rpc.jobmaster.JobMaster;
 import org.apache.flink.runtime.rpc.jobmaster.JobMasterGateway;
 import org.apache.flink.runtime.rpc.taskexecutor.TaskExecutorRegistrationSuccess;
-import org.apache.flink.util.Preconditions;
 
-import scala.concurrent.ExecutionContext;
-import scala.concurrent.ExecutionContext$;
 import scala.concurrent.Future;
 
 import java.util.HashMap;
 import java.util.Map;
 import java.util.UUID;
-import java.util.concurrent.ExecutorService;
+
+import static org.apache.flink.util.Preconditions.checkNotNull;
 
 /**
  * ResourceManager implementation. The resource manager is responsible for resource de-/allocation
@@ -50,16 +52,51 @@ import java.util.concurrent.ExecutorService;
  * </ul>
  */
 public class ResourceManager extends RpcEndpoint<ResourceManagerGateway> {
-	private final ExecutionContext executionContext;
 	private final Map<JobMasterGateway, InstanceID> jobMasterGateways;
+	private final HighAvailabilityServices highAvailabilityServices;
+	private LeaderElectionService leaderElectionService = null;
+	private UUID leaderSessionID = null;
 
-	public ResourceManager(RpcService rpcService, ExecutorService executorService) {
+	public ResourceManager(RpcService rpcService, HighAvailabilityServices highAvailabilityServices) {
 		super(rpcService);
-		this.executionContext = ExecutionContext$.MODULE$.fromExecutor(
-			Preconditions.checkNotNull(executorService));
+		this.highAvailabilityServices = checkNotNull(highAvailabilityServices);
 		this.jobMasterGateways = new HashMap<>();
 	}
 
+	@Override
+	public void start() {
+		// start a leader
+		try {
+			super.start();
+			leaderElectionService = highAvailabilityServices.getResourceManagerLeaderElectionService();
+			leaderElectionService.start(new ResourceManagerLeaderContender());
+		} catch (Throwable e) {
+			log.error("A fatal error happened when starting the ResourceManager", e);
+			throw new RuntimeException("A fatal error happened when starting the ResourceManager", e);
+		}
+	}
+
+	@Override
+	public void shutDown() {
+		try {
+			leaderElectionService.stop();
+			super.shutDown();
+		} catch(Throwable e) {
+			log.error("A fatal error happened when shutdown the ResourceManager", e);
+			throw new RuntimeException("A fatal error happened when shutdown the ResourceManager", e);
+		}
+	}
+
+	/**
+	 * Gets the leader session id of current resourceManager.
+	 *
+	 * @return return the leaderSessionId of current resourceManager, this returns null until the current resourceManager is granted leadership.
+	 */
+	@VisibleForTesting
+	UUID getLeaderSessionID() {
+		return leaderSessionID;
+	}
+
 	/**
 	 * Register a {@link JobMaster} at the resource manager.
 	 *
@@ -116,4 +153,62 @@ public class ResourceManager extends RpcEndpoint<ResourceManagerGateway> {
 
 		return new TaskExecutorRegistrationSuccess(new InstanceID(), 5000);
 	}
+
+	private class ResourceManagerLeaderContender implements LeaderContender {
+
+		/**
+		 * Callback method when current resourceManager is granted leadership
+		 *
+		 * @param leaderSessionID unique leadershipID
+		 */
+		@Override
+		public void grantLeadership(final UUID leaderSessionID) {
+			runAsync(new Runnable() {
+				@Override
+				public void run() {
+					log.info("ResourceManager {} was granted leadership with leader session ID {}", getAddress(), leaderSessionID);
+					ResourceManager.this.leaderSessionID = leaderSessionID;
+					// confirming the leader session ID might be blocking,
+					leaderElectionService.confirmLeaderSessionID(leaderSessionID);
+				}
+			});
+		}
+
+		/**
+		 * Callback method when current resourceManager lose leadership.
+		 */
+		@Override
+		public void revokeLeadership() {
+			runAsync(new Runnable() {
+				@Override
+				public void run() {
+					log.info("ResourceManager {} was revoked leadership.", getAddress());
+					jobMasterGateways.clear();
+					leaderSessionID = null;
+				}
+			});
+		}
+
+		@Override
+		public String getAddress() {
+			return ResourceManager.this.getAddress();
+		}
+
+		/**
+		 * Handles error occurring in the leader election service
+		 *
+		 * @param exception Exception being thrown in the leader election service
+		 */
+		@Override
+		public void handleError(final Exception exception) {
+			runAsync(new Runnable() {
+				@Override
+				public void run() {
+					log.error("ResourceManager received an error from the LeaderElectionService.", exception);
+					// terminate ResourceManager in case of an error
+					shutDown();
+				}
+			});
+		}
+	}
 }

http://git-wip-us.apache.org/repos/asf/flink/blob/ce92a753/flink-runtime/src/test/java/org/apache/flink/runtime/highavailability/TestingHighAvailabilityServices.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/highavailability/TestingHighAvailabilityServices.java b/flink-runtime/src/test/java/org/apache/flink/runtime/highavailability/TestingHighAvailabilityServices.java
index 4d654a3..3162f40 100644
--- a/flink-runtime/src/test/java/org/apache/flink/runtime/highavailability/TestingHighAvailabilityServices.java
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/highavailability/TestingHighAvailabilityServices.java
@@ -32,6 +32,8 @@ public class TestingHighAvailabilityServices implements HighAvailabilityServices
 
 	private volatile LeaderElectionService jobMasterLeaderElectionService;
 
+	private volatile LeaderElectionService resourceManagerLeaderElectionService;
+
 
 	// ------------------------------------------------------------------------
 	//  Setters for mock / testing implementations
@@ -44,7 +46,11 @@ public class TestingHighAvailabilityServices implements HighAvailabilityServices
 	public void setJobMasterLeaderElectionService(LeaderElectionService leaderElectionService) {
 		this.jobMasterLeaderElectionService = leaderElectionService;
 	}
-	
+
+	public void setResourceManagerLeaderElectionService(LeaderElectionService leaderElectionService) {
+		this.resourceManagerLeaderElectionService = leaderElectionService;
+	}
+
 	// ------------------------------------------------------------------------
 	//  HA Services Methods
 	// ------------------------------------------------------------------------
@@ -69,4 +75,15 @@ public class TestingHighAvailabilityServices implements HighAvailabilityServices
 			throw new IllegalStateException("JobMasterLeaderElectionService has not been set");
 		}
 	}
+
+	@Override
+	public LeaderElectionService getResourceManagerLeaderElectionService() throws Exception {
+		LeaderElectionService service = resourceManagerLeaderElectionService;
+
+		if (service != null) {
+			return service;
+		} else {
+			throw new IllegalStateException("ResourceManagerLeaderElectionService has not been set");
+		}
+	}
 }

http://git-wip-us.apache.org/repos/asf/flink/blob/ce92a753/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/TestingSerialRpcService.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/TestingSerialRpcService.java b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/TestingSerialRpcService.java
new file mode 100644
index 0000000..7bdbb99
--- /dev/null
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/TestingSerialRpcService.java
@@ -0,0 +1,369 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc;
+
+import akka.dispatch.ExecutionContexts;
+import akka.dispatch.Futures;
+import akka.util.Timeout;
+import org.apache.flink.api.java.tuple.Tuple2;
+import org.apache.flink.runtime.util.DirectExecutorService;
+import org.apache.flink.util.Preconditions;
+import scala.concurrent.Await;
+import scala.concurrent.ExecutionContext;
+import scala.concurrent.Future;
+import scala.concurrent.duration.FiniteDuration;
+
+import java.lang.annotation.Annotation;
+import java.lang.reflect.InvocationHandler;
+import java.lang.reflect.Method;
+import java.lang.reflect.Proxy;
+import java.util.BitSet;
+import java.util.UUID;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.TimeUnit;
+
+import static org.apache.flink.util.Preconditions.checkNotNull;
+
+
+/**
+ * An RPC Service implementation for testing. This RPC service directly executes all asynchronous calls one by one in the main thread.
+ */
+public class TestingSerialRpcService implements RpcService {
+
+	private final DirectExecutorService executorService;
+	private final ConcurrentHashMap<String, RpcGateway> registeredConnections;
+
+	public TestingSerialRpcService() {
+		executorService = new DirectExecutorService();
+		this.registeredConnections = new ConcurrentHashMap<>();
+	}
+
+	@Override
+	public void scheduleRunnable(final Runnable runnable, final long delay, final TimeUnit unit) {
+		try {
+			unit.sleep(delay);
+			runnable.run();
+		} catch (Throwable e) {
+			throw new RuntimeException(e);
+		}
+	}
+
+	@Override
+	public ExecutionContext getExecutionContext() {
+		return ExecutionContexts.fromExecutorService(executorService);
+	}
+
+	@Override
+	public void stopService() {
+		executorService.shutdown();
+		registeredConnections.clear();
+	}
+
+	@Override
+	public void stopServer(RpcGateway selfGateway) {
+
+	}
+
+	@Override
+	public <C extends RpcGateway, S extends RpcEndpoint<C>> C startServer(S rpcEndpoint) {
+		final String address = UUID.randomUUID().toString();
+
+		InvocationHandler akkaInvocationHandler = new TestingSerialInvocationHandler(address, rpcEndpoint);
+		ClassLoader classLoader = getClass().getClassLoader();
+
+		@SuppressWarnings("unchecked")
+		C self = (C) Proxy.newProxyInstance(
+			classLoader,
+			new Class<?>[]{
+				rpcEndpoint.getSelfGatewayType(),
+				MainThreadExecutor.class,
+				StartStoppable.class,
+				RpcGateway.class
+			},
+			akkaInvocationHandler);
+
+		return self;
+	}
+
+	@Override
+	public <C extends RpcGateway> Future<C> connect(String address, Class<C> clazz) {
+		RpcGateway gateway = registeredConnections.get(address);
+
+		if (gateway != null) {
+			if (clazz.isAssignableFrom(gateway.getClass())) {
+				@SuppressWarnings("unchecked")
+				C typedGateway = (C) gateway;
+				return Futures.successful(typedGateway);
+			} else {
+				return Futures.failed(
+					new Exception("Gateway registered under " + address + " is not of type " + clazz));
+			}
+		} else {
+			return Futures.failed(new Exception("No gateway registered under that name"));
+		}
+	}
+
+	// ------------------------------------------------------------------------
+	// connections
+	// ------------------------------------------------------------------------
+
+	public void registerGateway(String address, RpcGateway gateway) {
+		checkNotNull(address);
+		checkNotNull(gateway);
+
+		if (registeredConnections.putIfAbsent(address, gateway) != null) {
+			throw new IllegalStateException("a gateway is already registered under " + address);
+		}
+	}
+
+	private static class TestingSerialInvocationHandler<C extends RpcGateway, T extends RpcEndpoint<C>> implements InvocationHandler, RpcGateway, MainThreadExecutor, StartStoppable {
+
+		private final T rpcEndpoint;
+
+		/** default timeout for asks */
+		private final Timeout timeout;
+
+		private final String address;
+
+		private TestingSerialInvocationHandler(String address, T rpcEndpoint) {
+			this(address, rpcEndpoint, new Timeout(new FiniteDuration(10, TimeUnit.SECONDS)));
+		}
+
+		private TestingSerialInvocationHandler(String address, T rpcEndpoint, Timeout timeout) {
+			this.rpcEndpoint = rpcEndpoint;
+			this.timeout = timeout;
+			this.address = address;
+		}
+
+		@Override
+		public Object invoke(Object proxy, Method method, Object[] args) throws Throwable {
+			Class<?> declaringClass = method.getDeclaringClass();
+			if (declaringClass.equals(MainThreadExecutor.class) ||
+				declaringClass.equals(Object.class) || declaringClass.equals(StartStoppable.class) ||
+				declaringClass.equals(RpcGateway.class)) {
+				return method.invoke(this, args);
+			} else {
+				final String methodName = method.getName();
+				Class<?>[] parameterTypes = method.getParameterTypes();
+				Annotation[][] parameterAnnotations = method.getParameterAnnotations();
+				Timeout futureTimeout = extractRpcTimeout(parameterAnnotations, args, timeout);
+
+				final Tuple2<Class<?>[], Object[]> filteredArguments = filterArguments(
+					parameterTypes,
+					parameterAnnotations,
+					args);
+
+				Class<?> returnType = method.getReturnType();
+
+				if (returnType.equals(Future.class)) {
+					try {
+						Object result = handleRpcInvocationSync(methodName, filteredArguments.f0, filteredArguments.f1, futureTimeout);
+						return Futures.successful(result);
+					} catch (Throwable e) {
+						return Futures.failed(e);
+					}
+				} else {
+					return handleRpcInvocationSync(methodName, filteredArguments.f0, filteredArguments.f1, futureTimeout);
+				}
+			}
+		}
+
+		/**
+		 * Handle rpc invocations by looking up the rpc method on the rpc endpoint and calling this
+		 * method with the provided method arguments. If the method has a return value, it is returned
+		 * to the sender of the call.
+		 */
+		private Object handleRpcInvocationSync(final String methodName,
+			final Class<?>[] parameterTypes,
+			final Object[] args,
+			final Timeout futureTimeout) throws Exception {
+			final Method rpcMethod = lookupRpcMethod(methodName, parameterTypes);
+			Object result = rpcMethod.invoke(rpcEndpoint, args);
+
+			if (result != null && result instanceof Future) {
+				Future<?> future = (Future<?>) result;
+				return Await.result(future, futureTimeout.duration());
+			} else {
+				return result;
+			}
+		}
+
+		@Override
+		public void runAsync(Runnable runnable) {
+			runnable.run();
+		}
+
+		@Override
+		public <V> Future<V> callAsync(Callable<V> callable, Timeout callTimeout) {
+			try {
+				return Futures.successful(callable.call());
+			} catch (Throwable e) {
+				return Futures.failed(e);
+			}
+		}
+
+		@Override
+		public void scheduleRunAsync(final Runnable runnable, final long delay) {
+			try {
+				TimeUnit.MILLISECONDS.sleep(delay);
+				runnable.run();
+			} catch (Throwable e) {
+				throw new RuntimeException(e);
+			}
+		}
+
+		@Override
+		public String getAddress() {
+			return address;
+		}
+
+		@Override
+		public void start() {
+			// do nothing
+		}
+
+		@Override
+		public void stop() {
+			// do nothing
+		}
+
+		/**
+		 * Look up the rpc method on the given {@link RpcEndpoint} instance.
+		 *
+		 * @param methodName     Name of the method
+		 * @param parameterTypes Parameter types of the method
+		 * @return Method of the rpc endpoint
+		 * @throws NoSuchMethodException Thrown if the method with the given name and parameter types
+		 *                               cannot be found at the rpc endpoint
+		 */
+		private Method lookupRpcMethod(final String methodName,
+			final Class<?>[] parameterTypes) throws NoSuchMethodException {
+			return rpcEndpoint.getClass().getMethod(methodName, parameterTypes);
+		}
+
+		// ------------------------------------------------------------------------
+		//  Helper methods
+		// ------------------------------------------------------------------------
+
+		/**
+		 * Extracts the {@link RpcTimeout} annotated rpc timeout value from the list of given method
+		 * arguments. If no {@link RpcTimeout} annotated parameter could be found, then the default
+		 * timeout is returned.
+		 *
+		 * @param parameterAnnotations Parameter annotations
+		 * @param args                 Array of arguments
+		 * @param defaultTimeout       Default timeout to return if no {@link RpcTimeout} annotated parameter
+		 *                             has been found
+		 * @return Timeout extracted from the array of arguments or the default timeout
+		 */
+		private static Timeout extractRpcTimeout(Annotation[][] parameterAnnotations, Object[] args,
+			Timeout defaultTimeout) {
+			if (args != null) {
+				Preconditions.checkArgument(parameterAnnotations.length == args.length);
+
+				for (int i = 0; i < parameterAnnotations.length; i++) {
+					if (isRpcTimeout(parameterAnnotations[i])) {
+						if (args[i] instanceof FiniteDuration) {
+							return new Timeout((FiniteDuration) args[i]);
+						} else {
+							throw new RuntimeException("The rpc timeout parameter must be of type " +
+								FiniteDuration.class.getName() + ". The type " + args[i].getClass().getName() +
+								" is not supported.");
+						}
+					}
+				}
+			}
+
+			return defaultTimeout;
+		}
+
+		/**
+		 * Removes all {@link RpcTimeout} annotated parameters from the parameter type and argument
+		 * list.
+		 *
+		 * @param parameterTypes       Array of parameter types
+		 * @param parameterAnnotations Array of parameter annotations
+		 * @param args                 Arary of arguments
+		 * @return Tuple of filtered parameter types and arguments which no longer contain the
+		 * {@link RpcTimeout} annotated parameter types and arguments
+		 */
+		private static Tuple2<Class<?>[], Object[]> filterArguments(
+			Class<?>[] parameterTypes,
+			Annotation[][] parameterAnnotations,
+			Object[] args) {
+
+			Class<?>[] filteredParameterTypes;
+			Object[] filteredArgs;
+
+			if (args == null) {
+				filteredParameterTypes = parameterTypes;
+				filteredArgs = null;
+			} else {
+				Preconditions.checkArgument(parameterTypes.length == parameterAnnotations.length);
+				Preconditions.checkArgument(parameterAnnotations.length == args.length);
+
+				BitSet isRpcTimeoutParameter = new BitSet(parameterTypes.length);
+				int numberRpcParameters = parameterTypes.length;
+
+				for (int i = 0; i < parameterTypes.length; i++) {
+					if (isRpcTimeout(parameterAnnotations[i])) {
+						isRpcTimeoutParameter.set(i);
+						numberRpcParameters--;
+					}
+				}
+
+				if (numberRpcParameters == parameterTypes.length) {
+					filteredParameterTypes = parameterTypes;
+					filteredArgs = args;
+				} else {
+					filteredParameterTypes = new Class<?>[numberRpcParameters];
+					filteredArgs = new Object[numberRpcParameters];
+					int counter = 0;
+
+					for (int i = 0; i < parameterTypes.length; i++) {
+						if (!isRpcTimeoutParameter.get(i)) {
+							filteredParameterTypes[counter] = parameterTypes[i];
+							filteredArgs[counter] = args[i];
+							counter++;
+						}
+					}
+				}
+			}
+			return Tuple2.of(filteredParameterTypes, filteredArgs);
+		}
+
+		/**
+		 * Checks whether any of the annotations is of type {@link RpcTimeout}
+		 *
+		 * @param annotations Array of annotations
+		 * @return True if {@link RpcTimeout} was found; otherwise false
+		 */
+		private static boolean isRpcTimeout(Annotation[] annotations) {
+			for (Annotation annotation : annotations) {
+				if (annotation.annotationType().equals(RpcTimeout.class)) {
+					return true;
+				}
+			}
+
+			return false;
+		}
+
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/ce92a753/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/resourcemanager/ResourceManagerHATest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/resourcemanager/ResourceManagerHATest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/resourcemanager/ResourceManagerHATest.java
new file mode 100644
index 0000000..dfffeda
--- /dev/null
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/resourcemanager/ResourceManagerHATest.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc.resourcemanager;
+
+import org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices;
+import org.apache.flink.runtime.leaderelection.TestingLeaderElectionService;
+import org.apache.flink.runtime.rpc.MainThreadExecutor;
+import org.apache.flink.runtime.rpc.RpcEndpoint;
+import org.apache.flink.runtime.rpc.RpcGateway;
+import org.apache.flink.runtime.rpc.RpcService;
+import org.apache.flink.runtime.rpc.StartStoppable;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.UUID;
+
+import static org.mockito.Matchers.any;
+import static org.mockito.Mockito.doCallRealMethod;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+/**
+ * resourceManager HA test, including grant leadership and revoke leadership
+ */
+public class ResourceManagerHATest {
+
+	@Test
+	public void testGrantAndRevokeLeadership() throws Exception {
+		// mock a RpcService which will return a special RpcGateway when call its startServer method, the returned RpcGateway directly execute runAsync call
+		TestingResourceManagerGatewayProxy gateway = mock(TestingResourceManagerGatewayProxy.class);
+		doCallRealMethod().when(gateway).runAsync(any(Runnable.class));
+
+		RpcService rpcService = mock(RpcService.class);
+		when(rpcService.startServer(any(RpcEndpoint.class))).thenReturn(gateway);
+
+		TestingLeaderElectionService leaderElectionService = new TestingLeaderElectionService();
+		TestingHighAvailabilityServices highAvailabilityServices = new TestingHighAvailabilityServices();
+		highAvailabilityServices.setResourceManagerLeaderElectionService(leaderElectionService);
+
+		final ResourceManager resourceManager = new ResourceManager(rpcService, highAvailabilityServices);
+		resourceManager.start();
+		// before grant leadership, resourceManager's leaderId is null
+		Assert.assertNull(resourceManager.getLeaderSessionID());
+		final UUID leaderId = UUID.randomUUID();
+		leaderElectionService.isLeader(leaderId);
+		// after grant leadership, resourceManager's leaderId has value
+		Assert.assertEquals(leaderId, resourceManager.getLeaderSessionID());
+		// then revoke leadership, resourceManager's leaderId is null again
+		leaderElectionService.notLeader();
+		Assert.assertNull(resourceManager.getLeaderSessionID());
+	}
+
+	private static abstract class TestingResourceManagerGatewayProxy implements MainThreadExecutor, StartStoppable, RpcGateway {
+		@Override
+		public void runAsync(Runnable runnable) {
+			runnable.run();
+		}
+	}
+
+}


[49/50] [abbrv] flink git commit: [FLINK-4361] Introduce Flink's own future abstraction

Posted by tr...@apache.org.
[FLINK-4361] Introduce Flink's own future abstraction

Flink's future abstraction whose API is similar to Java 8's CompletableFuture.
That's in order to ease a future transition to this class once we ditch Java 7.
The current set of operations comprises:

- isDone to check the completion of the future
- get/getNow to obtain the future's value
- cancel to cancel the future (best effort basis)
- thenApplyAsync to transform the future's value into another value
- thenAcceptAsync to register a callback for a successful completion of the future
- exceptionallyAsync to register a callback for an exception completion of the future
- thenComposeAsync to transform the future's value and flatten the returned future
- handleAsync to register a callback which is called either with the regular result
or the exceptional result

Additionally, Flink offers a CompletableFuture which can be completed with a regular
value or an exception:

- complete/completeExceptionally

Complete FlinkCompletableFuture exceptionally with a CanellationException upon cancel

This closes #2472.


Project: http://git-wip-us.apache.org/repos/asf/flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/f766f120
Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/f766f120
Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/f766f120

Branch: refs/heads/flip-6
Commit: f766f120e4af6b60a3e628836963eb981005325c
Parents: 0a134a3
Author: Till Rohrmann <tr...@apache.org>
Authored: Fri Sep 2 21:13:34 2016 +0200
Committer: Till Rohrmann <tr...@apache.org>
Committed: Wed Sep 21 11:39:18 2016 +0200

----------------------------------------------------------------------
 .../runtime/concurrent/AcceptFunction.java      |  34 +++
 .../flink/runtime/concurrent/ApplyFunction.java |  36 +++
 .../flink/runtime/concurrent/BiFunction.java    |  38 +++
 .../runtime/concurrent/CompletableFuture.java   |  47 ++++
 .../apache/flink/runtime/concurrent/Future.java | 156 +++++++++++
 .../concurrent/impl/FlinkCompletableFuture.java |  71 +++++
 .../runtime/concurrent/impl/FlinkFuture.java    | 273 +++++++++++++++++++
 .../runtime/concurrent/FlinkFutureTest.java     | 269 ++++++++++++++++++
 8 files changed, 924 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/flink/blob/f766f120/flink-runtime/src/main/java/org/apache/flink/runtime/concurrent/AcceptFunction.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/concurrent/AcceptFunction.java b/flink-runtime/src/main/java/org/apache/flink/runtime/concurrent/AcceptFunction.java
new file mode 100644
index 0000000..a300647
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/concurrent/AcceptFunction.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.concurrent;
+
+/**
+ * Function which is called with a single argument and does not return a value.
+ *
+ * @param <T> type of the argument
+ */
+public interface AcceptFunction<T> {
+
+	/**
+	 * Method which handles the function call.
+	 *
+	 * @param value is the function's argument
+	 */
+	void accept(T value);
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/f766f120/flink-runtime/src/main/java/org/apache/flink/runtime/concurrent/ApplyFunction.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/concurrent/ApplyFunction.java b/flink-runtime/src/main/java/org/apache/flink/runtime/concurrent/ApplyFunction.java
new file mode 100644
index 0000000..64def98
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/concurrent/ApplyFunction.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.concurrent;
+
+/**
+ * Function which is called with a single argument.
+ *
+ * @param <V> type of the argument
+ * @param <R> type of the return value
+ */
+public interface ApplyFunction<V, R> {
+
+	/**
+	 * Method which handles the function call.
+	 *
+	 * @param value is the single argument
+	 * @return the function value
+	 */
+	R apply(V value);
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/f766f120/flink-runtime/src/main/java/org/apache/flink/runtime/concurrent/BiFunction.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/concurrent/BiFunction.java b/flink-runtime/src/main/java/org/apache/flink/runtime/concurrent/BiFunction.java
new file mode 100644
index 0000000..2b09de8
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/concurrent/BiFunction.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.concurrent;
+
+/**
+ * Function which is called with two arguments and returns a value.
+ *
+ * @param <T> type of the first argument
+ * @param <U> type of the second argument
+ * @param <R> type of the return value
+ */
+public interface BiFunction<T, U, R> {
+
+	/**
+	 * Method which handles the function call.
+	 *
+	 * @param t first argument
+	 * @param u second argument
+	 * @return the function value
+	 */
+	R apply(T t, U u);
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/f766f120/flink-runtime/src/main/java/org/apache/flink/runtime/concurrent/CompletableFuture.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/concurrent/CompletableFuture.java b/flink-runtime/src/main/java/org/apache/flink/runtime/concurrent/CompletableFuture.java
new file mode 100644
index 0000000..5288bf2
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/concurrent/CompletableFuture.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.concurrent;
+
+/**
+ * Flink's completable future abstraction. A completable future can be completed with a regular
+ * value or an exception.
+ *
+ * @param <T> type of the future's value
+ */
+public interface CompletableFuture<T> extends Future<T> {
+
+	/**
+	 * Completes the future with the given value. The complete operation only succeeds if the future
+	 * has not been completed before. Whether it is successful or not is returned by the method.
+	 *
+	 * @param value to complete the future with
+	 * @return true if the completion was successful; otherwise false
+	 */
+	boolean complete(T value);
+
+	/**
+	 * Completes the future with the given exception. The complete operation only succeeds if the
+	 * future has not been completed before. Whether it is successful or not is returned by the
+	 * method.
+	 *
+	 * @param t the exception to complete the future with
+	 * @return true if the completion was successful; otherwise false
+	 */
+	boolean completeExceptionally(Throwable t);
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/f766f120/flink-runtime/src/main/java/org/apache/flink/runtime/concurrent/Future.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/concurrent/Future.java b/flink-runtime/src/main/java/org/apache/flink/runtime/concurrent/Future.java
new file mode 100644
index 0000000..b32bcd4
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/concurrent/Future.java
@@ -0,0 +1,156 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.concurrent;
+
+import java.util.concurrent.CancellationException;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.Executor;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
+
+/**
+ * Flink's basic future abstraction. A future represents an asynchronous operation whose result
+ * will be contained in this instance upon completion.
+ *
+ * @param <T> type of the future's result
+ */
+public interface Future<T> {
+
+	/**
+	 * Checks if the future has been completed. A future is completed, if the result has been
+	 * delivered.
+	 *
+	 * @return true if the future is completed; otherwise false
+	 */
+	boolean isDone();
+
+	/**
+	 * Tries to cancel the future's operation. Note that not all future operations can be canceled.
+	 * The result of the cancelling will be returned.
+	 *
+	 * @param mayInterruptIfRunning true iff the future operation may be interrupted
+	 * @return true if the cancelling was successful; otherwise false
+	 */
+	boolean cancel(boolean mayInterruptIfRunning);
+
+	/**
+	 * Gets the result value of the future. If the future has not been completed, then this
+	 * operation will block indefinitely until the result has been delivered.
+	 *
+	 * @return the result value
+	 * @throws CancellationException if the future has been cancelled
+	 * @throws InterruptedException if the current thread was interrupted while waiting for the result
+	 * @throws ExecutionException if the future has been completed with an exception
+	 */
+	T get() throws InterruptedException, ExecutionException;
+
+	/**
+	 * Gets the result value of the future. If the future has not been done, then this operation
+	 * will block the given timeout value. If the result has not been delivered within the timeout,
+	 * then the method throws an {@link TimeoutException}.
+	 *
+	 * @param timeout the time to wait for the future to be done
+	 * @param unit time unit for the timeout argument
+	 * @return the result value
+	 * @throws CancellationException if the future has been cancelled
+	 * @throws InterruptedException if the current thread was interrupted while waiting for the result
+	 * @throws ExecutionException if the future has been completed with an exception
+	 * @throws TimeoutException if the future has not been completed within the given timeout
+	 */
+	T get(long timeout, TimeUnit unit) throws InterruptedException, ExecutionException, TimeoutException;
+
+	/**
+	 * Gets the value of the future. If the future has not been completed when calling this
+	 * function, the given value is returned.
+	 *
+	 * @param valueIfAbsent value which is returned if the future has not been completed
+	 * @return value of the future or the given value if the future has not been completed
+	 * @throws ExecutionException if the future has been completed with an exception
+	 */
+	T getNow(T valueIfAbsent) throws ExecutionException;
+
+	/**
+	 * Applies the given function to the value of the future. The result of the apply function is
+	 * the value of the newly returned future.
+	 * <p>
+	 * The apply function is executed asynchronously by the given executor.
+	 *
+	 * @param applyFunction function to apply to the future's value
+	 * @param executor used to execute the given apply function asynchronously
+	 * @param <R> type of the apply function's return value
+	 * @return future representing the return value of the given apply function
+	 */
+	<R> Future<R> thenApplyAsync(ApplyFunction<? super T, ? extends R> applyFunction, Executor executor);
+
+	/**
+	 * Applies the accept function to the value of the future. Unlike the {@link ApplyFunction}, the
+	 * {@link AcceptFunction} does not return a value. The returned future, thus, represents only
+	 * the completion of the accept callback.
+	 * <p>
+	 * The accept function is executed asynchronously by the given executor.
+	 *
+	 * @param acceptFunction function to apply to the future's value
+	 * @param executor used to execute the given apply function asynchronously
+	 * @return future representing the completion of the accept callback
+	 */
+	Future<Void> thenAcceptAsync(AcceptFunction<? super T> acceptFunction, Executor executor);
+
+	/**
+	 * Applies the given function to the value of the future if the future has been completed
+	 * exceptionally. The completing exception is given to the apply function which can return a new
+	 * value which is the value of the returned future.
+	 * <p>
+	 * The apply function is executed asynchronously by the given executor.
+	 *
+	 * @param exceptionallyFunction to apply to the future's value if it is an exception
+	 * @param executor used to execute the given apply function asynchronously
+	 * @param <R> type of the apply function's return value
+	 * @return future representing the return value of the given apply function
+	 */
+	<R> Future<R> exceptionallyAsync(ApplyFunction<Throwable, ? extends R> exceptionallyFunction, Executor executor);
+
+	/**
+	 * Applies the given function to the value of the future. The apply function returns a future
+	 * result, which is flattened. This means that the resulting future of this method represents
+	 * the future's value of the apply function.
+	 * <p>
+	 * The apply function is executed asynchronously by the given executor.
+	 *
+	 * @param composeFunction to apply to the future's value. The function returns a future which is
+	 *                        flattened
+	 * @param executor used to execute the given apply function asynchronously
+	 * @param <R> type of the returned future's value
+	 * @return future representing the flattened return value of the apply function
+	 */
+	<R> Future<R> thenComposeAsync(ApplyFunction<? super T, Future<? extends R>> composeFunction, Executor executor);
+
+	/**
+	 * Applies the given handle function to the result of the future. The result can either be the
+	 * future's value or the exception with which the future has been completed. The two cases are
+	 * mutually exclusive. The result of the handle function is the returned future's value.
+	 * <p>
+	 * The handle function is executed asynchronously by the given executor.
+	 *
+	 * @param biFunction applied to the result (normal and exceptional) of the future
+	 * @param executor used to execute the handle function asynchronously
+	 * @param <R> type of the handle function's return value
+	 * @return future representing the handle function's return value
+	 */
+	<R> Future<R> handleAsync(BiFunction<? super T, Throwable, ? extends R> biFunction, Executor executor);
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/f766f120/flink-runtime/src/main/java/org/apache/flink/runtime/concurrent/impl/FlinkCompletableFuture.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/concurrent/impl/FlinkCompletableFuture.java b/flink-runtime/src/main/java/org/apache/flink/runtime/concurrent/impl/FlinkCompletableFuture.java
new file mode 100644
index 0000000..5566880
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/concurrent/impl/FlinkCompletableFuture.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.concurrent.impl;
+
+import org.apache.flink.runtime.concurrent.CompletableFuture;
+import org.apache.flink.util.Preconditions;
+import scala.concurrent.Promise;
+
+import java.util.concurrent.CancellationException;
+
+/**
+ * Implementation of {@link CompletableFuture} which is backed by {@link Promise}.
+ *
+ * @param <T> type of the future's value
+ */
+public class FlinkCompletableFuture<T> extends FlinkFuture<T> implements CompletableFuture<T> {
+
+	private final Promise<T> promise;
+
+	public FlinkCompletableFuture() {
+		promise = new scala.concurrent.impl.Promise.DefaultPromise<>();
+		scalaFuture = promise.future();
+	}
+
+	@Override
+	public boolean complete(T value) {
+		Preconditions.checkNotNull(value);
+
+		try {
+			promise.success(value);
+
+			return true;
+		} catch (IllegalStateException e) {
+			return false;
+		}
+	}
+
+	@Override
+	public boolean completeExceptionally(Throwable t) {
+		Preconditions.checkNotNull(t);
+
+		try {
+			promise.failure(t);
+
+			return true;
+		} catch (IllegalStateException e) {
+			return false;
+		}
+	}
+
+	@Override
+	public boolean cancel(boolean mayInterruptIfRunning) {
+		return completeExceptionally(new CancellationException("Future has been canceled."));
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/f766f120/flink-runtime/src/main/java/org/apache/flink/runtime/concurrent/impl/FlinkFuture.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/concurrent/impl/FlinkFuture.java b/flink-runtime/src/main/java/org/apache/flink/runtime/concurrent/impl/FlinkFuture.java
new file mode 100644
index 0000000..361cd3d
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/concurrent/impl/FlinkFuture.java
@@ -0,0 +1,273 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.concurrent.impl;
+
+import akka.dispatch.ExecutionContexts$;
+import akka.dispatch.Futures;
+import akka.dispatch.Mapper;
+import akka.dispatch.Recover;
+import org.apache.flink.runtime.concurrent.AcceptFunction;
+import org.apache.flink.runtime.concurrent.ApplyFunction;
+import org.apache.flink.runtime.concurrent.Future;
+import org.apache.flink.runtime.concurrent.BiFunction;
+import org.apache.flink.util.Preconditions;
+import scala.Option;
+import scala.concurrent.Await;
+import scala.concurrent.ExecutionContext;
+import scala.concurrent.duration.Duration;
+import scala.concurrent.duration.FiniteDuration;
+import scala.util.Failure;
+import scala.util.Success;
+import scala.util.Try;
+
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.Executor;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
+
+/**
+ * Implementation of {@link Future} which is backed by {@link scala.concurrent.Future}.
+ *
+ * @param <T> type of the future's value
+ */
+public class FlinkFuture<T> implements Future<T> {
+
+	protected scala.concurrent.Future<T> scalaFuture;
+
+	FlinkFuture() {
+		scalaFuture = null;
+	}
+
+	public FlinkFuture(scala.concurrent.Future<T> scalaFuture) {
+		this.scalaFuture = Preconditions.checkNotNull(scalaFuture);
+	}
+
+	//-----------------------------------------------------------------------------------
+	// Future's methods
+	//-----------------------------------------------------------------------------------
+
+	@Override
+	public boolean isDone() {
+		return scalaFuture.isCompleted();
+	}
+
+	@Override
+	public boolean cancel(boolean mayInterruptIfRunning) {
+		return false;
+	}
+
+	@Override
+	public T get() throws InterruptedException, ExecutionException {
+		Preconditions.checkNotNull(scalaFuture);
+
+		try {
+			return Await.result(scalaFuture, Duration.Inf());
+		} catch (InterruptedException e) {
+			throw e;
+		} catch (Exception e) {
+			throw new ExecutionException(e);
+		}
+	}
+
+	@Override
+	public T get(long timeout, TimeUnit unit) throws InterruptedException, ExecutionException, TimeoutException {
+		Preconditions.checkNotNull(scalaFuture);
+		Preconditions.checkArgument(timeout >= 0L, "The timeout value has to be larger or " +
+			"equal than 0.");
+
+		try {
+			return Await.result(scalaFuture, new FiniteDuration(timeout, unit));
+		} catch (InterruptedException | TimeoutException e) {
+			throw e;
+		} catch (Exception e) {
+			throw new ExecutionException(e);
+		}
+	}
+
+	@Override
+	public T getNow(T valueIfAbsent) throws ExecutionException {
+		Preconditions.checkNotNull(scalaFuture);
+		Preconditions.checkNotNull(valueIfAbsent);
+
+		Option<Try<T>> value = scalaFuture.value();
+
+		if (value.isDefined()) {
+			Try<T> tri = value.get();
+
+			if (tri instanceof Success) {
+				return ((Success<T>)tri).value();
+			} else {
+				throw new ExecutionException(((Failure<T>)tri).exception());
+			}
+		} else {
+			return valueIfAbsent;
+		}
+	}
+
+	@Override
+	public <R> Future<R> thenApplyAsync(final ApplyFunction<? super T, ? extends R> applyFunction, Executor executor) {
+		Preconditions.checkNotNull(scalaFuture);
+		Preconditions.checkNotNull(applyFunction);
+		Preconditions.checkNotNull(executor);
+
+		scala.concurrent.Future<R> mappedFuture = scalaFuture.map(new Mapper<T, R>() {
+			@Override
+			public R apply(T value) {
+				return applyFunction.apply(value);
+			}
+		}, createExecutionContext(executor));
+
+		return new FlinkFuture<>(mappedFuture);
+	}
+
+	@Override
+	public Future<Void> thenAcceptAsync(final AcceptFunction<? super T> acceptFunction, Executor executor) {
+		Preconditions.checkNotNull(scalaFuture);
+		Preconditions.checkNotNull(acceptFunction);
+		Preconditions.checkNotNull(executor);
+
+		scala.concurrent.Future<Void> acceptedFuture = scalaFuture.map(new Mapper<T, Void>() {
+			@Override
+			public Void apply(T value) {
+				acceptFunction.accept(value);
+
+				return null;
+			}
+		}, createExecutionContext(executor));
+
+		return new FlinkFuture<>(acceptedFuture);
+	}
+
+	@Override
+	public <R> Future<R> exceptionallyAsync(final ApplyFunction<Throwable, ? extends R> exceptionallyFunction, Executor executor) {
+		Preconditions.checkNotNull(scalaFuture);
+		Preconditions.checkNotNull(exceptionallyFunction);
+		Preconditions.checkNotNull(executor);
+
+		scala.concurrent.Future<R> recoveredFuture = scalaFuture.recover(new Recover<R>() {
+			@Override
+			public R recover(Throwable failure) throws Throwable {
+				return exceptionallyFunction.apply(failure);
+			}
+		}, createExecutionContext(executor));
+
+		return new FlinkFuture<>(recoveredFuture);
+	}
+
+	@Override
+	public <R> Future<R> thenComposeAsync(final ApplyFunction<? super T, Future<? extends R>> applyFunction, final Executor executor) {
+		Preconditions.checkNotNull(scalaFuture);
+		Preconditions.checkNotNull(applyFunction);
+		Preconditions.checkNotNull(executor);
+
+		scala.concurrent.Future<R> flatMappedFuture = scalaFuture.flatMap(new Mapper<T, scala.concurrent.Future<R>>() {
+			@Override
+			public scala.concurrent.Future<R> apply(T value) {
+				final Future<? extends R> future = applyFunction.apply(value);
+
+				if (future instanceof FlinkFuture) {
+					@SuppressWarnings("unchecked")
+					FlinkFuture<R> flinkFuture = (FlinkFuture<R>) future;
+
+					return flinkFuture.scalaFuture;
+				} else {
+					return Futures.future(new Callable<R>() {
+						@Override
+						public R call() throws Exception {
+							return future.get();
+						}
+					}, createExecutionContext(executor));
+				}
+			}
+		}, createExecutionContext(executor));
+
+		return new FlinkFuture<>(flatMappedFuture);
+	}
+
+	@Override
+	public <R> Future<R> handleAsync(final BiFunction<? super T, Throwable, ? extends R> biFunction, Executor executor) {
+		Preconditions.checkNotNull(scalaFuture);
+		Preconditions.checkNotNull(biFunction);
+		Preconditions.checkNotNull(executor);
+
+		scala.concurrent.Future<R> mappedFuture = scalaFuture.map(new Mapper<T, R>() {
+			@Override
+			public R checkedApply(T value) throws Exception {
+				try {
+					return biFunction.apply(value, null);
+				} catch (Throwable t) {
+					throw new FlinkFuture.WrapperException(t);
+				}
+			}
+		}, createExecutionContext(executor));
+
+		scala.concurrent.Future<R> recoveredFuture = mappedFuture.recover(new Recover<R>() {
+			@Override
+			public R recover(Throwable failure) throws Throwable {
+				if (failure instanceof FlinkFuture.WrapperException) {
+					throw failure.getCause();
+				} else {
+					return biFunction.apply(null, failure);
+				}
+			}
+		}, createExecutionContext(executor));
+
+
+		return new FlinkFuture<>(recoveredFuture);
+	}
+
+	//-----------------------------------------------------------------------------------
+	// Static factory methods
+	//-----------------------------------------------------------------------------------
+
+	/**
+	 * Creates a future whose value is determined by the asynchronously executed callable.
+	 *
+	 * @param callable whose value is delivered by the future
+	 * @param executor to be used to execute the callable
+	 * @param <T> type of the future's value
+	 * @return future which represents the value of the callable
+	 */
+	public static <T> Future<T> supplyAsync(Callable<T> callable, Executor executor) {
+		Preconditions.checkNotNull(callable);
+		Preconditions.checkNotNull(executor);
+
+		scala.concurrent.Future<T> scalaFuture = Futures.future(callable, createExecutionContext(executor));
+
+		return new FlinkFuture<>(scalaFuture);
+	}
+
+	//-----------------------------------------------------------------------------------
+	// Helper functions and types
+	//-----------------------------------------------------------------------------------
+
+	private static ExecutionContext createExecutionContext(Executor executor) {
+		return ExecutionContexts$.MODULE$.fromExecutor(executor);
+	}
+
+	private static class WrapperException extends Exception {
+
+		private static final long serialVersionUID = 6533166370660884091L;
+
+		WrapperException(Throwable cause) {
+			super(cause);
+		}
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/f766f120/flink-runtime/src/test/java/org/apache/flink/runtime/concurrent/FlinkFutureTest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/concurrent/FlinkFutureTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/concurrent/FlinkFutureTest.java
new file mode 100644
index 0000000..bd5af66
--- /dev/null
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/concurrent/FlinkFutureTest.java
@@ -0,0 +1,269 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.concurrent;
+
+import org.apache.flink.runtime.concurrent.impl.FlinkCompletableFuture;
+import org.apache.flink.runtime.concurrent.impl.FlinkFuture;
+import org.apache.flink.util.TestLogger;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+/**
+ * Tests for Flink's future implementation.
+ */
+public class FlinkFutureTest extends TestLogger {
+
+	private static ExecutorService executor;
+
+	@BeforeClass
+	public static void setup() {
+		executor = Executors.newSingleThreadExecutor();
+	}
+
+	@AfterClass
+	public static void teardown() {
+		executor.shutdown();
+	}
+
+	@Test
+	public void testFutureApply() throws Exception {
+		int expectedValue = 42;
+
+		CompletableFuture<Integer> initialFuture = new FlinkCompletableFuture<>();
+
+		Future<String> appliedFuture = initialFuture.thenApplyAsync(new ApplyFunction<Integer, String>() {
+			@Override
+			public String apply(Integer value) {
+				return String.valueOf(value);
+			}
+		}, executor);
+
+		initialFuture.complete(expectedValue);
+
+		assertEquals(String.valueOf(expectedValue), appliedFuture.get());
+	}
+
+	@Test(expected = TimeoutException.class)
+	public void testFutureGetTimeout() throws InterruptedException, ExecutionException, TimeoutException {
+		CompletableFuture<Integer> future = new FlinkCompletableFuture<>();
+
+		future.get(10, TimeUnit.MILLISECONDS);
+
+		fail("Get should have thrown a timeout exception.");
+	}
+
+	@Test(expected = TestException.class)
+	public void testExceptionalCompletion() throws Throwable {
+		CompletableFuture<Integer> initialFuture = new FlinkCompletableFuture<>();
+
+		initialFuture.completeExceptionally(new TestException("Test exception"));
+
+		try {
+			initialFuture.get();
+
+			fail("Get should have thrown an exception.");
+		} catch (ExecutionException e) {
+			throw e.getCause();
+		}
+	}
+
+	/**
+	 * Tests that an exception is propagated through an apply function.
+	 */
+	@Test(expected = TestException.class)
+	public void testExceptionPropagation() throws Throwable {
+		CompletableFuture<Integer> initialFuture = new FlinkCompletableFuture<>();
+
+		Future<String> mappedFuture = initialFuture.thenApplyAsync(new ApplyFunction<Integer, String>() {
+			@Override
+			public String apply(Integer value) {
+				throw new TestException("Test exception");
+			}
+		}, executor);
+
+		Future<String> mapped2Future = mappedFuture.thenApplyAsync(new ApplyFunction<String, String>() {
+			@Override
+			public String apply(String value) {
+				return "foobar";
+			}
+		}, executor);
+
+		initialFuture.complete(42);
+
+		try {
+			mapped2Future.get();
+
+			fail("Get should have thrown an exception.");
+		} catch (ExecutionException e) {
+			throw e.getCause();
+		}
+	}
+
+	@Test
+	public void testExceptionally() throws ExecutionException, InterruptedException {
+		CompletableFuture<Integer> initialFuture = new FlinkCompletableFuture<>();
+		String exceptionMessage = "Foobar";
+
+		Future<String> recovered = initialFuture.exceptionallyAsync(new ApplyFunction<Throwable, String>() {
+			@Override
+			public String apply(Throwable value) {
+				return value.getMessage();
+			}
+		}, executor);
+
+		initialFuture.completeExceptionally(new TestException(exceptionMessage));
+
+		String actualMessage = recovered.get();
+
+		assertEquals(exceptionMessage, actualMessage);
+	}
+
+	@Test
+	public void testCompose() throws ExecutionException, InterruptedException {
+		CompletableFuture<Integer> initialFuture = new FlinkCompletableFuture<>();
+
+		final int expectedValue = 42;
+
+		Future<Integer> composedFuture = initialFuture.thenComposeAsync(new ApplyFunction<Integer, Future<? extends Integer>>() {
+			@Override
+			public Future<? extends Integer> apply(Integer value) {
+				return FlinkFuture.supplyAsync(new Callable<Integer>() {
+					@Override
+					public Integer call() throws Exception {
+						return expectedValue;
+					}
+				}, executor);
+			}
+		}, executor);
+
+		initialFuture.complete(42);
+
+		int actualValue = composedFuture.get();
+
+		assertEquals(expectedValue, actualValue);
+	}
+
+	@Test
+	public void testGetNow() throws ExecutionException {
+		CompletableFuture<Integer> initialFuture = new FlinkCompletableFuture<>();
+
+		final int absentValue = 41;
+
+		assertEquals(new Integer(absentValue), initialFuture.getNow(absentValue));
+	}
+
+	@Test
+	public void testAccept() throws ExecutionException, InterruptedException {
+		CompletableFuture<Integer> initialFuture = new FlinkCompletableFuture<>();
+		final AtomicInteger atomicInteger = new AtomicInteger(0);
+		int expectedValue = 42;
+
+		Future<Void> result = initialFuture.thenAcceptAsync(new AcceptFunction<Integer>() {
+			@Override
+			public void accept(Integer value) {
+				atomicInteger.set(value);
+			}
+		}, executor);
+
+		initialFuture.complete(expectedValue);
+
+		result.get();
+
+		assertEquals(expectedValue, atomicInteger.get());
+	}
+
+	@Test
+	public void testHandle() throws ExecutionException, InterruptedException {
+		CompletableFuture<Integer> initialFuture = new FlinkCompletableFuture<>();
+		int expectedValue = 43;
+
+		Future<String> result = initialFuture.handleAsync(new BiFunction<Integer, Throwable, String>() {
+			@Override
+			public String apply(Integer integer, Throwable throwable) {
+				if (integer != null) {
+					return String.valueOf(integer);
+				} else {
+					return throwable.getMessage();
+				}
+			}
+		}, executor);
+
+		initialFuture.complete(expectedValue);
+
+		assertEquals(String.valueOf(expectedValue), result.get());
+	}
+
+	@Test
+	public void testHandleException() throws ExecutionException, InterruptedException {
+		CompletableFuture<Integer> initialFuture = new FlinkCompletableFuture<>();
+		String exceptionMessage = "foobar";
+
+		Future<String> result = initialFuture.handleAsync(new BiFunction<Integer, Throwable, String>() {
+			@Override
+			public String apply(Integer integer, Throwable throwable) {
+				if (integer != null) {
+					return String.valueOf(integer);
+				} else {
+					return throwable.getMessage();
+				}
+			}
+		}, executor);
+
+		initialFuture.completeExceptionally(new TestException(exceptionMessage));
+
+		assertEquals(exceptionMessage, result.get());
+	}
+
+	@Test
+	public void testMultipleCompleteOperations() throws ExecutionException, InterruptedException {
+		CompletableFuture<Integer> initialFuture = new FlinkCompletableFuture<>();
+		int expectedValue = 42;
+
+		assertTrue(initialFuture.complete(expectedValue));
+
+		assertFalse(initialFuture.complete(1337));
+
+		assertFalse(initialFuture.completeExceptionally(new TestException("foobar")));
+
+		assertEquals(new Integer(expectedValue), initialFuture.get());
+	}
+
+	private static class TestException extends RuntimeException {
+
+		private static final long serialVersionUID = -1274022962838535130L;
+
+		public TestException(String message) {
+			super(message);
+		}
+	}
+}


[40/50] [abbrv] flink git commit: [FLINK-4363] Implement TaskManager basic startup of all components in java

Posted by tr...@apache.org.
[FLINK-4363] Implement TaskManager basic startup of all components in java

This closes #2400


Project: http://git-wip-us.apache.org/repos/asf/flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/35e80103
Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/35e80103
Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/35e80103

Branch: refs/heads/flip-6
Commit: 35e8010337fd8ccee231724f6ea2f4f5eb196c60
Parents: ffd20e9
Author: \u6dd8\u6c5f <ta...@alibaba-inc.com>
Authored: Tue Aug 30 11:28:14 2016 +0800
Committer: Till Rohrmann <tr...@apache.org>
Committed: Wed Sep 21 11:39:16 2016 +0200

----------------------------------------------------------------------
 .../runtime/rpc/taskexecutor/TaskExecutor.java  | 686 ++++++++++++++++++-
 .../taskexecutor/TaskExecutorConfiguration.java | 151 ++++
 .../rpc/taskexecutor/TaskExecutorTest.java      |   8 +-
 3 files changed, 822 insertions(+), 23 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/flink/blob/35e80103/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutor.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutor.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutor.java
index f201e00..36d6310 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutor.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutor.java
@@ -18,15 +18,60 @@
 
 package org.apache.flink.runtime.rpc.taskexecutor;
 
+import akka.actor.ActorSystem;
+import akka.dispatch.ExecutionContexts$;
+import akka.util.Timeout;
+import com.typesafe.config.Config;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.flink.api.common.JobID;
+import org.apache.flink.configuration.ConfigConstants;
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.configuration.IllegalConfigurationException;
+import org.apache.flink.core.memory.HeapMemorySegment;
+import org.apache.flink.core.memory.HybridMemorySegment;
+import org.apache.flink.core.memory.MemorySegmentFactory;
+import org.apache.flink.core.memory.MemoryType;
+import org.apache.flink.runtime.akka.AkkaUtils;
 import org.apache.flink.annotation.VisibleForTesting;
 import org.apache.flink.runtime.clusterframework.types.ResourceID;
 import org.apache.flink.runtime.highavailability.HighAvailabilityServices;
+import org.apache.flink.runtime.instance.InstanceConnectionInfo;
+import org.apache.flink.runtime.io.disk.iomanager.IOManager;
+import org.apache.flink.runtime.io.disk.iomanager.IOManager.IOMode;
+import org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync;
+import org.apache.flink.runtime.io.network.NetworkEnvironment;
+import org.apache.flink.runtime.io.network.netty.NettyConfig;
+import org.apache.flink.runtime.leaderelection.LeaderElectionService;
 import org.apache.flink.runtime.leaderretrieval.LeaderRetrievalListener;
+import org.apache.flink.runtime.leaderretrieval.LeaderRetrievalService;
+import org.apache.flink.runtime.memory.MemoryManager;
 import org.apache.flink.runtime.rpc.RpcEndpoint;
 import org.apache.flink.runtime.rpc.RpcMethod;
 import org.apache.flink.runtime.rpc.RpcService;
-
+import org.apache.flink.runtime.rpc.akka.AkkaRpcService;
+import org.apache.flink.runtime.taskmanager.MemoryLogger;
+import org.apache.flink.runtime.util.EnvironmentInformation;
+import org.apache.flink.runtime.util.LeaderRetrievalUtils;
+import org.apache.flink.runtime.taskmanager.NetworkEnvironmentConfiguration;
+import org.apache.flink.util.MathUtils;
+import org.apache.flink.util.NetUtils;
+
+import scala.Tuple2;
+import scala.Option;
+import scala.Some;
+import scala.concurrent.ExecutionContext;
+import scala.concurrent.duration.Duration;
+import scala.concurrent.duration.FiniteDuration;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.InetAddress;
+import java.net.InetSocketAddress;
 import java.util.UUID;
+import java.util.concurrent.ForkJoinPool;
+import java.util.concurrent.TimeUnit;
 
 import static org.apache.flink.util.Preconditions.checkNotNull;
 
@@ -36,12 +81,29 @@ import static org.apache.flink.util.Preconditions.checkNotNull;
  */
 public class TaskExecutor extends RpcEndpoint<TaskExecutorGateway> {
 
+	private static final Logger LOG = LoggerFactory.getLogger(TaskExecutor.class);
+
 	/** The unique resource ID of this TaskExecutor */
 	private final ResourceID resourceID;
 
 	/** The access to the leader election and metadata storage services */
 	private final HighAvailabilityServices haServices;
 
+	/** The task manager configuration */
+	private final TaskExecutorConfiguration taskExecutorConfig;
+
+	/** The I/O manager component in the task manager */
+	private final IOManager ioManager;
+
+	/** The memory manager component in the task manager */
+	private final MemoryManager memoryManager;
+
+	/** The network component in the task manager */
+	private final NetworkEnvironment networkEnvironment;
+
+	/** The number of slots in the task manager, should be 1 for YARN */
+	private final int numberOfSlots;
+
 	// --------- resource manager --------
 
 	private TaskExecutorToResourceManagerConnection resourceManagerConnection;
@@ -49,22 +111,24 @@ public class TaskExecutor extends RpcEndpoint<TaskExecutorGateway> {
 	// ------------------------------------------------------------------------
 
 	public TaskExecutor(
+			TaskExecutorConfiguration taskExecutorConfig,
+			ResourceID resourceID,
+			MemoryManager memoryManager,
+			IOManager ioManager,
+			NetworkEnvironment networkEnvironment,
+			int numberOfSlots,
 			RpcService rpcService,
-			HighAvailabilityServices haServices,
-			ResourceID resourceID) {
+			HighAvailabilityServices haServices) {
 
 		super(rpcService);
 
-		this.haServices = checkNotNull(haServices);
+		this.taskExecutorConfig = checkNotNull(taskExecutorConfig);
 		this.resourceID = checkNotNull(resourceID);
-	}
-
-	// ------------------------------------------------------------------------
-	//  Properties
-	// ------------------------------------------------------------------------
-
-	public ResourceID getResourceID() {
-		return resourceID;
+		this.memoryManager = checkNotNull(memoryManager);
+		this.ioManager = checkNotNull(ioManager);
+		this.networkEnvironment = checkNotNull(networkEnvironment);
+		this.numberOfSlots = checkNotNull(numberOfSlots);
+		this.haServices = checkNotNull(haServices);
 	}
 
 	// ------------------------------------------------------------------------
@@ -83,7 +147,6 @@ public class TaskExecutor extends RpcEndpoint<TaskExecutorGateway> {
 		}
 	}
 
-
 	// ------------------------------------------------------------------------
 	//  RPC methods - ResourceManager related
 	// ------------------------------------------------------------------------
@@ -94,12 +157,12 @@ public class TaskExecutor extends RpcEndpoint<TaskExecutorGateway> {
 			if (newLeaderAddress != null) {
 				// the resource manager switched to a new leader
 				log.info("ResourceManager leader changed from {} to {}. Registering at new leader.",
-						resourceManagerConnection.getResourceManagerAddress(), newLeaderAddress);
+					resourceManagerConnection.getResourceManagerAddress(), newLeaderAddress);
 			}
 			else {
 				// address null means that the current leader is lost without a new leader being there, yet
 				log.info("Current ResourceManager {} lost leader status. Waiting for new ResourceManager leader.",
-						resourceManagerConnection.getResourceManagerAddress());
+					resourceManagerConnection.getResourceManagerAddress());
 			}
 
 			// drop the current connection or connection attempt
@@ -112,21 +175,604 @@ public class TaskExecutor extends RpcEndpoint<TaskExecutorGateway> {
 		// establish a connection to the new leader
 		if (newLeaderAddress != null) {
 			log.info("Attempting to register at ResourceManager {}", newLeaderAddress);
-			resourceManagerConnection = 
-					new TaskExecutorToResourceManagerConnection(log, this, newLeaderAddress, newLeaderId);
+			resourceManagerConnection =
+				new TaskExecutorToResourceManagerConnection(log, this, newLeaderAddress, newLeaderId);
 			resourceManagerConnection.start();
 		}
 	}
 
+	/**
+	 * Starts and runs the TaskManager.
+	 * <p/>
+	 * This method first tries to select the network interface to use for the TaskManager
+	 * communication. The network interface is used both for the actor communication
+	 * (coordination) as well as for the data exchange between task managers. Unless
+	 * the hostname/interface is explicitly configured in the configuration, this
+	 * method will try out various interfaces and methods to connect to the JobManager
+	 * and select the one where the connection attempt is successful.
+	 * <p/>
+	 * After selecting the network interface, this method brings up an actor system
+	 * for the TaskManager and its actors, starts the TaskManager's services
+	 * (library cache, shuffle network stack, ...), and starts the TaskManager itself.
+	 *
+	 * @param configuration    The configuration for the TaskManager.
+	 * @param resourceID       The id of the resource which the task manager will run on.
+	 */
+	public static void selectNetworkInterfaceAndRunTaskManager(
+		Configuration configuration,
+		ResourceID resourceID) throws Exception {
+
+		final InetSocketAddress taskManagerAddress = selectNetworkInterfaceAndPort(configuration);
+
+		runTaskManager(taskManagerAddress.getHostName(), resourceID, taskManagerAddress.getPort(), configuration);
+	}
+
+	private static InetSocketAddress selectNetworkInterfaceAndPort(Configuration configuration)
+		throws Exception {
+		String taskManagerHostname = configuration.getString(ConfigConstants.TASK_MANAGER_HOSTNAME_KEY, null);
+		if (taskManagerHostname != null) {
+			LOG.info("Using configured hostname/address for TaskManager: " + taskManagerHostname);
+		} else {
+			LeaderRetrievalService leaderRetrievalService = LeaderRetrievalUtils.createLeaderRetrievalService(configuration);
+			FiniteDuration lookupTimeout = AkkaUtils.getLookupTimeout(configuration);
+
+			InetAddress taskManagerAddress = LeaderRetrievalUtils.findConnectingAddress(leaderRetrievalService, lookupTimeout);
+			taskManagerHostname = taskManagerAddress.getHostName();
+			LOG.info("TaskManager will use hostname/address '{}' ({}) for communication.",
+				taskManagerHostname, taskManagerAddress.getHostAddress());
+		}
+
+		// if no task manager port has been configured, use 0 (system will pick any free port)
+		final int actorSystemPort = configuration.getInteger(ConfigConstants.TASK_MANAGER_IPC_PORT_KEY, 0);
+		if (actorSystemPort < 0 || actorSystemPort > 65535) {
+			throw new IllegalConfigurationException("Invalid value for '" +
+				ConfigConstants.TASK_MANAGER_IPC_PORT_KEY +
+				"' (port for the TaskManager actor system) : " + actorSystemPort +
+				" - Leave config parameter empty or use 0 to let the system choose a port automatically.");
+		}
+
+		return new InetSocketAddress(taskManagerHostname, actorSystemPort);
+	}
+
+	/**
+	 * Starts and runs the TaskManager. Brings up an actor system for the TaskManager and its
+	 * actors, starts the TaskManager's services (library cache, shuffle network stack, ...),
+	 * and starts the TaskManager itself.
+	 * <p/>
+	 * This method will also spawn a process reaper for the TaskManager (kill the process if
+	 * the actor fails) and optionally start the JVM memory logging thread.
+	 *
+	 * @param taskManagerHostname The hostname/address of the interface where the actor system
+	 *                            will communicate.
+	 * @param resourceID          The id of the resource which the task manager will run on.
+	 * @param actorSystemPort   The port at which the actor system will communicate.
+	 * @param configuration       The configuration for the TaskManager.
+	 */
+	private static void runTaskManager(
+		String taskManagerHostname,
+		ResourceID resourceID,
+		int actorSystemPort,
+		final Configuration configuration) throws Exception {
+
+		LOG.info("Starting TaskManager");
+
+		// Bring up the TaskManager actor system first, bind it to the given address.
+
+		LOG.info("Starting TaskManager actor system at " +
+			NetUtils.hostAndPortToUrlString(taskManagerHostname, actorSystemPort));
+
+		final ActorSystem taskManagerSystem;
+		try {
+			Tuple2<String, Object> address = new Tuple2<String, Object>(taskManagerHostname, actorSystemPort);
+			Config akkaConfig = AkkaUtils.getAkkaConfig(configuration, new Some<>(address));
+			LOG.debug("Using akka configuration\n " + akkaConfig);
+			taskManagerSystem = AkkaUtils.createActorSystem(akkaConfig);
+		} catch (Throwable t) {
+			if (t instanceof org.jboss.netty.channel.ChannelException) {
+				Throwable cause = t.getCause();
+				if (cause != null && t.getCause() instanceof java.net.BindException) {
+					String address = NetUtils.hostAndPortToUrlString(taskManagerHostname, actorSystemPort);
+					throw new IOException("Unable to bind TaskManager actor system to address " +
+						address + " - " + cause.getMessage(), t);
+				}
+			}
+			throw new Exception("Could not create TaskManager actor system", t);
+		}
+
+		// start akka rpc service based on actor system
+		final Timeout timeout = new Timeout(AkkaUtils.getTimeout(configuration).toMillis(), TimeUnit.MILLISECONDS);
+		final AkkaRpcService akkaRpcService = new AkkaRpcService(taskManagerSystem, timeout);
+
+		// start high availability service to implement getResourceManagerLeaderRetriever method only
+		final HighAvailabilityServices haServices = new HighAvailabilityServices() {
+			@Override
+			public LeaderRetrievalService getResourceManagerLeaderRetriever() throws Exception {
+				return LeaderRetrievalUtils.createLeaderRetrievalService(configuration);
+			}
+
+			@Override
+			public LeaderElectionService getResourceManagerLeaderElectionService() throws Exception {
+				return null;
+			}
+
+			@Override
+			public LeaderElectionService getJobMasterLeaderElectionService(JobID jobID) throws Exception {
+				return null;
+			}
+		};
+
+		// start all the TaskManager services (network stack,  library cache, ...)
+		// and the TaskManager actor
+		try {
+			LOG.info("Starting TaskManager actor");
+			TaskExecutor taskExecutor = startTaskManagerComponentsAndActor(
+				configuration,
+				resourceID,
+				akkaRpcService,
+				taskManagerHostname,
+				haServices,
+				false);
+
+			taskExecutor.start();
+
+			// if desired, start the logging daemon that periodically logs the memory usage information
+			if (LOG.isInfoEnabled() && configuration.getBoolean(
+				ConfigConstants.TASK_MANAGER_DEBUG_MEMORY_USAGE_START_LOG_THREAD,
+				ConfigConstants.DEFAULT_TASK_MANAGER_DEBUG_MEMORY_USAGE_START_LOG_THREAD)) {
+				LOG.info("Starting periodic memory usage logger");
+
+				long interval = configuration.getLong(
+					ConfigConstants.TASK_MANAGER_DEBUG_MEMORY_USAGE_LOG_INTERVAL_MS,
+					ConfigConstants.DEFAULT_TASK_MANAGER_DEBUG_MEMORY_USAGE_LOG_INTERVAL_MS);
+
+				MemoryLogger logger = new MemoryLogger(LOG, interval, taskManagerSystem);
+				logger.start();
+			}
+
+			// block until everything is done
+			taskManagerSystem.awaitTermination();
+		} catch (Throwable t) {
+			LOG.error("Error while starting up taskManager", t);
+			try {
+				taskManagerSystem.shutdown();
+			} catch (Throwable tt) {
+				LOG.warn("Could not cleanly shut down actor system", tt);
+			}
+			throw t;
+		}
+	}
+
+	// --------------------------------------------------------------------------
+	//  Starting and running the TaskManager
+	// --------------------------------------------------------------------------
+
+	/**
+	 * @param configuration                 The configuration for the TaskManager.
+	 * @param resourceID                    The id of the resource which the task manager will run on.
+	 * @param rpcService                  The rpc service which is used to start and connect to the TaskManager RpcEndpoint .
+	 * @param taskManagerHostname       The hostname/address that describes the TaskManager's data location.
+	 * @param haServices        Optionally, a high availability service can be provided. If none is given,
+	 *                                      then a HighAvailabilityServices is constructed from the configuration.
+	 * @param localTaskManagerCommunication     If true, the TaskManager will not initiate the TCP network stack.
+	 * @return An ActorRef to the TaskManager actor.
+	 * @throws org.apache.flink.configuration.IllegalConfigurationException     Thrown, if the given config contains illegal values.
+	 * @throws java.io.IOException      Thrown, if any of the I/O components (such as buffer pools,
+	 *                                       I/O manager, ...) cannot be properly started.
+	 * @throws java.lang.Exception      Thrown is some other error occurs while parsing the configuration
+	 *                                      or starting the TaskManager components.
+	 */
+	public static TaskExecutor startTaskManagerComponentsAndActor(
+		Configuration configuration,
+		ResourceID resourceID,
+		RpcService rpcService,
+		String taskManagerHostname,
+		HighAvailabilityServices haServices,
+		boolean localTaskManagerCommunication) throws Exception {
+
+		final TaskExecutorConfiguration taskExecutorConfig = parseTaskManagerConfiguration(
+			configuration, taskManagerHostname, localTaskManagerCommunication);
+
+		MemoryType memType = taskExecutorConfig.getNetworkConfig().memoryType();
+
+		// pre-start checks
+		checkTempDirs(taskExecutorConfig.getTmpDirPaths());
+
+		ExecutionContext executionContext = ExecutionContexts$.MODULE$.fromExecutor(new ForkJoinPool());
+
+		// we start the network first, to make sure it can allocate its buffers first
+		final NetworkEnvironment network = new NetworkEnvironment(
+			executionContext,
+			taskExecutorConfig.getTimeout(),
+			taskExecutorConfig.getNetworkConfig(),
+			taskExecutorConfig.getConnectionInfo());
+
+		// computing the amount of memory to use depends on how much memory is available
+		// it strictly needs to happen AFTER the network stack has been initialized
+
+		// check if a value has been configured
+		long configuredMemory = configuration.getLong(ConfigConstants.TASK_MANAGER_MEMORY_SIZE_KEY, -1L);
+		checkConfigParameter(configuredMemory == -1 || configuredMemory > 0, configuredMemory,
+			ConfigConstants.TASK_MANAGER_MEMORY_SIZE_KEY,
+			"MemoryManager needs at least one MB of memory. " +
+				"If you leave this config parameter empty, the system automatically " +
+				"pick a fraction of the available memory.");
+
+		final long memorySize;
+		boolean preAllocateMemory = configuration.getBoolean(
+			ConfigConstants.TASK_MANAGER_MEMORY_PRE_ALLOCATE_KEY,
+			ConfigConstants.DEFAULT_TASK_MANAGER_MEMORY_PRE_ALLOCATE);
+		if (configuredMemory > 0) {
+			if (preAllocateMemory) {
+				LOG.info("Using {} MB for managed memory." , configuredMemory);
+			} else {
+				LOG.info("Limiting managed memory to {} MB, memory will be allocated lazily." , configuredMemory);
+			}
+			memorySize = configuredMemory << 20; // megabytes to bytes
+		} else {
+			float fraction = configuration.getFloat(
+				ConfigConstants.TASK_MANAGER_MEMORY_FRACTION_KEY,
+				ConfigConstants.DEFAULT_MEMORY_MANAGER_MEMORY_FRACTION);
+			checkConfigParameter(fraction > 0.0f && fraction < 1.0f, fraction,
+				ConfigConstants.TASK_MANAGER_MEMORY_FRACTION_KEY,
+				"MemoryManager fraction of the free memory must be between 0.0 and 1.0");
+
+			if (memType == MemoryType.HEAP) {
+				long relativeMemSize = (long) (EnvironmentInformation.getSizeOfFreeHeapMemoryWithDefrag() * fraction);
+				if (preAllocateMemory) {
+					LOG.info("Using {} of the currently free heap space for managed heap memory ({} MB)." ,
+						fraction , relativeMemSize >> 20);
+				} else {
+					LOG.info("Limiting managed memory to {} of the currently free heap space ({} MB), " +
+						"memory will be allocated lazily." , fraction , relativeMemSize >> 20);
+				}
+				memorySize = relativeMemSize;
+			} else if (memType == MemoryType.OFF_HEAP) {
+				// The maximum heap memory has been adjusted according to the fraction
+				long maxMemory = EnvironmentInformation.getMaxJvmHeapMemory();
+				long directMemorySize = (long) (maxMemory / (1.0 - fraction) * fraction);
+				if (preAllocateMemory) {
+					LOG.info("Using {} of the maximum memory size for managed off-heap memory ({} MB)." ,
+						fraction, directMemorySize >> 20);
+				} else {
+					LOG.info("Limiting managed memory to {} of the maximum memory size ({} MB)," +
+						" memory will be allocated lazily.", fraction, directMemorySize >> 20);
+				}
+				memorySize = directMemorySize;
+			} else {
+				throw new RuntimeException("No supported memory type detected.");
+			}
+		}
+
+		// now start the memory manager
+		final MemoryManager memoryManager;
+		try {
+			memoryManager = new MemoryManager(
+				memorySize,
+				taskExecutorConfig.getNumberOfSlots(),
+				taskExecutorConfig.getNetworkConfig().networkBufferSize(),
+				memType,
+				preAllocateMemory);
+		} catch (OutOfMemoryError e) {
+			if (memType == MemoryType.HEAP) {
+				throw new Exception("OutOfMemory error (" + e.getMessage() +
+					") while allocating the TaskManager heap memory (" + memorySize + " bytes).", e);
+			} else if (memType == MemoryType.OFF_HEAP) {
+				throw new Exception("OutOfMemory error (" + e.getMessage() +
+					") while allocating the TaskManager off-heap memory (" + memorySize +
+					" bytes).Try increasing the maximum direct memory (-XX:MaxDirectMemorySize)", e);
+			} else {
+				throw e;
+			}
+		}
+
+		// start the I/O manager, it will create some temp directories.
+		final IOManager ioManager = new IOManagerAsync(taskExecutorConfig.getTmpDirPaths());
+
+		final TaskExecutor taskExecutor = new TaskExecutor(
+			taskExecutorConfig,
+			resourceID,
+			memoryManager,
+			ioManager,
+			network,
+			taskExecutorConfig.getNumberOfSlots(),
+			rpcService,
+			haServices);
+
+		return taskExecutor;
+	}
+
+	// --------------------------------------------------------------------------
+	//  Parsing and checking the TaskManager Configuration
+	// --------------------------------------------------------------------------
+
+	/**
+	 * Utility method to extract TaskManager config parameters from the configuration and to
+	 * sanity check them.
+	 *
+	 * @param configuration                 The configuration.
+	 * @param taskManagerHostname           The host name under which the TaskManager communicates.
+	 * @param localTaskManagerCommunication             True, to skip initializing the network stack.
+	 *                                      Use only in cases where only one task manager runs.
+	 * @return TaskExecutorConfiguration that wrappers InstanceConnectionInfo, NetworkEnvironmentConfiguration, etc.
+	 */
+	private static TaskExecutorConfiguration parseTaskManagerConfiguration(
+		Configuration configuration,
+		String taskManagerHostname,
+		boolean localTaskManagerCommunication) throws Exception {
+
+		// ------- read values from the config and check them ---------
+		//                      (a lot of them)
+
+		// ----> hosts / ports for communication and data exchange
+
+		int dataport = configuration.getInteger(ConfigConstants.TASK_MANAGER_DATA_PORT_KEY,
+			ConfigConstants.DEFAULT_TASK_MANAGER_DATA_PORT);
+		if (dataport == 0) {
+			dataport = NetUtils.getAvailablePort();
+		}
+		checkConfigParameter(dataport > 0, dataport, ConfigConstants.TASK_MANAGER_DATA_PORT_KEY,
+			"Leave config parameter empty or use 0 to let the system choose a port automatically.");
+
+		InetAddress taskManagerAddress = InetAddress.getByName(taskManagerHostname);
+		final InstanceConnectionInfo connectionInfo = new InstanceConnectionInfo(taskManagerAddress, dataport);
+
+		// ----> memory / network stack (shuffles/broadcasts), task slots, temp directories
+
+		// we need this because many configs have been written with a "-1" entry
+		int slots = configuration.getInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, 1);
+		if (slots == -1) {
+			slots = 1;
+		}
+		checkConfigParameter(slots >= 1, slots, ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS,
+			"Number of task slots must be at least one.");
+
+		final int numNetworkBuffers = configuration.getInteger(
+			ConfigConstants.TASK_MANAGER_NETWORK_NUM_BUFFERS_KEY,
+			ConfigConstants.DEFAULT_TASK_MANAGER_NETWORK_NUM_BUFFERS);
+		checkConfigParameter(numNetworkBuffers > 0, numNetworkBuffers,
+			ConfigConstants.TASK_MANAGER_NETWORK_NUM_BUFFERS_KEY, "");
+
+		final int pageSize = configuration.getInteger(
+			ConfigConstants.TASK_MANAGER_MEMORY_SEGMENT_SIZE_KEY,
+			ConfigConstants.DEFAULT_TASK_MANAGER_MEMORY_SEGMENT_SIZE);
+		// check page size of for minimum size
+		checkConfigParameter(pageSize >= MemoryManager.MIN_PAGE_SIZE, pageSize,
+			ConfigConstants.TASK_MANAGER_MEMORY_SEGMENT_SIZE_KEY,
+			"Minimum memory segment size is " + MemoryManager.MIN_PAGE_SIZE);
+		// check page size for power of two
+		checkConfigParameter(MathUtils.isPowerOf2(pageSize), pageSize,
+			ConfigConstants.TASK_MANAGER_MEMORY_SEGMENT_SIZE_KEY,
+			"Memory segment size must be a power of 2.");
+
+		// check whether we use heap or off-heap memory
+		final MemoryType memType;
+		if (configuration.getBoolean(ConfigConstants.TASK_MANAGER_MEMORY_OFF_HEAP_KEY, false)) {
+			memType = MemoryType.OFF_HEAP;
+		} else {
+			memType = MemoryType.HEAP;
+		}
+
+		// initialize the memory segment factory accordingly
+		if (memType == MemoryType.HEAP) {
+			if (!MemorySegmentFactory.initializeIfNotInitialized(HeapMemorySegment.FACTORY)) {
+				throw new Exception("Memory type is set to heap memory, but memory segment " +
+					"factory has been initialized for off-heap memory segments");
+			}
+		} else {
+			if (!MemorySegmentFactory.initializeIfNotInitialized(HybridMemorySegment.FACTORY)) {
+				throw new Exception("Memory type is set to off-heap memory, but memory segment " +
+					"factory has been initialized for heap memory segments");
+			}
+		}
+
+		final String[] tmpDirs = configuration.getString(
+			ConfigConstants.TASK_MANAGER_TMP_DIR_KEY,
+			ConfigConstants.DEFAULT_TASK_MANAGER_TMP_PATH).split(",|" + File.pathSeparator);
+
+		final NettyConfig nettyConfig;
+		if (!localTaskManagerCommunication) {
+			nettyConfig = new NettyConfig(connectionInfo.address(), connectionInfo.dataPort(), pageSize, slots, configuration);
+		} else {
+			nettyConfig = null;
+		}
+
+		// Default spill I/O mode for intermediate results
+		final String syncOrAsync = configuration.getString(
+			ConfigConstants.TASK_MANAGER_NETWORK_DEFAULT_IO_MODE,
+			ConfigConstants.DEFAULT_TASK_MANAGER_NETWORK_DEFAULT_IO_MODE);
+
+		final IOMode ioMode;
+		if (syncOrAsync.equals("async")) {
+			ioMode = IOManager.IOMode.ASYNC;
+		} else {
+			ioMode = IOManager.IOMode.SYNC;
+		}
+
+		final int queryServerPort =  configuration.getInteger(
+			ConfigConstants.QUERYABLE_STATE_SERVER_PORT,
+			ConfigConstants.DEFAULT_QUERYABLE_STATE_SERVER_PORT);
+
+		final int queryServerNetworkThreads =  configuration.getInteger(
+			ConfigConstants.QUERYABLE_STATE_SERVER_NETWORK_THREADS,
+			ConfigConstants.DEFAULT_QUERYABLE_STATE_SERVER_NETWORK_THREADS);
+
+		final int queryServerQueryThreads =  configuration.getInteger(
+			ConfigConstants.QUERYABLE_STATE_SERVER_QUERY_THREADS,
+			ConfigConstants.DEFAULT_QUERYABLE_STATE_SERVER_QUERY_THREADS);
+
+		final NetworkEnvironmentConfiguration networkConfig = new NetworkEnvironmentConfiguration(
+			numNetworkBuffers,
+			pageSize,
+			memType,
+			ioMode,
+			queryServerPort,
+			queryServerNetworkThreads,
+			queryServerQueryThreads,
+			localTaskManagerCommunication ? Option.<NettyConfig>empty() : new Some<>(nettyConfig),
+			new Tuple2<>(500, 3000));
+
+		// ----> timeouts, library caching, profiling
+
+		final FiniteDuration timeout;
+		try {
+			timeout = AkkaUtils.getTimeout(configuration);
+		} catch (Exception e) {
+			throw new IllegalArgumentException(
+				"Invalid format for '" + ConfigConstants.AKKA_ASK_TIMEOUT +
+					"'.Use formats like '50 s' or '1 min' to specify the timeout.");
+		}
+		LOG.info("Messages between TaskManager and JobManager have a max timeout of " + timeout);
+
+		final long cleanupInterval = configuration.getLong(
+			ConfigConstants.LIBRARY_CACHE_MANAGER_CLEANUP_INTERVAL,
+			ConfigConstants.DEFAULT_LIBRARY_CACHE_MANAGER_CLEANUP_INTERVAL) * 1000;
+
+		final FiniteDuration finiteRegistrationDuration;
+		try {
+			Duration maxRegistrationDuration = Duration.create(configuration.getString(
+				ConfigConstants.TASK_MANAGER_MAX_REGISTRATION_DURATION,
+				ConfigConstants.DEFAULT_TASK_MANAGER_MAX_REGISTRATION_DURATION));
+			if (maxRegistrationDuration.isFinite()) {
+				finiteRegistrationDuration = new FiniteDuration(maxRegistrationDuration.toSeconds(), TimeUnit.SECONDS);
+			} else {
+				finiteRegistrationDuration = null;
+			}
+		} catch (NumberFormatException e) {
+			throw new IllegalArgumentException("Invalid format for parameter " +
+				ConfigConstants.TASK_MANAGER_MAX_REGISTRATION_DURATION, e);
+		}
+
+		final FiniteDuration initialRegistrationPause;
+		try {
+			Duration pause = Duration.create(configuration.getString(
+				ConfigConstants.TASK_MANAGER_INITIAL_REGISTRATION_PAUSE,
+				ConfigConstants.DEFAULT_TASK_MANAGER_INITIAL_REGISTRATION_PAUSE));
+			if (pause.isFinite()) {
+				initialRegistrationPause = new FiniteDuration(pause.toSeconds(), TimeUnit.SECONDS);
+			} else {
+				throw new IllegalArgumentException("The initial registration pause must be finite: " + pause);
+			}
+		} catch (NumberFormatException e) {
+			throw new IllegalArgumentException("Invalid format for parameter " +
+				ConfigConstants.TASK_MANAGER_INITIAL_REGISTRATION_PAUSE, e);
+		}
+
+		final FiniteDuration maxRegistrationPause;
+		try {
+			Duration pause = Duration.create(configuration.getString(
+				ConfigConstants.TASK_MANAGER_MAX_REGISTARTION_PAUSE,
+				ConfigConstants.DEFAULT_TASK_MANAGER_MAX_REGISTRATION_PAUSE));
+			if (pause.isFinite()) {
+				maxRegistrationPause = new FiniteDuration(pause.toSeconds(), TimeUnit.SECONDS);
+			} else {
+				throw new IllegalArgumentException("The maximum registration pause must be finite: " + pause);
+			}
+		} catch (NumberFormatException e) {
+			throw new IllegalArgumentException("Invalid format for parameter " +
+				ConfigConstants.TASK_MANAGER_INITIAL_REGISTRATION_PAUSE, e);
+		}
+
+		final FiniteDuration refusedRegistrationPause;
+		try {
+			Duration pause = Duration.create(configuration.getString(
+				ConfigConstants.TASK_MANAGER_REFUSED_REGISTRATION_PAUSE,
+				ConfigConstants.DEFAULT_TASK_MANAGER_REFUSED_REGISTRATION_PAUSE));
+			if (pause.isFinite()) {
+				refusedRegistrationPause = new FiniteDuration(pause.toSeconds(), TimeUnit.SECONDS);
+			} else {
+				throw new IllegalArgumentException("The refused registration pause must be finite: " + pause);
+			}
+		} catch (NumberFormatException e) {
+			throw new IllegalArgumentException("Invalid format for parameter " +
+				ConfigConstants.TASK_MANAGER_INITIAL_REGISTRATION_PAUSE, e);
+		}
+
+		return new TaskExecutorConfiguration(
+			tmpDirs,
+			cleanupInterval,
+			connectionInfo,
+			networkConfig,
+			timeout,
+			finiteRegistrationDuration,
+			slots,
+			configuration,
+			initialRegistrationPause,
+			maxRegistrationPause,
+			refusedRegistrationPause);
+	}
+
+	/**
+	 * Validates a condition for a config parameter and displays a standard exception, if the
+	 * the condition does not hold.
+	 *
+	 * @param condition    The condition that must hold. If the condition is false, an exception is thrown.
+	 * @param parameter    The parameter value. Will be shown in the exception message.
+	 * @param name         The name of the config parameter. Will be shown in the exception message.
+	 * @param errorMessage The optional custom error message to append to the exception message.
+	 */
+	private static void checkConfigParameter(
+		boolean condition,
+		Object parameter,
+		String name,
+		String errorMessage) {
+		if (!condition) {
+			throw new IllegalConfigurationException("Invalid configuration value for " + name + " : " + parameter + " - " + errorMessage);
+		}
+	}
+
+	/**
+	 * Validates that all the directories denoted by the strings do actually exist, are proper
+	 * directories (not files), and are writable.
+	 *
+	 * @param tmpDirs The array of directory paths to check.
+	 * @throws Exception Thrown if any of the directories does not exist or is not writable
+	 *                   or is a file, rather than a directory.
+	 */
+	private static void checkTempDirs(String[] tmpDirs) throws IOException {
+		for (String dir : tmpDirs) {
+			if (dir != null && !dir.equals("")) {
+				File file = new File(dir);
+				if (!file.exists()) {
+					throw new IOException("Temporary file directory " + file.getAbsolutePath() + " does not exist.");
+				}
+				if (!file.isDirectory()) {
+					throw new IOException("Temporary file directory " + file.getAbsolutePath() + " is not a directory.");
+				}
+				if (!file.canWrite()) {
+					throw new IOException("Temporary file directory " + file.getAbsolutePath() + " is not writable.");
+				}
+
+				if (LOG.isInfoEnabled()) {
+					long totalSpaceGb = file.getTotalSpace() >> 30;
+					long usableSpaceGb = file.getUsableSpace() >> 30;
+					double usablePercentage = (double)usableSpaceGb / totalSpaceGb * 100;
+					String path = file.getAbsolutePath();
+					LOG.info(String.format("Temporary file directory '%s': total %d GB, " + "usable %d GB (%.2f%% usable)",
+						path, totalSpaceGb, usableSpaceGb, usablePercentage));
+				}
+			} else {
+				throw new IllegalArgumentException("Temporary file directory #$id is null.");
+			}
+		}
+	}
+
+	// ------------------------------------------------------------------------
+	//  Properties
+	// ------------------------------------------------------------------------
+
+	public ResourceID getResourceID() {
+		return resourceID;
+	}
+
 	// ------------------------------------------------------------------------
-	//  Error handling
+	//  Error Handling
 	// ------------------------------------------------------------------------
 
 	/**
 	 * Notifies the TaskExecutor that a fatal error has occurred and it cannot proceed.
 	 * This method should be used when asynchronous threads want to notify the
 	 * TaskExecutor of a fatal error.
-	 * 
+	 *
 	 * @param t The exception describing the fatal error
 	 */
 	void onFatalErrorAsync(final Throwable t) {
@@ -141,7 +787,7 @@ public class TaskExecutor extends RpcEndpoint<TaskExecutorGateway> {
 	/**
 	 * Notifies the TaskExecutor that a fatal error has occurred and it cannot proceed.
 	 * This method must only be called from within the TaskExecutor's main thread.
-	 * 
+	 *
 	 * @param t The exception describing the fatal error
 	 */
 	void onFatalError(Throwable t) {

http://git-wip-us.apache.org/repos/asf/flink/blob/35e80103/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorConfiguration.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorConfiguration.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorConfiguration.java
new file mode 100644
index 0000000..32484e1
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorConfiguration.java
@@ -0,0 +1,151 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc.taskexecutor;
+
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.runtime.instance.InstanceConnectionInfo;
+import org.apache.flink.runtime.taskmanager.NetworkEnvironmentConfiguration;
+
+import scala.concurrent.duration.FiniteDuration;
+
+import java.io.Serializable;
+import java.util.concurrent.TimeUnit;
+
+import static org.apache.flink.util.Preconditions.checkNotNull;
+
+/**
+ * {@link TaskExecutor} Configuration
+ */
+public class TaskExecutorConfiguration implements Serializable {
+
+	private static final long serialVersionUID = 1L;
+
+	private final String[] tmpDirPaths;
+
+	private final long cleanupInterval;
+
+	private final int numberOfSlots;
+
+	private final Configuration configuration;
+
+	private final FiniteDuration timeout;
+	private final FiniteDuration maxRegistrationDuration;
+	private final FiniteDuration initialRegistrationPause;
+	private final FiniteDuration maxRegistrationPause;
+	private final FiniteDuration refusedRegistrationPause;
+
+	private final NetworkEnvironmentConfiguration networkConfig;
+
+	private final InstanceConnectionInfo connectionInfo;
+
+	public TaskExecutorConfiguration(
+			String[] tmpDirPaths,
+			long cleanupInterval,
+			InstanceConnectionInfo connectionInfo,
+			NetworkEnvironmentConfiguration networkConfig,
+			FiniteDuration timeout,
+			FiniteDuration maxRegistrationDuration,
+			int numberOfSlots,
+			Configuration configuration) {
+
+		this (tmpDirPaths,
+			cleanupInterval,
+			connectionInfo,
+			networkConfig,
+			timeout,
+			maxRegistrationDuration,
+			numberOfSlots,
+			configuration,
+			new FiniteDuration(500, TimeUnit.MILLISECONDS),
+			new FiniteDuration(30, TimeUnit.SECONDS),
+			new FiniteDuration(10, TimeUnit.SECONDS));
+	}
+
+	public TaskExecutorConfiguration(
+			String[] tmpDirPaths,
+			long cleanupInterval,
+			InstanceConnectionInfo connectionInfo,
+			NetworkEnvironmentConfiguration networkConfig,
+			FiniteDuration timeout,
+			FiniteDuration maxRegistrationDuration,
+			int numberOfSlots,
+			Configuration configuration,
+			FiniteDuration initialRegistrationPause,
+			FiniteDuration maxRegistrationPause,
+			FiniteDuration refusedRegistrationPause) {
+
+		this.tmpDirPaths = checkNotNull(tmpDirPaths);
+		this.cleanupInterval = checkNotNull(cleanupInterval);
+		this.connectionInfo = checkNotNull(connectionInfo);
+		this.networkConfig = checkNotNull(networkConfig);
+		this.timeout = checkNotNull(timeout);
+		this.maxRegistrationDuration = maxRegistrationDuration;
+		this.numberOfSlots = checkNotNull(numberOfSlots);
+		this.configuration = checkNotNull(configuration);
+		this.initialRegistrationPause = checkNotNull(initialRegistrationPause);
+		this.maxRegistrationPause = checkNotNull(maxRegistrationPause);
+		this.refusedRegistrationPause = checkNotNull(refusedRegistrationPause);
+	}
+
+	// --------------------------------------------------------------------------------------------
+	//  Properties
+	// --------------------------------------------------------------------------------------------
+
+	public String[] getTmpDirPaths() {
+		return tmpDirPaths;
+	}
+
+	public long getCleanupInterval() {
+		return cleanupInterval;
+	}
+
+	public InstanceConnectionInfo getConnectionInfo() { return connectionInfo; }
+
+	public NetworkEnvironmentConfiguration getNetworkConfig() { return networkConfig; }
+
+	public FiniteDuration getTimeout() {
+		return timeout;
+	}
+
+	public FiniteDuration getMaxRegistrationDuration() {
+		return maxRegistrationDuration;
+	}
+
+	public int getNumberOfSlots() {
+		return numberOfSlots;
+	}
+
+	public Configuration getConfiguration() {
+		return configuration;
+	}
+
+	public FiniteDuration getInitialRegistrationPause() {
+		return initialRegistrationPause;
+	}
+
+	public FiniteDuration getMaxRegistrationPause() {
+		return maxRegistrationPause;
+	}
+
+	public FiniteDuration getRefusedRegistrationPause() {
+		return refusedRegistrationPause;
+	}
+
+}
+

http://git-wip-us.apache.org/repos/asf/flink/blob/35e80103/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorTest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorTest.java
index b831ead..25a670c 100644
--- a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorTest.java
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorTest.java
@@ -18,6 +18,7 @@
 
 package org.apache.flink.runtime.rpc.taskexecutor;
 
+import org.apache.flink.configuration.Configuration;
 import org.apache.flink.runtime.clusterframework.types.ResourceID;
 import org.apache.flink.runtime.highavailability.NonHaServices;
 import org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices;
@@ -49,9 +50,9 @@ public class TaskExecutorTest extends TestLogger {
 			rpc.registerGateway(resourceManagerAddress, rmGateway);
 
 			NonHaServices haServices = new NonHaServices(resourceManagerAddress);
-			TaskExecutor taskManager = new TaskExecutor(rpc, haServices, resourceID);
+			TaskExecutor taskManager = TaskExecutor.startTaskManagerComponentsAndActor(
+				new Configuration(), resourceID, rpc, "localhost", haServices, true);
 			String taskManagerAddress = taskManager.getAddress();
-
 			taskManager.start();
 
 			verify(rmGateway, timeout(5000)).registerTaskExecutor(
@@ -84,7 +85,8 @@ public class TaskExecutorTest extends TestLogger {
 			TestingHighAvailabilityServices haServices = new TestingHighAvailabilityServices();
 			haServices.setResourceManagerLeaderRetriever(testLeaderService);
 
-			TaskExecutor taskManager = new TaskExecutor(rpc, haServices, resourceID);
+			TaskExecutor taskManager = TaskExecutor.startTaskManagerComponentsAndActor(
+				new Configuration(), resourceID, rpc, "localhost", haServices, true);
 			String taskManagerAddress = taskManager.getAddress();
 			taskManager.start();
 


[02/50] [abbrv] flink git commit: [FLINK-4247] [table] CsvTableSource.getDataSet() expects Java ExecutionEnvironment

Posted by tr...@apache.org.
[FLINK-4247] [table] CsvTableSource.getDataSet() expects Java ExecutionEnvironment

This closes #2298.


Project: http://git-wip-us.apache.org/repos/asf/flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/0975d9f1
Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/0975d9f1
Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/0975d9f1

Branch: refs/heads/flip-6
Commit: 0975d9f11dc09f8b1ea420d660175874d423cac3
Parents: 5901bf3
Author: twalthr <tw...@apache.org>
Authored: Tue Jul 26 17:27:02 2016 +0200
Committer: twalthr <tw...@apache.org>
Committed: Tue Sep 20 16:02:37 2016 +0200

----------------------------------------------------------------------
 .../api/table/sources/BatchTableSource.scala    |  7 ++++-
 .../api/table/sources/CsvTableSource.scala      | 28 +++++++++++++-------
 .../api/table/sources/StreamTableSource.scala   |  7 ++++-
 .../connectors/kafka/KafkaTableSource.java      |  4 +++
 4 files changed, 35 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/flink/blob/0975d9f1/flink-libraries/flink-table/src/main/scala/org/apache/flink/api/table/sources/BatchTableSource.scala
----------------------------------------------------------------------
diff --git a/flink-libraries/flink-table/src/main/scala/org/apache/flink/api/table/sources/BatchTableSource.scala b/flink-libraries/flink-table/src/main/scala/org/apache/flink/api/table/sources/BatchTableSource.scala
index 7abb03c..74e4cd6 100644
--- a/flink-libraries/flink-table/src/main/scala/org/apache/flink/api/table/sources/BatchTableSource.scala
+++ b/flink-libraries/flink-table/src/main/scala/org/apache/flink/api/table/sources/BatchTableSource.scala
@@ -26,6 +26,11 @@ import org.apache.flink.api.java.{ExecutionEnvironment, DataSet}
   */
 trait BatchTableSource[T] extends TableSource[T] {
 
-  /** Returns the data of the table as a [[DataSet]]. */
+  /**
+    * Returns the data of the table as a [[DataSet]].
+    *
+    * NOTE: This method is for internal use only for defining a [[TableSource]].
+    *       Do not use it in Table API programs.
+    */
   def getDataSet(execEnv: ExecutionEnvironment): DataSet[T]
 }

http://git-wip-us.apache.org/repos/asf/flink/blob/0975d9f1/flink-libraries/flink-table/src/main/scala/org/apache/flink/api/table/sources/CsvTableSource.scala
----------------------------------------------------------------------
diff --git a/flink-libraries/flink-table/src/main/scala/org/apache/flink/api/table/sources/CsvTableSource.scala b/flink-libraries/flink-table/src/main/scala/org/apache/flink/api/table/sources/CsvTableSource.scala
index 40fdf82..9cf4397 100644
--- a/flink-libraries/flink-table/src/main/scala/org/apache/flink/api/table/sources/CsvTableSource.scala
+++ b/flink-libraries/flink-table/src/main/scala/org/apache/flink/api/table/sources/CsvTableSource.scala
@@ -56,13 +56,13 @@ class CsvTableSource(
   with StreamTableSource[Row] {
 
   /**
-  * A [[BatchTableSource]] and [[StreamTableSource]] for simple CSV files with a
-  * (logically) unlimited number of fields.
-  *
-  * @param path The path to the CSV file.
-  * @param fieldNames The names of the table fields.
-  * @param fieldTypes The types of the table fields.
-  */
+    * A [[BatchTableSource]] and [[StreamTableSource]] for simple CSV files with a
+    * (logically) unlimited number of fields.
+    *
+    * @param path The path to the CSV file.
+    * @param fieldNames The names of the table fields.
+    * @param fieldTypes The types of the table fields.
+    */
   def this(path: String, fieldNames: Array[String], fieldTypes: Array[TypeInformation[_]]) =
     this(path, fieldNames, fieldTypes, CsvInputFormat.DEFAULT_FIELD_DELIMITER,
       CsvInputFormat.DEFAULT_LINE_DELIMITER, null, false, null, false)
@@ -73,7 +73,12 @@ class CsvTableSource(
 
   private val returnType = new RowTypeInfo(fieldTypes)
 
-  /** Returns the data of the table as a [[DataSet]] of [[Row]]. */
+  /**
+    * Returns the data of the table as a [[DataSet]] of [[Row]].
+    *
+    * NOTE: This method is for internal use only for defining a [[TableSource]].
+    *       Do not use it in Table API programs.
+    */
   override def getDataSet(execEnv: ExecutionEnvironment): DataSet[Row] = {
     execEnv.createInput(createCsvInput(), returnType)
   }
@@ -90,7 +95,12 @@ class CsvTableSource(
   /** Returns the [[RowTypeInfo]] for the return type of the [[CsvTableSource]]. */
   override def getReturnType: RowTypeInfo = returnType
 
-  /** Returns the data of the table as a [[DataStream]] of [[Row]]. */
+  /**
+    * Returns the data of the table as a [[DataStream]] of [[Row]].
+    *
+    * NOTE: This method is for internal use only for defining a [[TableSource]].
+    *       Do not use it in Table API programs.
+    */
   override def getDataStream(streamExecEnv: StreamExecutionEnvironment): DataStream[Row] = {
     streamExecEnv.createInput(createCsvInput(), returnType)
   }

http://git-wip-us.apache.org/repos/asf/flink/blob/0975d9f1/flink-libraries/flink-table/src/main/scala/org/apache/flink/api/table/sources/StreamTableSource.scala
----------------------------------------------------------------------
diff --git a/flink-libraries/flink-table/src/main/scala/org/apache/flink/api/table/sources/StreamTableSource.scala b/flink-libraries/flink-table/src/main/scala/org/apache/flink/api/table/sources/StreamTableSource.scala
index c30d9aa..cdae0b3 100644
--- a/flink-libraries/flink-table/src/main/scala/org/apache/flink/api/table/sources/StreamTableSource.scala
+++ b/flink-libraries/flink-table/src/main/scala/org/apache/flink/api/table/sources/StreamTableSource.scala
@@ -27,6 +27,11 @@ import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment
   */
 trait StreamTableSource[T] extends TableSource[T] {
 
-  /** Returns the data of the table as a [[DataStream]]. */
+  /**
+    * Returns the data of the table as a [[DataStream]].
+    *
+    * NOTE: This method is for internal use only for defining a [[TableSource]].
+    *       Do not use it in Table API programs.
+    */
   def getDataStream(execEnv: StreamExecutionEnvironment): DataStream[T]
 }

http://git-wip-us.apache.org/repos/asf/flink/blob/0975d9f1/flink-streaming-connectors/flink-connector-kafka-base/src/main/java/org/apache/flink/streaming/connectors/kafka/KafkaTableSource.java
----------------------------------------------------------------------
diff --git a/flink-streaming-connectors/flink-connector-kafka-base/src/main/java/org/apache/flink/streaming/connectors/kafka/KafkaTableSource.java b/flink-streaming-connectors/flink-connector-kafka-base/src/main/java/org/apache/flink/streaming/connectors/kafka/KafkaTableSource.java
index fc6bf44..446f203 100644
--- a/flink-streaming-connectors/flink-connector-kafka-base/src/main/java/org/apache/flink/streaming/connectors/kafka/KafkaTableSource.java
+++ b/flink-streaming-connectors/flink-connector-kafka-base/src/main/java/org/apache/flink/streaming/connectors/kafka/KafkaTableSource.java
@@ -99,6 +99,10 @@ abstract class KafkaTableSource implements StreamTableSource<Row> {
 				"Number of provided field names and types does not match.");
 	}
 
+	/**
+	 * NOTE: This method is for internal use only for defining a TableSource.
+	 *       Do not use it in Table API programs.
+	 */
 	@Override
 	public DataStream<Row> getDataStream(StreamExecutionEnvironment env) {
 		// Version-specific Kafka consumer


[10/50] [abbrv] flink git commit: [FLINK-3929] additional fixes for keytab security

Posted by tr...@apache.org.
[FLINK-3929] additional fixes for keytab security

- load flink-jaas.conf from classpath
- avoid using undocumented flink base dir config entry
- enable test cases to run on MacOS
- unify suffix of secure test cases
- fix error logging and reporting

This closes #2275


Project: http://git-wip-us.apache.org/repos/asf/flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/68709b08
Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/68709b08
Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/68709b08

Branch: refs/heads/flip-6
Commit: 68709b087570402cacb7bc3088e0eb35d83c8d32
Parents: 285b6f7
Author: Maximilian Michels <mx...@apache.org>
Authored: Tue Sep 20 15:41:35 2016 +0200
Committer: Maximilian Michels <mx...@apache.org>
Committed: Tue Sep 20 22:03:29 2016 +0200

----------------------------------------------------------------------
 .../org/apache/flink/client/CliFrontend.java    |  3 -
 .../src/main/flink-bin/conf/flink-jaas.conf     | 26 --------
 .../flink/runtime/security/SecurityContext.java | 67 ++++++++------------
 .../src/main/resources/flink-jaas.conf          | 26 ++++++++
 .../flink/runtime/taskmanager/TaskManager.scala |  2 -
 .../runtime/security/SecurityContextTest.java   |  4 +-
 .../connectors/fs/RollingSinkSecuredITCase.java |  1 -
 .../kafka/Kafka09SecureRunITCase.java           | 62 ------------------
 .../kafka/Kafka09SecuredRunITCase.java          | 62 ++++++++++++++++++
 .../flink/test/util/SecureTestEnvironment.java  | 23 +++----
 .../org/apache/flink/yarn/YarnTestBase.java     |  3 +-
 .../flink/yarn/YarnApplicationMasterRunner.java |  1 -
 .../flink/yarn/YarnTaskManagerRunner.java       |  5 +-
 .../flink/yarn/cli/FlinkYarnSessionCli.java     | 32 +++-------
 14 files changed, 138 insertions(+), 179 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/flink/blob/68709b08/flink-clients/src/main/java/org/apache/flink/client/CliFrontend.java
----------------------------------------------------------------------
diff --git a/flink-clients/src/main/java/org/apache/flink/client/CliFrontend.java b/flink-clients/src/main/java/org/apache/flink/client/CliFrontend.java
index 575ffad..0711758 100644
--- a/flink-clients/src/main/java/org/apache/flink/client/CliFrontend.java
+++ b/flink-clients/src/main/java/org/apache/flink/client/CliFrontend.java
@@ -161,9 +161,6 @@ public class CliFrontend {
 				"filesystem scheme from configuration.", e);
 		}
 
-		this.config.setString(ConfigConstants.FLINK_BASE_DIR_PATH_KEY, configDirectory.getAbsolutePath()
-				+ ".." + File.separator);
-
 		this.clientTimeout = AkkaUtils.getClientTimeout(config);
 	}
 

http://git-wip-us.apache.org/repos/asf/flink/blob/68709b08/flink-dist/src/main/flink-bin/conf/flink-jaas.conf
----------------------------------------------------------------------
diff --git a/flink-dist/src/main/flink-bin/conf/flink-jaas.conf b/flink-dist/src/main/flink-bin/conf/flink-jaas.conf
deleted file mode 100644
index d476e24..0000000
--- a/flink-dist/src/main/flink-bin/conf/flink-jaas.conf
+++ /dev/null
@@ -1,26 +0,0 @@
-################################################################################
-#  Licensed to the Apache Software Foundation (ASF) under one
-#  or more contributor license agreements.  See the NOTICE file
-#  distributed with this work for additional information
-#  regarding copyright ownership.  The ASF licenses this file
-#  to you under the Apache License, Version 2.0 (the
-#  "License"); you may not use this file except in compliance
-#  with the License.  You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-# limitations under the License.
-################################################################################
-# We are using this file as an workaround for the Kafka and ZK SASL implementation
-# since they explicitly look for java.security.auth.login.config property
-# The file itself is not used by the application since the internal implementation
-# uses a process-wide in-memory java security configuration object.
-# Please do not edit/delete this file - See FLINK-3929
-sample {
-  useKeyTab=false
-  useTicketCache=true;
-};
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/flink/blob/68709b08/flink-runtime/src/main/java/org/apache/flink/runtime/security/SecurityContext.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/security/SecurityContext.java b/flink-runtime/src/main/java/org/apache/flink/runtime/security/SecurityContext.java
index 4b8b69b..be6611f 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/security/SecurityContext.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/security/SecurityContext.java
@@ -22,7 +22,8 @@ import org.apache.commons.lang3.StringUtils;
 import org.apache.flink.annotation.Internal;
 import org.apache.flink.configuration.ConfigConstants;
 import org.apache.flink.configuration.Configuration;
-import org.apache.flink.configuration.IllegalConfigurationException;
+import org.apache.flink.configuration.GlobalConfiguration;
+import org.apache.flink.util.Preconditions;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.security.Credentials;
 import org.apache.hadoop.security.UserGroupInformation;
@@ -33,7 +34,12 @@ import org.slf4j.LoggerFactory;
 
 import javax.security.auth.Subject;
 import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
 import java.lang.reflect.Method;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardCopyOption;
 import java.security.PrivilegedExceptionAction;
 import java.util.Collection;
 
@@ -170,15 +176,12 @@ public class SecurityContext {
 	 * Kafka current code behavior.
 	 */
 	private static void populateSystemSecurityProperties(Configuration configuration) {
+		Preconditions.checkNotNull(configuration, "The supplied configuation was null");
 
 		//required to be empty for Kafka but we will override the property
 		//with pseudo JAAS configuration file if SASL auth is enabled for ZK
 		System.setProperty(JAVA_SECURITY_AUTH_LOGIN_CONFIG, "");
 
-		if(configuration == null) {
-			return;
-		}
-
 		boolean disableSaslClient = configuration.getBoolean(ConfigConstants.ZOOKEEPER_SASL_DISABLE,
 				ConfigConstants.DEFAULT_ZOOKEEPER_SASL_DISABLE);
 		if(disableSaslClient) {
@@ -188,46 +191,26 @@ public class SecurityContext {
 			return;
 		}
 
-		String baseDir = configuration.getString(ConfigConstants.FLINK_BASE_DIR_PATH_KEY, null);
-		if(baseDir == null) {
-			String message = "SASL auth is enabled for ZK but unable to locate pseudo Jaas config " +
-					"since " + ConfigConstants.FLINK_BASE_DIR_PATH_KEY + " is not provided";
-			LOG.error(message);
-			throw new IllegalConfigurationException(message);
-		}
-
-		File f = new File(baseDir);
-		if(!f.exists() || !f.isDirectory()) {
-			LOG.error("Invalid flink base directory {} configuration provided", baseDir);
-			throw new IllegalConfigurationException("Invalid flink base directory configuration provided");
-		}
-
-		File jaasConfigFile = new File(f, JAAS_CONF_FILENAME);
-
-		if (!jaasConfigFile.exists() || !jaasConfigFile.isFile()) {
-
-			//check if there is a conf directory
-			File confDir = new File(f, "conf");
-			if(!confDir.exists() || !confDir.isDirectory()) {
-				LOG.error("Could not locate " + JAAS_CONF_FILENAME);
-				throw new IllegalConfigurationException("Could not locate " + JAAS_CONF_FILENAME);
-			}
-
-			jaasConfigFile = new File(confDir, JAAS_CONF_FILENAME);
-
-			if (!jaasConfigFile.exists() || !jaasConfigFile.isFile()) {
-				LOG.error("Could not locate " + JAAS_CONF_FILENAME);
-				throw new IllegalConfigurationException("Could not locate " + JAAS_CONF_FILENAME);
-			}
+		// load Jaas config file to initialize SASL
+		final File jaasConfFile;
+		try {
+			Path jaasConfPath = Files.createTempFile(JAAS_CONF_FILENAME, "");
+			InputStream jaasConfStream = SecurityContext.class.getClassLoader().getResourceAsStream(JAAS_CONF_FILENAME);
+			Files.copy(jaasConfStream, jaasConfPath, StandardCopyOption.REPLACE_EXISTING);
+			jaasConfFile = jaasConfPath.toFile();
+			jaasConfFile.deleteOnExit();
+		} catch (IOException e) {
+			throw new RuntimeException("SASL auth is enabled for ZK but unable to " +
+				"locate pseudo Jaas config provided with Flink", e);
 		}
 
 		LOG.info("Enabling {} property with pseudo JAAS config file: {}",
-				JAVA_SECURITY_AUTH_LOGIN_CONFIG, jaasConfigFile);
+				JAVA_SECURITY_AUTH_LOGIN_CONFIG, jaasConfFile.getAbsolutePath());
 
 		//ZK client module lookup the configuration to handle SASL.
 		//https://github.com/sgroschupf/zkclient/blob/master/src/main/java/org/I0Itec/zkclient/ZkClient.java#L900
-		System.setProperty(JAVA_SECURITY_AUTH_LOGIN_CONFIG, jaasConfigFile.getAbsolutePath());
-		System.setProperty(ZOOKEEPER_SASL_CLIENT,"true");
+		System.setProperty(JAVA_SECURITY_AUTH_LOGIN_CONFIG, jaasConfFile.getAbsolutePath());
+		System.setProperty(ZOOKEEPER_SASL_CLIENT, "true");
 
 		String zkSaslServiceName = configuration.getString(ConfigConstants.ZOOKEEPER_SASL_SERVICE_NAME, null);
 		if(!StringUtils.isBlank(zkSaslServiceName)) {
@@ -250,6 +233,10 @@ public class SecurityContext {
 
 		String principal;
 
+		public SecurityConfiguration() {
+			this.flinkConf = GlobalConfiguration.loadConfiguration();
+		}
+
 		public String getKeytab() {
 			return keytab;
 		}
@@ -310,4 +297,4 @@ public class SecurityContext {
 		T run() throws Exception;
 	}
 
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/68709b08/flink-runtime/src/main/resources/flink-jaas.conf
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/resources/flink-jaas.conf b/flink-runtime/src/main/resources/flink-jaas.conf
new file mode 100644
index 0000000..7f0f06b
--- /dev/null
+++ b/flink-runtime/src/main/resources/flink-jaas.conf
@@ -0,0 +1,26 @@
+################################################################################
+#  Licensed to the Apache Software Foundation (ASF) under one
+#  or more contributor license agreements.  See the NOTICE file
+#  distributed with this work for additional information
+#  regarding copyright ownership.  The ASF licenses this file
+#  to you under the Apache License, Version 2.0 (the
+#  "License"); you may not use this file except in compliance
+#  with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+# We are using this file as an workaround for the Kafka and ZK SASL implementation
+# since they explicitly look for java.security.auth.login.config property
+# The file itself is not used by the application since the internal implementation
+# uses a process-wide in-memory java security configuration object.
+# Please do not edit/delete this file - See FLINK-3929
+sample {
+  useKeyTab=false
+  useTicketCache=true;
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/68709b08/flink-runtime/src/main/scala/org/apache/flink/runtime/taskmanager/TaskManager.scala
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/scala/org/apache/flink/runtime/taskmanager/TaskManager.scala b/flink-runtime/src/main/scala/org/apache/flink/runtime/taskmanager/TaskManager.scala
index 8534ee1..9e2feb5 100644
--- a/flink-runtime/src/main/scala/org/apache/flink/runtime/taskmanager/TaskManager.scala
+++ b/flink-runtime/src/main/scala/org/apache/flink/runtime/taskmanager/TaskManager.scala
@@ -1583,8 +1583,6 @@ object TaskManager {
       }
     }
 
-    conf.setString(ConfigConstants.FLINK_BASE_DIR_PATH_KEY, cliConfig.getConfigDir() + "/..")
-
     conf
   }
 

http://git-wip-us.apache.org/repos/asf/flink/blob/68709b08/flink-runtime/src/test/java/org/apache/flink/runtime/security/SecurityContextTest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/security/SecurityContextTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/security/SecurityContextTest.java
index 5f3d76a..3c48e8f 100644
--- a/flink-runtime/src/test/java/org/apache/flink/runtime/security/SecurityContextTest.java
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/security/SecurityContextTest.java
@@ -35,7 +35,7 @@ public class SecurityContextTest {
 		SecurityContext.SecurityConfiguration sc = new SecurityContext.SecurityConfiguration();
 		try {
 			SecurityContext.install(sc);
-			assertEquals(UserGroupInformation.getLoginUser().getUserName(),getOSUserName());
+			assertEquals(UserGroupInformation.getLoginUser().getUserName(), getOSUserName());
 		} catch (Exception e) {
 			fail(e.getMessage());
 		}
@@ -59,7 +59,7 @@ public class SecurityContextTest {
 		if( osName.contains( "windows" ) ){
 			className = "com.sun.security.auth.module.NTSystem";
 		}
-		else if( osName.contains( "linux" ) ){
+		else if( osName.contains( "linux" ) || osName.contains( "mac" )  ){
 			className = "com.sun.security.auth.module.UnixSystem";
 		}
 		else if( osName.contains( "solaris" ) || osName.contains( "sunos" ) ){

http://git-wip-us.apache.org/repos/asf/flink/blob/68709b08/flink-streaming-connectors/flink-connector-filesystem/src/test/java/org/apache/flink/streaming/connectors/fs/RollingSinkSecuredITCase.java
----------------------------------------------------------------------
diff --git a/flink-streaming-connectors/flink-connector-filesystem/src/test/java/org/apache/flink/streaming/connectors/fs/RollingSinkSecuredITCase.java b/flink-streaming-connectors/flink-connector-filesystem/src/test/java/org/apache/flink/streaming/connectors/fs/RollingSinkSecuredITCase.java
index 930ddd2..051175a 100644
--- a/flink-streaming-connectors/flink-connector-filesystem/src/test/java/org/apache/flink/streaming/connectors/fs/RollingSinkSecuredITCase.java
+++ b/flink-streaming-connectors/flink-connector-filesystem/src/test/java/org/apache/flink/streaming/connectors/fs/RollingSinkSecuredITCase.java
@@ -227,7 +227,6 @@ public class RollingSinkSecuredITCase extends RollingSinkITCase {
 			TestStreamEnvironment.setAsContext(cluster, DEFAULT_PARALLELISM);
 
 		} catch (Exception e) {
-			LOG.error("Exception occured while creating MiniFlink cluster. Reason: {}", e);
 			throw new RuntimeException(e);
 		}
 	}

http://git-wip-us.apache.org/repos/asf/flink/blob/68709b08/flink-streaming-connectors/flink-connector-kafka-0.9/src/test/java/org/apache/flink/streaming/connectors/kafka/Kafka09SecureRunITCase.java
----------------------------------------------------------------------
diff --git a/flink-streaming-connectors/flink-connector-kafka-0.9/src/test/java/org/apache/flink/streaming/connectors/kafka/Kafka09SecureRunITCase.java b/flink-streaming-connectors/flink-connector-kafka-0.9/src/test/java/org/apache/flink/streaming/connectors/kafka/Kafka09SecureRunITCase.java
deleted file mode 100644
index d12ec65..0000000
--- a/flink-streaming-connectors/flink-connector-kafka-0.9/src/test/java/org/apache/flink/streaming/connectors/kafka/Kafka09SecureRunITCase.java
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.flink.streaming.connectors.kafka;
-
-import org.apache.flink.test.util.SecureTestEnvironment;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-import org.junit.Test;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.IOException;
-
-/*
- * Kafka Secure Connection (kerberos) IT test case
- */
-public class Kafka09SecureRunITCase extends KafkaConsumerTestBase {
-
-	protected static final Logger LOG = LoggerFactory.getLogger(Kafka09SecureRunITCase.class);
-
-	@BeforeClass
-	public static void prepare() throws IOException, ClassNotFoundException {
-		LOG.info("-------------------------------------------------------------------------");
-		LOG.info("    Starting Kafka09SecureRunITCase ");
-		LOG.info("-------------------------------------------------------------------------");
-
-		SecureTestEnvironment.prepare(tempFolder);
-		SecureTestEnvironment.populateFlinkSecureConfigurations(getFlinkConfiguration());
-
-		startClusters(true);
-	}
-
-	@AfterClass
-	public static void shutDownServices() {
-		shutdownClusters();
-		SecureTestEnvironment.cleanup();
-	}
-
-
-	//timeout interval is large since in Travis, ZK connection timeout occurs frequently
-	//The timeout for the test case is 2 times timeout of ZK connection
-	@Test(timeout = 600000)
-	public void testMultipleTopics() throws Exception {
-		runProduceConsumeMultipleTopics();
-	}
-
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/flink/blob/68709b08/flink-streaming-connectors/flink-connector-kafka-0.9/src/test/java/org/apache/flink/streaming/connectors/kafka/Kafka09SecuredRunITCase.java
----------------------------------------------------------------------
diff --git a/flink-streaming-connectors/flink-connector-kafka-0.9/src/test/java/org/apache/flink/streaming/connectors/kafka/Kafka09SecuredRunITCase.java b/flink-streaming-connectors/flink-connector-kafka-0.9/src/test/java/org/apache/flink/streaming/connectors/kafka/Kafka09SecuredRunITCase.java
new file mode 100644
index 0000000..e748537
--- /dev/null
+++ b/flink-streaming-connectors/flink-connector-kafka-0.9/src/test/java/org/apache/flink/streaming/connectors/kafka/Kafka09SecuredRunITCase.java
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.streaming.connectors.kafka;
+
+import org.apache.flink.test.util.SecureTestEnvironment;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+
+/*
+ * Kafka Secure Connection (kerberos) IT test case
+ */
+public class Kafka09SecuredRunITCase extends KafkaConsumerTestBase {
+
+	protected static final Logger LOG = LoggerFactory.getLogger(Kafka09SecuredRunITCase.class);
+
+	@BeforeClass
+	public static void prepare() throws IOException, ClassNotFoundException {
+		LOG.info("-------------------------------------------------------------------------");
+		LOG.info("    Starting Kafka09SecuredRunITCase ");
+		LOG.info("-------------------------------------------------------------------------");
+
+		SecureTestEnvironment.prepare(tempFolder);
+		SecureTestEnvironment.populateFlinkSecureConfigurations(getFlinkConfiguration());
+
+		startClusters(true);
+	}
+
+	@AfterClass
+	public static void shutDownServices() {
+		shutdownClusters();
+		SecureTestEnvironment.cleanup();
+	}
+
+
+	//timeout interval is large since in Travis, ZK connection timeout occurs frequently
+	//The timeout for the test case is 2 times timeout of ZK connection
+	@Test(timeout = 600000)
+	public void testMultipleTopics() throws Exception {
+		runProduceConsumeMultipleTopics();
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/68709b08/flink-test-utils-parent/flink-test-utils/src/main/java/org/apache/flink/test/util/SecureTestEnvironment.java
----------------------------------------------------------------------
diff --git a/flink-test-utils-parent/flink-test-utils/src/main/java/org/apache/flink/test/util/SecureTestEnvironment.java b/flink-test-utils-parent/flink-test-utils/src/main/java/org/apache/flink/test/util/SecureTestEnvironment.java
index 00b19f1..b5e622b 100644
--- a/flink-test-utils-parent/flink-test-utils/src/main/java/org/apache/flink/test/util/SecureTestEnvironment.java
+++ b/flink-test-utils-parent/flink-test-utils/src/main/java/org/apache/flink/test/util/SecureTestEnvironment.java
@@ -20,6 +20,7 @@ package org.apache.flink.test.util;
 
 import org.apache.flink.configuration.ConfigConstants;
 import org.apache.flink.configuration.Configuration;
+import org.apache.flink.configuration.GlobalConfiguration;
 import org.apache.flink.runtime.security.SecurityContext;
 import org.apache.hadoop.minikdc.MiniKdc;
 import org.junit.rules.TemporaryFolder;
@@ -60,12 +61,10 @@ public class SecureTestEnvironment {
 
 	private static String hadoopServicePrincipal = null;
 
-	private static File baseDirForSecureRun = null;
-
 	public static void prepare(TemporaryFolder tempFolder) {
 
 		try {
-			baseDirForSecureRun = tempFolder.newFolder();
+			File baseDirForSecureRun = tempFolder.newFolder();
 			LOG.info("Base Directory for Secure Environment: {}", baseDirForSecureRun);
 
 			String hostName = "localhost";
@@ -113,19 +112,17 @@ public class SecureTestEnvironment {
 			//See Yarn test case module for reference
 			createJaasConfig(baseDirForSecureRun);
 			SecurityContext.SecurityConfiguration ctx = new SecurityContext.SecurityConfiguration();
-			Configuration flinkConfig = new Configuration();
+			Configuration flinkConfig = GlobalConfiguration.loadConfiguration();
 			flinkConfig.setString(ConfigConstants.SECURITY_KEYTAB_KEY, testKeytab);
 			flinkConfig.setString(ConfigConstants.SECURITY_PRINCIPAL_KEY, testPrincipal);
 			flinkConfig.setBoolean(ConfigConstants.ZOOKEEPER_SASL_DISABLE, false);
-			flinkConfig.setString(ConfigConstants.FLINK_BASE_DIR_PATH_KEY, baseDirForSecureRun.getAbsolutePath());
 			ctx.setFlinkConfiguration(flinkConfig);
 			TestingSecurityContext.install(ctx, getClientSecurityConfigurationMap());
 
-			populateSystemEnvVariables();
+			populateJavaPropertyVariables();
 
 		} catch(Exception e) {
-			LOG.error("Exception occured while preparing secure environment. Reason: {}", e);
-			throw new RuntimeException(e);
+			throw new RuntimeException("Exception occured while preparing secure environment.", e);
 		}
 
 	}
@@ -145,14 +142,12 @@ public class SecureTestEnvironment {
 		testPrincipal = null;
 		testZkServerPrincipal = null;
 		hadoopServicePrincipal = null;
-		baseDirForSecureRun = null;
 
 	}
 
-	private static void populateSystemEnvVariables() {
+	private static void populateJavaPropertyVariables() {
 
 		if(LOG.isDebugEnabled()) {
-			System.setProperty("FLINK_JAAS_DEBUG", "true");
 			System.setProperty("sun.security.krb5.debug", "true");
 		}
 
@@ -165,7 +160,6 @@ public class SecureTestEnvironment {
 
 	private static void resetSystemEnvVariables() {
 		System.clearProperty("java.security.krb5.conf");
-		System.clearProperty("FLINK_JAAS_DEBUG");
 		System.clearProperty("sun.security.krb5.debug");
 
 		System.clearProperty("zookeeper.authProvider.1");
@@ -227,7 +221,7 @@ public class SecureTestEnvironment {
 	}
 
 	/*
-	 * Helper method to create a temporary JAAS configuration file to ger around the Kafka and ZK SASL
+	 * Helper method to create a temporary JAAS configuration file to get around the Kafka and ZK SASL
 	 * implementation lookup java.security.auth.login.config
 	 */
 	private static void  createJaasConfig(File baseDirForSecureRun) {
@@ -241,8 +235,7 @@ public class SecureTestEnvironment {
 			out.println("useTicketCache=true;");
 			out.println("};");
 		} catch (IOException e) {
-			LOG.error("Exception occured while trying to create JAAS config. Reason: {}", e.getMessage());
-			throw new RuntimeException(e);
+			throw new RuntimeException("Exception occured while trying to create JAAS config.", e);
 		}
 
 	}

http://git-wip-us.apache.org/repos/asf/flink/blob/68709b08/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YarnTestBase.java
----------------------------------------------------------------------
diff --git a/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YarnTestBase.java b/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YarnTestBase.java
index 605aa44..afdd400 100644
--- a/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YarnTestBase.java
+++ b/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YarnTestBase.java
@@ -427,8 +427,7 @@ public abstract class YarnTestBase extends TestLogger {
 					out.println(ConfigConstants.SECURITY_PRINCIPAL_KEY + ": " + principal);
 					out.println("");
 				} catch (IOException e) {
-					LOG.error("Exception occured while trying to append the security configurations. Reason: {}", e.getMessage());
-					throw new RuntimeException(e);
+					throw new RuntimeException("Exception occured while trying to append the security configurations.", e);
 				}
 
 				String configDir = tempConfPathForSecureRun.getAbsolutePath();

http://git-wip-us.apache.org/repos/asf/flink/blob/68709b08/flink-yarn/src/main/java/org/apache/flink/yarn/YarnApplicationMasterRunner.java
----------------------------------------------------------------------
diff --git a/flink-yarn/src/main/java/org/apache/flink/yarn/YarnApplicationMasterRunner.java b/flink-yarn/src/main/java/org/apache/flink/yarn/YarnApplicationMasterRunner.java
index efb658a..b27876b 100644
--- a/flink-yarn/src/main/java/org/apache/flink/yarn/YarnApplicationMasterRunner.java
+++ b/flink-yarn/src/main/java/org/apache/flink/yarn/YarnApplicationMasterRunner.java
@@ -176,7 +176,6 @@ public class YarnApplicationMasterRunner {
 				flinkConfig.setString(ConfigConstants.SECURITY_KEYTAB_KEY, keytabPath);
 				flinkConfig.setString(ConfigConstants.SECURITY_PRINCIPAL_KEY, remoteKeytabPrincipal);
 			}
-			flinkConfig.setString(ConfigConstants.FLINK_BASE_DIR_PATH_KEY, currDir);
 
 			SecurityContext.install(sc.setFlinkConfiguration(flinkConfig));
 

http://git-wip-us.apache.org/repos/asf/flink/blob/68709b08/flink-yarn/src/main/java/org/apache/flink/yarn/YarnTaskManagerRunner.java
----------------------------------------------------------------------
diff --git a/flink-yarn/src/main/java/org/apache/flink/yarn/YarnTaskManagerRunner.java b/flink-yarn/src/main/java/org/apache/flink/yarn/YarnTaskManagerRunner.java
index c70a30b..21ed52e 100644
--- a/flink-yarn/src/main/java/org/apache/flink/yarn/YarnTaskManagerRunner.java
+++ b/flink-yarn/src/main/java/org/apache/flink/yarn/YarnTaskManagerRunner.java
@@ -127,7 +127,6 @@ public class YarnTaskManagerRunner {
 				configuration.setString(ConfigConstants.SECURITY_KEYTAB_KEY, keytabPath);
 				configuration.setString(ConfigConstants.SECURITY_PRINCIPAL_KEY, remoteKeytabPrincipal);
 			}
-			configuration.setString(ConfigConstants.FLINK_BASE_DIR_PATH_KEY, currDir);
 
 			SecurityContext.install(sc.setFlinkConfiguration(configuration));
 
@@ -145,9 +144,9 @@ public class YarnTaskManagerRunner {
 				}
 			});
 		} catch(Exception e) {
-			LOG.error("Exception occurred while launching Task Manager. Reason: {}", e);
+			LOG.error("Exception occurred while launching Task Manager", e);
 			throw new RuntimeException(e);
 		}
 
 	}
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/68709b08/flink-yarn/src/main/java/org/apache/flink/yarn/cli/FlinkYarnSessionCli.java
----------------------------------------------------------------------
diff --git a/flink-yarn/src/main/java/org/apache/flink/yarn/cli/FlinkYarnSessionCli.java b/flink-yarn/src/main/java/org/apache/flink/yarn/cli/FlinkYarnSessionCli.java
index b5364f0..d09340c 100644
--- a/flink-yarn/src/main/java/org/apache/flink/yarn/cli/FlinkYarnSessionCli.java
+++ b/flink-yarn/src/main/java/org/apache/flink/yarn/cli/FlinkYarnSessionCli.java
@@ -24,7 +24,6 @@ import org.apache.commons.cli.Option;
 import org.apache.commons.cli.Options;
 import org.apache.commons.cli.PosixParser;
 import org.apache.commons.lang3.StringUtils;
-import org.apache.flink.client.CliFrontend;
 import org.apache.flink.client.cli.CliFrontendParser;
 import org.apache.flink.client.cli.CustomCommandLine;
 import org.apache.flink.configuration.ConfigConstants;
@@ -463,27 +462,17 @@ public class FlinkYarnSessionCli implements CustomCommandLine<YarnClusterClient>
 		}
 	}
 
-	public static void main(final String[] args) {
-		final FlinkYarnSessionCli cli = new FlinkYarnSessionCli("", "", true); // no prefix for the YARN session
-
-		String confDirPath = CliFrontend.getConfigurationDirectoryFromEnv();
-		GlobalConfiguration.loadConfiguration(confDirPath);
+	public static void main(final String[] args) throws Exception {
+		final FlinkYarnSessionCli cli = new FlinkYarnSessionCli("", ""); // no prefix for the YARN session
 		Configuration flinkConfiguration = GlobalConfiguration.loadConfiguration();
-		flinkConfiguration.setString(ConfigConstants.FLINK_BASE_DIR_PATH_KEY, confDirPath);
-		try {
-			SecurityContext.install(new SecurityContext.SecurityConfiguration().setFlinkConfiguration(flinkConfiguration));
-			int retCode = SecurityContext.getInstalled().runSecured(new SecurityContext.FlinkSecuredRunner<Integer>() {
-				@Override
-				public Integer run() {
-					return cli.run(args);
-				}
-			});
-			System.exit(retCode);
-		} catch(Exception e) {
-			e.printStackTrace();
-			LOG.error("Exception Occured. Reason: {}", e);
-			return;
-		}
+		SecurityContext.install(new SecurityContext.SecurityConfiguration().setFlinkConfiguration(flinkConfiguration));
+		int retCode = SecurityContext.getInstalled().runSecured(new SecurityContext.FlinkSecuredRunner<Integer>() {
+			@Override
+			public Integer run() {
+				return cli.run(args);
+			}
+		});
+		System.exit(retCode);
 	}
 
 	@Override
@@ -544,7 +533,6 @@ public class FlinkYarnSessionCli implements CustomCommandLine<YarnClusterClient>
 		try {
 			return yarnClusterDescriptor.deploy();
 		} catch (Exception e) {
-			LOG.error("Error while deploying YARN cluster: "+e.getMessage(), e);
 			throw new RuntimeException("Error deploying the YARN cluster", e);
 		}
 


[30/50] [abbrv] flink git commit: [FLINK-4434] [rpc] Add a testing RPC service.

Posted by tr...@apache.org.
[FLINK-4434] [rpc] Add a testing RPC service.

This closes #2394.


Project: http://git-wip-us.apache.org/repos/asf/flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/e6b0f12c
Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/e6b0f12c
Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/e6b0f12c

Branch: refs/heads/flip-6
Commit: e6b0f12cc79d7164be98c4d7c2c6482d0c8f0937
Parents: d9baa58
Author: Stephan Ewen <se...@apache.org>
Authored: Fri Aug 19 23:29:45 2016 +0200
Committer: Till Rohrmann <tr...@apache.org>
Committed: Wed Sep 21 11:39:15 2016 +0200

----------------------------------------------------------------------
 .../flink/runtime/rpc/RpcCompletenessTest.java  |   3 +
 .../flink/runtime/rpc/TestingGatewayBase.java   |  85 ++++++++++++++
 .../flink/runtime/rpc/TestingRpcService.java    | 115 +++++++++++++++++++
 3 files changed, 203 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/flink/blob/e6b0f12c/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/RpcCompletenessTest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/RpcCompletenessTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/RpcCompletenessTest.java
index 97cf0cb..b8aad62 100644
--- a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/RpcCompletenessTest.java
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/RpcCompletenessTest.java
@@ -41,9 +41,11 @@ import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
 public class RpcCompletenessTest extends TestLogger {
+
 	private static final Class<?> futureClass = Future.class;
 
 	@Test
+	@SuppressWarnings({"rawtypes", "unchecked"})
 	public void testRpcCompleteness() {
 		Reflections reflections = new Reflections("org.apache.flink");
 
@@ -64,6 +66,7 @@ public class RpcCompletenessTest extends TestLogger {
 		}
 	}
 
+	@SuppressWarnings("rawtypes")
 	private void checkCompleteness(Class<? extends RpcEndpoint> rpcEndpoint, Class<? extends RpcGateway> rpcGateway) {
 		Method[] gatewayMethods = rpcGateway.getDeclaredMethods();
 		Method[] serverMethods = rpcEndpoint.getDeclaredMethods();

http://git-wip-us.apache.org/repos/asf/flink/blob/e6b0f12c/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/TestingGatewayBase.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/TestingGatewayBase.java b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/TestingGatewayBase.java
new file mode 100644
index 0000000..4256135
--- /dev/null
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/TestingGatewayBase.java
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc;
+
+import akka.dispatch.Futures;
+import scala.concurrent.Future;
+import scala.concurrent.Promise;
+
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
+
+/**
+ * Utility base class for testing gateways
+ */
+public abstract class TestingGatewayBase implements RpcGateway {
+
+	private final ScheduledExecutorService executor;
+
+	protected TestingGatewayBase() {
+		this.executor = Executors.newSingleThreadScheduledExecutor();
+	}
+
+	// ------------------------------------------------------------------------
+	//  shutdown
+	// ------------------------------------------------------------------------
+
+	public void stop() {
+		executor.shutdownNow();
+	}
+
+	@Override
+	protected void finalize() throws Throwable {
+		super.finalize();
+		executor.shutdownNow();
+	}
+
+	// ------------------------------------------------------------------------
+	//  utilities
+	// ------------------------------------------------------------------------
+
+	public <T> Future<T> futureWithTimeout(long timeoutMillis) {
+		Promise<T> promise = Futures.<T>promise();
+		executor.schedule(new FutureTimeout(promise), timeoutMillis, TimeUnit.MILLISECONDS);
+		return promise.future();
+	}
+
+	// ------------------------------------------------------------------------
+	
+	private static final class FutureTimeout implements Runnable {
+
+		private final Promise<?> promise;
+
+		private FutureTimeout(Promise<?> promise) {
+			this.promise = promise;
+		}
+
+		@Override
+		public void run() {
+			try {
+				promise.failure(new TimeoutException());
+			} catch (Throwable t) {
+				System.err.println("CAUGHT AN ERROR IN THE TEST: " + t.getMessage());
+				t.printStackTrace();
+			}
+		}
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/e6b0f12c/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/TestingRpcService.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/TestingRpcService.java b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/TestingRpcService.java
new file mode 100644
index 0000000..7e92e8d
--- /dev/null
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/TestingRpcService.java
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc;
+
+import akka.dispatch.Futures;
+import akka.util.Timeout;
+
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.runtime.akka.AkkaUtils;
+import org.apache.flink.runtime.rpc.akka.AkkaRpcService;
+
+import scala.concurrent.Future;
+import scala.concurrent.duration.FiniteDuration;
+
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.TimeUnit;
+
+import static org.apache.flink.util.Preconditions.checkNotNull;
+
+/**
+ * An RPC Service implementation for testing. This RPC service acts as a replacement for
+ * teh regular RPC service for cases where tests need to return prepared mock gateways instead of
+ * proper RPC gateways.
+ * 
+ * <p>The TestingRpcService can be used for example in the following fashion,
+ * using <i>Mockito</i> for mocks and verification:
+ * 
+ * <pre>{@code
+ * TestingRpcService rpc = new TestingRpcService();
+ *
+ * ResourceManagerGateway testGateway = mock(ResourceManagerGateway.class);
+ * rpc.registerGateway("myAddress", testGateway);
+ * 
+ * MyComponentToTest component = new MyComponentToTest();
+ * component.triggerSomethingThatCallsTheGateway();
+ * 
+ * verify(testGateway, timeout(1000)).theTestMethod(any(UUID.class), anyString());
+ * }</pre>
+ */
+public class TestingRpcService extends AkkaRpcService {
+
+	/** Map of pre-registered connections */
+	private final ConcurrentHashMap<String, RpcGateway> registeredConnections;
+
+	/**
+	 * Creates a new {@code TestingRpcService}. 
+	 */
+	public TestingRpcService() {
+		this(new Configuration());
+	}
+
+	/**
+	 * Creates a new {@code TestingRpcService}, using the given configuration. 
+	 */
+	public TestingRpcService(Configuration configuration) {
+		super(AkkaUtils.createLocalActorSystem(configuration), new Timeout(new FiniteDuration(10, TimeUnit.SECONDS)));
+
+		this.registeredConnections = new ConcurrentHashMap<>();
+	}
+
+	// ------------------------------------------------------------------------
+
+	@Override
+	public void stopService() {
+		super.stopService();
+		registeredConnections.clear();
+	}
+
+	// ------------------------------------------------------------------------
+	// connections
+	// ------------------------------------------------------------------------
+
+	public void registerGateway(String address, RpcGateway gateway) {
+		checkNotNull(address);
+		checkNotNull(gateway);
+
+		if (registeredConnections.putIfAbsent(address, gateway) != null) {
+			throw new IllegalStateException("a gateway is already registered under " + address);
+		}
+	}
+
+	@Override
+	public <C extends RpcGateway> Future<C> connect(String address, Class<C> clazz) {
+		RpcGateway gateway = registeredConnections.get(address);
+
+		if (gateway != null) {
+			if (clazz.isAssignableFrom(gateway.getClass())) {
+				@SuppressWarnings("unchecked")
+				C typedGateway = (C) gateway;
+				return Futures.successful(typedGateway);
+			} else {
+				return Futures.failed(
+						new Exception("Gateway registered under " + address + " is not of type " + clazz));
+			}
+		} else {
+			return Futures.failed(new Exception("No gateway registered under that name"));
+		}
+	}
+}
\ No newline at end of file


[14/50] [abbrv] flink git commit: [FLINK-4362] [rpc] Auto generate rpc gateways via Java proxies

Posted by tr...@apache.org.
http://git-wip-us.apache.org/repos/asf/flink/blob/f5cf6b56/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcServiceTest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcServiceTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcServiceTest.java
index 642a380..a4e1d7f 100644
--- a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcServiceTest.java
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcServiceTest.java
@@ -61,10 +61,10 @@ public class AkkaRpcServiceTest extends TestLogger {
 		AkkaGateway akkaClient = (AkkaGateway) rm;
 
 		
-		jobMaster.registerAtResourceManager(AkkaUtils.getAkkaURL(actorSystem, akkaClient.getActorRef()));
+		jobMaster.registerAtResourceManager(AkkaUtils.getAkkaURL(actorSystem, akkaClient.getRpcServer()));
 
 		// wait for successful registration
-		FiniteDuration timeout = new FiniteDuration(20, TimeUnit.SECONDS);
+		FiniteDuration timeout = new FiniteDuration(200, TimeUnit.SECONDS);
 		Deadline deadline = timeout.fromNow();
 
 		while (deadline.hasTimeLeft() && !jobMaster.isConnected()) {

http://git-wip-us.apache.org/repos/asf/flink/blob/f5cf6b56/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorTest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorTest.java
index c143527..33c9cb6 100644
--- a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorTest.java
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorTest.java
@@ -48,7 +48,7 @@ public class TaskExecutorTest extends TestLogger {
 	@Test
 	public void testTaskExecution() throws Exception {
 		RpcService testingRpcService = mock(RpcService.class);
-		DirectExecutorService directExecutorService = null;
+		DirectExecutorService directExecutorService = new DirectExecutorService();
 		TaskExecutor taskExecutor = new TaskExecutor(testingRpcService, directExecutorService);
 
 		TaskDeploymentDescriptor tdd = new TaskDeploymentDescriptor(


[19/50] [abbrv] flink git commit: [FLINK-4383] [rpc] Eagerly serialize remote rpc invocation messages

Posted by tr...@apache.org.
[FLINK-4383] [rpc] Eagerly serialize remote rpc invocation messages

This PR introduces an eager serialization for remote rpc invocation messages.
That way it is possible to check whether the message is serializable and
whether it exceeds the maximum allowed akka frame size. If either of these
constraints is violated, a proper exception is thrown instead of simply
swallowing the exception as Akka does it.

Address PR comments

This closes #2365.


Project: http://git-wip-us.apache.org/repos/asf/flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/433a1fd0
Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/433a1fd0
Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/433a1fd0

Branch: refs/heads/flip-6
Commit: 433a1fd0364cff2c73d81629dcb470743dea84ae
Parents: 4ca049b
Author: Till Rohrmann <tr...@apache.org>
Authored: Fri Aug 12 10:32:30 2016 +0200
Committer: Till Rohrmann <tr...@apache.org>
Committed: Wed Sep 21 11:39:13 2016 +0200

----------------------------------------------------------------------
 .../flink/runtime/rpc/akka/AkkaGateway.java     |   2 +-
 .../runtime/rpc/akka/AkkaInvocationHandler.java |  83 ++++++--
 .../flink/runtime/rpc/akka/AkkaRpcActor.java    |  26 ++-
 .../flink/runtime/rpc/akka/AkkaRpcService.java  |  20 +-
 .../rpc/akka/messages/LocalRpcInvocation.java   |  54 +++++
 .../rpc/akka/messages/RemoteRpcInvocation.java  | 206 ++++++++++++++++++
 .../rpc/akka/messages/RpcInvocation.java        | 106 +++-------
 .../runtime/rpc/akka/AkkaRpcServiceTest.java    |   2 +-
 .../rpc/akka/MessageSerializationTest.java      | 210 +++++++++++++++++++
 9 files changed, 597 insertions(+), 112 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/flink/blob/433a1fd0/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaGateway.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaGateway.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaGateway.java
index ec3091c..f6125dc 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaGateway.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaGateway.java
@@ -26,5 +26,5 @@ import org.apache.flink.runtime.rpc.RpcGateway;
  */
 interface AkkaGateway extends RpcGateway {
 
-	ActorRef getRpcServer();
+	ActorRef getRpcEndpoint();
 }

http://git-wip-us.apache.org/repos/asf/flink/blob/433a1fd0/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaInvocationHandler.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaInvocationHandler.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaInvocationHandler.java
index 580b161..297104b 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaInvocationHandler.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaInvocationHandler.java
@@ -25,13 +25,17 @@ import org.apache.flink.api.java.tuple.Tuple2;
 import org.apache.flink.runtime.rpc.MainThreadExecutor;
 import org.apache.flink.runtime.rpc.RpcTimeout;
 import org.apache.flink.runtime.rpc.akka.messages.CallAsync;
+import org.apache.flink.runtime.rpc.akka.messages.LocalRpcInvocation;
+import org.apache.flink.runtime.rpc.akka.messages.RemoteRpcInvocation;
 import org.apache.flink.runtime.rpc.akka.messages.RpcInvocation;
 import org.apache.flink.runtime.rpc.akka.messages.RunAsync;
 import org.apache.flink.util.Preconditions;
+import org.apache.log4j.Logger;
 import scala.concurrent.Await;
 import scala.concurrent.Future;
 import scala.concurrent.duration.FiniteDuration;
 
+import java.io.IOException;
 import java.lang.annotation.Annotation;
 import java.lang.reflect.InvocationHandler;
 import java.lang.reflect.Method;
@@ -42,19 +46,28 @@ import static org.apache.flink.util.Preconditions.checkNotNull;
 import static org.apache.flink.util.Preconditions.checkArgument;
 
 /**
- * Invocation handler to be used with a {@link AkkaRpcActor}. The invocation handler wraps the
- * rpc in a {@link RpcInvocation} message and then sends it to the {@link AkkaRpcActor} where it is
+ * Invocation handler to be used with an {@link AkkaRpcActor}. The invocation handler wraps the
+ * rpc in a {@link LocalRpcInvocation} message and then sends it to the {@link AkkaRpcActor} where it is
  * executed.
  */
 class AkkaInvocationHandler implements InvocationHandler, AkkaGateway, MainThreadExecutor {
-	private final ActorRef rpcServer;
+	private static final Logger LOG = Logger.getLogger(AkkaInvocationHandler.class);
+
+	private final ActorRef rpcEndpoint;
+
+	// whether the actor ref is local and thus no message serialization is needed
+	private final boolean isLocal;
 
 	// default timeout for asks
 	private final Timeout timeout;
 
-	AkkaInvocationHandler(ActorRef rpcServer, Timeout timeout) {
-		this.rpcServer = Preconditions.checkNotNull(rpcServer);
+	private final long maximumFramesize;
+
+	AkkaInvocationHandler(ActorRef rpcEndpoint, Timeout timeout, long maximumFramesize) {
+		this.rpcEndpoint = Preconditions.checkNotNull(rpcEndpoint);
+		this.isLocal = this.rpcEndpoint.path().address().hasLocalScope();
 		this.timeout = Preconditions.checkNotNull(timeout);
+		this.maximumFramesize = maximumFramesize;
 	}
 
 	@Override
@@ -76,23 +89,43 @@ class AkkaInvocationHandler implements InvocationHandler, AkkaGateway, MainThrea
 				parameterAnnotations,
 				args);
 
-			RpcInvocation rpcInvocation = new RpcInvocation(
-				methodName,
-				filteredArguments.f0,
-				filteredArguments.f1);
+			RpcInvocation rpcInvocation;
+
+			if (isLocal) {
+				rpcInvocation = new LocalRpcInvocation(
+					methodName,
+					filteredArguments.f0,
+					filteredArguments.f1);
+			} else {
+				try {
+					RemoteRpcInvocation remoteRpcInvocation = new RemoteRpcInvocation(
+						methodName,
+						filteredArguments.f0,
+						filteredArguments.f1);
+
+					if (remoteRpcInvocation.getSize() > maximumFramesize) {
+						throw new IOException("The rpc invocation size exceeds the maximum akka framesize.");
+					} else {
+						rpcInvocation = remoteRpcInvocation;
+					}
+				} catch (IOException e) {
+					LOG.warn("Could not create remote rpc invocation message. Failing rpc invocation because...", e);
+					throw e;
+				}
+			}
 
 			Class<?> returnType = method.getReturnType();
 
 			if (returnType.equals(Void.TYPE)) {
-				rpcServer.tell(rpcInvocation, ActorRef.noSender());
+				rpcEndpoint.tell(rpcInvocation, ActorRef.noSender());
 
 				result = null;
 			} else if (returnType.equals(Future.class)) {
 				// execute an asynchronous call
-				result = Patterns.ask(rpcServer, rpcInvocation, futureTimeout);
+				result = Patterns.ask(rpcEndpoint, rpcInvocation, futureTimeout);
 			} else {
 				// execute a synchronous call
-				Future<?> futureResult = Patterns.ask(rpcServer, rpcInvocation, futureTimeout);
+				Future<?> futureResult = Patterns.ask(rpcEndpoint, rpcInvocation, futureTimeout);
 				FiniteDuration duration = timeout.duration();
 
 				result = Await.result(futureResult, duration);
@@ -103,8 +136,8 @@ class AkkaInvocationHandler implements InvocationHandler, AkkaGateway, MainThrea
 	}
 
 	@Override
-	public ActorRef getRpcServer() {
-		return rpcServer;
+	public ActorRef getRpcEndpoint() {
+		return rpcEndpoint;
 	}
 
 	@Override
@@ -117,19 +150,25 @@ class AkkaInvocationHandler implements InvocationHandler, AkkaGateway, MainThrea
 		checkNotNull(runnable, "runnable");
 		checkArgument(delay >= 0, "delay must be zero or greater");
 		
-		// Unfortunately I couldn't find a way to allow only local communication. Therefore, the
-		// runnable field is transient transient
-		rpcServer.tell(new RunAsync(runnable, delay), ActorRef.noSender());
+		if (isLocal) {
+			rpcEndpoint.tell(new RunAsync(runnable, delay), ActorRef.noSender());
+		} else {
+			throw new RuntimeException("Trying to send a Runnable to a remote actor at " +
+				rpcEndpoint.path() + ". This is not supported.");
+		}
 	}
 
 	@Override
 	public <V> Future<V> callAsync(Callable<V> callable, Timeout callTimeout) {
-		// Unfortunately I couldn't find a way to allow only local communication. Therefore, the
-		// callable field is declared transient
-		@SuppressWarnings("unchecked")
-		Future<V> result = (Future<V>) Patterns.ask(rpcServer, new CallAsync(callable), callTimeout);
+		if(isLocal) {
+			@SuppressWarnings("unchecked")
+			Future<V> result = (Future<V>) Patterns.ask(rpcEndpoint, new CallAsync(callable), callTimeout);
 
-		return result;
+			return result;
+		} else {
+			throw new RuntimeException("Trying to send a Callable to a remote actor at " +
+				rpcEndpoint.path() + ". This is not supported.");
+		}
 	}
 
 	/**

http://git-wip-us.apache.org/repos/asf/flink/blob/433a1fd0/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcActor.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcActor.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcActor.java
index 5e0a7da..dfcbcc3 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcActor.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcActor.java
@@ -26,6 +26,7 @@ import org.apache.flink.runtime.rpc.MainThreadValidatorUtil;
 import org.apache.flink.runtime.rpc.RpcEndpoint;
 import org.apache.flink.runtime.rpc.RpcGateway;
 import org.apache.flink.runtime.rpc.akka.messages.CallAsync;
+import org.apache.flink.runtime.rpc.akka.messages.LocalRpcInvocation;
 import org.apache.flink.runtime.rpc.akka.messages.RpcInvocation;
 import org.apache.flink.runtime.rpc.akka.messages.RunAsync;
 
@@ -35,6 +36,7 @@ import org.slf4j.LoggerFactory;
 import scala.concurrent.Future;
 import scala.concurrent.duration.FiniteDuration;
 
+import java.io.IOException;
 import java.lang.reflect.Method;
 import java.util.concurrent.Callable;
 import java.util.concurrent.TimeUnit;
@@ -42,10 +44,10 @@ import java.util.concurrent.TimeUnit;
 import static org.apache.flink.util.Preconditions.checkNotNull;
 
 /**
- * Akka rpc actor which receives {@link RpcInvocation}, {@link RunAsync} and {@link CallAsync}
+ * Akka rpc actor which receives {@link LocalRpcInvocation}, {@link RunAsync} and {@link CallAsync}
  * messages.
  * <p>
- * The {@link RpcInvocation} designates a rpc and is dispatched to the given {@link RpcEndpoint}
+ * The {@link LocalRpcInvocation} designates a rpc and is dispatched to the given {@link RpcEndpoint}
  * instance.
  * <p>
  * The {@link RunAsync} and {@link CallAsync} messages contain executable code which is executed
@@ -95,15 +97,12 @@ class AkkaRpcActor<C extends RpcGateway, T extends RpcEndpoint<C>> extends Untyp
 	 * @param rpcInvocation Rpc invocation message
 	 */
 	private void handleRpcInvocation(RpcInvocation rpcInvocation) {
-		Method rpcMethod = null;
-
 		try {
-			rpcMethod = lookupRpcMethod(rpcInvocation.getMethodName(), rpcInvocation.getParameterTypes());
-		} catch (final NoSuchMethodException e) {
-			LOG.error("Could not find rpc method for rpc invocation: {}.", rpcInvocation, e);
-		}
+			String methodName = rpcInvocation.getMethodName();
+			Class<?>[] parameterTypes = rpcInvocation.getParameterTypes();
+
+			Method rpcMethod = lookupRpcMethod(methodName, parameterTypes);
 
-		if (rpcMethod != null) {
 			if (rpcMethod.getReturnType().equals(Void.TYPE)) {
 				// No return value to send back
 				try {
@@ -127,6 +126,12 @@ class AkkaRpcActor<C extends RpcGateway, T extends RpcEndpoint<C>> extends Untyp
 					getSender().tell(new Status.Failure(e), getSelf());
 				}
 			}
+		} catch(ClassNotFoundException e) {
+			LOG.error("Could not load method arguments.", e);
+		} catch (IOException e) {
+			LOG.error("Could not deserialize rpc invocation message.", e);
+		} catch (final NoSuchMethodException e) {
+			LOG.error("Could not find rpc method for rpc invocation: {}.", rpcInvocation, e);
 		}
 	}
 
@@ -195,7 +200,8 @@ class AkkaRpcActor<C extends RpcGateway, T extends RpcEndpoint<C>> extends Untyp
 	 * @param methodName Name of the method
 	 * @param parameterTypes Parameter types of the method
 	 * @return Method of the rpc endpoint
-	 * @throws NoSuchMethodException
+	 * @throws NoSuchMethodException Thrown if the method with the given name and parameter types
+	 * 									cannot be found at the rpc endpoint
 	 */
 	private Method lookupRpcMethod(final String methodName, final Class<?>[] parameterTypes) throws NoSuchMethodException {
 		return rpcEndpoint.getClass().getMethod(methodName, parameterTypes);

http://git-wip-us.apache.org/repos/asf/flink/blob/433a1fd0/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcService.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcService.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcService.java
index db40f10..b963c53 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcService.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcService.java
@@ -58,17 +58,27 @@ public class AkkaRpcService implements RpcService {
 
 	private static final Logger LOG = LoggerFactory.getLogger(AkkaRpcService.class);
 
+	static final String MAXIMUM_FRAME_SIZE_PATH = "akka.remote.netty.tcp.maximum-frame-size";
+
 	private final Object lock = new Object();
 
 	private final ActorSystem actorSystem;
 	private final Timeout timeout;
 	private final Set<ActorRef> actors = new HashSet<>(4);
+	private final long maximumFramesize;
 
 	private volatile boolean stopped;
 
 	public AkkaRpcService(final ActorSystem actorSystem, final Timeout timeout) {
 		this.actorSystem = checkNotNull(actorSystem, "actor system");
 		this.timeout = checkNotNull(timeout, "timeout");
+
+		if (actorSystem.settings().config().hasPath(MAXIMUM_FRAME_SIZE_PATH)) {
+			maximumFramesize = actorSystem.settings().config().getBytes(MAXIMUM_FRAME_SIZE_PATH);
+		} else {
+			// only local communication
+			maximumFramesize = Long.MAX_VALUE;
+		}
 	}
 
 	// this method does not mutate state and is thus thread-safe
@@ -88,7 +98,7 @@ public class AkkaRpcService implements RpcService {
 			public C apply(Object obj) {
 				ActorRef actorRef = ((ActorIdentity) obj).getRef();
 
-				InvocationHandler akkaInvocationHandler = new AkkaInvocationHandler(actorRef, timeout);
+				InvocationHandler akkaInvocationHandler = new AkkaInvocationHandler(actorRef, timeout, maximumFramesize);
 
 				@SuppressWarnings("unchecked")
 				C proxy = (C) Proxy.newProxyInstance(
@@ -116,7 +126,7 @@ public class AkkaRpcService implements RpcService {
 
 		LOG.info("Starting RPC endpoint for {} at {} .", rpcEndpoint.getClass().getName(), actorRef.path());
 
-		InvocationHandler akkaInvocationHandler = new AkkaInvocationHandler(actorRef, timeout);
+		InvocationHandler akkaInvocationHandler = new AkkaInvocationHandler(actorRef, timeout, maximumFramesize);
 
 		// Rather than using the System ClassLoader directly, we derive the ClassLoader
 		// from this class . That works better in cases where Flink runs embedded and all Flink
@@ -142,12 +152,12 @@ public class AkkaRpcService implements RpcService {
 				if (stopped) {
 					return;
 				} else {
-					fromThisService = actors.remove(akkaClient.getRpcServer());
+					fromThisService = actors.remove(akkaClient.getRpcEndpoint());
 				}
 			}
 
 			if (fromThisService) {
-				ActorRef selfActorRef = akkaClient.getRpcServer();
+				ActorRef selfActorRef = akkaClient.getRpcEndpoint();
 				LOG.info("Stopping RPC endpoint {}.", selfActorRef.path());
 				selfActorRef.tell(PoisonPill.getInstance(), ActorRef.noSender());
 			} else {
@@ -178,7 +188,7 @@ public class AkkaRpcService implements RpcService {
 		checkState(!stopped, "RpcService is stopped");
 
 		if (selfGateway instanceof AkkaGateway) {
-			ActorRef actorRef = ((AkkaGateway) selfGateway).getRpcServer();
+			ActorRef actorRef = ((AkkaGateway) selfGateway).getRpcEndpoint();
 			return AkkaUtils.getAkkaURL(actorSystem, actorRef);
 		} else {
 			String className = AkkaGateway.class.getName();

http://git-wip-us.apache.org/repos/asf/flink/blob/433a1fd0/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/LocalRpcInvocation.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/LocalRpcInvocation.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/LocalRpcInvocation.java
new file mode 100644
index 0000000..97c10d7
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/LocalRpcInvocation.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc.akka.messages;
+
+import org.apache.flink.util.Preconditions;
+
+/**
+ * Local rpc invocation message containing the remote procedure name, its parameter types and the
+ * corresponding call arguments. This message will only be sent if the communication is local and,
+ * thus, the message does not have to be serialized.
+ */
+public final class LocalRpcInvocation implements RpcInvocation {
+
+	private final String methodName;
+	private final Class<?>[] parameterTypes;
+	private final Object[] args;
+
+	public LocalRpcInvocation(String methodName, Class<?>[] parameterTypes, Object[] args) {
+		this.methodName = Preconditions.checkNotNull(methodName);
+		this.parameterTypes = Preconditions.checkNotNull(parameterTypes);
+		this.args = args;
+	}
+
+	@Override
+	public String getMethodName() {
+		return methodName;
+	}
+
+	@Override
+	public Class<?>[] getParameterTypes() {
+		return parameterTypes;
+	}
+
+	@Override
+	public Object[] getArgs() {
+		return args;
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/433a1fd0/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RemoteRpcInvocation.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RemoteRpcInvocation.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RemoteRpcInvocation.java
new file mode 100644
index 0000000..bc26a29
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RemoteRpcInvocation.java
@@ -0,0 +1,206 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc.akka.messages;
+
+import org.apache.flink.util.Preconditions;
+import org.apache.flink.util.SerializedValue;
+
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+import java.io.Serializable;
+
+/**
+ * Remote rpc invocation message which is used when the actor communication is remote and, thus, the
+ * message has to be serialized.
+ * <p>
+ * In order to fail fast and report an appropriate error message to the user, the method name, the
+ * parameter types and the arguments are eagerly serialized. In case the the invocation call
+ * contains a non-serializable object, then an {@link IOException} is thrown.
+ */
+public class RemoteRpcInvocation implements RpcInvocation, Serializable {
+	private static final long serialVersionUID = 6179354390913843809L;
+
+	// Serialized invocation data
+	private SerializedValue<RemoteRpcInvocation.MethodInvocation> serializedMethodInvocation;
+
+	// Transient field which is lazily initialized upon first access to the invocation data
+	private transient RemoteRpcInvocation.MethodInvocation methodInvocation;
+
+	public  RemoteRpcInvocation(
+		final String methodName,
+		final Class<?>[] parameterTypes,
+		final Object[] args) throws IOException {
+
+		serializedMethodInvocation = new SerializedValue<>(new RemoteRpcInvocation.MethodInvocation(methodName, parameterTypes, args));
+		methodInvocation = null;
+	}
+
+	@Override
+	public String getMethodName() throws IOException, ClassNotFoundException {
+		deserializeMethodInvocation();
+
+		return methodInvocation.getMethodName();
+	}
+
+	@Override
+	public Class<?>[] getParameterTypes() throws IOException, ClassNotFoundException {
+		deserializeMethodInvocation();
+
+		return methodInvocation.getParameterTypes();
+	}
+
+	@Override
+	public Object[] getArgs() throws IOException, ClassNotFoundException {
+		deserializeMethodInvocation();
+
+		return methodInvocation.getArgs();
+	}
+
+	/**
+	 * Size (#bytes of the serialized data) of the rpc invocation message.
+	 *
+	 * @return Size of the remote rpc invocation message
+	 */
+	public long getSize() {
+		return serializedMethodInvocation.getByteArray().length;
+	}
+
+	private void deserializeMethodInvocation() throws IOException, ClassNotFoundException {
+		if (methodInvocation == null) {
+			methodInvocation = serializedMethodInvocation.deserializeValue(ClassLoader.getSystemClassLoader());
+		}
+	}
+
+	// -------------------------------------------------------------------
+	// Serialization methods
+	// -------------------------------------------------------------------
+
+	private void writeObject(ObjectOutputStream oos) throws IOException {
+		oos.writeObject(serializedMethodInvocation);
+	}
+
+	@SuppressWarnings("unchecked")
+	private void readObject(ObjectInputStream ois) throws IOException, ClassNotFoundException {
+		serializedMethodInvocation = (SerializedValue<RemoteRpcInvocation.MethodInvocation>) ois.readObject();
+		methodInvocation = null;
+	}
+
+	// -------------------------------------------------------------------
+	// Utility classes
+	// -------------------------------------------------------------------
+
+	/**
+	 * Wrapper class for the method invocation information
+	 */
+	private static final class MethodInvocation implements Serializable {
+		private static final long serialVersionUID = 9187962608946082519L;
+
+		private String methodName;
+		private Class<?>[] parameterTypes;
+		private Object[] args;
+
+		private MethodInvocation(final String methodName, final Class<?>[] parameterTypes, final Object[] args) {
+			this.methodName = methodName;
+			this.parameterTypes = Preconditions.checkNotNull(parameterTypes);
+			this.args = args;
+		}
+
+		String getMethodName() {
+			return methodName;
+		}
+
+		Class<?>[] getParameterTypes() {
+			return parameterTypes;
+		}
+
+		Object[] getArgs() {
+			return args;
+		}
+
+		private void writeObject(ObjectOutputStream oos) throws IOException {
+			oos.writeUTF(methodName);
+
+			oos.writeInt(parameterTypes.length);
+
+			for (Class<?> parameterType : parameterTypes) {
+				oos.writeObject(parameterType);
+			}
+
+			if (args != null) {
+				oos.writeBoolean(true);
+
+				for (int i = 0; i < args.length; i++) {
+					try {
+						oos.writeObject(args[i]);
+					} catch (IOException e) {
+						throw new IOException("Could not serialize " + i + "th argument of method " +
+							methodName + ". This indicates that the argument type " +
+							args.getClass().getName() + " is not serializable. Arguments have to " +
+							"be serializable for remote rpc calls.", e);
+					}
+				}
+			} else {
+				oos.writeBoolean(false);
+			}
+		}
+
+		private void readObject(ObjectInputStream ois) throws IOException, ClassNotFoundException {
+			methodName = ois.readUTF();
+
+			int length = ois.readInt();
+
+			parameterTypes = new Class<?>[length];
+
+			for (int i = 0; i < length; i++) {
+				try {
+					parameterTypes[i] = (Class<?>) ois.readObject();
+				} catch (IOException e) {
+					throw new IOException("Could not deserialize " + i + "th parameter type of method " +
+						methodName + '.', e);
+				} catch (ClassNotFoundException e) {
+					throw new ClassNotFoundException("Could not deserialize " + i + "th " +
+						"parameter type of method " + methodName + ". This indicates that the parameter " +
+						"type is not part of the system class loader.", e);
+				}
+			}
+
+			boolean hasArgs = ois.readBoolean();
+
+			if (hasArgs) {
+				args = new Object[length];
+
+				for (int i = 0; i < length; i++) {
+					try {
+						args[i] = ois.readObject();
+					} catch (IOException e) {
+						throw new IOException("Could not deserialize " + i + "th argument of method " +
+							methodName + '.', e);
+					} catch (ClassNotFoundException e) {
+						throw new ClassNotFoundException("Could not deserialize " + i + "th " +
+							"argument of method " + methodName + ". This indicates that the argument " +
+							"type is not part of the system class loader.", e);
+					}
+				}
+			} else {
+				args = null;
+			}
+		}
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/433a1fd0/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RpcInvocation.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RpcInvocation.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RpcInvocation.java
index 5d52ef1..b174c99 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RpcInvocation.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RpcInvocation.java
@@ -18,81 +18,41 @@
 
 package org.apache.flink.runtime.rpc.akka.messages;
 
-import org.apache.flink.util.Preconditions;
-
 import java.io.IOException;
-import java.io.ObjectInputStream;
-import java.io.ObjectOutputStream;
-import java.io.Serializable;
 
 /**
- * Rpc invocation message containing the remote procedure name, its parameter types and the
- * corresponding call arguments.
+ * Interface for rpc invocation messages. The interface allows to request all necessary information
+ * to lookup a method and call it with the corresponding arguments.
  */
-public final class RpcInvocation implements Serializable {
-	private static final long serialVersionUID = -7058254033460536037L;
-
-	private final String methodName;
-	private final Class<?>[] parameterTypes;
-	private transient Object[] args;
-
-	public RpcInvocation(String methodName, Class<?>[] parameterTypes, Object[] args) {
-		this.methodName = Preconditions.checkNotNull(methodName);
-		this.parameterTypes = Preconditions.checkNotNull(parameterTypes);
-		this.args = args;
-	}
-
-	public String getMethodName() {
-		return methodName;
-	}
-
-	public Class<?>[] getParameterTypes() {
-		return parameterTypes;
-	}
-
-	public Object[] getArgs() {
-		return args;
-	}
-
-	private void writeObject(ObjectOutputStream oos) throws IOException {
-		oos.defaultWriteObject();
-
-		if (args != null) {
-			// write has args true
-			oos.writeBoolean(true);
-
-			for (int i = 0; i < args.length; i++) {
-				try {
-					oos.writeObject(args[i]);
-				} catch (IOException e) {
-					Class<?> argClass = args[i].getClass();
-
-					throw new IOException("Could not write " + i + "th argument of method " +
-						methodName + ". The argument type is " + argClass + ". " +
-						"Make sure that this type is serializable.", e);
-				}
-			}
-		} else {
-			// write has args false
-			oos.writeBoolean(false);
-		}
-	}
-
-	private void readObject(ObjectInputStream ois) throws IOException, ClassNotFoundException {
-		ois.defaultReadObject();
-
-		boolean hasArgs = ois.readBoolean();
-
-		if (hasArgs) {
-			int numberArguments = parameterTypes.length;
-
-			args = new Object[numberArguments];
-
-			for (int i = 0; i < numberArguments; i++) {
-				args[i] = ois.readObject();
-			}
-		} else {
-			args = null;
-		}
-	}
+public interface RpcInvocation {
+
+	/**
+	 * Returns the method's name.
+	 *
+	 * @return Method name
+	 * @throws IOException if the rpc invocation message is a remote message and could not be deserialized
+	 * @throws ClassNotFoundException if the rpc invocation message is a remote message and contains
+	 * serialized classes which cannot be found on the receiving side
+	 */
+	String getMethodName() throws IOException, ClassNotFoundException;
+
+	/**
+	 * Returns the method's parameter types
+	 *
+	 * @return Method's parameter types
+	 * @throws IOException if the rpc invocation message is a remote message and could not be deserialized
+	 * @throws ClassNotFoundException if the rpc invocation message is a remote message and contains
+	 * serialized classes which cannot be found on the receiving side
+	 */
+	Class<?>[] getParameterTypes() throws IOException, ClassNotFoundException;
+
+	/**
+	 * Returns the arguments of the remote procedure call
+	 *
+	 * @return Arguments of the remote procedure call
+	 * @throws IOException if the rpc invocation message is a remote message and could not be deserialized
+	 * @throws ClassNotFoundException if the rpc invocation message is a remote message and contains
+	 * serialized classes which cannot be found on the receiving side
+	 */
+	Object[] getArgs() throws IOException, ClassNotFoundException;
 }

http://git-wip-us.apache.org/repos/asf/flink/blob/433a1fd0/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcServiceTest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcServiceTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcServiceTest.java
index 5e37e10..f26b40b 100644
--- a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcServiceTest.java
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcServiceTest.java
@@ -64,7 +64,7 @@ public class AkkaRpcServiceTest extends TestLogger {
 		AkkaGateway akkaClient = (AkkaGateway) rm;
 
 		
-		jobMaster.registerAtResourceManager(AkkaUtils.getAkkaURL(actorSystem, akkaClient.getRpcServer()));
+		jobMaster.registerAtResourceManager(AkkaUtils.getAkkaURL(actorSystem, akkaClient.getRpcEndpoint()));
 
 		// wait for successful registration
 		FiniteDuration timeout = new FiniteDuration(200, TimeUnit.SECONDS);

http://git-wip-us.apache.org/repos/asf/flink/blob/433a1fd0/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/MessageSerializationTest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/MessageSerializationTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/MessageSerializationTest.java
new file mode 100644
index 0000000..ca8179c
--- /dev/null
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/MessageSerializationTest.java
@@ -0,0 +1,210 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc.akka;
+
+import akka.actor.ActorSystem;
+import akka.util.Timeout;
+import com.typesafe.config.Config;
+import com.typesafe.config.ConfigValueFactory;
+import org.apache.flink.runtime.akka.AkkaUtils;
+import org.apache.flink.runtime.rpc.RpcEndpoint;
+import org.apache.flink.runtime.rpc.RpcGateway;
+import org.apache.flink.runtime.rpc.RpcMethod;
+import org.apache.flink.runtime.rpc.RpcService;
+import org.apache.flink.util.TestLogger;
+import org.hamcrest.core.Is;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import scala.concurrent.Await;
+import scala.concurrent.Future;
+import scala.concurrent.duration.FiniteDuration;
+
+import java.io.IOException;
+import java.util.concurrent.LinkedBlockingQueue;
+import java.util.concurrent.TimeUnit;
+
+import static org.junit.Assert.assertThat;
+import static org.junit.Assert.fail;
+
+/**
+ * Tests that akka rpc invocation messages are properly serialized and errors reported
+ */
+public class MessageSerializationTest extends TestLogger {
+	private static ActorSystem actorSystem1;
+	private static ActorSystem actorSystem2;
+	private static AkkaRpcService akkaRpcService1;
+	private static AkkaRpcService akkaRpcService2;
+
+	private static final FiniteDuration timeout = new FiniteDuration(10L, TimeUnit.SECONDS);
+	private static final int maxFrameSize = 32000;
+
+	@BeforeClass
+	public static void setup() {
+		Config akkaConfig = AkkaUtils.getDefaultAkkaConfig();
+		Config modifiedAkkaConfig = akkaConfig.withValue(AkkaRpcService.MAXIMUM_FRAME_SIZE_PATH, ConfigValueFactory.fromAnyRef(maxFrameSize + "b"));
+
+		actorSystem1 = AkkaUtils.createActorSystem(modifiedAkkaConfig);
+		actorSystem2 = AkkaUtils.createActorSystem(modifiedAkkaConfig);
+
+		akkaRpcService1 = new AkkaRpcService(actorSystem1, new Timeout(timeout));
+		akkaRpcService2 = new AkkaRpcService(actorSystem2, new Timeout(timeout));
+	}
+
+	@AfterClass
+	public static void teardown() {
+		akkaRpcService1.stopService();
+		akkaRpcService2.stopService();
+
+		actorSystem1.shutdown();
+		actorSystem2.shutdown();
+
+		actorSystem1.awaitTermination();
+		actorSystem2.awaitTermination();
+	}
+
+	/**
+	 * Tests that a local rpc call with a non serializable argument can be executed.
+	 */
+	@Test
+	public void testNonSerializableLocalMessageTransfer() throws InterruptedException, IOException {
+		LinkedBlockingQueue<Object> linkedBlockingQueue = new LinkedBlockingQueue<>();
+		TestEndpoint testEndpoint = new TestEndpoint(akkaRpcService1, linkedBlockingQueue);
+
+		TestGateway testGateway = testEndpoint.getSelf();
+
+		NonSerializableObject expected = new NonSerializableObject(42);
+
+		testGateway.foobar(expected);
+
+		assertThat(linkedBlockingQueue.take(), Is.<Object>is(expected));
+	}
+
+	/**
+	 * Tests that a remote rpc call with a non-serializable argument fails with an
+	 * {@link IOException} (or an {@link java.lang.reflect.UndeclaredThrowableException} if the
+	 * the method declaration does not include the {@link IOException} as throwable).
+	 */
+	@Test(expected = IOException.class)
+	public void testNonSerializableRemoteMessageTransfer() throws Exception {
+		LinkedBlockingQueue<Object> linkedBlockingQueue = new LinkedBlockingQueue<>();
+
+		TestEndpoint testEndpoint = new TestEndpoint(akkaRpcService1, linkedBlockingQueue);
+
+		String address = testEndpoint.getAddress();
+
+		Future<TestGateway> remoteGatewayFuture = akkaRpcService2.connect(address, TestGateway.class);
+
+		TestGateway remoteGateway = Await.result(remoteGatewayFuture, timeout);
+
+		remoteGateway.foobar(new Object());
+
+		fail("Should have failed because Object is not serializable.");
+	}
+
+	/**
+	 * Tests that a remote rpc call with a serializable argument can be successfully executed.
+	 */
+	@Test
+	public void testSerializableRemoteMessageTransfer() throws Exception {
+		LinkedBlockingQueue<Object> linkedBlockingQueue = new LinkedBlockingQueue<>();
+
+		TestEndpoint testEndpoint = new TestEndpoint(akkaRpcService1, linkedBlockingQueue);
+
+		String address = testEndpoint.getAddress();
+
+		Future<TestGateway> remoteGatewayFuture = akkaRpcService2.connect(address, TestGateway.class);
+
+		TestGateway remoteGateway = Await.result(remoteGatewayFuture, timeout);
+
+		int expected = 42;
+
+		remoteGateway.foobar(expected);
+
+		assertThat(linkedBlockingQueue.take(), Is.<Object>is(expected));
+	}
+
+	/**
+	 * Tests that a message which exceeds the maximum frame size is detected and a corresponding
+	 * exception is thrown.
+	 */
+	@Test(expected = IOException.class)
+	public void testMaximumFramesizeRemoteMessageTransfer() throws Exception {
+		LinkedBlockingQueue<Object> linkedBlockingQueue = new LinkedBlockingQueue<>();
+
+		TestEndpoint testEndpoint = new TestEndpoint(akkaRpcService1, linkedBlockingQueue);
+
+		String address = testEndpoint.getAddress();
+
+		Future<TestGateway> remoteGatewayFuture = akkaRpcService2.connect(address, TestGateway.class);
+
+		TestGateway remoteGateway = Await.result(remoteGatewayFuture, timeout);
+
+		int bufferSize = maxFrameSize + 1;
+		byte[] buffer = new byte[bufferSize];
+
+		remoteGateway.foobar(buffer);
+
+		fail("Should have failed due to exceeding the maximum framesize.");
+	}
+
+	private interface TestGateway extends RpcGateway {
+		void foobar(Object object) throws IOException, InterruptedException;
+	}
+
+	private static class TestEndpoint extends RpcEndpoint<TestGateway> {
+
+		private final LinkedBlockingQueue<Object> queue;
+
+		protected TestEndpoint(RpcService rpcService, LinkedBlockingQueue<Object> queue) {
+			super(rpcService);
+			this.queue = queue;
+		}
+
+		@RpcMethod
+		public void foobar(Object object) throws InterruptedException {
+			queue.put(object);
+		}
+	}
+
+	private static class NonSerializableObject {
+		private final Object object = new Object();
+		private final int value;
+
+		NonSerializableObject(int value) {
+			this.value = value;
+		}
+
+		@Override
+		public boolean equals(Object obj) {
+			if (obj instanceof NonSerializableObject) {
+				NonSerializableObject nonSerializableObject = (NonSerializableObject) obj;
+
+				return value == nonSerializableObject.value;
+			} else {
+				return false;
+			}
+		}
+
+		@Override
+		public int hashCode() {
+			return value * 41;
+		}
+	}
+}


[22/50] [abbrv] flink git commit: [FLINK-4386] [rpc] Add a utility to verify calls happen in the Rpc Endpoint's main thread

Posted by tr...@apache.org.
[FLINK-4386] [rpc] Add a utility to verify calls happen in the Rpc Endpoint's main thread


Project: http://git-wip-us.apache.org/repos/asf/flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/4ca049b5
Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/4ca049b5
Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/4ca049b5

Branch: refs/heads/flip-6
Commit: 4ca049b5c5d7c543a2e0ec40534cd08a13cd113a
Parents: 86f21bf
Author: Stephan Ewen <se...@apache.org>
Authored: Thu Aug 11 20:30:54 2016 +0200
Committer: Till Rohrmann <tr...@apache.org>
Committed: Wed Sep 21 11:39:13 2016 +0200

----------------------------------------------------------------------
 .../flink/runtime/rpc/MainThreadExecutor.java   |  2 +-
 .../runtime/rpc/MainThreadValidatorUtil.java    | 47 ++++++++++
 .../apache/flink/runtime/rpc/RpcEndpoint.java   | 38 +++++++-
 .../flink/runtime/rpc/akka/AkkaRpcActor.java    | 37 +++++---
 .../flink/runtime/rpc/akka/AkkaRpcService.java  |  2 +-
 .../rpc/akka/MainThreadValidationTest.java      | 97 ++++++++++++++++++++
 6 files changed, 205 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/flink/blob/4ca049b5/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/MainThreadExecutor.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/MainThreadExecutor.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/MainThreadExecutor.java
index 4efb382..5e4fead 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/MainThreadExecutor.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/MainThreadExecutor.java
@@ -30,7 +30,7 @@ import java.util.concurrent.TimeoutException;
  *
  * <p>This interface is intended to be implemented by the self gateway in a {@link RpcEndpoint}
  * implementation which allows to dispatch local procedures to the main thread of the underlying
- * rpc server.
+ * RPC endpoint.
  */
 public interface MainThreadExecutor {
 

http://git-wip-us.apache.org/repos/asf/flink/blob/4ca049b5/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/MainThreadValidatorUtil.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/MainThreadValidatorUtil.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/MainThreadValidatorUtil.java
new file mode 100644
index 0000000..b3fea77
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/MainThreadValidatorUtil.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc;
+
+import static org.apache.flink.util.Preconditions.checkNotNull;
+
+/**
+ * This utility exists to bridge between the visibility of the
+ * {@code currentMainThread} field in the {@link RpcEndpoint}.
+ * 
+ * The {@code currentMainThread} can be hidden from {@code RpcEndpoint} implementations
+ * and only be accessed via this utility from other packages.
+ */
+public final class MainThreadValidatorUtil {
+
+	private final RpcEndpoint<?> endpoint;
+
+	public MainThreadValidatorUtil(RpcEndpoint<?> endpoint) {
+		this.endpoint = checkNotNull(endpoint);
+	}
+
+	public void enterMainThread() {
+		assert(endpoint.currentMainThread.compareAndSet(null, Thread.currentThread())) : 
+				"The RpcEndpoint has concurrent access from " + endpoint.currentMainThread.get();
+	}
+	
+	public void exitMainThread() {
+		assert(endpoint.currentMainThread.compareAndSet(Thread.currentThread(), null)) :
+				"The RpcEndpoint has concurrent access from " + endpoint.currentMainThread.get();
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/4ca049b5/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcEndpoint.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcEndpoint.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcEndpoint.java
index 44933d5..d36a283 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcEndpoint.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcEndpoint.java
@@ -29,6 +29,7 @@ import scala.concurrent.Future;
 
 import java.util.concurrent.Callable;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicReference;
 
 import static org.apache.flink.util.Preconditions.checkNotNull;
 
@@ -75,6 +76,9 @@ public abstract class RpcEndpoint<C extends RpcGateway> {
 	 * of the executing rpc server. */
 	private final MainThreadExecutionContext mainThreadExecutionContext;
 
+	/** A reference to the endpoint's main thread, if the current method is called by the main thread */
+	final AtomicReference<Thread> currentMainThread = new AtomicReference<>(null); 
+
 	/**
 	 * Initializes the RPC endpoint.
 	 * 
@@ -92,6 +96,15 @@ public abstract class RpcEndpoint<C extends RpcGateway> {
 		this.mainThreadExecutionContext = new MainThreadExecutionContext((MainThreadExecutor) self);
 	}
 
+	/**
+	 * Returns the class of the self gateway type.
+	 *
+	 * @return Class of the self gateway type
+	 */
+	public final Class<C> getSelfGatewayType() {
+		return selfGatewayType;
+	}
+	
 	// ------------------------------------------------------------------------
 	//  Shutdown
 	// ------------------------------------------------------------------------
@@ -193,13 +206,28 @@ public abstract class RpcEndpoint<C extends RpcGateway> {
 		return ((MainThreadExecutor) self).callAsync(callable, timeout);
 	}
 
+	// ------------------------------------------------------------------------
+	//  Main Thread Validation
+	// ------------------------------------------------------------------------
+
 	/**
-	 * Returns the class of the self gateway type.
-	 *
-	 * @return Class of the self gateway type
+	 * Validates that the method call happens in the RPC endpoint's main thread.
+	 * 
+	 * <p><b>IMPORTANT:</b> This check only happens when assertions are enabled,
+	 * such as when running tests.
+	 * 
+	 * <p>This can be used for additional checks, like
+	 * <pre>{@code
+	 * protected void concurrencyCriticalMethod() {
+	 *     validateRunsInMainThread();
+	 *     
+	 *     // some critical stuff
+	 * }
+	 * }</pre>
 	 */
-	public final Class<C> getSelfGatewayType() {
-		return selfGatewayType;
+	public void validateRunsInMainThread() {
+		// because the initialization is lazy, it can be that certain methods are
+		assert currentMainThread.get() == Thread.currentThread();
 	}
 
 	// ------------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/flink/blob/4ca049b5/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcActor.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcActor.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcActor.java
index 18ccf1b..5e0a7da 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcActor.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcActor.java
@@ -22,14 +22,16 @@ import akka.actor.ActorRef;
 import akka.actor.Status;
 import akka.actor.UntypedActor;
 import akka.pattern.Patterns;
+import org.apache.flink.runtime.rpc.MainThreadValidatorUtil;
 import org.apache.flink.runtime.rpc.RpcEndpoint;
 import org.apache.flink.runtime.rpc.RpcGateway;
 import org.apache.flink.runtime.rpc.akka.messages.CallAsync;
 import org.apache.flink.runtime.rpc.akka.messages.RpcInvocation;
 import org.apache.flink.runtime.rpc.akka.messages.RunAsync;
-import org.apache.flink.util.Preconditions;
+
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+
 import scala.concurrent.Future;
 import scala.concurrent.duration.FiniteDuration;
 
@@ -37,6 +39,8 @@ import java.lang.reflect.Method;
 import java.util.concurrent.Callable;
 import java.util.concurrent.TimeUnit;
 
+import static org.apache.flink.util.Preconditions.checkNotNull;
+
 /**
  * Akka rpc actor which receives {@link RpcInvocation}, {@link RunAsync} and {@link CallAsync}
  * messages.
@@ -51,24 +55,35 @@ import java.util.concurrent.TimeUnit;
  * @param <T> Type of the {@link RpcEndpoint}
  */
 class AkkaRpcActor<C extends RpcGateway, T extends RpcEndpoint<C>> extends UntypedActor {
+	
 	private static final Logger LOG = LoggerFactory.getLogger(AkkaRpcActor.class);
 
+	/** the endpoint to invoke the methods on */
 	private final T rpcEndpoint;
 
+	/** the helper that tracks whether calls come from the main thread */
+	private final MainThreadValidatorUtil mainThreadValidator;
+
 	AkkaRpcActor(final T rpcEndpoint) {
-		this.rpcEndpoint = Preconditions.checkNotNull(rpcEndpoint, "rpc endpoint");
+		this.rpcEndpoint = checkNotNull(rpcEndpoint, "rpc endpoint");
+		this.mainThreadValidator = new MainThreadValidatorUtil(rpcEndpoint);
 	}
 
 	@Override
-	public void onReceive(final Object message)  {
-		if (message instanceof RunAsync) {
-			handleRunAsync((RunAsync) message);
-		} else if (message instanceof CallAsync) {
-			handleCallAsync((CallAsync) message);
-		} else if (message instanceof RpcInvocation) {
-			handleRpcInvocation((RpcInvocation) message);
-		} else {
-			LOG.warn("Received message of unknown type {}. Dropping this message!", message.getClass());
+	public void onReceive(final Object message) {
+		mainThreadValidator.enterMainThread();
+		try {
+			if (message instanceof RunAsync) {
+				handleRunAsync((RunAsync) message);
+			} else if (message instanceof CallAsync) {
+				handleCallAsync((CallAsync) message);
+			} else if (message instanceof RpcInvocation) {
+				handleRpcInvocation((RpcInvocation) message);
+			} else {
+				LOG.warn("Received message of unknown type {}. Dropping this message!", message.getClass());
+			}
+		} finally {
+			mainThreadValidator.exitMainThread();
 		}
 	}
 

http://git-wip-us.apache.org/repos/asf/flink/blob/4ca049b5/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcService.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcService.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcService.java
index 448216c..db40f10 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcService.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcService.java
@@ -174,7 +174,7 @@ public class AkkaRpcService implements RpcService {
 	}
 
 	@Override
-	public <C extends RpcGateway> String getAddress(C selfGateway) {
+	public String getAddress(RpcGateway selfGateway) {
 		checkState(!stopped, "RpcService is stopped");
 
 		if (selfGateway instanceof AkkaGateway) {

http://git-wip-us.apache.org/repos/asf/flink/blob/4ca049b5/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/MainThreadValidationTest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/MainThreadValidationTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/MainThreadValidationTest.java
new file mode 100644
index 0000000..b854143
--- /dev/null
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/MainThreadValidationTest.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc.akka;
+
+import akka.util.Timeout;
+
+import org.apache.flink.runtime.akka.AkkaUtils;
+
+import org.apache.flink.runtime.rpc.RpcEndpoint;
+import org.apache.flink.runtime.rpc.RpcGateway;
+import org.apache.flink.runtime.rpc.RpcMethod;
+import org.apache.flink.runtime.rpc.RpcService;
+
+import org.junit.Test;
+
+import java.util.concurrent.TimeUnit;
+
+import static org.junit.Assert.assertTrue;
+
+public class MainThreadValidationTest {
+
+	@Test
+	public void failIfNotInMainThread() {
+		// test if assertions are activated. The test only works if assertions are loaded.
+		try {
+			assert false;
+			// apparently they are not activated
+			return;
+		} catch (AssertionError ignored) {}
+
+		// actual test
+		AkkaRpcService akkaRpcService = new AkkaRpcService(
+				AkkaUtils.createDefaultActorSystem(),
+				new Timeout(10000, TimeUnit.MILLISECONDS));
+
+		try {
+			TestEndpoint testEndpoint = new TestEndpoint(akkaRpcService);
+
+			// this works, because it is executed as an RPC call
+			testEndpoint.getSelf().someConcurrencyCriticalFunction();
+
+			// this fails, because it is executed directly
+			boolean exceptionThrown;
+			try {
+				testEndpoint.someConcurrencyCriticalFunction();
+				exceptionThrown = false;
+			}
+			catch (AssertionError e) {
+				exceptionThrown = true;
+			}
+			assertTrue("should fail with an assertion error", exceptionThrown);
+
+			akkaRpcService.stopServer(testEndpoint.getSelf());
+		}
+		finally {
+			akkaRpcService.stopService();
+		}
+	}
+
+	// ------------------------------------------------------------------------
+	//  test RPC endpoint
+	// ------------------------------------------------------------------------
+
+	interface TestGateway extends RpcGateway {
+
+		void someConcurrencyCriticalFunction();
+	}
+
+	@SuppressWarnings("unused")
+	public static class TestEndpoint extends RpcEndpoint<TestGateway> {
+
+		public TestEndpoint(RpcService rpcService) {
+			super(rpcService);
+		}
+
+		@RpcMethod
+		public void someConcurrencyCriticalFunction() {
+			validateRunsInMainThread();
+		}
+	}
+}


[46/50] [abbrv] flink git commit: [hotfix] [taskmanager] Fixes TaskManager component creation at startup

Posted by tr...@apache.org.
[hotfix] [taskmanager] Fixes TaskManager component creation at startup


Project: http://git-wip-us.apache.org/repos/asf/flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/26305430
Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/26305430
Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/26305430

Branch: refs/heads/flip-6
Commit: 2630543084b917524689c2af9e1090da7f5a92d6
Parents: 9718dcd
Author: Till Rohrmann <tr...@apache.org>
Authored: Thu Sep 8 18:43:15 2016 +0200
Committer: Till Rohrmann <tr...@apache.org>
Committed: Wed Sep 21 11:39:17 2016 +0200

----------------------------------------------------------------------
 .../runtime/taskexecutor/TaskExecutor.java      | 189 ++++++++++++++++---
 .../taskexecutor/TaskExecutorConfiguration.java |   9 -
 2 files changed, 159 insertions(+), 39 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/flink/blob/26305430/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/TaskExecutor.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/TaskExecutor.java b/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/TaskExecutor.java
index 735730b..a455fe2 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/TaskExecutor.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/TaskExecutor.java
@@ -19,9 +19,19 @@
 package org.apache.flink.runtime.taskexecutor;
 
 import akka.actor.ActorSystem;
-import akka.dispatch.ExecutionContexts$;
 import akka.util.Timeout;
 import com.typesafe.config.Config;
+import org.apache.flink.runtime.io.network.ConnectionManager;
+import org.apache.flink.runtime.io.network.LocalConnectionManager;
+import org.apache.flink.runtime.io.network.TaskEventDispatcher;
+import org.apache.flink.runtime.io.network.buffer.NetworkBufferPool;
+import org.apache.flink.runtime.io.network.netty.NettyConnectionManager;
+import org.apache.flink.runtime.io.network.partition.ResultPartitionManager;
+import org.apache.flink.runtime.query.KvStateRegistry;
+import org.apache.flink.runtime.query.netty.DisabledKvStateRequestStats;
+import org.apache.flink.runtime.query.netty.KvStateServer;
+import org.apache.flink.runtime.taskmanager.TaskManagerLocation;
+import org.apache.flink.util.Preconditions;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -37,7 +47,6 @@ import org.apache.flink.runtime.akka.AkkaUtils;
 import org.apache.flink.annotation.VisibleForTesting;
 import org.apache.flink.runtime.clusterframework.types.ResourceID;
 import org.apache.flink.runtime.highavailability.HighAvailabilityServices;
-import org.apache.flink.runtime.instance.InstanceConnectionInfo;
 import org.apache.flink.runtime.io.disk.iomanager.IOManager;
 import org.apache.flink.runtime.io.disk.iomanager.IOManager.IOMode;
 import org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync;
@@ -61,7 +70,6 @@ import org.apache.flink.util.NetUtils;
 import scala.Tuple2;
 import scala.Option;
 import scala.Some;
-import scala.concurrent.ExecutionContext;
 import scala.concurrent.duration.Duration;
 import scala.concurrent.duration.FiniteDuration;
 
@@ -70,9 +78,9 @@ import java.io.IOException;
 import java.net.InetAddress;
 import java.net.InetSocketAddress;
 import java.util.UUID;
-import java.util.concurrent.ForkJoinPool;
 import java.util.concurrent.TimeUnit;
 
+import static org.apache.flink.util.Preconditions.checkArgument;
 import static org.apache.flink.util.Preconditions.checkNotNull;
 
 /**
@@ -86,6 +94,8 @@ public class TaskExecutor extends RpcEndpoint<TaskExecutorGateway> {
 	/** The unique resource ID of this TaskExecutor */
 	private final ResourceID resourceID;
 
+	private final TaskManagerLocation taskManagerLocation;
+
 	/** The access to the leader election and metadata storage services */
 	private final HighAvailabilityServices haServices;
 
@@ -113,22 +123,26 @@ public class TaskExecutor extends RpcEndpoint<TaskExecutorGateway> {
 	public TaskExecutor(
 			TaskExecutorConfiguration taskExecutorConfig,
 			ResourceID resourceID,
+			TaskManagerLocation taskManagerLocation,
 			MemoryManager memoryManager,
 			IOManager ioManager,
 			NetworkEnvironment networkEnvironment,
-			int numberOfSlots,
 			RpcService rpcService,
 			HighAvailabilityServices haServices) {
 
 		super(rpcService);
 
+		checkArgument(taskExecutorConfig.getNumberOfSlots() > 0, "The number of slots has to be larger than 0.");
+
 		this.taskExecutorConfig = checkNotNull(taskExecutorConfig);
 		this.resourceID = checkNotNull(resourceID);
+		this.taskManagerLocation = checkNotNull(taskManagerLocation);
 		this.memoryManager = checkNotNull(memoryManager);
 		this.ioManager = checkNotNull(ioManager);
 		this.networkEnvironment = checkNotNull(networkEnvironment);
-		this.numberOfSlots = checkNotNull(numberOfSlots);
 		this.haServices = checkNotNull(haServices);
+
+		this.numberOfSlots = taskExecutorConfig.getNumberOfSlots();
 	}
 
 	// ------------------------------------------------------------------------
@@ -360,10 +374,10 @@ public class TaskExecutor extends RpcEndpoint<TaskExecutorGateway> {
 	 *                                      then a HighAvailabilityServices is constructed from the configuration.
 	 * @param localTaskManagerCommunication     If true, the TaskManager will not initiate the TCP network stack.
 	 * @return An ActorRef to the TaskManager actor.
-	 * @throws org.apache.flink.configuration.IllegalConfigurationException     Thrown, if the given config contains illegal values.
-	 * @throws java.io.IOException      Thrown, if any of the I/O components (such as buffer pools,
+	 * @throws IllegalConfigurationException     Thrown, if the given config contains illegal values.
+	 * @throws IOException      Thrown, if any of the I/O components (such as buffer pools,
 	 *                                       I/O manager, ...) cannot be properly started.
-	 * @throws java.lang.Exception      Thrown is some other error occurs while parsing the configuration
+	 * @throws Exception      Thrown is some other error occurs while parsing the configuration
 	 *                                      or starting the TaskManager components.
 	 */
 	public static TaskExecutor startTaskManagerComponentsAndActor(
@@ -377,19 +391,105 @@ public class TaskExecutor extends RpcEndpoint<TaskExecutorGateway> {
 		final TaskExecutorConfiguration taskExecutorConfig = parseTaskManagerConfiguration(
 			configuration, taskManagerHostname, localTaskManagerCommunication);
 
+		TaskManagerComponents taskManagerComponents = createTaskManagerComponents(
+			resourceID,
+			InetAddress.getByName(taskManagerHostname),
+			taskExecutorConfig,
+			configuration);
+
+		final TaskExecutor taskExecutor = new TaskExecutor(
+			taskExecutorConfig,
+			resourceID,
+			taskManagerComponents.getTaskManagerLocation(),
+			taskManagerComponents.getMemoryManager(),
+			taskManagerComponents.getIOManager(),
+			taskManagerComponents.getNetworkEnvironment(),
+			rpcService,
+			haServices);
+
+		return taskExecutor;
+	}
+
+	/**
+	 * Creates and returns the task manager components.
+	 *
+	 * @param resourceID resource ID of the task manager
+	 * @param taskManagerAddress address of the task manager
+	 * @param taskExecutorConfig task manager configuration
+	 * @param configuration of Flink
+	 * @return task manager components
+	 * @throws Exception
+	 */
+	private static TaskExecutor.TaskManagerComponents createTaskManagerComponents(
+		ResourceID resourceID,
+		InetAddress taskManagerAddress,
+		TaskExecutorConfiguration taskExecutorConfig,
+		Configuration configuration) throws Exception {
 		MemoryType memType = taskExecutorConfig.getNetworkConfig().memoryType();
 
 		// pre-start checks
 		checkTempDirs(taskExecutorConfig.getTmpDirPaths());
 
-		ExecutionContext executionContext = ExecutionContexts$.MODULE$.fromExecutor(new ForkJoinPool());
+		NetworkEnvironmentConfiguration networkEnvironmentConfiguration = taskExecutorConfig.getNetworkConfig();
+
+		NetworkBufferPool networkBufferPool = new NetworkBufferPool(
+			networkEnvironmentConfiguration.numNetworkBuffers(),
+			networkEnvironmentConfiguration.networkBufferSize(),
+			networkEnvironmentConfiguration.memoryType());
+
+		ConnectionManager connectionManager;
+
+		if (networkEnvironmentConfiguration.nettyConfig().isDefined()) {
+			connectionManager = new NettyConnectionManager(networkEnvironmentConfiguration.nettyConfig().get());
+		} else {
+			connectionManager = new LocalConnectionManager();
+		}
+
+		ResultPartitionManager resultPartitionManager = new ResultPartitionManager();
+		TaskEventDispatcher taskEventDispatcher = new TaskEventDispatcher();
+
+		KvStateRegistry kvStateRegistry = new KvStateRegistry();
+
+		KvStateServer kvStateServer;
+
+		if (networkEnvironmentConfiguration.nettyConfig().isDefined()) {
+			NettyConfig nettyConfig = networkEnvironmentConfiguration.nettyConfig().get();
+
+			int numNetworkThreads = networkEnvironmentConfiguration.queryServerNetworkThreads() == 0 ?
+				nettyConfig.getNumberOfSlots() : networkEnvironmentConfiguration.queryServerNetworkThreads();
+
+			int numQueryThreads = networkEnvironmentConfiguration.queryServerQueryThreads() == 0 ?
+				nettyConfig.getNumberOfSlots() : networkEnvironmentConfiguration.queryServerQueryThreads();
+
+			kvStateServer = new KvStateServer(
+				taskManagerAddress,
+				networkEnvironmentConfiguration.queryServerPort(),
+				numNetworkThreads,
+				numQueryThreads,
+				kvStateRegistry,
+				new DisabledKvStateRequestStats());
+		} else {
+			kvStateServer = null;
+		}
 
 		// we start the network first, to make sure it can allocate its buffers first
 		final NetworkEnvironment network = new NetworkEnvironment(
-			executionContext,
-			taskExecutorConfig.getTimeout(),
-			taskExecutorConfig.getNetworkConfig(),
-			taskExecutorConfig.getConnectionInfo());
+			networkBufferPool,
+			connectionManager,
+			resultPartitionManager,
+			taskEventDispatcher,
+			kvStateRegistry,
+			kvStateServer,
+			networkEnvironmentConfiguration.ioMode(),
+			networkEnvironmentConfiguration.partitionRequestInitialBackoff(),
+			networkEnvironmentConfiguration.partitinRequestMaxBackoff());
+
+		network.start();
+
+		TaskManagerLocation taskManagerLocation = new TaskManagerLocation(
+			resourceID,
+			taskManagerAddress,
+			network.getConnectionManager().getDataPort());
 
 		// computing the amount of memory to use depends on how much memory is available
 		// it strictly needs to happen AFTER the network stack has been initialized
@@ -473,17 +573,7 @@ public class TaskExecutor extends RpcEndpoint<TaskExecutorGateway> {
 		// start the I/O manager, it will create some temp directories.
 		final IOManager ioManager = new IOManagerAsync(taskExecutorConfig.getTmpDirPaths());
 
-		final TaskExecutor taskExecutor = new TaskExecutor(
-			taskExecutorConfig,
-			resourceID,
-			memoryManager,
-			ioManager,
-			network,
-			taskExecutorConfig.getNumberOfSlots(),
-			rpcService,
-			haServices);
-
-		return taskExecutor;
+		return new TaskExecutor.TaskManagerComponents(taskManagerLocation, memoryManager, ioManager, network);
 	}
 
 	// --------------------------------------------------------------------------
@@ -519,7 +609,7 @@ public class TaskExecutor extends RpcEndpoint<TaskExecutorGateway> {
 			"Leave config parameter empty or use 0 to let the system choose a port automatically.");
 
 		InetAddress taskManagerAddress = InetAddress.getByName(taskManagerHostname);
-		final InstanceConnectionInfo connectionInfo = new InstanceConnectionInfo(taskManagerAddress, dataport);
+		final InetSocketAddress taskManagerInetSocketAddress = new InetSocketAddress(taskManagerAddress, dataport);
 
 		// ----> memory / network stack (shuffles/broadcasts), task slots, temp directories
 
@@ -576,7 +666,12 @@ public class TaskExecutor extends RpcEndpoint<TaskExecutorGateway> {
 
 		final NettyConfig nettyConfig;
 		if (!localTaskManagerCommunication) {
-			nettyConfig = new NettyConfig(connectionInfo.address(), connectionInfo.dataPort(), pageSize, slots, configuration);
+			nettyConfig = new NettyConfig(
+				taskManagerInetSocketAddress.getAddress(),
+				taskManagerInetSocketAddress.getPort(),
+				pageSize,
+				slots,
+				configuration);
 		} else {
 			nettyConfig = null;
 		}
@@ -613,8 +708,9 @@ public class TaskExecutor extends RpcEndpoint<TaskExecutorGateway> {
 			queryServerPort,
 			queryServerNetworkThreads,
 			queryServerQueryThreads,
-			localTaskManagerCommunication ? Option.<NettyConfig>empty() : new Some<>(nettyConfig),
-			new Tuple2<>(500, 3000));
+			Option.apply(nettyConfig),
+			500,
+			30000);
 
 		// ----> timeouts, library caching, profiling
 
@@ -695,7 +791,6 @@ public class TaskExecutor extends RpcEndpoint<TaskExecutorGateway> {
 		return new TaskExecutorConfiguration(
 			tmpDirs,
 			cleanupInterval,
-			connectionInfo,
 			networkConfig,
 			timeout,
 			finiteRegistrationDuration,
@@ -829,4 +924,38 @@ public class TaskExecutor extends RpcEndpoint<TaskExecutorGateway> {
 			onFatalErrorAsync(exception);
 		}
 	}
+
+	private static class TaskManagerComponents {
+		private final TaskManagerLocation taskManagerLocation;
+		private final MemoryManager memoryManager;
+		private final IOManager ioManager;
+		private final NetworkEnvironment networkEnvironment;
+
+		private TaskManagerComponents(
+				TaskManagerLocation taskManagerLocation,
+				MemoryManager memoryManager,
+				IOManager ioManager,
+				NetworkEnvironment networkEnvironment) {
+			this.taskManagerLocation = Preconditions.checkNotNull(taskManagerLocation);
+			this.memoryManager = Preconditions.checkNotNull(memoryManager);
+			this.ioManager = Preconditions.checkNotNull(ioManager);
+			this.networkEnvironment = Preconditions.checkNotNull(networkEnvironment);
+		}
+
+		public MemoryManager getMemoryManager() {
+			return memoryManager;
+		}
+
+		public IOManager getIOManager() {
+			return ioManager;
+		}
+
+		public NetworkEnvironment getNetworkEnvironment() {
+			return networkEnvironment;
+		}
+
+		public TaskManagerLocation getTaskManagerLocation() {
+			return taskManagerLocation;
+		}
+	}
 }

http://git-wip-us.apache.org/repos/asf/flink/blob/26305430/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/TaskExecutorConfiguration.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/TaskExecutorConfiguration.java b/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/TaskExecutorConfiguration.java
index 3707a47..c97c893 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/TaskExecutorConfiguration.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/TaskExecutorConfiguration.java
@@ -19,7 +19,6 @@
 package org.apache.flink.runtime.taskexecutor;
 
 import org.apache.flink.configuration.Configuration;
-import org.apache.flink.runtime.instance.InstanceConnectionInfo;
 import org.apache.flink.runtime.taskmanager.NetworkEnvironmentConfiguration;
 
 import scala.concurrent.duration.FiniteDuration;
@@ -52,12 +51,9 @@ public class TaskExecutorConfiguration implements Serializable {
 
 	private final NetworkEnvironmentConfiguration networkConfig;
 
-	private final InstanceConnectionInfo connectionInfo;
-
 	public TaskExecutorConfiguration(
 			String[] tmpDirPaths,
 			long cleanupInterval,
-			InstanceConnectionInfo connectionInfo,
 			NetworkEnvironmentConfiguration networkConfig,
 			FiniteDuration timeout,
 			FiniteDuration maxRegistrationDuration,
@@ -66,7 +62,6 @@ public class TaskExecutorConfiguration implements Serializable {
 
 		this (tmpDirPaths,
 			cleanupInterval,
-			connectionInfo,
 			networkConfig,
 			timeout,
 			maxRegistrationDuration,
@@ -80,7 +75,6 @@ public class TaskExecutorConfiguration implements Serializable {
 	public TaskExecutorConfiguration(
 			String[] tmpDirPaths,
 			long cleanupInterval,
-			InstanceConnectionInfo connectionInfo,
 			NetworkEnvironmentConfiguration networkConfig,
 			FiniteDuration timeout,
 			FiniteDuration maxRegistrationDuration,
@@ -92,7 +86,6 @@ public class TaskExecutorConfiguration implements Serializable {
 
 		this.tmpDirPaths = checkNotNull(tmpDirPaths);
 		this.cleanupInterval = checkNotNull(cleanupInterval);
-		this.connectionInfo = checkNotNull(connectionInfo);
 		this.networkConfig = checkNotNull(networkConfig);
 		this.timeout = checkNotNull(timeout);
 		this.maxRegistrationDuration = maxRegistrationDuration;
@@ -115,8 +108,6 @@ public class TaskExecutorConfiguration implements Serializable {
 		return cleanupInterval;
 	}
 
-	public InstanceConnectionInfo getConnectionInfo() { return connectionInfo; }
-
 	public NetworkEnvironmentConfiguration getNetworkConfig() { return networkConfig; }
 
 	public FiniteDuration getTimeout() {


[08/50] [abbrv] flink git commit: [FLINK-2662] [dataSet] [optimizer] Fix merging of unions with multiple outputs.

Posted by tr...@apache.org.
[FLINK-2662] [dataSet] [optimizer] Fix merging of unions with multiple outputs.

Translate union with N outputs into N unions with single output.

This closes #2508.


Project: http://git-wip-us.apache.org/repos/asf/flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/303f6fee
Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/303f6fee
Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/303f6fee

Branch: refs/heads/flip-6
Commit: 303f6fee99b731dd138e37513705271f97f76d72
Parents: 5c02988
Author: Fabian Hueske <fh...@apache.org>
Authored: Fri Sep 16 18:40:32 2016 +0200
Committer: Fabian Hueske <fh...@apache.org>
Committed: Tue Sep 20 21:52:08 2016 +0200

----------------------------------------------------------------------
 .../api/java/operators/OperatorTranslation.java |  23 +++--
 .../flink/optimizer/dag/BinaryUnionNode.java    |   8 +-
 .../flink/optimizer/UnionReplacementTest.java   | 102 ++++++++++++++++++-
 .../dataexchange/UnionClosedBranchingTest.java  |  26 +++--
 4 files changed, 141 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/flink/blob/303f6fee/flink-java/src/main/java/org/apache/flink/api/java/operators/OperatorTranslation.java
----------------------------------------------------------------------
diff --git a/flink-java/src/main/java/org/apache/flink/api/java/operators/OperatorTranslation.java b/flink-java/src/main/java/org/apache/flink/api/java/operators/OperatorTranslation.java
index 3f44d58..88c9c37 100644
--- a/flink-java/src/main/java/org/apache/flink/api/java/operators/OperatorTranslation.java
+++ b/flink-java/src/main/java/org/apache/flink/api/java/operators/OperatorTranslation.java
@@ -40,11 +40,11 @@ import java.util.Map;
 public class OperatorTranslation {
 	
 	/** The already translated operations */
-	private Map<DataSet<?>, Operator<?>> translated = new HashMap<DataSet<?>, Operator<?>>();
+	private Map<DataSet<?>, Operator<?>> translated = new HashMap<>();
 	
 	
 	public Plan translateToPlan(List<DataSink<?>> sinks, String jobName) {
-		List<GenericDataSinkBase<?>> planSinks = new ArrayList<GenericDataSinkBase<?>>();
+		List<GenericDataSinkBase<?>> planSinks = new ArrayList<>();
 		
 		for (DataSink<?> sink : sinks) {
 			planSinks.add(translate(sink));
@@ -74,11 +74,18 @@ public class OperatorTranslation {
 		}
 
 		// check if we have already translated that data set (operation or source)
-		Operator<?> previous = (Operator<?>) this.translated.get(dataSet);
+		Operator<?> previous = this.translated.get(dataSet);
 		if (previous != null) {
-			@SuppressWarnings("unchecked")
-			Operator<T> typedPrevious = (Operator<T>) previous;
-			return typedPrevious;
+
+			// Union operators may only have a single output.
+			// We ensure this by not reusing previously created union operators.
+			// The optimizer will merge subsequent binary unions into one n-ary union.
+			if (!(dataSet instanceof UnionOperator)) {
+				// all other operators are reused.
+				@SuppressWarnings("unchecked")
+				Operator<T> typedPrevious = (Operator<T>) previous;
+				return typedPrevious;
+			}
 		}
 		
 		Operator<T> dataFlowOp;
@@ -190,7 +197,7 @@ public class OperatorTranslation {
 		BulkIterationResultSet<T> iterationEnd = (BulkIterationResultSet<T>) untypedIterationEnd;
 		
 		BulkIterationBase<T> iterationOperator =
-				new BulkIterationBase<T>(new UnaryOperatorInformation<T, T>(iterationEnd.getType(), iterationEnd.getType()), "Bulk Iteration");
+				new BulkIterationBase<>(new UnaryOperatorInformation<>(iterationEnd.getType(), iterationEnd.getType()), "Bulk Iteration");
 		IterativeDataSet<T> iterationHead = iterationEnd.getIterationHead();
 
 		translated.put(iterationHead, iterationOperator.getPartialSolution());
@@ -216,7 +223,7 @@ public class OperatorTranslation {
 		
 		String name = iterationHead.getName() == null ? "Unnamed Delta Iteration" : iterationHead.getName();
 		
-		DeltaIterationBase<D, W> iterationOperator = new DeltaIterationBase<D, W>(new BinaryOperatorInformation<D, W, D>(iterationEnd.getType(), iterationEnd.getWorksetType(), iterationEnd.getType()),
+		DeltaIterationBase<D, W> iterationOperator = new DeltaIterationBase<>(new BinaryOperatorInformation<>(iterationEnd.getType(), iterationEnd.getWorksetType(), iterationEnd.getType()),
 				iterationEnd.getKeyPositions(), name);
 		
 		iterationOperator.setMaximumNumberOfIterations(iterationEnd.getMaxIterations());

http://git-wip-us.apache.org/repos/asf/flink/blob/303f6fee/flink-optimizer/src/main/java/org/apache/flink/optimizer/dag/BinaryUnionNode.java
----------------------------------------------------------------------
diff --git a/flink-optimizer/src/main/java/org/apache/flink/optimizer/dag/BinaryUnionNode.java b/flink-optimizer/src/main/java/org/apache/flink/optimizer/dag/BinaryUnionNode.java
index fdd76a8..d262cf6 100644
--- a/flink-optimizer/src/main/java/org/apache/flink/optimizer/dag/BinaryUnionNode.java
+++ b/flink-optimizer/src/main/java/org/apache/flink/optimizer/dag/BinaryUnionNode.java
@@ -98,6 +98,12 @@ public class BinaryUnionNode extends TwoInputNode {
 	
 	@Override
 	public List<PlanNode> getAlternativePlans(CostEstimator estimator) {
+
+		// check that union has only a single successor
+		if (this.getOutgoingConnections().size() > 1) {
+			throw new CompilerException("BinaryUnionNode has more than one successor.");
+		}
+
 		// check if we have a cached version
 		if (this.cachedPlans != null) {
 			return this.cachedPlans;
@@ -173,7 +179,7 @@ public class BinaryUnionNode extends TwoInputNode {
 						}
 					}
 					
-					// create a candidate channel for the first input. mark it cached, if the connection says so
+					// create a candidate channel for the second input. mark it cached, if the connection says so
 					Channel c2 = new Channel(child2, this.input2.getMaterializationMode());
 					if (this.input2.getShipStrategy() == null) {
 						// free to choose the ship strategy

http://git-wip-us.apache.org/repos/asf/flink/blob/303f6fee/flink-optimizer/src/test/java/org/apache/flink/optimizer/UnionReplacementTest.java
----------------------------------------------------------------------
diff --git a/flink-optimizer/src/test/java/org/apache/flink/optimizer/UnionReplacementTest.java b/flink-optimizer/src/test/java/org/apache/flink/optimizer/UnionReplacementTest.java
index 65dd2b3..3be7657 100644
--- a/flink-optimizer/src/test/java/org/apache/flink/optimizer/UnionReplacementTest.java
+++ b/flink-optimizer/src/test/java/org/apache/flink/optimizer/UnionReplacementTest.java
@@ -18,16 +18,25 @@
 
 package org.apache.flink.optimizer;
 
+import junit.framework.Assert;
+import org.apache.flink.api.common.operators.util.FieldList;
 import org.apache.flink.api.java.DataSet;
 import org.apache.flink.api.java.ExecutionEnvironment;
 import org.apache.flink.api.common.Plan;
 import org.apache.flink.api.java.io.DiscardingOutputFormat;
+import org.apache.flink.api.java.tuple.Tuple2;
+import org.apache.flink.optimizer.plan.Channel;
+import org.apache.flink.optimizer.plan.NAryUnionPlanNode;
 import org.apache.flink.optimizer.plan.OptimizedPlan;
+import org.apache.flink.optimizer.plan.SingleInputPlanNode;
 import org.apache.flink.optimizer.plantranslate.JobGraphGenerator;
 import org.apache.flink.optimizer.util.CompilerTestBase;
+import org.apache.flink.runtime.operators.shipping.ShipStrategyType;
 import org.junit.Test;
 
-import static org.junit.Assert.fail;
+import java.util.List;
+
+import static org.junit.Assert.*;
 
 @SuppressWarnings("serial")
 public class UnionReplacementTest extends CompilerTestBase {
@@ -54,4 +63,95 @@ public class UnionReplacementTest extends CompilerTestBase {
 			fail(e.getMessage());
 		}
 	}
+
+	/**
+	 *
+	 * Test for FLINK-2662.
+	 *
+	 * Checks that a plan with an union with two outputs is correctly translated.
+	 * The program can be illustrated as follows:
+	 *
+	 * Src1 ----------------\
+	 *                       >-> Union123 -> GroupBy(0) -> Sum -> Output
+	 * Src2 -\              /
+	 *        >-> Union23--<
+	 * Src3 -/              \
+	 *                       >-> Union234 -> GroupBy(1) -> Sum -> Output
+	 * Src4 ----------------/
+	 *
+	 * The fix for FLINK-2662 translates the union with two output (Union-23) into two separate
+	 * unions (Union-23_1 and Union-23_2) with one output each. Due to this change, the interesting
+	 * partitioning properties for GroupBy(0) and GroupBy(1) are pushed through Union-23_1 and
+	 * Union-23_2 and do not interfere with each other (which would be the case if Union-23 would
+	 * be a single operator with two outputs).
+	 *
+	 */
+	@Test
+	public void testUnionWithTwoOutputsTest() throws Exception {
+
+		// -----------------------------------------------------------------------------------------
+		// Build test program
+		// -----------------------------------------------------------------------------------------
+
+		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
+		env.setParallelism(DEFAULT_PARALLELISM);
+
+		DataSet<Tuple2<Long, Long>> src1 = env.fromElements(new Tuple2<>(0L, 0L));
+		DataSet<Tuple2<Long, Long>> src2 = env.fromElements(new Tuple2<>(0L, 0L));
+		DataSet<Tuple2<Long, Long>> src3 = env.fromElements(new Tuple2<>(0L, 0L));
+		DataSet<Tuple2<Long, Long>> src4 = env.fromElements(new Tuple2<>(0L, 0L));
+
+		DataSet<Tuple2<Long, Long>> union23 = src2.union(src3);
+		DataSet<Tuple2<Long, Long>> union123 = src1.union(union23);
+		DataSet<Tuple2<Long, Long>> union234 = src4.union(union23);
+
+		union123.groupBy(0).sum(1).name("1").output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
+		union234.groupBy(1).sum(0).name("2").output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
+
+		// -----------------------------------------------------------------------------------------
+		// Verify optimized plan
+		// -----------------------------------------------------------------------------------------
+
+		OptimizedPlan optimizedPlan = compileNoStats(env.createProgramPlan());
+
+		OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(optimizedPlan);
+
+		SingleInputPlanNode groupRed1 = resolver.getNode("1");
+		SingleInputPlanNode groupRed2 = resolver.getNode("2");
+
+		// check partitioning is correct
+		Assert.assertTrue("Reduce input should be partitioned on 0.",
+			groupRed1.getInput().getGlobalProperties().getPartitioningFields().isExactMatch(new FieldList(0)));
+		Assert.assertTrue("Reduce input should be partitioned on 1.",
+			groupRed2.getInput().getGlobalProperties().getPartitioningFields().isExactMatch(new FieldList(1)));
+
+		// check group reduce inputs are n-ary unions with three inputs
+		Assert.assertTrue("Reduce input should be n-ary union with three inputs.",
+			groupRed1.getInput().getSource() instanceof NAryUnionPlanNode &&
+				((NAryUnionPlanNode) groupRed1.getInput().getSource()).getListOfInputs().size() == 3);
+		Assert.assertTrue("Reduce input should be n-ary union with three inputs.",
+			groupRed2.getInput().getSource() instanceof NAryUnionPlanNode &&
+				((NAryUnionPlanNode) groupRed2.getInput().getSource()).getListOfInputs().size() == 3);
+
+		// check channel from union to group reduce is forwarding
+		Assert.assertTrue("Channel between union and group reduce should be forwarding",
+			groupRed1.getInput().getShipStrategy().equals(ShipStrategyType.FORWARD));
+		Assert.assertTrue("Channel between union and group reduce should be forwarding",
+			groupRed2.getInput().getShipStrategy().equals(ShipStrategyType.FORWARD));
+
+		// check that all inputs of unions are hash partitioned
+		List<Channel> union123In = ((NAryUnionPlanNode) groupRed1.getInput().getSource()).getListOfInputs();
+		for(Channel i : union123In) {
+			Assert.assertTrue("Union input channel should hash partition on 0",
+				i.getShipStrategy().equals(ShipStrategyType.PARTITION_HASH) &&
+					i.getShipStrategyKeys().isExactMatch(new FieldList(0)));
+		}
+		List<Channel> union234In = ((NAryUnionPlanNode) groupRed2.getInput().getSource()).getListOfInputs();
+		for(Channel i : union234In) {
+			Assert.assertTrue("Union input channel should hash partition on 0",
+				i.getShipStrategy().equals(ShipStrategyType.PARTITION_HASH) &&
+					i.getShipStrategyKeys().isExactMatch(new FieldList(1)));
+		}
+
+	}
 }

http://git-wip-us.apache.org/repos/asf/flink/blob/303f6fee/flink-optimizer/src/test/java/org/apache/flink/optimizer/dataexchange/UnionClosedBranchingTest.java
----------------------------------------------------------------------
diff --git a/flink-optimizer/src/test/java/org/apache/flink/optimizer/dataexchange/UnionClosedBranchingTest.java b/flink-optimizer/src/test/java/org/apache/flink/optimizer/dataexchange/UnionClosedBranchingTest.java
index b870a91..77b150a 100644
--- a/flink-optimizer/src/test/java/org/apache/flink/optimizer/dataexchange/UnionClosedBranchingTest.java
+++ b/flink-optimizer/src/test/java/org/apache/flink/optimizer/dataexchange/UnionClosedBranchingTest.java
@@ -37,6 +37,7 @@ import org.apache.flink.runtime.io.network.partition.consumer.UnionInputGate;
 import org.apache.flink.runtime.jobgraph.IntermediateDataSet;
 import org.apache.flink.runtime.jobgraph.JobGraph;
 import org.apache.flink.runtime.jobgraph.JobVertex;
+import org.apache.flink.runtime.operators.shipping.ShipStrategyType;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
@@ -73,9 +74,9 @@ public class UnionClosedBranchingTest extends CompilerTestBase {
 	@Parameterized.Parameters
 	public static Collection<Object[]> params() {
 		Collection<Object[]> params = Arrays.asList(new Object[][]{
-				{ExecutionMode.PIPELINED, PIPELINED, BATCH},
+				{ExecutionMode.PIPELINED, BATCH, PIPELINED},
 				{ExecutionMode.PIPELINED_FORCED, PIPELINED, PIPELINED},
-				{ExecutionMode.BATCH, BATCH, BATCH},
+				{ExecutionMode.BATCH, BATCH, PIPELINED},
 				{ExecutionMode.BATCH_FORCED, BATCH, BATCH},
 		});
 
@@ -93,10 +94,16 @@ public class UnionClosedBranchingTest extends CompilerTestBase {
 	/** Expected {@link DataExchangeMode} from union to join. */
 	private final DataExchangeMode unionToJoin;
 
+	/** Expected {@link ShipStrategyType} from source to union. */
+	private final ShipStrategyType sourceToUnionStrategy = ShipStrategyType.PARTITION_HASH;
+
+	/** Expected {@link ShipStrategyType} from union to join. */
+	private final ShipStrategyType unionToJoinStrategy = ShipStrategyType.FORWARD;
+
 	public UnionClosedBranchingTest(
-			ExecutionMode executionMode,
-			DataExchangeMode sourceToUnion,
-			DataExchangeMode unionToJoin) {
+		ExecutionMode executionMode,
+		DataExchangeMode sourceToUnion,
+		DataExchangeMode unionToJoin) {
 
 		this.executionMode = executionMode;
 		this.sourceToUnion = sourceToUnion;
@@ -140,12 +147,16 @@ public class UnionClosedBranchingTest extends CompilerTestBase {
 		for (Channel channel : joinNode.getInputs()) {
 			assertEquals("Unexpected data exchange mode between union and join node.",
 					unionToJoin, channel.getDataExchangeMode());
+			assertEquals("Unexpected ship strategy between union and join node.",
+					unionToJoinStrategy, channel.getShipStrategy());
 		}
 
 		for (SourcePlanNode src : optimizedPlan.getDataSources()) {
 			for (Channel channel : src.getOutgoingChannels()) {
 				assertEquals("Unexpected data exchange mode between source and union node.",
 						sourceToUnion, channel.getDataExchangeMode());
+				assertEquals("Unexpected ship strategy between source and union node.",
+					sourceToUnionStrategy, channel.getShipStrategy());
 			}
 		}
 
@@ -176,9 +187,8 @@ public class UnionClosedBranchingTest extends CompilerTestBase {
 			for (IntermediateDataSet dataSet : src.getProducedDataSets()) {
 				ResultPartitionType dsType = dataSet.getResultType();
 
-				// The result type is determined by the channel between the union and the join node
-				// and *not* the channel between source and union.
-				if (unionToJoin.equals(BATCH)) {
+				// Ensure batch exchange unless PIPELINED_FORCE is enabled.
+				if (!executionMode.equals(ExecutionMode.PIPELINED_FORCED)) {
 					assertTrue("Expected batch exchange, but result type is " + dsType + ".",
 							dsType.isBlocking());
 				} else {


[33/50] [abbrv] flink git commit: [FLINK-4529] [flip-6] Move TaskExecutor, JobMaster and ResourceManager out of the rpc package

Posted by tr...@apache.org.
http://git-wip-us.apache.org/repos/asf/flink/blob/2f12ba3e/flink-runtime/src/test/java/org/apache/flink/runtime/resourcemanager/SlotManagerTest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/resourcemanager/SlotManagerTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/resourcemanager/SlotManagerTest.java
new file mode 100644
index 0000000..52d9d06
--- /dev/null
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/resourcemanager/SlotManagerTest.java
@@ -0,0 +1,538 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.resourcemanager;
+
+import org.apache.flink.api.common.JobID;
+import org.apache.flink.runtime.clusterframework.types.AllocationID;
+import org.apache.flink.runtime.clusterframework.types.ResourceID;
+import org.apache.flink.runtime.clusterframework.types.ResourceProfile;
+import org.apache.flink.runtime.clusterframework.types.ResourceSlot;
+import org.apache.flink.runtime.clusterframework.types.SlotID;
+import org.apache.flink.runtime.taskexecutor.SlotStatus;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Mockito.mock;
+
+public class SlotManagerTest {
+
+	private static final double DEFAULT_TESTING_CPU_CORES = 1.0;
+
+	private static final long DEFAULT_TESTING_MEMORY = 512;
+
+	private static final ResourceProfile DEFAULT_TESTING_PROFILE =
+		new ResourceProfile(DEFAULT_TESTING_CPU_CORES, DEFAULT_TESTING_MEMORY);
+
+	private static final ResourceProfile DEFAULT_TESTING_BIG_PROFILE =
+		new ResourceProfile(DEFAULT_TESTING_CPU_CORES * 2, DEFAULT_TESTING_MEMORY * 2);
+
+	private ResourceManagerGateway resourceManagerGateway;
+
+	@Before
+	public void setUp() {
+		resourceManagerGateway = mock(ResourceManagerGateway.class);
+	}
+
+	/**
+	 * Tests that there are no free slots when we request, need to allocate from cluster manager master
+	 */
+	@Test
+	public void testRequestSlotWithoutFreeSlot() {
+		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
+		slotManager.requestSlot(new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE));
+
+		assertEquals(0, slotManager.getAllocatedSlotCount());
+		assertEquals(0, slotManager.getFreeSlotCount());
+		assertEquals(1, slotManager.getPendingRequestCount());
+		assertEquals(1, slotManager.getAllocatedContainers().size());
+		assertEquals(DEFAULT_TESTING_PROFILE, slotManager.getAllocatedContainers().get(0));
+	}
+
+	/**
+	 * Tests that there are some free slots when we request, and the request is fulfilled immediately
+	 */
+	@Test
+	public void testRequestSlotWithFreeSlot() {
+		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
+
+		directlyProvideFreeSlots(slotManager, DEFAULT_TESTING_PROFILE, 1);
+		assertEquals(1, slotManager.getFreeSlotCount());
+
+		slotManager.requestSlot(new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE));
+		assertEquals(1, slotManager.getAllocatedSlotCount());
+		assertEquals(0, slotManager.getFreeSlotCount());
+		assertEquals(0, slotManager.getPendingRequestCount());
+		assertEquals(0, slotManager.getAllocatedContainers().size());
+	}
+
+	/**
+	 * Tests that there are some free slots when we request, but none of them are suitable
+	 */
+	@Test
+	public void testRequestSlotWithoutSuitableSlot() {
+		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
+
+		directlyProvideFreeSlots(slotManager, DEFAULT_TESTING_PROFILE, 2);
+		assertEquals(2, slotManager.getFreeSlotCount());
+
+		slotManager.requestSlot(new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_BIG_PROFILE));
+		assertEquals(0, slotManager.getAllocatedSlotCount());
+		assertEquals(2, slotManager.getFreeSlotCount());
+		assertEquals(1, slotManager.getPendingRequestCount());
+		assertEquals(1, slotManager.getAllocatedContainers().size());
+		assertEquals(DEFAULT_TESTING_BIG_PROFILE, slotManager.getAllocatedContainers().get(0));
+	}
+
+	/**
+	 * Tests that we send duplicated slot request
+	 */
+	@Test
+	public void testDuplicatedSlotRequest() {
+		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
+		directlyProvideFreeSlots(slotManager, DEFAULT_TESTING_PROFILE, 1);
+
+		SlotRequest request1 = new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE);
+		SlotRequest request2 = new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_BIG_PROFILE);
+
+		slotManager.requestSlot(request1);
+		slotManager.requestSlot(request2);
+		slotManager.requestSlot(request2);
+		slotManager.requestSlot(request1);
+
+		assertEquals(1, slotManager.getAllocatedSlotCount());
+		assertEquals(0, slotManager.getFreeSlotCount());
+		assertEquals(1, slotManager.getPendingRequestCount());
+		assertEquals(1, slotManager.getAllocatedContainers().size());
+		assertEquals(DEFAULT_TESTING_BIG_PROFILE, slotManager.getAllocatedContainers().get(0));
+	}
+
+	/**
+	 * Tests that we send multiple slot requests
+	 */
+	@Test
+	public void testRequestMultipleSlots() {
+		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
+		directlyProvideFreeSlots(slotManager, DEFAULT_TESTING_PROFILE, 5);
+
+		// request 3 normal slots
+		for (int i = 0; i < 3; ++i) {
+			slotManager.requestSlot(new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE));
+		}
+
+		// request 2 big slots
+		for (int i = 0; i < 2; ++i) {
+			slotManager.requestSlot(new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_BIG_PROFILE));
+		}
+
+		// request 1 normal slot again
+		slotManager.requestSlot(new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE));
+
+		assertEquals(4, slotManager.getAllocatedSlotCount());
+		assertEquals(1, slotManager.getFreeSlotCount());
+		assertEquals(2, slotManager.getPendingRequestCount());
+		assertEquals(2, slotManager.getAllocatedContainers().size());
+		assertEquals(DEFAULT_TESTING_BIG_PROFILE, slotManager.getAllocatedContainers().get(0));
+		assertEquals(DEFAULT_TESTING_BIG_PROFILE, slotManager.getAllocatedContainers().get(1));
+	}
+
+	/**
+	 * Tests that a new slot appeared in SlotReport, and we used it to fulfill a pending request
+	 */
+	@Test
+	public void testNewlyAppearedFreeSlotFulfillPendingRequest() {
+		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
+		slotManager.requestSlot(new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE));
+		assertEquals(1, slotManager.getPendingRequestCount());
+
+		SlotID slotId = SlotID.generate();
+		SlotStatus slotStatus = new SlotStatus(slotId, DEFAULT_TESTING_PROFILE);
+		slotManager.updateSlotStatus(slotStatus);
+
+		assertEquals(1, slotManager.getAllocatedSlotCount());
+		assertEquals(0, slotManager.getFreeSlotCount());
+		assertEquals(0, slotManager.getPendingRequestCount());
+		assertTrue(slotManager.isAllocated(slotId));
+	}
+
+	/**
+	 * Tests that a new slot appeared in SlotReport, but we have no pending request
+	 */
+	@Test
+	public void testNewlyAppearedFreeSlot() {
+		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
+		SlotID slotId = SlotID.generate();
+		SlotStatus slotStatus = new SlotStatus(slotId, DEFAULT_TESTING_PROFILE);
+		slotManager.updateSlotStatus(slotStatus);
+
+		assertEquals(0, slotManager.getAllocatedSlotCount());
+		assertEquals(1, slotManager.getFreeSlotCount());
+	}
+
+	/**
+	 * Tests that a new slot appeared in SlotReport, but it't not suitable for all the pending requests
+	 */
+	@Test
+	public void testNewlyAppearedFreeSlotNotMatchPendingRequests() {
+		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
+		slotManager.requestSlot(new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_BIG_PROFILE));
+		assertEquals(1, slotManager.getPendingRequestCount());
+
+		SlotID slotId = SlotID.generate();
+		SlotStatus slotStatus = new SlotStatus(slotId, DEFAULT_TESTING_PROFILE);
+		slotManager.updateSlotStatus(slotStatus);
+
+		assertEquals(0, slotManager.getAllocatedSlotCount());
+		assertEquals(1, slotManager.getFreeSlotCount());
+		assertEquals(1, slotManager.getPendingRequestCount());
+		assertFalse(slotManager.isAllocated(slotId));
+	}
+
+	/**
+	 * Tests that a new slot appeared in SlotReport, and it's been reported using by some job
+	 */
+	@Test
+	public void testNewlyAppearedInUseSlot() {
+		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
+
+		SlotID slotId = SlotID.generate();
+		SlotStatus slotStatus = new SlotStatus(slotId, DEFAULT_TESTING_PROFILE, new AllocationID(), new JobID());
+		slotManager.updateSlotStatus(slotStatus);
+
+		assertEquals(1, slotManager.getAllocatedSlotCount());
+		assertEquals(0, slotManager.getFreeSlotCount());
+		assertTrue(slotManager.isAllocated(slotId));
+	}
+
+	/**
+	 * Tests that we had a slot in-use, and it's confirmed by SlotReport
+	 */
+	@Test
+	public void testExistingInUseSlotUpdateStatus() {
+		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
+		SlotRequest request = new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE);
+		slotManager.requestSlot(request);
+
+		// make this slot in use
+		SlotID slotId = SlotID.generate();
+		SlotStatus slotStatus = new SlotStatus(slotId, DEFAULT_TESTING_PROFILE);
+		slotManager.updateSlotStatus(slotStatus);
+
+		assertEquals(1, slotManager.getAllocatedSlotCount());
+		assertEquals(0, slotManager.getFreeSlotCount());
+		assertTrue(slotManager.isAllocated(slotId));
+
+		// slot status is confirmed
+		SlotStatus slotStatus2 = new SlotStatus(slotId, DEFAULT_TESTING_PROFILE,
+			request.getAllocationId(), request.getJobId());
+		slotManager.updateSlotStatus(slotStatus2);
+
+		assertEquals(1, slotManager.getAllocatedSlotCount());
+		assertEquals(0, slotManager.getFreeSlotCount());
+		assertTrue(slotManager.isAllocated(slotId));
+	}
+
+	/**
+	 * Tests that we had a slot in-use, but it's empty according to the SlotReport
+	 */
+	@Test
+	public void testExistingInUseSlotAdjustedToEmpty() {
+		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
+		SlotRequest request1 = new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE);
+		slotManager.requestSlot(request1);
+
+		// make this slot in use
+		SlotID slotId = SlotID.generate();
+		SlotStatus slotStatus = new SlotStatus(slotId, DEFAULT_TESTING_PROFILE);
+		slotManager.updateSlotStatus(slotStatus);
+
+		// another request pending
+		SlotRequest request2 = new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE);
+		slotManager.requestSlot(request2);
+
+		assertEquals(1, slotManager.getAllocatedSlotCount());
+		assertEquals(0, slotManager.getFreeSlotCount());
+		assertEquals(1, slotManager.getPendingRequestCount());
+		assertTrue(slotManager.isAllocated(slotId));
+		assertTrue(slotManager.isAllocated(request1.getAllocationId()));
+
+
+		// but slot is reported empty again, request2 will be fulfilled, request1 will be missing
+		slotManager.updateSlotStatus(slotStatus);
+
+		assertEquals(1, slotManager.getAllocatedSlotCount());
+		assertEquals(0, slotManager.getFreeSlotCount());
+		assertEquals(0, slotManager.getPendingRequestCount());
+		assertTrue(slotManager.isAllocated(slotId));
+		assertTrue(slotManager.isAllocated(request2.getAllocationId()));
+	}
+
+	/**
+	 * Tests that we had a slot in use, and it's also reported in use by TaskManager, but the allocation
+	 * information didn't match.
+	 */
+	@Test
+	public void testExistingInUseSlotWithDifferentAllocationInfo() {
+		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
+		SlotRequest request = new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE);
+		slotManager.requestSlot(request);
+
+		// make this slot in use
+		SlotID slotId = SlotID.generate();
+		SlotStatus slotStatus = new SlotStatus(slotId, DEFAULT_TESTING_PROFILE);
+		slotManager.updateSlotStatus(slotStatus);
+
+		assertEquals(1, slotManager.getAllocatedSlotCount());
+		assertEquals(0, slotManager.getFreeSlotCount());
+		assertEquals(0, slotManager.getPendingRequestCount());
+		assertTrue(slotManager.isAllocated(slotId));
+		assertTrue(slotManager.isAllocated(request.getAllocationId()));
+
+		SlotStatus slotStatus2 = new SlotStatus(slotId, DEFAULT_TESTING_PROFILE, new AllocationID(), new JobID());
+		// update slot status with different allocation info
+		slotManager.updateSlotStatus(slotStatus2);
+
+		// original request is missing and won't be allocated
+		assertEquals(1, slotManager.getAllocatedSlotCount());
+		assertEquals(0, slotManager.getFreeSlotCount());
+		assertEquals(0, slotManager.getPendingRequestCount());
+		assertTrue(slotManager.isAllocated(slotId));
+		assertFalse(slotManager.isAllocated(request.getAllocationId()));
+		assertTrue(slotManager.isAllocated(slotStatus2.getAllocationID()));
+	}
+
+	/**
+	 * Tests that we had a free slot, and it's confirmed by SlotReport
+	 */
+	@Test
+	public void testExistingEmptySlotUpdateStatus() {
+		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
+		ResourceSlot slot = new ResourceSlot(SlotID.generate(), DEFAULT_TESTING_PROFILE);
+		slotManager.addFreeSlot(slot);
+
+		SlotStatus slotStatus = new SlotStatus(slot.getSlotId(), DEFAULT_TESTING_PROFILE);
+		slotManager.updateSlotStatus(slotStatus);
+
+		assertEquals(0, slotManager.getAllocatedSlotCount());
+		assertEquals(1, slotManager.getFreeSlotCount());
+		assertEquals(0, slotManager.getPendingRequestCount());
+	}
+
+	/**
+	 * Tests that we had a free slot, and it's reported in-use by TaskManager
+	 */
+	@Test
+	public void testExistingEmptySlotAdjustedToInUse() {
+		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
+		ResourceSlot slot = new ResourceSlot(SlotID.generate(), DEFAULT_TESTING_PROFILE);
+		slotManager.addFreeSlot(slot);
+
+		SlotStatus slotStatus = new SlotStatus(slot.getSlotId(), DEFAULT_TESTING_PROFILE,
+			new AllocationID(), new JobID());
+		slotManager.updateSlotStatus(slotStatus);
+
+		assertEquals(1, slotManager.getAllocatedSlotCount());
+		assertEquals(0, slotManager.getFreeSlotCount());
+		assertEquals(0, slotManager.getPendingRequestCount());
+		assertTrue(slotManager.isAllocated(slot.getSlotId()));
+	}
+
+	/**
+	 * Tests that we did some allocation but failed / rejected by TaskManager, request will retry
+	 */
+	@Test
+	public void testSlotAllocationFailedAtTaskManager() {
+		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
+		ResourceSlot slot = new ResourceSlot(SlotID.generate(), DEFAULT_TESTING_PROFILE);
+		slotManager.addFreeSlot(slot);
+
+		SlotRequest request = new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE);
+		slotManager.requestSlot(request);
+
+		assertEquals(1, slotManager.getAllocatedSlotCount());
+		assertEquals(0, slotManager.getFreeSlotCount());
+		assertEquals(0, slotManager.getPendingRequestCount());
+		assertTrue(slotManager.isAllocated(slot.getSlotId()));
+
+		slotManager.handleSlotRequestFailedAtTaskManager(request, slot.getSlotId());
+
+		assertEquals(1, slotManager.getAllocatedSlotCount());
+		assertEquals(0, slotManager.getFreeSlotCount());
+		assertEquals(0, slotManager.getPendingRequestCount());
+	}
+
+
+	/**
+	 * Tests that we did some allocation but failed / rejected by TaskManager, and slot is occupied by another request
+	 */
+	@Test
+	public void testSlotAllocationFailedAtTaskManagerOccupiedByOther() {
+		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
+		ResourceSlot slot = new ResourceSlot(SlotID.generate(), DEFAULT_TESTING_PROFILE);
+		slotManager.addFreeSlot(slot);
+
+		SlotRequest request = new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE);
+		slotManager.requestSlot(request);
+
+		// slot is set empty by heartbeat
+		SlotStatus slotStatus = new SlotStatus(slot.getSlotId(), slot.getResourceProfile());
+		slotManager.updateSlotStatus(slotStatus);
+
+		// another request took this slot
+		SlotRequest request2 = new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE);
+		slotManager.requestSlot(request2);
+
+		assertEquals(1, slotManager.getAllocatedSlotCount());
+		assertEquals(0, slotManager.getFreeSlotCount());
+		assertEquals(0, slotManager.getPendingRequestCount());
+		assertFalse(slotManager.isAllocated(request.getAllocationId()));
+		assertTrue(slotManager.isAllocated(request2.getAllocationId()));
+
+		// original request should be pended
+		slotManager.handleSlotRequestFailedAtTaskManager(request, slot.getSlotId());
+
+		assertEquals(1, slotManager.getAllocatedSlotCount());
+		assertEquals(0, slotManager.getFreeSlotCount());
+		assertEquals(1, slotManager.getPendingRequestCount());
+		assertFalse(slotManager.isAllocated(request.getAllocationId()));
+		assertTrue(slotManager.isAllocated(request2.getAllocationId()));
+	}
+
+	@Test
+	public void testNotifyTaskManagerFailure() {
+		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
+
+		ResourceID resource1 = ResourceID.generate();
+		ResourceID resource2 = ResourceID.generate();
+
+		ResourceSlot slot11 = new ResourceSlot(new SlotID(resource1, 1), DEFAULT_TESTING_PROFILE);
+		ResourceSlot slot12 = new ResourceSlot(new SlotID(resource1, 2), DEFAULT_TESTING_PROFILE);
+		ResourceSlot slot21 = new ResourceSlot(new SlotID(resource2, 1), DEFAULT_TESTING_PROFILE);
+		ResourceSlot slot22 = new ResourceSlot(new SlotID(resource2, 2), DEFAULT_TESTING_PROFILE);
+
+		slotManager.addFreeSlot(slot11);
+		slotManager.addFreeSlot(slot21);
+
+		slotManager.requestSlot(new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE));
+		slotManager.requestSlot(new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE));
+
+		assertEquals(2, slotManager.getAllocatedSlotCount());
+		assertEquals(0, slotManager.getFreeSlotCount());
+		assertEquals(0, slotManager.getPendingRequestCount());
+
+		slotManager.addFreeSlot(slot12);
+		slotManager.addFreeSlot(slot22);
+
+		assertEquals(2, slotManager.getAllocatedSlotCount());
+		assertEquals(2, slotManager.getFreeSlotCount());
+		assertEquals(0, slotManager.getPendingRequestCount());
+
+		slotManager.notifyTaskManagerFailure(resource2);
+
+		assertEquals(1, slotManager.getAllocatedSlotCount());
+		assertEquals(1, slotManager.getFreeSlotCount());
+		assertEquals(0, slotManager.getPendingRequestCount());
+
+		// notify an not exist resource failure
+		slotManager.notifyTaskManagerFailure(ResourceID.generate());
+
+		assertEquals(1, slotManager.getAllocatedSlotCount());
+		assertEquals(1, slotManager.getFreeSlotCount());
+		assertEquals(0, slotManager.getPendingRequestCount());
+	}
+
+	// ------------------------------------------------------------------------
+	//  testing utilities
+	// ------------------------------------------------------------------------
+
+	private void directlyProvideFreeSlots(
+		final SlotManager slotManager,
+		final ResourceProfile resourceProfile,
+		final int freeSlotNum)
+	{
+		for (int i = 0; i < freeSlotNum; ++i) {
+			slotManager.addFreeSlot(new ResourceSlot(SlotID.generate(), new ResourceProfile(resourceProfile)));
+		}
+	}
+
+	// ------------------------------------------------------------------------
+	//  testing classes
+	// ------------------------------------------------------------------------
+
+	private static class TestingSlotManager extends SlotManager {
+
+		private final List<ResourceProfile> allocatedContainers;
+
+		TestingSlotManager(ResourceManagerGateway resourceManagerGateway) {
+			super(resourceManagerGateway);
+			this.allocatedContainers = new LinkedList<>();
+		}
+
+		/**
+		 * Choose slot randomly if it matches requirement
+		 *
+		 * @param request   The slot request
+		 * @param freeSlots All slots which can be used
+		 * @return The chosen slot or null if cannot find a match
+		 */
+		@Override
+		protected ResourceSlot chooseSlotToUse(SlotRequest request, Map<SlotID, ResourceSlot> freeSlots) {
+			for (ResourceSlot slot : freeSlots.values()) {
+				if (slot.isMatchingRequirement(request.getResourceProfile())) {
+					return slot;
+				}
+			}
+			return null;
+		}
+
+		/**
+		 * Choose request randomly if offered slot can match its requirement
+		 *
+		 * @param offeredSlot     The free slot
+		 * @param pendingRequests All the pending slot requests
+		 * @return The chosen request's AllocationID or null if cannot find a match
+		 */
+		@Override
+		protected SlotRequest chooseRequestToFulfill(ResourceSlot offeredSlot,
+			Map<AllocationID, SlotRequest> pendingRequests)
+		{
+			for (Map.Entry<AllocationID, SlotRequest> pendingRequest : pendingRequests.entrySet()) {
+				if (offeredSlot.isMatchingRequirement(pendingRequest.getValue().getResourceProfile())) {
+					return pendingRequest.getValue();
+				}
+			}
+			return null;
+		}
+
+		@Override
+		protected void allocateContainer(ResourceProfile resourceProfile) {
+			allocatedContainers.add(resourceProfile);
+		}
+
+		List<ResourceProfile> getAllocatedContainers() {
+			return allocatedContainers;
+		}
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/2f12ba3e/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcServiceTest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcServiceTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcServiceTest.java
index 2790cf8..f55069e 100644
--- a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcServiceTest.java
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcServiceTest.java
@@ -21,28 +21,14 @@ package org.apache.flink.runtime.rpc.akka;
 import akka.actor.ActorSystem;
 import akka.util.Timeout;
 import org.apache.flink.core.testutils.OneShotLatch;
-import org.apache.flink.configuration.Configuration;
 import org.apache.flink.runtime.akka.AkkaUtils;
-import org.apache.flink.runtime.highavailability.HighAvailabilityServices;
-import org.apache.flink.runtime.highavailability.NonHaServices;
-import org.apache.flink.runtime.jobgraph.JobGraph;
-import org.apache.flink.runtime.rpc.jobmaster.JobMaster;
-import org.apache.flink.runtime.rpc.resourcemanager.ResourceManagerGateway;
-import org.apache.flink.runtime.rpc.resourcemanager.ResourceManager;
 import org.apache.flink.util.TestLogger;
 
 import org.junit.AfterClass;
 import org.junit.Test;
 
-import org.mockito.Mockito;
-import scala.concurrent.duration.Deadline;
-import scala.concurrent.duration.FiniteDuration;
-
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.ForkJoinPool;
 import java.util.concurrent.TimeUnit;
 
-import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 
 public class AkkaRpcServiceTest extends TestLogger {

http://git-wip-us.apache.org/repos/asf/flink/blob/2f12ba3e/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/registration/RetryingRegistrationTest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/registration/RetryingRegistrationTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/registration/RetryingRegistrationTest.java
deleted file mode 100644
index 9508825..0000000
--- a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/registration/RetryingRegistrationTest.java
+++ /dev/null
@@ -1,336 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.flink.runtime.rpc.registration;
-
-import akka.dispatch.Futures;
-
-import org.apache.flink.api.java.tuple.Tuple2;
-import org.apache.flink.runtime.rpc.RpcService;
-import org.apache.flink.runtime.rpc.TestingRpcService;
-import org.apache.flink.util.TestLogger;
-
-import org.junit.Test;
-
-import org.slf4j.LoggerFactory;
-
-import scala.concurrent.Await;
-import scala.concurrent.ExecutionContext$;
-import scala.concurrent.Future;
-import scala.concurrent.Promise;
-import scala.concurrent.duration.FiniteDuration;
-
-import java.util.UUID;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.TimeoutException;
-
-import static java.util.concurrent.TimeUnit.SECONDS;
-import static org.junit.Assert.*;
-import static org.mockito.Mockito.*;
-
-/**
- * Tests for the generic retrying registration class, validating the failure, retry, and back-off behavior.
- */
-public class RetryingRegistrationTest extends TestLogger {
-
-	@Test
-	public void testSimpleSuccessfulRegistration() throws Exception {
-		final String testId = "laissez les bon temps roulez";
-		final String testEndpointAddress = "<test-address>";
-		final UUID leaderId = UUID.randomUUID();
-
-		// an endpoint that immediately returns success
-		TestRegistrationGateway testGateway = new TestRegistrationGateway(new TestRegistrationSuccess(testId));
-		TestingRpcService rpc = new TestingRpcService();
-
-		try {
-			rpc.registerGateway(testEndpointAddress, testGateway);
-
-			TestRetryingRegistration registration = new TestRetryingRegistration(rpc, testEndpointAddress, leaderId);
-			registration.startRegistration();
-
-			Future<Tuple2<TestRegistrationGateway, TestRegistrationSuccess>> future = registration.getFuture();
-			assertNotNull(future);
-
-			// multiple accesses return the same future
-			assertEquals(future, registration.getFuture());
-
-			Tuple2<TestRegistrationGateway, TestRegistrationSuccess> success = 
-					Await.result(future, new FiniteDuration(10, SECONDS));
-
-			// validate correct invocation and result
-			assertEquals(testId, success.f1.getCorrelationId());
-			assertEquals(leaderId, testGateway.getInvocations().take().leaderId());
-		}
-		finally {
-			testGateway.stop();
-			rpc.stopService();
-		}
-	}
-	
-	@Test
-	public void testPropagateFailures() throws Exception {
-		final String testExceptionMessage = "testExceptionMessage";
-
-		// RPC service that fails with exception upon the connection
-		RpcService rpc = mock(RpcService.class);
-		when(rpc.connect(anyString(), any(Class.class))).thenThrow(new RuntimeException(testExceptionMessage));
-
-		TestRetryingRegistration registration = new TestRetryingRegistration(rpc, "testaddress", UUID.randomUUID());
-		registration.startRegistration();
-
-		Future<?> future = registration.getFuture();
-		assertTrue(future.failed().isCompleted());
-
-		assertEquals(testExceptionMessage, future.failed().value().get().get().getMessage());
-	}
-
-	@Test
-	public void testRetryConnectOnFailure() throws Exception {
-		final String testId = "laissez les bon temps roulez";
-		final UUID leaderId = UUID.randomUUID();
-
-		ExecutorService executor = Executors.newCachedThreadPool();
-		TestRegistrationGateway testGateway = new TestRegistrationGateway(new TestRegistrationSuccess(testId));
-
-		try {
-			// RPC service that fails upon the first connection, but succeeds on the second
-			RpcService rpc = mock(RpcService.class);
-			when(rpc.connect(anyString(), any(Class.class))).thenReturn(
-					Futures.failed(new Exception("test connect failure")),  // first connection attempt fails
-					Futures.successful(testGateway)                         // second connection attempt succeeds
-			);
-			when(rpc.getExecutionContext()).thenReturn(ExecutionContext$.MODULE$.fromExecutor(executor));
-
-			TestRetryingRegistration registration = new TestRetryingRegistration(rpc, "foobar address", leaderId);
-			registration.startRegistration();
-
-			Tuple2<TestRegistrationGateway, TestRegistrationSuccess> success =
-					Await.result(registration.getFuture(), new FiniteDuration(10, SECONDS));
-
-			// validate correct invocation and result
-			assertEquals(testId, success.f1.getCorrelationId());
-			assertEquals(leaderId, testGateway.getInvocations().take().leaderId());
-		}
-		finally {
-			testGateway.stop();
-			executor.shutdown();
-		}
-	}
-
-	@Test
-	public void testRetriesOnTimeouts() throws Exception {
-		final String testId = "rien ne va plus";
-		final String testEndpointAddress = "<test-address>";
-		final UUID leaderId = UUID.randomUUID();
-
-		// an endpoint that immediately returns futures with timeouts before returning a successful future
-		TestRegistrationGateway testGateway = new TestRegistrationGateway(
-				null, // timeout
-				null, // timeout
-				new TestRegistrationSuccess(testId) // success
-		);
-
-		TestingRpcService rpc = new TestingRpcService();
-
-		try {
-			rpc.registerGateway(testEndpointAddress, testGateway);
-	
-			TestRetryingRegistration registration = new TestRetryingRegistration(rpc, testEndpointAddress, leaderId);
-	
-			long started = System.nanoTime();
-			registration.startRegistration();
-	
-			Future<Tuple2<TestRegistrationGateway, TestRegistrationSuccess>> future = registration.getFuture();
-			Tuple2<TestRegistrationGateway, TestRegistrationSuccess> success =
-					Await.result(future, new FiniteDuration(10, SECONDS));
-	
-			long finished = System.nanoTime();
-			long elapsedMillis = (finished - started) / 1000000;
-	
-			// validate correct invocation and result
-			assertEquals(testId, success.f1.getCorrelationId());
-			assertEquals(leaderId, testGateway.getInvocations().take().leaderId());
-	
-			// validate that some retry-delay / back-off behavior happened
-			assertTrue("retries did not properly back off", elapsedMillis >= 3 * TestRetryingRegistration.INITIAL_TIMEOUT);
-		}
-		finally {
-			rpc.stopService();
-			testGateway.stop();
-		}
-	}
-
-	@Test
-	public void testDecline() throws Exception {
-		final String testId = "qui a coupe le fromage";
-		final String testEndpointAddress = "<test-address>";
-		final UUID leaderId = UUID.randomUUID();
-
-		TestingRpcService rpc = new TestingRpcService();
-
-		TestRegistrationGateway testGateway = new TestRegistrationGateway(
-				null, // timeout
-				new RegistrationResponse.Decline("no reason "),
-				null, // timeout
-				new TestRegistrationSuccess(testId) // success
-		);
-
-		try {
-			rpc.registerGateway(testEndpointAddress, testGateway);
-
-			TestRetryingRegistration registration = new TestRetryingRegistration(rpc, testEndpointAddress, leaderId);
-
-			long started = System.nanoTime();
-			registration.startRegistration();
-	
-			Future<Tuple2<TestRegistrationGateway, TestRegistrationSuccess>> future = registration.getFuture();
-			Tuple2<TestRegistrationGateway, TestRegistrationSuccess> success =
-					Await.result(future, new FiniteDuration(10, SECONDS));
-
-			long finished = System.nanoTime();
-			long elapsedMillis = (finished - started) / 1000000;
-
-			// validate correct invocation and result
-			assertEquals(testId, success.f1.getCorrelationId());
-			assertEquals(leaderId, testGateway.getInvocations().take().leaderId());
-
-			// validate that some retry-delay / back-off behavior happened
-			assertTrue("retries did not properly back off", elapsedMillis >= 
-					2 * TestRetryingRegistration.INITIAL_TIMEOUT + TestRetryingRegistration.DELAY_ON_DECLINE);
-		}
-		finally {
-			testGateway.stop();
-			rpc.stopService();
-		}
-	}
-	
-	@Test
-	@SuppressWarnings("unchecked")
-	public void testRetryOnError() throws Exception {
-		final String testId = "Petit a petit, l'oiseau fait son nid";
-		final String testEndpointAddress = "<test-address>";
-		final UUID leaderId = UUID.randomUUID();
-
-		TestingRpcService rpc = new TestingRpcService();
-
-		try {
-			// gateway that upon calls first responds with a failure, then with a success
-			TestRegistrationGateway testGateway = mock(TestRegistrationGateway.class);
-
-			when(testGateway.registrationCall(any(UUID.class), anyLong())).thenReturn(
-					Futures.<RegistrationResponse>failed(new Exception("test exception")),
-					Futures.<RegistrationResponse>successful(new TestRegistrationSuccess(testId)));
-			
-			rpc.registerGateway(testEndpointAddress, testGateway);
-
-			TestRetryingRegistration registration = new TestRetryingRegistration(rpc, testEndpointAddress, leaderId);
-
-			long started = System.nanoTime();
-			registration.startRegistration();
-
-			Future<Tuple2<TestRegistrationGateway, TestRegistrationSuccess>> future = registration.getFuture();
-			Tuple2<TestRegistrationGateway, TestRegistrationSuccess> success =
-					Await.result(future, new FiniteDuration(10, SECONDS));
-
-			long finished = System.nanoTime();
-			long elapsedMillis = (finished - started) / 1000000;
-			
-			assertEquals(testId, success.f1.getCorrelationId());
-
-			// validate that some retry-delay / back-off behavior happened
-			assertTrue("retries did not properly back off",
-					elapsedMillis >= TestRetryingRegistration.DELAY_ON_ERROR);
-		}
-		finally {
-			rpc.stopService();
-		}
-	}
-
-	@Test
-	public void testCancellation() throws Exception {
-		final String testEndpointAddress = "my-test-address";
-		final UUID leaderId = UUID.randomUUID();
-
-		TestingRpcService rpc = new TestingRpcService();
-
-		try {
-			Promise<RegistrationResponse> result = Futures.promise();
-
-			TestRegistrationGateway testGateway = mock(TestRegistrationGateway.class);
-			when(testGateway.registrationCall(any(UUID.class), anyLong())).thenReturn(result.future());
-
-			rpc.registerGateway(testEndpointAddress, testGateway);
-
-			TestRetryingRegistration registration = new TestRetryingRegistration(rpc, testEndpointAddress, leaderId);
-			registration.startRegistration();
-
-			// cancel and fail the current registration attempt
-			registration.cancel();
-			result.failure(new TimeoutException());
-
-			// there should not be a second registration attempt
-			verify(testGateway, atMost(1)).registrationCall(any(UUID.class), anyLong());
-		}
-		finally {
-			rpc.stopService();
-		}
-	}
-
-	// ------------------------------------------------------------------------
-	//  test registration
-	// ------------------------------------------------------------------------
-
-	private static class TestRegistrationSuccess extends RegistrationResponse.Success {
-		private static final long serialVersionUID = 5542698790917150604L;
-
-		private final String correlationId;
-
-		private TestRegistrationSuccess(String correlationId) {
-			this.correlationId = correlationId;
-		}
-
-		public String getCorrelationId() {
-			return correlationId;
-		}
-	}
-
-	private static class TestRetryingRegistration extends RetryingRegistration<TestRegistrationGateway, TestRegistrationSuccess> {
-
-		// we use shorter timeouts here to speed up the tests
-		static final long INITIAL_TIMEOUT = 20;
-		static final long MAX_TIMEOUT = 200;
-		static final long DELAY_ON_ERROR = 200;
-		static final long DELAY_ON_DECLINE = 200;
-
-		public TestRetryingRegistration(RpcService rpc, String targetAddress, UUID leaderId) {
-			super(LoggerFactory.getLogger(RetryingRegistrationTest.class),
-					rpc, "TestEndpoint",
-					TestRegistrationGateway.class,
-					targetAddress, leaderId,
-					INITIAL_TIMEOUT, MAX_TIMEOUT, DELAY_ON_ERROR, DELAY_ON_DECLINE);
-		}
-
-		@Override
-		protected Future<RegistrationResponse> invokeRegistration(
-				TestRegistrationGateway gateway, UUID leaderId, long timeoutMillis) {
-			return gateway.registrationCall(leaderId, timeoutMillis);
-		}
-	}
-}

http://git-wip-us.apache.org/repos/asf/flink/blob/2f12ba3e/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/registration/TestRegistrationGateway.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/registration/TestRegistrationGateway.java b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/registration/TestRegistrationGateway.java
deleted file mode 100644
index a049e48..0000000
--- a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/registration/TestRegistrationGateway.java
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.flink.runtime.rpc.registration;
-
-import akka.dispatch.Futures;
-
-import org.apache.flink.runtime.rpc.TestingGatewayBase;
-import org.apache.flink.util.Preconditions;
-
-import scala.concurrent.Future;
-
-import java.util.UUID;
-import java.util.concurrent.BlockingQueue;
-import java.util.concurrent.LinkedBlockingQueue;
-
-public class TestRegistrationGateway extends TestingGatewayBase {
-
-	private final BlockingQueue<RegistrationCall> invocations;
-
-	private final RegistrationResponse[] responses;
-
-	private int pos;
-
-	public TestRegistrationGateway(RegistrationResponse... responses) {
-		Preconditions.checkArgument(responses != null && responses.length > 0);
-
-		this.invocations = new LinkedBlockingQueue<>();
-		this.responses = responses;
-		
-	}
-
-	// ------------------------------------------------------------------------
-
-	public Future<RegistrationResponse> registrationCall(UUID leaderId, long timeout) {
-		invocations.add(new RegistrationCall(leaderId, timeout));
-
-		RegistrationResponse response = responses[pos];
-		if (pos < responses.length - 1) {
-			pos++;
-		}
-
-		// return a completed future (for a proper value), or one that never completes and will time out (for null)
-		return response != null ? Futures.successful(response) : this.<RegistrationResponse>futureWithTimeout(timeout);
-	}
-
-	public BlockingQueue<RegistrationCall> getInvocations() {
-		return invocations;
-	}
-
-	// ------------------------------------------------------------------------
-
-	public static class RegistrationCall {
-		private final UUID leaderId;
-		private final long timeout;
-
-		public RegistrationCall(UUID leaderId, long timeout) {
-			this.leaderId = leaderId;
-			this.timeout = timeout;
-		}
-
-		public UUID leaderId() {
-			return leaderId;
-		}
-
-		public long timeout() {
-			return timeout;
-		}
-	}
-}

http://git-wip-us.apache.org/repos/asf/flink/blob/2f12ba3e/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/resourcemanager/ResourceManagerHATest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/resourcemanager/ResourceManagerHATest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/resourcemanager/ResourceManagerHATest.java
deleted file mode 100644
index dfffeda..0000000
--- a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/resourcemanager/ResourceManagerHATest.java
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.flink.runtime.rpc.resourcemanager;
-
-import org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices;
-import org.apache.flink.runtime.leaderelection.TestingLeaderElectionService;
-import org.apache.flink.runtime.rpc.MainThreadExecutor;
-import org.apache.flink.runtime.rpc.RpcEndpoint;
-import org.apache.flink.runtime.rpc.RpcGateway;
-import org.apache.flink.runtime.rpc.RpcService;
-import org.apache.flink.runtime.rpc.StartStoppable;
-import org.junit.Assert;
-import org.junit.Test;
-
-import java.util.UUID;
-
-import static org.mockito.Matchers.any;
-import static org.mockito.Mockito.doCallRealMethod;
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.when;
-
-/**
- * resourceManager HA test, including grant leadership and revoke leadership
- */
-public class ResourceManagerHATest {
-
-	@Test
-	public void testGrantAndRevokeLeadership() throws Exception {
-		// mock a RpcService which will return a special RpcGateway when call its startServer method, the returned RpcGateway directly execute runAsync call
-		TestingResourceManagerGatewayProxy gateway = mock(TestingResourceManagerGatewayProxy.class);
-		doCallRealMethod().when(gateway).runAsync(any(Runnable.class));
-
-		RpcService rpcService = mock(RpcService.class);
-		when(rpcService.startServer(any(RpcEndpoint.class))).thenReturn(gateway);
-
-		TestingLeaderElectionService leaderElectionService = new TestingLeaderElectionService();
-		TestingHighAvailabilityServices highAvailabilityServices = new TestingHighAvailabilityServices();
-		highAvailabilityServices.setResourceManagerLeaderElectionService(leaderElectionService);
-
-		final ResourceManager resourceManager = new ResourceManager(rpcService, highAvailabilityServices);
-		resourceManager.start();
-		// before grant leadership, resourceManager's leaderId is null
-		Assert.assertNull(resourceManager.getLeaderSessionID());
-		final UUID leaderId = UUID.randomUUID();
-		leaderElectionService.isLeader(leaderId);
-		// after grant leadership, resourceManager's leaderId has value
-		Assert.assertEquals(leaderId, resourceManager.getLeaderSessionID());
-		// then revoke leadership, resourceManager's leaderId is null again
-		leaderElectionService.notLeader();
-		Assert.assertNull(resourceManager.getLeaderSessionID());
-	}
-
-	private static abstract class TestingResourceManagerGatewayProxy implements MainThreadExecutor, StartStoppable, RpcGateway {
-		@Override
-		public void runAsync(Runnable runnable) {
-			runnable.run();
-		}
-	}
-
-}

http://git-wip-us.apache.org/repos/asf/flink/blob/2f12ba3e/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorTest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorTest.java
deleted file mode 100644
index 25a670c..0000000
--- a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorTest.java
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.flink.runtime.rpc.taskexecutor;
-
-import org.apache.flink.configuration.Configuration;
-import org.apache.flink.runtime.clusterframework.types.ResourceID;
-import org.apache.flink.runtime.highavailability.NonHaServices;
-import org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices;
-import org.apache.flink.runtime.leaderelection.TestingLeaderRetrievalService;
-import org.apache.flink.runtime.rpc.TestingRpcService;
-import org.apache.flink.runtime.rpc.resourcemanager.ResourceManagerGateway;
-import org.apache.flink.util.TestLogger;
-
-import org.junit.Test;
-
-import scala.concurrent.duration.FiniteDuration;
-
-import java.util.UUID;
-
-import static org.junit.Assert.*;
-import static org.mockito.Mockito.*;
-
-public class TaskExecutorTest extends TestLogger {
-
-	@Test
-	public void testImmediatelyRegistersIfLeaderIsKnown() throws Exception {
-		final ResourceID resourceID = ResourceID.generate();
-		final String resourceManagerAddress = "/resource/manager/address/one";
-
-		final TestingRpcService rpc = new TestingRpcService();
-		try {
-			// register a mock resource manager gateway
-			ResourceManagerGateway rmGateway = mock(ResourceManagerGateway.class);
-			rpc.registerGateway(resourceManagerAddress, rmGateway);
-
-			NonHaServices haServices = new NonHaServices(resourceManagerAddress);
-			TaskExecutor taskManager = TaskExecutor.startTaskManagerComponentsAndActor(
-				new Configuration(), resourceID, rpc, "localhost", haServices, true);
-			String taskManagerAddress = taskManager.getAddress();
-			taskManager.start();
-
-			verify(rmGateway, timeout(5000)).registerTaskExecutor(
-					any(UUID.class), eq(taskManagerAddress), eq(resourceID), any(FiniteDuration.class));
-		}
-		finally {
-			rpc.stopService();
-		}
-	}
-
-	@Test
-	public void testTriggerRegistrationOnLeaderChange() throws Exception {
-		final ResourceID resourceID = ResourceID.generate();
-
-		final String address1 = "/resource/manager/address/one";
-		final String address2 = "/resource/manager/address/two";
-		final UUID leaderId1 = UUID.randomUUID();
-		final UUID leaderId2 = UUID.randomUUID();
-
-		final TestingRpcService rpc = new TestingRpcService();
-		try {
-			// register the mock resource manager gateways
-			ResourceManagerGateway rmGateway1 = mock(ResourceManagerGateway.class);
-			ResourceManagerGateway rmGateway2 = mock(ResourceManagerGateway.class);
-			rpc.registerGateway(address1, rmGateway1);
-			rpc.registerGateway(address2, rmGateway2);
-
-			TestingLeaderRetrievalService testLeaderService = new TestingLeaderRetrievalService();
-
-			TestingHighAvailabilityServices haServices = new TestingHighAvailabilityServices();
-			haServices.setResourceManagerLeaderRetriever(testLeaderService);
-
-			TaskExecutor taskManager = TaskExecutor.startTaskManagerComponentsAndActor(
-				new Configuration(), resourceID, rpc, "localhost", haServices, true);
-			String taskManagerAddress = taskManager.getAddress();
-			taskManager.start();
-
-			// no connection initially, since there is no leader
-			assertNull(taskManager.getResourceManagerConnection());
-
-			// define a leader and see that a registration happens
-			testLeaderService.notifyListener(address1, leaderId1);
-
-			verify(rmGateway1, timeout(5000)).registerTaskExecutor(
-					eq(leaderId1), eq(taskManagerAddress), eq(resourceID), any(FiniteDuration.class));
-			assertNotNull(taskManager.getResourceManagerConnection());
-
-			// cancel the leader 
-			testLeaderService.notifyListener(null, null);
-
-			// set a new leader, see that a registration happens 
-			testLeaderService.notifyListener(address2, leaderId2);
-
-			verify(rmGateway2, timeout(5000)).registerTaskExecutor(
-					eq(leaderId2), eq(taskManagerAddress), eq(resourceID), any(FiniteDuration.class));
-			assertNotNull(taskManager.getResourceManagerConnection());
-		}
-		finally {
-			rpc.stopService();
-		}
-	}
-}

http://git-wip-us.apache.org/repos/asf/flink/blob/2f12ba3e/flink-runtime/src/test/java/org/apache/flink/runtime/taskexecutor/TaskExecutorTest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/taskexecutor/TaskExecutorTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/taskexecutor/TaskExecutorTest.java
new file mode 100644
index 0000000..a8d5bd7
--- /dev/null
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/taskexecutor/TaskExecutorTest.java
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.taskexecutor;
+
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.runtime.clusterframework.types.ResourceID;
+import org.apache.flink.runtime.highavailability.NonHaServices;
+import org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices;
+import org.apache.flink.runtime.leaderelection.TestingLeaderRetrievalService;
+import org.apache.flink.runtime.rpc.TestingRpcService;
+import org.apache.flink.runtime.resourcemanager.ResourceManagerGateway;
+import org.apache.flink.util.TestLogger;
+
+import org.junit.Test;
+
+import scala.concurrent.duration.FiniteDuration;
+
+import java.util.UUID;
+
+import static org.junit.Assert.*;
+import static org.mockito.Mockito.*;
+
+public class TaskExecutorTest extends TestLogger {
+
+	@Test
+	public void testImmediatelyRegistersIfLeaderIsKnown() throws Exception {
+		final ResourceID resourceID = ResourceID.generate();
+		final String resourceManagerAddress = "/resource/manager/address/one";
+
+		final TestingRpcService rpc = new TestingRpcService();
+		try {
+			// register a mock resource manager gateway
+			ResourceManagerGateway rmGateway = mock(ResourceManagerGateway.class);
+			rpc.registerGateway(resourceManagerAddress, rmGateway);
+
+			NonHaServices haServices = new NonHaServices(resourceManagerAddress);
+			TaskExecutor taskManager = TaskExecutor.startTaskManagerComponentsAndActor(
+				new Configuration(), resourceID, rpc, "localhost", haServices, true);
+			String taskManagerAddress = taskManager.getAddress();
+			taskManager.start();
+
+			verify(rmGateway, timeout(5000)).registerTaskExecutor(
+					any(UUID.class), eq(taskManagerAddress), eq(resourceID), any(FiniteDuration.class));
+		}
+		finally {
+			rpc.stopService();
+		}
+	}
+
+	@Test
+	public void testTriggerRegistrationOnLeaderChange() throws Exception {
+		final ResourceID resourceID = ResourceID.generate();
+
+		final String address1 = "/resource/manager/address/one";
+		final String address2 = "/resource/manager/address/two";
+		final UUID leaderId1 = UUID.randomUUID();
+		final UUID leaderId2 = UUID.randomUUID();
+
+		final TestingRpcService rpc = new TestingRpcService();
+		try {
+			// register the mock resource manager gateways
+			ResourceManagerGateway rmGateway1 = mock(ResourceManagerGateway.class);
+			ResourceManagerGateway rmGateway2 = mock(ResourceManagerGateway.class);
+			rpc.registerGateway(address1, rmGateway1);
+			rpc.registerGateway(address2, rmGateway2);
+
+			TestingLeaderRetrievalService testLeaderService = new TestingLeaderRetrievalService();
+
+			TestingHighAvailabilityServices haServices = new TestingHighAvailabilityServices();
+			haServices.setResourceManagerLeaderRetriever(testLeaderService);
+
+			TaskExecutor taskManager = TaskExecutor.startTaskManagerComponentsAndActor(
+				new Configuration(), resourceID, rpc, "localhost", haServices, true);
+			String taskManagerAddress = taskManager.getAddress();
+			taskManager.start();
+
+			// no connection initially, since there is no leader
+			assertNull(taskManager.getResourceManagerConnection());
+
+			// define a leader and see that a registration happens
+			testLeaderService.notifyListener(address1, leaderId1);
+
+			verify(rmGateway1, timeout(5000)).registerTaskExecutor(
+					eq(leaderId1), eq(taskManagerAddress), eq(resourceID), any(FiniteDuration.class));
+			assertNotNull(taskManager.getResourceManagerConnection());
+
+			// cancel the leader 
+			testLeaderService.notifyListener(null, null);
+
+			// set a new leader, see that a registration happens 
+			testLeaderService.notifyListener(address2, leaderId2);
+
+			verify(rmGateway2, timeout(5000)).registerTaskExecutor(
+					eq(leaderId2), eq(taskManagerAddress), eq(resourceID), any(FiniteDuration.class));
+			assertNotNull(taskManager.getResourceManagerConnection());
+		}
+		finally {
+			rpc.stopService();
+		}
+	}
+}


[31/50] [abbrv] flink git commit: [FLINK-4355] [cluster management] Add tests for the TaskManager -> ResourceManager registration.

Posted by tr...@apache.org.
[FLINK-4355] [cluster management] Add tests for the TaskManager -> ResourceManager registration.

This closes #2395.


Project: http://git-wip-us.apache.org/repos/asf/flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/5ea97a18
Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/5ea97a18
Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/5ea97a18

Branch: refs/heads/flip-6
Commit: 5ea97a185d266f96570a4ea3c967ecbc384378cd
Parents: e6b0f12
Author: Stephan Ewen <se...@apache.org>
Authored: Fri Aug 19 23:45:54 2016 +0200
Committer: Till Rohrmann <tr...@apache.org>
Committed: Wed Sep 21 11:39:15 2016 +0200

----------------------------------------------------------------------
 .../rpc/registration/RetryingRegistration.java  |   4 +
 .../runtime/rpc/taskexecutor/SlotReport.java    |  38 ---
 .../runtime/rpc/taskexecutor/TaskExecutor.java  |  12 +
 ...TaskExecutorToResourceManagerConnection.java |   4 +
 .../TestingHighAvailabilityServices.java        |  53 +++
 .../flink/runtime/rpc/TestingGatewayBase.java   |  18 +-
 .../registration/RetryingRegistrationTest.java  | 336 +++++++++++++++++++
 .../registration/TestRegistrationGateway.java   |  85 +++++
 .../rpc/taskexecutor/TaskExecutorTest.java      |  92 ++++-
 9 files changed, 602 insertions(+), 40 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/flink/blob/5ea97a18/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/registration/RetryingRegistration.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/registration/RetryingRegistration.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/registration/RetryingRegistration.java
index 4c93684..dcb5011 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/registration/RetryingRegistration.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/registration/RetryingRegistration.java
@@ -58,12 +58,16 @@ public abstract class RetryingRegistration<Gateway extends RpcGateway, Success e
 	//  default configuration values
 	// ------------------------------------------------------------------------
 
+	/** default value for the initial registration timeout (milliseconds) */
 	private static final long INITIAL_REGISTRATION_TIMEOUT_MILLIS = 100;
 
+	/** default value for the maximum registration timeout, after exponential back-off (milliseconds) */
 	private static final long MAX_REGISTRATION_TIMEOUT_MILLIS = 30000;
 
+	/** The pause (milliseconds) made after an registration attempt caused an exception (other than timeout) */
 	private static final long ERROR_REGISTRATION_DELAY_MILLIS = 10000;
 
+	/** The pause (milliseconds) made after the registration attempt was refused */
 	private static final long REFUSED_REGISTRATION_DELAY_MILLIS = 30000;
 
 	// ------------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/flink/blob/5ea97a18/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/SlotReport.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/SlotReport.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/SlotReport.java
deleted file mode 100644
index e42fa4a..0000000
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/SlotReport.java
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.flink.runtime.rpc.taskexecutor;
-
-import java.io.Serializable;
-
-/**
- * A report about the current status of all slots of the TaskExecutor, describing
- * which slots are available and allocated, and what jobs (JobManagers) the allocated slots
- * have been allocated to.
- */
-public class SlotReport implements Serializable{
-
-	private static final long serialVersionUID = 1L;
-
-	// ------------------------------------------------------------------------
-	
-	@Override
-	public String toString() {
-		return "SlotReport";
-	}
-}

http://git-wip-us.apache.org/repos/asf/flink/blob/5ea97a18/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutor.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutor.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutor.java
index 1a637bb..f201e00 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutor.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutor.java
@@ -18,6 +18,7 @@
 
 package org.apache.flink.runtime.rpc.taskexecutor;
 
+import org.apache.flink.annotation.VisibleForTesting;
 import org.apache.flink.runtime.clusterframework.types.ResourceID;
 import org.apache.flink.runtime.highavailability.HighAvailabilityServices;
 import org.apache.flink.runtime.leaderretrieval.LeaderRetrievalListener;
@@ -72,6 +73,8 @@ public class TaskExecutor extends RpcEndpoint<TaskExecutorGateway> {
 
 	@Override
 	public void start() {
+		super.start();
+
 		// start by connecting to the ResourceManager
 		try {
 			haServices.getResourceManagerLeaderRetriever().start(new ResourceManagerLeaderListener());
@@ -148,6 +151,15 @@ public class TaskExecutor extends RpcEndpoint<TaskExecutorGateway> {
 	}
 
 	// ------------------------------------------------------------------------
+	//  Access to fields for testing
+	// ------------------------------------------------------------------------
+
+	@VisibleForTesting
+	TaskExecutorToResourceManagerConnection getResourceManagerConnection() {
+		return resourceManagerConnection;
+	}
+
+	// ------------------------------------------------------------------------
 	//  Utility classes
 	// ------------------------------------------------------------------------
 

http://git-wip-us.apache.org/repos/asf/flink/blob/5ea97a18/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorToResourceManagerConnection.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorToResourceManagerConnection.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorToResourceManagerConnection.java
index ef75862..f398b7d 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorToResourceManagerConnection.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorToResourceManagerConnection.java
@@ -40,6 +40,9 @@ import java.util.concurrent.TimeUnit;
 import static org.apache.flink.util.Preconditions.checkNotNull;
 import static org.apache.flink.util.Preconditions.checkState;
 
+/**
+ * The connection between a TaskExecutor and the ResourceManager.
+ */
 public class TaskExecutorToResourceManagerConnection {
 
 	/** the logger for all log messages of this class */
@@ -87,6 +90,7 @@ public class TaskExecutorToResourceManagerConnection {
 				log, taskExecutor.getRpcService(),
 				resourceManagerAddress, resourceManagerLeaderId,
 				taskExecutor.getAddress(), taskExecutor.getResourceID());
+		registration.startRegistration();
 
 		Future<Tuple2<ResourceManagerGateway, TaskExecutorRegistrationSuccess>> future = registration.getFuture();
 		

http://git-wip-us.apache.org/repos/asf/flink/blob/5ea97a18/flink-runtime/src/test/java/org/apache/flink/runtime/highavailability/TestingHighAvailabilityServices.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/highavailability/TestingHighAvailabilityServices.java b/flink-runtime/src/test/java/org/apache/flink/runtime/highavailability/TestingHighAvailabilityServices.java
new file mode 100644
index 0000000..3a9f943
--- /dev/null
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/highavailability/TestingHighAvailabilityServices.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.highavailability;
+
+import org.apache.flink.runtime.leaderretrieval.LeaderRetrievalService;
+
+/**
+ * A variant of the HighAvailabilityServices for testing. Each individual service can be set
+ * to an arbitrary implementation, such as a mock or default service.
+ */
+public class TestingHighAvailabilityServices implements HighAvailabilityServices {
+
+	private volatile LeaderRetrievalService resourceManagerLeaderRetriever;
+
+
+	// ------------------------------------------------------------------------
+	//  Setters for mock / testing implementations
+	// ------------------------------------------------------------------------
+
+	public void setResourceManagerLeaderRetriever(LeaderRetrievalService resourceManagerLeaderRetriever) {
+		this.resourceManagerLeaderRetriever = resourceManagerLeaderRetriever;
+	}
+	
+	// ------------------------------------------------------------------------
+	//  HA Services Methods
+	// ------------------------------------------------------------------------
+
+	@Override
+	public LeaderRetrievalService getResourceManagerLeaderRetriever() throws Exception {
+		LeaderRetrievalService service = this.resourceManagerLeaderRetriever;
+		if (service != null) {
+			return service;
+		} else {
+			throw new IllegalStateException("ResourceManagerLeaderRetriever has not been set");
+		}
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/5ea97a18/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/TestingGatewayBase.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/TestingGatewayBase.java b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/TestingGatewayBase.java
index 4256135..8133a87 100644
--- a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/TestingGatewayBase.java
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/TestingGatewayBase.java
@@ -34,8 +34,15 @@ public abstract class TestingGatewayBase implements RpcGateway {
 
 	private final ScheduledExecutorService executor;
 
-	protected TestingGatewayBase() {
+	private final String address;
+
+	protected TestingGatewayBase(final String address) {
 		this.executor = Executors.newSingleThreadScheduledExecutor();
+		this.address = address;
+	}
+
+	protected TestingGatewayBase() {
+		this("localhost");
 	}
 
 	// ------------------------------------------------------------------------
@@ -53,6 +60,15 @@ public abstract class TestingGatewayBase implements RpcGateway {
 	}
 
 	// ------------------------------------------------------------------------
+	//  Base class methods
+	// ------------------------------------------------------------------------
+
+	@Override
+	public String getAddress() {
+		return address;
+	}
+
+	// ------------------------------------------------------------------------
 	//  utilities
 	// ------------------------------------------------------------------------
 

http://git-wip-us.apache.org/repos/asf/flink/blob/5ea97a18/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/registration/RetryingRegistrationTest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/registration/RetryingRegistrationTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/registration/RetryingRegistrationTest.java
new file mode 100644
index 0000000..9508825
--- /dev/null
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/registration/RetryingRegistrationTest.java
@@ -0,0 +1,336 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc.registration;
+
+import akka.dispatch.Futures;
+
+import org.apache.flink.api.java.tuple.Tuple2;
+import org.apache.flink.runtime.rpc.RpcService;
+import org.apache.flink.runtime.rpc.TestingRpcService;
+import org.apache.flink.util.TestLogger;
+
+import org.junit.Test;
+
+import org.slf4j.LoggerFactory;
+
+import scala.concurrent.Await;
+import scala.concurrent.ExecutionContext$;
+import scala.concurrent.Future;
+import scala.concurrent.Promise;
+import scala.concurrent.duration.FiniteDuration;
+
+import java.util.UUID;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeoutException;
+
+import static java.util.concurrent.TimeUnit.SECONDS;
+import static org.junit.Assert.*;
+import static org.mockito.Mockito.*;
+
+/**
+ * Tests for the generic retrying registration class, validating the failure, retry, and back-off behavior.
+ */
+public class RetryingRegistrationTest extends TestLogger {
+
+	@Test
+	public void testSimpleSuccessfulRegistration() throws Exception {
+		final String testId = "laissez les bon temps roulez";
+		final String testEndpointAddress = "<test-address>";
+		final UUID leaderId = UUID.randomUUID();
+
+		// an endpoint that immediately returns success
+		TestRegistrationGateway testGateway = new TestRegistrationGateway(new TestRegistrationSuccess(testId));
+		TestingRpcService rpc = new TestingRpcService();
+
+		try {
+			rpc.registerGateway(testEndpointAddress, testGateway);
+
+			TestRetryingRegistration registration = new TestRetryingRegistration(rpc, testEndpointAddress, leaderId);
+			registration.startRegistration();
+
+			Future<Tuple2<TestRegistrationGateway, TestRegistrationSuccess>> future = registration.getFuture();
+			assertNotNull(future);
+
+			// multiple accesses return the same future
+			assertEquals(future, registration.getFuture());
+
+			Tuple2<TestRegistrationGateway, TestRegistrationSuccess> success = 
+					Await.result(future, new FiniteDuration(10, SECONDS));
+
+			// validate correct invocation and result
+			assertEquals(testId, success.f1.getCorrelationId());
+			assertEquals(leaderId, testGateway.getInvocations().take().leaderId());
+		}
+		finally {
+			testGateway.stop();
+			rpc.stopService();
+		}
+	}
+	
+	@Test
+	public void testPropagateFailures() throws Exception {
+		final String testExceptionMessage = "testExceptionMessage";
+
+		// RPC service that fails with exception upon the connection
+		RpcService rpc = mock(RpcService.class);
+		when(rpc.connect(anyString(), any(Class.class))).thenThrow(new RuntimeException(testExceptionMessage));
+
+		TestRetryingRegistration registration = new TestRetryingRegistration(rpc, "testaddress", UUID.randomUUID());
+		registration.startRegistration();
+
+		Future<?> future = registration.getFuture();
+		assertTrue(future.failed().isCompleted());
+
+		assertEquals(testExceptionMessage, future.failed().value().get().get().getMessage());
+	}
+
+	@Test
+	public void testRetryConnectOnFailure() throws Exception {
+		final String testId = "laissez les bon temps roulez";
+		final UUID leaderId = UUID.randomUUID();
+
+		ExecutorService executor = Executors.newCachedThreadPool();
+		TestRegistrationGateway testGateway = new TestRegistrationGateway(new TestRegistrationSuccess(testId));
+
+		try {
+			// RPC service that fails upon the first connection, but succeeds on the second
+			RpcService rpc = mock(RpcService.class);
+			when(rpc.connect(anyString(), any(Class.class))).thenReturn(
+					Futures.failed(new Exception("test connect failure")),  // first connection attempt fails
+					Futures.successful(testGateway)                         // second connection attempt succeeds
+			);
+			when(rpc.getExecutionContext()).thenReturn(ExecutionContext$.MODULE$.fromExecutor(executor));
+
+			TestRetryingRegistration registration = new TestRetryingRegistration(rpc, "foobar address", leaderId);
+			registration.startRegistration();
+
+			Tuple2<TestRegistrationGateway, TestRegistrationSuccess> success =
+					Await.result(registration.getFuture(), new FiniteDuration(10, SECONDS));
+
+			// validate correct invocation and result
+			assertEquals(testId, success.f1.getCorrelationId());
+			assertEquals(leaderId, testGateway.getInvocations().take().leaderId());
+		}
+		finally {
+			testGateway.stop();
+			executor.shutdown();
+		}
+	}
+
+	@Test
+	public void testRetriesOnTimeouts() throws Exception {
+		final String testId = "rien ne va plus";
+		final String testEndpointAddress = "<test-address>";
+		final UUID leaderId = UUID.randomUUID();
+
+		// an endpoint that immediately returns futures with timeouts before returning a successful future
+		TestRegistrationGateway testGateway = new TestRegistrationGateway(
+				null, // timeout
+				null, // timeout
+				new TestRegistrationSuccess(testId) // success
+		);
+
+		TestingRpcService rpc = new TestingRpcService();
+
+		try {
+			rpc.registerGateway(testEndpointAddress, testGateway);
+	
+			TestRetryingRegistration registration = new TestRetryingRegistration(rpc, testEndpointAddress, leaderId);
+	
+			long started = System.nanoTime();
+			registration.startRegistration();
+	
+			Future<Tuple2<TestRegistrationGateway, TestRegistrationSuccess>> future = registration.getFuture();
+			Tuple2<TestRegistrationGateway, TestRegistrationSuccess> success =
+					Await.result(future, new FiniteDuration(10, SECONDS));
+	
+			long finished = System.nanoTime();
+			long elapsedMillis = (finished - started) / 1000000;
+	
+			// validate correct invocation and result
+			assertEquals(testId, success.f1.getCorrelationId());
+			assertEquals(leaderId, testGateway.getInvocations().take().leaderId());
+	
+			// validate that some retry-delay / back-off behavior happened
+			assertTrue("retries did not properly back off", elapsedMillis >= 3 * TestRetryingRegistration.INITIAL_TIMEOUT);
+		}
+		finally {
+			rpc.stopService();
+			testGateway.stop();
+		}
+	}
+
+	@Test
+	public void testDecline() throws Exception {
+		final String testId = "qui a coupe le fromage";
+		final String testEndpointAddress = "<test-address>";
+		final UUID leaderId = UUID.randomUUID();
+
+		TestingRpcService rpc = new TestingRpcService();
+
+		TestRegistrationGateway testGateway = new TestRegistrationGateway(
+				null, // timeout
+				new RegistrationResponse.Decline("no reason "),
+				null, // timeout
+				new TestRegistrationSuccess(testId) // success
+		);
+
+		try {
+			rpc.registerGateway(testEndpointAddress, testGateway);
+
+			TestRetryingRegistration registration = new TestRetryingRegistration(rpc, testEndpointAddress, leaderId);
+
+			long started = System.nanoTime();
+			registration.startRegistration();
+	
+			Future<Tuple2<TestRegistrationGateway, TestRegistrationSuccess>> future = registration.getFuture();
+			Tuple2<TestRegistrationGateway, TestRegistrationSuccess> success =
+					Await.result(future, new FiniteDuration(10, SECONDS));
+
+			long finished = System.nanoTime();
+			long elapsedMillis = (finished - started) / 1000000;
+
+			// validate correct invocation and result
+			assertEquals(testId, success.f1.getCorrelationId());
+			assertEquals(leaderId, testGateway.getInvocations().take().leaderId());
+
+			// validate that some retry-delay / back-off behavior happened
+			assertTrue("retries did not properly back off", elapsedMillis >= 
+					2 * TestRetryingRegistration.INITIAL_TIMEOUT + TestRetryingRegistration.DELAY_ON_DECLINE);
+		}
+		finally {
+			testGateway.stop();
+			rpc.stopService();
+		}
+	}
+	
+	@Test
+	@SuppressWarnings("unchecked")
+	public void testRetryOnError() throws Exception {
+		final String testId = "Petit a petit, l'oiseau fait son nid";
+		final String testEndpointAddress = "<test-address>";
+		final UUID leaderId = UUID.randomUUID();
+
+		TestingRpcService rpc = new TestingRpcService();
+
+		try {
+			// gateway that upon calls first responds with a failure, then with a success
+			TestRegistrationGateway testGateway = mock(TestRegistrationGateway.class);
+
+			when(testGateway.registrationCall(any(UUID.class), anyLong())).thenReturn(
+					Futures.<RegistrationResponse>failed(new Exception("test exception")),
+					Futures.<RegistrationResponse>successful(new TestRegistrationSuccess(testId)));
+			
+			rpc.registerGateway(testEndpointAddress, testGateway);
+
+			TestRetryingRegistration registration = new TestRetryingRegistration(rpc, testEndpointAddress, leaderId);
+
+			long started = System.nanoTime();
+			registration.startRegistration();
+
+			Future<Tuple2<TestRegistrationGateway, TestRegistrationSuccess>> future = registration.getFuture();
+			Tuple2<TestRegistrationGateway, TestRegistrationSuccess> success =
+					Await.result(future, new FiniteDuration(10, SECONDS));
+
+			long finished = System.nanoTime();
+			long elapsedMillis = (finished - started) / 1000000;
+			
+			assertEquals(testId, success.f1.getCorrelationId());
+
+			// validate that some retry-delay / back-off behavior happened
+			assertTrue("retries did not properly back off",
+					elapsedMillis >= TestRetryingRegistration.DELAY_ON_ERROR);
+		}
+		finally {
+			rpc.stopService();
+		}
+	}
+
+	@Test
+	public void testCancellation() throws Exception {
+		final String testEndpointAddress = "my-test-address";
+		final UUID leaderId = UUID.randomUUID();
+
+		TestingRpcService rpc = new TestingRpcService();
+
+		try {
+			Promise<RegistrationResponse> result = Futures.promise();
+
+			TestRegistrationGateway testGateway = mock(TestRegistrationGateway.class);
+			when(testGateway.registrationCall(any(UUID.class), anyLong())).thenReturn(result.future());
+
+			rpc.registerGateway(testEndpointAddress, testGateway);
+
+			TestRetryingRegistration registration = new TestRetryingRegistration(rpc, testEndpointAddress, leaderId);
+			registration.startRegistration();
+
+			// cancel and fail the current registration attempt
+			registration.cancel();
+			result.failure(new TimeoutException());
+
+			// there should not be a second registration attempt
+			verify(testGateway, atMost(1)).registrationCall(any(UUID.class), anyLong());
+		}
+		finally {
+			rpc.stopService();
+		}
+	}
+
+	// ------------------------------------------------------------------------
+	//  test registration
+	// ------------------------------------------------------------------------
+
+	private static class TestRegistrationSuccess extends RegistrationResponse.Success {
+		private static final long serialVersionUID = 5542698790917150604L;
+
+		private final String correlationId;
+
+		private TestRegistrationSuccess(String correlationId) {
+			this.correlationId = correlationId;
+		}
+
+		public String getCorrelationId() {
+			return correlationId;
+		}
+	}
+
+	private static class TestRetryingRegistration extends RetryingRegistration<TestRegistrationGateway, TestRegistrationSuccess> {
+
+		// we use shorter timeouts here to speed up the tests
+		static final long INITIAL_TIMEOUT = 20;
+		static final long MAX_TIMEOUT = 200;
+		static final long DELAY_ON_ERROR = 200;
+		static final long DELAY_ON_DECLINE = 200;
+
+		public TestRetryingRegistration(RpcService rpc, String targetAddress, UUID leaderId) {
+			super(LoggerFactory.getLogger(RetryingRegistrationTest.class),
+					rpc, "TestEndpoint",
+					TestRegistrationGateway.class,
+					targetAddress, leaderId,
+					INITIAL_TIMEOUT, MAX_TIMEOUT, DELAY_ON_ERROR, DELAY_ON_DECLINE);
+		}
+
+		@Override
+		protected Future<RegistrationResponse> invokeRegistration(
+				TestRegistrationGateway gateway, UUID leaderId, long timeoutMillis) {
+			return gateway.registrationCall(leaderId, timeoutMillis);
+		}
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/5ea97a18/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/registration/TestRegistrationGateway.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/registration/TestRegistrationGateway.java b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/registration/TestRegistrationGateway.java
new file mode 100644
index 0000000..a049e48
--- /dev/null
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/registration/TestRegistrationGateway.java
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc.registration;
+
+import akka.dispatch.Futures;
+
+import org.apache.flink.runtime.rpc.TestingGatewayBase;
+import org.apache.flink.util.Preconditions;
+
+import scala.concurrent.Future;
+
+import java.util.UUID;
+import java.util.concurrent.BlockingQueue;
+import java.util.concurrent.LinkedBlockingQueue;
+
+public class TestRegistrationGateway extends TestingGatewayBase {
+
+	private final BlockingQueue<RegistrationCall> invocations;
+
+	private final RegistrationResponse[] responses;
+
+	private int pos;
+
+	public TestRegistrationGateway(RegistrationResponse... responses) {
+		Preconditions.checkArgument(responses != null && responses.length > 0);
+
+		this.invocations = new LinkedBlockingQueue<>();
+		this.responses = responses;
+		
+	}
+
+	// ------------------------------------------------------------------------
+
+	public Future<RegistrationResponse> registrationCall(UUID leaderId, long timeout) {
+		invocations.add(new RegistrationCall(leaderId, timeout));
+
+		RegistrationResponse response = responses[pos];
+		if (pos < responses.length - 1) {
+			pos++;
+		}
+
+		// return a completed future (for a proper value), or one that never completes and will time out (for null)
+		return response != null ? Futures.successful(response) : this.<RegistrationResponse>futureWithTimeout(timeout);
+	}
+
+	public BlockingQueue<RegistrationCall> getInvocations() {
+		return invocations;
+	}
+
+	// ------------------------------------------------------------------------
+
+	public static class RegistrationCall {
+		private final UUID leaderId;
+		private final long timeout;
+
+		public RegistrationCall(UUID leaderId, long timeout) {
+			this.leaderId = leaderId;
+			this.timeout = timeout;
+		}
+
+		public UUID leaderId() {
+			return leaderId;
+		}
+
+		public long timeout() {
+			return timeout;
+		}
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/5ea97a18/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorTest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorTest.java
index 9f9bab3..b831ead 100644
--- a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorTest.java
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorTest.java
@@ -18,8 +18,98 @@
 
 package org.apache.flink.runtime.rpc.taskexecutor;
 
+import org.apache.flink.runtime.clusterframework.types.ResourceID;
+import org.apache.flink.runtime.highavailability.NonHaServices;
+import org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices;
+import org.apache.flink.runtime.leaderelection.TestingLeaderRetrievalService;
+import org.apache.flink.runtime.rpc.TestingRpcService;
+import org.apache.flink.runtime.rpc.resourcemanager.ResourceManagerGateway;
 import org.apache.flink.util.TestLogger;
 
+import org.junit.Test;
+
+import scala.concurrent.duration.FiniteDuration;
+
+import java.util.UUID;
+
+import static org.junit.Assert.*;
+import static org.mockito.Mockito.*;
+
 public class TaskExecutorTest extends TestLogger {
-	
+
+	@Test
+	public void testImmediatelyRegistersIfLeaderIsKnown() throws Exception {
+		final ResourceID resourceID = ResourceID.generate();
+		final String resourceManagerAddress = "/resource/manager/address/one";
+
+		final TestingRpcService rpc = new TestingRpcService();
+		try {
+			// register a mock resource manager gateway
+			ResourceManagerGateway rmGateway = mock(ResourceManagerGateway.class);
+			rpc.registerGateway(resourceManagerAddress, rmGateway);
+
+			NonHaServices haServices = new NonHaServices(resourceManagerAddress);
+			TaskExecutor taskManager = new TaskExecutor(rpc, haServices, resourceID);
+			String taskManagerAddress = taskManager.getAddress();
+
+			taskManager.start();
+
+			verify(rmGateway, timeout(5000)).registerTaskExecutor(
+					any(UUID.class), eq(taskManagerAddress), eq(resourceID), any(FiniteDuration.class));
+		}
+		finally {
+			rpc.stopService();
+		}
+	}
+
+	@Test
+	public void testTriggerRegistrationOnLeaderChange() throws Exception {
+		final ResourceID resourceID = ResourceID.generate();
+
+		final String address1 = "/resource/manager/address/one";
+		final String address2 = "/resource/manager/address/two";
+		final UUID leaderId1 = UUID.randomUUID();
+		final UUID leaderId2 = UUID.randomUUID();
+
+		final TestingRpcService rpc = new TestingRpcService();
+		try {
+			// register the mock resource manager gateways
+			ResourceManagerGateway rmGateway1 = mock(ResourceManagerGateway.class);
+			ResourceManagerGateway rmGateway2 = mock(ResourceManagerGateway.class);
+			rpc.registerGateway(address1, rmGateway1);
+			rpc.registerGateway(address2, rmGateway2);
+
+			TestingLeaderRetrievalService testLeaderService = new TestingLeaderRetrievalService();
+
+			TestingHighAvailabilityServices haServices = new TestingHighAvailabilityServices();
+			haServices.setResourceManagerLeaderRetriever(testLeaderService);
+
+			TaskExecutor taskManager = new TaskExecutor(rpc, haServices, resourceID);
+			String taskManagerAddress = taskManager.getAddress();
+			taskManager.start();
+
+			// no connection initially, since there is no leader
+			assertNull(taskManager.getResourceManagerConnection());
+
+			// define a leader and see that a registration happens
+			testLeaderService.notifyListener(address1, leaderId1);
+
+			verify(rmGateway1, timeout(5000)).registerTaskExecutor(
+					eq(leaderId1), eq(taskManagerAddress), eq(resourceID), any(FiniteDuration.class));
+			assertNotNull(taskManager.getResourceManagerConnection());
+
+			// cancel the leader 
+			testLeaderService.notifyListener(null, null);
+
+			// set a new leader, see that a registration happens 
+			testLeaderService.notifyListener(address2, leaderId2);
+
+			verify(rmGateway2, timeout(5000)).registerTaskExecutor(
+					eq(leaderId2), eq(taskManagerAddress), eq(resourceID), any(FiniteDuration.class));
+			assertNotNull(taskManager.getResourceManagerConnection());
+		}
+		finally {
+			rpc.stopService();
+		}
+	}
 }


[06/50] [abbrv] flink git commit: [FLINK-4638] [core] Fix exception message for MemorySegment

Posted by tr...@apache.org.
[FLINK-4638] [core] Fix exception message for MemorySegment

This closes #2515


Project: http://git-wip-us.apache.org/repos/asf/flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/7a25bf5c
Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/7a25bf5c
Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/7a25bf5c

Branch: refs/heads/flip-6
Commit: 7a25bf5cee9ab94525ed0284cbf399d2c33f70cf
Parents: 0975d9f
Author: Liwei Lin <lw...@gmail.com>
Authored: Tue Sep 20 14:35:28 2016 +0800
Committer: Greg Hogan <co...@greghogan.com>
Committed: Tue Sep 20 10:10:20 2016 -0400

----------------------------------------------------------------------
 .../src/main/java/org/apache/flink/core/memory/MemorySegment.java  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/flink/blob/7a25bf5c/flink-core/src/main/java/org/apache/flink/core/memory/MemorySegment.java
----------------------------------------------------------------------
diff --git a/flink-core/src/main/java/org/apache/flink/core/memory/MemorySegment.java b/flink-core/src/main/java/org/apache/flink/core/memory/MemorySegment.java
index af3efe7..d8315c5 100644
--- a/flink-core/src/main/java/org/apache/flink/core/memory/MemorySegment.java
+++ b/flink-core/src/main/java/org/apache/flink/core/memory/MemorySegment.java
@@ -161,7 +161,7 @@ public abstract class MemorySegment {
 		}
 		if (offHeapAddress >= Long.MAX_VALUE - Integer.MAX_VALUE) {
 			// this is necessary to make sure the collapsed checks are safe against numeric overflows
-			throw new IllegalArgumentException("Segment initialized with too large address: " + address
+			throw new IllegalArgumentException("Segment initialized with too large address: " + offHeapAddress
 					+ " ; Max allowed address is " + (Long.MAX_VALUE - Integer.MAX_VALUE - 1));
 		}
 		


[15/50] [abbrv] flink git commit: [FLINK-4362] [rpc] Auto generate rpc gateways via Java proxies

Posted by tr...@apache.org.
[FLINK-4362] [rpc] Auto generate rpc gateways via Java proxies

This PR introduces a generic AkkaRpcActor which receives rpc calls as a
RpcInvocation message. The RpcInvocation message is generated by the
AkkaInvocationHandler which gets them from automatically generated Java Proxies.

Add documentation for proxy based akka rpc service

Log unknown message type in AkkaRpcActor but do not fail actor

Use ReflectionUtil to extract RpcGateway type from RpcEndpoint

This closes #2357.


Project: http://git-wip-us.apache.org/repos/asf/flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/f5cf6b56
Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/f5cf6b56
Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/f5cf6b56

Branch: refs/heads/flip-6
Commit: f5cf6b5680ff3e7bc44f2084e2754dbf8e8d5ec0
Parents: 94e0092
Author: Till Rohrmann <tr...@apache.org>
Authored: Wed Aug 10 18:42:26 2016 +0200
Committer: Till Rohrmann <tr...@apache.org>
Committed: Wed Sep 21 11:39:12 2016 +0200

----------------------------------------------------------------------
 .../org/apache/flink/util/ReflectionUtil.java   |  10 +-
 .../flink/runtime/rpc/MainThreadExecutor.java   |   4 +-
 .../apache/flink/runtime/rpc/RpcEndpoint.java   |  22 +-
 .../apache/flink/runtime/rpc/RpcService.java    |   2 +-
 .../flink/runtime/rpc/akka/AkkaGateway.java     |   4 +-
 .../runtime/rpc/akka/AkkaInvocationHandler.java | 226 +++++++++++++++++++
 .../flink/runtime/rpc/akka/AkkaRpcActor.java    | 175 ++++++++++++++
 .../flink/runtime/rpc/akka/AkkaRpcService.java  | 121 +++++-----
 .../flink/runtime/rpc/akka/BaseAkkaActor.java   |  50 ----
 .../flink/runtime/rpc/akka/BaseAkkaGateway.java |  41 ----
 .../rpc/akka/jobmaster/JobMasterAkkaActor.java  |  58 -----
 .../akka/jobmaster/JobMasterAkkaGateway.java    |  57 -----
 .../runtime/rpc/akka/messages/CallAsync.java    |  41 ++++
 .../rpc/akka/messages/CallableMessage.java      |  33 ---
 .../runtime/rpc/akka/messages/CancelTask.java   |  36 ---
 .../runtime/rpc/akka/messages/ExecuteTask.java  |  36 ---
 .../messages/RegisterAtResourceManager.java     |  36 ---
 .../rpc/akka/messages/RegisterJobMaster.java    |  36 ---
 .../runtime/rpc/akka/messages/RequestSlot.java  |  37 ---
 .../rpc/akka/messages/RpcInvocation.java        |  98 ++++++++
 .../runtime/rpc/akka/messages/RunAsync.java     |  40 ++++
 .../rpc/akka/messages/RunnableMessage.java      |  31 ---
 .../akka/messages/UpdateTaskExecutionState.java |  37 ---
 .../ResourceManagerAkkaActor.java               |  65 ------
 .../ResourceManagerAkkaGateway.java             |  67 ------
 .../taskexecutor/TaskExecutorAkkaActor.java     |  77 -------
 .../taskexecutor/TaskExecutorAkkaGateway.java   |  59 -----
 .../flink/runtime/rpc/jobmaster/JobMaster.java  |   4 +-
 .../rpc/resourcemanager/ResourceManager.java    |   4 +-
 .../runtime/rpc/taskexecutor/TaskExecutor.java  |   4 +-
 .../flink/runtime/rpc/RpcCompletenessTest.java  |  50 ++--
 .../runtime/rpc/akka/AkkaRpcServiceTest.java    |   4 +-
 .../rpc/taskexecutor/TaskExecutorTest.java      |   2 +-
 33 files changed, 700 insertions(+), 867 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/flink/blob/f5cf6b56/flink-core/src/main/java/org/apache/flink/util/ReflectionUtil.java
----------------------------------------------------------------------
diff --git a/flink-core/src/main/java/org/apache/flink/util/ReflectionUtil.java b/flink-core/src/main/java/org/apache/flink/util/ReflectionUtil.java
index fe2d4c0..b851eba 100644
--- a/flink-core/src/main/java/org/apache/flink/util/ReflectionUtil.java
+++ b/flink-core/src/main/java/org/apache/flink/util/ReflectionUtil.java
@@ -48,6 +48,14 @@ public final class ReflectionUtil {
 		return getTemplateType(clazz, 0);
 	}
 
+	public static <T> Class<T> getTemplateType1(Type type) {
+		if (type instanceof ParameterizedType) {
+			return (Class<T>) getTemplateTypes((ParameterizedType) type)[0];
+		} else {
+			throw new IllegalArgumentException();
+		}
+	}
+
 	public static <T> Class<T> getTemplateType2(Class<?> clazz) {
 		return getTemplateType(clazz, 1);
 	}
@@ -123,7 +131,7 @@ public final class ReflectionUtil {
 		Class<?>[] types = new Class<?>[paramterizedType.getActualTypeArguments().length];
 		int i = 0;
 		for (Type templateArgument : paramterizedType.getActualTypeArguments()) {
-			assert (templateArgument instanceof Class<?>);
+			assert templateArgument instanceof Class<?>;
 			types[i++] = (Class<?>) templateArgument;
 		}
 		return types;

http://git-wip-us.apache.org/repos/asf/flink/blob/f5cf6b56/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/MainThreadExecutor.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/MainThreadExecutor.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/MainThreadExecutor.java
index 14b2997..882c1b7 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/MainThreadExecutor.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/MainThreadExecutor.java
@@ -47,9 +47,9 @@ public interface MainThreadExecutor {
 	 * future will throw a {@link TimeoutException}.
 	 *
 	 * @param callable Callable to be executed
-	 * @param timeout Timeout for the future to complete
+	 * @param callTimeout Timeout for the future to complete
 	 * @param <V> Return value of the callable
 	 * @return Future of the callable result
 	 */
-	<V> Future<V> callAsync(Callable<V> callable, Timeout timeout);
+	<V> Future<V> callAsync(Callable<V> callable, Timeout callTimeout);
 }

http://git-wip-us.apache.org/repos/asf/flink/blob/f5cf6b56/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcEndpoint.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcEndpoint.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcEndpoint.java
index 0d928a8..aef0803 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcEndpoint.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcEndpoint.java
@@ -20,6 +20,7 @@ package org.apache.flink.runtime.rpc;
 
 import akka.util.Timeout;
 
+import org.apache.flink.util.ReflectionUtil;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -60,6 +61,9 @@ public abstract class RpcEndpoint<C extends RpcGateway> {
 	/** RPC service to be used to start the RPC server and to obtain rpc gateways */
 	private final RpcService rpcService;
 
+	/** Class of the self gateway */
+	private final Class<C> selfGatewayType;
+
 	/** Self gateway which can be used to schedule asynchronous calls on yourself */
 	private final C self;
 
@@ -70,15 +74,19 @@ public abstract class RpcEndpoint<C extends RpcGateway> {
 	 * of the executing rpc server. */
 	private final MainThreadExecutionContext mainThreadExecutionContext;
 
-
 	/**
 	 * Initializes the RPC endpoint.
 	 * 
 	 * @param rpcService The RPC server that dispatches calls to this RPC endpoint. 
 	 */
-	public RpcEndpoint(RpcService rpcService) {
+	protected RpcEndpoint(final RpcService rpcService) {
 		this.rpcService = checkNotNull(rpcService, "rpcService");
+
+		// IMPORTANT: Don't change order of selfGatewayType and self because rpcService.startServer
+		// requires that selfGatewayType has been initialized
+		this.selfGatewayType = ReflectionUtil.getTemplateType1(getClass());
 		this.self = rpcService.startServer(this);
+		
 		this.selfAddress = rpcService.getAddress(self);
 		this.mainThreadExecutionContext = new MainThreadExecutionContext((MainThreadExecutor) self);
 	}
@@ -149,6 +157,7 @@ public abstract class RpcEndpoint<C extends RpcGateway> {
 	//  Asynchronous executions
 	// ------------------------------------------------------------------------
 
+
 	/**
 	 * Execute the runnable in the main thread of the underlying RPC endpoint.
 	 *
@@ -172,6 +181,15 @@ public abstract class RpcEndpoint<C extends RpcGateway> {
 		return ((MainThreadExecutor) self).callAsync(callable, timeout);
 	}
 
+	/**
+	 * Returns the class of the self gateway type.
+	 *
+	 * @return Class of the self gateway type
+	 */
+	public final Class<C> getSelfGatewayType() {
+		return selfGatewayType;
+	}
+
 	// ------------------------------------------------------------------------
 	//  Utilities
 	// ------------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/flink/blob/f5cf6b56/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcService.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcService.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcService.java
index 90ff7b6..f93be83 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcService.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcService.java
@@ -46,7 +46,7 @@ public interface RpcService {
 	 * @param <C> Type of the self rpc gateway associated with the rpc server
 	 * @return Self gateway to dispatch remote procedure calls to oneself
 	 */
-	<S extends RpcEndpoint, C extends RpcGateway> C startServer(S rpcEndpoint);
+	<C extends RpcGateway, S extends RpcEndpoint<C>> C startServer(S rpcEndpoint);
 
 	/**
 	 * Stop the underlying rpc server of the provided self gateway.

http://git-wip-us.apache.org/repos/asf/flink/blob/f5cf6b56/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaGateway.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaGateway.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaGateway.java
index a96a600..a826e7d 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaGateway.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaGateway.java
@@ -23,7 +23,7 @@ import akka.actor.ActorRef;
 /**
  * Interface for Akka based rpc gateways
  */
-public interface AkkaGateway {
+interface AkkaGateway {
 
-	ActorRef getActorRef();
+	ActorRef getRpcServer();
 }

http://git-wip-us.apache.org/repos/asf/flink/blob/f5cf6b56/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaInvocationHandler.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaInvocationHandler.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaInvocationHandler.java
new file mode 100644
index 0000000..e8e383a
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaInvocationHandler.java
@@ -0,0 +1,226 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc.akka;
+
+import akka.actor.ActorRef;
+import akka.pattern.Patterns;
+import akka.util.Timeout;
+import org.apache.flink.api.java.tuple.Tuple2;
+import org.apache.flink.runtime.rpc.MainThreadExecutor;
+import org.apache.flink.runtime.rpc.RpcTimeout;
+import org.apache.flink.runtime.rpc.akka.messages.CallAsync;
+import org.apache.flink.runtime.rpc.akka.messages.RpcInvocation;
+import org.apache.flink.runtime.rpc.akka.messages.RunAsync;
+import org.apache.flink.util.Preconditions;
+import scala.concurrent.Await;
+import scala.concurrent.Future;
+import scala.concurrent.duration.FiniteDuration;
+
+import java.lang.annotation.Annotation;
+import java.lang.reflect.InvocationHandler;
+import java.lang.reflect.Method;
+import java.util.BitSet;
+import java.util.concurrent.Callable;
+
+/**
+ * Invocation handler to be used with a {@link AkkaRpcActor}. The invocation handler wraps the
+ * rpc in a {@link RpcInvocation} message and then sends it to the {@link AkkaRpcActor} where it is
+ * executed.
+ */
+class AkkaInvocationHandler implements InvocationHandler, AkkaGateway, MainThreadExecutor {
+	private final ActorRef rpcServer;
+
+	// default timeout for asks
+	private final Timeout timeout;
+
+	AkkaInvocationHandler(ActorRef rpcServer, Timeout timeout) {
+		this.rpcServer = Preconditions.checkNotNull(rpcServer);
+		this.timeout = Preconditions.checkNotNull(timeout);
+	}
+
+	@Override
+	public Object invoke(Object proxy, Method method, Object[] args) throws Throwable {
+		Class<?> declaringClass = method.getDeclaringClass();
+
+		Object result;
+
+		if (declaringClass.equals(AkkaGateway.class) || declaringClass.equals(MainThreadExecutor.class) || declaringClass.equals(Object.class)) {
+			result = method.invoke(this, args);
+		} else {
+			String methodName = method.getName();
+			Class<?>[] parameterTypes = method.getParameterTypes();
+			Annotation[][] parameterAnnotations = method.getParameterAnnotations();
+			Timeout futureTimeout = extractRpcTimeout(parameterAnnotations, args, timeout);
+
+			Tuple2<Class<?>[], Object[]> filteredArguments = filterArguments(
+				parameterTypes,
+				parameterAnnotations,
+				args);
+
+			RpcInvocation rpcInvocation = new RpcInvocation(
+				methodName,
+				filteredArguments.f0,
+				filteredArguments.f1);
+
+			Class<?> returnType = method.getReturnType();
+
+			if (returnType.equals(Void.TYPE)) {
+				rpcServer.tell(rpcInvocation, ActorRef.noSender());
+
+				result = null;
+			} else if (returnType.equals(Future.class)) {
+				// execute an asynchronous call
+				result = Patterns.ask(rpcServer, rpcInvocation, futureTimeout);
+			} else {
+				// execute a synchronous call
+				Future<?> futureResult = Patterns.ask(rpcServer, rpcInvocation, futureTimeout);
+				FiniteDuration duration = timeout.duration();
+
+				result = Await.result(futureResult, duration);
+			}
+		}
+
+		return result;
+	}
+
+	@Override
+	public ActorRef getRpcServer() {
+		return rpcServer;
+	}
+
+	@Override
+	public void runAsync(Runnable runnable) {
+		// Unfortunately I couldn't find a way to allow only local communication. Therefore, the
+		// runnable field is transient transient
+		rpcServer.tell(new RunAsync(runnable), ActorRef.noSender());
+	}
+
+	@Override
+	public <V> Future<V> callAsync(Callable<V> callable, Timeout callTimeout) {
+		// Unfortunately I couldn't find a way to allow only local communication. Therefore, the
+		// callable field is declared transient
+		@SuppressWarnings("unchecked")
+		Future<V> result = (Future<V>) Patterns.ask(rpcServer, new CallAsync(callable), callTimeout);
+
+		return result;
+	}
+
+	/**
+	 * Extracts the {@link RpcTimeout} annotated rpc timeout value from the list of given method
+	 * arguments. If no {@link RpcTimeout} annotated parameter could be found, then the default
+	 * timeout is returned.
+	 *
+	 * @param parameterAnnotations Parameter annotations
+	 * @param args Array of arguments
+	 * @param defaultTimeout Default timeout to return if no {@link RpcTimeout} annotated parameter
+	 *                       has been found
+	 * @return Timeout extracted from the array of arguments or the default timeout
+	 */
+	private static Timeout extractRpcTimeout(Annotation[][] parameterAnnotations, Object[] args, Timeout defaultTimeout) {
+		if (args != null) {
+			Preconditions.checkArgument(parameterAnnotations.length == args.length);
+
+			for (int i = 0; i < parameterAnnotations.length; i++) {
+				if (isRpcTimeout(parameterAnnotations[i])) {
+					if (args[i] instanceof FiniteDuration) {
+						return new Timeout((FiniteDuration) args[i]);
+					} else {
+						throw new RuntimeException("The rpc timeout parameter must be of type " +
+							FiniteDuration.class.getName() + ". The type " + args[i].getClass().getName() +
+							" is not supported.");
+					}
+				}
+			}
+		}
+
+		return defaultTimeout;
+	}
+
+	/**
+	 * Removes all {@link RpcTimeout} annotated parameters from the parameter type and argument
+	 * list.
+	 *
+	 * @param parameterTypes Array of parameter types
+	 * @param parameterAnnotations Array of parameter annotations
+	 * @param args Arary of arguments
+	 * @return Tuple of filtered parameter types and arguments which no longer contain the
+	 * {@link RpcTimeout} annotated parameter types and arguments
+	 */
+	private static Tuple2<Class<?>[], Object[]> filterArguments(
+		Class<?>[] parameterTypes,
+		Annotation[][] parameterAnnotations,
+		Object[] args) {
+
+		Class<?>[] filteredParameterTypes;
+		Object[] filteredArgs;
+
+		if (args == null) {
+			filteredParameterTypes = parameterTypes;
+			filteredArgs = null;
+		} else {
+			Preconditions.checkArgument(parameterTypes.length == parameterAnnotations.length);
+			Preconditions.checkArgument(parameterAnnotations.length == args.length);
+
+			BitSet isRpcTimeoutParameter = new BitSet(parameterTypes.length);
+			int numberRpcParameters = parameterTypes.length;
+
+			for (int i = 0; i < parameterTypes.length; i++) {
+				if (isRpcTimeout(parameterAnnotations[i])) {
+					isRpcTimeoutParameter.set(i);
+					numberRpcParameters--;
+				}
+			}
+
+			if (numberRpcParameters == parameterTypes.length) {
+				filteredParameterTypes = parameterTypes;
+				filteredArgs = args;
+			} else {
+				filteredParameterTypes = new Class<?>[numberRpcParameters];
+				filteredArgs = new Object[numberRpcParameters];
+				int counter = 0;
+
+				for (int i = 0; i < parameterTypes.length; i++) {
+					if (!isRpcTimeoutParameter.get(i)) {
+						filteredParameterTypes[counter] = parameterTypes[i];
+						filteredArgs[counter] = args[i];
+						counter++;
+					}
+				}
+			}
+		}
+
+		return Tuple2.of(filteredParameterTypes, filteredArgs);
+	}
+
+	/**
+	 * Checks whether any of the annotations is of type {@link RpcTimeout}
+	 *
+	 * @param annotations Array of annotations
+	 * @return True if {@link RpcTimeout} was found; otherwise false
+	 */
+	private static boolean isRpcTimeout(Annotation[] annotations) {
+		for (Annotation annotation : annotations) {
+			if (annotation.annotationType().equals(RpcTimeout.class)) {
+				return true;
+			}
+		}
+
+		return false;
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/f5cf6b56/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcActor.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcActor.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcActor.java
new file mode 100644
index 0000000..57da38a
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcActor.java
@@ -0,0 +1,175 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc.akka;
+
+import akka.actor.Status;
+import akka.actor.UntypedActor;
+import akka.pattern.Patterns;
+import org.apache.flink.runtime.rpc.RpcEndpoint;
+import org.apache.flink.runtime.rpc.RpcGateway;
+import org.apache.flink.runtime.rpc.akka.messages.CallAsync;
+import org.apache.flink.runtime.rpc.akka.messages.RpcInvocation;
+import org.apache.flink.runtime.rpc.akka.messages.RunAsync;
+import org.apache.flink.util.Preconditions;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import scala.concurrent.Future;
+
+import java.lang.reflect.Method;
+import java.util.concurrent.Callable;
+
+/**
+ * Akka rpc actor which receives {@link RpcInvocation}, {@link RunAsync} and {@link CallAsync}
+ * messages.
+ * <p>
+ * The {@link RpcInvocation} designates a rpc and is dispatched to the given {@link RpcEndpoint}
+ * instance.
+ * <p>
+ * The {@link RunAsync} and {@link CallAsync} messages contain executable code which is executed
+ * in the context of the actor thread.
+ *
+ * @param <C> Type of the {@link RpcGateway} associated with the {@link RpcEndpoint}
+ * @param <T> Type of the {@link RpcEndpoint}
+ */
+class AkkaRpcActor<C extends RpcGateway, T extends RpcEndpoint<C>> extends UntypedActor {
+	private static final Logger LOG = LoggerFactory.getLogger(AkkaRpcActor.class);
+
+	private final T rpcEndpoint;
+
+	AkkaRpcActor(final T rpcEndpoint) {
+		this.rpcEndpoint = Preconditions.checkNotNull(rpcEndpoint, "rpc endpoint");
+	}
+
+	@Override
+	public void onReceive(final Object message)  {
+		if (message instanceof RunAsync) {
+			handleRunAsync((RunAsync) message);
+		} else if (message instanceof CallAsync) {
+			handleCallAsync((CallAsync) message);
+		} else if (message instanceof RpcInvocation) {
+			handleRpcInvocation((RpcInvocation) message);
+		} else {
+			LOG.warn("Received message of unknown type {}. Dropping this message!", message.getClass());
+		}
+	}
+
+	/**
+	 * Handle rpc invocations by looking up the rpc method on the rpc endpoint and calling this
+	 * method with the provided method arguments. If the method has a return value, it is returned
+	 * to the sender of the call.
+	 *
+	 * @param rpcInvocation Rpc invocation message
+	 */
+	private void handleRpcInvocation(RpcInvocation rpcInvocation) {
+		Method rpcMethod = null;
+
+		try {
+			rpcMethod = lookupRpcMethod(rpcInvocation.getMethodName(), rpcInvocation.getParameterTypes());
+		} catch (final NoSuchMethodException e) {
+			LOG.error("Could not find rpc method for rpc invocation: {}.", rpcInvocation, e);
+		}
+
+		if (rpcMethod != null) {
+			if (rpcMethod.getReturnType().equals(Void.TYPE)) {
+				// No return value to send back
+				try {
+					rpcMethod.invoke(rpcEndpoint, rpcInvocation.getArgs());
+				} catch (Throwable e) {
+					LOG.error("Error while executing remote procedure call {}.", rpcMethod, e);
+				}
+			} else {
+				try {
+					Object result = rpcMethod.invoke(rpcEndpoint, rpcInvocation.getArgs());
+
+					if (result instanceof Future) {
+						// pipe result to sender
+						Patterns.pipe((Future<?>) result, getContext().dispatcher()).to(getSender());
+					} else {
+						// tell the sender the result of the computation
+						getSender().tell(new Status.Success(result), getSelf());
+					}
+				} catch (Throwable e) {
+					// tell the sender about the failure
+					getSender().tell(new Status.Failure(e), getSelf());
+				}
+			}
+		}
+	}
+
+	/**
+	 * Handle asynchronous {@link Callable}. This method simply executes the given {@link Callable}
+	 * in the context of the actor thread.
+	 *
+	 * @param callAsync Call async message
+	 */
+	private void handleCallAsync(CallAsync callAsync) {
+		if (callAsync.getCallable() == null) {
+			final String result = "Received a " + callAsync.getClass().getName() + " message with an empty " +
+				"callable field. This indicates that this message has been serialized " +
+				"prior to sending the message. The " + callAsync.getClass().getName() +
+				" is only supported with local communication.";
+
+			LOG.warn(result);
+
+			getSender().tell(new Status.Failure(new Exception(result)), getSelf());
+		} else {
+			try {
+				Object result = callAsync.getCallable().call();
+
+				getSender().tell(new Status.Success(result), getSelf());
+			} catch (Throwable e) {
+				getSender().tell(new Status.Failure(e), getSelf());
+			}
+		}
+	}
+
+	/**
+	 * Handle asynchronous {@link Runnable}. This method simply executes the given {@link Runnable}
+	 * in the context of the actor thread.
+	 *
+	 * @param runAsync Run async message
+	 */
+	private void handleRunAsync(RunAsync runAsync) {
+		if (runAsync.getRunnable() == null) {
+			LOG.warn("Received a {} message with an empty runnable field. This indicates " +
+				"that this message has been serialized prior to sending the message. The " +
+				"{} is only supported with local communication.",
+				runAsync.getClass().getName(),
+				runAsync.getClass().getName());
+		} else {
+			try {
+				runAsync.getRunnable().run();
+			} catch (final Throwable e) {
+				LOG.error("Caught exception while executing runnable in main thread.", e);
+			}
+		}
+	}
+
+	/**
+	 * Look up the rpc method on the given {@link RpcEndpoint} instance.
+	 *
+	 * @param methodName Name of the method
+	 * @param parameterTypes Parameter types of the method
+	 * @return Method of the rpc endpoint
+	 * @throws NoSuchMethodException
+	 */
+	private Method lookupRpcMethod(final String methodName, final Class<?>[] parameterTypes) throws NoSuchMethodException {
+		return rpcEndpoint.getClass().getMethod(methodName, parameterTypes);
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/f5cf6b56/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcService.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcService.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcService.java
index d55bd13..17983d0 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcService.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcService.java
@@ -29,88 +29,82 @@ import akka.dispatch.Mapper;
 import akka.pattern.AskableActorSelection;
 import akka.util.Timeout;
 import org.apache.flink.runtime.akka.AkkaUtils;
-import org.apache.flink.runtime.rpc.jobmaster.JobMaster;
-import org.apache.flink.runtime.rpc.jobmaster.JobMasterGateway;
-import org.apache.flink.runtime.rpc.resourcemanager.ResourceManager;
-import org.apache.flink.runtime.rpc.resourcemanager.ResourceManagerGateway;
+import org.apache.flink.runtime.rpc.MainThreadExecutor;
 import org.apache.flink.runtime.rpc.RpcGateway;
 import org.apache.flink.runtime.rpc.RpcEndpoint;
 import org.apache.flink.runtime.rpc.RpcService;
-import org.apache.flink.runtime.rpc.akka.jobmaster.JobMasterAkkaActor;
-import org.apache.flink.runtime.rpc.akka.jobmaster.JobMasterAkkaGateway;
-import org.apache.flink.runtime.rpc.akka.resourcemanager.ResourceManagerAkkaActor;
-import org.apache.flink.runtime.rpc.akka.resourcemanager.ResourceManagerAkkaGateway;
-import org.apache.flink.runtime.rpc.akka.taskexecutor.TaskExecutorAkkaActor;
-import org.apache.flink.runtime.rpc.akka.taskexecutor.TaskExecutorAkkaGateway;
-import org.apache.flink.runtime.rpc.taskexecutor.TaskExecutorGateway;
-import org.apache.flink.runtime.rpc.taskexecutor.TaskExecutor;
+import org.apache.flink.util.Preconditions;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 import scala.concurrent.Future;
 
+import java.lang.reflect.InvocationHandler;
+import java.lang.reflect.Proxy;
+import java.util.Collection;
 import java.util.HashSet;
-import java.util.Set;
 
+/**
+ * Akka based {@link RpcService} implementation. The rpc service starts an Akka actor to receive
+ * rpcs from a {@link RpcGateway}.
+ */
 public class AkkaRpcService implements RpcService {
+	private static final Logger LOG = LoggerFactory.getLogger(AkkaRpcService.class);
+
 	private final ActorSystem actorSystem;
 	private final Timeout timeout;
-	private final Set<ActorRef> actors = new HashSet<>();
+	private final Collection<ActorRef> actors = new HashSet<>(4);
 
-	public AkkaRpcService(ActorSystem actorSystem, Timeout timeout) {
-		this.actorSystem = actorSystem;
-		this.timeout = timeout;
+	public AkkaRpcService(final ActorSystem actorSystem, final Timeout timeout) {
+		this.actorSystem = Preconditions.checkNotNull(actorSystem, "actor system");
+		this.timeout = Preconditions.checkNotNull(timeout, "timeout");
 	}
 
 	@Override
-	public <C extends RpcGateway> Future<C> connect(String address, final Class<C> clazz) {
-		ActorSelection actorSel = actorSystem.actorSelection(address);
+	public <C extends RpcGateway> Future<C> connect(final String address, final Class<C> clazz) {
+		LOG.info("Try to connect to remote rpc server with address {}. Returning a {} gateway.", address, clazz.getName());
 
-		AskableActorSelection asker = new AskableActorSelection(actorSel);
+		final ActorSelection actorSel = actorSystem.actorSelection(address);
 
-		Future<Object> identify = asker.ask(new Identify(42), timeout);
+		final AskableActorSelection asker = new AskableActorSelection(actorSel);
+
+		final Future<Object> identify = asker.ask(new Identify(42), timeout);
 
 		return identify.map(new Mapper<Object, C>(){
+			@Override
 			public C apply(Object obj) {
 				ActorRef actorRef = ((ActorIdentity) obj).getRef();
 
-				if (clazz == TaskExecutorGateway.class) {
-					return (C) new TaskExecutorAkkaGateway(actorRef, timeout);
-				} else if (clazz == ResourceManagerGateway.class) {
-					return (C) new ResourceManagerAkkaGateway(actorRef, timeout);
-				} else if (clazz == JobMasterGateway.class) {
-					return (C) new JobMasterAkkaGateway(actorRef, timeout);
-				} else {
-					throw new RuntimeException("Could not find remote endpoint " + clazz);
-				}
+				InvocationHandler akkaInvocationHandler = new AkkaInvocationHandler(actorRef, timeout);
+
+				@SuppressWarnings("unchecked")
+				C proxy = (C) Proxy.newProxyInstance(
+					ClassLoader.getSystemClassLoader(),
+					new Class<?>[] {clazz},
+					akkaInvocationHandler);
+
+				return proxy;
 			}
 		}, actorSystem.dispatcher());
 	}
 
 	@Override
-	public <S extends RpcEndpoint, C extends RpcGateway> C startServer(S rpcEndpoint) {
-		ActorRef ref;
-		C self;
-		if (rpcEndpoint instanceof TaskExecutor) {
-			ref = actorSystem.actorOf(
-				Props.create(TaskExecutorAkkaActor.class, rpcEndpoint)
-			);
-
-			self = (C) new TaskExecutorAkkaGateway(ref, timeout);
-		} else if (rpcEndpoint instanceof ResourceManager) {
-			ref = actorSystem.actorOf(
-				Props.create(ResourceManagerAkkaActor.class, rpcEndpoint)
-			);
-
-			self = (C) new ResourceManagerAkkaGateway(ref, timeout);
-		} else if (rpcEndpoint instanceof JobMaster) {
-			ref = actorSystem.actorOf(
-				Props.create(JobMasterAkkaActor.class, rpcEndpoint)
-			);
-
-			self = (C) new JobMasterAkkaGateway(ref, timeout);
-		} else {
-			throw new RuntimeException("Could not start RPC server for class " + rpcEndpoint.getClass());
-		}
+	public <C extends RpcGateway, S extends RpcEndpoint<C>> C startServer(S rpcEndpoint) {
+		Preconditions.checkNotNull(rpcEndpoint, "rpc endpoint");
 
-		actors.add(ref);
+		LOG.info("Start Akka rpc actor to handle rpcs for {}.", rpcEndpoint.getClass().getName());
+
+		Props akkaRpcActorProps = Props.create(AkkaRpcActor.class, rpcEndpoint);
+
+		ActorRef actorRef = actorSystem.actorOf(akkaRpcActorProps);
+		actors.add(actorRef);
+
+		InvocationHandler akkaInvocationHandler = new AkkaInvocationHandler(actorRef, timeout);
+
+		@SuppressWarnings("unchecked")
+		C self = (C) Proxy.newProxyInstance(
+			ClassLoader.getSystemClassLoader(),
+			new Class<?>[]{rpcEndpoint.getSelfGatewayType(), MainThreadExecutor.class, AkkaGateway.class},
+			akkaInvocationHandler);
 
 		return self;
 	}
@@ -120,16 +114,19 @@ public class AkkaRpcService implements RpcService {
 		if (selfGateway instanceof AkkaGateway) {
 			AkkaGateway akkaClient = (AkkaGateway) selfGateway;
 
-			if (actors.contains(akkaClient.getActorRef())) {
-				akkaClient.getActorRef().tell(PoisonPill.getInstance(), ActorRef.noSender());
-			} else {
-				// don't stop this actor since it was not started by this RPC service
+			if (actors.contains(akkaClient.getRpcServer())) {
+				ActorRef selfActorRef = akkaClient.getRpcServer();
+
+				LOG.info("Stop Akka rpc actor {}.", selfActorRef.path());
+
+				selfActorRef.tell(PoisonPill.getInstance(), ActorRef.noSender());
 			}
 		}
 	}
 
 	@Override
 	public void stopService() {
+		LOG.info("Stop Akka rpc service.");
 		actorSystem.shutdown();
 		actorSystem.awaitTermination();
 	}
@@ -137,9 +134,11 @@ public class AkkaRpcService implements RpcService {
 	@Override
 	public <C extends RpcGateway> String getAddress(C selfGateway) {
 		if (selfGateway instanceof AkkaGateway) {
-			return AkkaUtils.getAkkaURL(actorSystem, ((AkkaGateway) selfGateway).getActorRef());
+			ActorRef actorRef = ((AkkaGateway) selfGateway).getRpcServer();
+			return AkkaUtils.getAkkaURL(actorSystem, actorRef);
 		} else {
-			throw new RuntimeException("Cannot get address for non " + AkkaGateway.class.getName() + ".");
+			String className = AkkaGateway.class.getName();
+			throw new RuntimeException("Cannot get address for non " + className + '.');
 		}
 	}
 }

http://git-wip-us.apache.org/repos/asf/flink/blob/f5cf6b56/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/BaseAkkaActor.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/BaseAkkaActor.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/BaseAkkaActor.java
deleted file mode 100644
index 3cb499c..0000000
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/BaseAkkaActor.java
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.flink.runtime.rpc.akka;
-
-import akka.actor.Status;
-import akka.actor.UntypedActor;
-import org.apache.flink.runtime.rpc.akka.messages.CallableMessage;
-import org.apache.flink.runtime.rpc.akka.messages.RunnableMessage;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-public class BaseAkkaActor extends UntypedActor {
-	private static final Logger LOG = LoggerFactory.getLogger(BaseAkkaActor.class);
-
-	@Override
-	public void onReceive(Object message) throws Exception {
-		if (message instanceof RunnableMessage) {
-			try {
-				((RunnableMessage) message).getRunnable().run();
-			} catch (Exception e) {
-				LOG.error("Encountered error while executing runnable.", e);
-			}
-		} else if (message instanceof CallableMessage<?>) {
-			try {
-				Object result = ((CallableMessage<?>) message).getCallable().call();
-				sender().tell(new Status.Success(result), getSelf());
-			} catch (Exception e) {
-				sender().tell(new Status.Failure(e), getSelf());
-			}
-		} else {
-			throw new RuntimeException("Unknown message " + message);
-		}
-	}
-}

http://git-wip-us.apache.org/repos/asf/flink/blob/f5cf6b56/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/BaseAkkaGateway.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/BaseAkkaGateway.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/BaseAkkaGateway.java
deleted file mode 100644
index 512790d..0000000
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/BaseAkkaGateway.java
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.flink.runtime.rpc.akka;
-
-import akka.actor.ActorRef;
-import akka.pattern.Patterns;
-import akka.util.Timeout;
-import org.apache.flink.runtime.rpc.MainThreadExecutor;
-import org.apache.flink.runtime.rpc.akka.messages.CallableMessage;
-import org.apache.flink.runtime.rpc.akka.messages.RunnableMessage;
-import scala.concurrent.Future;
-
-import java.util.concurrent.Callable;
-
-public abstract class BaseAkkaGateway implements MainThreadExecutor, AkkaGateway {
-	@Override
-	public void runAsync(Runnable runnable) {
-		getActorRef().tell(new RunnableMessage(runnable), ActorRef.noSender());
-	}
-
-	@Override
-	public <V> Future<V> callAsync(Callable<V> callable, Timeout timeout) {
-		return (Future<V>) Patterns.ask(getActorRef(), new CallableMessage(callable), timeout);
-	}
-}

http://git-wip-us.apache.org/repos/asf/flink/blob/f5cf6b56/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/jobmaster/JobMasterAkkaActor.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/jobmaster/JobMasterAkkaActor.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/jobmaster/JobMasterAkkaActor.java
deleted file mode 100644
index 9e04ea9..0000000
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/jobmaster/JobMasterAkkaActor.java
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.flink.runtime.rpc.akka.jobmaster;
-
-import akka.actor.ActorRef;
-import akka.actor.Status;
-import org.apache.flink.runtime.rpc.akka.BaseAkkaActor;
-import org.apache.flink.runtime.rpc.akka.messages.RegisterAtResourceManager;
-import org.apache.flink.runtime.rpc.jobmaster.JobMaster;
-import org.apache.flink.runtime.messages.Acknowledge;
-import org.apache.flink.runtime.rpc.akka.messages.UpdateTaskExecutionState;
-
-public class JobMasterAkkaActor extends BaseAkkaActor {
-	private final JobMaster jobMaster;
-
-	public JobMasterAkkaActor(JobMaster jobMaster) {
-		this.jobMaster = jobMaster;
-	}
-
-	@Override
-	public void onReceive(Object message) throws Exception {
-		if (message instanceof UpdateTaskExecutionState) {
-
-			final ActorRef sender = getSender();
-
-			UpdateTaskExecutionState updateTaskExecutionState = (UpdateTaskExecutionState) message;
-
-			try {
-				Acknowledge result = jobMaster.updateTaskExecutionState(updateTaskExecutionState.getTaskExecutionState());
-				sender.tell(new Status.Success(result), getSelf());
-			} catch (Exception e) {
-				sender.tell(new Status.Failure(e), getSelf());
-			}
-		} else if (message instanceof RegisterAtResourceManager) {
-			RegisterAtResourceManager registerAtResourceManager = (RegisterAtResourceManager) message;
-
-			jobMaster.registerAtResourceManager(registerAtResourceManager.getAddress());
-		} else {
-			super.onReceive(message);
-		}
-	}
-}

http://git-wip-us.apache.org/repos/asf/flink/blob/f5cf6b56/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/jobmaster/JobMasterAkkaGateway.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/jobmaster/JobMasterAkkaGateway.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/jobmaster/JobMasterAkkaGateway.java
deleted file mode 100644
index e6bf061..0000000
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/jobmaster/JobMasterAkkaGateway.java
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.flink.runtime.rpc.akka.jobmaster;
-
-import akka.actor.ActorRef;
-import akka.pattern.AskableActorRef;
-import akka.util.Timeout;
-import org.apache.flink.runtime.rpc.akka.BaseAkkaGateway;
-import org.apache.flink.runtime.rpc.akka.messages.RegisterAtResourceManager;
-import org.apache.flink.runtime.rpc.jobmaster.JobMasterGateway;
-import org.apache.flink.runtime.messages.Acknowledge;
-import org.apache.flink.runtime.rpc.akka.messages.UpdateTaskExecutionState;
-import org.apache.flink.runtime.taskmanager.TaskExecutionState;
-import scala.concurrent.Future;
-import scala.reflect.ClassTag$;
-
-public class JobMasterAkkaGateway extends BaseAkkaGateway implements JobMasterGateway {
-	private final AskableActorRef actorRef;
-	private final Timeout timeout;
-
-	public JobMasterAkkaGateway(ActorRef actorRef, Timeout timeout) {
-		this.actorRef = new AskableActorRef(actorRef);
-		this.timeout = timeout;
-	}
-
-	@Override
-	public Future<Acknowledge> updateTaskExecutionState(TaskExecutionState taskExecutionState) {
-		return actorRef.ask(new UpdateTaskExecutionState(taskExecutionState), timeout)
-			.mapTo(ClassTag$.MODULE$.<Acknowledge>apply(Acknowledge.class));
-	}
-
-	@Override
-	public void registerAtResourceManager(String address) {
-		actorRef.actorRef().tell(new RegisterAtResourceManager(address), actorRef.actorRef());
-	}
-
-	@Override
-	public ActorRef getActorRef() {
-		return actorRef.actorRef();
-	}
-}

http://git-wip-us.apache.org/repos/asf/flink/blob/f5cf6b56/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/CallAsync.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/CallAsync.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/CallAsync.java
new file mode 100644
index 0000000..79b7825
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/CallAsync.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc.akka.messages;
+
+import org.apache.flink.util.Preconditions;
+
+import java.io.Serializable;
+import java.util.concurrent.Callable;
+
+/**
+ * Message for asynchronous callable invocations
+ */
+public final class CallAsync implements Serializable {
+	private static final long serialVersionUID = 2834204738928484060L;
+
+	private transient Callable<?> callable;
+
+	public CallAsync(Callable<?> callable) {
+		this.callable = Preconditions.checkNotNull(callable);
+	}
+
+	public Callable<?> getCallable() {
+		return callable;
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/f5cf6b56/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/CallableMessage.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/CallableMessage.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/CallableMessage.java
deleted file mode 100644
index f0e555f..0000000
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/CallableMessage.java
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.flink.runtime.rpc.akka.messages;
-
-import java.util.concurrent.Callable;
-
-public class CallableMessage<V> {
-	private final Callable<V> callable;
-
-	public CallableMessage(Callable<V> callable) {
-		this.callable = callable;
-	}
-
-	public Callable<V> getCallable() {
-		return callable;
-	}
-}

http://git-wip-us.apache.org/repos/asf/flink/blob/f5cf6b56/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/CancelTask.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/CancelTask.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/CancelTask.java
deleted file mode 100644
index 0b9e9dc..0000000
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/CancelTask.java
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.flink.runtime.rpc.akka.messages;
-
-import org.apache.flink.runtime.executiongraph.ExecutionAttemptID;
-
-import java.io.Serializable;
-
-public class CancelTask implements Serializable {
-	private static final long serialVersionUID = -2998176874447950595L;
-	private final ExecutionAttemptID executionAttemptID;
-
-	public CancelTask(ExecutionAttemptID executionAttemptID) {
-		this.executionAttemptID = executionAttemptID;
-	}
-
-	public ExecutionAttemptID getExecutionAttemptID() {
-		return executionAttemptID;
-	}
-}

http://git-wip-us.apache.org/repos/asf/flink/blob/f5cf6b56/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/ExecuteTask.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/ExecuteTask.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/ExecuteTask.java
deleted file mode 100644
index a83d539..0000000
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/ExecuteTask.java
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.flink.runtime.rpc.akka.messages;
-
-import org.apache.flink.runtime.deployment.TaskDeploymentDescriptor;
-
-import java.io.Serializable;
-
-public class ExecuteTask implements Serializable {
-	private static final long serialVersionUID = -6769958430967048348L;
-	private final TaskDeploymentDescriptor taskDeploymentDescriptor;
-
-	public ExecuteTask(TaskDeploymentDescriptor taskDeploymentDescriptor) {
-		this.taskDeploymentDescriptor = taskDeploymentDescriptor;
-	}
-
-	public TaskDeploymentDescriptor getTaskDeploymentDescriptor() {
-		return taskDeploymentDescriptor;
-	}
-}

http://git-wip-us.apache.org/repos/asf/flink/blob/f5cf6b56/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RegisterAtResourceManager.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RegisterAtResourceManager.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RegisterAtResourceManager.java
deleted file mode 100644
index 3ade082..0000000
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RegisterAtResourceManager.java
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.flink.runtime.rpc.akka.messages;
-
-import java.io.Serializable;
-
-public class RegisterAtResourceManager implements Serializable {
-
-	private static final long serialVersionUID = -4175905742620903602L;
-
-	private final String address;
-
-	public RegisterAtResourceManager(String address) {
-		this.address = address;
-	}
-
-	public String getAddress() {
-		return address;
-	}
-}

http://git-wip-us.apache.org/repos/asf/flink/blob/f5cf6b56/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RegisterJobMaster.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RegisterJobMaster.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RegisterJobMaster.java
deleted file mode 100644
index b35ea38..0000000
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RegisterJobMaster.java
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.flink.runtime.rpc.akka.messages;
-
-import org.apache.flink.runtime.rpc.resourcemanager.JobMasterRegistration;
-
-import java.io.Serializable;
-
-public class RegisterJobMaster implements Serializable{
-	private static final long serialVersionUID = -4616879574192641507L;
-	private final JobMasterRegistration jobMasterRegistration;
-
-	public RegisterJobMaster(JobMasterRegistration jobMasterRegistration) {
-		this.jobMasterRegistration = jobMasterRegistration;
-	}
-
-	public JobMasterRegistration getJobMasterRegistration() {
-		return jobMasterRegistration;
-	}
-}

http://git-wip-us.apache.org/repos/asf/flink/blob/f5cf6b56/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RequestSlot.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RequestSlot.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RequestSlot.java
deleted file mode 100644
index 85ceeec..0000000
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RequestSlot.java
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.flink.runtime.rpc.akka.messages;
-
-import org.apache.flink.runtime.rpc.resourcemanager.SlotRequest;
-
-import java.io.Serializable;
-
-public class RequestSlot implements Serializable {
-	private static final long serialVersionUID = 7207463889348525866L;
-
-	private final SlotRequest slotRequest;
-
-	public RequestSlot(SlotRequest slotRequest) {
-		this.slotRequest = slotRequest;
-	}
-
-	public SlotRequest getSlotRequest() {
-		return slotRequest;
-	}
-}

http://git-wip-us.apache.org/repos/asf/flink/blob/f5cf6b56/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RpcInvocation.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RpcInvocation.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RpcInvocation.java
new file mode 100644
index 0000000..5d52ef1
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RpcInvocation.java
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc.akka.messages;
+
+import org.apache.flink.util.Preconditions;
+
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+import java.io.Serializable;
+
+/**
+ * Rpc invocation message containing the remote procedure name, its parameter types and the
+ * corresponding call arguments.
+ */
+public final class RpcInvocation implements Serializable {
+	private static final long serialVersionUID = -7058254033460536037L;
+
+	private final String methodName;
+	private final Class<?>[] parameterTypes;
+	private transient Object[] args;
+
+	public RpcInvocation(String methodName, Class<?>[] parameterTypes, Object[] args) {
+		this.methodName = Preconditions.checkNotNull(methodName);
+		this.parameterTypes = Preconditions.checkNotNull(parameterTypes);
+		this.args = args;
+	}
+
+	public String getMethodName() {
+		return methodName;
+	}
+
+	public Class<?>[] getParameterTypes() {
+		return parameterTypes;
+	}
+
+	public Object[] getArgs() {
+		return args;
+	}
+
+	private void writeObject(ObjectOutputStream oos) throws IOException {
+		oos.defaultWriteObject();
+
+		if (args != null) {
+			// write has args true
+			oos.writeBoolean(true);
+
+			for (int i = 0; i < args.length; i++) {
+				try {
+					oos.writeObject(args[i]);
+				} catch (IOException e) {
+					Class<?> argClass = args[i].getClass();
+
+					throw new IOException("Could not write " + i + "th argument of method " +
+						methodName + ". The argument type is " + argClass + ". " +
+						"Make sure that this type is serializable.", e);
+				}
+			}
+		} else {
+			// write has args false
+			oos.writeBoolean(false);
+		}
+	}
+
+	private void readObject(ObjectInputStream ois) throws IOException, ClassNotFoundException {
+		ois.defaultReadObject();
+
+		boolean hasArgs = ois.readBoolean();
+
+		if (hasArgs) {
+			int numberArguments = parameterTypes.length;
+
+			args = new Object[numberArguments];
+
+			for (int i = 0; i < numberArguments; i++) {
+				args[i] = ois.readObject();
+			}
+		} else {
+			args = null;
+		}
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/f5cf6b56/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RunAsync.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RunAsync.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RunAsync.java
new file mode 100644
index 0000000..fb95852
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RunAsync.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc.akka.messages;
+
+import org.apache.flink.util.Preconditions;
+
+import java.io.Serializable;
+
+/**
+ * Message for asynchronous runnable invocations
+ */
+public final class RunAsync implements Serializable {
+	private static final long serialVersionUID = -3080595100695371036L;
+
+	private final transient Runnable runnable;
+
+	public RunAsync(Runnable runnable) {
+		this.runnable = Preconditions.checkNotNull(runnable);
+	}
+
+	public Runnable getRunnable() {
+		return runnable;
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/f5cf6b56/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RunnableMessage.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RunnableMessage.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RunnableMessage.java
deleted file mode 100644
index 3556738..0000000
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RunnableMessage.java
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.flink.runtime.rpc.akka.messages;
-
-public class RunnableMessage {
-	private final Runnable runnable;
-
-	public RunnableMessage(Runnable runnable) {
-		this.runnable = runnable;
-	}
-
-	public Runnable getRunnable() {
-		return runnable;
-	}
-}

http://git-wip-us.apache.org/repos/asf/flink/blob/f5cf6b56/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/UpdateTaskExecutionState.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/UpdateTaskExecutionState.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/UpdateTaskExecutionState.java
deleted file mode 100644
index f89cd2f..0000000
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/UpdateTaskExecutionState.java
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.flink.runtime.rpc.akka.messages;
-
-import org.apache.flink.runtime.taskmanager.TaskExecutionState;
-
-import java.io.Serializable;
-
-public class UpdateTaskExecutionState implements Serializable{
-	private static final long serialVersionUID = -6662229114427331436L;
-
-	private final TaskExecutionState taskExecutionState;
-
-	public UpdateTaskExecutionState(TaskExecutionState taskExecutionState) {
-		this.taskExecutionState = taskExecutionState;
-	}
-
-	public TaskExecutionState getTaskExecutionState() {
-		return taskExecutionState;
-	}
-}

http://git-wip-us.apache.org/repos/asf/flink/blob/f5cf6b56/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/resourcemanager/ResourceManagerAkkaActor.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/resourcemanager/ResourceManagerAkkaActor.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/resourcemanager/ResourceManagerAkkaActor.java
deleted file mode 100644
index 13101f9..0000000
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/resourcemanager/ResourceManagerAkkaActor.java
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.flink.runtime.rpc.akka.resourcemanager;
-
-import akka.actor.ActorRef;
-import akka.actor.Status;
-import akka.pattern.Patterns;
-import org.apache.flink.runtime.rpc.akka.BaseAkkaActor;
-import org.apache.flink.runtime.rpc.resourcemanager.RegistrationResponse;
-import org.apache.flink.runtime.rpc.resourcemanager.ResourceManager;
-import org.apache.flink.runtime.rpc.resourcemanager.SlotAssignment;
-import org.apache.flink.runtime.rpc.akka.messages.RegisterJobMaster;
-import org.apache.flink.runtime.rpc.akka.messages.RequestSlot;
-import scala.concurrent.Future;
-
-public class ResourceManagerAkkaActor extends BaseAkkaActor {
-	private final ResourceManager resourceManager;
-
-	public ResourceManagerAkkaActor(ResourceManager resourceManager) {
-		this.resourceManager = resourceManager;
-	}
-
-	@Override
-	public void onReceive(Object message) throws Exception {
-		final ActorRef sender = getSender();
-
-		if (message instanceof RegisterJobMaster) {
-			RegisterJobMaster registerJobMaster = (RegisterJobMaster) message;
-
-			try {
-				Future<RegistrationResponse> response = resourceManager.registerJobMaster(registerJobMaster.getJobMasterRegistration());
-				Patterns.pipe(response, getContext().dispatcher()).to(sender());
-			} catch (Exception e) {
-				sender.tell(new Status.Failure(e), getSelf());
-			}
-		} else if (message instanceof RequestSlot) {
-			RequestSlot requestSlot = (RequestSlot) message;
-
-			try {
-				SlotAssignment response = resourceManager.requestSlot(requestSlot.getSlotRequest());
-				sender.tell(new Status.Success(response), getSelf());
-			} catch (Exception e) {
-				sender.tell(new Status.Failure(e), getSelf());
-			}
-		} else {
-			super.onReceive(message);
-		}
-	}
-}

http://git-wip-us.apache.org/repos/asf/flink/blob/f5cf6b56/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/resourcemanager/ResourceManagerAkkaGateway.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/resourcemanager/ResourceManagerAkkaGateway.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/resourcemanager/ResourceManagerAkkaGateway.java
deleted file mode 100644
index 1304707..0000000
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/resourcemanager/ResourceManagerAkkaGateway.java
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.flink.runtime.rpc.akka.resourcemanager;
-
-import akka.actor.ActorRef;
-import akka.pattern.AskableActorRef;
-import akka.util.Timeout;
-import org.apache.flink.runtime.rpc.akka.BaseAkkaGateway;
-import org.apache.flink.runtime.rpc.resourcemanager.JobMasterRegistration;
-import org.apache.flink.runtime.rpc.resourcemanager.RegistrationResponse;
-import org.apache.flink.runtime.rpc.resourcemanager.ResourceManagerGateway;
-import org.apache.flink.runtime.rpc.resourcemanager.SlotAssignment;
-import org.apache.flink.runtime.rpc.resourcemanager.SlotRequest;
-import org.apache.flink.runtime.rpc.akka.messages.RegisterJobMaster;
-import org.apache.flink.runtime.rpc.akka.messages.RequestSlot;
-import scala.concurrent.Future;
-import scala.concurrent.duration.FiniteDuration;
-import scala.reflect.ClassTag$;
-
-public class ResourceManagerAkkaGateway extends BaseAkkaGateway implements ResourceManagerGateway {
-	private final AskableActorRef actorRef;
-	private final Timeout timeout;
-
-	public ResourceManagerAkkaGateway(ActorRef actorRef, Timeout timeout) {
-		this.actorRef = new AskableActorRef(actorRef);
-		this.timeout = timeout;
-	}
-
-	@Override
-	public Future<RegistrationResponse> registerJobMaster(JobMasterRegistration jobMasterRegistration, FiniteDuration timeout) {
-		return actorRef.ask(new RegisterJobMaster(jobMasterRegistration), new Timeout(timeout))
-			.mapTo(ClassTag$.MODULE$.<RegistrationResponse>apply(RegistrationResponse.class));
-	}
-
-	@Override
-	public Future<RegistrationResponse> registerJobMaster(JobMasterRegistration jobMasterRegistration) {
-		return actorRef.ask(new RegisterJobMaster(jobMasterRegistration), timeout)
-			.mapTo(ClassTag$.MODULE$.<RegistrationResponse>apply(RegistrationResponse.class));
-	}
-
-	@Override
-	public Future<SlotAssignment> requestSlot(SlotRequest slotRequest) {
-		return actorRef.ask(new RequestSlot(slotRequest), timeout)
-			.mapTo(ClassTag$.MODULE$.<SlotAssignment>apply(SlotAssignment.class));
-	}
-
-	@Override
-	public ActorRef getActorRef() {
-		return actorRef.actorRef();
-	}
-}

http://git-wip-us.apache.org/repos/asf/flink/blob/f5cf6b56/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/taskexecutor/TaskExecutorAkkaActor.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/taskexecutor/TaskExecutorAkkaActor.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/taskexecutor/TaskExecutorAkkaActor.java
deleted file mode 100644
index ed522cc..0000000
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/taskexecutor/TaskExecutorAkkaActor.java
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.flink.runtime.rpc.akka.taskexecutor;
-
-import akka.actor.ActorRef;
-import akka.actor.Status;
-import akka.dispatch.OnComplete;
-import org.apache.flink.runtime.messages.Acknowledge;
-import org.apache.flink.runtime.rpc.akka.BaseAkkaActor;
-import org.apache.flink.runtime.rpc.akka.messages.CancelTask;
-import org.apache.flink.runtime.rpc.akka.messages.ExecuteTask;
-import org.apache.flink.runtime.rpc.taskexecutor.TaskExecutorGateway;
-
-public class TaskExecutorAkkaActor extends BaseAkkaActor {
-	private final TaskExecutorGateway taskExecutor;
-
-	public TaskExecutorAkkaActor(TaskExecutorGateway taskExecutor) {
-		this.taskExecutor = taskExecutor;
-	}
-
-	@Override
-	public void onReceive(Object message) throws Exception {
-		final ActorRef sender = getSender();
-
-		if (message instanceof ExecuteTask) {
-			ExecuteTask executeTask = (ExecuteTask) message;
-
-			taskExecutor.executeTask(executeTask.getTaskDeploymentDescriptor()).onComplete(
-				new OnComplete<Acknowledge>() {
-					@Override
-					public void onComplete(Throwable failure, Acknowledge success) throws Throwable {
-						if (failure != null) {
-							sender.tell(new Status.Failure(failure), getSelf());
-						} else {
-							sender.tell(new Status.Success(Acknowledge.get()), getSelf());
-						}
-					}
-				},
-				getContext().dispatcher()
-			);
-		} else if (message instanceof CancelTask) {
-			CancelTask cancelTask = (CancelTask) message;
-
-			taskExecutor.cancelTask(cancelTask.getExecutionAttemptID()).onComplete(
-				new OnComplete<Acknowledge>() {
-					@Override
-					public void onComplete(Throwable failure, Acknowledge success) throws Throwable {
-						if (failure != null) {
-							sender.tell(new Status.Failure(failure), getSelf());
-						} else {
-							sender.tell(new Status.Success(Acknowledge.get()), getSelf());
-						}
-					}
-				},
-				getContext().dispatcher()
-			);
-		} else {
-			super.onReceive(message);
-		}
-	}
-}

http://git-wip-us.apache.org/repos/asf/flink/blob/f5cf6b56/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/taskexecutor/TaskExecutorAkkaGateway.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/taskexecutor/TaskExecutorAkkaGateway.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/taskexecutor/TaskExecutorAkkaGateway.java
deleted file mode 100644
index 7f0a522..0000000
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/taskexecutor/TaskExecutorAkkaGateway.java
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.flink.runtime.rpc.akka.taskexecutor;
-
-import akka.actor.ActorRef;
-import akka.pattern.AskableActorRef;
-import akka.util.Timeout;
-import org.apache.flink.runtime.deployment.TaskDeploymentDescriptor;
-import org.apache.flink.runtime.executiongraph.ExecutionAttemptID;
-import org.apache.flink.runtime.messages.Acknowledge;
-import org.apache.flink.runtime.rpc.akka.BaseAkkaGateway;
-import org.apache.flink.runtime.rpc.akka.messages.CancelTask;
-import org.apache.flink.runtime.rpc.akka.messages.ExecuteTask;
-import org.apache.flink.runtime.rpc.taskexecutor.TaskExecutorGateway;
-import scala.concurrent.Future;
-import scala.reflect.ClassTag$;
-
-public class TaskExecutorAkkaGateway extends BaseAkkaGateway implements TaskExecutorGateway {
-	private final AskableActorRef actorRef;
-	private final Timeout timeout;
-
-	public TaskExecutorAkkaGateway(ActorRef actorRef, Timeout timeout) {
-		this.actorRef = new AskableActorRef(actorRef);
-		this.timeout = timeout;
-	}
-
-	@Override
-	public Future<Acknowledge> executeTask(TaskDeploymentDescriptor taskDeploymentDescriptor) {
-		return actorRef.ask(new ExecuteTask(taskDeploymentDescriptor), timeout)
-			.mapTo(ClassTag$.MODULE$.<Acknowledge>apply(Acknowledge.class));
-	}
-
-	@Override
-	public Future<Acknowledge> cancelTask(ExecutionAttemptID executionAttemptId) {
-		return actorRef.ask(new CancelTask(executionAttemptId), timeout)
-			.mapTo(ClassTag$.MODULE$.<Acknowledge>apply(Acknowledge.class));
-	}
-
-	@Override
-	public ActorRef getActorRef() {
-		return actorRef.actorRef();
-	}
-}

http://git-wip-us.apache.org/repos/asf/flink/blob/f5cf6b56/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/jobmaster/JobMaster.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/jobmaster/JobMaster.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/jobmaster/JobMaster.java
index b81b19c..e53cd68 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/jobmaster/JobMaster.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/jobmaster/JobMaster.java
@@ -30,6 +30,7 @@ import org.apache.flink.runtime.rpc.resourcemanager.ResourceManagerGateway;
 import org.apache.flink.runtime.rpc.RpcEndpoint;
 import org.apache.flink.runtime.rpc.RpcService;
 import org.apache.flink.runtime.taskmanager.TaskExecutionState;
+import org.apache.flink.util.Preconditions;
 import scala.Tuple2;
 import scala.concurrent.ExecutionContext;
 import scala.concurrent.ExecutionContext$;
@@ -76,7 +77,8 @@ public class JobMaster extends RpcEndpoint<JobMasterGateway> {
 
 	public JobMaster(RpcService rpcService, ExecutorService executorService) {
 		super(rpcService);
-		executionContext = ExecutionContext$.MODULE$.fromExecutor(executorService);
+		executionContext = ExecutionContext$.MODULE$.fromExecutor(
+			Preconditions.checkNotNull(executorService));
 		scheduledExecutorService = new ScheduledThreadPoolExecutor(1);
 	}
 

http://git-wip-us.apache.org/repos/asf/flink/blob/f5cf6b56/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/ResourceManager.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/ResourceManager.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/ResourceManager.java
index c7e8def..729ef0c 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/ResourceManager.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/ResourceManager.java
@@ -25,6 +25,7 @@ import org.apache.flink.runtime.rpc.RpcEndpoint;
 import org.apache.flink.runtime.rpc.RpcService;
 import org.apache.flink.runtime.rpc.jobmaster.JobMaster;
 import org.apache.flink.runtime.rpc.jobmaster.JobMasterGateway;
+import org.apache.flink.util.Preconditions;
 import scala.concurrent.ExecutionContext;
 import scala.concurrent.ExecutionContext$;
 import scala.concurrent.Future;
@@ -49,7 +50,8 @@ public class ResourceManager extends RpcEndpoint<ResourceManagerGateway> {
 
 	public ResourceManager(RpcService rpcService, ExecutorService executorService) {
 		super(rpcService);
-		this.executionContext = ExecutionContext$.MODULE$.fromExecutor(executorService);
+		this.executionContext = ExecutionContext$.MODULE$.fromExecutor(
+			Preconditions.checkNotNull(executorService));
 		this.jobMasterGateways = new HashMap<>();
 	}
 

http://git-wip-us.apache.org/repos/asf/flink/blob/f5cf6b56/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutor.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutor.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutor.java
index cdfc3bd..3a7dd9f 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutor.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutor.java
@@ -25,6 +25,7 @@ import org.apache.flink.runtime.messages.Acknowledge;
 import org.apache.flink.runtime.rpc.RpcMethod;
 import org.apache.flink.runtime.rpc.RpcEndpoint;
 import org.apache.flink.runtime.rpc.RpcService;
+import org.apache.flink.util.Preconditions;
 import scala.concurrent.ExecutionContext;
 
 import java.util.HashSet;
@@ -47,7 +48,8 @@ public class TaskExecutor extends RpcEndpoint<TaskExecutorGateway> {
 
 	public TaskExecutor(RpcService rpcService, ExecutorService executorService) {
 		super(rpcService);
-		this.executionContext = ExecutionContexts$.MODULE$.fromExecutor(executorService);
+		this.executionContext = ExecutionContexts$.MODULE$.fromExecutor(
+			Preconditions.checkNotNull(executorService));
 	}
 
 	/**

http://git-wip-us.apache.org/repos/asf/flink/blob/f5cf6b56/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/RpcCompletenessTest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/RpcCompletenessTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/RpcCompletenessTest.java
index 0ded25e..e50533e 100644
--- a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/RpcCompletenessTest.java
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/RpcCompletenessTest.java
@@ -18,15 +18,15 @@
 
 package org.apache.flink.runtime.rpc;
 
+import org.apache.flink.util.ReflectionUtil;
 import org.apache.flink.util.TestLogger;
 import org.junit.Test;
 import org.reflections.Reflections;
 import scala.concurrent.Future;
+import scala.concurrent.duration.FiniteDuration;
 
 import java.lang.annotation.Annotation;
 import java.lang.reflect.Method;
-import java.lang.reflect.ParameterizedType;
-import java.lang.reflect.Type;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -51,9 +51,8 @@ public class RpcCompletenessTest extends TestLogger {
 
 		for (Class<? extends RpcEndpoint> rpcEndpoint :classes){
 			c = rpcEndpoint;
-			Type superClass = c.getGenericSuperclass();
 
-			Class<?> rpcGatewayType = extractTypeParameter(superClass, 0);
+			Class<?> rpcGatewayType = ReflectionUtil.getTemplateType1(c);
 
 			if (rpcGatewayType != null) {
 				checkCompleteness(rpcEndpoint, (Class<? extends RpcGateway>) rpcGatewayType);
@@ -137,13 +136,16 @@ public class RpcCompletenessTest extends TestLogger {
 		}
 
 		Annotation[][] parameterAnnotations = gatewayMethod.getParameterAnnotations();
+		Class<?>[] parameterTypes = gatewayMethod.getParameterTypes();
 		int rpcTimeoutParameters = 0;
 
-		for (Annotation[] parameterAnnotation : parameterAnnotations) {
-			for (Annotation annotation : parameterAnnotation) {
-				if (annotation.equals(RpcTimeout.class)) {
-					rpcTimeoutParameters++;
-				}
+		for (int i = 0; i < parameterAnnotations.length; i++) {
+			if (isRpcTimeout(parameterAnnotations[i])) {
+				assertTrue(
+					"The rpc timeout has to be of type " + FiniteDuration.class.getName() + ".",
+					parameterTypes[i].equals(FiniteDuration.class));
+
+				rpcTimeoutParameters++;
 			}
 		}
 
@@ -211,10 +213,10 @@ public class RpcCompletenessTest extends TestLogger {
 				if (!futureClass.equals(RpcCompletenessTest.futureClass)) {
 					return false;
 				} else {
-					Class<?> valueClass = extractTypeParameter(futureClass, 0);
+					Class<?> valueClass = ReflectionUtil.getTemplateType1(gatewayMethod.getGenericReturnType());
 
 					if (endpointMethod.getReturnType().equals(futureClass)) {
-						Class<?> rpcEndpointValueClass = extractTypeParameter(endpointMethod.getReturnType(), 0);
+						Class<?> rpcEndpointValueClass = ReflectionUtil.getTemplateType1(endpointMethod.getGenericReturnType());
 
 						// check if we have the same future value types
 						if (valueClass != null && rpcEndpointValueClass != null && !checkType(valueClass, rpcEndpointValueClass)) {
@@ -251,7 +253,7 @@ public class RpcCompletenessTest extends TestLogger {
 		if (method.getReturnType().equals(Void.TYPE)) {
 			builder.append("void").append(" ");
 		} else if (method.getReturnType().equals(futureClass)) {
-			Class<?> valueClass = extractTypeParameter(method.getGenericReturnType(), 0);
+			Class<?> valueClass = ReflectionUtil.getTemplateType1(method.getGenericReturnType());
 
 			builder
 				.append(futureClass.getSimpleName())
@@ -291,30 +293,6 @@ public class RpcCompletenessTest extends TestLogger {
 		return builder.toString();
 	}
 
-	private Class<?> extractTypeParameter(Type genericType, int position) {
-		if (genericType instanceof ParameterizedType) {
-			ParameterizedType parameterizedType = (ParameterizedType) genericType;
-
-			Type[] typeArguments = parameterizedType.getActualTypeArguments();
-
-			if (position < 0 || position >= typeArguments.length) {
-				throw new IndexOutOfBoundsException("The generic type " +
-					parameterizedType.getRawType() + " only has " + typeArguments.length +
-					" type arguments.");
-			} else {
-				Type typeArgument = typeArguments[position];
-
-				if (typeArgument instanceof Class<?>) {
-					return (Class<?>) typeArgument;
-				} else {
-					return null;
-				}
-			}
-		} else {
-			return null;
-		}
-	}
-
 	private boolean isRpcTimeout(Annotation[] annotations) {
 		for (Annotation annotation : annotations) {
 			if (annotation.annotationType().equals(RpcTimeout.class)) {


[05/50] [abbrv] flink git commit: [FLINK-4609] [java-api] Remove redundant check for null in CrossOperator

Posted by tr...@apache.org.
[FLINK-4609] [java-api] Remove redundant check for null in CrossOperator

This closes #2490


Project: http://git-wip-us.apache.org/repos/asf/flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/470b752e
Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/470b752e
Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/470b752e

Branch: refs/heads/flip-6
Commit: 470b752e693ae4292d34fc7fc0c778f95fa04fd9
Parents: 7a25bf5
Author: Alexander Pivovarov <ap...@gmail.com>
Authored: Fri Sep 9 22:24:43 2016 -0700
Committer: Greg Hogan <co...@greghogan.com>
Committed: Tue Sep 20 10:10:20 2016 -0400

----------------------------------------------------------------------
 .../flink/api/java/operators/CrossOperator.java | 23 ++++++--------------
 .../api/java/operators/TwoInputOperator.java    |  5 +++--
 2 files changed, 10 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/flink/blob/470b752e/flink-java/src/main/java/org/apache/flink/api/java/operators/CrossOperator.java
----------------------------------------------------------------------
diff --git a/flink-java/src/main/java/org/apache/flink/api/java/operators/CrossOperator.java b/flink-java/src/main/java/org/apache/flink/api/java/operators/CrossOperator.java
index 36e6c1c..3fdc51d 100644
--- a/flink-java/src/main/java/org/apache/flink/api/java/operators/CrossOperator.java
+++ b/flink-java/src/main/java/org/apache/flink/api/java/operators/CrossOperator.java
@@ -20,9 +20,9 @@ package org.apache.flink.api.java.operators;
 
 import java.util.Arrays;
 
-import org.apache.flink.annotation.PublicEvolving;
 import org.apache.flink.annotation.Internal;
 import org.apache.flink.annotation.Public;
+import org.apache.flink.annotation.PublicEvolving;
 import org.apache.flink.api.common.InvalidProgramException;
 import org.apache.flink.api.common.functions.CrossFunction;
 import org.apache.flink.api.common.operators.BinaryOperatorInformation;
@@ -124,21 +124,12 @@ public class CrossOperator<I1, I2, OUT> extends TwoInputUdfOperator<I1, I2, OUT,
 	@Public
 	public static final class DefaultCross<I1, I2> extends CrossOperator<I1, I2, Tuple2<I1, I2>>  {
 
-		private final DataSet<I1> input1;
-		private final DataSet<I2> input2;
-
 		public DefaultCross(DataSet<I1> input1, DataSet<I2> input2, CrossHint hint, String defaultName) {
-			
 			super(input1, input2, new DefaultCrossFunction<I1, I2>(),
-					new TupleTypeInfo<Tuple2<I1, I2>>(input1.getType(), input2.getType()),
-					hint, defaultName);
-
-			if (input1 == null || input2 == null) {
-				throw new NullPointerException();
-			}
-
-			this.input1 = input1;
-			this.input2 = input2;
+				new TupleTypeInfo<Tuple2<I1, I2>>(
+					Preconditions.checkNotNull(input1, "input1 is null").getType(),
+					Preconditions.checkNotNull(input2, "input2 is null").getType()),
+				hint, defaultName);
 		}
 
 		/**
@@ -155,9 +146,9 @@ public class CrossOperator<I1, I2, OUT> extends TwoInputUdfOperator<I1, I2, OUT,
 			if (function == null) {
 				throw new NullPointerException("Cross function must not be null.");
 			}
-			TypeInformation<R> returnType = TypeExtractor.getCrossReturnTypes(function, input1.getType(), input2.getType(),
+			TypeInformation<R> returnType = TypeExtractor.getCrossReturnTypes(function, getInput1().getType(), getInput2().getType(),
 					super.getDefaultName(), true);
-			return new CrossOperator<I1, I2, R>(input1, input2, clean(function), returnType, 
+			return new CrossOperator<I1, I2, R>(getInput1(), getInput2(), clean(function), returnType,
 					getCrossHint(), Utils.getCallLocationName());
 		}
 		

http://git-wip-us.apache.org/repos/asf/flink/blob/470b752e/flink-java/src/main/java/org/apache/flink/api/java/operators/TwoInputOperator.java
----------------------------------------------------------------------
diff --git a/flink-java/src/main/java/org/apache/flink/api/java/operators/TwoInputOperator.java b/flink-java/src/main/java/org/apache/flink/api/java/operators/TwoInputOperator.java
index c64882d..28dec32 100644
--- a/flink-java/src/main/java/org/apache/flink/api/java/operators/TwoInputOperator.java
+++ b/flink-java/src/main/java/org/apache/flink/api/java/operators/TwoInputOperator.java
@@ -21,6 +21,7 @@ package org.apache.flink.api.java.operators;
 import org.apache.flink.annotation.Public;
 import org.apache.flink.api.common.typeinfo.TypeInformation;
 import org.apache.flink.api.java.DataSet;
+import org.apache.flink.util.Preconditions;
 
 /**
  * Base class for operations that operates on two input data sets.
@@ -37,8 +38,8 @@ public abstract class TwoInputOperator<IN1, IN2, OUT, O extends TwoInputOperator
 	
 	
 	protected TwoInputOperator(DataSet<IN1> input1, DataSet<IN2> input2, TypeInformation<OUT> resultType) {
-		super(input1.getExecutionEnvironment(), resultType);
-		
+		super(Preconditions.checkNotNull(input1, "input1 is null").getExecutionEnvironment(), resultType);
+		Preconditions.checkNotNull(input2, "input2 is null");
 		DataSet.checkSameExecutionContext(input1, input2);
 		this.input1 = input1;
 		this.input2 = input2;


[23/50] [abbrv] flink git commit: [FLINK-4373] [cluster management] Introduce SlotID, AllocationID, ResourceProfile

Posted by tr...@apache.org.
[FLINK-4373] [cluster management] Introduce SlotID, AllocationID, ResourceProfile

[FLINK-4373] [cluster management] address comments

This closes #2370.


Project: http://git-wip-us.apache.org/repos/asf/flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/9159ad6e
Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/9159ad6e
Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/9159ad6e

Branch: refs/heads/flip-6
Commit: 9159ad6eafbba3d6f5804e62a9f4209d803da123
Parents: 433a1fd
Author: Kurt Young <yk...@gmail.com>
Authored: Fri Aug 12 11:05:48 2016 +0800
Committer: Till Rohrmann <tr...@apache.org>
Committed: Wed Sep 21 11:39:14 2016 +0200

----------------------------------------------------------------------
 .../clusterframework/types/AllocationID.java    | 32 ++++++++
 .../clusterframework/types/ResourceProfile.java | 68 ++++++++++++++++
 .../runtime/clusterframework/types/SlotID.java  | 83 ++++++++++++++++++++
 .../types/ResourceProfileTest.java              | 49 ++++++++++++
 4 files changed, 232 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/flink/blob/9159ad6e/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/types/AllocationID.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/types/AllocationID.java b/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/types/AllocationID.java
new file mode 100644
index 0000000..f7ae6ee
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/types/AllocationID.java
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.clusterframework.types;
+
+import org.apache.flink.util.AbstractID;
+
+/**
+ * Unique identifier for the attempt to allocate a slot, normally created by JobManager when requesting a slot,
+ * constant across re-tries. This can also be used to identify responses by the ResourceManager and to identify
+ * deployment calls towards the TaskManager that was allocated from.
+ */
+public class AllocationID extends AbstractID {
+
+	private static final long serialVersionUID = 1L;
+
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/9159ad6e/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/types/ResourceProfile.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/types/ResourceProfile.java b/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/types/ResourceProfile.java
new file mode 100644
index 0000000..cbe709f
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/types/ResourceProfile.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.clusterframework.types;
+
+import java.io.Serializable;
+
+/**
+ * Describe the resource profile of the slot, either when requiring or offering it. The profile can be
+ * checked whether it can match another profile's requirement, and furthermore we may calculate a matching
+ * score to decide which profile we should choose when we have lots of candidate slots.
+ */
+public class ResourceProfile implements Serializable {
+
+	private static final long serialVersionUID = -784900073893060124L;
+
+	/** How many cpu cores are needed, use double so we can specify cpu like 0.1 */
+	private final double cpuCores;
+
+	/** How many memory in mb are needed */
+	private final long memoryInMB;
+
+	public ResourceProfile(double cpuCores, long memoryInMB) {
+		this.cpuCores = cpuCores;
+		this.memoryInMB = memoryInMB;
+	}
+
+	/**
+	 * Get the cpu cores needed
+	 * @return The cpu cores, 1.0 means a full cpu thread
+	 */
+	public double getCpuCores() {
+		return cpuCores;
+	}
+
+	/**
+	 * Get the memory needed in MB
+	 * @return The memory in MB
+	 */
+	public long getMemoryInMB() {
+		return memoryInMB;
+	}
+
+	/**
+	 * Check whether required resource profile can be matched
+	 *
+	 * @param required the required resource profile
+	 * @return true if the requirement is matched, otherwise false
+	 */
+	public boolean isMatching(ResourceProfile required) {
+		return Double.compare(cpuCores, required.getCpuCores()) >= 0 && memoryInMB >= required.getMemoryInMB();
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/9159ad6e/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/types/SlotID.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/types/SlotID.java b/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/types/SlotID.java
new file mode 100644
index 0000000..d1b072d
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/types/SlotID.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.clusterframework.types;
+
+import java.io.Serializable;
+
+import static org.apache.flink.util.Preconditions.checkNotNull;
+
+/**
+ * Unique identifier for a slot which located in TaskManager.
+ */
+public class SlotID implements ResourceIDRetrievable, Serializable {
+
+	private static final long serialVersionUID = -6399206032549807771L;
+
+	/** The resource id which this slot located */
+	private final ResourceID resourceId;
+
+	/** The numeric id for single slot */
+	private final int slotId;
+
+	public SlotID(ResourceID resourceId, int slotId) {
+		this.resourceId = checkNotNull(resourceId, "ResourceID must not be null");
+		this.slotId = slotId;
+	}
+
+	// ------------------------------------------------------------------------
+
+	@Override
+	public ResourceID getResourceID() {
+		return resourceId;
+	}
+
+	// ------------------------------------------------------------------------
+
+	@Override
+	public boolean equals(Object o) {
+		if (this == o) {
+			return true;
+		}
+		if (o == null || getClass() != o.getClass()) {
+			return false;
+		}
+
+		SlotID slotID = (SlotID) o;
+
+		if (slotId != slotID.slotId) {
+			return false;
+		}
+		return resourceId.equals(slotID.resourceId);
+	}
+
+	@Override
+	public int hashCode() {
+		int result = resourceId.hashCode();
+		result = 31 * result + slotId;
+		return result;
+	}
+
+	@Override
+	public String toString() {
+		return "SlotID{" +
+			"resourceId=" + resourceId +
+			", slotId=" + slotId +
+			'}';
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/9159ad6e/flink-runtime/src/test/java/org/apache/flink/runtime/clusterframework/types/ResourceProfileTest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/clusterframework/types/ResourceProfileTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/clusterframework/types/ResourceProfileTest.java
new file mode 100644
index 0000000..bc5ddaa
--- /dev/null
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/clusterframework/types/ResourceProfileTest.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.clusterframework.types;
+
+import org.junit.Test;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+public class ResourceProfileTest {
+
+	@Test
+	public void testMatchRequirement() throws Exception {
+		ResourceProfile rp1 = new ResourceProfile(1.0, 100);
+		ResourceProfile rp2 = new ResourceProfile(1.0, 200);
+		ResourceProfile rp3 = new ResourceProfile(2.0, 100);
+		ResourceProfile rp4 = new ResourceProfile(2.0, 200);
+
+		assertFalse(rp1.isMatching(rp2));
+		assertTrue(rp2.isMatching(rp1));
+
+		assertFalse(rp1.isMatching(rp3));
+		assertTrue(rp3.isMatching(rp1));
+
+		assertFalse(rp2.isMatching(rp3));
+		assertFalse(rp3.isMatching(rp2));
+
+		assertTrue(rp4.isMatching(rp1));
+		assertTrue(rp4.isMatching(rp2));
+		assertTrue(rp4.isMatching(rp3));
+		assertTrue(rp4.isMatching(rp4));
+	}
+}


[26/50] [abbrv] flink git commit: [FLINK-4382] [rpc] Buffer rpc calls until the RpcEndpoint has been started

Posted by tr...@apache.org.
[FLINK-4382] [rpc] Buffer rpc calls until the RpcEndpoint has been started

This PR allows the AkkaRpcActor to stash messages until the corresponding RcpEndpoint
has been started. When receiving a Processing.START message, the AkkaRpcActor
unstashes all messages and starts processing rpcs. When receiving a Processing.STOP
message, it will stop processing messages and stash incoming messages again.

Add test case for message stashing

This closes #2358.


Project: http://git-wip-us.apache.org/repos/asf/flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/613f5a7d
Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/613f5a7d
Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/613f5a7d

Branch: refs/heads/flip-6
Commit: 613f5a7dfe551725306a799ce6815a496f302813
Parents: 9159ad6
Author: Till Rohrmann <tr...@apache.org>
Authored: Thu Aug 11 18:13:25 2016 +0200
Committer: Till Rohrmann <tr...@apache.org>
Committed: Wed Sep 21 11:39:14 2016 +0200

----------------------------------------------------------------------
 .../apache/flink/runtime/rpc/RpcEndpoint.java   |  15 ++-
 .../flink/runtime/rpc/StartStoppable.java       |  35 ++++++
 .../runtime/rpc/akka/AkkaInvocationHandler.java |  21 +++-
 .../flink/runtime/rpc/akka/AkkaRpcActor.java    |  39 ++++++-
 .../flink/runtime/rpc/akka/AkkaRpcService.java  |   8 +-
 .../runtime/rpc/akka/messages/Processing.java   |  27 +++++
 .../flink/runtime/rpc/RpcCompletenessTest.java  |  45 +++++++-
 .../runtime/rpc/akka/AkkaRpcActorTest.java      | 108 +++++++++++++++++++
 .../runtime/rpc/akka/AkkaRpcServiceTest.java    |   3 +
 .../flink/runtime/rpc/akka/AsyncCallsTest.java  |   5 +-
 .../rpc/akka/MainThreadValidationTest.java      |   4 +-
 .../rpc/akka/MessageSerializationTest.java      |   4 +
 .../rpc/taskexecutor/TaskExecutorTest.java      |  18 ++++
 13 files changed, 315 insertions(+), 17 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/flink/blob/613f5a7d/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcEndpoint.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcEndpoint.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcEndpoint.java
index d36a283..67ac182 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcEndpoint.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcEndpoint.java
@@ -74,7 +74,7 @@ public abstract class RpcEndpoint<C extends RpcGateway> {
 
 	/** The main thread execution context to be used to execute future callbacks in the main thread
 	 * of the executing rpc server. */
-	private final MainThreadExecutionContext mainThreadExecutionContext;
+	private final ExecutionContext mainThreadExecutionContext;
 
 	/** A reference to the endpoint's main thread, if the current method is called by the main thread */
 	final AtomicReference<Thread> currentMainThread = new AtomicReference<>(null); 
@@ -106,10 +106,21 @@ public abstract class RpcEndpoint<C extends RpcGateway> {
 	}
 	
 	// ------------------------------------------------------------------------
-	//  Shutdown
+	//  Start & Shutdown
 	// ------------------------------------------------------------------------
 
 	/**
+	 * Starts the rpc endpoint. This tells the underlying rpc server that the rpc endpoint is ready
+	 * to process remote procedure calls.
+	 *
+	 * IMPORTANT: Whenever you override this method, call the parent implementation to enable
+	 * rpc processing. It is advised to make the parent call last.
+	 */
+	public void start() {
+		((StartStoppable) self).start();
+	}
+
+	/**
 	 * Shuts down the underlying RPC endpoint via the RPC service.
 	 * After this method was called, the RPC endpoint will no longer be reachable, neither remotely,
 	 * not via its {@link #getSelf() self gateway}. It will also not accepts executions in main thread

http://git-wip-us.apache.org/repos/asf/flink/blob/613f5a7d/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/StartStoppable.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/StartStoppable.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/StartStoppable.java
new file mode 100644
index 0000000..dd5595f
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/StartStoppable.java
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc;
+
+/**
+ * Interface to start and stop the processing of rpc calls in the rpc server.
+ */
+public interface StartStoppable {
+
+	/**
+	 * Starts the processing of remote procedure calls.
+	 */
+	void start();
+
+	/**
+	 * Stops the processing of remote procedure calls.
+	 */
+	void stop();
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/613f5a7d/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaInvocationHandler.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaInvocationHandler.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaInvocationHandler.java
index 297104b..524bf74 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaInvocationHandler.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaInvocationHandler.java
@@ -24,8 +24,10 @@ import akka.util.Timeout;
 import org.apache.flink.api.java.tuple.Tuple2;
 import org.apache.flink.runtime.rpc.MainThreadExecutor;
 import org.apache.flink.runtime.rpc.RpcTimeout;
+import org.apache.flink.runtime.rpc.StartStoppable;
 import org.apache.flink.runtime.rpc.akka.messages.CallAsync;
 import org.apache.flink.runtime.rpc.akka.messages.LocalRpcInvocation;
+import org.apache.flink.runtime.rpc.akka.messages.Processing;
 import org.apache.flink.runtime.rpc.akka.messages.RemoteRpcInvocation;
 import org.apache.flink.runtime.rpc.akka.messages.RpcInvocation;
 import org.apache.flink.runtime.rpc.akka.messages.RunAsync;
@@ -50,7 +52,7 @@ import static org.apache.flink.util.Preconditions.checkArgument;
  * rpc in a {@link LocalRpcInvocation} message and then sends it to the {@link AkkaRpcActor} where it is
  * executed.
  */
-class AkkaInvocationHandler implements InvocationHandler, AkkaGateway, MainThreadExecutor {
+class AkkaInvocationHandler implements InvocationHandler, AkkaGateway, MainThreadExecutor, StartStoppable {
 	private static final Logger LOG = Logger.getLogger(AkkaInvocationHandler.class);
 
 	private final ActorRef rpcEndpoint;
@@ -76,7 +78,8 @@ class AkkaInvocationHandler implements InvocationHandler, AkkaGateway, MainThrea
 
 		Object result;
 
-		if (declaringClass.equals(AkkaGateway.class) || declaringClass.equals(MainThreadExecutor.class) || declaringClass.equals(Object.class)) {
+		if (declaringClass.equals(AkkaGateway.class) || declaringClass.equals(MainThreadExecutor.class) ||
+			declaringClass.equals(Object.class) || declaringClass.equals(StartStoppable.class)) {
 			result = method.invoke(this, args);
 		} else {
 			String methodName = method.getName();
@@ -171,6 +174,20 @@ class AkkaInvocationHandler implements InvocationHandler, AkkaGateway, MainThrea
 		}
 	}
 
+	@Override
+	public void start() {
+		rpcEndpoint.tell(Processing.START, ActorRef.noSender());
+	}
+
+	@Override
+	public void stop() {
+		rpcEndpoint.tell(Processing.STOP, ActorRef.noSender());
+	}
+
+	// ------------------------------------------------------------------------
+	//  Helper methods
+	// ------------------------------------------------------------------------
+
 	/**
 	 * Extracts the {@link RpcTimeout} annotated rpc timeout value from the list of given method
 	 * arguments. If no {@link RpcTimeout} annotated parameter could be found, then the default

http://git-wip-us.apache.org/repos/asf/flink/blob/613f5a7d/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcActor.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcActor.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcActor.java
index dfcbcc3..2373be9 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcActor.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcActor.java
@@ -20,13 +20,15 @@ package org.apache.flink.runtime.rpc.akka;
 
 import akka.actor.ActorRef;
 import akka.actor.Status;
-import akka.actor.UntypedActor;
+import akka.actor.UntypedActorWithStash;
+import akka.japi.Procedure;
 import akka.pattern.Patterns;
 import org.apache.flink.runtime.rpc.MainThreadValidatorUtil;
 import org.apache.flink.runtime.rpc.RpcEndpoint;
 import org.apache.flink.runtime.rpc.RpcGateway;
 import org.apache.flink.runtime.rpc.akka.messages.CallAsync;
 import org.apache.flink.runtime.rpc.akka.messages.LocalRpcInvocation;
+import org.apache.flink.runtime.rpc.akka.messages.Processing;
 import org.apache.flink.runtime.rpc.akka.messages.RpcInvocation;
 import org.apache.flink.runtime.rpc.akka.messages.RunAsync;
 
@@ -45,18 +47,23 @@ import static org.apache.flink.util.Preconditions.checkNotNull;
 
 /**
  * Akka rpc actor which receives {@link LocalRpcInvocation}, {@link RunAsync} and {@link CallAsync}
- * messages.
+ * {@link Processing} messages.
  * <p>
  * The {@link LocalRpcInvocation} designates a rpc and is dispatched to the given {@link RpcEndpoint}
  * instance.
  * <p>
  * The {@link RunAsync} and {@link CallAsync} messages contain executable code which is executed
  * in the context of the actor thread.
+ * <p>
+ * The {@link Processing} message controls the processing behaviour of the akka rpc actor. A
+ * {@link Processing#START} message unstashes all stashed messages and starts processing incoming
+ * messages. A {@link Processing#STOP} message stops processing messages and stashes incoming
+ * messages.
  *
  * @param <C> Type of the {@link RpcGateway} associated with the {@link RpcEndpoint}
  * @param <T> Type of the {@link RpcEndpoint}
  */
-class AkkaRpcActor<C extends RpcGateway, T extends RpcEndpoint<C>> extends UntypedActor {
+class AkkaRpcActor<C extends RpcGateway, T extends RpcEndpoint<C>> extends UntypedActorWithStash {
 	
 	private static final Logger LOG = LoggerFactory.getLogger(AkkaRpcActor.class);
 
@@ -73,6 +80,27 @@ class AkkaRpcActor<C extends RpcGateway, T extends RpcEndpoint<C>> extends Untyp
 
 	@Override
 	public void onReceive(final Object message) {
+		if (message.equals(Processing.START)) {
+			unstashAll();
+			getContext().become(new Procedure<Object>() {
+				@Override
+				public void apply(Object message) throws Exception {
+					if (message.equals(Processing.STOP)) {
+						getContext().unbecome();
+					} else {
+						handleMessage(message);
+					}
+				}
+			});
+		} else {
+			LOG.info("The rpc endpoint {} has not been started yet. Stashing message {} until processing is started.",
+				rpcEndpoint.getClass().getName(),
+				message.getClass().getName());
+			stash();
+		}
+	}
+
+	private void handleMessage(Object message) {
 		mainThreadValidator.enterMainThread();
 		try {
 			if (message instanceof RunAsync) {
@@ -82,7 +110,10 @@ class AkkaRpcActor<C extends RpcGateway, T extends RpcEndpoint<C>> extends Untyp
 			} else if (message instanceof RpcInvocation) {
 				handleRpcInvocation((RpcInvocation) message);
 			} else {
-				LOG.warn("Received message of unknown type {}. Dropping this message!", message.getClass());
+				LOG.warn(
+					"Received message of unknown type {} with value {}. Dropping this message!",
+					message.getClass().getName(),
+					message);
 			}
 		} finally {
 			mainThreadValidator.exitMainThread();

http://git-wip-us.apache.org/repos/asf/flink/blob/613f5a7d/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcService.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcService.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcService.java
index b963c53..7b33524 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcService.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcService.java
@@ -34,7 +34,7 @@ import org.apache.flink.runtime.rpc.MainThreadExecutor;
 import org.apache.flink.runtime.rpc.RpcGateway;
 import org.apache.flink.runtime.rpc.RpcEndpoint;
 import org.apache.flink.runtime.rpc.RpcService;
-
+import org.apache.flink.runtime.rpc.StartStoppable;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -136,7 +136,11 @@ public class AkkaRpcService implements RpcService {
 		@SuppressWarnings("unchecked")
 		C self = (C) Proxy.newProxyInstance(
 			classLoader,
-			new Class<?>[]{rpcEndpoint.getSelfGatewayType(), MainThreadExecutor.class, AkkaGateway.class},
+			new Class<?>[]{
+				rpcEndpoint.getSelfGatewayType(),
+				MainThreadExecutor.class,
+				StartStoppable.class,
+				AkkaGateway.class},
 			akkaInvocationHandler);
 
 		return self;

http://git-wip-us.apache.org/repos/asf/flink/blob/613f5a7d/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/Processing.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/Processing.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/Processing.java
new file mode 100644
index 0000000..5c7df5d
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/Processing.java
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc.akka.messages;
+
+/**
+ * Controls the processing behaviour of the {@link org.apache.flink.runtime.rpc.akka.AkkaRpcActor}
+ */
+public enum Processing {
+	START, // Unstashes all stashed messages and starts processing incoming messages
+	STOP // Stop processing messages and stashes all incoming messages
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/613f5a7d/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/RpcCompletenessTest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/RpcCompletenessTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/RpcCompletenessTest.java
index e50533e..97cf0cb 100644
--- a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/RpcCompletenessTest.java
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/RpcCompletenessTest.java
@@ -18,6 +18,8 @@
 
 package org.apache.flink.runtime.rpc;
 
+import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
+import org.apache.flink.api.common.typeinfo.TypeInformation;
 import org.apache.flink.util.ReflectionUtil;
 import org.apache.flink.util.TestLogger;
 import org.junit.Test;
@@ -140,7 +142,7 @@ public class RpcCompletenessTest extends TestLogger {
 		int rpcTimeoutParameters = 0;
 
 		for (int i = 0; i < parameterAnnotations.length; i++) {
-			if (isRpcTimeout(parameterAnnotations[i])) {
+			if (RpcCompletenessTest.isRpcTimeout(parameterAnnotations[i])) {
 				assertTrue(
 					"The rpc timeout has to be of type " + FiniteDuration.class.getName() + ".",
 					parameterTypes[i].equals(FiniteDuration.class));
@@ -185,7 +187,7 @@ public class RpcCompletenessTest extends TestLogger {
 
 		// filter out the RpcTimeout parameters
 		for (int i = 0; i < gatewayParameterTypes.length; i++) {
-			if (!isRpcTimeout(gatewayParameterAnnotations[i])) {
+			if (!RpcCompletenessTest.isRpcTimeout(gatewayParameterAnnotations[i])) {
 				filteredGatewayParameterTypes.add(gatewayParameterTypes[i]);
 			}
 		}
@@ -235,7 +237,22 @@ public class RpcCompletenessTest extends TestLogger {
 	}
 
 	private boolean checkType(Class<?> firstType, Class<?> secondType) {
-		return firstType.equals(secondType);
+		Class<?> firstResolvedType;
+		Class<?> secondResolvedType;
+
+		if (firstType.isPrimitive()) {
+			firstResolvedType = RpcCompletenessTest.resolvePrimitiveType(firstType);
+		} else {
+			firstResolvedType = firstType;
+		}
+
+		if (secondType.isPrimitive()) {
+			secondResolvedType = RpcCompletenessTest.resolvePrimitiveType(secondType);
+		} else {
+			secondResolvedType = secondType;
+		}
+
+		return firstResolvedType.equals(secondResolvedType);
 	}
 
 	/**
@@ -279,7 +296,7 @@ public class RpcCompletenessTest extends TestLogger {
 
 		for (int i = 0; i < parameterTypes.length; i++) {
 			// filter out the RpcTimeout parameters
-			if (!isRpcTimeout(parameterAnnotations[i])) {
+			if (!RpcCompletenessTest.isRpcTimeout(parameterAnnotations[i])) {
 				builder.append(parameterTypes[i].getName());
 
 				if (i < parameterTypes.length -1) {
@@ -293,7 +310,7 @@ public class RpcCompletenessTest extends TestLogger {
 		return builder.toString();
 	}
 
-	private boolean isRpcTimeout(Annotation[] annotations) {
+	private static boolean isRpcTimeout(Annotation[] annotations) {
 		for (Annotation annotation : annotations) {
 			if (annotation.annotationType().equals(RpcTimeout.class)) {
 				return true;
@@ -302,4 +319,22 @@ public class RpcCompletenessTest extends TestLogger {
 
 		return false;
 	}
+
+	/**
+	 * Returns the boxed type for a primitive type.
+	 *
+	 * @param primitveType Primitive type to resolve
+	 * @return Boxed type for the given primitive type
+	 */
+	private static Class<?> resolvePrimitiveType(Class<?> primitveType) {
+		assert primitveType.isPrimitive();
+
+		TypeInformation<?> typeInformation = BasicTypeInfo.getInfoFor(primitveType);
+
+		if (typeInformation != null) {
+			return typeInformation.getTypeClass();
+		} else {
+			throw new RuntimeException("Could not retrive basic type information for primitive type " + primitveType + '.');
+		}
+	}
 }

http://git-wip-us.apache.org/repos/asf/flink/blob/613f5a7d/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcActorTest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcActorTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcActorTest.java
new file mode 100644
index 0000000..1653fac
--- /dev/null
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcActorTest.java
@@ -0,0 +1,108 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc.akka;
+
+import akka.actor.ActorSystem;
+import akka.util.Timeout;
+import org.apache.flink.runtime.akka.AkkaUtils;
+import org.apache.flink.runtime.rpc.RpcEndpoint;
+import org.apache.flink.runtime.rpc.RpcGateway;
+import org.apache.flink.runtime.rpc.RpcMethod;
+import org.apache.flink.runtime.rpc.RpcService;
+import org.apache.flink.util.TestLogger;
+import org.hamcrest.core.Is;
+import org.junit.AfterClass;
+import org.junit.Test;
+import scala.concurrent.Await;
+import scala.concurrent.Future;
+
+import java.util.concurrent.TimeUnit;
+
+import static org.junit.Assert.assertThat;
+
+public class AkkaRpcActorTest extends TestLogger {
+
+	// ------------------------------------------------------------------------
+	//  shared test members
+	// ------------------------------------------------------------------------
+
+	private static ActorSystem actorSystem = AkkaUtils.createDefaultActorSystem();
+
+	private static Timeout timeout = new Timeout(10000, TimeUnit.MILLISECONDS);
+
+	private static AkkaRpcService akkaRpcService =
+		new AkkaRpcService(actorSystem, timeout);
+
+	@AfterClass
+	public static void shutdown() {
+		akkaRpcService.stopService();
+		actorSystem.shutdown();
+		actorSystem.awaitTermination();
+	}
+
+	/**
+	 * Tests that the {@link AkkaRpcActor} stashes messages until the corresponding
+	 * {@link RpcEndpoint} has been started.
+	 */
+	@Test
+	public void testMessageStashing() throws Exception {
+		int expectedValue = 1337;
+
+		DummyRpcEndpoint rpcEndpoint = new DummyRpcEndpoint(akkaRpcService);
+
+		DummyRpcGateway rpcGateway = rpcEndpoint.getSelf();
+
+		// this message should not be processed until we've started the rpc endpoint
+		Future<Integer> result = rpcGateway.foobar();
+
+		// set a new value which we expect to be returned
+		rpcEndpoint.setFoobar(expectedValue);
+
+		// now process the rpc
+		rpcEndpoint.start();
+
+		Integer actualValue = Await.result(result, timeout.duration());
+
+		assertThat("The new foobar value should have been returned.", actualValue, Is.is(expectedValue));
+
+		rpcEndpoint.shutDown();
+	}
+
+	private interface DummyRpcGateway extends RpcGateway {
+		Future<Integer> foobar();
+	}
+
+	private static class DummyRpcEndpoint extends RpcEndpoint<DummyRpcGateway> {
+
+		private volatile int _foobar = 42;
+
+		protected DummyRpcEndpoint(RpcService rpcService) {
+			super(rpcService);
+		}
+
+		@RpcMethod
+		public int foobar() {
+			return _foobar;
+		}
+
+		public void setFoobar(int value) {
+			_foobar = value;
+		}
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/613f5a7d/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcServiceTest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcServiceTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcServiceTest.java
index f26b40b..fd55904 100644
--- a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcServiceTest.java
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AkkaRpcServiceTest.java
@@ -57,6 +57,9 @@ public class AkkaRpcServiceTest extends TestLogger {
 		ResourceManager resourceManager = new ResourceManager(akkaRpcService, executorService);
 		JobMaster jobMaster = new JobMaster(akkaRpcService2, executorService);
 
+		resourceManager.start();
+		jobMaster.start();
+
 		ResourceManagerGateway rm = resourceManager.getSelf();
 
 		assertTrue(rm instanceof AkkaGateway);

http://git-wip-us.apache.org/repos/asf/flink/blob/613f5a7d/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AsyncCallsTest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AsyncCallsTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AsyncCallsTest.java
index f2ce52d..d33987c 100644
--- a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AsyncCallsTest.java
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AsyncCallsTest.java
@@ -28,6 +28,7 @@ import org.apache.flink.runtime.rpc.RpcGateway;
 import org.apache.flink.runtime.rpc.RpcMethod;
 import org.apache.flink.runtime.rpc.RpcService;
 
+import org.apache.flink.util.TestLogger;
 import org.junit.AfterClass;
 import org.junit.Test;
 
@@ -42,7 +43,7 @@ import java.util.concurrent.locks.ReentrantLock;
 
 import static org.junit.Assert.*;
 
-public class AsyncCallsTest {
+public class AsyncCallsTest extends TestLogger {
 
 	// ------------------------------------------------------------------------
 	//  shared test members
@@ -72,6 +73,7 @@ public class AsyncCallsTest {
 		final AtomicBoolean concurrentAccess = new AtomicBoolean(false);
 
 		TestEndpoint testEndpoint = new TestEndpoint(akkaRpcService, lock);
+		testEndpoint.start();
 		TestGateway gateway = testEndpoint.getSelf();
 
 		// a bunch of gateway calls
@@ -127,6 +129,7 @@ public class AsyncCallsTest {
 		final long delay = 200;
 
 		TestEndpoint testEndpoint = new TestEndpoint(akkaRpcService, lock);
+		testEndpoint.start();
 
 		// run something asynchronously
 		testEndpoint.runAsync(new Runnable() {

http://git-wip-us.apache.org/repos/asf/flink/blob/613f5a7d/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/MainThreadValidationTest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/MainThreadValidationTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/MainThreadValidationTest.java
index b854143..9ffafda 100644
--- a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/MainThreadValidationTest.java
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/MainThreadValidationTest.java
@@ -27,13 +27,14 @@ import org.apache.flink.runtime.rpc.RpcGateway;
 import org.apache.flink.runtime.rpc.RpcMethod;
 import org.apache.flink.runtime.rpc.RpcService;
 
+import org.apache.flink.util.TestLogger;
 import org.junit.Test;
 
 import java.util.concurrent.TimeUnit;
 
 import static org.junit.Assert.assertTrue;
 
-public class MainThreadValidationTest {
+public class MainThreadValidationTest extends TestLogger {
 
 	@Test
 	public void failIfNotInMainThread() {
@@ -51,6 +52,7 @@ public class MainThreadValidationTest {
 
 		try {
 			TestEndpoint testEndpoint = new TestEndpoint(akkaRpcService);
+			testEndpoint.start();
 
 			// this works, because it is executed as an RPC call
 			testEndpoint.getSelf().someConcurrencyCriticalFunction();

http://git-wip-us.apache.org/repos/asf/flink/blob/613f5a7d/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/MessageSerializationTest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/MessageSerializationTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/MessageSerializationTest.java
index ca8179c..9d2ed99 100644
--- a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/MessageSerializationTest.java
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/MessageSerializationTest.java
@@ -86,6 +86,7 @@ public class MessageSerializationTest extends TestLogger {
 	public void testNonSerializableLocalMessageTransfer() throws InterruptedException, IOException {
 		LinkedBlockingQueue<Object> linkedBlockingQueue = new LinkedBlockingQueue<>();
 		TestEndpoint testEndpoint = new TestEndpoint(akkaRpcService1, linkedBlockingQueue);
+		testEndpoint.start();
 
 		TestGateway testGateway = testEndpoint.getSelf();
 
@@ -106,6 +107,7 @@ public class MessageSerializationTest extends TestLogger {
 		LinkedBlockingQueue<Object> linkedBlockingQueue = new LinkedBlockingQueue<>();
 
 		TestEndpoint testEndpoint = new TestEndpoint(akkaRpcService1, linkedBlockingQueue);
+		testEndpoint.start();
 
 		String address = testEndpoint.getAddress();
 
@@ -126,6 +128,7 @@ public class MessageSerializationTest extends TestLogger {
 		LinkedBlockingQueue<Object> linkedBlockingQueue = new LinkedBlockingQueue<>();
 
 		TestEndpoint testEndpoint = new TestEndpoint(akkaRpcService1, linkedBlockingQueue);
+		testEndpoint.start();
 
 		String address = testEndpoint.getAddress();
 
@@ -149,6 +152,7 @@ public class MessageSerializationTest extends TestLogger {
 		LinkedBlockingQueue<Object> linkedBlockingQueue = new LinkedBlockingQueue<>();
 
 		TestEndpoint testEndpoint = new TestEndpoint(akkaRpcService1, linkedBlockingQueue);
+		testEndpoint.start();
 
 		String address = testEndpoint.getAddress();
 

http://git-wip-us.apache.org/repos/asf/flink/blob/613f5a7d/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorTest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorTest.java
index 33c9cb6..c96f4f6 100644
--- a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorTest.java
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/taskexecutor/TaskExecutorTest.java
@@ -28,17 +28,26 @@ import org.apache.flink.runtime.deployment.TaskDeploymentDescriptor;
 import org.apache.flink.runtime.executiongraph.ExecutionAttemptID;
 import org.apache.flink.runtime.jobgraph.JobVertexID;
 import org.apache.flink.runtime.messages.Acknowledge;
+import org.apache.flink.runtime.rpc.MainThreadExecutor;
+import org.apache.flink.runtime.rpc.RpcEndpoint;
+import org.apache.flink.runtime.rpc.RpcGateway;
 import org.apache.flink.runtime.rpc.RpcService;
+import org.apache.flink.runtime.rpc.StartStoppable;
 import org.apache.flink.runtime.util.DirectExecutorService;
 import org.apache.flink.util.SerializedValue;
 import org.apache.flink.util.TestLogger;
 import org.junit.Test;
+import org.mockito.Matchers;
+import org.mockito.cglib.proxy.InvocationHandler;
+import org.mockito.cglib.proxy.Proxy;
+import scala.concurrent.Future;
 
 import java.net.URL;
 import java.util.Collections;
 
 import static org.junit.Assert.fail;
 import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
 
 public class TaskExecutorTest extends TestLogger {
 
@@ -48,8 +57,13 @@ public class TaskExecutorTest extends TestLogger {
 	@Test
 	public void testTaskExecution() throws Exception {
 		RpcService testingRpcService = mock(RpcService.class);
+		InvocationHandler invocationHandler = mock(InvocationHandler.class);
+		Object selfGateway = Proxy.newProxyInstance(ClassLoader.getSystemClassLoader(), new Class<?>[] {TaskExecutorGateway.class, MainThreadExecutor.class, StartStoppable.class}, invocationHandler);
+		when(testingRpcService.startServer(Matchers.any(RpcEndpoint.class))).thenReturn((RpcGateway)selfGateway);
+
 		DirectExecutorService directExecutorService = new DirectExecutorService();
 		TaskExecutor taskExecutor = new TaskExecutor(testingRpcService, directExecutorService);
+		taskExecutor.start();
 
 		TaskDeploymentDescriptor tdd = new TaskDeploymentDescriptor(
 			new JobID(),
@@ -82,8 +96,12 @@ public class TaskExecutorTest extends TestLogger {
 	@Test(expected=Exception.class)
 	public void testWrongTaskCancellation() throws Exception {
 		RpcService testingRpcService = mock(RpcService.class);
+		InvocationHandler invocationHandler = mock(InvocationHandler.class);
+		Object selfGateway = Proxy.newProxyInstance(ClassLoader.getSystemClassLoader(), new Class<?>[] {TaskExecutorGateway.class, MainThreadExecutor.class, StartStoppable.class}, invocationHandler);
+		when(testingRpcService.startServer(Matchers.any(RpcEndpoint.class))).thenReturn((RpcGateway)selfGateway);
 		DirectExecutorService directExecutorService = null;
 		TaskExecutor taskExecutor = new TaskExecutor(testingRpcService, directExecutorService);
+		taskExecutor.start();
 
 		taskExecutor.cancelTask(new ExecutionAttemptID());
 


[13/50] [abbrv] flink git commit: [FLINK-3929] Added Keytab based Kerberos support to enable secure Flink cluster deployment(addresses HDHS, Kafka and ZK services)

Posted by tr...@apache.org.
[FLINK-3929] Added Keytab based Kerberos support to enable secure Flink cluster deployment(addresses HDHS, Kafka and ZK services)

FLINK-3929 Added MiniKDC support for Kafka, Zookeeper, RollingFS and Yarn integration test modules


Project: http://git-wip-us.apache.org/repos/asf/flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/25a622fd
Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/25a622fd
Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/25a622fd

Branch: refs/heads/flip-6
Commit: 25a622fd9c9255bd1a5b4b6ff7891730dce34ac1
Parents: 303f6fe
Author: Vijay Srinivasaraghavan <vi...@emc.com>
Authored: Wed Jul 20 17:08:33 2016 -0700
Committer: Maximilian Michels <mx...@apache.org>
Committed: Tue Sep 20 22:03:29 2016 +0200

----------------------------------------------------------------------
 docs/internals/flink_security.md                |  87 ++++++
 docs/setup/config.md                            |  27 +-
 .../org/apache/flink/client/CliFrontend.java    |  38 +--
 .../flink/configuration/ConfigConstants.java    |  22 ++
 .../org/apache/flink/util/Preconditions.java    |   2 +-
 .../src/main/flink-bin/conf/flink-jaas.conf     |  26 ++
 flink-dist/src/main/resources/flink-conf.yaml   |  25 ++
 .../MesosApplicationMasterRunner.java           |   3 +-
 .../clusterframework/BootstrapTools.java        |   7 +
 .../runtime/security/JaasConfiguration.java     | 160 ++++++++++
 .../flink/runtime/security/SecurityContext.java | 313 +++++++++++++++++++
 .../flink/runtime/jobmanager/JobManager.scala   |  34 +-
 .../flink/runtime/taskmanager/TaskManager.scala |  25 +-
 .../runtime/security/JaasConfigurationTest.java |  52 +++
 .../runtime/security/SecurityContextTest.java   |  77 +++++
 .../flink-connector-filesystem/pom.xml          |  36 +++
 .../connectors/fs/RollingSinkITCase.java        |  19 +-
 .../connectors/fs/RollingSinkSecuredITCase.java | 232 ++++++++++++++
 .../src/test/resources/log4j-test.properties    |   2 +
 .../connectors/kafka/Kafka08ProducerITCase.java |   1 -
 .../kafka/KafkaTestEnvironmentImpl.java         |  13 +-
 .../flink-connector-kafka-0.9/pom.xml           |  18 ++
 .../connectors/kafka/Kafka09ITCase.java         |  14 +-
 .../connectors/kafka/Kafka09ProducerITCase.java |   1 -
 .../kafka/Kafka09SecureRunITCase.java           |  62 ++++
 .../kafka/KafkaTestEnvironmentImpl.java         |  81 ++++-
 .../src/test/resources/log4j-test.properties    |   2 +
 .../flink-connector-kafka-base/pom.xml          |  19 ++
 .../connectors/kafka/KafkaConsumerTestBase.java | 100 ++++--
 .../connectors/kafka/KafkaProducerTestBase.java |  14 +-
 .../kafka/KafkaShortRetentionTestBase.java      |  29 +-
 .../connectors/kafka/KafkaTestBase.java         |  72 +++--
 .../connectors/kafka/KafkaTestEnvironment.java  |  11 +-
 .../kafka/testutils/DataGenerators.java         |   9 +-
 .../flink-test-utils/pom.xml                    |  25 ++
 .../util/StreamingMultipleProgramsTestBase.java |   6 +
 .../flink/test/util/SecureTestEnvironment.java  | 249 +++++++++++++++
 .../test/util/TestingJaasConfiguration.java     | 106 +++++++
 .../flink/test/util/TestingSecurityContext.java |  80 +++++
 flink-yarn-tests/pom.xml                        |  20 ++
 .../flink/yarn/FlinkYarnSessionCliTest.java     |   2 +-
 .../flink/yarn/YARNHighAvailabilityITCase.java  |  14 +-
 .../YARNSessionCapacitySchedulerITCase.java     |  23 ++
 .../flink/yarn/YARNSessionFIFOITCase.java       |  24 ++
 .../yarn/YARNSessionFIFOSecuredITCase.java      | 103 ++++++
 .../org/apache/flink/yarn/YarnTestBase.java     |  87 +++++-
 .../src/test/resources/log4j-test.properties    |   5 +
 .../yarn/AbstractYarnClusterDescriptor.java     | 104 ++++--
 .../main/java/org/apache/flink/yarn/Utils.java  |   8 +
 .../flink/yarn/YarnApplicationMasterRunner.java | 158 ++++++++--
 .../org/apache/flink/yarn/YarnConfigKeys.java   |   6 +-
 .../flink/yarn/YarnTaskManagerRunner.java       |  88 ++++--
 .../flink/yarn/cli/FlinkYarnSessionCli.java     |  28 +-
 pom.xml                                         |   7 +
 tools/log4j-travis.properties                   |   3 +
 55 files changed, 2553 insertions(+), 226 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/flink/blob/25a622fd/docs/internals/flink_security.md
----------------------------------------------------------------------
diff --git a/docs/internals/flink_security.md b/docs/internals/flink_security.md
new file mode 100644
index 0000000..846273b
--- /dev/null
+++ b/docs/internals/flink_security.md
@@ -0,0 +1,87 @@
+---
+title:  "Flink Security"
+# Top navigation
+top-nav-group: internals
+top-nav-pos: 10
+top-nav-title: Flink Security
+---
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+This document briefly describes how Flink security works in the context of various deployment mechanism (Standalone/Cluster vs YARN) 
+and the connectors that participates in Flink Job execution stage. This documentation can be helpful for both administrators and developers 
+who plans to run Flink on a secure environment.
+
+## Objective
+
+The primary goal of Flink security model is to enable secure data access for jobs within a cluster via connectors. In a production deployment scenario, 
+streaming jobs are understood to run for longer period of time (days/weeks/months) and the system must be  able to authenticate against secure 
+data sources throughout the life of the job. The current implementation supports running Flink clusters (Job Manager/Task Manager/Jobs) under the 
+context of a Kerberos identity based on Keytab credential supplied during deployment time. Any jobs submitted will continue to run in the identity of the cluster.
+
+## How Flink Security works
+Flink deployment includes running Job Manager/ZooKeeper, Task Manager(s), Web UI and Job(s). Jobs (user code) can be submitted through web UI and/or CLI. 
+A Job program may use one or more connectors (Kafka, HDFS, Cassandra, Flume, Kinesis etc.,) and each connector may have a specific security 
+requirements (Kerberos, database based, SSL/TLS, custom etc.,). While satisfying the security requirements for all the connectors evolves over a period 
+of time, at this time of writing, the following connectors/services are tested for Kerberos/Keytab based security.
+
+- Kafka (0.9)
+- HDFS
+- ZooKeeper
+
+Hadoop uses the UserGroupInformation (UGI) class to manage security. UGI is a static implementation that takes care of handling Kerberos authentication. The Flink bootstrap implementation
+(JM/TM/CLI) takes care of instantiating UGI with the appropriate security credentials to establish the necessary security context.
+
+Services like Kafka and ZooKeeper use SASL/JAAS based authentication mechanism to authenticate against a Kerberos server. It expects JAAS configuration with a platform-specific login 
+module *name* to be provided. Managing per-connector configuration files will be an overhead and to overcome this requirement, a process-wide JAAS configuration object is 
+instantiated which serves standard ApplicationConfigurationEntry for the connectors that authenticates using SASL/JAAS mechanism.
+
+It is important to understand that the Flink processes (JM/TM/UI/Jobs) itself uses UGI's doAS() implementation to run under a specific user context, i.e. if Hadoop security is enabled 
+then the Flink processes will be running under a secure user account or else it will run as the OS login user account who starts the Flink cluster.
+
+## Security Configurations
+
+Secure credentials can be supplied by adding below configuration elements to Flink configuration file:
+
+- `security.keytab`: Absolute path to Kerberos keytab file that contains the user credentials/secret.
+
+- `security.principal`: User principal name that the Flink cluster should run as.
+
+The delegation token mechanism (*kinit cache*) is still supported for backward compatibility but enabling security using *keytab* configuration is the preferred and recommended approach.
+
+## Standalone Mode:
+
+Steps to run a secure Flink cluster in standalone/cluster mode:
+- Add security configurations to Flink configuration file (on all cluster nodes) 
+- Make sure the Keytab file exist in the path as indicated in *security.keytab* configuration on all cluster nodes
+- Deploy Flink cluster using cluster start/stop scripts or CLI
+
+## Yarn Mode:
+
+Steps to run secure Flink cluster in Yarn mode:
+- Add security configurations to Flink configuration file (on the node from where cluster will be provisioned using Flink/Yarn CLI) 
+- Make sure the Keytab file exist in the path as indicated in *security.keytab* configuration
+- Deploy Flink cluster using CLI
+
+In Yarn mode, the user supplied keytab will be copied over to the Yarn containers (App Master/JM and TM) as the Yarn local resource file.
+Security implementation details are based on <a href="https://github.com/apache/hadoop/blob/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/YarnApplicationSecurity.md">Yarn security</a> 
+
+## Token Renewal
+
+UGI and Kafka/ZK login module implementations takes care of auto-renewing the tickets upon reaching expiry and no further action is needed on the part of Flink.
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/flink/blob/25a622fd/docs/setup/config.md
----------------------------------------------------------------------
diff --git a/docs/setup/config.md b/docs/setup/config.md
index 51475cc..54ef394 100644
--- a/docs/setup/config.md
+++ b/docs/setup/config.md
@@ -95,18 +95,35 @@ These options are useful for debugging a Flink application for memory and garbag
 
 ### Kerberos
 
-Flink supports Kerberos authentication of Hadoop services such as HDFS, YARN, or HBase.
+Flink supports Kerberos authentication for the following services 
+
++ Hadoop Components: such as HDFS, YARN, or HBase.
++ Kafka Connectors (version 0.9+)
++ Zookeeper Server/Client
+
+Hadoop components relies on the UserGroupInformation (UGI) implementation to handle Kerberos authentication, whereas Kafka and Zookeeper services handles Kerberos authentication through SASL/JAAS implementation.
 
 **Kerberos is only properly supported in Hadoop version 2.6.1 and above. All
   other versions have critical bugs which might fail the Flink job
   unexpectedly.**
 
+**Ticket cache** and **Keytab** modes are supported for all above mentioned services.
+
+> Ticket cache (Supported only to provide backward compatibility support. Keytab is the preferred approach for long running jobs)
+
 While Hadoop uses Kerberos tickets to authenticate users with services initially, the authentication process continues differently afterwards. Instead of saving the ticket to authenticate on a later access, Hadoop creates its own security tokens (DelegationToken) that it passes around. These are authenticated to Kerberos periodically but are independent of the token renewal time. The tokens have a maximum life span identical to the Kerberos ticket maximum life span.
 
-Please make sure to set the maximum ticket life span high long running jobs. The renewal time of the ticket, on the other hand, is not important because Hadoop abstracts this away using its own security tocken renewal system. Hadoop makes sure that tickets are renewed in time and you can be sure to be authenticated until the end of the ticket life time.
+While using ticket cache mode, please make sure to set the maximum ticket life span high long running jobs. 
 
 If you are on YARN, then it is sufficient to authenticate the client with Kerberos. On a Flink standalone cluster you need to ensure that, initially, all nodes are authenticated with Kerberos using the `kinit` tool.
 
+> Keytab (security principal and keytab can be configured through Flink configuration file)
+- `security.keytab`: Path to Keytab file
+- `security.principal`: Principal associated with the keytab
+
+Kerberos ticket renewal is abstracted and automatically handled by the Hadoop/Kafka/ZK login modules and ensures that tickets are renewed in time and you can be sure to be authenticated until the end of the ticket life time.
+
+For Kafka and ZK, process-wide JAAS config will be created using the provided security credentials and the Kerberos authentication will be handled by Kafka/ZK login handlers.
 
 ### Other
 
@@ -315,6 +332,12 @@ Previously this key was named `recovery.mode` and the default value was `standal
 
 - `high-availability.job.delay`: (Default `akka.ask.timeout`) Defines the delay before persisted jobs are recovered in case of a master recovery situation. Previously this key was named `recovery.job.delay`.
 
+### ZooKeeper-Security
+
+- `zookeeper.sasl.disable`: (Default: `true`) Defines if SASL based authentication needs to be enabled or disabled. The configuration value can be set to "true" if ZooKeeper cluster is running in secure mode (Kerberos)
+
+- `zookeeper.sasl.service-name`: (Default: `zookeeper`) If the ZooKeeper server is configured with a different service name (default:"zookeeper") then it can be supplied using this configuration. A mismatch in service name between client and server configuration will cause the authentication to fail. 
+
 ## Environment
 
 - `env.log.dir`: (Defaults to the `log` directory under Flink's home) Defines the directory where the Flink logs are saved. It has to be an absolute path.

http://git-wip-us.apache.org/repos/asf/flink/blob/25a622fd/flink-clients/src/main/java/org/apache/flink/client/CliFrontend.java
----------------------------------------------------------------------
diff --git a/flink-clients/src/main/java/org/apache/flink/client/CliFrontend.java b/flink-clients/src/main/java/org/apache/flink/client/CliFrontend.java
index c7fb647..575ffad 100644
--- a/flink-clients/src/main/java/org/apache/flink/client/CliFrontend.java
+++ b/flink-clients/src/main/java/org/apache/flink/client/CliFrontend.java
@@ -66,7 +66,7 @@ import org.apache.flink.runtime.messages.JobManagerMessages.StopJob;
 import org.apache.flink.runtime.messages.JobManagerMessages.StoppingFailure;
 import org.apache.flink.runtime.messages.JobManagerMessages.TriggerSavepoint;
 import org.apache.flink.runtime.messages.JobManagerMessages.TriggerSavepointSuccess;
-import org.apache.flink.runtime.security.SecurityUtils;
+import org.apache.flink.runtime.security.SecurityContext;
 import org.apache.flink.runtime.util.EnvironmentInformation;
 import org.apache.flink.util.Preconditions;
 import org.apache.flink.util.StringUtils;
@@ -161,6 +161,9 @@ public class CliFrontend {
 				"filesystem scheme from configuration.", e);
 		}
 
+		this.config.setString(ConfigConstants.FLINK_BASE_DIR_PATH_KEY, configDirectory.getAbsolutePath()
+				+ ".." + File.separator);
+
 		this.clientTimeout = AkkaUtils.getClientTimeout(config);
 	}
 
@@ -982,25 +985,7 @@ public class CliFrontend {
 		// do action
 		switch (action) {
 			case ACTION_RUN:
-				// run() needs to run in a secured environment for the optimizer.
-				if (SecurityUtils.isSecurityEnabled()) {
-					String message = "Secure Hadoop environment setup detected. Running in secure context.";
-					LOG.info(message);
-
-					try {
-						return SecurityUtils.runSecured(new SecurityUtils.FlinkSecuredRunner<Integer>() {
-							@Override
-							public Integer run() throws Exception {
-								return CliFrontend.this.run(params);
-							}
-						});
-					}
-					catch (Exception e) {
-						return handleError(e);
-					}
-				} else {
-					return run(params);
-				}
+				return CliFrontend.this.run(params);
 			case ACTION_LIST:
 				return list(params);
 			case ACTION_INFO:
@@ -1037,12 +1022,19 @@ public class CliFrontend {
 	/**
 	 * Submits the job based on the arguments
 	 */
-	public static void main(String[] args) {
+	public static void main(final String[] args) {
 		EnvironmentInformation.logEnvironmentInfo(LOG, "Command Line Client", args);
 
 		try {
-			CliFrontend cli = new CliFrontend();
-			int retCode = cli.parseParameters(args);
+			final CliFrontend cli = new CliFrontend();
+			SecurityContext.install(new SecurityContext.SecurityConfiguration().setFlinkConfiguration(cli.config));
+			int retCode = SecurityContext.getInstalled()
+					.runSecured(new SecurityContext.FlinkSecuredRunner<Integer>() {
+						@Override
+						public Integer run() {
+							return cli.parseParameters(args);
+						}
+					});
 			System.exit(retCode);
 		}
 		catch (Throwable t) {

http://git-wip-us.apache.org/repos/asf/flink/blob/25a622fd/flink-core/src/main/java/org/apache/flink/configuration/ConfigConstants.java
----------------------------------------------------------------------
diff --git a/flink-core/src/main/java/org/apache/flink/configuration/ConfigConstants.java b/flink-core/src/main/java/org/apache/flink/configuration/ConfigConstants.java
index f0f1b6b..9e66e2a 100644
--- a/flink-core/src/main/java/org/apache/flink/configuration/ConfigConstants.java
+++ b/flink-core/src/main/java/org/apache/flink/configuration/ConfigConstants.java
@@ -758,6 +758,12 @@ public final class ConfigConstants {
 	@PublicEvolving
 	public static final String HA_ZOOKEEPER_MAX_RETRY_ATTEMPTS = "high-availability.zookeeper.client.max-retry-attempts";
 
+	@PublicEvolving
+	public static final String ZOOKEEPER_SASL_DISABLE = "zookeeper.sasl.disable";
+
+	@PublicEvolving
+	public static final String ZOOKEEPER_SASL_SERVICE_NAME = "zookeeper.sasl.service-name";
+
 	/** Deprecated in favour of {@link #HA_ZOOKEEPER_QUORUM_KEY}. */
 	@Deprecated
 	public static final String ZOOKEEPER_QUORUM_KEY = "recovery.zookeeper.quorum";
@@ -1233,6 +1239,9 @@ public final class ConfigConstants {
 	/** ZooKeeper default leader port. */
 	public static final int DEFAULT_ZOOKEEPER_LEADER_PORT = 3888;
 
+	/** Defaults for ZK client security **/
+	public static final boolean DEFAULT_ZOOKEEPER_SASL_DISABLE = true;
+
 	// ------------------------- Queryable state ------------------------------
 
 	/** Port to bind KvState server to. */
@@ -1279,6 +1288,19 @@ public final class ConfigConstants {
 	/** The environment variable name which contains the location of the lib folder */
 	public static final String ENV_FLINK_LIB_DIR = "FLINK_LIB_DIR";
 
+	// -------------------------------- Security -------------------------------
+
+	/**
+	 * The config parameter defining security credentials required
+	 * for securing Flink cluster.
+	 */
+
+	/** Keytab file key name to be used in flink configuration file */
+	public static final String SECURITY_KEYTAB_KEY = "security.keytab";
+
+	/** Kerberos security principal key name to be used in flink configuration file */
+	public static final String SECURITY_PRINCIPAL_KEY = "security.principal";
+
 
 	/**
 	 * Not instantiable.

http://git-wip-us.apache.org/repos/asf/flink/blob/25a622fd/flink-core/src/main/java/org/apache/flink/util/Preconditions.java
----------------------------------------------------------------------
diff --git a/flink-core/src/main/java/org/apache/flink/util/Preconditions.java b/flink-core/src/main/java/org/apache/flink/util/Preconditions.java
index ea6b9dd..e970c13 100644
--- a/flink-core/src/main/java/org/apache/flink/util/Preconditions.java
+++ b/flink-core/src/main/java/org/apache/flink/util/Preconditions.java
@@ -293,7 +293,7 @@ public final class Preconditions {
 
 		return builder.toString();
 	}
-	
+
 	// ------------------------------------------------------------------------
 
 	/** Private constructor to prevent instantiation */

http://git-wip-us.apache.org/repos/asf/flink/blob/25a622fd/flink-dist/src/main/flink-bin/conf/flink-jaas.conf
----------------------------------------------------------------------
diff --git a/flink-dist/src/main/flink-bin/conf/flink-jaas.conf b/flink-dist/src/main/flink-bin/conf/flink-jaas.conf
new file mode 100644
index 0000000..d476e24
--- /dev/null
+++ b/flink-dist/src/main/flink-bin/conf/flink-jaas.conf
@@ -0,0 +1,26 @@
+################################################################################
+#  Licensed to the Apache Software Foundation (ASF) under one
+#  or more contributor license agreements.  See the NOTICE file
+#  distributed with this work for additional information
+#  regarding copyright ownership.  The ASF licenses this file
+#  to you under the Apache License, Version 2.0 (the
+#  "License"); you may not use this file except in compliance
+#  with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+# We are using this file as an workaround for the Kafka and ZK SASL implementation
+# since they explicitly look for java.security.auth.login.config property
+# The file itself is not used by the application since the internal implementation
+# uses a process-wide in-memory java security configuration object.
+# Please do not edit/delete this file - See FLINK-3929
+sample {
+  useKeyTab=false
+  useTicketCache=true;
+};
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/flink/blob/25a622fd/flink-dist/src/main/resources/flink-conf.yaml
----------------------------------------------------------------------
diff --git a/flink-dist/src/main/resources/flink-conf.yaml b/flink-dist/src/main/resources/flink-conf.yaml
index 27fd84a..c876922 100644
--- a/flink-dist/src/main/resources/flink-conf.yaml
+++ b/flink-dist/src/main/resources/flink-conf.yaml
@@ -139,6 +139,31 @@ jobmanager.web.port: 8081
 #
 # "host1:clientPort,host2:clientPort,..." (default clientPort: 2181)
 #
+
 # high-availability: zookeeper
 # high-availability.zookeeper.quorum: localhost:2181
 # high-availability.zookeeper.storageDir: hdfs:///flink/ha/
+
+#==============================================================================
+# Flink Cluster Security Configuration (optional configuration)
+#==============================================================================
+
+# Kerberos security for the connectors can be enabled by providing below configurations
+# Security works in two modes - keytab/principal combination or using the Kerberos token cache
+# If keytab and principal are not provided, token cache (manual kinit) will be used
+
+#security.keytab: /path/to/kerberos/keytab
+#security.principal: flink-user
+
+#==============================================================================
+# ZK Security Configuration (optional configuration)
+#==============================================================================
+# Below configurations are applicable if ZK quorum is configured for Kerberos security
+
+# SASL authentication is disabled by default and can be enabled by changig the value to false
+#
+# zookeeper.sasl.disable: true
+
+# Override below configuration to provide custom ZK service name if configured
+#
+# zookeeper.sasl.service-name: zookeeper
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/flink/blob/25a622fd/flink-mesos/src/main/java/org/apache/flink/mesos/runtime/clusterframework/MesosApplicationMasterRunner.java
----------------------------------------------------------------------
diff --git a/flink-mesos/src/main/java/org/apache/flink/mesos/runtime/clusterframework/MesosApplicationMasterRunner.java b/flink-mesos/src/main/java/org/apache/flink/mesos/runtime/clusterframework/MesosApplicationMasterRunner.java
index 8fb6af4..5ec39c2 100644
--- a/flink-mesos/src/main/java/org/apache/flink/mesos/runtime/clusterframework/MesosApplicationMasterRunner.java
+++ b/flink-mesos/src/main/java/org/apache/flink/mesos/runtime/clusterframework/MesosApplicationMasterRunner.java
@@ -582,10 +582,11 @@ public class MesosApplicationMasterRunner {
 		// build the launch command
 		boolean hasLogback = new File(workingDirectory, "logback.xml").exists();
 		boolean hasLog4j = new File(workingDirectory, "log4j.properties").exists();
+		boolean hasKrb5 = false;
 
 		String launchCommand = BootstrapTools.getTaskManagerShellCommand(
 			flinkConfig, tmParams.containeredParameters(), ".", ".",
-			hasLogback, hasLog4j, taskManagerMainClass);
+			hasLogback, hasLog4j, hasKrb5, taskManagerMainClass);
 		cmd.setValue(launchCommand);
 
 		// build the environment variables

http://git-wip-us.apache.org/repos/asf/flink/blob/25a622fd/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/BootstrapTools.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/BootstrapTools.java b/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/BootstrapTools.java
index c9748cb..d844f5d 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/BootstrapTools.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/BootstrapTools.java
@@ -308,6 +308,7 @@ public class BootstrapTools {
 			String logDirectory,
 			boolean hasLogback,
 			boolean hasLog4j,
+			boolean hasKrb5,
 			Class<?> mainClass) {
 
 		StringBuilder tmCommand = new StringBuilder("$JAVA_HOME/bin/java");
@@ -328,6 +329,12 @@ public class BootstrapTools {
 				tmCommand.append(" -Dlog4j.configuration=file:")
 						.append(configDirectory).append("/log4j.properties");
 			}
+
+			//applicable only for YarnMiniCluster secure test run
+			//krb5.conf file will be available as local resource in JM/TM container
+			if(hasKrb5) {
+				tmCommand.append(" -Djava.security.krb5.conf=krb5.conf");
+			}
 		}
 
 		tmCommand.append(' ').append(mainClass.getName());

http://git-wip-us.apache.org/repos/asf/flink/blob/25a622fd/flink-runtime/src/main/java/org/apache/flink/runtime/security/JaasConfiguration.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/security/JaasConfiguration.java b/flink-runtime/src/main/java/org/apache/flink/runtime/security/JaasConfiguration.java
new file mode 100644
index 0000000..c4527dd
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/security/JaasConfiguration.java
@@ -0,0 +1,160 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.security;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.flink.annotation.Internal;
+import org.apache.hadoop.security.authentication.util.KerberosUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import javax.security.auth.login.AppConfigurationEntry;
+import javax.security.auth.login.Configuration;
+import java.io.File;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ *
+ * JAAS configuration provider object that provides default LoginModule for various connectors that supports
+ * JAAS/SASL based Kerberos authentication. The implementation is inspired from Hadoop UGI class.
+ *
+ * Different connectors uses different login module name to implement JAAS based authentication support.
+ * For example, Kafka expects the login module name to be "kafkaClient" whereas ZooKeeper expect the
+ * name to be "client". This sets responsibility on the Flink cluster administrator to configure/provide right
+ * JAAS config entries. To simplify this requirement, we have introduced this abstraction that provides
+ * a standard lookup to get the login module entry for the JAAS based authentication to work.
+ *
+ * HDFS connector will not be impacted with this configuration since it uses UGI based mechanism to authenticate.
+ *
+ * <a href="https://docs.oracle.com/javase/7/docs/api/javax/security/auth/login/Configuration.html">Configuration</a>
+ *
+ */
+
+@Internal
+public class JaasConfiguration extends Configuration {
+
+	private static final Logger LOG = LoggerFactory.getLogger(JaasConfiguration.class);
+
+	public static final String JAVA_VENDOR_NAME = System.getProperty("java.vendor");
+
+	public static final boolean IBM_JAVA;
+
+	private static final Map<String, String> debugOptions = new HashMap<>();
+
+	private static final Map<String, String> kerberosCacheOptions = new HashMap<>();
+
+	private static final Map<String, String> keytabKerberosOptions = new HashMap<>();
+
+	private static final AppConfigurationEntry userKerberosAce;
+
+	private AppConfigurationEntry keytabKerberosAce = null;
+
+	static {
+
+		IBM_JAVA = JAVA_VENDOR_NAME.contains("IBM");
+
+		if(LOG.isDebugEnabled()) {
+			debugOptions.put("debug", "true");
+		}
+
+		if(IBM_JAVA) {
+			kerberosCacheOptions.put("useDefaultCcache", "true");
+		} else {
+			kerberosCacheOptions.put("doNotPrompt", "true");
+			kerberosCacheOptions.put("useTicketCache", "true");
+		}
+
+		String ticketCache = System.getenv("KRB5CCNAME");
+		if(ticketCache != null) {
+			if(IBM_JAVA) {
+				System.setProperty("KRB5CCNAME", ticketCache);
+			} else {
+				kerberosCacheOptions.put("ticketCache", ticketCache);
+			}
+		}
+
+		kerberosCacheOptions.put("renewTGT", "true");
+		kerberosCacheOptions.putAll(debugOptions);
+
+		userKerberosAce = new AppConfigurationEntry(
+				KerberosUtil.getKrb5LoginModuleName(),
+				AppConfigurationEntry.LoginModuleControlFlag.OPTIONAL,
+				kerberosCacheOptions);
+
+	}
+
+	protected JaasConfiguration(String keytab, String principal) {
+
+		LOG.info("Initializing JAAS configuration instance. Parameters: {}, {}", keytab, principal);
+
+		if(StringUtils.isBlank(keytab) && !StringUtils.isBlank(principal) ||
+				(!StringUtils.isBlank(keytab) && StringUtils.isBlank(principal))){
+			throw new RuntimeException("Both keytab and principal are required and cannot be empty");
+		}
+
+		if(!StringUtils.isBlank(keytab) && !StringUtils.isBlank(principal)) {
+
+			if(IBM_JAVA) {
+				keytabKerberosOptions.put("useKeytab", prependFileUri(keytab));
+				keytabKerberosOptions.put("credsType", "both");
+			} else {
+				keytabKerberosOptions.put("keyTab", keytab);
+				keytabKerberosOptions.put("doNotPrompt", "true");
+				keytabKerberosOptions.put("useKeyTab", "true");
+				keytabKerberosOptions.put("storeKey", "true");
+			}
+
+			keytabKerberosOptions.put("principal", principal);
+			keytabKerberosOptions.put("refreshKrb5Config", "true");
+			keytabKerberosOptions.putAll(debugOptions);
+
+			keytabKerberosAce = new AppConfigurationEntry(
+					KerberosUtil.getKrb5LoginModuleName(),
+					AppConfigurationEntry.LoginModuleControlFlag.REQUIRED,
+					keytabKerberosOptions);
+		}
+	}
+
+	public static Map<String, String> getKeytabKerberosOptions() {
+		return keytabKerberosOptions;
+	}
+
+	private static String prependFileUri(String keytabPath) {
+		File f = new File(keytabPath);
+		return f.toURI().toString();
+	}
+
+	@Override
+	public AppConfigurationEntry[] getAppConfigurationEntry(String applicationName) {
+
+		LOG.debug("JAAS configuration requested for the application entry: {}", applicationName);
+
+		AppConfigurationEntry[] appConfigurationEntry;
+
+		if(keytabKerberosAce != null) {
+			appConfigurationEntry = new AppConfigurationEntry[] {keytabKerberosAce, userKerberosAce};
+		} else {
+			appConfigurationEntry = new AppConfigurationEntry[] {userKerberosAce};
+		}
+
+		return appConfigurationEntry;
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/flink/blob/25a622fd/flink-runtime/src/main/java/org/apache/flink/runtime/security/SecurityContext.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/security/SecurityContext.java b/flink-runtime/src/main/java/org/apache/flink/runtime/security/SecurityContext.java
new file mode 100644
index 0000000..4b8b69b
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/security/SecurityContext.java
@@ -0,0 +1,313 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.security;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.flink.annotation.Internal;
+import org.apache.flink.configuration.ConfigConstants;
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.configuration.IllegalConfigurationException;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.security.Credentials;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.security.token.Token;
+import org.apache.hadoop.security.token.TokenIdentifier;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import javax.security.auth.Subject;
+import java.io.File;
+import java.lang.reflect.Method;
+import java.security.PrivilegedExceptionAction;
+import java.util.Collection;
+
+/*
+ * Process-wide security context object which initializes UGI with appropriate security credentials and also it
+ * creates in-memory JAAS configuration object which will serve appropriate ApplicationConfigurationEntry for the
+ * connector login module implementation that authenticates Kerberos identity using SASL/JAAS based mechanism.
+ */
+@Internal
+public class SecurityContext {
+
+	private static final Logger LOG = LoggerFactory.getLogger(SecurityContext.class);
+
+	public static final String JAAS_CONF_FILENAME = "flink-jaas.conf";
+
+	private static final String JAVA_SECURITY_AUTH_LOGIN_CONFIG = "java.security.auth.login.config";
+
+	private static final String ZOOKEEPER_SASL_CLIENT = "zookeeper.sasl.client";
+
+	private static final String ZOOKEEPER_SASL_CLIENT_USERNAME = "zookeeper.sasl.client.username";
+
+	private static SecurityContext installedContext;
+
+	public static SecurityContext getInstalled() { return installedContext; }
+
+	private UserGroupInformation ugi;
+
+	SecurityContext(UserGroupInformation ugi) {
+		if(ugi == null) {
+			throw new RuntimeException("UGI passed cannot be null");
+		}
+		this.ugi = ugi;
+	}
+
+	public <T> T runSecured(final FlinkSecuredRunner<T> runner) throws Exception {
+		return ugi.doAs(new PrivilegedExceptionAction<T>() {
+			@Override
+			public T run() throws Exception {
+				return runner.run();
+			}
+		});
+	}
+
+	public static void install(SecurityConfiguration config) throws Exception {
+
+		// perform static initialization of UGI, JAAS
+		if(installedContext != null) {
+			LOG.warn("overriding previous security context");
+		}
+
+		// establish the JAAS config
+		JaasConfiguration jaasConfig = new JaasConfiguration(config.keytab, config.principal);
+		javax.security.auth.login.Configuration.setConfiguration(jaasConfig);
+
+		populateSystemSecurityProperties(config.flinkConf);
+
+		// establish the UGI login user
+		UserGroupInformation.setConfiguration(config.hadoopConf);
+
+		UserGroupInformation loginUser;
+
+		if(UserGroupInformation.isSecurityEnabled() &&
+				config.keytab != null && !StringUtils.isBlank(config.principal)) {
+			String keytabPath = (new File(config.keytab)).getAbsolutePath();
+
+			UserGroupInformation.loginUserFromKeytab(config.principal, keytabPath);
+
+			loginUser = UserGroupInformation.getLoginUser();
+
+			// supplement with any available tokens
+			String fileLocation = System.getenv(UserGroupInformation.HADOOP_TOKEN_FILE_LOCATION);
+			if(fileLocation != null) {
+				/*
+				 * Use reflection API since the API semantics are not available in Hadoop1 profile. Below APIs are
+				 * used in the context of reading the stored tokens from UGI.
+				 * Credentials cred = Credentials.readTokenStorageFile(new File(fileLocation), config.hadoopConf);
+				 * loginUser.addCredentials(cred);
+				*/
+				try {
+					Method readTokenStorageFileMethod = Credentials.class.getMethod("readTokenStorageFile",
+							File.class, org.apache.hadoop.conf.Configuration.class);
+					Credentials cred = (Credentials) readTokenStorageFileMethod.invoke(null,new File(fileLocation),
+							config.hadoopConf);
+					Method addCredentialsMethod = UserGroupInformation.class.getMethod("addCredentials",
+							Credentials.class);
+					addCredentialsMethod.invoke(loginUser,cred);
+				} catch(NoSuchMethodException e) {
+					LOG.warn("Could not find method implementations in the shaded jar. Exception: {}", e);
+				}
+			}
+		} else {
+			// login with current user credentials (e.g. ticket cache)
+			try {
+				//Use reflection API to get the login user object
+				//UserGroupInformation.loginUserFromSubject(null);
+				Method loginUserFromSubjectMethod = UserGroupInformation.class.getMethod("loginUserFromSubject", Subject.class);
+				Subject subject = null;
+				loginUserFromSubjectMethod.invoke(null,subject);
+			} catch(NoSuchMethodException e) {
+				LOG.warn("Could not find method implementations in the shaded jar. Exception: {}", e);
+			}
+
+			loginUser = UserGroupInformation.getLoginUser();
+			// note that the stored tokens are read automatically
+		}
+
+		boolean delegationToken = false;
+		final Text HDFS_DELEGATION_KIND = new Text("HDFS_DELEGATION_TOKEN");
+		Collection<Token<? extends TokenIdentifier>> usrTok = loginUser.getTokens();
+		for(Token<? extends TokenIdentifier> token : usrTok) {
+			final Text id = new Text(token.getIdentifier());
+			LOG.debug("Found user token " + id + " with " + token);
+			if(token.getKind().equals(HDFS_DELEGATION_KIND)) {
+				delegationToken = true;
+			}
+		}
+
+		if(UserGroupInformation.isSecurityEnabled() && !loginUser.hasKerberosCredentials()) {
+			//throw an error in non-yarn deployment if kerberos cache is not available
+			if(!delegationToken) {
+				LOG.error("Hadoop Security is enabled but current login user does not have Kerberos Credentials");
+				throw new RuntimeException("Hadoop Security is enabled but current login user does not have Kerberos Credentials");
+			}
+		}
+
+		installedContext = new SecurityContext(loginUser);
+	}
+
+	/*
+	 * This method configures some of the system properties that are require for ZK and Kafka SASL authentication
+	 * See: https://github.com/apache/kafka/blob/0.9.0/clients/src/main/java/org/apache/kafka/common/security/kerberos/Login.java#L289
+	 * See: https://github.com/sgroschupf/zkclient/blob/master/src/main/java/org/I0Itec/zkclient/ZkClient.java#L900
+	 * In this method, setting java.security.auth.login.config configuration is configured only to support ZK and
+	 * Kafka current code behavior.
+	 */
+	private static void populateSystemSecurityProperties(Configuration configuration) {
+
+		//required to be empty for Kafka but we will override the property
+		//with pseudo JAAS configuration file if SASL auth is enabled for ZK
+		System.setProperty(JAVA_SECURITY_AUTH_LOGIN_CONFIG, "");
+
+		if(configuration == null) {
+			return;
+		}
+
+		boolean disableSaslClient = configuration.getBoolean(ConfigConstants.ZOOKEEPER_SASL_DISABLE,
+				ConfigConstants.DEFAULT_ZOOKEEPER_SASL_DISABLE);
+		if(disableSaslClient) {
+			LOG.info("SASL client auth for ZK will be disabled");
+			//SASL auth is disabled by default but will be enabled if specified in configuration
+			System.setProperty(ZOOKEEPER_SASL_CLIENT,"false");
+			return;
+		}
+
+		String baseDir = configuration.getString(ConfigConstants.FLINK_BASE_DIR_PATH_KEY, null);
+		if(baseDir == null) {
+			String message = "SASL auth is enabled for ZK but unable to locate pseudo Jaas config " +
+					"since " + ConfigConstants.FLINK_BASE_DIR_PATH_KEY + " is not provided";
+			LOG.error(message);
+			throw new IllegalConfigurationException(message);
+		}
+
+		File f = new File(baseDir);
+		if(!f.exists() || !f.isDirectory()) {
+			LOG.error("Invalid flink base directory {} configuration provided", baseDir);
+			throw new IllegalConfigurationException("Invalid flink base directory configuration provided");
+		}
+
+		File jaasConfigFile = new File(f, JAAS_CONF_FILENAME);
+
+		if (!jaasConfigFile.exists() || !jaasConfigFile.isFile()) {
+
+			//check if there is a conf directory
+			File confDir = new File(f, "conf");
+			if(!confDir.exists() || !confDir.isDirectory()) {
+				LOG.error("Could not locate " + JAAS_CONF_FILENAME);
+				throw new IllegalConfigurationException("Could not locate " + JAAS_CONF_FILENAME);
+			}
+
+			jaasConfigFile = new File(confDir, JAAS_CONF_FILENAME);
+
+			if (!jaasConfigFile.exists() || !jaasConfigFile.isFile()) {
+				LOG.error("Could not locate " + JAAS_CONF_FILENAME);
+				throw new IllegalConfigurationException("Could not locate " + JAAS_CONF_FILENAME);
+			}
+		}
+
+		LOG.info("Enabling {} property with pseudo JAAS config file: {}",
+				JAVA_SECURITY_AUTH_LOGIN_CONFIG, jaasConfigFile);
+
+		//ZK client module lookup the configuration to handle SASL.
+		//https://github.com/sgroschupf/zkclient/blob/master/src/main/java/org/I0Itec/zkclient/ZkClient.java#L900
+		System.setProperty(JAVA_SECURITY_AUTH_LOGIN_CONFIG, jaasConfigFile.getAbsolutePath());
+		System.setProperty(ZOOKEEPER_SASL_CLIENT,"true");
+
+		String zkSaslServiceName = configuration.getString(ConfigConstants.ZOOKEEPER_SASL_SERVICE_NAME, null);
+		if(!StringUtils.isBlank(zkSaslServiceName)) {
+			LOG.info("ZK SASL service name: {} is provided in the configuration", zkSaslServiceName);
+			System.setProperty(ZOOKEEPER_SASL_CLIENT_USERNAME,zkSaslServiceName);
+		}
+
+	}
+
+	/**
+	 * Inputs for establishing the security context.
+	 */
+	public static class SecurityConfiguration {
+
+		Configuration flinkConf;
+
+		org.apache.hadoop.conf.Configuration hadoopConf = new org.apache.hadoop.conf.Configuration();
+
+		String keytab;
+
+		String principal;
+
+		public String getKeytab() {
+			return keytab;
+		}
+
+		public String getPrincipal() {
+			return principal;
+		}
+
+		public SecurityConfiguration setFlinkConfiguration(Configuration flinkConf) {
+
+			this.flinkConf = flinkConf;
+
+			String keytab = flinkConf.getString(ConfigConstants.SECURITY_KEYTAB_KEY, null);
+
+			String principal = flinkConf.getString(ConfigConstants.SECURITY_PRINCIPAL_KEY, null);
+
+			validate(keytab, principal);
+
+			LOG.debug("keytab {} and principal {} .", keytab, principal);
+
+			this.keytab = keytab;
+
+			this.principal = principal;
+
+			return this;
+		}
+
+		public SecurityConfiguration setHadoopConfiguration(org.apache.hadoop.conf.Configuration conf) {
+			this.hadoopConf = conf;
+			return this;
+		}
+
+		private void validate(String keytab, String principal) {
+
+			if(StringUtils.isBlank(keytab) && !StringUtils.isBlank(principal) ||
+					!StringUtils.isBlank(keytab) && StringUtils.isBlank(principal)) {
+				if(StringUtils.isBlank(keytab)) {
+					LOG.warn("Keytab is null or empty");
+				}
+				if(StringUtils.isBlank(principal)) {
+					LOG.warn("Principal is null or empty");
+				}
+				throw new RuntimeException("Requires both keytab and principal to be provided");
+			}
+
+			if(!StringUtils.isBlank(keytab)) {
+				File keytabFile = new File(keytab);
+				if(!keytabFile.exists() || !keytabFile.isFile()) {
+					LOG.warn("Not a valid keytab: {} file", keytab);
+					throw new RuntimeException("Invalid keytab file: " + keytab + " passed");
+				}
+			}
+
+		}
+	}
+
+	public interface FlinkSecuredRunner<T> {
+		T run() throws Exception;
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/flink/blob/25a622fd/flink-runtime/src/main/scala/org/apache/flink/runtime/jobmanager/JobManager.scala
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/scala/org/apache/flink/runtime/jobmanager/JobManager.scala b/flink-runtime/src/main/scala/org/apache/flink/runtime/jobmanager/JobManager.scala
index 9c844ba..639c158 100644
--- a/flink-runtime/src/main/scala/org/apache/flink/runtime/jobmanager/JobManager.scala
+++ b/flink-runtime/src/main/scala/org/apache/flink/runtime/jobmanager/JobManager.scala
@@ -72,8 +72,8 @@ import org.apache.flink.runtime.metrics.groups.JobManagerMetricGroup
 import org.apache.flink.runtime.process.ProcessReaper
 import org.apache.flink.runtime.query.{KvStateMessage, UnknownKvStateLocation}
 import org.apache.flink.runtime.query.KvStateMessage.{LookupKvStateLocation, NotifyKvStateRegistered, NotifyKvStateUnregistered}
-import org.apache.flink.runtime.security.SecurityUtils
-import org.apache.flink.runtime.security.SecurityUtils.FlinkSecuredRunner
+import org.apache.flink.runtime.security.SecurityContext.{FlinkSecuredRunner, SecurityConfiguration}
+import org.apache.flink.runtime.security.{SecurityContext}
 import org.apache.flink.runtime.taskmanager.TaskManager
 import org.apache.flink.runtime.util._
 import org.apache.flink.runtime.webmonitor.{WebMonitor, WebMonitorUtils}
@@ -2062,26 +2062,18 @@ object JobManager {
     }
 
     // run the job manager
+    SecurityContext.install(new SecurityConfiguration().setFlinkConfiguration(configuration))
+
     try {
-      if (SecurityUtils.isSecurityEnabled) {
-        LOG.info("Security is enabled. Starting secure JobManager.")
-        SecurityUtils.runSecured(new FlinkSecuredRunner[Unit] {
-          override def run(): Unit = {
-            runJobManager(
-              configuration,
-              executionMode,
-              listeningHost,
-              listeningPortRange)
-          }
-        })
-      } else {
-        LOG.info("Security is not enabled. Starting non-authenticated JobManager.")
-        runJobManager(
-          configuration,
-          executionMode,
-          listeningHost,
-          listeningPortRange)
-      }
+      SecurityContext.getInstalled.runSecured(new FlinkSecuredRunner[Unit] {
+        override def run(): Unit = {
+          runJobManager(
+            configuration,
+            executionMode,
+            listeningHost,
+            listeningPortRange)
+        }
+      })
     } catch {
       case t: Throwable =>
         LOG.error("Failed to run JobManager.", t)

http://git-wip-us.apache.org/repos/asf/flink/blob/25a622fd/flink-runtime/src/main/scala/org/apache/flink/runtime/taskmanager/TaskManager.scala
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/scala/org/apache/flink/runtime/taskmanager/TaskManager.scala b/flink-runtime/src/main/scala/org/apache/flink/runtime/taskmanager/TaskManager.scala
index 63a64a0..8534ee1 100644
--- a/flink-runtime/src/main/scala/org/apache/flink/runtime/taskmanager/TaskManager.scala
+++ b/flink-runtime/src/main/scala/org/apache/flink/runtime/taskmanager/TaskManager.scala
@@ -71,8 +71,8 @@ import org.apache.flink.runtime.metrics.groups.TaskManagerMetricGroup
 import org.apache.flink.runtime.process.ProcessReaper
 import org.apache.flink.runtime.query.KvStateRegistry
 import org.apache.flink.runtime.query.netty.{DisabledKvStateRequestStats, KvStateServer}
-import org.apache.flink.runtime.security.SecurityUtils
-import org.apache.flink.runtime.security.SecurityUtils.FlinkSecuredRunner
+import org.apache.flink.runtime.security.SecurityContext.{FlinkSecuredRunner, SecurityConfiguration}
+import org.apache.flink.runtime.security.SecurityContext
 import org.apache.flink.runtime.util._
 import org.apache.flink.runtime.{FlinkActor, LeaderSessionMessageFilter, LogMessages}
 import org.apache.flink.util.{MathUtils, NetUtils}
@@ -1521,19 +1521,14 @@ object TaskManager {
     val resourceId = ResourceID.generate()
 
     // run the TaskManager (if requested in an authentication enabled context)
+    SecurityContext.install(new SecurityConfiguration().setFlinkConfiguration(configuration))
+
     try {
-      if (SecurityUtils.isSecurityEnabled) {
-        LOG.info("Security is enabled. Starting secure TaskManager.")
-        SecurityUtils.runSecured(new FlinkSecuredRunner[Unit] {
-          override def run(): Unit = {
-            selectNetworkInterfaceAndRunTaskManager(configuration, resourceId, classOf[TaskManager])
-          }
-        })
-      }
-      else {
-        LOG.info("Security is not enabled. Starting non-authenticated TaskManager.")
-        selectNetworkInterfaceAndRunTaskManager(configuration, resourceId, classOf[TaskManager])
-      }
+      SecurityContext.getInstalled.runSecured(new FlinkSecuredRunner[Unit] {
+        override def run(): Unit = {
+          selectNetworkInterfaceAndRunTaskManager(configuration, resourceId, classOf[TaskManager])
+        }
+      })
     }
     catch {
       case t: Throwable =>
@@ -1588,6 +1583,8 @@ object TaskManager {
       }
     }
 
+    conf.setString(ConfigConstants.FLINK_BASE_DIR_PATH_KEY, cliConfig.getConfigDir() + "/..")
+
     conf
   }
 

http://git-wip-us.apache.org/repos/asf/flink/blob/25a622fd/flink-runtime/src/test/java/org/apache/flink/runtime/security/JaasConfigurationTest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/security/JaasConfigurationTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/security/JaasConfigurationTest.java
new file mode 100644
index 0000000..89e5ef9
--- /dev/null
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/security/JaasConfigurationTest.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.security;
+
+import org.apache.hadoop.security.authentication.util.KerberosUtil;
+import org.junit.Test;
+
+import javax.security.auth.login.AppConfigurationEntry;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Tests for the {@link JaasConfiguration}.
+ */
+public class JaasConfigurationTest {
+
+	@Test
+	public void testInvalidKerberosParams() {
+		String keytab = "user.keytab";
+		String principal = null;
+		try {
+			new JaasConfiguration(keytab, principal);
+		} catch(RuntimeException re) {
+			assertEquals("Both keytab and principal are required and cannot be empty",re.getMessage());
+		}
+	}
+
+	@Test
+	public void testDefaultAceEntry() {
+		JaasConfiguration conf = new JaasConfiguration(null,null);
+		javax.security.auth.login.Configuration.setConfiguration(conf);
+		final AppConfigurationEntry[] entry = conf.getAppConfigurationEntry("test");
+		AppConfigurationEntry ace = entry[0];
+		assertEquals(ace.getLoginModuleName(), KerberosUtil.getKrb5LoginModuleName());
+	}
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/flink/blob/25a622fd/flink-runtime/src/test/java/org/apache/flink/runtime/security/SecurityContextTest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/security/SecurityContextTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/security/SecurityContextTest.java
new file mode 100644
index 0000000..5f3d76a
--- /dev/null
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/security/SecurityContextTest.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.flink.runtime.security;
+
+import org.apache.hadoop.security.UserGroupInformation;
+import org.junit.Test;
+
+import java.lang.reflect.Method;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.fail;
+
+/**
+ * Tests for the {@link SecurityContext}.
+ */
+public class SecurityContextTest {
+
+	@Test
+	public void testCreateInsecureHadoopCtx() {
+		SecurityContext.SecurityConfiguration sc = new SecurityContext.SecurityConfiguration();
+		try {
+			SecurityContext.install(sc);
+			assertEquals(UserGroupInformation.getLoginUser().getUserName(),getOSUserName());
+		} catch (Exception e) {
+			fail(e.getMessage());
+		}
+	}
+
+	@Test
+	public void testInvalidUGIContext() {
+		try {
+			new SecurityContext(null);
+		} catch (RuntimeException re) {
+			assertEquals("UGI passed cannot be null",re.getMessage());
+		}
+	}
+
+
+	private String getOSUserName() throws Exception {
+		String userName = "";
+		String osName = System.getProperty( "os.name" ).toLowerCase();
+		String className = null;
+
+		if( osName.contains( "windows" ) ){
+			className = "com.sun.security.auth.module.NTSystem";
+		}
+		else if( osName.contains( "linux" ) ){
+			className = "com.sun.security.auth.module.UnixSystem";
+		}
+		else if( osName.contains( "solaris" ) || osName.contains( "sunos" ) ){
+			className = "com.sun.security.auth.module.SolarisSystem";
+		}
+
+		if( className != null ){
+			Class<?> c = Class.forName( className );
+			Method method = c.getDeclaredMethod( "getUsername" );
+			Object o = c.newInstance();
+			userName = (String) method.invoke( o );
+		}
+		return userName;
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/25a622fd/flink-streaming-connectors/flink-connector-filesystem/pom.xml
----------------------------------------------------------------------
diff --git a/flink-streaming-connectors/flink-connector-filesystem/pom.xml b/flink-streaming-connectors/flink-connector-filesystem/pom.xml
index 5712856..edf299d 100644
--- a/flink-streaming-connectors/flink-connector-filesystem/pom.xml
+++ b/flink-streaming-connectors/flink-connector-filesystem/pom.xml
@@ -121,6 +121,42 @@ under the License.
 			<version>${hadoop.version}</version><!--$NO-MVN-MAN-VER$-->
 		</dependency>
 
+		<dependency>
+			<groupId>org.apache.hadoop</groupId>
+			<artifactId>hadoop-minikdc</artifactId>
+			<version>${minikdc.version}</version>
+		</dependency>
+
 	</dependencies>
 
+	<build>
+		<plugins>
+
+			<!--
+            https://issues.apache.org/jira/browse/DIRSHARED-134
+            Required to pull the Mini-KDC transitive dependency
+            -->
+			<plugin>
+				<groupId>org.apache.felix</groupId>
+				<artifactId>maven-bundle-plugin</artifactId>
+				<version>3.0.1</version>
+				<inherited>true</inherited>
+				<extensions>true</extensions>
+			</plugin>
+
+			<plugin>
+				<groupId>org.apache.maven.plugins</groupId>
+				<artifactId>maven-surefire-plugin</artifactId>
+				<configuration>
+					<!--
+					Enforce single threaded execution to avoid port conflicts when running
+					secure mini DFS cluster
+					-->
+					<forkCount>1</forkCount>
+					<reuseForks>false</reuseForks>
+				</configuration>
+			</plugin>
+		</plugins>
+	</build>
+
 </project>

http://git-wip-us.apache.org/repos/asf/flink/blob/25a622fd/flink-streaming-connectors/flink-connector-filesystem/src/test/java/org/apache/flink/streaming/connectors/fs/RollingSinkITCase.java
----------------------------------------------------------------------
diff --git a/flink-streaming-connectors/flink-connector-filesystem/src/test/java/org/apache/flink/streaming/connectors/fs/RollingSinkITCase.java b/flink-streaming-connectors/flink-connector-filesystem/src/test/java/org/apache/flink/streaming/connectors/fs/RollingSinkITCase.java
index 7ee75c1..c3c8df5 100644
--- a/flink-streaming-connectors/flink-connector-filesystem/src/test/java/org/apache/flink/streaming/connectors/fs/RollingSinkITCase.java
+++ b/flink-streaming-connectors/flink-connector-filesystem/src/test/java/org/apache/flink/streaming/connectors/fs/RollingSinkITCase.java
@@ -46,6 +46,7 @@ import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.LocatedFileStatus;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.SequenceFile;
@@ -57,6 +58,8 @@ import org.junit.BeforeClass;
 import org.junit.ClassRule;
 import org.junit.Test;
 import org.junit.rules.TemporaryFolder;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import java.io.BufferedReader;
 import java.io.File;
@@ -79,19 +82,24 @@ import java.util.Map;
 @Deprecated
 public class RollingSinkITCase extends StreamingMultipleProgramsTestBase {
 
+	protected static final Logger LOG = LoggerFactory.getLogger(RollingSinkITCase.class);
+
 	@ClassRule
 	public static TemporaryFolder tempFolder = new TemporaryFolder();
 
-	private static MiniDFSCluster hdfsCluster;
-	private static org.apache.hadoop.fs.FileSystem dfs;
-	private static String hdfsURI;
+	protected static MiniDFSCluster hdfsCluster;
+	protected static org.apache.hadoop.fs.FileSystem dfs;
+	protected static String hdfsURI;
+	protected static Configuration conf = new Configuration();
 
+	protected static File dataDir;
 
 	@BeforeClass
 	public static void createHDFS() throws IOException {
-		Configuration conf = new Configuration();
 
-		File dataDir = tempFolder.newFolder();
+		LOG.info("In RollingSinkITCase: Starting MiniDFSCluster ");
+
+		dataDir = tempFolder.newFolder();
 
 		conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, dataDir.getAbsolutePath());
 		MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(conf);
@@ -106,6 +114,7 @@ public class RollingSinkITCase extends StreamingMultipleProgramsTestBase {
 
 	@AfterClass
 	public static void destroyHDFS() {
+		LOG.info("In RollingSinkITCase: tearing down MiniDFSCluster ");
 		hdfsCluster.shutdown();
 	}
 

http://git-wip-us.apache.org/repos/asf/flink/blob/25a622fd/flink-streaming-connectors/flink-connector-filesystem/src/test/java/org/apache/flink/streaming/connectors/fs/RollingSinkSecuredITCase.java
----------------------------------------------------------------------
diff --git a/flink-streaming-connectors/flink-connector-filesystem/src/test/java/org/apache/flink/streaming/connectors/fs/RollingSinkSecuredITCase.java b/flink-streaming-connectors/flink-connector-filesystem/src/test/java/org/apache/flink/streaming/connectors/fs/RollingSinkSecuredITCase.java
new file mode 100644
index 0000000..86cedaf
--- /dev/null
+++ b/flink-streaming-connectors/flink-connector-filesystem/src/test/java/org/apache/flink/streaming/connectors/fs/RollingSinkSecuredITCase.java
@@ -0,0 +1,232 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.streaming.connectors.fs;
+
+import org.apache.flink.configuration.ConfigConstants;
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.runtime.security.SecurityContext;
+import org.apache.flink.streaming.util.TestStreamEnvironment;
+import org.apache.flink.test.util.SecureTestEnvironment;
+import org.apache.flink.test.util.TestingSecurityContext;
+import org.apache.flink.test.util.TestBaseUtils;
+import org.apache.flink.util.NetUtils;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.http.HttpConfig;
+import org.apache.hadoop.security.SecurityUtil;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Ignore;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_HTTP_ADDRESS_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_KEYTAB_FILE_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_USER_NAME_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_USER_NAME_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_KEYTAB_FILE_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_WEB_AUTHENTICATION_KERBEROS_PRINCIPAL_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_ACCESS_TOKEN_ENABLE_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HTTP_POLICY_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_ENCRYPT_DATA_TRANSFER_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_ADDRESS_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_HOST_NAME_KEY;
+
+/**
+ * Tests for running {@link RollingSinkSecuredITCase} which is an extension of {@link RollingSink} in secure environment
+ */
+
+//The test is disabled since MiniDFS secure run requires lower order ports to be used.
+//We can enable the test when the fix is available (HDFS-9213)
+@Ignore
+public class RollingSinkSecuredITCase extends RollingSinkITCase {
+
+	protected static final Logger LOG = LoggerFactory.getLogger(RollingSinkSecuredITCase.class);
+
+	/*
+	 * override super class static methods to avoid creating MiniDFS and MiniFlink with wrong configurations
+	 * and out-of-order sequence for secure cluster
+	 */
+	@BeforeClass
+	public static void setup() throws Exception {}
+
+	@AfterClass
+	public static void teardown() throws Exception {}
+
+	@BeforeClass
+	public static void createHDFS() throws IOException {}
+
+	@AfterClass
+	public static void destroyHDFS() {}
+
+	@BeforeClass
+	public static void startSecureCluster() throws Exception {
+
+		LOG.info("starting secure cluster environment for testing");
+
+		dataDir = tempFolder.newFolder();
+
+		conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, dataDir.getAbsolutePath());
+
+		SecureTestEnvironment.prepare(tempFolder);
+
+		populateSecureConfigurations();
+
+		Configuration flinkConfig = new Configuration();
+		flinkConfig.setString(ConfigConstants.SECURITY_KEYTAB_KEY,
+				SecureTestEnvironment.getTestKeytab());
+		flinkConfig.setString(ConfigConstants.SECURITY_PRINCIPAL_KEY,
+				SecureTestEnvironment.getHadoopServicePrincipal());
+
+		SecurityContext.SecurityConfiguration ctx = new SecurityContext.SecurityConfiguration();
+		ctx.setFlinkConfiguration(flinkConfig);
+		ctx.setHadoopConfiguration(conf);
+		try {
+			TestingSecurityContext.install(ctx, SecureTestEnvironment.getClientSecurityConfigurationMap());
+		} catch (Exception e) {
+			throw new RuntimeException("Exception occurred while setting up secure test context. Reason: {}", e);
+		}
+
+		File hdfsSiteXML = new File(dataDir.getAbsolutePath() + "/hdfs-site.xml");
+
+		FileWriter writer = new FileWriter(hdfsSiteXML);
+		conf.writeXml(writer);
+		writer.flush();
+		writer.close();
+
+		Map<String, String> map = new HashMap<String, String>(System.getenv());
+		map.put("HADOOP_CONF_DIR", hdfsSiteXML.getParentFile().getAbsolutePath());
+		TestBaseUtils.setEnv(map);
+
+
+		MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(conf);
+		builder.checkDataNodeAddrConfig(true);
+		builder.checkDataNodeHostConfig(true);
+		hdfsCluster = builder.build();
+
+		dfs = hdfsCluster.getFileSystem();
+
+		hdfsURI = "hdfs://"
+				+ NetUtils.hostAndPortToUrlString(hdfsCluster.getURI().getHost(), hdfsCluster.getNameNodePort())
+				+ "/";
+
+		startSecureFlinkClusterWithRecoveryModeEnabled();
+	}
+
+	@AfterClass
+	public static void teardownSecureCluster() throws Exception {
+		LOG.info("tearing down secure cluster environment");
+
+		TestStreamEnvironment.unsetAsContext();
+		stopCluster(cluster, TestBaseUtils.DEFAULT_TIMEOUT);
+
+		hdfsCluster.shutdown();
+
+		SecureTestEnvironment.cleanup();
+	}
+
+	private static void populateSecureConfigurations() {
+
+		String dataTransferProtection = "authentication";
+
+		SecurityUtil.setAuthenticationMethod(UserGroupInformation.AuthenticationMethod.KERBEROS, conf);
+		conf.set(DFS_NAMENODE_USER_NAME_KEY, SecureTestEnvironment.getHadoopServicePrincipal());
+		conf.set(DFS_NAMENODE_KEYTAB_FILE_KEY, SecureTestEnvironment.getTestKeytab());
+		conf.set(DFS_DATANODE_USER_NAME_KEY, SecureTestEnvironment.getHadoopServicePrincipal());
+		conf.set(DFS_DATANODE_KEYTAB_FILE_KEY, SecureTestEnvironment.getTestKeytab());
+		conf.set(DFS_WEB_AUTHENTICATION_KERBEROS_PRINCIPAL_KEY, SecureTestEnvironment.getHadoopServicePrincipal());
+
+		conf.setBoolean(DFS_BLOCK_ACCESS_TOKEN_ENABLE_KEY, true);
+
+		conf.set("dfs.data.transfer.protection", dataTransferProtection);
+
+		conf.set(DFS_HTTP_POLICY_KEY, HttpConfig.Policy.HTTP_ONLY.name());
+
+		conf.set(DFS_ENCRYPT_DATA_TRANSFER_KEY, "false");
+
+		conf.setInt("dfs.datanode.socket.write.timeout", 0);
+
+		/*
+		 * We ae setting the port number to privileged port - see HDFS-9213
+		 * This requires the user to have root privilege to bind to the port
+		 * Use below command (ubuntu) to set privilege to java process for the
+		 * bind() to work if the java process is not running as root.
+		 * setcap 'cap_net_bind_service=+ep' /path/to/java
+		 */
+		conf.set(DFS_DATANODE_ADDRESS_KEY, "localhost:1002");
+		conf.set(DFS_DATANODE_HOST_NAME_KEY, "localhost");
+		conf.set(DFS_DATANODE_HTTP_ADDRESS_KEY, "localhost:1003");
+	}
+
+	private static void startSecureFlinkClusterWithRecoveryModeEnabled() {
+		try {
+			LOG.info("Starting Flink and ZK in secure mode");
+
+			dfs.mkdirs(new Path("/flink/checkpoints"));
+			dfs.mkdirs(new Path("/flink/recovery"));
+
+			org.apache.flink.configuration.Configuration config = new org.apache.flink.configuration.Configuration();
+
+			config.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, 1);
+			config.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, DEFAULT_PARALLELISM);
+			config.setBoolean(ConfigConstants.LOCAL_START_WEBSERVER, false);
+			config.setInteger(ConfigConstants.LOCAL_NUMBER_JOB_MANAGER, 3);
+			config.setString(ConfigConstants.RECOVERY_MODE, "zookeeper");
+			config.setString(ConfigConstants.STATE_BACKEND, "filesystem");
+			config.setString(ConfigConstants.ZOOKEEPER_CHECKPOINTS_PATH, hdfsURI + "/flink/checkpoints");
+			config.setString(ConfigConstants.ZOOKEEPER_RECOVERY_PATH, hdfsURI + "/flink/recovery");
+			config.setString("state.backend.fs.checkpointdir", hdfsURI + "/flink/checkpoints");
+
+			SecureTestEnvironment.populateFlinkSecureConfigurations(config);
+
+			cluster = TestBaseUtils.startCluster(config, false);
+			TestStreamEnvironment.setAsContext(cluster, DEFAULT_PARALLELISM);
+
+		} catch (Exception e) {
+			LOG.error("Exception occured while creating MiniFlink cluster. Reason: {}", e);
+			throw new RuntimeException(e);
+		}
+	}
+
+	/* For secure cluster testing, it is enough to run only one test and override below test methods
+	 * to keep the overall build time minimal
+	 */
+	@Override
+	public void testNonRollingSequenceFileWithoutCompressionWriter() throws Exception {}
+
+	@Override
+	public void testNonRollingSequenceFileWithCompressionWriter() throws Exception {}
+
+	@Override
+	public void testNonRollingAvroKeyValueWithoutCompressionWriter() throws Exception {}
+
+	@Override
+	public void testNonRollingAvroKeyValueWithCompressionWriter() throws Exception {}
+
+	@Override
+	public void testDateTimeRollingStringWriter() throws Exception {}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/flink/blob/25a622fd/flink-streaming-connectors/flink-connector-filesystem/src/test/resources/log4j-test.properties
----------------------------------------------------------------------
diff --git a/flink-streaming-connectors/flink-connector-filesystem/src/test/resources/log4j-test.properties b/flink-streaming-connectors/flink-connector-filesystem/src/test/resources/log4j-test.properties
index fe60d94..5c22851 100644
--- a/flink-streaming-connectors/flink-connector-filesystem/src/test/resources/log4j-test.properties
+++ b/flink-streaming-connectors/flink-connector-filesystem/src/test/resources/log4j-test.properties
@@ -25,3 +25,5 @@ log4j.appender.testlogger.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
 
 # suppress the irrelevant (wrong) warnings from the netty channel handler
 log4j.logger.org.jboss.netty.channel.DefaultChannelPipeline=ERROR, testlogger
+
+log4j.logger.org.apache.directory=OFF, testlogger
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/flink/blob/25a622fd/flink-streaming-connectors/flink-connector-kafka-0.8/src/test/java/org/apache/flink/streaming/connectors/kafka/Kafka08ProducerITCase.java
----------------------------------------------------------------------
diff --git a/flink-streaming-connectors/flink-connector-kafka-0.8/src/test/java/org/apache/flink/streaming/connectors/kafka/Kafka08ProducerITCase.java b/flink-streaming-connectors/flink-connector-kafka-0.8/src/test/java/org/apache/flink/streaming/connectors/kafka/Kafka08ProducerITCase.java
index fc13719..5c951db 100644
--- a/flink-streaming-connectors/flink-connector-kafka-0.8/src/test/java/org/apache/flink/streaming/connectors/kafka/Kafka08ProducerITCase.java
+++ b/flink-streaming-connectors/flink-connector-kafka-0.8/src/test/java/org/apache/flink/streaming/connectors/kafka/Kafka08ProducerITCase.java
@@ -21,7 +21,6 @@ package org.apache.flink.streaming.connectors.kafka;
 
 import org.junit.Test;
 
-
 @SuppressWarnings("serial")
 public class Kafka08ProducerITCase extends KafkaProducerTestBase {
 

http://git-wip-us.apache.org/repos/asf/flink/blob/25a622fd/flink-streaming-connectors/flink-connector-kafka-0.8/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaTestEnvironmentImpl.java
----------------------------------------------------------------------
diff --git a/flink-streaming-connectors/flink-connector-kafka-0.8/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaTestEnvironmentImpl.java b/flink-streaming-connectors/flink-connector-kafka-0.8/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaTestEnvironmentImpl.java
index 864773a..cbf3d06 100644
--- a/flink-streaming-connectors/flink-connector-kafka-0.8/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaTestEnvironmentImpl.java
+++ b/flink-streaming-connectors/flink-connector-kafka-0.8/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaTestEnvironmentImpl.java
@@ -81,6 +81,11 @@ public class KafkaTestEnvironmentImpl extends KafkaTestEnvironment {
 	}
 
 	@Override
+	public Properties getSecureProperties() {
+		return null;
+	}
+
+	@Override
 	public String getVersion() {
 		return "0.8";
 	}
@@ -132,9 +137,14 @@ public class KafkaTestEnvironmentImpl extends KafkaTestEnvironment {
 		return server.socketServer().brokerId();
 	}
 
+	@Override
+	public boolean isSecureRunSupported() {
+		return false;
+	}
+
 
 	@Override
-	public void prepare(int numKafkaServers, Properties additionalServerProperties) {
+	public void prepare(int numKafkaServers, Properties additionalServerProperties, boolean secureMode) {
 		this.additionalServerProperties = additionalServerProperties;
 		File tempDir = new File(System.getProperty("java.io.tmpdir"));
 
@@ -210,6 +220,7 @@ public class KafkaTestEnvironmentImpl extends KafkaTestEnvironment {
 		if (zookeeper != null) {
 			try {
 				zookeeper.stop();
+				zookeeper.close();
 			}
 			catch (Exception e) {
 				LOG.warn("ZK.stop() failed", e);

http://git-wip-us.apache.org/repos/asf/flink/blob/25a622fd/flink-streaming-connectors/flink-connector-kafka-0.9/pom.xml
----------------------------------------------------------------------
diff --git a/flink-streaming-connectors/flink-connector-kafka-0.9/pom.xml b/flink-streaming-connectors/flink-connector-kafka-0.9/pom.xml
index 3b31de6..bfcde82 100644
--- a/flink-streaming-connectors/flink-connector-kafka-0.9/pom.xml
+++ b/flink-streaming-connectors/flink-connector-kafka-0.9/pom.xml
@@ -134,6 +134,13 @@ under the License.
 			<scope>test</scope>
 		</dependency>
 
+		<dependency>
+			<groupId>org.apache.hadoop</groupId>
+			<artifactId>hadoop-minikdc</artifactId>
+			<version>${minikdc.version}</version>
+			<scope>test</scope>
+		</dependency>
+
 	</dependencies>
 
 	<build>
@@ -180,6 +187,17 @@ under the License.
 					<argLine>-Xms256m -Xmx1000m -Dlog4j.configuration=${log4j.configuration} -Dmvn.forkNumber=${surefire.forkNumber} -XX:-UseGCOverheadLimit</argLine>
 				</configuration>
 			</plugin>
+			<!--
+            https://issues.apache.org/jira/browse/DIRSHARED-134
+            Required to pull the Mini-KDC transitive dependency
+            -->
+			<plugin>
+				<groupId>org.apache.felix</groupId>
+				<artifactId>maven-bundle-plugin</artifactId>
+				<version>3.0.1</version>
+				<inherited>true</inherited>
+				<extensions>true</extensions>
+			</plugin>
 		</plugins>
 	</build>
 	

http://git-wip-us.apache.org/repos/asf/flink/blob/25a622fd/flink-streaming-connectors/flink-connector-kafka-0.9/src/test/java/org/apache/flink/streaming/connectors/kafka/Kafka09ITCase.java
----------------------------------------------------------------------
diff --git a/flink-streaming-connectors/flink-connector-kafka-0.9/src/test/java/org/apache/flink/streaming/connectors/kafka/Kafka09ITCase.java b/flink-streaming-connectors/flink-connector-kafka-0.9/src/test/java/org/apache/flink/streaming/connectors/kafka/Kafka09ITCase.java
index 957833d..16ddcdc 100644
--- a/flink-streaming-connectors/flink-connector-kafka-0.9/src/test/java/org/apache/flink/streaming/connectors/kafka/Kafka09ITCase.java
+++ b/flink-streaming-connectors/flink-connector-kafka-0.9/src/test/java/org/apache/flink/streaming/connectors/kafka/Kafka09ITCase.java
@@ -21,12 +21,12 @@ import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
 import org.apache.flink.api.common.typeinfo.TypeInformation;
 import org.junit.Test;
 
+import java.util.Properties;
 import java.util.UUID;
 
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
-
 public class Kafka09ITCase extends KafkaConsumerTestBase {
 
 	// ------------------------------------------------------------------------
@@ -131,11 +131,15 @@ public class Kafka09ITCase extends KafkaConsumerTestBase {
 	public void testJsonTableSource() throws Exception {
 		String topic = UUID.randomUUID().toString();
 
+		Properties props = new Properties();
+		props.putAll(standardProps);
+		props.putAll(secureProps);
+
 		// Names and types are determined in the actual test method of the
 		// base test class.
 		Kafka09JsonTableSource tableSource = new Kafka09JsonTableSource(
 				topic,
-				standardProps,
+				props,
 				new String[] {
 						"long",
 						"string",
@@ -159,11 +163,15 @@ public class Kafka09ITCase extends KafkaConsumerTestBase {
 	public void testJsonTableSourceWithFailOnMissingField() throws Exception {
 		String topic = UUID.randomUUID().toString();
 
+		Properties props = new Properties();
+		props.putAll(standardProps);
+		props.putAll(secureProps);
+
 		// Names and types are determined in the actual test method of the
 		// base test class.
 		Kafka09JsonTableSource tableSource = new Kafka09JsonTableSource(
 				topic,
-				standardProps,
+				props,
 				new String[] {
 						"long",
 						"string",

http://git-wip-us.apache.org/repos/asf/flink/blob/25a622fd/flink-streaming-connectors/flink-connector-kafka-0.9/src/test/java/org/apache/flink/streaming/connectors/kafka/Kafka09ProducerITCase.java
----------------------------------------------------------------------
diff --git a/flink-streaming-connectors/flink-connector-kafka-0.9/src/test/java/org/apache/flink/streaming/connectors/kafka/Kafka09ProducerITCase.java b/flink-streaming-connectors/flink-connector-kafka-0.9/src/test/java/org/apache/flink/streaming/connectors/kafka/Kafka09ProducerITCase.java
index 1288347..ae4f5b2 100644
--- a/flink-streaming-connectors/flink-connector-kafka-0.9/src/test/java/org/apache/flink/streaming/connectors/kafka/Kafka09ProducerITCase.java
+++ b/flink-streaming-connectors/flink-connector-kafka-0.9/src/test/java/org/apache/flink/streaming/connectors/kafka/Kafka09ProducerITCase.java
@@ -21,7 +21,6 @@ package org.apache.flink.streaming.connectors.kafka;
 
 import org.junit.Test;
 
-
 @SuppressWarnings("serial")
 public class Kafka09ProducerITCase extends KafkaProducerTestBase {
 

http://git-wip-us.apache.org/repos/asf/flink/blob/25a622fd/flink-streaming-connectors/flink-connector-kafka-0.9/src/test/java/org/apache/flink/streaming/connectors/kafka/Kafka09SecureRunITCase.java
----------------------------------------------------------------------
diff --git a/flink-streaming-connectors/flink-connector-kafka-0.9/src/test/java/org/apache/flink/streaming/connectors/kafka/Kafka09SecureRunITCase.java b/flink-streaming-connectors/flink-connector-kafka-0.9/src/test/java/org/apache/flink/streaming/connectors/kafka/Kafka09SecureRunITCase.java
new file mode 100644
index 0000000..d12ec65
--- /dev/null
+++ b/flink-streaming-connectors/flink-connector-kafka-0.9/src/test/java/org/apache/flink/streaming/connectors/kafka/Kafka09SecureRunITCase.java
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.streaming.connectors.kafka;
+
+import org.apache.flink.test.util.SecureTestEnvironment;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+
+/*
+ * Kafka Secure Connection (kerberos) IT test case
+ */
+public class Kafka09SecureRunITCase extends KafkaConsumerTestBase {
+
+	protected static final Logger LOG = LoggerFactory.getLogger(Kafka09SecureRunITCase.class);
+
+	@BeforeClass
+	public static void prepare() throws IOException, ClassNotFoundException {
+		LOG.info("-------------------------------------------------------------------------");
+		LOG.info("    Starting Kafka09SecureRunITCase ");
+		LOG.info("-------------------------------------------------------------------------");
+
+		SecureTestEnvironment.prepare(tempFolder);
+		SecureTestEnvironment.populateFlinkSecureConfigurations(getFlinkConfiguration());
+
+		startClusters(true);
+	}
+
+	@AfterClass
+	public static void shutDownServices() {
+		shutdownClusters();
+		SecureTestEnvironment.cleanup();
+	}
+
+
+	//timeout interval is large since in Travis, ZK connection timeout occurs frequently
+	//The timeout for the test case is 2 times timeout of ZK connection
+	@Test(timeout = 600000)
+	public void testMultipleTopics() throws Exception {
+		runProduceConsumeMultipleTopics();
+	}
+
+}
\ No newline at end of file


[18/50] [abbrv] flink git commit: [FLINK-4346] [rpc] Add new RPC abstraction

Posted by tr...@apache.org.
[FLINK-4346] [rpc] Add new RPC abstraction


Project: http://git-wip-us.apache.org/repos/asf/flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/04bcb715
Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/04bcb715
Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/04bcb715

Branch: refs/heads/flip-6
Commit: 04bcb71576f1f8261beb4c4cffb65b82a1382864
Parents: 68709b0
Author: Till Rohrmann <tr...@apache.org>
Authored: Wed Aug 3 19:31:34 2016 +0200
Committer: Till Rohrmann <tr...@apache.org>
Committed: Wed Sep 21 11:39:12 2016 +0200

----------------------------------------------------------------------
 flink-runtime/pom.xml                           |   5 +
 .../flink/runtime/rpc/MainThreadExecutor.java   |  54 +++
 .../apache/flink/runtime/rpc/RpcEndpoint.java   | 182 +++++++++++
 .../apache/flink/runtime/rpc/RpcGateway.java    |  25 ++
 .../org/apache/flink/runtime/rpc/RpcMethod.java |  35 ++
 .../apache/flink/runtime/rpc/RpcService.java    |  74 +++++
 .../apache/flink/runtime/rpc/RpcTimeout.java    |  34 ++
 .../flink/runtime/rpc/akka/AkkaGateway.java     |  29 ++
 .../flink/runtime/rpc/akka/AkkaRpcService.java  | 145 ++++++++
 .../flink/runtime/rpc/akka/BaseAkkaActor.java   |  50 +++
 .../flink/runtime/rpc/akka/BaseAkkaGateway.java |  41 +++
 .../rpc/akka/jobmaster/JobMasterAkkaActor.java  |  58 ++++
 .../akka/jobmaster/JobMasterAkkaGateway.java    |  57 ++++
 .../rpc/akka/messages/CallableMessage.java      |  33 ++
 .../runtime/rpc/akka/messages/CancelTask.java   |  36 ++
 .../runtime/rpc/akka/messages/ExecuteTask.java  |  36 ++
 .../messages/RegisterAtResourceManager.java     |  36 ++
 .../rpc/akka/messages/RegisterJobMaster.java    |  36 ++
 .../runtime/rpc/akka/messages/RequestSlot.java  |  37 +++
 .../rpc/akka/messages/RunnableMessage.java      |  31 ++
 .../akka/messages/UpdateTaskExecutionState.java |  37 +++
 .../ResourceManagerAkkaActor.java               |  65 ++++
 .../ResourceManagerAkkaGateway.java             |  67 ++++
 .../taskexecutor/TaskExecutorAkkaActor.java     |  77 +++++
 .../taskexecutor/TaskExecutorAkkaGateway.java   |  59 ++++
 .../flink/runtime/rpc/jobmaster/JobMaster.java  | 249 ++++++++++++++
 .../runtime/rpc/jobmaster/JobMasterGateway.java |  45 +++
 .../resourcemanager/JobMasterRegistration.java  |  35 ++
 .../resourcemanager/RegistrationResponse.java   |  43 +++
 .../rpc/resourcemanager/ResourceManager.java    |  94 ++++++
 .../resourcemanager/ResourceManagerGateway.java |  58 ++++
 .../rpc/resourcemanager/SlotAssignment.java     |  25 ++
 .../rpc/resourcemanager/SlotRequest.java        |  25 ++
 .../runtime/rpc/taskexecutor/TaskExecutor.java  |  82 +++++
 .../rpc/taskexecutor/TaskExecutorGateway.java   |  48 +++
 .../flink/runtime/rpc/RpcCompletenessTest.java  | 327 +++++++++++++++++++
 .../runtime/rpc/akka/AkkaRpcServiceTest.java    |  81 +++++
 .../rpc/taskexecutor/TaskExecutorTest.java      |  92 ++++++
 .../runtime/util/DirectExecutorService.java     | 234 +++++++++++++
 flink-tests/pom.xml                             |   1 -
 pom.xml                                         |   7 +
 41 files changed, 2784 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/flink/blob/04bcb715/flink-runtime/pom.xml
----------------------------------------------------------------------
diff --git a/flink-runtime/pom.xml b/flink-runtime/pom.xml
index 5fea8fb..09c6fd0 100644
--- a/flink-runtime/pom.xml
+++ b/flink-runtime/pom.xml
@@ -189,6 +189,11 @@ under the License.
 			<artifactId>akka-testkit_${scala.binary.version}</artifactId>
 		</dependency>
 
+		<dependency>
+			<groupId>org.reflections</groupId>
+			<artifactId>reflections</artifactId>
+		</dependency>
+
 	</dependencies>
 
 	<build>

http://git-wip-us.apache.org/repos/asf/flink/blob/04bcb715/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/MainThreadExecutor.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/MainThreadExecutor.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/MainThreadExecutor.java
new file mode 100644
index 0000000..e06711e
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/MainThreadExecutor.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc;
+
+import akka.util.Timeout;
+import scala.concurrent.Future;
+
+import java.util.concurrent.Callable;
+import java.util.concurrent.TimeoutException;
+
+/**
+ * Interface to execute {@link Runnable} and {@link Callable} in the main thread of the underlying
+ * rpc server.
+ *
+ * This interface is intended to be implemented by the self gateway in a {@link RpcEndpoint}
+ * implementation which allows to dispatch local procedures to the main thread of the underlying
+ * rpc server.
+ */
+public interface MainThreadExecutor {
+	/**
+	 * Execute the runnable in the main thread of the underlying rpc server.
+	 *
+	 * @param runnable Runnable to be executed
+	 */
+	void runAsync(Runnable runnable);
+
+	/**
+	 * Execute the callable in the main thread of the underlying rpc server and return a future for
+	 * the callable result. If the future is not completed within the given timeout, the returned
+	 * future will throw a {@link TimeoutException}.
+	 *
+	 * @param callable Callable to be executed
+	 * @param timeout Timeout for the future to complete
+	 * @param <V> Return value of the callable
+	 * @return Future of the callable result
+	 */
+	<V> Future<V> callAsync(Callable<V> callable, Timeout timeout);
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/04bcb715/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcEndpoint.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcEndpoint.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcEndpoint.java
new file mode 100644
index 0000000..3d8757f
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcEndpoint.java
@@ -0,0 +1,182 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc;
+
+import akka.util.Timeout;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import scala.concurrent.ExecutionContext;
+import scala.concurrent.Future;
+
+import java.util.concurrent.Callable;
+
+/**
+ * Base class for rpc endpoints. Distributed components which offer remote procedure calls have to
+ * extend the rpc endpoint base class.
+ *
+ * The main idea is that a rpc endpoint is backed by a rpc server which has a single thread
+ * processing the rpc calls. Thus, by executing all state changing operations within the main
+ * thread, we don't have to reason about concurrent accesses. The rpc provides provides
+ * {@link #runAsync(Runnable)}, {@link #callAsync(Callable, Timeout)} and the
+ * {@link #getMainThreadExecutionContext()} to execute code in the rpc server's main thread.
+ *
+ * @param <C> Rpc gateway counterpart for the implementing rpc endpoint
+ */
+public abstract class RpcEndpoint<C extends RpcGateway> {
+
+	protected final Logger log = LoggerFactory.getLogger(getClass());
+
+	/** Rpc service to be used to start the rpc server and to obtain rpc gateways */
+	private final RpcService rpcService;
+
+	/** Self gateway which can be used to schedule asynchronous calls on yourself */
+	private C self;
+
+	/**
+	 * The main thread execution context to be used to execute future callbacks in the main thread
+	 * of the executing rpc server.
+	 *
+	 * IMPORTANT: The main thread context is only available after the rpc server has been started.
+	 */
+	private MainThreadExecutionContext mainThreadExecutionContext;
+
+	public RpcEndpoint(RpcService rpcService) {
+		this.rpcService = rpcService;
+	}
+
+	/**
+	 * Get self-gateway which should be used to run asynchronous rpc calls on this endpoint.
+	 *
+	 * IMPORTANT: Always issue local method calls via the self-gateway if the current thread
+	 * is not the main thread of the underlying rpc server, e.g. from within a future callback.
+	 *
+	 * @return Self gateway
+	 */
+	public C getSelf() {
+		return self;
+	}
+
+	/**
+	 * Execute the runnable in the main thread of the underlying rpc server.
+	 *
+	 * @param runnable Runnable to be executed in the main thread of the underlying rpc server
+	 */
+	public void runAsync(Runnable runnable) {
+		((MainThreadExecutor) self).runAsync(runnable);
+	}
+
+	/**
+	 * Execute the callable in the main thread of the underlying rpc server returning a future for
+	 * the result of the callable. If the callable is not completed within the given timeout, then
+	 * the future will be failed with a {@link java.util.concurrent.TimeoutException}.
+	 *
+	 * @param callable Callable to be executed in the main thread of the underlying rpc server
+	 * @param timeout Timeout for the callable to be completed
+	 * @param <V> Return type of the callable
+	 * @return Future for the result of the callable.
+	 */
+	public <V> Future<V> callAsync(Callable<V> callable, Timeout timeout) {
+		return ((MainThreadExecutor) self).callAsync(callable, timeout);
+	}
+
+	/**
+	 * Gets the main thread execution context. The main thread execution context can be used to
+	 * execute tasks in the main thread of the underlying rpc server.
+	 *
+	 * @return Main thread execution context
+	 */
+	public ExecutionContext getMainThreadExecutionContext() {
+		return mainThreadExecutionContext;
+	}
+
+	/**
+	 * Gets the used rpc service.
+	 *
+	 * @return Rpc service
+	 */
+	public RpcService getRpcService() {
+		return rpcService;
+	}
+
+	/**
+	 * Starts the underlying rpc server via the rpc service and creates the main thread execution
+	 * context. This makes the rpc endpoint effectively reachable from the outside.
+	 *
+	 * Can be overriden to add rpc endpoint specific start up code. Should always call the parent
+	 * start method.
+	 */
+	public void start() {
+		self = rpcService.startServer(this);
+		mainThreadExecutionContext = new MainThreadExecutionContext((MainThreadExecutor) self);
+	}
+
+
+	/**
+	 * Shuts down the underlying rpc server via the rpc service.
+	 *
+	 * Can be overriden to add rpc endpoint specific shut down code. Should always call the parent
+	 * shut down method.
+	 */
+	public void shutDown() {
+		rpcService.stopServer(self);
+	}
+
+	/**
+	 * Gets the address of the underlying rpc server. The address should be fully qualified so that
+	 * a remote system can connect to this rpc server via this address.
+	 *
+	 * @return Fully qualified address of the underlying rpc server
+	 */
+	public String getAddress() {
+		return rpcService.getAddress(self);
+	}
+
+	/**
+	 * Execution context which executes runnables in the main thread context. A reported failure
+	 * will cause the underlying rpc server to shut down.
+	 */
+	private class MainThreadExecutionContext implements ExecutionContext {
+		private final MainThreadExecutor gateway;
+
+		MainThreadExecutionContext(MainThreadExecutor gateway) {
+			this.gateway = gateway;
+		}
+
+		@Override
+		public void execute(Runnable runnable) {
+			gateway.runAsync(runnable);
+		}
+
+		@Override
+		public void reportFailure(final Throwable t) {
+			gateway.runAsync(new Runnable() {
+				@Override
+				public void run() {
+					log.error("Encountered failure in the main thread execution context.", t);
+					shutDown();
+				}
+			});
+		}
+
+		@Override
+		public ExecutionContext prepare() {
+			return this;
+		}
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/04bcb715/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcGateway.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcGateway.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcGateway.java
new file mode 100644
index 0000000..e3a16b4
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcGateway.java
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc;
+
+/**
+ * Rpc gateway interface which has to be implemented by Rpc gateways.
+ */
+public interface RpcGateway {
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/04bcb715/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcMethod.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcMethod.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcMethod.java
new file mode 100644
index 0000000..875e557
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcMethod.java
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc;
+
+import java.lang.annotation.ElementType;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+import java.lang.annotation.Target;
+
+/**
+ * Annotation for rpc method in a {@link RpcEndpoint} implementation. Every rpc method must have a
+ * respective counterpart in the {@link RpcGateway} implementation for this rpc server. The
+ * RpcCompletenessTest makes sure that the set of rpc methods in a rpc server and the set of
+ * gateway methods in the corresponding gateway implementation are identical.
+ */
+@Target(ElementType.METHOD)
+@Retention(RetentionPolicy.RUNTIME)
+public @interface RpcMethod {
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/04bcb715/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcService.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcService.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcService.java
new file mode 100644
index 0000000..90ff7b6
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcService.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc;
+
+import scala.concurrent.Future;
+
+/**
+ * Interface for rpc services. An rpc service is used to start and connect to a {@link RpcEndpoint}.
+ * Connecting to a rpc server will return a {@link RpcGateway} which can be used to call remote
+ * procedures.
+ */
+public interface RpcService {
+
+	/**
+	 * Connect to a remote rpc server under the provided address. Returns a rpc gateway which can
+	 * be used to communicate with the rpc server.
+	 *
+	 * @param address Address of the remote rpc server
+	 * @param clazz Class of the rpc gateway to return
+	 * @param <C> Type of the rpc gateway to return
+	 * @return Future containing the rpc gateway
+	 */
+	<C extends RpcGateway> Future<C> connect(String address, Class<C> clazz);
+
+	/**
+	 * Start a rpc server which forwards the remote procedure calls to the provided rpc endpoint.
+	 *
+	 * @param rpcEndpoint Rpc protocl to dispath the rpcs to
+	 * @param <S> Type of the rpc endpoint
+	 * @param <C> Type of the self rpc gateway associated with the rpc server
+	 * @return Self gateway to dispatch remote procedure calls to oneself
+	 */
+	<S extends RpcEndpoint, C extends RpcGateway> C startServer(S rpcEndpoint);
+
+	/**
+	 * Stop the underlying rpc server of the provided self gateway.
+	 *
+	 * @param selfGateway Self gateway describing the underlying rpc server
+	 * @param <C> Type of the rpc gateway
+	 */
+	<C extends RpcGateway> void stopServer(C selfGateway);
+
+	/**
+	 * Stop the rpc service shutting down all started rpc servers.
+	 */
+	void stopService();
+
+	/**
+	 * Get the fully qualified address of the underlying rpc server represented by the self gateway.
+	 * It must be possible to connect from a remote host to the rpc server via the returned fully
+	 * qualified address.
+	 *
+	 * @param selfGateway Self gateway associated with the underlying rpc server
+	 * @param <C> Type of the rpc gateway
+	 * @return Fully qualified address
+	 */
+	<C extends RpcGateway> String getAddress(C selfGateway);
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/04bcb715/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcTimeout.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcTimeout.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcTimeout.java
new file mode 100644
index 0000000..3d36d47
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcTimeout.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc;
+
+import java.lang.annotation.ElementType;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+import java.lang.annotation.Target;
+
+/**
+ * Annotation for {@link RpcGateway} methods to specify an additional timeout parameter for the
+ * returned future to be completed. The rest of the provided parameters is passed to the remote rpc
+ * server for the rpc.
+ */
+@Target(ElementType.PARAMETER)
+@Retention(RetentionPolicy.RUNTIME)
+public @interface RpcTimeout {
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/04bcb715/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaGateway.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaGateway.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaGateway.java
new file mode 100644
index 0000000..a96a600
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaGateway.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc.akka;
+
+import akka.actor.ActorRef;
+
+/**
+ * Interface for Akka based rpc gateways
+ */
+public interface AkkaGateway {
+
+	ActorRef getActorRef();
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/04bcb715/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcService.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcService.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcService.java
new file mode 100644
index 0000000..d55bd13
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/AkkaRpcService.java
@@ -0,0 +1,145 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc.akka;
+
+import akka.actor.ActorIdentity;
+import akka.actor.ActorRef;
+import akka.actor.ActorSelection;
+import akka.actor.ActorSystem;
+import akka.actor.Identify;
+import akka.actor.PoisonPill;
+import akka.actor.Props;
+import akka.dispatch.Mapper;
+import akka.pattern.AskableActorSelection;
+import akka.util.Timeout;
+import org.apache.flink.runtime.akka.AkkaUtils;
+import org.apache.flink.runtime.rpc.jobmaster.JobMaster;
+import org.apache.flink.runtime.rpc.jobmaster.JobMasterGateway;
+import org.apache.flink.runtime.rpc.resourcemanager.ResourceManager;
+import org.apache.flink.runtime.rpc.resourcemanager.ResourceManagerGateway;
+import org.apache.flink.runtime.rpc.RpcGateway;
+import org.apache.flink.runtime.rpc.RpcEndpoint;
+import org.apache.flink.runtime.rpc.RpcService;
+import org.apache.flink.runtime.rpc.akka.jobmaster.JobMasterAkkaActor;
+import org.apache.flink.runtime.rpc.akka.jobmaster.JobMasterAkkaGateway;
+import org.apache.flink.runtime.rpc.akka.resourcemanager.ResourceManagerAkkaActor;
+import org.apache.flink.runtime.rpc.akka.resourcemanager.ResourceManagerAkkaGateway;
+import org.apache.flink.runtime.rpc.akka.taskexecutor.TaskExecutorAkkaActor;
+import org.apache.flink.runtime.rpc.akka.taskexecutor.TaskExecutorAkkaGateway;
+import org.apache.flink.runtime.rpc.taskexecutor.TaskExecutorGateway;
+import org.apache.flink.runtime.rpc.taskexecutor.TaskExecutor;
+import scala.concurrent.Future;
+
+import java.util.HashSet;
+import java.util.Set;
+
+public class AkkaRpcService implements RpcService {
+	private final ActorSystem actorSystem;
+	private final Timeout timeout;
+	private final Set<ActorRef> actors = new HashSet<>();
+
+	public AkkaRpcService(ActorSystem actorSystem, Timeout timeout) {
+		this.actorSystem = actorSystem;
+		this.timeout = timeout;
+	}
+
+	@Override
+	public <C extends RpcGateway> Future<C> connect(String address, final Class<C> clazz) {
+		ActorSelection actorSel = actorSystem.actorSelection(address);
+
+		AskableActorSelection asker = new AskableActorSelection(actorSel);
+
+		Future<Object> identify = asker.ask(new Identify(42), timeout);
+
+		return identify.map(new Mapper<Object, C>(){
+			public C apply(Object obj) {
+				ActorRef actorRef = ((ActorIdentity) obj).getRef();
+
+				if (clazz == TaskExecutorGateway.class) {
+					return (C) new TaskExecutorAkkaGateway(actorRef, timeout);
+				} else if (clazz == ResourceManagerGateway.class) {
+					return (C) new ResourceManagerAkkaGateway(actorRef, timeout);
+				} else if (clazz == JobMasterGateway.class) {
+					return (C) new JobMasterAkkaGateway(actorRef, timeout);
+				} else {
+					throw new RuntimeException("Could not find remote endpoint " + clazz);
+				}
+			}
+		}, actorSystem.dispatcher());
+	}
+
+	@Override
+	public <S extends RpcEndpoint, C extends RpcGateway> C startServer(S rpcEndpoint) {
+		ActorRef ref;
+		C self;
+		if (rpcEndpoint instanceof TaskExecutor) {
+			ref = actorSystem.actorOf(
+				Props.create(TaskExecutorAkkaActor.class, rpcEndpoint)
+			);
+
+			self = (C) new TaskExecutorAkkaGateway(ref, timeout);
+		} else if (rpcEndpoint instanceof ResourceManager) {
+			ref = actorSystem.actorOf(
+				Props.create(ResourceManagerAkkaActor.class, rpcEndpoint)
+			);
+
+			self = (C) new ResourceManagerAkkaGateway(ref, timeout);
+		} else if (rpcEndpoint instanceof JobMaster) {
+			ref = actorSystem.actorOf(
+				Props.create(JobMasterAkkaActor.class, rpcEndpoint)
+			);
+
+			self = (C) new JobMasterAkkaGateway(ref, timeout);
+		} else {
+			throw new RuntimeException("Could not start RPC server for class " + rpcEndpoint.getClass());
+		}
+
+		actors.add(ref);
+
+		return self;
+	}
+
+	@Override
+	public <C extends RpcGateway> void stopServer(C selfGateway) {
+		if (selfGateway instanceof AkkaGateway) {
+			AkkaGateway akkaClient = (AkkaGateway) selfGateway;
+
+			if (actors.contains(akkaClient.getActorRef())) {
+				akkaClient.getActorRef().tell(PoisonPill.getInstance(), ActorRef.noSender());
+			} else {
+				// don't stop this actor since it was not started by this RPC service
+			}
+		}
+	}
+
+	@Override
+	public void stopService() {
+		actorSystem.shutdown();
+		actorSystem.awaitTermination();
+	}
+
+	@Override
+	public <C extends RpcGateway> String getAddress(C selfGateway) {
+		if (selfGateway instanceof AkkaGateway) {
+			return AkkaUtils.getAkkaURL(actorSystem, ((AkkaGateway) selfGateway).getActorRef());
+		} else {
+			throw new RuntimeException("Cannot get address for non " + AkkaGateway.class.getName() + ".");
+		}
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/04bcb715/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/BaseAkkaActor.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/BaseAkkaActor.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/BaseAkkaActor.java
new file mode 100644
index 0000000..3cb499c
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/BaseAkkaActor.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc.akka;
+
+import akka.actor.Status;
+import akka.actor.UntypedActor;
+import org.apache.flink.runtime.rpc.akka.messages.CallableMessage;
+import org.apache.flink.runtime.rpc.akka.messages.RunnableMessage;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class BaseAkkaActor extends UntypedActor {
+	private static final Logger LOG = LoggerFactory.getLogger(BaseAkkaActor.class);
+
+	@Override
+	public void onReceive(Object message) throws Exception {
+		if (message instanceof RunnableMessage) {
+			try {
+				((RunnableMessage) message).getRunnable().run();
+			} catch (Exception e) {
+				LOG.error("Encountered error while executing runnable.", e);
+			}
+		} else if (message instanceof CallableMessage<?>) {
+			try {
+				Object result = ((CallableMessage<?>) message).getCallable().call();
+				sender().tell(new Status.Success(result), getSelf());
+			} catch (Exception e) {
+				sender().tell(new Status.Failure(e), getSelf());
+			}
+		} else {
+			throw new RuntimeException("Unknown message " + message);
+		}
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/04bcb715/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/BaseAkkaGateway.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/BaseAkkaGateway.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/BaseAkkaGateway.java
new file mode 100644
index 0000000..512790d
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/BaseAkkaGateway.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc.akka;
+
+import akka.actor.ActorRef;
+import akka.pattern.Patterns;
+import akka.util.Timeout;
+import org.apache.flink.runtime.rpc.MainThreadExecutor;
+import org.apache.flink.runtime.rpc.akka.messages.CallableMessage;
+import org.apache.flink.runtime.rpc.akka.messages.RunnableMessage;
+import scala.concurrent.Future;
+
+import java.util.concurrent.Callable;
+
+public abstract class BaseAkkaGateway implements MainThreadExecutor, AkkaGateway {
+	@Override
+	public void runAsync(Runnable runnable) {
+		getActorRef().tell(new RunnableMessage(runnable), ActorRef.noSender());
+	}
+
+	@Override
+	public <V> Future<V> callAsync(Callable<V> callable, Timeout timeout) {
+		return (Future<V>) Patterns.ask(getActorRef(), new CallableMessage(callable), timeout);
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/04bcb715/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/jobmaster/JobMasterAkkaActor.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/jobmaster/JobMasterAkkaActor.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/jobmaster/JobMasterAkkaActor.java
new file mode 100644
index 0000000..9e04ea9
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/jobmaster/JobMasterAkkaActor.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc.akka.jobmaster;
+
+import akka.actor.ActorRef;
+import akka.actor.Status;
+import org.apache.flink.runtime.rpc.akka.BaseAkkaActor;
+import org.apache.flink.runtime.rpc.akka.messages.RegisterAtResourceManager;
+import org.apache.flink.runtime.rpc.jobmaster.JobMaster;
+import org.apache.flink.runtime.messages.Acknowledge;
+import org.apache.flink.runtime.rpc.akka.messages.UpdateTaskExecutionState;
+
+public class JobMasterAkkaActor extends BaseAkkaActor {
+	private final JobMaster jobMaster;
+
+	public JobMasterAkkaActor(JobMaster jobMaster) {
+		this.jobMaster = jobMaster;
+	}
+
+	@Override
+	public void onReceive(Object message) throws Exception {
+		if (message instanceof UpdateTaskExecutionState) {
+
+			final ActorRef sender = getSender();
+
+			UpdateTaskExecutionState updateTaskExecutionState = (UpdateTaskExecutionState) message;
+
+			try {
+				Acknowledge result = jobMaster.updateTaskExecutionState(updateTaskExecutionState.getTaskExecutionState());
+				sender.tell(new Status.Success(result), getSelf());
+			} catch (Exception e) {
+				sender.tell(new Status.Failure(e), getSelf());
+			}
+		} else if (message instanceof RegisterAtResourceManager) {
+			RegisterAtResourceManager registerAtResourceManager = (RegisterAtResourceManager) message;
+
+			jobMaster.registerAtResourceManager(registerAtResourceManager.getAddress());
+		} else {
+			super.onReceive(message);
+		}
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/04bcb715/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/jobmaster/JobMasterAkkaGateway.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/jobmaster/JobMasterAkkaGateway.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/jobmaster/JobMasterAkkaGateway.java
new file mode 100644
index 0000000..e6bf061
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/jobmaster/JobMasterAkkaGateway.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc.akka.jobmaster;
+
+import akka.actor.ActorRef;
+import akka.pattern.AskableActorRef;
+import akka.util.Timeout;
+import org.apache.flink.runtime.rpc.akka.BaseAkkaGateway;
+import org.apache.flink.runtime.rpc.akka.messages.RegisterAtResourceManager;
+import org.apache.flink.runtime.rpc.jobmaster.JobMasterGateway;
+import org.apache.flink.runtime.messages.Acknowledge;
+import org.apache.flink.runtime.rpc.akka.messages.UpdateTaskExecutionState;
+import org.apache.flink.runtime.taskmanager.TaskExecutionState;
+import scala.concurrent.Future;
+import scala.reflect.ClassTag$;
+
+public class JobMasterAkkaGateway extends BaseAkkaGateway implements JobMasterGateway {
+	private final AskableActorRef actorRef;
+	private final Timeout timeout;
+
+	public JobMasterAkkaGateway(ActorRef actorRef, Timeout timeout) {
+		this.actorRef = new AskableActorRef(actorRef);
+		this.timeout = timeout;
+	}
+
+	@Override
+	public Future<Acknowledge> updateTaskExecutionState(TaskExecutionState taskExecutionState) {
+		return actorRef.ask(new UpdateTaskExecutionState(taskExecutionState), timeout)
+			.mapTo(ClassTag$.MODULE$.<Acknowledge>apply(Acknowledge.class));
+	}
+
+	@Override
+	public void registerAtResourceManager(String address) {
+		actorRef.actorRef().tell(new RegisterAtResourceManager(address), actorRef.actorRef());
+	}
+
+	@Override
+	public ActorRef getActorRef() {
+		return actorRef.actorRef();
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/04bcb715/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/CallableMessage.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/CallableMessage.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/CallableMessage.java
new file mode 100644
index 0000000..f0e555f
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/CallableMessage.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc.akka.messages;
+
+import java.util.concurrent.Callable;
+
+public class CallableMessage<V> {
+	private final Callable<V> callable;
+
+	public CallableMessage(Callable<V> callable) {
+		this.callable = callable;
+	}
+
+	public Callable<V> getCallable() {
+		return callable;
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/04bcb715/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/CancelTask.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/CancelTask.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/CancelTask.java
new file mode 100644
index 0000000..0b9e9dc
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/CancelTask.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc.akka.messages;
+
+import org.apache.flink.runtime.executiongraph.ExecutionAttemptID;
+
+import java.io.Serializable;
+
+public class CancelTask implements Serializable {
+	private static final long serialVersionUID = -2998176874447950595L;
+	private final ExecutionAttemptID executionAttemptID;
+
+	public CancelTask(ExecutionAttemptID executionAttemptID) {
+		this.executionAttemptID = executionAttemptID;
+	}
+
+	public ExecutionAttemptID getExecutionAttemptID() {
+		return executionAttemptID;
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/04bcb715/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/ExecuteTask.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/ExecuteTask.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/ExecuteTask.java
new file mode 100644
index 0000000..a83d539
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/ExecuteTask.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc.akka.messages;
+
+import org.apache.flink.runtime.deployment.TaskDeploymentDescriptor;
+
+import java.io.Serializable;
+
+public class ExecuteTask implements Serializable {
+	private static final long serialVersionUID = -6769958430967048348L;
+	private final TaskDeploymentDescriptor taskDeploymentDescriptor;
+
+	public ExecuteTask(TaskDeploymentDescriptor taskDeploymentDescriptor) {
+		this.taskDeploymentDescriptor = taskDeploymentDescriptor;
+	}
+
+	public TaskDeploymentDescriptor getTaskDeploymentDescriptor() {
+		return taskDeploymentDescriptor;
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/04bcb715/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RegisterAtResourceManager.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RegisterAtResourceManager.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RegisterAtResourceManager.java
new file mode 100644
index 0000000..3ade082
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RegisterAtResourceManager.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc.akka.messages;
+
+import java.io.Serializable;
+
+public class RegisterAtResourceManager implements Serializable {
+
+	private static final long serialVersionUID = -4175905742620903602L;
+
+	private final String address;
+
+	public RegisterAtResourceManager(String address) {
+		this.address = address;
+	}
+
+	public String getAddress() {
+		return address;
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/04bcb715/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RegisterJobMaster.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RegisterJobMaster.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RegisterJobMaster.java
new file mode 100644
index 0000000..b35ea38
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RegisterJobMaster.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc.akka.messages;
+
+import org.apache.flink.runtime.rpc.resourcemanager.JobMasterRegistration;
+
+import java.io.Serializable;
+
+public class RegisterJobMaster implements Serializable{
+	private static final long serialVersionUID = -4616879574192641507L;
+	private final JobMasterRegistration jobMasterRegistration;
+
+	public RegisterJobMaster(JobMasterRegistration jobMasterRegistration) {
+		this.jobMasterRegistration = jobMasterRegistration;
+	}
+
+	public JobMasterRegistration getJobMasterRegistration() {
+		return jobMasterRegistration;
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/04bcb715/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RequestSlot.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RequestSlot.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RequestSlot.java
new file mode 100644
index 0000000..85ceeec
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RequestSlot.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc.akka.messages;
+
+import org.apache.flink.runtime.rpc.resourcemanager.SlotRequest;
+
+import java.io.Serializable;
+
+public class RequestSlot implements Serializable {
+	private static final long serialVersionUID = 7207463889348525866L;
+
+	private final SlotRequest slotRequest;
+
+	public RequestSlot(SlotRequest slotRequest) {
+		this.slotRequest = slotRequest;
+	}
+
+	public SlotRequest getSlotRequest() {
+		return slotRequest;
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/04bcb715/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RunnableMessage.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RunnableMessage.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RunnableMessage.java
new file mode 100644
index 0000000..3556738
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/RunnableMessage.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc.akka.messages;
+
+public class RunnableMessage {
+	private final Runnable runnable;
+
+	public RunnableMessage(Runnable runnable) {
+		this.runnable = runnable;
+	}
+
+	public Runnable getRunnable() {
+		return runnable;
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/04bcb715/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/UpdateTaskExecutionState.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/UpdateTaskExecutionState.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/UpdateTaskExecutionState.java
new file mode 100644
index 0000000..f89cd2f
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/messages/UpdateTaskExecutionState.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc.akka.messages;
+
+import org.apache.flink.runtime.taskmanager.TaskExecutionState;
+
+import java.io.Serializable;
+
+public class UpdateTaskExecutionState implements Serializable{
+	private static final long serialVersionUID = -6662229114427331436L;
+
+	private final TaskExecutionState taskExecutionState;
+
+	public UpdateTaskExecutionState(TaskExecutionState taskExecutionState) {
+		this.taskExecutionState = taskExecutionState;
+	}
+
+	public TaskExecutionState getTaskExecutionState() {
+		return taskExecutionState;
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/04bcb715/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/resourcemanager/ResourceManagerAkkaActor.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/resourcemanager/ResourceManagerAkkaActor.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/resourcemanager/ResourceManagerAkkaActor.java
new file mode 100644
index 0000000..13101f9
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/resourcemanager/ResourceManagerAkkaActor.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc.akka.resourcemanager;
+
+import akka.actor.ActorRef;
+import akka.actor.Status;
+import akka.pattern.Patterns;
+import org.apache.flink.runtime.rpc.akka.BaseAkkaActor;
+import org.apache.flink.runtime.rpc.resourcemanager.RegistrationResponse;
+import org.apache.flink.runtime.rpc.resourcemanager.ResourceManager;
+import org.apache.flink.runtime.rpc.resourcemanager.SlotAssignment;
+import org.apache.flink.runtime.rpc.akka.messages.RegisterJobMaster;
+import org.apache.flink.runtime.rpc.akka.messages.RequestSlot;
+import scala.concurrent.Future;
+
+public class ResourceManagerAkkaActor extends BaseAkkaActor {
+	private final ResourceManager resourceManager;
+
+	public ResourceManagerAkkaActor(ResourceManager resourceManager) {
+		this.resourceManager = resourceManager;
+	}
+
+	@Override
+	public void onReceive(Object message) throws Exception {
+		final ActorRef sender = getSender();
+
+		if (message instanceof RegisterJobMaster) {
+			RegisterJobMaster registerJobMaster = (RegisterJobMaster) message;
+
+			try {
+				Future<RegistrationResponse> response = resourceManager.registerJobMaster(registerJobMaster.getJobMasterRegistration());
+				Patterns.pipe(response, getContext().dispatcher()).to(sender());
+			} catch (Exception e) {
+				sender.tell(new Status.Failure(e), getSelf());
+			}
+		} else if (message instanceof RequestSlot) {
+			RequestSlot requestSlot = (RequestSlot) message;
+
+			try {
+				SlotAssignment response = resourceManager.requestSlot(requestSlot.getSlotRequest());
+				sender.tell(new Status.Success(response), getSelf());
+			} catch (Exception e) {
+				sender.tell(new Status.Failure(e), getSelf());
+			}
+		} else {
+			super.onReceive(message);
+		}
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/04bcb715/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/resourcemanager/ResourceManagerAkkaGateway.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/resourcemanager/ResourceManagerAkkaGateway.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/resourcemanager/ResourceManagerAkkaGateway.java
new file mode 100644
index 0000000..1304707
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/resourcemanager/ResourceManagerAkkaGateway.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc.akka.resourcemanager;
+
+import akka.actor.ActorRef;
+import akka.pattern.AskableActorRef;
+import akka.util.Timeout;
+import org.apache.flink.runtime.rpc.akka.BaseAkkaGateway;
+import org.apache.flink.runtime.rpc.resourcemanager.JobMasterRegistration;
+import org.apache.flink.runtime.rpc.resourcemanager.RegistrationResponse;
+import org.apache.flink.runtime.rpc.resourcemanager.ResourceManagerGateway;
+import org.apache.flink.runtime.rpc.resourcemanager.SlotAssignment;
+import org.apache.flink.runtime.rpc.resourcemanager.SlotRequest;
+import org.apache.flink.runtime.rpc.akka.messages.RegisterJobMaster;
+import org.apache.flink.runtime.rpc.akka.messages.RequestSlot;
+import scala.concurrent.Future;
+import scala.concurrent.duration.FiniteDuration;
+import scala.reflect.ClassTag$;
+
+public class ResourceManagerAkkaGateway extends BaseAkkaGateway implements ResourceManagerGateway {
+	private final AskableActorRef actorRef;
+	private final Timeout timeout;
+
+	public ResourceManagerAkkaGateway(ActorRef actorRef, Timeout timeout) {
+		this.actorRef = new AskableActorRef(actorRef);
+		this.timeout = timeout;
+	}
+
+	@Override
+	public Future<RegistrationResponse> registerJobMaster(JobMasterRegistration jobMasterRegistration, FiniteDuration timeout) {
+		return actorRef.ask(new RegisterJobMaster(jobMasterRegistration), new Timeout(timeout))
+			.mapTo(ClassTag$.MODULE$.<RegistrationResponse>apply(RegistrationResponse.class));
+	}
+
+	@Override
+	public Future<RegistrationResponse> registerJobMaster(JobMasterRegistration jobMasterRegistration) {
+		return actorRef.ask(new RegisterJobMaster(jobMasterRegistration), timeout)
+			.mapTo(ClassTag$.MODULE$.<RegistrationResponse>apply(RegistrationResponse.class));
+	}
+
+	@Override
+	public Future<SlotAssignment> requestSlot(SlotRequest slotRequest) {
+		return actorRef.ask(new RequestSlot(slotRequest), timeout)
+			.mapTo(ClassTag$.MODULE$.<SlotAssignment>apply(SlotAssignment.class));
+	}
+
+	@Override
+	public ActorRef getActorRef() {
+		return actorRef.actorRef();
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/04bcb715/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/taskexecutor/TaskExecutorAkkaActor.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/taskexecutor/TaskExecutorAkkaActor.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/taskexecutor/TaskExecutorAkkaActor.java
new file mode 100644
index 0000000..ed522cc
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/taskexecutor/TaskExecutorAkkaActor.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc.akka.taskexecutor;
+
+import akka.actor.ActorRef;
+import akka.actor.Status;
+import akka.dispatch.OnComplete;
+import org.apache.flink.runtime.messages.Acknowledge;
+import org.apache.flink.runtime.rpc.akka.BaseAkkaActor;
+import org.apache.flink.runtime.rpc.akka.messages.CancelTask;
+import org.apache.flink.runtime.rpc.akka.messages.ExecuteTask;
+import org.apache.flink.runtime.rpc.taskexecutor.TaskExecutorGateway;
+
+public class TaskExecutorAkkaActor extends BaseAkkaActor {
+	private final TaskExecutorGateway taskExecutor;
+
+	public TaskExecutorAkkaActor(TaskExecutorGateway taskExecutor) {
+		this.taskExecutor = taskExecutor;
+	}
+
+	@Override
+	public void onReceive(Object message) throws Exception {
+		final ActorRef sender = getSender();
+
+		if (message instanceof ExecuteTask) {
+			ExecuteTask executeTask = (ExecuteTask) message;
+
+			taskExecutor.executeTask(executeTask.getTaskDeploymentDescriptor()).onComplete(
+				new OnComplete<Acknowledge>() {
+					@Override
+					public void onComplete(Throwable failure, Acknowledge success) throws Throwable {
+						if (failure != null) {
+							sender.tell(new Status.Failure(failure), getSelf());
+						} else {
+							sender.tell(new Status.Success(Acknowledge.get()), getSelf());
+						}
+					}
+				},
+				getContext().dispatcher()
+			);
+		} else if (message instanceof CancelTask) {
+			CancelTask cancelTask = (CancelTask) message;
+
+			taskExecutor.cancelTask(cancelTask.getExecutionAttemptID()).onComplete(
+				new OnComplete<Acknowledge>() {
+					@Override
+					public void onComplete(Throwable failure, Acknowledge success) throws Throwable {
+						if (failure != null) {
+							sender.tell(new Status.Failure(failure), getSelf());
+						} else {
+							sender.tell(new Status.Success(Acknowledge.get()), getSelf());
+						}
+					}
+				},
+				getContext().dispatcher()
+			);
+		} else {
+			super.onReceive(message);
+		}
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/04bcb715/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/taskexecutor/TaskExecutorAkkaGateway.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/taskexecutor/TaskExecutorAkkaGateway.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/taskexecutor/TaskExecutorAkkaGateway.java
new file mode 100644
index 0000000..7f0a522
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/akka/taskexecutor/TaskExecutorAkkaGateway.java
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc.akka.taskexecutor;
+
+import akka.actor.ActorRef;
+import akka.pattern.AskableActorRef;
+import akka.util.Timeout;
+import org.apache.flink.runtime.deployment.TaskDeploymentDescriptor;
+import org.apache.flink.runtime.executiongraph.ExecutionAttemptID;
+import org.apache.flink.runtime.messages.Acknowledge;
+import org.apache.flink.runtime.rpc.akka.BaseAkkaGateway;
+import org.apache.flink.runtime.rpc.akka.messages.CancelTask;
+import org.apache.flink.runtime.rpc.akka.messages.ExecuteTask;
+import org.apache.flink.runtime.rpc.taskexecutor.TaskExecutorGateway;
+import scala.concurrent.Future;
+import scala.reflect.ClassTag$;
+
+public class TaskExecutorAkkaGateway extends BaseAkkaGateway implements TaskExecutorGateway {
+	private final AskableActorRef actorRef;
+	private final Timeout timeout;
+
+	public TaskExecutorAkkaGateway(ActorRef actorRef, Timeout timeout) {
+		this.actorRef = new AskableActorRef(actorRef);
+		this.timeout = timeout;
+	}
+
+	@Override
+	public Future<Acknowledge> executeTask(TaskDeploymentDescriptor taskDeploymentDescriptor) {
+		return actorRef.ask(new ExecuteTask(taskDeploymentDescriptor), timeout)
+			.mapTo(ClassTag$.MODULE$.<Acknowledge>apply(Acknowledge.class));
+	}
+
+	@Override
+	public Future<Acknowledge> cancelTask(ExecutionAttemptID executionAttemptId) {
+		return actorRef.ask(new CancelTask(executionAttemptId), timeout)
+			.mapTo(ClassTag$.MODULE$.<Acknowledge>apply(Acknowledge.class));
+	}
+
+	@Override
+	public ActorRef getActorRef() {
+		return actorRef.actorRef();
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/04bcb715/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/jobmaster/JobMaster.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/jobmaster/JobMaster.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/jobmaster/JobMaster.java
new file mode 100644
index 0000000..b81b19c
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/jobmaster/JobMaster.java
@@ -0,0 +1,249 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc.jobmaster;
+
+import akka.dispatch.Futures;
+import akka.dispatch.Mapper;
+import akka.dispatch.OnComplete;
+import org.apache.flink.runtime.instance.InstanceID;
+import org.apache.flink.runtime.messages.Acknowledge;
+import org.apache.flink.runtime.rpc.RpcMethod;
+import org.apache.flink.runtime.rpc.resourcemanager.JobMasterRegistration;
+import org.apache.flink.runtime.rpc.resourcemanager.RegistrationResponse;
+import org.apache.flink.runtime.rpc.resourcemanager.ResourceManagerGateway;
+import org.apache.flink.runtime.rpc.RpcEndpoint;
+import org.apache.flink.runtime.rpc.RpcService;
+import org.apache.flink.runtime.taskmanager.TaskExecutionState;
+import scala.Tuple2;
+import scala.concurrent.ExecutionContext;
+import scala.concurrent.ExecutionContext$;
+import scala.concurrent.Future;
+import scala.concurrent.duration.Deadline;
+import scala.concurrent.duration.FiniteDuration;
+
+import java.util.UUID;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.ScheduledThreadPoolExecutor;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
+
+/**
+ * JobMaster implementation. The job master is responsible for the execution of a single
+ * {@link org.apache.flink.runtime.jobgraph.JobGraph}.
+ *
+ * It offers the following methods as part of its rpc interface to interact with the JobMaster
+ * remotely:
+ * <ul>
+ *     <li>{@link #registerAtResourceManager(String)} triggers the registration at the resource manager</li>
+ *     <li>{@link #updateTaskExecutionState(TaskExecutionState)} updates the task execution state for
+ * given task</li>
+ * </ul>
+ */
+public class JobMaster extends RpcEndpoint<JobMasterGateway> {
+	/** Execution context for future callbacks */
+	private final ExecutionContext executionContext;
+
+	/** Execution context for scheduled runnables */
+	private final ScheduledExecutorService scheduledExecutorService;
+
+	private final FiniteDuration initialRegistrationTimeout = new FiniteDuration(500, TimeUnit.MILLISECONDS);
+	private final FiniteDuration maxRegistrationTimeout = new FiniteDuration(30, TimeUnit.SECONDS);
+	private final FiniteDuration registrationDuration = new FiniteDuration(365, TimeUnit.DAYS);
+	private final long failedRegistrationDelay = 10000;
+
+	/** Gateway to connected resource manager, null iff not connected */
+	private ResourceManagerGateway resourceManager = null;
+
+	/** UUID to filter out old registration runs */
+	private UUID currentRegistrationRun;
+
+	public JobMaster(RpcService rpcService, ExecutorService executorService) {
+		super(rpcService);
+		executionContext = ExecutionContext$.MODULE$.fromExecutor(executorService);
+		scheduledExecutorService = new ScheduledThreadPoolExecutor(1);
+	}
+
+	public ResourceManagerGateway getResourceManager() {
+		return resourceManager;
+	}
+
+	//----------------------------------------------------------------------------------------------
+	// RPC methods
+	//----------------------------------------------------------------------------------------------
+
+	/**
+	 * Updates the task execution state for a given task.
+	 *
+	 * @param taskExecutionState New task execution state for a given task
+	 * @return Acknowledge the task execution state update
+	 */
+	@RpcMethod
+	public Acknowledge updateTaskExecutionState(TaskExecutionState taskExecutionState) {
+		System.out.println("TaskExecutionState: " + taskExecutionState);
+		return Acknowledge.get();
+	}
+
+	/**
+	 * Triggers the registration of the job master at the resource manager.
+	 *
+	 * @param address Address of the resource manager
+	 */
+	@RpcMethod
+	public void registerAtResourceManager(final String address) {
+		currentRegistrationRun = UUID.randomUUID();
+
+		Future<ResourceManagerGateway> resourceManagerFuture = getRpcService().connect(address, ResourceManagerGateway.class);
+
+		handleResourceManagerRegistration(
+			new JobMasterRegistration(getAddress()),
+			1,
+			resourceManagerFuture,
+			currentRegistrationRun,
+			initialRegistrationTimeout,
+			maxRegistrationTimeout,
+			registrationDuration.fromNow());
+	}
+
+	//----------------------------------------------------------------------------------------------
+	// Helper methods
+	//----------------------------------------------------------------------------------------------
+
+	/**
+	 * Helper method to handle the resource manager registration process. If a registration attempt
+	 * times out, then a new attempt with the doubled time out is initiated. The whole registration
+	 * process has a deadline. Once this deadline is overdue without successful registration, the
+	 * job master shuts down.
+	 *
+	 * @param jobMasterRegistration Job master registration info which is sent to the resource
+	 *                              manager
+	 * @param attemptNumber Registration attempt number
+	 * @param resourceManagerFuture Future of the resource manager gateway
+	 * @param registrationRun UUID describing the current registration run
+	 * @param timeout Timeout of the last registration attempt
+	 * @param maxTimeout Maximum timeout between registration attempts
+	 * @param deadline Deadline for the registration
+	 */
+	void handleResourceManagerRegistration(
+		final JobMasterRegistration jobMasterRegistration,
+		final int attemptNumber,
+		final Future<ResourceManagerGateway> resourceManagerFuture,
+		final UUID registrationRun,
+		final FiniteDuration timeout,
+		final FiniteDuration maxTimeout,
+		final Deadline deadline) {
+
+		// filter out concurrent registration runs
+		if (registrationRun.equals(currentRegistrationRun)) {
+
+			log.info("Start registration attempt #{}.", attemptNumber);
+
+			if (deadline.isOverdue()) {
+				// we've exceeded our registration deadline. This means that we have to shutdown the JobMaster
+				log.error("Exceeded registration deadline without successfully registering at the ResourceManager.");
+				shutDown();
+			} else {
+				Future<Tuple2<RegistrationResponse, ResourceManagerGateway>> registrationResponseFuture = resourceManagerFuture.flatMap(new Mapper<ResourceManagerGateway, Future<Tuple2<RegistrationResponse, ResourceManagerGateway>>>() {
+					@Override
+					public Future<Tuple2<RegistrationResponse, ResourceManagerGateway>> apply(ResourceManagerGateway resourceManagerGateway) {
+						return resourceManagerGateway.registerJobMaster(jobMasterRegistration, timeout).zip(Futures.successful(resourceManagerGateway));
+					}
+				}, executionContext);
+
+				registrationResponseFuture.onComplete(new OnComplete<Tuple2<RegistrationResponse, ResourceManagerGateway>>() {
+					@Override
+					public void onComplete(Throwable failure, Tuple2<RegistrationResponse, ResourceManagerGateway> tuple) throws Throwable {
+						if (failure != null) {
+							if (failure instanceof TimeoutException) {
+								// we haven't received an answer in the given timeout interval,
+								// so increase it and try again.
+								final FiniteDuration newTimeout = timeout.$times(2L).min(maxTimeout);
+
+								handleResourceManagerRegistration(
+									jobMasterRegistration,
+									attemptNumber + 1,
+									resourceManagerFuture,
+									registrationRun,
+									newTimeout,
+									maxTimeout,
+									deadline);
+							} else {
+								log.error("Received unknown error while registering at the ResourceManager.", failure);
+								shutDown();
+							}
+						} else {
+							final RegistrationResponse response = tuple._1();
+							final ResourceManagerGateway gateway = tuple._2();
+
+							if (response.isSuccess()) {
+								finishResourceManagerRegistration(gateway, response.getInstanceID());
+							} else {
+								log.info("The registration was refused. Try again.");
+
+								scheduledExecutorService.schedule(new Runnable() {
+									@Override
+									public void run() {
+										// we have to execute scheduled runnable in the main thread
+										// because we need consistency wrt currentRegistrationRun
+										runAsync(new Runnable() {
+											@Override
+											public void run() {
+												// our registration attempt was refused. Start over.
+												handleResourceManagerRegistration(
+													jobMasterRegistration,
+													1,
+													resourceManagerFuture,
+													registrationRun,
+													initialRegistrationTimeout,
+													maxTimeout,
+													deadline);
+											}
+										});
+									}
+								}, failedRegistrationDelay, TimeUnit.MILLISECONDS);
+							}
+						}
+					}
+				}, getMainThreadExecutionContext()); // use the main thread execution context to execute the call back in the main thread
+			}
+		} else {
+			log.info("Discard out-dated registration run.");
+		}
+	}
+
+	/**
+	 * Finish the resource manager registration by setting the new resource manager gateway.
+	 *
+	 * @param resourceManager New resource manager gateway
+	 * @param instanceID Instance id assigned by the resource manager
+	 */
+	void finishResourceManagerRegistration(ResourceManagerGateway resourceManager, InstanceID instanceID) {
+		log.info("Successfully registered at the ResourceManager under instance id {}.", instanceID);
+		this.resourceManager = resourceManager;
+	}
+
+	/**
+	 * Return if the job master is connected to a resource manager.
+	 *
+	 * @return true if the job master is connected to the resource manager
+	 */
+	public boolean isConnected() {
+		return resourceManager != null;
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/04bcb715/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/jobmaster/JobMasterGateway.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/jobmaster/JobMasterGateway.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/jobmaster/JobMasterGateway.java
new file mode 100644
index 0000000..17a4c3a
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/jobmaster/JobMasterGateway.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc.jobmaster;
+
+import org.apache.flink.runtime.messages.Acknowledge;
+import org.apache.flink.runtime.rpc.RpcGateway;
+import org.apache.flink.runtime.taskmanager.TaskExecutionState;
+import scala.concurrent.Future;
+
+/**
+ * {@link JobMaster} rpc gateway interface
+ */
+public interface JobMasterGateway extends RpcGateway {
+
+	/**
+	 * Updates the task execution state for a given task.
+	 *
+	 * @param taskExecutionState New task execution state for a given task
+	 * @return Future acknowledge of the task execution state update
+	 */
+	Future<Acknowledge> updateTaskExecutionState(TaskExecutionState taskExecutionState);
+
+	/**
+	 * Triggers the registration of the job master at the resource manager.
+	 *
+	 * @param address Address of the resource manager
+	 */
+	void registerAtResourceManager(final String address);
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/04bcb715/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/JobMasterRegistration.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/JobMasterRegistration.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/JobMasterRegistration.java
new file mode 100644
index 0000000..7a2deae
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/JobMasterRegistration.java
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc.resourcemanager;
+
+import java.io.Serializable;
+
+public class JobMasterRegistration implements Serializable {
+	private static final long serialVersionUID = 8411214999193765202L;
+
+	private final String address;
+
+	public JobMasterRegistration(String address) {
+		this.address = address;
+	}
+
+	public String getAddress() {
+		return address;
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/04bcb715/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/RegistrationResponse.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/RegistrationResponse.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/RegistrationResponse.java
new file mode 100644
index 0000000..8ac9e49
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/RegistrationResponse.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc.resourcemanager;
+
+import org.apache.flink.runtime.instance.InstanceID;
+
+import java.io.Serializable;
+
+public class RegistrationResponse implements Serializable {
+	private static final long serialVersionUID = -2379003255993119993L;
+
+	private final boolean isSuccess;
+	private final InstanceID instanceID;
+
+	public RegistrationResponse(boolean isSuccess, InstanceID instanceID) {
+		this.isSuccess = isSuccess;
+		this.instanceID = instanceID;
+	}
+
+	public boolean isSuccess() {
+		return isSuccess;
+	}
+
+	public InstanceID getInstanceID() {
+		return instanceID;
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/04bcb715/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/ResourceManager.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/ResourceManager.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/ResourceManager.java
new file mode 100644
index 0000000..c7e8def
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/ResourceManager.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc.resourcemanager;
+
+import akka.dispatch.Mapper;
+import org.apache.flink.runtime.instance.InstanceID;
+import org.apache.flink.runtime.rpc.RpcMethod;
+import org.apache.flink.runtime.rpc.RpcEndpoint;
+import org.apache.flink.runtime.rpc.RpcService;
+import org.apache.flink.runtime.rpc.jobmaster.JobMaster;
+import org.apache.flink.runtime.rpc.jobmaster.JobMasterGateway;
+import scala.concurrent.ExecutionContext;
+import scala.concurrent.ExecutionContext$;
+import scala.concurrent.Future;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.concurrent.ExecutorService;
+
+/**
+ * ResourceManager implementation. The resource manager is responsible for resource de-/allocation
+ * and bookkeeping.
+ *
+ * It offers the following methods as part of its rpc interface to interact with the him remotely:
+ * <ul>
+ *     <li>{@link #registerJobMaster(JobMasterRegistration)} registers a {@link JobMaster} at the resource manager</li>
+ *     <li>{@link #requestSlot(SlotRequest)} requests a slot from the resource manager</li>
+ * </ul>
+ */
+public class ResourceManager extends RpcEndpoint<ResourceManagerGateway> {
+	private final ExecutionContext executionContext;
+	private final Map<JobMasterGateway, InstanceID> jobMasterGateways;
+
+	public ResourceManager(RpcService rpcService, ExecutorService executorService) {
+		super(rpcService);
+		this.executionContext = ExecutionContext$.MODULE$.fromExecutor(executorService);
+		this.jobMasterGateways = new HashMap<>();
+	}
+
+	/**
+	 * Register a {@link JobMaster} at the resource manager.
+	 *
+	 * @param jobMasterRegistration Job master registration information
+	 * @return Future registration response
+	 */
+	@RpcMethod
+	public Future<RegistrationResponse> registerJobMaster(JobMasterRegistration jobMasterRegistration) {
+		Future<JobMasterGateway> jobMasterFuture = getRpcService().connect(jobMasterRegistration.getAddress(), JobMasterGateway.class);
+
+		return jobMasterFuture.map(new Mapper<JobMasterGateway, RegistrationResponse>() {
+			@Override
+			public RegistrationResponse apply(final JobMasterGateway jobMasterGateway) {
+				InstanceID instanceID;
+
+				if (jobMasterGateways.containsKey(jobMasterGateway)) {
+					instanceID = jobMasterGateways.get(jobMasterGateway);
+				} else {
+					instanceID = new InstanceID();
+					jobMasterGateways.put(jobMasterGateway, instanceID);
+				}
+
+				return new RegistrationResponse(true, instanceID);
+			}
+		}, getMainThreadExecutionContext());
+	}
+
+	/**
+	 * Requests a slot from the resource manager.
+	 *
+	 * @param slotRequest Slot request
+	 * @return Slot assignment
+	 */
+	@RpcMethod
+	public SlotAssignment requestSlot(SlotRequest slotRequest) {
+		System.out.println("SlotRequest: " + slotRequest);
+		return new SlotAssignment();
+	}
+}


[44/50] [abbrv] flink git commit: [hotfix] Add self rpc gateway registration to TestingSerialRpcService

Posted by tr...@apache.org.
[hotfix] Add self rpc gateway registration to TestingSerialRpcService


Project: http://git-wip-us.apache.org/repos/asf/flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/12af3b13
Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/12af3b13
Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/12af3b13

Branch: refs/heads/flip-6
Commit: 12af3b13c98d9adc1ebc5b9f98ebefb5b2a016e6
Parents: b779d19
Author: Till Rohrmann <tr...@apache.org>
Authored: Fri Sep 2 14:51:16 2016 +0200
Committer: Till Rohrmann <tr...@apache.org>
Committed: Wed Sep 21 11:39:17 2016 +0200

----------------------------------------------------------------------
 .../flink/runtime/rpc/TestingSerialRpcService.java  | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/flink/blob/12af3b13/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/TestingSerialRpcService.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/TestingSerialRpcService.java b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/TestingSerialRpcService.java
index 7bdbb99..955edcc 100644
--- a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/TestingSerialRpcService.java
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/TestingSerialRpcService.java
@@ -43,7 +43,8 @@ import static org.apache.flink.util.Preconditions.checkNotNull;
 
 
 /**
- * An RPC Service implementation for testing. This RPC service directly executes all asynchronous calls one by one in the main thread.
+ * An RPC Service implementation for testing. This RPC service directly executes all asynchronous
+ * calls one by one in the calling thread.
  */
 public class TestingSerialRpcService implements RpcService {
 
@@ -52,7 +53,7 @@ public class TestingSerialRpcService implements RpcService {
 
 	public TestingSerialRpcService() {
 		executorService = new DirectExecutorService();
-		this.registeredConnections = new ConcurrentHashMap<>();
+		this.registeredConnections = new ConcurrentHashMap<>(16);
 	}
 
 	@Override
@@ -78,14 +79,14 @@ public class TestingSerialRpcService implements RpcService {
 
 	@Override
 	public void stopServer(RpcGateway selfGateway) {
-
+		registeredConnections.remove(selfGateway.getAddress());
 	}
 
 	@Override
 	public <C extends RpcGateway, S extends RpcEndpoint<C>> C startServer(S rpcEndpoint) {
 		final String address = UUID.randomUUID().toString();
 
-		InvocationHandler akkaInvocationHandler = new TestingSerialInvocationHandler(address, rpcEndpoint);
+		InvocationHandler akkaInvocationHandler = new TestingSerialRpcService.TestingSerialInvocationHandler<>(address, rpcEndpoint);
 		ClassLoader classLoader = getClass().getClassLoader();
 
 		@SuppressWarnings("unchecked")
@@ -99,6 +100,9 @@ public class TestingSerialRpcService implements RpcService {
 			},
 			akkaInvocationHandler);
 
+		// register self
+		registeredConnections.putIfAbsent(self.getAddress(), self);
+
 		return self;
 	}
 
@@ -133,7 +137,7 @@ public class TestingSerialRpcService implements RpcService {
 		}
 	}
 
-	private static class TestingSerialInvocationHandler<C extends RpcGateway, T extends RpcEndpoint<C>> implements InvocationHandler, RpcGateway, MainThreadExecutor, StartStoppable {
+	private static final class TestingSerialInvocationHandler<C extends RpcGateway, T extends RpcEndpoint<C>> implements InvocationHandler, RpcGateway, MainThreadExecutor, StartStoppable {
 
 		private final T rpcEndpoint;
 
@@ -197,7 +201,7 @@ public class TestingSerialRpcService implements RpcService {
 			final Method rpcMethod = lookupRpcMethod(methodName, parameterTypes);
 			Object result = rpcMethod.invoke(rpcEndpoint, args);
 
-			if (result != null && result instanceof Future) {
+			if (result instanceof Future) {
 				Future<?> future = (Future<?>) result;
 				return Await.result(future, futureTimeout.duration());
 			} else {


[03/50] [abbrv] flink git commit: [FLINK-4572] [gelly] Convert to negative in LongValueToIntValue

Posted by tr...@apache.org.
[FLINK-4572] [gelly] Convert to negative in LongValueToIntValue

The Gelly drivers expect that scale 32 edges, represented by the lower
32 bits of long values, can be converted to int values. Values between
2^31 and 2^32 - 1 should be converted to negative integers.

Creates separate signed and unsigned translators for long to int. This
prevents ambiguous conversion since each translator only works on a
32-bit range of values.

This closes #2469


Project: http://git-wip-us.apache.org/repos/asf/flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/e58fd6e0
Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/e58fd6e0
Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/e58fd6e0

Branch: refs/heads/flip-6
Commit: e58fd6e001b319aa7325a0544735f8c0680141d8
Parents: 4f84a02
Author: Greg Hogan <co...@greghogan.com>
Authored: Fri Sep 2 12:01:29 2016 -0400
Committer: Greg Hogan <co...@greghogan.com>
Committed: Tue Sep 20 10:10:20 2016 -0400

----------------------------------------------------------------------
 .../apache/flink/graph/driver/GraphMetrics.java |  6 +--
 .../graph/examples/ClusteringCoefficient.java   |  6 +--
 .../org/apache/flink/graph/examples/HITS.java   |  4 +-
 .../flink/graph/examples/JaccardIndex.java      |  4 +-
 .../flink/graph/examples/TriangleListing.java   |  6 +--
 .../asm/translate/LongValueToIntValue.java      | 43 -----------------
 .../translate/LongValueToSignedIntValue.java    | 44 ++++++++++++++++++
 .../translate/LongValueToUnsignedIntValue.java  | 49 ++++++++++++++++++++
 .../LongValueToSignedIntValueTest.java          | 49 ++++++++++++++++++++
 .../LongValueToUnsignedIntValueTest.java        | 49 ++++++++++++++++++++
 10 files changed, 204 insertions(+), 56 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/flink/blob/e58fd6e0/flink-libraries/flink-gelly-examples/src/main/java/org/apache/flink/graph/driver/GraphMetrics.java
----------------------------------------------------------------------
diff --git a/flink-libraries/flink-gelly-examples/src/main/java/org/apache/flink/graph/driver/GraphMetrics.java b/flink-libraries/flink-gelly-examples/src/main/java/org/apache/flink/graph/driver/GraphMetrics.java
index cc265bb..44b64ee 100644
--- a/flink-libraries/flink-gelly-examples/src/main/java/org/apache/flink/graph/driver/GraphMetrics.java
+++ b/flink-libraries/flink-gelly-examples/src/main/java/org/apache/flink/graph/driver/GraphMetrics.java
@@ -28,7 +28,7 @@ import org.apache.flink.api.java.utils.ParameterTool;
 import org.apache.flink.graph.Graph;
 import org.apache.flink.graph.GraphAnalytic;
 import org.apache.flink.graph.GraphCsvReader;
-import org.apache.flink.graph.asm.translate.LongValueToIntValue;
+import org.apache.flink.graph.asm.translate.LongValueToUnsignedIntValue;
 import org.apache.flink.graph.asm.translate.TranslateGraphIds;
 import org.apache.flink.graph.generator.RMatGraph;
 import org.apache.flink.graph.generator.random.JDKRandomGeneratorFactory;
@@ -180,7 +180,7 @@ public class GraphMetrics {
 							.run(new org.apache.flink.graph.library.metric.directed.EdgeMetrics<LongValue, NullValue, NullValue>());
 					} else {
 						Graph<IntValue, NullValue, NullValue> newGraph = graph
-							.run(new TranslateGraphIds<LongValue, IntValue, NullValue, NullValue>(new LongValueToIntValue()))
+							.run(new TranslateGraphIds<LongValue, IntValue, NullValue, NullValue>(new LongValueToUnsignedIntValue()))
 							.run(new org.apache.flink.graph.asm.simple.directed.Simplify<IntValue, NullValue, NullValue>());
 
 						vm = newGraph
@@ -201,7 +201,7 @@ public class GraphMetrics {
 							.run(new org.apache.flink.graph.library.metric.undirected.EdgeMetrics<LongValue, NullValue, NullValue>());
 					} else {
 						Graph<IntValue, NullValue, NullValue> newGraph = graph
-							.run(new TranslateGraphIds<LongValue, IntValue, NullValue, NullValue>(new LongValueToIntValue()))
+							.run(new TranslateGraphIds<LongValue, IntValue, NullValue, NullValue>(new LongValueToUnsignedIntValue()))
 							.run(new org.apache.flink.graph.asm.simple.undirected.Simplify<IntValue, NullValue, NullValue>(clipAndFlip));
 
 						vm = newGraph

http://git-wip-us.apache.org/repos/asf/flink/blob/e58fd6e0/flink-libraries/flink-gelly-examples/src/main/java/org/apache/flink/graph/examples/ClusteringCoefficient.java
----------------------------------------------------------------------
diff --git a/flink-libraries/flink-gelly-examples/src/main/java/org/apache/flink/graph/examples/ClusteringCoefficient.java b/flink-libraries/flink-gelly-examples/src/main/java/org/apache/flink/graph/examples/ClusteringCoefficient.java
index f4b1ecf..9961fff 100644
--- a/flink-libraries/flink-gelly-examples/src/main/java/org/apache/flink/graph/examples/ClusteringCoefficient.java
+++ b/flink-libraries/flink-gelly-examples/src/main/java/org/apache/flink/graph/examples/ClusteringCoefficient.java
@@ -30,7 +30,7 @@ import org.apache.flink.api.java.utils.ParameterTool;
 import org.apache.flink.graph.Graph;
 import org.apache.flink.graph.GraphAnalytic;
 import org.apache.flink.graph.GraphCsvReader;
-import org.apache.flink.graph.asm.translate.LongValueToIntValue;
+import org.apache.flink.graph.asm.translate.LongValueToUnsignedIntValue;
 import org.apache.flink.graph.asm.translate.TranslateGraphIds;
 import org.apache.flink.graph.generator.RMatGraph;
 import org.apache.flink.graph.generator.random.JDKRandomGeneratorFactory;
@@ -195,7 +195,7 @@ public class ClusteringCoefficient {
 								.setLittleParallelism(little_parallelism));
 					} else {
 						Graph<IntValue, NullValue, NullValue> newGraph = graph
-							.run(new TranslateGraphIds<LongValue, IntValue, NullValue, NullValue>(new LongValueToIntValue())
+							.run(new TranslateGraphIds<LongValue, IntValue, NullValue, NullValue>(new LongValueToUnsignedIntValue())
 								.setParallelism(little_parallelism))
 							.run(new org.apache.flink.graph.asm.simple.directed.Simplify<IntValue, NullValue, NullValue>()
 								.setParallelism(little_parallelism));
@@ -225,7 +225,7 @@ public class ClusteringCoefficient {
 								.setLittleParallelism(little_parallelism));
 					} else {
 						Graph<IntValue, NullValue, NullValue> newGraph = graph
-							.run(new TranslateGraphIds<LongValue, IntValue, NullValue, NullValue>(new LongValueToIntValue())
+							.run(new TranslateGraphIds<LongValue, IntValue, NullValue, NullValue>(new LongValueToUnsignedIntValue())
 								.setParallelism(little_parallelism))
 							.run(new org.apache.flink.graph.asm.simple.undirected.Simplify<IntValue, NullValue, NullValue>(clipAndFlip)
 								.setParallelism(little_parallelism));

http://git-wip-us.apache.org/repos/asf/flink/blob/e58fd6e0/flink-libraries/flink-gelly-examples/src/main/java/org/apache/flink/graph/examples/HITS.java
----------------------------------------------------------------------
diff --git a/flink-libraries/flink-gelly-examples/src/main/java/org/apache/flink/graph/examples/HITS.java b/flink-libraries/flink-gelly-examples/src/main/java/org/apache/flink/graph/examples/HITS.java
index 59612d9..b2ce726 100644
--- a/flink-libraries/flink-gelly-examples/src/main/java/org/apache/flink/graph/examples/HITS.java
+++ b/flink-libraries/flink-gelly-examples/src/main/java/org/apache/flink/graph/examples/HITS.java
@@ -30,7 +30,7 @@ import org.apache.flink.api.java.utils.ParameterTool;
 import org.apache.flink.graph.Graph;
 import org.apache.flink.graph.GraphCsvReader;
 import org.apache.flink.graph.asm.simple.directed.Simplify;
-import org.apache.flink.graph.asm.translate.LongValueToIntValue;
+import org.apache.flink.graph.asm.translate.LongValueToUnsignedIntValue;
 import org.apache.flink.graph.asm.translate.TranslateGraphIds;
 import org.apache.flink.graph.generator.RMatGraph;
 import org.apache.flink.graph.generator.random.JDKRandomGeneratorFactory;
@@ -137,7 +137,7 @@ public class HITS {
 						.run(new org.apache.flink.graph.library.link_analysis.HITS<LongValue, NullValue, NullValue>(iterations));
 				} else {
 					hits = graph
-						.run(new TranslateGraphIds<LongValue, IntValue, NullValue, NullValue>(new LongValueToIntValue()))
+						.run(new TranslateGraphIds<LongValue, IntValue, NullValue, NullValue>(new LongValueToUnsignedIntValue()))
 						.run(new Simplify<IntValue, NullValue, NullValue>())
 						.run(new org.apache.flink.graph.library.link_analysis.HITS<IntValue, NullValue, NullValue>(iterations));
 				}

http://git-wip-us.apache.org/repos/asf/flink/blob/e58fd6e0/flink-libraries/flink-gelly-examples/src/main/java/org/apache/flink/graph/examples/JaccardIndex.java
----------------------------------------------------------------------
diff --git a/flink-libraries/flink-gelly-examples/src/main/java/org/apache/flink/graph/examples/JaccardIndex.java b/flink-libraries/flink-gelly-examples/src/main/java/org/apache/flink/graph/examples/JaccardIndex.java
index 96f66ab..fa69ef0 100644
--- a/flink-libraries/flink-gelly-examples/src/main/java/org/apache/flink/graph/examples/JaccardIndex.java
+++ b/flink-libraries/flink-gelly-examples/src/main/java/org/apache/flink/graph/examples/JaccardIndex.java
@@ -30,7 +30,7 @@ import org.apache.flink.api.java.utils.ParameterTool;
 import org.apache.flink.graph.Graph;
 import org.apache.flink.graph.GraphCsvReader;
 import org.apache.flink.graph.asm.simple.undirected.Simplify;
-import org.apache.flink.graph.asm.translate.LongValueToIntValue;
+import org.apache.flink.graph.asm.translate.LongValueToUnsignedIntValue;
 import org.apache.flink.graph.asm.translate.TranslateGraphIds;
 import org.apache.flink.graph.generator.RMatGraph;
 import org.apache.flink.graph.generator.random.JDKRandomGeneratorFactory;
@@ -151,7 +151,7 @@ public class JaccardIndex {
 							.setLittleParallelism(little_parallelism));
 				} else {
 					ji = graph
-						.run(new TranslateGraphIds<LongValue, IntValue, NullValue, NullValue>(new LongValueToIntValue())
+						.run(new TranslateGraphIds<LongValue, IntValue, NullValue, NullValue>(new LongValueToUnsignedIntValue())
 							.setParallelism(little_parallelism))
 						.run(new Simplify<IntValue, NullValue, NullValue>(clipAndFlip)
 							.setParallelism(little_parallelism))

http://git-wip-us.apache.org/repos/asf/flink/blob/e58fd6e0/flink-libraries/flink-gelly-examples/src/main/java/org/apache/flink/graph/examples/TriangleListing.java
----------------------------------------------------------------------
diff --git a/flink-libraries/flink-gelly-examples/src/main/java/org/apache/flink/graph/examples/TriangleListing.java b/flink-libraries/flink-gelly-examples/src/main/java/org/apache/flink/graph/examples/TriangleListing.java
index f3ce708..d819d19 100644
--- a/flink-libraries/flink-gelly-examples/src/main/java/org/apache/flink/graph/examples/TriangleListing.java
+++ b/flink-libraries/flink-gelly-examples/src/main/java/org/apache/flink/graph/examples/TriangleListing.java
@@ -30,7 +30,7 @@ import org.apache.flink.api.java.utils.ParameterTool;
 import org.apache.flink.graph.Graph;
 import org.apache.flink.graph.GraphCsvReader;
 import org.apache.flink.graph.asm.simple.undirected.Simplify;
-import org.apache.flink.graph.asm.translate.LongValueToIntValue;
+import org.apache.flink.graph.asm.translate.LongValueToUnsignedIntValue;
 import org.apache.flink.graph.asm.translate.TranslateGraphIds;
 import org.apache.flink.graph.generator.RMatGraph;
 import org.apache.flink.graph.generator.random.JDKRandomGeneratorFactory;
@@ -159,7 +159,7 @@ public class TriangleListing {
 							.run(new org.apache.flink.graph.library.clustering.directed.TriangleListing<LongValue, NullValue, NullValue>());
 					} else {
 						tl = graph
-							.run(new TranslateGraphIds<LongValue, IntValue, NullValue, NullValue>(new LongValueToIntValue()))
+							.run(new TranslateGraphIds<LongValue, IntValue, NullValue, NullValue>(new LongValueToUnsignedIntValue()))
 							.run(new org.apache.flink.graph.asm.simple.directed.Simplify<IntValue, NullValue, NullValue>())
 							.run(new org.apache.flink.graph.library.clustering.directed.TriangleListing<IntValue, NullValue, NullValue>());
 					}
@@ -175,7 +175,7 @@ public class TriangleListing {
 							.run(new org.apache.flink.graph.library.clustering.undirected.TriangleListing<LongValue, NullValue, NullValue>());
 					} else {
 						tl = graph
-							.run(new TranslateGraphIds<LongValue, IntValue, NullValue, NullValue>(new LongValueToIntValue()))
+							.run(new TranslateGraphIds<LongValue, IntValue, NullValue, NullValue>(new LongValueToUnsignedIntValue()))
 							.run(new org.apache.flink.graph.asm.simple.undirected.Simplify<IntValue, NullValue, NullValue>(clipAndFlip))
 							.run(new org.apache.flink.graph.library.clustering.undirected.TriangleListing<IntValue, NullValue, NullValue>());
 					}

http://git-wip-us.apache.org/repos/asf/flink/blob/e58fd6e0/flink-libraries/flink-gelly/src/main/java/org/apache/flink/graph/asm/translate/LongValueToIntValue.java
----------------------------------------------------------------------
diff --git a/flink-libraries/flink-gelly/src/main/java/org/apache/flink/graph/asm/translate/LongValueToIntValue.java b/flink-libraries/flink-gelly/src/main/java/org/apache/flink/graph/asm/translate/LongValueToIntValue.java
deleted file mode 100644
index adaf592..0000000
--- a/flink-libraries/flink-gelly/src/main/java/org/apache/flink/graph/asm/translate/LongValueToIntValue.java
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.flink.graph.asm.translate;
-
-import org.apache.flink.types.IntValue;
-import org.apache.flink.types.LongValue;
-import org.apache.flink.util.MathUtils;
-
-/**
- * Translate {@link LongValue} to {@link IntValue}.
- *
- * Throws {@link RuntimeException} for integer overflow.
- */
-public class LongValueToIntValue
-implements TranslateFunction<LongValue, IntValue> {
-
-	@Override
-	public IntValue translate(LongValue value, IntValue reuse)
-			throws Exception {
-		if (reuse == null) {
-			reuse = new IntValue();
-		}
-
-		reuse.setValue(MathUtils.checkedDownCast(value.getValue()));
-		return reuse;
-	}
-}

http://git-wip-us.apache.org/repos/asf/flink/blob/e58fd6e0/flink-libraries/flink-gelly/src/main/java/org/apache/flink/graph/asm/translate/LongValueToSignedIntValue.java
----------------------------------------------------------------------
diff --git a/flink-libraries/flink-gelly/src/main/java/org/apache/flink/graph/asm/translate/LongValueToSignedIntValue.java b/flink-libraries/flink-gelly/src/main/java/org/apache/flink/graph/asm/translate/LongValueToSignedIntValue.java
new file mode 100644
index 0000000..92a4abc
--- /dev/null
+++ b/flink-libraries/flink-gelly/src/main/java/org/apache/flink/graph/asm/translate/LongValueToSignedIntValue.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.graph.asm.translate;
+
+import org.apache.flink.types.IntValue;
+import org.apache.flink.types.LongValue;
+import org.apache.flink.util.MathUtils;
+
+/**
+ * Translate {@link LongValue} to {@link IntValue}.
+ *
+ * Throws {@link RuntimeException} for integer overflow.
+ */
+public class LongValueToSignedIntValue
+implements TranslateFunction<LongValue, IntValue> {
+
+	@Override
+	public IntValue translate(LongValue value, IntValue reuse)
+			throws Exception {
+		if (reuse == null) {
+			reuse = new IntValue();
+		}
+
+		reuse.setValue(MathUtils.checkedDownCast(value.getValue()));
+
+		return reuse;
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/e58fd6e0/flink-libraries/flink-gelly/src/main/java/org/apache/flink/graph/asm/translate/LongValueToUnsignedIntValue.java
----------------------------------------------------------------------
diff --git a/flink-libraries/flink-gelly/src/main/java/org/apache/flink/graph/asm/translate/LongValueToUnsignedIntValue.java b/flink-libraries/flink-gelly/src/main/java/org/apache/flink/graph/asm/translate/LongValueToUnsignedIntValue.java
new file mode 100644
index 0000000..943e727
--- /dev/null
+++ b/flink-libraries/flink-gelly/src/main/java/org/apache/flink/graph/asm/translate/LongValueToUnsignedIntValue.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.graph.asm.translate;
+
+import org.apache.flink.types.IntValue;
+import org.apache.flink.types.LongValue;
+
+/**
+ * Translate {@link LongValue} to {@link IntValue}.
+ *
+ * Throws {@link RuntimeException} for integer overflow.
+ */
+public class LongValueToUnsignedIntValue
+implements TranslateFunction<LongValue, IntValue> {
+
+	@Override
+	public IntValue translate(LongValue value, IntValue reuse)
+			throws Exception {
+		if (reuse == null) {
+			reuse = new IntValue();
+		}
+
+		long l = value.getValue();
+
+		if (l < 0 || l >= (1L << 32)) {
+			throw new IllegalArgumentException("Cannot cast long value " + value + " to integer.");
+		} else {
+			reuse.setValue((int)(l & 0xffffffffL));
+		}
+
+		return reuse;
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/e58fd6e0/flink-libraries/flink-gelly/src/test/java/org/apache/flink/graph/asm/translate/LongValueToSignedIntValueTest.java
----------------------------------------------------------------------
diff --git a/flink-libraries/flink-gelly/src/test/java/org/apache/flink/graph/asm/translate/LongValueToSignedIntValueTest.java b/flink-libraries/flink-gelly/src/test/java/org/apache/flink/graph/asm/translate/LongValueToSignedIntValueTest.java
new file mode 100644
index 0000000..9e2ec0a
--- /dev/null
+++ b/flink-libraries/flink-gelly/src/test/java/org/apache/flink/graph/asm/translate/LongValueToSignedIntValueTest.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.graph.asm.translate;
+
+import org.apache.flink.types.IntValue;
+import org.apache.flink.types.LongValue;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+public class LongValueToSignedIntValueTest {
+
+	private TranslateFunction<LongValue, IntValue> translator = new LongValueToSignedIntValue();
+
+	private IntValue reuse = new IntValue();
+
+	@Test
+	public void testTranslation() throws Exception {
+		assertEquals(new IntValue(Integer.MIN_VALUE), translator.translate(new LongValue((long)Integer.MIN_VALUE), reuse));
+		assertEquals(new IntValue(0), translator.translate(new LongValue(0L), reuse));
+		assertEquals(new IntValue(Integer.MAX_VALUE), translator.translate(new LongValue((long)Integer.MAX_VALUE), reuse));
+	}
+
+	@Test(expected=IllegalArgumentException.class)
+	public void testUpperOutOfRange() throws Exception {
+		assertEquals(new IntValue(), translator.translate(new LongValue((long)Integer.MAX_VALUE + 1), reuse));
+	}
+
+	@Test(expected=IllegalArgumentException.class)
+	public void testLowerOutOfRange() throws Exception {
+		assertEquals(new IntValue(), translator.translate(new LongValue((long)Integer.MIN_VALUE - 1), reuse));
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/e58fd6e0/flink-libraries/flink-gelly/src/test/java/org/apache/flink/graph/asm/translate/LongValueToUnsignedIntValueTest.java
----------------------------------------------------------------------
diff --git a/flink-libraries/flink-gelly/src/test/java/org/apache/flink/graph/asm/translate/LongValueToUnsignedIntValueTest.java b/flink-libraries/flink-gelly/src/test/java/org/apache/flink/graph/asm/translate/LongValueToUnsignedIntValueTest.java
new file mode 100644
index 0000000..14e9685
--- /dev/null
+++ b/flink-libraries/flink-gelly/src/test/java/org/apache/flink/graph/asm/translate/LongValueToUnsignedIntValueTest.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.graph.asm.translate;
+
+import org.apache.flink.types.IntValue;
+import org.apache.flink.types.LongValue;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+public class LongValueToUnsignedIntValueTest {
+
+	private TranslateFunction<LongValue, IntValue> translator = new LongValueToUnsignedIntValue();
+
+	private IntValue reuse = new IntValue();
+
+	@Test
+	public void testTranslation() throws Exception {
+		assertEquals(new IntValue(0), translator.translate(new LongValue(0L), reuse));
+		assertEquals(new IntValue(Integer.MIN_VALUE), translator.translate(new LongValue((long)Integer.MAX_VALUE + 1), reuse));
+		assertEquals(new IntValue(-1), translator.translate(new LongValue((1L << 32) - 1), reuse));
+	}
+
+	@Test(expected=IllegalArgumentException.class)
+	public void testUpperOutOfRange() throws Exception {
+		assertEquals(new IntValue(), translator.translate(new LongValue(1L << 32), reuse));
+	}
+
+	@Test(expected=IllegalArgumentException.class)
+	public void testLowerOutOfRange() throws Exception {
+		assertEquals(new IntValue(), translator.translate(new LongValue(-1), reuse));
+	}
+}


[41/50] [abbrv] flink git commit: [FLINK-4347][cluster management] Implement SlotManager core

Posted by tr...@apache.org.
[FLINK-4347][cluster management] Implement SlotManager core

This closes #2388


Project: http://git-wip-us.apache.org/repos/asf/flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/ba78de80
Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/ba78de80
Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/ba78de80

Branch: refs/heads/flip-6
Commit: ba78de80837a64c08591f6c3ee21f19d0d92180a
Parents: 35e8010
Author: Kurt Young <yk...@gmail.com>
Authored: Thu Aug 18 15:48:30 2016 +0800
Committer: Till Rohrmann <tr...@apache.org>
Committed: Wed Sep 21 11:39:16 2016 +0200

----------------------------------------------------------------------
 .../runtime/clusterframework/SlotManager.java   | 525 ++++++++++++++++++
 .../clusterframework/types/ResourceID.java      |   4 +-
 .../clusterframework/types/ResourceProfile.java |   5 +
 .../clusterframework/types/ResourceSlot.java    |  66 +++
 .../runtime/clusterframework/types/SlotID.java  |  14 +-
 .../rpc/resourcemanager/SlotRequest.java        |  51 +-
 .../runtime/rpc/taskexecutor/SlotReport.java    |  56 ++
 .../runtime/rpc/taskexecutor/SlotStatus.java    | 129 +++++
 .../clusterframework/SlotManagerTest.java       | 540 +++++++++++++++++++
 9 files changed, 1382 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/flink/blob/ba78de80/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/SlotManager.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/SlotManager.java b/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/SlotManager.java
new file mode 100644
index 0000000..cc140a1
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/SlotManager.java
@@ -0,0 +1,525 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.clusterframework;
+
+import org.apache.flink.annotation.VisibleForTesting;
+import org.apache.flink.runtime.clusterframework.types.AllocationID;
+import org.apache.flink.runtime.clusterframework.types.ResourceID;
+import org.apache.flink.runtime.clusterframework.types.ResourceProfile;
+import org.apache.flink.runtime.clusterframework.types.ResourceSlot;
+import org.apache.flink.runtime.clusterframework.types.SlotID;
+import org.apache.flink.runtime.rpc.resourcemanager.ResourceManagerGateway;
+import org.apache.flink.runtime.rpc.resourcemanager.SlotRequest;
+import org.apache.flink.runtime.rpc.taskexecutor.SlotReport;
+import org.apache.flink.runtime.rpc.taskexecutor.SlotStatus;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+import static org.apache.flink.util.Preconditions.checkNotNull;
+
+/**
+ * SlotManager is responsible for receiving slot requests and do slot allocations. It allows to request
+ * slots from registered TaskManagers and issues container allocation requests in case of there are not
+ * enough available slots. Besides, it should sync its slot allocation with TaskManager's heartbeat.
+ * <p>
+ * The main operation principle of SlotManager is:
+ * <ul>
+ * <li>1. All slot allocation status should be synced with TaskManager, which is the ground truth.</li>
+ * <li>2. All slots that have registered must be tracked, either by free pool or allocated pool.</li>
+ * <li>3. All slot requests will be handled by best efforts, there is no guarantee that one request will be
+ * fulfilled in time or correctly allocated. Conflicts or timeout or some special error will happen, it should
+ * be handled outside SlotManager. SlotManager will make each decision based on the information it currently
+ * holds.</li>
+ * </ul>
+ * <b>IMPORTANT:</b> This class is <b>Not Thread-safe</b>.
+ */
+public abstract class SlotManager {
+
+	private static final Logger LOG = LoggerFactory.getLogger(SlotManager.class);
+
+	/** Gateway to communicate with ResourceManager */
+	private final ResourceManagerGateway resourceManagerGateway;
+
+	/** All registered slots, including free and allocated slots */
+	private final Map<ResourceID, Map<SlotID, ResourceSlot>> registeredSlots;
+
+	/** All pending slot requests, waiting available slots to fulfil */
+	private final Map<AllocationID, SlotRequest> pendingSlotRequests;
+
+	/** All free slots that can be used to be allocated */
+	private final Map<SlotID, ResourceSlot> freeSlots;
+
+	/** All allocations, we can lookup allocations either by SlotID or AllocationID */
+	private final AllocationMap allocationMap;
+
+	public SlotManager(ResourceManagerGateway resourceManagerGateway) {
+		this.resourceManagerGateway = checkNotNull(resourceManagerGateway);
+		this.registeredSlots = new HashMap<>(16);
+		this.pendingSlotRequests = new LinkedHashMap<>(16);
+		this.freeSlots = new HashMap<>(16);
+		this.allocationMap = new AllocationMap();
+	}
+
+	// ------------------------------------------------------------------------
+	//  slot managements
+	// ------------------------------------------------------------------------
+
+	/**
+	 * Request a slot with requirements, we may either fulfill the request or pending it. Trigger container
+	 * allocation if we don't have enough resource. If we have free slot which can match the request, record
+	 * this allocation and forward the request to TaskManager through ResourceManager (we want this done by
+	 * RPC's main thread to avoid race condition).
+	 *
+	 * @param request The detailed request of the slot
+	 */
+	public void requestSlot(final SlotRequest request) {
+		if (isRequestDuplicated(request)) {
+			LOG.warn("Duplicated slot request, AllocationID:{}", request.getAllocationId());
+			return;
+		}
+
+		// try to fulfil the request with current free slots
+		ResourceSlot slot = chooseSlotToUse(request, freeSlots);
+		if (slot != null) {
+			LOG.info("Assigning SlotID({}) to AllocationID({}), JobID:{}", slot.getSlotId(),
+				request.getAllocationId(), request.getJobId());
+
+			// record this allocation in bookkeeping
+			allocationMap.addAllocation(slot.getSlotId(), request.getAllocationId());
+
+			// remove selected slot from free pool
+			freeSlots.remove(slot.getSlotId());
+
+			// TODO: send slot request to TaskManager
+		} else {
+			LOG.info("Cannot fulfil slot request, try to allocate a new container for it, " +
+				"AllocationID:{}, JobID:{}", request.getAllocationId(), request.getJobId());
+			allocateContainer(request.getResourceProfile());
+			pendingSlotRequests.put(request.getAllocationId(), request);
+		}
+	}
+
+	/**
+	 * Sync slot status with TaskManager's SlotReport.
+	 */
+	public void updateSlotStatus(final SlotReport slotReport) {
+		for (SlotStatus slotStatus : slotReport.getSlotsStatus()) {
+			updateSlotStatus(slotStatus);
+		}
+	}
+
+	/**
+	 * The slot request to TaskManager may be either failed by rpc communication (timeout, network error, etc.)
+	 * or really rejected by TaskManager. We shall retry this request by:
+	 * <ul>
+	 * <li>1. verify and clear all the previous allocate information for this request
+	 * <li>2. try to request slot again
+	 * </ul>
+	 * <p>
+	 * This may cause some duplicate allocation, e.g. the slot request to TaskManager is successful but the response
+	 * is lost somehow, so we may request a slot in another TaskManager, this causes two slots assigned to one request,
+	 * but it can be taken care of by rejecting registration at JobManager.
+	 *
+	 * @param originalRequest The original slot request
+	 * @param slotId          The target SlotID
+	 */
+	public void handleSlotRequestFailedAtTaskManager(final SlotRequest originalRequest, final SlotID slotId) {
+		final AllocationID originalAllocationId = originalRequest.getAllocationId();
+		LOG.info("Slot request failed at TaskManager, SlotID:{}, AllocationID:{}, JobID:{}",
+			slotId, originalAllocationId, originalRequest.getJobId());
+
+		// verify the allocation info before we do anything
+		if (freeSlots.containsKey(slotId)) {
+			// this slot is currently empty, no need to de-allocate it from our allocations
+			LOG.info("Original slot is somehow empty, retrying this request");
+
+			// before retry, we should double check whether this request was allocated by some other ways
+			if (!allocationMap.isAllocated(originalAllocationId)) {
+				requestSlot(originalRequest);
+			} else {
+				LOG.info("The failed request has somehow been allocated, SlotID:{}",
+					allocationMap.getSlotID(originalAllocationId));
+			}
+		} else if (allocationMap.isAllocated(slotId)) {
+			final AllocationID currentAllocationId = allocationMap.getAllocationID(slotId);
+
+			// check whether we have an agreement on whom this slot belongs to
+			if (originalAllocationId.equals(currentAllocationId)) {
+				LOG.info("De-allocate this request and retry");
+				allocationMap.removeAllocation(currentAllocationId);
+
+				// put this slot back to free pool
+				ResourceSlot slot = checkNotNull(getRegisteredSlot(slotId));
+				freeSlots.put(slotId, slot);
+
+				// retry the request
+				requestSlot(originalRequest);
+			} else {
+				// the slot is taken by someone else, no need to de-allocate it from our allocations
+				LOG.info("Original slot is taken by someone else, current AllocationID:{}", currentAllocationId);
+
+				// before retry, we should double check whether this request was allocated by some other ways
+				if (!allocationMap.isAllocated(originalAllocationId)) {
+					requestSlot(originalRequest);
+				} else {
+					LOG.info("The failed request is somehow been allocated, SlotID:{}",
+						allocationMap.getSlotID(originalAllocationId));
+				}
+			}
+		} else {
+			LOG.error("BUG! {} is neither in free pool nor in allocated pool", slotId);
+		}
+	}
+
+	/**
+	 * Callback for TaskManager failures. In case that a TaskManager fails, we have to clean up all its slots.
+	 *
+	 * @param resourceId The ResourceID of the TaskManager
+	 */
+	public void notifyTaskManagerFailure(final ResourceID resourceId) {
+		LOG.info("Resource:{} been notified failure", resourceId);
+		final Map<SlotID, ResourceSlot> slotIdsToRemove = registeredSlots.remove(resourceId);
+		if (slotIdsToRemove != null) {
+			for (SlotID slotId : slotIdsToRemove.keySet()) {
+				LOG.info("Removing Slot:{} upon resource failure", slotId);
+				if (freeSlots.containsKey(slotId)) {
+					freeSlots.remove(slotId);
+				} else if (allocationMap.isAllocated(slotId)) {
+					allocationMap.removeAllocation(slotId);
+				} else {
+					LOG.error("BUG! {} is neither in free pool nor in allocated pool", slotId);
+				}
+			}
+		}
+	}
+
+	// ------------------------------------------------------------------------
+	//  internal behaviors
+	// ------------------------------------------------------------------------
+
+	/**
+	 * Update slot status based on TaskManager's report. There are mainly two situations when we receive the report:
+	 * <ul>
+	 * <li>1. The slot is newly registered.</li>
+	 * <li>2. The slot has registered, it contains its current status.</li>
+	 * </ul>
+	 * <p>
+	 * Regarding 1: It's fairly simple, we just record this slot's status, and trigger schedule if slot is empty.
+	 * <p>
+	 * Regarding 2: It will cause some weird situation since we may have some time-gap on how the slot's status really
+	 * is. We may have some updates on the slot's allocation, but it doesn't reflected by TaskManager's heartbeat yet,
+	 * and we may make some wrong decision if we cannot guarantee we have the exact status about all the slots. So
+	 * the principle here is: We always trust TaskManager's heartbeat, we will correct our information based on that
+	 * and take next action based on the diff between our information and heartbeat status.
+	 *
+	 * @param reportedStatus Reported slot status
+	 */
+	void updateSlotStatus(final SlotStatus reportedStatus) {
+		final SlotID slotId = reportedStatus.getSlotID();
+		final ResourceSlot slot = new ResourceSlot(slotId, reportedStatus.getProfiler());
+
+		if (registerNewSlot(slot)) {
+			// we have a newly registered slot
+			LOG.info("New slot appeared, SlotID:{}, AllocationID:{}", slotId, reportedStatus.getAllocationID());
+
+			if (reportedStatus.getAllocationID() != null) {
+				// slot in use, record this in bookkeeping
+				allocationMap.addAllocation(slotId, reportedStatus.getAllocationID());
+			} else {
+				handleFreeSlot(new ResourceSlot(slotId, reportedStatus.getProfiler()));
+			}
+		} else {
+			// slot exists, update current information
+			if (reportedStatus.getAllocationID() != null) {
+				// slot is reported in use
+				final AllocationID reportedAllocationId = reportedStatus.getAllocationID();
+
+				// check whether we also thought this slot is in use
+				if (allocationMap.isAllocated(slotId)) {
+					// we also think that slot is in use, check whether the AllocationID matches
+					final AllocationID currentAllocationId = allocationMap.getAllocationID(slotId);
+
+					if (!reportedAllocationId.equals(currentAllocationId)) {
+						LOG.info("Slot allocation info mismatch! SlotID:{}, current:{}, reported:{}",
+							slotId, currentAllocationId, reportedAllocationId);
+
+						// seems we have a disagreement about the slot assignments, need to correct it
+						allocationMap.removeAllocation(slotId);
+						allocationMap.addAllocation(slotId, reportedAllocationId);
+					}
+				} else {
+					LOG.info("Slot allocation info mismatch! SlotID:{}, current:null, reported:{}",
+						slotId, reportedAllocationId);
+
+					// we thought the slot is free, should correct this information
+					allocationMap.addAllocation(slotId, reportedStatus.getAllocationID());
+
+					// remove this slot from free slots pool
+					freeSlots.remove(slotId);
+				}
+			} else {
+				// slot is reported empty
+
+				// check whether we also thought this slot is empty
+				if (allocationMap.isAllocated(slotId)) {
+					LOG.info("Slot allocation info mismatch! SlotID:{}, current:{}, reported:null",
+						slotId, allocationMap.getAllocationID(slotId));
+
+					// we thought the slot is in use, correct it
+					allocationMap.removeAllocation(slotId);
+
+					// we have a free slot!
+					handleFreeSlot(new ResourceSlot(slotId, reportedStatus.getProfiler()));
+				}
+			}
+		}
+	}
+
+	/**
+	 * When we have a free slot, try to fulfill the pending request first. If any request can be fulfilled,
+	 * record this allocation in bookkeeping and send slot request to TaskManager, else we just add this slot
+	 * to the free pool.
+	 *
+	 * @param freeSlot The free slot
+	 */
+	private void handleFreeSlot(final ResourceSlot freeSlot) {
+		SlotRequest chosenRequest = chooseRequestToFulfill(freeSlot, pendingSlotRequests);
+
+		if (chosenRequest != null) {
+			pendingSlotRequests.remove(chosenRequest.getAllocationId());
+
+			LOG.info("Assigning SlotID({}) to AllocationID({}), JobID:{}", freeSlot.getSlotId(),
+				chosenRequest.getAllocationId(), chosenRequest.getJobId());
+			allocationMap.addAllocation(freeSlot.getSlotId(), chosenRequest.getAllocationId());
+
+			// TODO: send slot request to TaskManager
+		} else {
+			freeSlots.put(freeSlot.getSlotId(), freeSlot);
+		}
+	}
+
+	/**
+	 * Check whether the request is duplicated. We use AllocationID to identify slot request, for each
+	 * formerly received slot request, it is either in pending list or already been allocated.
+	 *
+	 * @param request The slot request
+	 * @return <tt>true</tt> if the request is duplicated
+	 */
+	private boolean isRequestDuplicated(final SlotRequest request) {
+		final AllocationID allocationId = request.getAllocationId();
+		return pendingSlotRequests.containsKey(allocationId)
+			|| allocationMap.isAllocated(allocationId);
+	}
+
+	/**
+	 * Try to register slot, and tell if this slot is newly registered.
+	 *
+	 * @param slot The ResourceSlot which will be checked and registered
+	 * @return <tt>true</tt> if we meet a new slot
+	 */
+	private boolean registerNewSlot(final ResourceSlot slot) {
+		final SlotID slotId = slot.getSlotId();
+		final ResourceID resourceId = slotId.getResourceID();
+		if (!registeredSlots.containsKey(resourceId)) {
+			registeredSlots.put(resourceId, new HashMap<SlotID, ResourceSlot>());
+		}
+		return registeredSlots.get(resourceId).put(slotId, slot) == null;
+	}
+
+	private ResourceSlot getRegisteredSlot(final SlotID slotId) {
+		final ResourceID resourceId = slotId.getResourceID();
+		if (!registeredSlots.containsKey(resourceId)) {
+			return null;
+		}
+		return registeredSlots.get(resourceId).get(slotId);
+	}
+
+	// ------------------------------------------------------------------------
+	//  Framework specific behavior
+	// ------------------------------------------------------------------------
+
+	/**
+	 * Choose a slot to use among all free slots, the behavior is framework specified.
+	 *
+	 * @param request   The slot request
+	 * @param freeSlots All slots which can be used
+	 * @return The slot we choose to use, <tt>null</tt> if we did not find a match
+	 */
+	protected abstract ResourceSlot chooseSlotToUse(final SlotRequest request,
+		final Map<SlotID, ResourceSlot> freeSlots);
+
+	/**
+	 * Choose a pending request to fulfill when we have a free slot, the behavior is framework specified.
+	 *
+	 * @param offeredSlot     The free slot
+	 * @param pendingRequests All the pending slot requests
+	 * @return The chosen SlotRequest, <tt>null</tt> if we did not find a match
+	 */
+	protected abstract SlotRequest chooseRequestToFulfill(final ResourceSlot offeredSlot,
+		final Map<AllocationID, SlotRequest> pendingRequests);
+
+	/**
+	 * The framework specific code for allocating a container for specified resource profile.
+	 *
+	 * @param resourceProfile The resource profile
+	 */
+	protected abstract void allocateContainer(final ResourceProfile resourceProfile);
+
+
+	// ------------------------------------------------------------------------
+	//  Helper classes
+	// ------------------------------------------------------------------------
+
+	/**
+	 * We maintain all the allocations with SlotID and AllocationID. We are able to get or remove the allocation info
+	 * either by SlotID or AllocationID.
+	 */
+	private static class AllocationMap {
+
+		/** All allocated slots (by SlotID) */
+		private final Map<SlotID, AllocationID> allocatedSlots;
+
+		/** All allocated slots (by AllocationID), it'a a inverse view of allocatedSlots */
+		private final Map<AllocationID, SlotID> allocatedSlotsByAllocationId;
+
+		AllocationMap() {
+			this.allocatedSlots = new HashMap<>(16);
+			this.allocatedSlotsByAllocationId = new HashMap<>(16);
+		}
+
+		/**
+		 * Add a allocation
+		 *
+		 * @param slotId       The slot id
+		 * @param allocationId The allocation id
+		 */
+		void addAllocation(final SlotID slotId, final AllocationID allocationId) {
+			allocatedSlots.put(slotId, allocationId);
+			allocatedSlotsByAllocationId.put(allocationId, slotId);
+		}
+
+		/**
+		 * De-allocation with slot id
+		 *
+		 * @param slotId The slot id
+		 */
+		void removeAllocation(final SlotID slotId) {
+			if (allocatedSlots.containsKey(slotId)) {
+				final AllocationID allocationId = allocatedSlots.get(slotId);
+				allocatedSlots.remove(slotId);
+				allocatedSlotsByAllocationId.remove(allocationId);
+			}
+		}
+
+		/**
+		 * De-allocation with allocation id
+		 *
+		 * @param allocationId The allocation id
+		 */
+		void removeAllocation(final AllocationID allocationId) {
+			if (allocatedSlotsByAllocationId.containsKey(allocationId)) {
+				SlotID slotId = allocatedSlotsByAllocationId.get(allocationId);
+				allocatedSlotsByAllocationId.remove(allocationId);
+				allocatedSlots.remove(slotId);
+			}
+		}
+
+		/**
+		 * Check whether allocation exists by slot id
+		 *
+		 * @param slotId The slot id
+		 * @return true if the allocation exists
+		 */
+		boolean isAllocated(final SlotID slotId) {
+			return allocatedSlots.containsKey(slotId);
+		}
+
+		/**
+		 * Check whether allocation exists by allocation id
+		 *
+		 * @param allocationId The allocation id
+		 * @return true if the allocation exists
+		 */
+		boolean isAllocated(final AllocationID allocationId) {
+			return allocatedSlotsByAllocationId.containsKey(allocationId);
+		}
+
+		AllocationID getAllocationID(final SlotID slotId) {
+			return allocatedSlots.get(slotId);
+		}
+
+		SlotID getSlotID(final AllocationID allocationId) {
+			return allocatedSlotsByAllocationId.get(allocationId);
+		}
+
+		public int size() {
+			return allocatedSlots.size();
+		}
+	}
+
+	// ------------------------------------------------------------------------
+	//  Testing utilities
+	// ------------------------------------------------------------------------
+
+	@VisibleForTesting
+	boolean isAllocated(final SlotID slotId) {
+		return allocationMap.isAllocated(slotId);
+	}
+
+	@VisibleForTesting
+	boolean isAllocated(final AllocationID allocationId) {
+		return allocationMap.isAllocated(allocationId);
+	}
+
+	/**
+	 * Add free slots directly to the free pool, this will not trigger pending requests allocation
+	 *
+	 * @param slot The resource slot
+	 */
+	@VisibleForTesting
+	void addFreeSlot(final ResourceSlot slot) {
+		final ResourceID resourceId = slot.getResourceID();
+		final SlotID slotId = slot.getSlotId();
+
+		if (!registeredSlots.containsKey(resourceId)) {
+			registeredSlots.put(resourceId, new HashMap<SlotID, ResourceSlot>());
+		}
+		registeredSlots.get(resourceId).put(slot.getSlotId(), slot);
+		freeSlots.put(slotId, slot);
+	}
+
+	@VisibleForTesting
+	int getAllocatedSlotCount() {
+		return allocationMap.size();
+	}
+
+	@VisibleForTesting
+	int getFreeSlotCount() {
+		return freeSlots.size();
+	}
+
+	@VisibleForTesting
+	int getPendingRequestCount() {
+		return pendingSlotRequests.size();
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/ba78de80/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/types/ResourceID.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/types/ResourceID.java b/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/types/ResourceID.java
index 8cf9ccb..6b8a037 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/types/ResourceID.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/types/ResourceID.java
@@ -63,9 +63,7 @@ public final class ResourceID implements ResourceIDRetrievable, Serializable {
 
 	@Override
 	public String toString() {
-		return "ResourceID{" +
-			"resourceId='" + resourceId + '\'' +
-			'}';
+		return resourceId;
 	}
 
 	/**

http://git-wip-us.apache.org/repos/asf/flink/blob/ba78de80/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/types/ResourceProfile.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/types/ResourceProfile.java b/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/types/ResourceProfile.java
index cbe709f..ff1c4bf 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/types/ResourceProfile.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/types/ResourceProfile.java
@@ -40,6 +40,11 @@ public class ResourceProfile implements Serializable {
 		this.memoryInMB = memoryInMB;
 	}
 
+	public ResourceProfile(ResourceProfile other) {
+		this.cpuCores = other.cpuCores;
+		this.memoryInMB = other.memoryInMB;
+	}
+
 	/**
 	 * Get the cpu cores needed
 	 * @return The cpu cores, 1.0 means a full cpu thread

http://git-wip-us.apache.org/repos/asf/flink/blob/ba78de80/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/types/ResourceSlot.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/types/ResourceSlot.java b/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/types/ResourceSlot.java
new file mode 100644
index 0000000..8a6db5f
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/types/ResourceSlot.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.clusterframework.types;
+
+import java.io.Serializable;
+
+import static org.apache.flink.util.Preconditions.checkNotNull;
+
+/**
+ * A ResourceSlot represents a slot located in TaskManager from ResourceManager's view. It has a unique
+ * identification and resource profile which we can compare to the resource request.
+ */
+public class ResourceSlot implements ResourceIDRetrievable, Serializable {
+
+	private static final long serialVersionUID = -5853720153136840674L;
+
+	/** The unique identification of this slot */
+	private final SlotID slotId;
+
+	/** The resource profile of this slot */
+	private final ResourceProfile resourceProfile;
+
+	public ResourceSlot(SlotID slotId, ResourceProfile resourceProfile) {
+		this.slotId = checkNotNull(slotId);
+		this.resourceProfile = checkNotNull(resourceProfile);
+	}
+
+	@Override
+	public ResourceID getResourceID() {
+		return slotId.getResourceID();
+	}
+
+	public SlotID getSlotId() {
+		return slotId;
+	}
+
+	public ResourceProfile getResourceProfile() {
+		return resourceProfile;
+	}
+
+	/**
+	 * Check whether required resource profile can be matched by this slot.
+	 *
+	 * @param required The required resource profile
+	 * @return true if requirement can be matched
+	 */
+	public boolean isMatchingRequirement(ResourceProfile required) {
+		return resourceProfile.isMatching(required);
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/ba78de80/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/types/SlotID.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/types/SlotID.java b/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/types/SlotID.java
index d1b072d..e831a5d 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/types/SlotID.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/clusterframework/types/SlotID.java
@@ -75,9 +75,15 @@ public class SlotID implements ResourceIDRetrievable, Serializable {
 
 	@Override
 	public String toString() {
-		return "SlotID{" +
-			"resourceId=" + resourceId +
-			", slotId=" + slotId +
-			'}';
+		return resourceId + "_" + slotId;
+	}
+
+	/**
+	 * Generate a random slot id.
+	 *
+	 * @return A random slot id.
+	 */
+	public static SlotID generate() {
+		return new SlotID(ResourceID.generate(), 0);
 	}
 }

http://git-wip-us.apache.org/repos/asf/flink/blob/ba78de80/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/SlotRequest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/SlotRequest.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/SlotRequest.java
index d8fe268..74c7c39 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/SlotRequest.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/SlotRequest.java
@@ -18,8 +18,57 @@
 
 package org.apache.flink.runtime.rpc.resourcemanager;
 
+import org.apache.flink.api.common.JobID;
+import org.apache.flink.runtime.clusterframework.types.AllocationID;
+import org.apache.flink.runtime.clusterframework.types.ResourceProfile;
+
 import java.io.Serializable;
 
-public class SlotRequest implements Serializable{
+import static org.apache.flink.util.Preconditions.checkNotNull;
+
+/**
+ * This describes the requirement of the slot, mainly used by JobManager requesting slot from ResourceManager.
+ */
+public class SlotRequest implements Serializable {
+
 	private static final long serialVersionUID = -6586877187990445986L;
+
+	/** The JobID of the slot requested for */
+	private final JobID jobId;
+
+	/** The unique identification of this request */
+	private final AllocationID allocationId;
+
+	/** The resource profile of the required slot */
+	private final ResourceProfile resourceProfile;
+
+	public SlotRequest(JobID jobId, AllocationID allocationId, ResourceProfile resourceProfile) {
+		this.jobId = checkNotNull(jobId);
+		this.allocationId = checkNotNull(allocationId);
+		this.resourceProfile = checkNotNull(resourceProfile);
+	}
+
+	/**
+	 * Get the JobID of the slot requested for.
+	 * @return The job id
+	 */
+	public JobID getJobId() {
+		return jobId;
+	}
+
+	/**
+	 * Get the unique identification of this request
+	 * @return the allocation id
+	 */
+	public AllocationID getAllocationId() {
+		return allocationId;
+	}
+
+	/**
+	 * Get the resource profile of the desired slot
+	 * @return The resource profile
+	 */
+	public ResourceProfile getResourceProfile() {
+		return resourceProfile;
+	}
 }

http://git-wip-us.apache.org/repos/asf/flink/blob/ba78de80/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/SlotReport.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/SlotReport.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/SlotReport.java
new file mode 100644
index 0000000..c372ecb
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/SlotReport.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc.taskexecutor;
+
+import org.apache.flink.runtime.clusterframework.types.ResourceID;
+
+import java.io.Serializable;
+import java.util.List;
+
+import static org.apache.flink.util.Preconditions.checkNotNull;
+
+/**
+ * A report about the current status of all slots of the TaskExecutor, describing
+ * which slots are available and allocated, and what jobs (JobManagers) the allocated slots
+ * have been allocated to.
+ */
+public class SlotReport implements Serializable {
+
+	private static final long serialVersionUID = -3150175198722481689L;
+
+	/** The slots status of the TaskManager */
+	private final List<SlotStatus> slotsStatus;
+
+	/** The resource id which identifies the TaskManager */
+	private final ResourceID resourceID;
+
+	public SlotReport(final List<SlotStatus> slotsStatus, final ResourceID resourceID) {
+		this.slotsStatus = checkNotNull(slotsStatus);
+		this.resourceID = checkNotNull(resourceID);
+	}
+
+	public List<SlotStatus> getSlotsStatus() {
+		return slotsStatus;
+	}
+
+	public ResourceID getResourceID() {
+		return resourceID;
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/ba78de80/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/SlotStatus.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/SlotStatus.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/SlotStatus.java
new file mode 100644
index 0000000..e8e2084
--- /dev/null
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/taskexecutor/SlotStatus.java
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc.taskexecutor;
+
+import org.apache.flink.api.common.JobID;
+import org.apache.flink.runtime.clusterframework.types.AllocationID;
+import org.apache.flink.runtime.clusterframework.types.ResourceProfile;
+import org.apache.flink.runtime.clusterframework.types.SlotID;
+
+import java.io.Serializable;
+
+import static org.apache.flink.util.Preconditions.checkNotNull;
+
+/**
+ * This describes the slot current status which located in TaskManager.
+ */
+public class SlotStatus implements Serializable {
+
+	private static final long serialVersionUID = 5099191707339664493L;
+
+	/** slotID to identify a slot */
+	private final SlotID slotID;
+
+	/** the resource profile of the slot */
+	private final ResourceProfile profiler;
+
+	/** if the slot is allocated, allocationId identify its allocation; else, allocationId is null */
+	private final AllocationID allocationID;
+
+	/** if the slot is allocated, jobId identify which job this slot is allocated to; else, jobId is null */
+	private final JobID jobID;
+
+	public SlotStatus(SlotID slotID, ResourceProfile profiler) {
+		this(slotID, profiler, null, null);
+	}
+
+	public SlotStatus(SlotID slotID, ResourceProfile profiler, AllocationID allocationID, JobID jobID) {
+		this.slotID = checkNotNull(slotID, "slotID cannot be null");
+		this.profiler = checkNotNull(profiler, "profile cannot be null");
+		this.allocationID = allocationID;
+		this.jobID = jobID;
+	}
+
+	/**
+	 * Get the unique identification of this slot
+	 *
+	 * @return The slot id
+	 */
+	public SlotID getSlotID() {
+		return slotID;
+	}
+
+	/**
+	 * Get the resource profile of this slot
+	 *
+	 * @return The resource profile
+	 */
+	public ResourceProfile getProfiler() {
+		return profiler;
+	}
+
+	/**
+	 * Get the allocation id of this slot
+	 *
+	 * @return The allocation id if this slot is allocated, otherwise null
+	 */
+	public AllocationID getAllocationID() {
+		return allocationID;
+	}
+
+	/**
+	 * Get the job id of the slot allocated for
+	 *
+	 * @return The job id if this slot is allocated, otherwise null
+	 */
+	public JobID getJobID() {
+		return jobID;
+	}
+
+	@Override
+	public boolean equals(Object o) {
+		if (this == o) {
+			return true;
+		}
+		if (o == null || getClass() != o.getClass()) {
+			return false;
+		}
+
+		SlotStatus that = (SlotStatus) o;
+
+		if (!slotID.equals(that.slotID)) {
+			return false;
+		}
+		if (!profiler.equals(that.profiler)) {
+			return false;
+		}
+		if (allocationID != null ? !allocationID.equals(that.allocationID) : that.allocationID != null) {
+			return false;
+		}
+		return jobID != null ? jobID.equals(that.jobID) : that.jobID == null;
+
+	}
+
+	@Override
+	public int hashCode() {
+		int result = slotID.hashCode();
+		result = 31 * result + profiler.hashCode();
+		result = 31 * result + (allocationID != null ? allocationID.hashCode() : 0);
+		result = 31 * result + (jobID != null ? jobID.hashCode() : 0);
+		return result;
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/ba78de80/flink-runtime/src/test/java/org/apache/flink/runtime/clusterframework/SlotManagerTest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/clusterframework/SlotManagerTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/clusterframework/SlotManagerTest.java
new file mode 100644
index 0000000..2ee280f
--- /dev/null
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/clusterframework/SlotManagerTest.java
@@ -0,0 +1,540 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.clusterframework;
+
+import org.apache.flink.api.common.JobID;
+import org.apache.flink.runtime.clusterframework.types.AllocationID;
+import org.apache.flink.runtime.clusterframework.types.ResourceID;
+import org.apache.flink.runtime.clusterframework.types.ResourceProfile;
+import org.apache.flink.runtime.clusterframework.types.ResourceSlot;
+import org.apache.flink.runtime.clusterframework.types.SlotID;
+import org.apache.flink.runtime.rpc.resourcemanager.ResourceManagerGateway;
+import org.apache.flink.runtime.rpc.resourcemanager.SlotRequest;
+import org.apache.flink.runtime.rpc.taskexecutor.SlotStatus;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Mockito.mock;
+
+public class SlotManagerTest {
+
+	private static final double DEFAULT_TESTING_CPU_CORES = 1.0;
+
+	private static final long DEFAULT_TESTING_MEMORY = 512;
+
+	private static final ResourceProfile DEFAULT_TESTING_PROFILE =
+		new ResourceProfile(DEFAULT_TESTING_CPU_CORES, DEFAULT_TESTING_MEMORY);
+
+	private static final ResourceProfile DEFAULT_TESTING_BIG_PROFILE =
+		new ResourceProfile(DEFAULT_TESTING_CPU_CORES * 2, DEFAULT_TESTING_MEMORY * 2);
+
+	private ResourceManagerGateway resourceManagerGateway;
+
+	@Before
+	public void setUp() {
+		resourceManagerGateway = mock(ResourceManagerGateway.class);
+	}
+
+	/**
+	 * Tests that there are no free slots when we request, need to allocate from cluster manager master
+	 */
+	@Test
+	public void testRequestSlotWithoutFreeSlot() {
+		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
+		slotManager.requestSlot(new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE));
+
+		assertEquals(0, slotManager.getAllocatedSlotCount());
+		assertEquals(0, slotManager.getFreeSlotCount());
+		assertEquals(1, slotManager.getPendingRequestCount());
+		assertEquals(1, slotManager.getAllocatedContainers().size());
+		assertEquals(DEFAULT_TESTING_PROFILE, slotManager.getAllocatedContainers().get(0));
+	}
+
+	/**
+	 * Tests that there are some free slots when we request, and the request is fulfilled immediately
+	 */
+	@Test
+	public void testRequestSlotWithFreeSlot() {
+		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
+
+		directlyProvideFreeSlots(slotManager, DEFAULT_TESTING_PROFILE, 1);
+		assertEquals(1, slotManager.getFreeSlotCount());
+
+		slotManager.requestSlot(new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE));
+		assertEquals(1, slotManager.getAllocatedSlotCount());
+		assertEquals(0, slotManager.getFreeSlotCount());
+		assertEquals(0, slotManager.getPendingRequestCount());
+		assertEquals(0, slotManager.getAllocatedContainers().size());
+	}
+
+	/**
+	 * Tests that there are some free slots when we request, but none of them are suitable
+	 */
+	@Test
+	public void testRequestSlotWithoutSuitableSlot() {
+		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
+
+		directlyProvideFreeSlots(slotManager, DEFAULT_TESTING_PROFILE, 2);
+		assertEquals(2, slotManager.getFreeSlotCount());
+
+		slotManager.requestSlot(new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_BIG_PROFILE));
+		assertEquals(0, slotManager.getAllocatedSlotCount());
+		assertEquals(2, slotManager.getFreeSlotCount());
+		assertEquals(1, slotManager.getPendingRequestCount());
+		assertEquals(1, slotManager.getAllocatedContainers().size());
+		assertEquals(DEFAULT_TESTING_BIG_PROFILE, slotManager.getAllocatedContainers().get(0));
+	}
+
+	/**
+	 * Tests that we send duplicated slot request
+	 */
+	@Test
+	public void testDuplicatedSlotRequest() {
+		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
+		directlyProvideFreeSlots(slotManager, DEFAULT_TESTING_PROFILE, 1);
+
+		SlotRequest request1 = new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE);
+		SlotRequest request2 = new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_BIG_PROFILE);
+
+		slotManager.requestSlot(request1);
+		slotManager.requestSlot(request2);
+		slotManager.requestSlot(request2);
+		slotManager.requestSlot(request1);
+
+		assertEquals(1, slotManager.getAllocatedSlotCount());
+		assertEquals(0, slotManager.getFreeSlotCount());
+		assertEquals(1, slotManager.getPendingRequestCount());
+		assertEquals(1, slotManager.getAllocatedContainers().size());
+		assertEquals(DEFAULT_TESTING_BIG_PROFILE, slotManager.getAllocatedContainers().get(0));
+	}
+
+	/**
+	 * Tests that we send multiple slot requests
+	 */
+	@Test
+	public void testRequestMultipleSlots() {
+		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
+		directlyProvideFreeSlots(slotManager, DEFAULT_TESTING_PROFILE, 5);
+
+		// request 3 normal slots
+		for (int i = 0; i < 3; ++i) {
+			slotManager.requestSlot(new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE));
+		}
+
+		// request 2 big slots
+		for (int i = 0; i < 2; ++i) {
+			slotManager.requestSlot(new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_BIG_PROFILE));
+		}
+
+		// request 1 normal slot again
+		slotManager.requestSlot(new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE));
+
+		assertEquals(4, slotManager.getAllocatedSlotCount());
+		assertEquals(1, slotManager.getFreeSlotCount());
+		assertEquals(2, slotManager.getPendingRequestCount());
+		assertEquals(2, slotManager.getAllocatedContainers().size());
+		assertEquals(DEFAULT_TESTING_BIG_PROFILE, slotManager.getAllocatedContainers().get(0));
+		assertEquals(DEFAULT_TESTING_BIG_PROFILE, slotManager.getAllocatedContainers().get(1));
+	}
+
+	/**
+	 * Tests that a new slot appeared in SlotReport, and we used it to fulfill a pending request
+	 */
+	@Test
+	public void testNewlyAppearedFreeSlotFulfillPendingRequest() {
+		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
+		slotManager.requestSlot(new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE));
+		assertEquals(1, slotManager.getPendingRequestCount());
+
+		SlotID slotId = SlotID.generate();
+		SlotStatus slotStatus = new SlotStatus(slotId, DEFAULT_TESTING_PROFILE);
+		slotManager.updateSlotStatus(slotStatus);
+
+		assertEquals(1, slotManager.getAllocatedSlotCount());
+		assertEquals(0, slotManager.getFreeSlotCount());
+		assertEquals(0, slotManager.getPendingRequestCount());
+		assertTrue(slotManager.isAllocated(slotId));
+	}
+
+	/**
+	 * Tests that a new slot appeared in SlotReport, but we have no pending request
+	 */
+	@Test
+	public void testNewlyAppearedFreeSlot() {
+		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
+		SlotID slotId = SlotID.generate();
+		SlotStatus slotStatus = new SlotStatus(slotId, DEFAULT_TESTING_PROFILE);
+		slotManager.updateSlotStatus(slotStatus);
+
+		assertEquals(0, slotManager.getAllocatedSlotCount());
+		assertEquals(1, slotManager.getFreeSlotCount());
+	}
+
+	/**
+	 * Tests that a new slot appeared in SlotReport, but it't not suitable for all the pending requests
+	 */
+	@Test
+	public void testNewlyAppearedFreeSlotNotMatchPendingRequests() {
+		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
+		slotManager.requestSlot(new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_BIG_PROFILE));
+		assertEquals(1, slotManager.getPendingRequestCount());
+
+		SlotID slotId = SlotID.generate();
+		SlotStatus slotStatus = new SlotStatus(slotId, DEFAULT_TESTING_PROFILE);
+		slotManager.updateSlotStatus(slotStatus);
+
+		assertEquals(0, slotManager.getAllocatedSlotCount());
+		assertEquals(1, slotManager.getFreeSlotCount());
+		assertEquals(1, slotManager.getPendingRequestCount());
+		assertFalse(slotManager.isAllocated(slotId));
+	}
+
+	/**
+	 * Tests that a new slot appeared in SlotReport, and it's been reported using by some job
+	 */
+	@Test
+	public void testNewlyAppearedInUseSlot() {
+		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
+
+		SlotID slotId = SlotID.generate();
+		SlotStatus slotStatus = new SlotStatus(slotId, DEFAULT_TESTING_PROFILE, new AllocationID(), new JobID());
+		slotManager.updateSlotStatus(slotStatus);
+
+		assertEquals(1, slotManager.getAllocatedSlotCount());
+		assertEquals(0, slotManager.getFreeSlotCount());
+		assertTrue(slotManager.isAllocated(slotId));
+	}
+
+	/**
+	 * Tests that we had a slot in-use, and it's confirmed by SlotReport
+	 */
+	@Test
+	public void testExistingInUseSlotUpdateStatus() {
+		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
+		SlotRequest request = new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE);
+		slotManager.requestSlot(request);
+
+		// make this slot in use
+		SlotID slotId = SlotID.generate();
+		SlotStatus slotStatus = new SlotStatus(slotId, DEFAULT_TESTING_PROFILE);
+		slotManager.updateSlotStatus(slotStatus);
+
+		assertEquals(1, slotManager.getAllocatedSlotCount());
+		assertEquals(0, slotManager.getFreeSlotCount());
+		assertTrue(slotManager.isAllocated(slotId));
+
+		// slot status is confirmed
+		SlotStatus slotStatus2 = new SlotStatus(slotId, DEFAULT_TESTING_PROFILE,
+			request.getAllocationId(), request.getJobId());
+		slotManager.updateSlotStatus(slotStatus2);
+
+		assertEquals(1, slotManager.getAllocatedSlotCount());
+		assertEquals(0, slotManager.getFreeSlotCount());
+		assertTrue(slotManager.isAllocated(slotId));
+	}
+
+	/**
+	 * Tests that we had a slot in-use, but it's empty according to the SlotReport
+	 */
+	@Test
+	public void testExistingInUseSlotAdjustedToEmpty() {
+		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
+		SlotRequest request1 = new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE);
+		slotManager.requestSlot(request1);
+
+		// make this slot in use
+		SlotID slotId = SlotID.generate();
+		SlotStatus slotStatus = new SlotStatus(slotId, DEFAULT_TESTING_PROFILE);
+		slotManager.updateSlotStatus(slotStatus);
+
+		// another request pending
+		SlotRequest request2 = new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE);
+		slotManager.requestSlot(request2);
+
+		assertEquals(1, slotManager.getAllocatedSlotCount());
+		assertEquals(0, slotManager.getFreeSlotCount());
+		assertEquals(1, slotManager.getPendingRequestCount());
+		assertTrue(slotManager.isAllocated(slotId));
+		assertTrue(slotManager.isAllocated(request1.getAllocationId()));
+
+
+		// but slot is reported empty again, request2 will be fulfilled, request1 will be missing
+		slotManager.updateSlotStatus(slotStatus);
+
+		assertEquals(1, slotManager.getAllocatedSlotCount());
+		assertEquals(0, slotManager.getFreeSlotCount());
+		assertEquals(0, slotManager.getPendingRequestCount());
+		assertTrue(slotManager.isAllocated(slotId));
+		assertTrue(slotManager.isAllocated(request2.getAllocationId()));
+	}
+
+	/**
+	 * Tests that we had a slot in use, and it's also reported in use by TaskManager, but the allocation
+	 * information didn't match.
+	 */
+	@Test
+	public void testExistingInUseSlotWithDifferentAllocationInfo() {
+		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
+		SlotRequest request = new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE);
+		slotManager.requestSlot(request);
+
+		// make this slot in use
+		SlotID slotId = SlotID.generate();
+		SlotStatus slotStatus = new SlotStatus(slotId, DEFAULT_TESTING_PROFILE);
+		slotManager.updateSlotStatus(slotStatus);
+
+		assertEquals(1, slotManager.getAllocatedSlotCount());
+		assertEquals(0, slotManager.getFreeSlotCount());
+		assertEquals(0, slotManager.getPendingRequestCount());
+		assertTrue(slotManager.isAllocated(slotId));
+		assertTrue(slotManager.isAllocated(request.getAllocationId()));
+
+		SlotStatus slotStatus2 = new SlotStatus(slotId, DEFAULT_TESTING_PROFILE, new AllocationID(), new JobID());
+		// update slot status with different allocation info
+		slotManager.updateSlotStatus(slotStatus2);
+
+		// original request is missing and won't be allocated
+		assertEquals(1, slotManager.getAllocatedSlotCount());
+		assertEquals(0, slotManager.getFreeSlotCount());
+		assertEquals(0, slotManager.getPendingRequestCount());
+		assertTrue(slotManager.isAllocated(slotId));
+		assertFalse(slotManager.isAllocated(request.getAllocationId()));
+		assertTrue(slotManager.isAllocated(slotStatus2.getAllocationID()));
+	}
+
+	/**
+	 * Tests that we had a free slot, and it's confirmed by SlotReport
+	 */
+	@Test
+	public void testExistingEmptySlotUpdateStatus() {
+		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
+		ResourceSlot slot = new ResourceSlot(SlotID.generate(), DEFAULT_TESTING_PROFILE);
+		slotManager.addFreeSlot(slot);
+
+		SlotStatus slotStatus = new SlotStatus(slot.getSlotId(), DEFAULT_TESTING_PROFILE);
+		slotManager.updateSlotStatus(slotStatus);
+
+		assertEquals(0, slotManager.getAllocatedSlotCount());
+		assertEquals(1, slotManager.getFreeSlotCount());
+		assertEquals(0, slotManager.getPendingRequestCount());
+	}
+
+	/**
+	 * Tests that we had a free slot, and it's reported in-use by TaskManager
+	 */
+	@Test
+	public void testExistingEmptySlotAdjustedToInUse() {
+		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
+		ResourceSlot slot = new ResourceSlot(SlotID.generate(), DEFAULT_TESTING_PROFILE);
+		slotManager.addFreeSlot(slot);
+
+		SlotStatus slotStatus = new SlotStatus(slot.getSlotId(), DEFAULT_TESTING_PROFILE,
+			new AllocationID(), new JobID());
+		slotManager.updateSlotStatus(slotStatus);
+
+		assertEquals(1, slotManager.getAllocatedSlotCount());
+		assertEquals(0, slotManager.getFreeSlotCount());
+		assertEquals(0, slotManager.getPendingRequestCount());
+		assertTrue(slotManager.isAllocated(slot.getSlotId()));
+	}
+
+	/**
+	 * Tests that we did some allocation but failed / rejected by TaskManager, request will retry
+	 */
+	@Test
+	public void testSlotAllocationFailedAtTaskManager() {
+		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
+		ResourceSlot slot = new ResourceSlot(SlotID.generate(), DEFAULT_TESTING_PROFILE);
+		slotManager.addFreeSlot(slot);
+
+		SlotRequest request = new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE);
+		slotManager.requestSlot(request);
+
+		assertEquals(1, slotManager.getAllocatedSlotCount());
+		assertEquals(0, slotManager.getFreeSlotCount());
+		assertEquals(0, slotManager.getPendingRequestCount());
+		assertTrue(slotManager.isAllocated(slot.getSlotId()));
+
+		slotManager.handleSlotRequestFailedAtTaskManager(request, slot.getSlotId());
+
+		assertEquals(1, slotManager.getAllocatedSlotCount());
+		assertEquals(0, slotManager.getFreeSlotCount());
+		assertEquals(0, slotManager.getPendingRequestCount());
+	}
+
+
+	/**
+	 * Tests that we did some allocation but failed / rejected by TaskManager, and slot is occupied by another request
+	 */
+	@Test
+	public void testSlotAllocationFailedAtTaskManagerOccupiedByOther() {
+		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
+		ResourceSlot slot = new ResourceSlot(SlotID.generate(), DEFAULT_TESTING_PROFILE);
+		slotManager.addFreeSlot(slot);
+
+		SlotRequest request = new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE);
+		slotManager.requestSlot(request);
+
+		// slot is set empty by heartbeat
+		SlotStatus slotStatus = new SlotStatus(slot.getSlotId(), slot.getResourceProfile());
+		slotManager.updateSlotStatus(slotStatus);
+
+		// another request took this slot
+		SlotRequest request2 = new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE);
+		slotManager.requestSlot(request2);
+
+		assertEquals(1, slotManager.getAllocatedSlotCount());
+		assertEquals(0, slotManager.getFreeSlotCount());
+		assertEquals(0, slotManager.getPendingRequestCount());
+		assertFalse(slotManager.isAllocated(request.getAllocationId()));
+		assertTrue(slotManager.isAllocated(request2.getAllocationId()));
+
+		// original request should be pended
+		slotManager.handleSlotRequestFailedAtTaskManager(request, slot.getSlotId());
+
+		assertEquals(1, slotManager.getAllocatedSlotCount());
+		assertEquals(0, slotManager.getFreeSlotCount());
+		assertEquals(1, slotManager.getPendingRequestCount());
+		assertFalse(slotManager.isAllocated(request.getAllocationId()));
+		assertTrue(slotManager.isAllocated(request2.getAllocationId()));
+	}
+
+	@Test
+	public void testNotifyTaskManagerFailure() {
+		TestingSlotManager slotManager = new TestingSlotManager(resourceManagerGateway);
+
+		ResourceID resource1 = ResourceID.generate();
+		ResourceID resource2 = ResourceID.generate();
+
+		ResourceSlot slot11 = new ResourceSlot(new SlotID(resource1, 1), DEFAULT_TESTING_PROFILE);
+		ResourceSlot slot12 = new ResourceSlot(new SlotID(resource1, 2), DEFAULT_TESTING_PROFILE);
+		ResourceSlot slot21 = new ResourceSlot(new SlotID(resource2, 1), DEFAULT_TESTING_PROFILE);
+		ResourceSlot slot22 = new ResourceSlot(new SlotID(resource2, 2), DEFAULT_TESTING_PROFILE);
+
+		slotManager.addFreeSlot(slot11);
+		slotManager.addFreeSlot(slot21);
+
+		slotManager.requestSlot(new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE));
+		slotManager.requestSlot(new SlotRequest(new JobID(), new AllocationID(), DEFAULT_TESTING_PROFILE));
+
+		assertEquals(2, slotManager.getAllocatedSlotCount());
+		assertEquals(0, slotManager.getFreeSlotCount());
+		assertEquals(0, slotManager.getPendingRequestCount());
+
+		slotManager.addFreeSlot(slot12);
+		slotManager.addFreeSlot(slot22);
+
+		assertEquals(2, slotManager.getAllocatedSlotCount());
+		assertEquals(2, slotManager.getFreeSlotCount());
+		assertEquals(0, slotManager.getPendingRequestCount());
+
+		slotManager.notifyTaskManagerFailure(resource2);
+
+		assertEquals(1, slotManager.getAllocatedSlotCount());
+		assertEquals(1, slotManager.getFreeSlotCount());
+		assertEquals(0, slotManager.getPendingRequestCount());
+
+		// notify an not exist resource failure
+		slotManager.notifyTaskManagerFailure(ResourceID.generate());
+
+		assertEquals(1, slotManager.getAllocatedSlotCount());
+		assertEquals(1, slotManager.getFreeSlotCount());
+		assertEquals(0, slotManager.getPendingRequestCount());
+	}
+
+	// ------------------------------------------------------------------------
+	//  testing utilities
+	// ------------------------------------------------------------------------
+
+	private void directlyProvideFreeSlots(
+		final SlotManager slotManager,
+		final ResourceProfile resourceProfile,
+		final int freeSlotNum)
+	{
+		for (int i = 0; i < freeSlotNum; ++i) {
+			slotManager.addFreeSlot(new ResourceSlot(SlotID.generate(), new ResourceProfile(resourceProfile)));
+		}
+	}
+
+	// ------------------------------------------------------------------------
+	//  testing classes
+	// ------------------------------------------------------------------------
+
+	private static class TestingSlotManager extends SlotManager {
+
+		private final List<ResourceProfile> allocatedContainers;
+
+		TestingSlotManager(ResourceManagerGateway resourceManagerGateway) {
+			super(resourceManagerGateway);
+			this.allocatedContainers = new LinkedList<>();
+		}
+
+		/**
+		 * Choose slot randomly if it matches requirement
+		 *
+		 * @param request   The slot request
+		 * @param freeSlots All slots which can be used
+		 * @return The chosen slot or null if cannot find a match
+		 */
+		@Override
+		protected ResourceSlot chooseSlotToUse(SlotRequest request, Map<SlotID, ResourceSlot> freeSlots) {
+			for (ResourceSlot slot : freeSlots.values()) {
+				if (slot.isMatchingRequirement(request.getResourceProfile())) {
+					return slot;
+				}
+			}
+			return null;
+		}
+
+		/**
+		 * Choose request randomly if offered slot can match its requirement
+		 *
+		 * @param offeredSlot     The free slot
+		 * @param pendingRequests All the pending slot requests
+		 * @return The chosen request's AllocationID or null if cannot find a match
+		 */
+		@Override
+		protected SlotRequest chooseRequestToFulfill(ResourceSlot offeredSlot,
+			Map<AllocationID, SlotRequest> pendingRequests)
+		{
+			for (Map.Entry<AllocationID, SlotRequest> pendingRequest : pendingRequests.entrySet()) {
+				if (offeredSlot.isMatchingRequirement(pendingRequest.getValue().getResourceProfile())) {
+					return pendingRequest.getValue();
+				}
+			}
+			return null;
+		}
+
+		@Override
+		protected void allocateContainer(ResourceProfile resourceProfile) {
+			allocatedContainers.add(resourceProfile);
+		}
+
+		List<ResourceProfile> getAllocatedContainers() {
+			return allocatedContainers;
+		}
+	}
+}


[43/50] [abbrv] flink git commit: [FLINK-4528] [rpc] Marks main thread execution methods in RpcEndpoint as protected

Posted by tr...@apache.org.
[FLINK-4528] [rpc] Marks main thread execution methods in RpcEndpoint as protected

Give main thread execution context into the TaskExecutorToResourceManagerConnection


Project: http://git-wip-us.apache.org/repos/asf/flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/b779d19d
Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/b779d19d
Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/b779d19d

Branch: refs/heads/flip-6
Commit: b779d19de3416492afd3191ba5fc911d6b475919
Parents: 2f12ba3
Author: Till Rohrmann <tr...@apache.org>
Authored: Mon Aug 29 15:49:59 2016 +0200
Committer: Till Rohrmann <tr...@apache.org>
Committed: Wed Sep 21 11:39:17 2016 +0200

----------------------------------------------------------------------
 .../apache/flink/runtime/rpc/RpcEndpoint.java   |   8 +-
 .../runtime/taskexecutor/TaskExecutor.java      |   7 +-
 ...TaskExecutorToResourceManagerConnection.java |  26 ++-
 .../flink/runtime/rpc/AsyncCallsTest.java       | 216 ++++++++++++++++++
 .../flink/runtime/rpc/akka/AsyncCallsTest.java  | 219 -------------------
 5 files changed, 242 insertions(+), 234 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/flink/blob/b779d19d/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcEndpoint.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcEndpoint.java b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcEndpoint.java
index 7b3f8a1..e9e2b2c 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcEndpoint.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/rpc/RpcEndpoint.java
@@ -161,7 +161,7 @@ public abstract class RpcEndpoint<C extends RpcGateway> {
 	 *
 	 * @return Main thread execution context
 	 */
-	public ExecutionContext getMainThreadExecutionContext() {
+	protected ExecutionContext getMainThreadExecutionContext() {
 		return mainThreadExecutionContext;
 	}
 
@@ -184,7 +184,7 @@ public abstract class RpcEndpoint<C extends RpcGateway> {
 	 *
 	 * @param runnable Runnable to be executed in the main thread of the underlying RPC endpoint
 	 */
-	public void runAsync(Runnable runnable) {
+	protected void runAsync(Runnable runnable) {
 		((MainThreadExecutor) self).runAsync(runnable);
 	}
 
@@ -195,7 +195,7 @@ public abstract class RpcEndpoint<C extends RpcGateway> {
 	 * @param runnable Runnable to be executed
 	 * @param delay    The delay after which the runnable will be executed
 	 */
-	public void scheduleRunAsync(Runnable runnable, long delay, TimeUnit unit) {
+	protected void scheduleRunAsync(Runnable runnable, long delay, TimeUnit unit) {
 		((MainThreadExecutor) self).scheduleRunAsync(runnable, unit.toMillis(delay));
 	}
 
@@ -209,7 +209,7 @@ public abstract class RpcEndpoint<C extends RpcGateway> {
 	 * @param <V> Return type of the callable
 	 * @return Future for the result of the callable.
 	 */
-	public <V> Future<V> callAsync(Callable<V> callable, Timeout timeout) {
+	protected <V> Future<V> callAsync(Callable<V> callable, Timeout timeout) {
 		return ((MainThreadExecutor) self).callAsync(callable, timeout);
 	}
 

http://git-wip-us.apache.org/repos/asf/flink/blob/b779d19d/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/TaskExecutor.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/TaskExecutor.java b/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/TaskExecutor.java
index 4871b96..735730b 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/TaskExecutor.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/TaskExecutor.java
@@ -176,7 +176,12 @@ public class TaskExecutor extends RpcEndpoint<TaskExecutorGateway> {
 		if (newLeaderAddress != null) {
 			log.info("Attempting to register at ResourceManager {}", newLeaderAddress);
 			resourceManagerConnection =
-				new TaskExecutorToResourceManagerConnection(log, this, newLeaderAddress, newLeaderId);
+				new TaskExecutorToResourceManagerConnection(
+					log,
+					this,
+					newLeaderAddress,
+					newLeaderId,
+					getMainThreadExecutionContext());
 			resourceManagerConnection.start();
 		}
 	}

http://git-wip-us.apache.org/repos/asf/flink/blob/b779d19d/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/TaskExecutorToResourceManagerConnection.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/TaskExecutorToResourceManagerConnection.java b/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/TaskExecutorToResourceManagerConnection.java
index 25332a0..28062b6 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/TaskExecutorToResourceManagerConnection.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/TaskExecutorToResourceManagerConnection.java
@@ -31,6 +31,7 @@ import org.apache.flink.runtime.resourcemanager.ResourceManagerGateway;
 
 import org.slf4j.Logger;
 
+import scala.concurrent.ExecutionContext;
 import scala.concurrent.Future;
 import scala.concurrent.duration.FiniteDuration;
 
@@ -55,9 +56,12 @@ public class TaskExecutorToResourceManagerConnection {
 
 	private final String resourceManagerAddress;
 
+	/** Execution context to be used to execute the on complete action of the ResourceManagerRegistration */
+	private final ExecutionContext executionContext;
+
 	private TaskExecutorToResourceManagerConnection.ResourceManagerRegistration pendingRegistration;
 
-	private ResourceManagerGateway registeredResourceManager;
+	private volatile ResourceManagerGateway registeredResourceManager;
 
 	private InstanceID registrationId;
 
@@ -66,15 +70,17 @@ public class TaskExecutorToResourceManagerConnection {
 
 
 	public TaskExecutorToResourceManagerConnection(
-			Logger log,
-			TaskExecutor taskExecutor,
-			String resourceManagerAddress,
-			UUID resourceManagerLeaderId) {
+		Logger log,
+		TaskExecutor taskExecutor,
+		String resourceManagerAddress,
+		UUID resourceManagerLeaderId,
+		ExecutionContext executionContext) {
 
 		this.log = checkNotNull(log);
 		this.taskExecutor = checkNotNull(taskExecutor);
 		this.resourceManagerAddress = checkNotNull(resourceManagerAddress);
 		this.resourceManagerLeaderId = checkNotNull(resourceManagerLeaderId);
+		this.executionContext = checkNotNull(executionContext);
 	}
 
 	// ------------------------------------------------------------------------
@@ -93,22 +99,22 @@ public class TaskExecutorToResourceManagerConnection {
 		pendingRegistration.startRegistration();
 
 		Future<Tuple2<ResourceManagerGateway, TaskExecutorRegistrationSuccess>> future = pendingRegistration.getFuture();
-		
+
 		future.onSuccess(new OnSuccess<Tuple2<ResourceManagerGateway, TaskExecutorRegistrationSuccess>>() {
 			@Override
 			public void onSuccess(Tuple2<ResourceManagerGateway, TaskExecutorRegistrationSuccess> result) {
-				registeredResourceManager = result.f0;
 				registrationId = result.f1.getRegistrationId();
+				registeredResourceManager = result.f0;
 			}
-		}, taskExecutor.getMainThreadExecutionContext());
+		}, executionContext);
 		
 		// this future should only ever fail if there is a bug, not if the registration is declined
 		future.onFailure(new OnFailure() {
 			@Override
 			public void onFailure(Throwable failure) {
-				taskExecutor.onFatalError(failure);
+				taskExecutor.onFatalErrorAsync(failure);
 			}
-		}, taskExecutor.getMainThreadExecutionContext());
+		}, executionContext);
 	}
 
 	public void close() {

http://git-wip-us.apache.org/repos/asf/flink/blob/b779d19d/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/AsyncCallsTest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/AsyncCallsTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/AsyncCallsTest.java
new file mode 100644
index 0000000..1791056
--- /dev/null
+++ b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/AsyncCallsTest.java
@@ -0,0 +1,216 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.rpc;
+
+import akka.actor.ActorSystem;
+import akka.util.Timeout;
+
+import org.apache.flink.core.testutils.OneShotLatch;
+import org.apache.flink.runtime.akka.AkkaUtils;
+
+import org.apache.flink.runtime.rpc.akka.AkkaRpcService;
+import org.apache.flink.util.TestLogger;
+import org.junit.AfterClass;
+import org.junit.Test;
+
+import scala.concurrent.Await;
+import scala.concurrent.Future;
+import scala.concurrent.duration.FiniteDuration;
+
+import java.util.concurrent.Callable;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.locks.ReentrantLock;
+
+import static org.junit.Assert.*;
+
+public class AsyncCallsTest extends TestLogger {
+
+	// ------------------------------------------------------------------------
+	//  shared test members
+	// ------------------------------------------------------------------------
+
+	private static ActorSystem actorSystem = AkkaUtils.createDefaultActorSystem();
+
+	private static AkkaRpcService akkaRpcService =
+			new AkkaRpcService(actorSystem, new Timeout(10000, TimeUnit.MILLISECONDS));
+
+	@AfterClass
+	public static void shutdown() {
+		akkaRpcService.stopService();
+		actorSystem.shutdown();
+	}
+
+
+	// ------------------------------------------------------------------------
+	//  tests
+	// ------------------------------------------------------------------------
+
+	@Test
+	public void testScheduleWithNoDelay() throws Exception {
+
+		// to collect all the thread references
+		final ReentrantLock lock = new ReentrantLock();
+		final AtomicBoolean concurrentAccess = new AtomicBoolean(false);
+
+		TestEndpoint testEndpoint = new TestEndpoint(akkaRpcService, lock);
+		testEndpoint.start();
+		TestGateway gateway = testEndpoint.getSelf();
+
+		// a bunch of gateway calls
+		gateway.someCall();
+		gateway.anotherCall();
+		gateway.someCall();
+
+		// run something asynchronously
+		for (int i = 0; i < 10000; i++) {
+			testEndpoint.runAsync(new Runnable() {
+				@Override
+				public void run() {
+					boolean holdsLock = lock.tryLock();
+					if (holdsLock) {
+						lock.unlock();
+					} else {
+						concurrentAccess.set(true);
+					}
+				}
+			});
+		}
+	
+		Future<String> result = testEndpoint.callAsync(new Callable<String>() {
+			@Override
+			public String call() throws Exception {
+				boolean holdsLock = lock.tryLock();
+				if (holdsLock) {
+					lock.unlock();
+				} else {
+					concurrentAccess.set(true);
+				}
+				return "test";
+			}
+		}, new Timeout(30, TimeUnit.SECONDS));
+		String str = Await.result(result, new FiniteDuration(30, TimeUnit.SECONDS));
+		assertEquals("test", str);
+
+		// validate that no concurrent access happened
+		assertFalse("Rpc Endpoint had concurrent access", testEndpoint.hasConcurrentAccess());
+		assertFalse("Rpc Endpoint had concurrent access", concurrentAccess.get());
+
+		akkaRpcService.stopServer(testEndpoint.getSelf());
+	}
+
+	@Test
+	public void testScheduleWithDelay() throws Exception {
+
+		// to collect all the thread references
+		final ReentrantLock lock = new ReentrantLock();
+		final AtomicBoolean concurrentAccess = new AtomicBoolean(false);
+		final OneShotLatch latch = new OneShotLatch();
+
+		final long delay = 200;
+
+		TestEndpoint testEndpoint = new TestEndpoint(akkaRpcService, lock);
+		testEndpoint.start();
+
+		// run something asynchronously
+		testEndpoint.runAsync(new Runnable() {
+			@Override
+			public void run() {
+				boolean holdsLock = lock.tryLock();
+				if (holdsLock) {
+					lock.unlock();
+				} else {
+					concurrentAccess.set(true);
+				}
+			}
+		});
+
+		final long start = System.nanoTime();
+
+		testEndpoint.scheduleRunAsync(new Runnable() {
+			@Override
+			public void run() {
+				boolean holdsLock = lock.tryLock();
+				if (holdsLock) {
+					lock.unlock();
+				} else {
+					concurrentAccess.set(true);
+				}
+				latch.trigger();
+			}
+		}, delay, TimeUnit.MILLISECONDS);
+
+		latch.await();
+		final long stop = System.nanoTime();
+
+		// validate that no concurrent access happened
+		assertFalse("Rpc Endpoint had concurrent access", testEndpoint.hasConcurrentAccess());
+		assertFalse("Rpc Endpoint had concurrent access", concurrentAccess.get());
+
+		assertTrue("call was not properly delayed", ((stop - start) / 1000000) >= delay);
+	}
+
+	// ------------------------------------------------------------------------
+	//  test RPC endpoint
+	// ------------------------------------------------------------------------
+	
+	public interface TestGateway extends RpcGateway {
+
+		void someCall();
+
+		void anotherCall();
+	}
+
+	@SuppressWarnings("unused")
+	public static class TestEndpoint extends RpcEndpoint<TestGateway> {
+
+		private final ReentrantLock lock;
+
+		private volatile boolean concurrentAccess;
+
+		public TestEndpoint(RpcService rpcService, ReentrantLock lock) {
+			super(rpcService);
+			this.lock = lock;
+		}
+
+		@RpcMethod
+		public void someCall() {
+			boolean holdsLock = lock.tryLock();
+			if (holdsLock) {
+				lock.unlock();
+			} else {
+				concurrentAccess = true;
+			}
+		}
+
+		@RpcMethod
+		public void anotherCall() {
+			boolean holdsLock = lock.tryLock();
+			if (holdsLock) {
+				lock.unlock();
+			} else {
+				concurrentAccess = true;
+			}
+		}
+
+		public boolean hasConcurrentAccess() {
+			return concurrentAccess;
+		}
+	}
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/b779d19d/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AsyncCallsTest.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AsyncCallsTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AsyncCallsTest.java
deleted file mode 100644
index d33987c..0000000
--- a/flink-runtime/src/test/java/org/apache/flink/runtime/rpc/akka/AsyncCallsTest.java
+++ /dev/null
@@ -1,219 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.flink.runtime.rpc.akka;
-
-import akka.actor.ActorSystem;
-import akka.util.Timeout;
-
-import org.apache.flink.core.testutils.OneShotLatch;
-import org.apache.flink.runtime.akka.AkkaUtils;
-import org.apache.flink.runtime.rpc.RpcEndpoint;
-import org.apache.flink.runtime.rpc.RpcGateway;
-import org.apache.flink.runtime.rpc.RpcMethod;
-import org.apache.flink.runtime.rpc.RpcService;
-
-import org.apache.flink.util.TestLogger;
-import org.junit.AfterClass;
-import org.junit.Test;
-
-import scala.concurrent.Await;
-import scala.concurrent.Future;
-import scala.concurrent.duration.FiniteDuration;
-
-import java.util.concurrent.Callable;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicBoolean;
-import java.util.concurrent.locks.ReentrantLock;
-
-import static org.junit.Assert.*;
-
-public class AsyncCallsTest extends TestLogger {
-
-	// ------------------------------------------------------------------------
-	//  shared test members
-	// ------------------------------------------------------------------------
-
-	private static ActorSystem actorSystem = AkkaUtils.createDefaultActorSystem();
-
-	private static AkkaRpcService akkaRpcService = 
-			new AkkaRpcService(actorSystem, new Timeout(10000, TimeUnit.MILLISECONDS));
-
-	@AfterClass
-	public static void shutdown() {
-		akkaRpcService.stopService();
-		actorSystem.shutdown();
-	}
-
-
-	// ------------------------------------------------------------------------
-	//  tests
-	// ------------------------------------------------------------------------
-
-	@Test
-	public void testScheduleWithNoDelay() throws Exception {
-
-		// to collect all the thread references
-		final ReentrantLock lock = new ReentrantLock();
-		final AtomicBoolean concurrentAccess = new AtomicBoolean(false);
-
-		TestEndpoint testEndpoint = new TestEndpoint(akkaRpcService, lock);
-		testEndpoint.start();
-		TestGateway gateway = testEndpoint.getSelf();
-
-		// a bunch of gateway calls
-		gateway.someCall();
-		gateway.anotherCall();
-		gateway.someCall();
-
-		// run something asynchronously
-		for (int i = 0; i < 10000; i++) {
-			testEndpoint.runAsync(new Runnable() {
-				@Override
-				public void run() {
-					boolean holdsLock = lock.tryLock();
-					if (holdsLock) {
-						lock.unlock();
-					} else {
-						concurrentAccess.set(true);
-					}
-				}
-			});
-		}
-	
-		Future<String> result = testEndpoint.callAsync(new Callable<String>() {
-			@Override
-			public String call() throws Exception {
-				boolean holdsLock = lock.tryLock();
-				if (holdsLock) {
-					lock.unlock();
-				} else {
-					concurrentAccess.set(true);
-				}
-				return "test";
-			}
-		}, new Timeout(30, TimeUnit.SECONDS));
-		String str = Await.result(result, new FiniteDuration(30, TimeUnit.SECONDS));
-		assertEquals("test", str);
-
-		// validate that no concurrent access happened
-		assertFalse("Rpc Endpoint had concurrent access", testEndpoint.hasConcurrentAccess());
-		assertFalse("Rpc Endpoint had concurrent access", concurrentAccess.get());
-
-		akkaRpcService.stopServer(testEndpoint.getSelf());
-	}
-
-	@Test
-	public void testScheduleWithDelay() throws Exception {
-
-		// to collect all the thread references
-		final ReentrantLock lock = new ReentrantLock();
-		final AtomicBoolean concurrentAccess = new AtomicBoolean(false);
-		final OneShotLatch latch = new OneShotLatch();
-
-		final long delay = 200;
-
-		TestEndpoint testEndpoint = new TestEndpoint(akkaRpcService, lock);
-		testEndpoint.start();
-
-		// run something asynchronously
-		testEndpoint.runAsync(new Runnable() {
-			@Override
-			public void run() {
-				boolean holdsLock = lock.tryLock();
-				if (holdsLock) {
-					lock.unlock();
-				} else {
-					concurrentAccess.set(true);
-				}
-			}
-		});
-
-		final long start = System.nanoTime();
-
-		testEndpoint.scheduleRunAsync(new Runnable() {
-			@Override
-			public void run() {
-				boolean holdsLock = lock.tryLock();
-				if (holdsLock) {
-					lock.unlock();
-				} else {
-					concurrentAccess.set(true);
-				}
-				latch.trigger();
-			}
-		}, delay, TimeUnit.MILLISECONDS);
-
-		latch.await();
-		final long stop = System.nanoTime();
-
-		// validate that no concurrent access happened
-		assertFalse("Rpc Endpoint had concurrent access", testEndpoint.hasConcurrentAccess());
-		assertFalse("Rpc Endpoint had concurrent access", concurrentAccess.get());
-
-		assertTrue("call was not properly delayed", ((stop - start) / 1000000) >= delay);
-	}
-
-	// ------------------------------------------------------------------------
-	//  test RPC endpoint
-	// ------------------------------------------------------------------------
-	
-	interface TestGateway extends RpcGateway {
-
-		void someCall();
-
-		void anotherCall();
-	}
-
-	@SuppressWarnings("unused")
-	public static class TestEndpoint extends RpcEndpoint<TestGateway> {
-
-		private final ReentrantLock lock;
-
-		private volatile boolean concurrentAccess;
-
-		public TestEndpoint(RpcService rpcService, ReentrantLock lock) {
-			super(rpcService);
-			this.lock = lock;
-		}
-
-		@RpcMethod
-		public void someCall() {
-			boolean holdsLock = lock.tryLock();
-			if (holdsLock) {
-				lock.unlock();
-			} else {
-				concurrentAccess = true;
-			}
-		}
-
-		@RpcMethod
-		public void anotherCall() {
-			boolean holdsLock = lock.tryLock();
-			if (holdsLock) {
-				lock.unlock();
-			} else {
-				concurrentAccess = true;
-			}
-		}
-
-		public boolean hasConcurrentAccess() {
-			return concurrentAccess;
-		}
-	}
-}


[09/50] [abbrv] flink git commit: [FLINK-3929] conditionally skip RollingSinkSecuredITCase

Posted by tr...@apache.org.
[FLINK-3929] conditionally skip RollingSinkSecuredITCase

- for now, we skip this test class until Hadoop version 3.x.x.


Project: http://git-wip-us.apache.org/repos/asf/flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/285b6f74
Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/285b6f74
Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/285b6f74

Branch: refs/heads/flip-6
Commit: 285b6f74e0416b200229bd67cb7521e4a6871bbc
Parents: 25a622f
Author: Maximilian Michels <mx...@apache.org>
Authored: Thu Sep 1 12:49:53 2016 +0200
Committer: Maximilian Michels <mx...@apache.org>
Committed: Tue Sep 20 22:03:29 2016 +0200

----------------------------------------------------------------------
 .../connectors/fs/RollingSinkSecuredITCase.java | 37 +++++++++++++++-----
 .../connectors/kafka/KafkaTestBase.java         |  8 +++--
 2 files changed, 35 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/flink/blob/285b6f74/flink-streaming-connectors/flink-connector-filesystem/src/test/java/org/apache/flink/streaming/connectors/fs/RollingSinkSecuredITCase.java
----------------------------------------------------------------------
diff --git a/flink-streaming-connectors/flink-connector-filesystem/src/test/java/org/apache/flink/streaming/connectors/fs/RollingSinkSecuredITCase.java b/flink-streaming-connectors/flink-connector-filesystem/src/test/java/org/apache/flink/streaming/connectors/fs/RollingSinkSecuredITCase.java
index 86cedaf..930ddd2 100644
--- a/flink-streaming-connectors/flink-connector-filesystem/src/test/java/org/apache/flink/streaming/connectors/fs/RollingSinkSecuredITCase.java
+++ b/flink-streaming-connectors/flink-connector-filesystem/src/test/java/org/apache/flink/streaming/connectors/fs/RollingSinkSecuredITCase.java
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -31,9 +31,10 @@ import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.http.HttpConfig;
 import org.apache.hadoop.security.SecurityUtil;
 import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.util.VersionInfo;
 import org.junit.AfterClass;
+import org.junit.Assume;
 import org.junit.BeforeClass;
-import org.junit.Ignore;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -57,15 +58,31 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_HOST_NAME_KEY;
 
 /**
  * Tests for running {@link RollingSinkSecuredITCase} which is an extension of {@link RollingSink} in secure environment
+ * Note: only executed for Hadoop version > 3.x.x
  */
-
-//The test is disabled since MiniDFS secure run requires lower order ports to be used.
-//We can enable the test when the fix is available (HDFS-9213)
-@Ignore
 public class RollingSinkSecuredITCase extends RollingSinkITCase {
 
 	protected static final Logger LOG = LoggerFactory.getLogger(RollingSinkSecuredITCase.class);
 
+	/**
+	 * Skips all tests if the Hadoop version doesn't match.
+	 * We can't run this test class until HDFS-9213 is fixed which allows a secure DataNode
+	 * to bind to non-privileged ports for testing.
+	 * For now, we skip this test class until Hadoop version 3.x.x.
+	 */
+	private static void skipIfHadoopVersionIsNotAppropriate() {
+		// Skips all tests if the Hadoop version doesn't match
+		String hadoopVersionString = VersionInfo.getVersion();
+		String[] split = hadoopVersionString.split("\\.");
+		if (split.length != 3) {
+			throw new IllegalStateException("Hadoop version was not of format 'X.X.X': " + hadoopVersionString);
+		}
+		Assume.assumeTrue(
+			// check whether we're running Hadoop version >= 3.x.x
+			Integer.parseInt(split[0]) >= 3
+		);
+	}
+
 	/*
 	 * override super class static methods to avoid creating MiniDFS and MiniFlink with wrong configurations
 	 * and out-of-order sequence for secure cluster
@@ -85,6 +102,8 @@ public class RollingSinkSecuredITCase extends RollingSinkITCase {
 	@BeforeClass
 	public static void startSecureCluster() throws Exception {
 
+		skipIfHadoopVersionIsNotAppropriate();
+
 		LOG.info("starting secure cluster environment for testing");
 
 		dataDir = tempFolder.newFolder();
@@ -143,7 +162,9 @@ public class RollingSinkSecuredITCase extends RollingSinkITCase {
 		TestStreamEnvironment.unsetAsContext();
 		stopCluster(cluster, TestBaseUtils.DEFAULT_TIMEOUT);
 
-		hdfsCluster.shutdown();
+		if (hdfsCluster != null) {
+			hdfsCluster.shutdown();
+		}
 
 		SecureTestEnvironment.cleanup();
 	}
@@ -229,4 +250,4 @@ public class RollingSinkSecuredITCase extends RollingSinkITCase {
 	@Override
 	public void testDateTimeRollingStringWriter() throws Exception {}
 
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/285b6f74/flink-streaming-connectors/flink-connector-kafka-base/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaTestBase.java
----------------------------------------------------------------------
diff --git a/flink-streaming-connectors/flink-connector-kafka-base/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaTestBase.java b/flink-streaming-connectors/flink-connector-kafka-base/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaTestBase.java
index afdd158..5cec4f0 100644
--- a/flink-streaming-connectors/flink-connector-kafka-base/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaTestBase.java
+++ b/flink-streaming-connectors/flink-connector-kafka-base/src/test/java/org/apache/flink/streaming/connectors/kafka/KafkaTestBase.java
@@ -110,7 +110,7 @@ public abstract class KafkaTestBase extends TestLogger {
 	}
 
 	protected static Configuration getFlinkConfiguration() {
-		Configuration flinkConfig = new Configuration();;
+		Configuration flinkConfig = new Configuration();
 		flinkConfig.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, 1);
 		flinkConfig.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, 8);
 		flinkConfig.setInteger(ConfigConstants.TASK_MANAGER_MEMORY_SIZE_KEY, 16);
@@ -134,7 +134,11 @@ public abstract class KafkaTestBase extends TestLogger {
 
 		brokerConnectionStrings = kafkaServer.getBrokerConnectionString();
 
-		if(kafkaServer.isSecureRunSupported() && secureMode) {
+		if (secureMode) {
+			if (!kafkaServer.isSecureRunSupported()) {
+				throw new IllegalStateException(
+					"Attempting to test in secure mode but secure mode not supported by the KafkaTestEnvironment.");
+			}
 			secureProps = kafkaServer.getSecureProperties();
 		}