You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by tg...@apache.org on 2013/03/26 20:00:53 UTC
svn commit: r1461269 - in
/hadoop/common/branches/branch-2/hadoop-tools/hadoop-gridmix/src:
main/java/org/apache/hadoop/mapred/gridmix/
main/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/
test/java/org/apache/hadoop/mapred/gridmix/
Author: tgraves
Date: Tue Mar 26 19:00:53 2013
New Revision: 1461269
URL: http://svn.apache.org/r1461269
Log:
MAPREDUCE-3008. Improvements to cumulative CPU emulation for short running tasks in Gridmix. (amarrk via tgraves)
Modified:
hadoop/common/branches/branch-2/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/LoadJob.java
hadoop/common/branches/branch-2/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/CumulativeCpuUsageEmulatorPlugin.java
hadoop/common/branches/branch-2/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/ResourceUsageEmulatorPlugin.java
hadoop/common/branches/branch-2/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/ResourceUsageMatcher.java
hadoop/common/branches/branch-2/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/TotalHeapUsageEmulatorPlugin.java
hadoop/common/branches/branch-2/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/TestResourceUsageEmulators.java
Modified: hadoop/common/branches/branch-2/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/LoadJob.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/LoadJob.java?rev=1461269&r1=1461268&r2=1461269&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/LoadJob.java (original)
+++ hadoop/common/branches/branch-2/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/LoadJob.java Tue Mar 26 19:00:53 2013
@@ -32,6 +32,7 @@ import org.apache.hadoop.mapreduce.Mappe
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.TaskCounter;
import org.apache.hadoop.mapreduce.TaskInputOutputContext;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
@@ -72,7 +73,7 @@ class LoadJob extends GridmixJob {
job.setNumReduceTasks(jobdesc.getNumberReduces());
job.setMapOutputKeyClass(GridmixKey.class);
job.setMapOutputValueClass(GridmixRecord.class);
- job.setSortComparatorClass(GridmixKey.Comparator.class);
+ job.setSortComparatorClass(LoadSortComparator.class);
job.setGroupingComparatorClass(SpecGroupingComparator.class);
job.setInputFormatClass(LoadInputFormat.class);
job.setOutputFormatClass(RawBytesOutputFormat.class);
@@ -94,17 +95,84 @@ class LoadJob extends GridmixJob {
}
/**
+ * This is a load matching key comparator which will make sure that the
+ * resource usage load is matched even when the framework is in control.
+ */
+ public static class LoadSortComparator extends GridmixKey.Comparator {
+ private ResourceUsageMatcherRunner matcher = null;
+ private boolean isConfigured = false;
+
+ public LoadSortComparator() {
+ super();
+ }
+
+ @Override
+ public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+ configure();
+ int ret = super.compare(b1, s1, l1, b2, s2, l2);
+ if (matcher != null) {
+ try {
+ matcher.match(); // match the resource usage now
+ } catch (Exception e) {}
+ }
+ return ret;
+ }
+
+ //TODO Note that the sorter will be instantiated 2 times as follows
+ // 1. During the sort/spill in the map phase
+ // 2. During the merge in the sort phase
+ // We need the handle to the matcher thread only in (2).
+ // This logic can be relaxed to run only in (2).
+ private void configure() {
+ if (!isConfigured) {
+ ThreadGroup group = Thread.currentThread().getThreadGroup();
+ Thread[] threads = new Thread[group.activeCount() * 2];
+ group.enumerate(threads, true);
+ for (Thread t : threads) {
+ if (t != null && (t instanceof ResourceUsageMatcherRunner)) {
+ this.matcher = (ResourceUsageMatcherRunner) t;
+ isConfigured = true;
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ /**
* This is a progress based resource usage matcher.
*/
@SuppressWarnings("unchecked")
- static class ResourceUsageMatcherRunner extends Thread {
+ static class ResourceUsageMatcherRunner extends Thread
+ implements Progressive {
private final ResourceUsageMatcher matcher;
- private final Progressive progress;
+ private final BoostingProgress progress;
private final long sleepTime;
private static final String SLEEP_CONFIG =
"gridmix.emulators.resource-usage.sleep-duration";
private static final long DEFAULT_SLEEP_TIME = 100; // 100ms
+ /**
+ * This is a progress bar that can be boosted for weaker use-cases.
+ */
+ private static class BoostingProgress implements Progressive {
+ private float boostValue = 0f;
+ TaskInputOutputContext context;
+
+ BoostingProgress(TaskInputOutputContext context) {
+ this.context = context;
+ }
+
+ void setBoostValue(float boostValue) {
+ this.boostValue = boostValue;
+ }
+
+ @Override
+ public float getProgress() {
+ return Math.min(1f, context.getProgress() + boostValue);
+ }
+ }
+
ResourceUsageMatcherRunner(final TaskInputOutputContext context,
ResourceUsageMetrics metrics) {
Configuration conf = context.getConfiguration();
@@ -118,19 +186,14 @@ class LoadJob extends GridmixJob {
// set the other parameters
this.sleepTime = conf.getLong(SLEEP_CONFIG, DEFAULT_SLEEP_TIME);
- progress = new Progressive() {
- @Override
- public float getProgress() {
- return context.getProgress();
- }
- };
+ progress = new BoostingProgress(context);
// instantiate a resource-usage-matcher
matcher = new ResourceUsageMatcher();
matcher.configure(conf, plugin, metrics, progress);
}
- protected void match() throws Exception {
+ protected void match() throws IOException, InterruptedException {
// match the resource usage
matcher.matchResourceUsage();
}
@@ -157,21 +220,34 @@ class LoadJob extends GridmixJob {
+ " thread! Exiting.", e);
}
}
+
+ @Override
+ public float getProgress() {
+ return matcher.getProgress();
+ }
+
+ // boost the progress bar as fasten up the emulation cycles.
+ void boost(float value) {
+ progress.setBoostValue(value);
+ }
}
// Makes sure that the TaskTracker doesn't kill the map/reduce tasks while
// they are emulating
private static class StatusReporter extends Thread {
- private TaskAttemptContext context;
- StatusReporter(TaskAttemptContext context) {
+ private final TaskAttemptContext context;
+ private final Progressive progress;
+
+ StatusReporter(TaskAttemptContext context, Progressive progress) {
this.context = context;
+ this.progress = progress;
}
@Override
public void run() {
LOG.info("Status reporter thread started.");
try {
- while (context.getProgress() < 1) {
+ while (!isInterrupted() && progress.getProgress() < 1) {
// report progress
context.progress();
@@ -278,7 +354,7 @@ class LoadJob extends GridmixJob {
matcher.setDaemon(true);
// start the status reporter thread
- reporter = new StatusReporter(ctxt);
+ reporter = new StatusReporter(ctxt, matcher);
reporter.setDaemon(true);
reporter.start();
}
@@ -326,6 +402,17 @@ class LoadJob extends GridmixJob {
}
}
+ // check if the thread will get a chance to run or not
+ // check if there will be a sort&spill->merge phase or not
+ // check if the final sort&spill->merge phase is gonna happen or not
+ if (context.getNumReduceTasks() > 0
+ && context.getCounter(TaskCounter.SPILLED_RECORDS).getValue() == 0) {
+ LOG.info("Boosting the map phase progress.");
+ // add the sort phase progress to the map phase and emulate
+ matcher.boost(0.33f);
+ matcher.match();
+ }
+
// start the matcher thread since the map phase ends here
matcher.start();
}
@@ -394,7 +481,7 @@ class LoadJob extends GridmixJob {
matcher = new ResourceUsageMatcherRunner(context, metrics);
// start the status reporter thread
- reporter = new StatusReporter(context);
+ reporter = new StatusReporter(context, matcher);
reporter.start();
}
@Override
@@ -530,9 +617,13 @@ class LoadJob extends GridmixJob {
specRecords[j] = info.getOutputRecords();
metrics[j] = info.getResourceUsageMetrics();
if (LOG.isDebugEnabled()) {
- LOG.debug(String.format("SPEC(%d) %d -> %d %d %d", id(), i,
+ LOG.debug(String.format("SPEC(%d) %d -> %d %d %d %d %d %d %d", id(), i,
i + j * maps, info.getOutputRecords(),
- info.getOutputBytes()));
+ info.getOutputBytes(),
+ info.getResourceUsageMetrics().getCumulativeCpuUsage(),
+ info.getResourceUsageMetrics().getPhysicalMemoryUsage(),
+ info.getResourceUsageMetrics().getVirtualMemoryUsage(),
+ info.getResourceUsageMetrics().getHeapUsage()));
}
}
final TaskInfo info = jobdesc.getTaskInfo(TaskType.MAP, i);
Modified: hadoop/common/branches/branch-2/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/CumulativeCpuUsageEmulatorPlugin.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/CumulativeCpuUsageEmulatorPlugin.java?rev=1461269&r1=1461268&r2=1461269&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/CumulativeCpuUsageEmulatorPlugin.java (original)
+++ hadoop/common/branches/branch-2/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/CumulativeCpuUsageEmulatorPlugin.java Tue Mar 26 19:00:53 2013
@@ -67,7 +67,7 @@ implements ResourceUsageEmulatorPlugin {
private float emulationInterval; // emulation interval
private long targetCpuUsage = 0;
private float lastSeenProgress = 0;
- private long lastSeenCpuUsageCpuUsage = 0;
+ private long lastSeenCpuUsage = 0;
// Configuration parameters
public static final String CPU_EMULATION_PROGRESS_INTERVAL =
@@ -229,6 +229,16 @@ implements ResourceUsageEmulatorPlugin {
return progress * progress * progress * progress;
}
+ private synchronized long getCurrentCPUUsage() {
+ return monitor.getCumulativeCpuTime();
+
+ }
+
+ @Override
+ public float getProgress() {
+ return Math.min(1f, ((float)getCurrentCPUUsage())/targetCpuUsage);
+ }
+
@Override
//TODO Multi-threading for speedup?
public void emulate() throws IOException, InterruptedException {
@@ -249,10 +259,9 @@ implements ResourceUsageEmulatorPlugin {
// Note that (Cc-Cl)/(Pc-Pl) is termed as 'rate' in the following
// section
- long currentCpuUsage =
- monitor.getCumulativeCpuTime();
+ long currentCpuUsage = getCurrentCPUUsage();
// estimate the cpu usage rate
- float rate = (currentCpuUsage - lastSeenCpuUsageCpuUsage)
+ float rate = (currentCpuUsage - lastSeenCpuUsage)
/ (currentProgress - lastSeenProgress);
long projectedUsage =
currentCpuUsage + (long)((1 - currentProgress) * rate);
@@ -264,8 +273,7 @@ implements ResourceUsageEmulatorPlugin {
(long)(targetCpuUsage
* getWeightForProgressInterval(currentProgress));
- while (monitor.getCumulativeCpuTime()
- < currentWeighedTarget) {
+ while (getCurrentCPUUsage() < currentWeighedTarget) {
emulatorCore.compute();
// sleep for 100ms
try {
@@ -281,8 +289,7 @@ implements ResourceUsageEmulatorPlugin {
// set the last seen progress
lastSeenProgress = progress.getProgress();
// set the last seen usage
- lastSeenCpuUsageCpuUsage =
- monitor.getCumulativeCpuTime();
+ lastSeenCpuUsage = getCurrentCPUUsage();
}
}
}
@@ -310,6 +317,6 @@ implements ResourceUsageEmulatorPlugin {
// initialize the states
lastSeenProgress = 0;
- lastSeenCpuUsageCpuUsage = 0;
+ lastSeenCpuUsage = 0;
}
-}
\ No newline at end of file
+}
Modified: hadoop/common/branches/branch-2/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/ResourceUsageEmulatorPlugin.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/ResourceUsageEmulatorPlugin.java?rev=1461269&r1=1461268&r2=1461269&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/ResourceUsageEmulatorPlugin.java (original)
+++ hadoop/common/branches/branch-2/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/ResourceUsageEmulatorPlugin.java Tue Mar 26 19:00:53 2013
@@ -42,7 +42,7 @@ import org.apache.hadoop.conf.Configurat
* For configuring GridMix to load and and use a resource usage emulator,
* see {@link ResourceUsageMatcher}.
*/
-public interface ResourceUsageEmulatorPlugin {
+public interface ResourceUsageEmulatorPlugin extends Progressive {
/**
* Initialize the plugin. This might involve
* - initializing the variables
Modified: hadoop/common/branches/branch-2/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/ResourceUsageMatcher.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/ResourceUsageMatcher.java?rev=1461269&r1=1461268&r2=1461269&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/ResourceUsageMatcher.java (original)
+++ hadoop/common/branches/branch-2/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/ResourceUsageMatcher.java Tue Mar 26 19:00:53 2013
@@ -17,6 +17,7 @@
*/
package org.apache.hadoop.mapred.gridmix.emulators.resourceusage;
+import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
@@ -35,7 +36,7 @@ import org.apache.hadoop.yarn.util.Resou
* <p>Note that the order in which the emulators are invoked is same as the
* order in which they are configured.
*/
-public class ResourceUsageMatcher {
+public class ResourceUsageMatcher implements Progressive {
/**
* Configuration key to set resource usage emulators.
*/
@@ -80,10 +81,31 @@ public class ResourceUsageMatcher {
}
}
- public void matchResourceUsage() throws Exception {
+ public void matchResourceUsage() throws IOException, InterruptedException {
for (ResourceUsageEmulatorPlugin emulator : emulationPlugins) {
// match the resource usage
emulator.emulate();
}
}
+
+ /**
+ * Returns the average progress.
+ */
+ @Override
+ public float getProgress() {
+ if (emulationPlugins.size() > 0) {
+ // return the average progress
+ float progress = 0f;
+ for (ResourceUsageEmulatorPlugin emulator : emulationPlugins) {
+ // consider weighted progress of each emulator
+ progress += emulator.getProgress();
+ }
+
+ return progress / emulationPlugins.size();
+ }
+
+ // if no emulators are configured then return 1
+ return 1f;
+
+ }
}
\ No newline at end of file
Modified: hadoop/common/branches/branch-2/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/TotalHeapUsageEmulatorPlugin.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/TotalHeapUsageEmulatorPlugin.java?rev=1461269&r1=1461268&r2=1461269&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/TotalHeapUsageEmulatorPlugin.java (original)
+++ hadoop/common/branches/branch-2/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/TotalHeapUsageEmulatorPlugin.java Tue Mar 26 19:00:53 2013
@@ -187,6 +187,11 @@ implements ResourceUsageEmulatorPlugin {
}
@Override
+ public float getProgress() {
+ return Math.min(1f, ((float)getTotalHeapUsageInMB())/targetHeapUsageInMB);
+ }
+
+ @Override
public void emulate() throws IOException, InterruptedException {
if (enabled) {
float currentProgress = progress.getProgress();
Modified: hadoop/common/branches/branch-2/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/TestResourceUsageEmulators.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/TestResourceUsageEmulators.java?rev=1461269&r1=1461268&r2=1461269&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/TestResourceUsageEmulators.java (original)
+++ hadoop/common/branches/branch-2/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/TestResourceUsageEmulators.java Tue Mar 26 19:00:53 2013
@@ -133,6 +133,14 @@ public class TestResourceUsageEmulators
? fs.getFileStatus(testPath).getModificationTime()
: 0;
}
+
+ @Override
+ public float getProgress() {
+ try {
+ return fs.exists(touchPath) ? 1.0f : 0f;
+ } catch (IOException ioe) {}
+ return 0f;
+ }
}
/**