You are viewing a plain text version of this content. The canonical link for it is here.
Posted to mapreduce-commits@hadoop.apache.org by ra...@apache.org on 2011/05/27 06:27:29 UTC
svn commit: r1128147 - in /hadoop/mapreduce/trunk/src:
contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/
contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/
docs/src/documentation/content/xdocs/
Author: ravigummadi
Date: Fri May 27 04:27:29 2011
New Revision: 1128147
URL: http://svn.apache.org/viewvc?rev=1128147&view=rev
Log:
MAPREDUCE-2137. Provide mapping between jobs of trace file and the corresponding simulated cluster's jobs in Gridmix.
Modified:
hadoop/mapreduce/trunk/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/Gridmix.java
hadoop/mapreduce/trunk/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/GridmixJob.java
hadoop/mapreduce/trunk/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/DebugJobProducer.java
hadoop/mapreduce/trunk/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/TestGridmixSubmission.java
hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/gridmix.xml
Modified: hadoop/mapreduce/trunk/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/Gridmix.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/Gridmix.java?rev=1128147&r1=1128146&r2=1128147&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/Gridmix.java (original)
+++ hadoop/mapreduce/trunk/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/Gridmix.java Fri May 27 04:27:29 2011
@@ -92,6 +92,20 @@ public class Gridmix extends Configured
*/
public static final String GRIDMIX_USR_RSV = "gridmix.user.resolve.class";
+ /**
+ * Configuration property set in simulated job's configuration whose value is
+ * set to the corresponding original job's name. This is not configurable by
+ * gridmix user.
+ */
+ public static final String ORIGINAL_JOB_NAME =
+ "gridmix.job.original-job-name";
+ /**
+ * Configuration property set in simulated job's configuration whose value is
+ * set to the corresponding original job's id. This is not configurable by
+ * gridmix user.
+ */
+ public static final String ORIGINAL_JOB_ID = "gridmix.job.original-job-id";
+
private DistributedCacheEmulator distCacheEmulator;
// Submit data structures
Modified: hadoop/mapreduce/trunk/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/GridmixJob.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/GridmixJob.java?rev=1128147&r1=1128146&r2=1128147&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/GridmixJob.java (original)
+++ hadoop/mapreduce/trunk/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/GridmixJob.java Fri May 27 04:27:29 2011
@@ -54,16 +54,17 @@ import org.apache.commons.logging.LogFac
*/
abstract class GridmixJob implements Callable<Job>, Delayed {
- public static final String JOBNAME = "GRIDMIX";
- public static final String ORIGNAME = "gridmix.job.name.original";
+ // Gridmix job name format is GRIDMIX<6 digit sequence number>
+ public static final String JOB_NAME_PREFIX = "GRIDMIX";
public static final Log LOG = LogFactory.getLog(GridmixJob.class);
private static final ThreadLocal<Formatter> nameFormat =
new ThreadLocal<Formatter>() {
@Override
protected Formatter initialValue() {
- final StringBuilder sb = new StringBuilder(JOBNAME.length() + 5);
- sb.append(JOBNAME);
+ final StringBuilder sb =
+ new StringBuilder(JOB_NAME_PREFIX.length() + 6);
+ sb.append(JOB_NAME_PREFIX);
return new Formatter(sb);
}
};
@@ -95,18 +96,21 @@ abstract class GridmixJob implements Cal
this.jobdesc = jobdesc;
this.seq = seq;
- ((StringBuilder)nameFormat.get().out()).setLength(JOBNAME.length());
+ ((StringBuilder)nameFormat.get().out()).setLength(JOB_NAME_PREFIX.length());
try {
job = this.ugi.doAs(new PrivilegedExceptionAction<Job>() {
public Job run() throws IOException {
- Job ret =
- new Job(conf,
- nameFormat.get().format("%05d", seq).toString());
- ret.getConfiguration().setInt(GRIDMIX_JOB_SEQ, seq);
+
String jobId = null == jobdesc.getJobID()
? "<unknown>"
: jobdesc.getJobID().toString();
- ret.getConfiguration().set(ORIGNAME, jobId);
+ Job ret = new Job(conf,
+ nameFormat.get().format("%06d", seq).toString());
+ ret.getConfiguration().setInt(GRIDMIX_JOB_SEQ, seq);
+
+ ret.getConfiguration().set(Gridmix.ORIGINAL_JOB_ID, jobId);
+ ret.getConfiguration().set(Gridmix.ORIGINAL_JOB_NAME,
+ jobdesc.getName());
if (conf.getBoolean(GRIDMIX_USE_QUEUE_IN_TRACE, false)) {
setJobQueue(ret, jobdesc.getQueueName());
} else {
Modified: hadoop/mapreduce/trunk/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/DebugJobProducer.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/DebugJobProducer.java?rev=1128147&r1=1128146&r2=1128147&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/DebugJobProducer.java (original)
+++ hadoop/mapreduce/trunk/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/DebugJobProducer.java Fri May 27 04:27:29 2011
@@ -146,7 +146,7 @@ public class DebugJobProducer implements
final long seed = r.nextLong();
r.setSeed(seed);
id = seq.getAndIncrement();
- name = String.format("MOCKJOB%05d", id);
+ name = String.format("MOCKJOB%06d", id);
this.conf = conf;
LOG.info(name + " (" + seed + ")");
Modified: hadoop/mapreduce/trunk/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/TestGridmixSubmission.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/TestGridmixSubmission.java?rev=1128147&r1=1128146&r2=1128147&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/TestGridmixSubmission.java (original)
+++ hadoop/mapreduce/trunk/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/TestGridmixSubmission.java Fri May 27 04:27:29 2011
@@ -17,9 +17,9 @@
*/
package org.apache.hadoop.mapred.gridmix;
-import java.io.FileInputStream;
import java.io.InputStream;
import java.io.IOException;
+import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
@@ -41,6 +41,7 @@ import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobID;
import org.apache.hadoop.mapred.TaskReport;
import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.tools.rumen.JobStory;
import org.apache.hadoop.tools.rumen.JobStoryProducer;
@@ -110,18 +111,10 @@ public class TestGridmixSubmission {
final JobClient client = new JobClient(
GridmixTestUtils.mrCluster.createJobConf());
for (Job job : succeeded) {
- final String jobname = job.getJobName();
- if (GenerateData.JOB_NAME.equals(jobname)) {
- if (!job.getConfiguration().getBoolean(
- GridmixJob.GRIDMIX_USE_QUEUE_IN_TRACE, true)) {
- assertEquals(" Improper queue for " + job.getJobName(),
- job.getConfiguration().get("mapred.job.queue.name"),
- "q1");
- } else {
- assertEquals(" Improper queue for " + job.getJobName(),
- job.getConfiguration().get("mapred.job.queue.name"),
- "default");
- }
+ final String jobName = job.getJobName();
+ Configuration conf = job.getConfiguration();
+ if (GenerateData.JOB_NAME.equals(jobName)) {
+ verifyQueue(conf, jobName);
final Path in = new Path("foo").makeQualified(GridmixTestUtils.dfs);
final Path out = new Path("/gridmix").makeQualified(GridmixTestUtils.dfs);
final ContentSummary generated = GridmixTestUtils.dfs.getContentSummary(in);
@@ -131,29 +124,48 @@ public class TestGridmixSubmission {
FileStatus[] outstat = GridmixTestUtils.dfs.listStatus(out);
assertEquals("Mismatched job count", NJOBS, outstat.length);
continue;
+ } else if (GenerateDistCacheData.JOB_NAME.equals(jobName)) {
+ verifyQueue(conf, jobName);
+ continue;
}
- if (!job.getConfiguration().getBoolean(
+ if (!conf.getBoolean(
GridmixJob.GRIDMIX_USE_QUEUE_IN_TRACE, true)) {
- assertEquals(" Improper queue for " + job.getJobName() + " " ,
- job.getConfiguration().get("mapred.job.queue.name"),"q1" );
+ assertEquals(" Improper queue for " + jobName + " " ,
+ conf.get(MRJobConfig.QUEUE_NAME), "q1" );
} else {
- assertEquals(" Improper queue for " + job.getJobName() + " ",
- job.getConfiguration().get("mapred.job.queue.name"),
- sub.get(job.getConfiguration().get(GridmixJob.ORIGNAME))
- .getQueueName());
+ assertEquals(" Improper queue for " + jobName + " ",
+ conf.get(MRJobConfig.QUEUE_NAME),
+ sub.get(conf.get(Gridmix.ORIGINAL_JOB_ID)).getQueueName());
}
- final JobStory spec =
- sub.get(job.getConfiguration().get(GridmixJob.ORIGNAME));
- assertNotNull("No spec for " + job.getJobName(), spec);
- assertNotNull("No counters for " + job.getJobName(), job.getCounters());
- final String specname = spec.getName();
+ final String originalJobId = conf.get(Gridmix.ORIGINAL_JOB_ID);
+ final JobStory spec = sub.get(originalJobId);
+ assertNotNull("No spec for " + jobName, spec);
+ assertNotNull("No counters for " + jobName, job.getCounters());
+ final String originalJobName = spec.getName();
+ System.out.println("originalJobName=" + originalJobName
+ + ";GridmixJobName=" + jobName + ";originalJobID=" + originalJobId);
+ assertTrue("Original job name is wrong.", originalJobName.equals(
+ conf.get(Gridmix.ORIGINAL_JOB_NAME)));
+
+ // Gridmix job seqNum contains 6 digits
+ int seqNumLength = 6;
+ String jobSeqNum = new DecimalFormat("000000").format(
+ conf.getInt(GridmixJob.GRIDMIX_JOB_SEQ, -1));
+ // Original job name is of the format MOCKJOB<6 digit sequence number>
+ // because MockJob jobNames are of this format.
+ assertTrue(originalJobName.substring(
+ originalJobName.length() - seqNumLength).equals(jobSeqNum));
+
+ assertTrue("Gridmix job name is not in the expected format.",
+ jobName.equals(
+ GridmixJob.JOB_NAME_PREFIX + jobSeqNum));
+
final FileStatus stat =
GridmixTestUtils.dfs.getFileStatus(
- new Path(GridmixTestUtils.DEST,
- "" + Integer.valueOf(specname.substring(specname.length() - 5))));
- assertEquals("Wrong owner for " + job.getJobName(), spec.getUser(),
+ new Path(GridmixTestUtils.DEST, "" + Integer.valueOf(jobSeqNum)));
+ assertEquals("Wrong owner for " + jobName, spec.getUser(),
stat.getOwner());
final int nMaps = spec.getNumberMaps();
@@ -162,7 +174,7 @@ public class TestGridmixSubmission {
// TODO Blocked by MAPREDUCE-118
if (true) return;
// TODO
- System.out.println(jobname + ": " + nMaps + "/" + nReds);
+ System.out.println(jobName + ": " + nMaps + "/" + nReds);
final TaskReport[] mReports =
client.getMapTaskReports(JobID.downgrade(job.getJobID()));
assertEquals("Mismatched map count", nMaps, mReports.length);
@@ -177,6 +189,18 @@ public class TestGridmixSubmission {
}
}
+ // Verify if correct job queue is used
+ private void verifyQueue(Configuration conf, String jobName) {
+ if (!conf.getBoolean(
+ GridmixJob.GRIDMIX_USE_QUEUE_IN_TRACE, true)) {
+ assertEquals(" Improper queue for " + jobName,
+ conf.get("mapred.job.queue.name"), "q1");
+ } else {
+ assertEquals(" Improper queue for " + jobName,
+ conf.get("mapred.job.queue.name"), "default");
+ }
+ }
+
public void check(final TaskType type, Job job, JobStory spec,
final TaskReport[] runTasks,
long extraInputBytes, int extraInputRecords,
Modified: hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/gridmix.xml
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/gridmix.xml?rev=1128147&r1=1128146&r2=1128147&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/gridmix.xml (original)
+++ hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/gridmix.xml Fri May 27 04:27:29 2011
@@ -56,8 +56,8 @@
<li>Use GridMix with the job trace on the benchmark cluster.</li>
</ol>
<p>Jobs submitted by GridMix have names of the form
- "<code>GRIDMIXnnnnn</code>", where
- "<code>nnnnn</code>" is a sequence number padded with leading
+ "<code>GRIDMIXnnnnnn</code>", where
+ "<code>nnnnnn</code>" is a sequence number padded with leading
zeroes.</p>
</section>
<section id="usage">
@@ -540,6 +540,36 @@ hadoop jar <gridmix-jar> org.apach
</ul>
</section>
+ <section id="simulatedjobconf">
+ <title>Configuration of Simulated Jobs</title>
+ <p> Gridmix3 sets some configuration properties in the simulated Jobs
+ submitted by it so that they can be mapped back to the corresponding Job
+ in the input Job trace. These configuration parameters include:
+ </p>
+ <table>
+ <tr>
+ <th>Parameter</th>
+ <th>Description</th>
+ </tr>
+ <tr>
+ <td>
+ <code>gridmix.job.original-job-id</code>
+ </td>
+ <td> The job id of the original cluster's job corresponding to this
+ simulated job.
+ </td>
+ </tr>
+ <tr>
+ <td>
+ <code>gridmix.job.original-job-name</code>
+ </td>
+ <td> The job name of the original cluster's job corresponding to this
+ simulated job.
+ </td>
+ </tr>
+ </table>
+ </section>
+
<section id="assumptions">
<title>Simplifying Assumptions</title>
<p>GridMix will be developed in stages, incorporating feedback and