You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by cu...@apache.org on 2006/07/18 13:05:11 UTC
svn commit: r423049 - in /lucene/hadoop/trunk: ./
src/contrib/smallJobsBenchmark/ src/contrib/smallJobsBenchmark/bin/
src/contrib/smallJobsBenchmark/src/ src/contrib/smallJobsBenchmark/src/java/
src/contrib/smallJobsBenchmark/src/java/org/ src/contrib/...
Author: cutting
Date: Tue Jul 18 04:05:10 2006
New Revision: 423049
URL: http://svn.apache.org/viewvc?rev=423049&view=rev
Log:
HADOOP-307. Add smallJobsBenchmark contrib module. This runs lots of small jobs to determine per-task overheads. Contributed by Sanjay Dahiya.
Added:
lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/
lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/Readme.txt
lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/bin/
lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/bin/report.sh (with props)
lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/bin/run.sh (with props)
lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/build.xml
lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/src/
lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/src/java/
lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/src/java/org/
lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/src/java/org/apache/
lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/src/java/org/apache/hadoop/
lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/src/java/org/apache/hadoop/benchmarks/
lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/src/java/org/apache/hadoop/benchmarks/mapred/
lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/src/java/org/apache/hadoop/benchmarks/mapred/BenchmarkMapper.java
lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/src/java/org/apache/hadoop/benchmarks/mapred/BenchmarkReducer.java
lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/src/java/org/apache/hadoop/benchmarks/mapred/GenData.java
lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/src/java/org/apache/hadoop/benchmarks/mapred/MultiJobRunner.java
Modified:
lucene/hadoop/trunk/CHANGES.txt
Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?rev=423049&r1=423048&r2=423049&view=diff
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Tue Jul 18 04:05:10 2006
@@ -43,6 +43,10 @@
12. HADOOP-356. Add contrib to "compile" and "test" build targets, so
that this code is better maintained. (Michel Tourn via cutting)
+13. HADOOP-307. Add smallJobsBenchmark contrib module. This runs
+ lots of small jobs, in order to determine per-task overheads.
+ (Sanjay Dahiya via cutting)
+
Release 0.4.0 - 2006-06-28
Added: lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/Readme.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/Readme.txt?rev=423049&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/Readme.txt (added)
+++ lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/Readme.txt Tue Jul 18 04:05:10 2006
@@ -0,0 +1,40 @@
+SmallJobsBenchmark Readme :
+
+Building the benchmark.
+to build -
+$ cd smallJobsBenchmark
+$ ant deploy
+
+Running the benchmark
+$ cd build/contrib/smallJobsBenchmark
+$ bin/run.sh
+
+After the benchmark has run successfully, see logs/report.txt for the consolidated output of all the runs.
+
+Edit bin/run.sh to change the options passed to the benchmark.
+
+Configurable options are -
+
+-inputLines noOfLines
+ no of lines of input to generate.
+
+-inputType (ascending, descending, random)
+ type of input to generate.
+
+-jar jarFilePath
+ Jar file containing the Mapper and Reducer implementations. By default the ant build creates MRBenchmark.jar, which contains the default Mapper and Reducer.
+
+-times numJobs
+Number of times to run the MapReduce job; the reported time is the average over all runs.
+
+-workDir dfsPath
+DFS path to put output of MR tasks.
+
+-maps numMaps
+Number of maps for each task
+
+-reduces numReduces
+No of reduces for each task
+
+-ignoreOutput
+Doesn't copy the output back to local disk. Otherwise the output is copied back to a temporary location on local disk.
Added: lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/bin/report.sh
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/bin/report.sh?rev=423049&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/bin/report.sh (added)
+++ lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/bin/report.sh Tue Jul 18 04:05:10 2006
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+# Collects the last line of every benchmark log under logs/ into
+# logs/report.txt and prints the consolidated report.
+
+echo "DataLines, Maps, Reduces, AvgTime " > logs/report.txt
+# Glob directly instead of parsing the output of ls, so that file names
+# containing whitespace are handled correctly.
+for logFile in logs/*.log
+do
+  # When nothing matches, the pattern itself is returned unexpanded; skip it.
+  [ -f "${logFile}" ] || continue
+#  tail -n $((${TIMES}+5)) ${logFile} >> logs/report.txt
+  tail -n 1 "${logFile}" >> logs/report.txt
+done
+
+cat logs/report.txt
Propchange: lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/bin/report.sh
------------------------------------------------------------------------------
svn:executable = *
Added: lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/bin/run.sh
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/bin/run.sh?rev=423049&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/bin/run.sh (added)
+++ lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/bin/run.sh Tue Jul 18 04:05:10 2006
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+if [ -z $HADOOP_HOME ]
+then
+ echo "Error HADOOP_HOME not defined" ;
+ exit 1;
+fi
+
+if [ -z $JAVA_HOME ]
+then
+ echo "Error JAVA_HOME not defined" ;
+ exit 1;
+fi
+
+export CLASSPATH=$HADOOP_HOME/conf:/export/crawlspace/kryptonite/java/jdk/lib/tools.jar:$HADOOP_HOME/build/classes:$HADOOP_HOME/build:$HADOOP_HOME/build/test/classes:$HADOOP_HOME/hadoop-*.jar:$HADOOP_HOME/lib/commons-cli-2.0-SNAPSHOT.jar:$HADOOP_HOME/lib/commons-logging-1.0.4.jar:$HADOOP_HOME/lib/commons-logging-api-1.0.4.jar:$HADOOP_HOME/lib/jetty-5.1.4.jar:$HADOOP_HOME/lib/junit-3.8.1.jar:$HADOOP_HOME/lib/log4j-1.2.13.jar:$HADOOP_HOME/lib/lucene-core-1.9.1.jar:$HADOOP_HOME/lib/servlet-api.jar:$HADOOP_HOME/lib/jetty-ext/ant.jar:$HADOOP_HOME/lib/jetty-ext/commons-el.jar:$HADOOP_HOME/lib/jetty-ext/jasper-compiler.jar:$HADOOP_HOME/lib/jetty-ext/jasper-runtime.jar:$HADOOP_HOME/lib/jetty-ext/jsp-api.jar
+
+mkdir -p logs;
+
+export TIMES=2
+
+#for dataLines in 1 10000 10000000
+for dataLines in 1 100
+ do
+
+for maps in 1 18
+ do
+ for reduces in 1 18
+ do
+$JAVA_HOME/bin/java -classpath $CLASSPATH:./classes org.apache.hadoop.benchmarks.mapred.MultiJobRunner -inputLines ${dataLines} -output /hadoop/mapred/MROutput -jar MRBenchmark.jar -times ${TIMES} -workDir /hadoop/mapred/work -maps ${maps} -reduces ${reduces} -inputType ascending -ignoreOutput 2>&1 | tee logs/benchmark_${dataLines}_${maps}_${reduces}.log
+
+ done
+ done
+ done
+
+bin/report.sh
Propchange: lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/bin/run.sh
------------------------------------------------------------------------------
svn:executable = *
Added: lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/build.xml
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/build.xml?rev=423049&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/build.xml (added)
+++ lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/build.xml Tue Jul 18 04:05:10 2006
@@ -0,0 +1,27 @@
+<?xml version="1.0"?>
+
+<project name="smallJobsBenchmark" default="jar">
+
+  <import file="../build-contrib.xml"/>
+
+  <!-- Same name is used by shell scripts running this
+       benchmark -->
+  <property name="benchmarkJarName" value="MRBenchmark.jar"/>
+
+  <!-- Packages the compiled benchmark classes into ${benchmarkJarName}. -->
+  <target name="jar" depends="compile">
+    <jar
+      jarfile="${build.dir}/${benchmarkJarName}"
+      basedir="${build.classes}">
+    </jar>
+  </target>
+
+  <!-- Copies the runner scripts into ${build.dir}/bin, next to the jar. -->
+  <target name="deploy" depends="jar">
+    <mkdir dir="${build.dir}/bin"/>
+    <copy todir="${build.dir}/bin">
+      <fileset dir="${root}/bin">
+        <include name="**/*.*"/>
+      </fileset>
+    </copy>
+    <!-- Ant's copy task does not retain Unix file permissions, so restore
+         the execute bit; otherwise bin/run.sh cannot be started directly. -->
+    <chmod perm="ugo+x" type="file">
+      <fileset dir="${build.dir}/bin"/>
+    </chmod>
+  </target>
+
+</project>
Added: lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/src/java/org/apache/hadoop/benchmarks/mapred/BenchmarkMapper.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/src/java/org/apache/hadoop/benchmarks/mapred/BenchmarkMapper.java?rev=423049&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/src/java/org/apache/hadoop/benchmarks/mapred/BenchmarkMapper.java (added)
+++ lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/src/java/org/apache/hadoop/benchmarks/mapred/BenchmarkMapper.java Tue Jul 18 04:05:10 2006
@@ -0,0 +1,34 @@
+package org.apache.hadoop.benchmarks.mapred;
+
+import java.io.IOException;
+import java.io.StringWriter;
+
+import org.apache.hadoop.io.UTF8;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+
+/**
+ * Mapper for the benchmark. Each input value is treated as a line of text,
+ * passed through {@link #process(String)} and emitted as the output key
+ * together with an empty value.
+ *
+ * @author sanjaydahiya
+ *
+ */
+public class BenchmarkMapper extends MapReduceBase implements Mapper {
+
+  /**
+   * Emits the processed input line as key, with an empty string as value.
+   * The incoming key is not used.
+   */
+  public void map(WritableComparable key, Writable value,
+      OutputCollector output, Reporter reporter) throws IOException {
+
+    UTF8 inputLine = (UTF8) value;
+    UTF8 outputKey = new UTF8(process(inputLine.toString()));
+    UTF8 emptyValue = new UTF8("");
+    output.collect(outputKey, emptyValue);
+  }
+
+  /**
+   * Per-line processing hook; this implementation returns the line unchanged.
+   */
+  public String process(String line){
+    return line;
+  }
+
+}
Added: lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/src/java/org/apache/hadoop/benchmarks/mapred/BenchmarkReducer.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/src/java/org/apache/hadoop/benchmarks/mapred/BenchmarkReducer.java?rev=423049&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/src/java/org/apache/hadoop/benchmarks/mapred/BenchmarkReducer.java (added)
+++ lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/src/java/org/apache/hadoop/benchmarks/mapred/BenchmarkReducer.java Tue Jul 18 04:05:10 2006
@@ -0,0 +1,32 @@
+package org.apache.hadoop.benchmarks.mapred;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.hadoop.io.UTF8;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+
+/**
+ *
+ * @author sanjaydahiya
+ *
+ */
+public class BenchmarkReducer extends MapReduceBase implements Reducer {
+
+ public void reduce(WritableComparable key, Iterator values,
+ OutputCollector output, Reporter reporter) throws IOException {
+
+ // ignore the key and write values to output
+ while(values.hasNext()){
+ output.collect(key, new UTF8(values.next().toString()));
+ }
+ }
+
+ public String process(String line){
+ return line ;
+ }
+}
Added: lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/src/java/org/apache/hadoop/benchmarks/mapred/GenData.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/src/java/org/apache/hadoop/benchmarks/mapred/GenData.java?rev=423049&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/src/java/org/apache/hadoop/benchmarks/mapred/GenData.java (added)
+++ lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/src/java/org/apache/hadoop/benchmarks/mapred/GenData.java Tue Jul 18 04:05:10 2006
@@ -0,0 +1,64 @@
+package org.apache.hadoop.benchmarks.mapred;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.Random;
+
+
+public class GenData {
+ public static final int RANDOM = 1;
+ public static final int ASCENDING = 2;
+ public static final int DESCENDING = 3;
+
+ public static void generateText(long numLines, File file, int sortType) throws IOException{
+
+ PrintStream output = new PrintStream(new FileOutputStream(file));
+ int padding = String.valueOf(numLines).length();
+
+ switch(sortType){
+
+ case RANDOM :
+ for(long l = 0 ; l<numLines ; l++ ){
+ output.println(pad((new Random()).nextLong(), padding));
+ }
+ break ;
+
+ case ASCENDING:
+ for(long l = 0 ; l<numLines ; l++ ){
+ output.println(pad(l, padding));
+ }
+ break ;
+
+ case DESCENDING:
+ for(long l = numLines ; l>0 ; l-- ){
+ output.println(pad(l, padding));
+ }
+ break ;
+
+ }
+ output.close() ;
+ }
+
+ private static String pad( long number, int size ){
+ String str = String.valueOf(number);
+
+ StringBuffer value = new StringBuffer();
+ for( int i = str.length(); i< size ; i++ ){
+ value.append("0");
+ }
+ value.append(str);
+ return value.toString();
+ }
+
+ public static void main(String[] args){
+ try{
+ // test
+ generateText(100, new File("/Users/sanjaydahiya/dev/temp/sort.txt"), ASCENDING);
+ }catch(Exception e){
+ e.printStackTrace();
+ }
+ }
+
+}
Added: lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/src/java/org/apache/hadoop/benchmarks/mapred/MultiJobRunner.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/src/java/org/apache/hadoop/benchmarks/mapred/MultiJobRunner.java?rev=423049&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/src/java/org/apache/hadoop/benchmarks/mapred/MultiJobRunner.java (added)
+++ lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/src/java/org/apache/hadoop/benchmarks/mapred/MultiJobRunner.java Tue Jul 18 04:05:10 2006
@@ -0,0 +1,405 @@
+package org.apache.hadoop.benchmarks.mapred;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.Random;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.UTF8;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.TextInputFormat;
+import org.apache.hadoop.mapred.TextOutputFormat;
+
+/**
+ * Runs a job multiple times and takes the average of all runs.
+ * @author sanjaydahiya
+ *
+ */
+public class MultiJobRunner {
+
+ // jar handed to JobConf.setJar() when a job is set up
+ private String jarFile = "MRBenchmark.jar" ;
+ // path of the local input file that run() copies into DFS
+ private String input ;
+ // output path set by setupJob(); runJobInSequence() replaces it per run
+ private String output ;
+ // number of jobs run() executes
+ private int numJobs = 2000 ; // default value
+ private static final Log LOG = LogFactory.getLog(MultiJobRunner.class);
+ private int numMaps = 2;
+ private int numReduces = 1;
+ // number of input lines; only used in the printed report
+ private int dataLines = 1;
+ // when true, job output is not copied back from DFS
+ private boolean ignoreOutput = false ;
+ // when true, run() prints per-job details in addition to the summary
+ private boolean verbose = false ;
+
+ // just to print in the end
+ // elapsed wall-clock milliseconds of each job, stored as Long
+ ArrayList execTimes = new ArrayList();
+
+ // DFS working directory; created and deleted by run(), overridden by -workDir
+ private static String context = "/mapred/benchmark";
+
+ /**
+ * Creates a runner. Input is a local file.
+ * @param input path of the local input file
+ * @param output output path set on the job configuration
+ * @param jarFile jar file set on the job configuration
+ */
+ public MultiJobRunner(String input, String output, String jarFile){
+ this.input = input ;
+ this.output = output ;
+ this.jarFile = jarFile ;
+ }
+
+ /** Returns the path of the local input file. */
+ public String getInput() {
+ return input;
+ }
+
+ /** Sets the path of the local input file that run() copies into DFS. */
+ public void setInput(String input) {
+ this.input = input;
+ }
+
+ /** Returns the jar file set on each job configuration. */
+ public String getJarFile() {
+ return jarFile;
+ }
+
+ /** Sets the jar file set on each job configuration. */
+ public void setJarFile(String jarFile) {
+ this.jarFile = jarFile;
+ }
+
+ /** Returns the output path used when a job configuration is set up. */
+ public String getOutput() {
+ return output;
+ }
+
+ /** Sets the output path used when a job configuration is set up. */
+ public void setOutput(String output) {
+ this.output = output;
+ }
+
+ /** Returns the number of jobs run() executes. */
+ public int getNumJobs() {
+ return numJobs;
+ }
+
+ /** Sets the number of jobs run() executes. */
+ public void setNumJobs(int numJobs) {
+ this.numJobs = numJobs;
+ }
+
+
+ /** Returns the number of input lines shown in the report. */
+ public int getDataLines() {
+ return dataLines;
+ }
+
+ /** Sets the number of input lines shown in the report. */
+ public void setDataLines(int dataLines) {
+ this.dataLines = dataLines;
+ }
+
+ /** Returns true when job output is not copied back from DFS. */
+ public boolean isIgnoreOutput(){
+ return this.ignoreOutput ;
+ }
+
+ /** Sets whether job output is left in DFS instead of being copied back. */
+ public void setIgnoreOutput(boolean ignore){
+ this.ignoreOutput = ignore ;
+ }
+
+ /** Sets whether run() prints per-job details as well as the summary. */
+ public void setVerbose(boolean verbose){
+ this.verbose = verbose ;
+ }
+ /** Returns true when run() prints per-job details as well as the summary. */
+ public boolean getVerbose(){
+ return this.verbose;
+ }
+
+ /**
+  * Builds a fresh job configuration for one benchmark run: text input read
+  * from the "input" directory under the work dir, text output, UTF8 map
+  * output keys and values, and the benchmark mapper and reducer.
+  *
+  * @return the configured JobConf
+  */
+ private JobConf setupJob(){
+   Path inputDir = new Path(context + "/input");
+   Path outputDir = new Path(output);
+
+   JobConf conf = new JobConf() ;
+
+   // where to read from and in which formats
+   conf.addInputPath(inputDir);
+   conf.setInputFormat(TextInputFormat.class);
+   conf.setOutputFormat(TextOutputFormat.class);
+
+   // key and value types
+   conf.setInputKeyClass(LongWritable.class);
+   conf.setOutputValueClass(UTF8.class);
+   conf.setMapOutputKeyClass(UTF8.class);
+   conf.setMapOutputValueClass(UTF8.class);
+
+   conf.setOutputPath(outputDir);
+
+   // code to run
+   conf.setJar(jarFile);
+   conf.setMapperClass(BenchmarkMapper.class);
+   conf.setReducerClass(BenchmarkReducer.class);
+
+   // parallelism
+   conf.setNumMapTasks(this.numMaps);
+   conf.setNumReduceTasks(this.numReduces);
+
+   return conf ;
+ }
+
+ /**
+  * Runs the benchmark MapReduce job the given number of times, one after
+  * another. Every run reads the same input and writes to its own randomly
+  * named directory under the work dir. The elapsed wall-clock time of each
+  * run is appended to execTimes. Unless ignoreOutput is set, the output of
+  * each run is copied back to a temporary location on local disk.
+  *
+  * @param times number of jobs to run
+  * @throws IOException if a job or the copy of its output fails
+  */
+ private void runJobInSequence(int times) throws IOException{
+   Random rand = new Random();
+
+   for( int i = 0; i < times; i++){
+     // create a new job conf every time, reusing same object doesnt seem to work.
+     JobConf job = setupJob();
+
+     // give a new random name to output of the mapred tasks
+     // TODO: see if something better can be done
+     Path intrimData = new Path(context+"/temp/multiMapRedOutput_" +
+         rand.nextInt() );
+     job.setOutputPath(intrimData);
+
+     // run the mapred task now; only the job itself is timed
+     LOG.info("Running job, Input : " + job.getInputPaths()[0] +
+         " Output : " + job.getOutputPath());
+     long curTime = System.currentTimeMillis();
+     JobClient.runJob(job);
+     execTimes.add(new Long(System.currentTimeMillis() - curTime));
+
+     if( ! ignoreOutput ){
+       // pull the output out of DFS for validation. The temp file is only
+       // created when it is needed, and only to reserve a unique name; it
+       // is removed again before the copy (presumably so that the copy
+       // does not find an existing destination).
+       File localOutputFile = File.createTempFile("MROutput" +
+           rand.nextInt(), ".txt" );
+       String localOutputPath = localOutputFile.getAbsolutePath() ;
+       localOutputFile.delete();
+
+       copyFromDFS(intrimData, localOutputPath);
+     }
+
+     // diff(input, localOutputPath);
+   }
+ }
+
+ /**
+  * Placeholder for comparing two files. It is not used and always
+  * returns false without looking at either path.
+  */
+ private boolean diff(String path1, String path2) throws IOException{
+   return false ;
+ }
+
+ /**
+  * Runs the given pre-configured jobs one after another. The output path
+  * of each job is added as an input path of the job that follows it; the
+  * first job runs with the input it was configured with.
+  *
+  * @return the output path of the last job, or null when no jobs are given
+  */
+ public Path runJobsInSequence(JobConf[] jobs) throws IOException{
+
+   Path previousOutput = null ;
+
+   for( int idx = 0; idx < jobs.length; idx++){
+     JobConf current = jobs[idx];
+
+     // every job after the first also reads what its predecessor wrote
+     if( idx != 0 ) {
+       current.addInputPath(previousOutput) ;
+     }
+
+     JobClient.runJob(current);
+     previousOutput = current.getOutputPath();
+   }
+
+   return previousOutput;
+ }
+
+ /**
+  * Copies a local file into the default file system.
+  *
+  * @param localFile path of the file on local disk
+  * @param remotePath destination path; when null, a randomly named file
+  *        in the "input" directory under the work dir is used
+  * @return the path the file was copied to
+  * @throws IOException if the copy fails
+  */
+ private Path copyToDFS(String localFile, Path remotePath) throws IOException{
+   Path target = remotePath;
+   if( target == null ){
+     // use temp path under /mapred in DFS
+     int suffix = new Random().nextInt();
+     target = new Path( context+"/input/MRBenchmark_" + suffix ) ;
+   }
+
+   Configuration conf = new Configuration();
+   FileSystem source = FileSystem.getNamed("local", conf);
+   FileSystem destination = FileSystem.get(conf);
+
+   FileUtil.copy(source, new Path(localFile), destination,
+       target, false, conf);
+
+   if( ignoreOutput ) {
+     // the local copy is no longer needed once it has been uploaded
+     File localCopy = new File(localFile);
+     localCopy.delete();
+   }
+
+   return target;
+ }
+
+ /**
+  * Copies a path from the default file system to local disk, leaving the
+  * source in place.
+  *
+  * @param remotePath path to copy from
+  * @param localPath destination path on local disk
+  * @throws IOException if the copy fails
+  */
+ private void copyFromDFS(Path remotePath, String localPath)
+     throws IOException{
+
+   Configuration conf = new Configuration();
+   FileSystem destination = FileSystem.getNamed("local", conf);
+   FileSystem source = FileSystem.get(conf);
+   Path localTarget = new Path(localPath);
+
+   FileUtil.copy(source, remotePath, destination, localTarget, false, conf);
+ }
+
+ /** Creates the benchmark work directory in the default file system. */
+ private void setupContext() throws IOException{
+   FileSystem fs = FileSystem.get(new Configuration());
+   Path workDir = new Path(context);
+   fs.mkdirs(workDir);
+ }
+ /** Deletes the benchmark work directory from the default file system. */
+ private void clearContext() throws IOException{
+   FileSystem fs = FileSystem.get(new Configuration());
+   Path workDir = new Path(context);
+   fs.delete(workDir);
+ }
+ /**
+  * Run the benchmark: create the work directory, upload the input file,
+  * run the configured number of jobs in sequence and print the average job
+  * time. The work directory is removed again whether or not the jobs
+  * succeed. With verbose set, the time of every job is printed as well.
+  *
+  * @throws IOException if the upload, a job or the cleanup fails
+  */
+ public void run() throws IOException{
+
+   setupContext();
+
+   try{
+     // uploaded inside the try so that the work directory is also removed
+     // when the upload itself fails
+     copyToDFS(input, null);
+     runJobInSequence(numJobs);
+   }finally{
+     clearContext();
+   }
+
+   if( verbose ) {
+     // Print out a report
+     System.out.println("Total MapReduce tasks executed: " + this.numJobs);
+     System.out.println("Total lines of data : " + this.dataLines);
+     System.out.println("Maps : " + this.numMaps +
+         " , Reduces : " + this.numReduces);
+   }
+   int i = 0 ;
+   long totalTime = 0 ;
+   for( Iterator iter = execTimes.iterator() ; iter.hasNext();){
+     // fetch each entry exactly once; advancing the iterator a second time
+     // to print it would skip entries and run past the end of the list
+     Long taskTime = (Long)iter.next();
+     totalTime += taskTime.longValue() ;
+     if( verbose ) {
+       System.out.println("Total time for task : " + ++i +
+           " , = " + taskTime);
+     }
+   }
+
+   // average over the runs actually recorded; this also avoids a division
+   // by zero when no job was run
+   int completedJobs = execTimes.size();
+   long avgTime = 0 ;
+   if( completedJobs > 0 ) {
+     avgTime = totalTime / completedJobs ;
+   }
+   if( verbose ) {
+     System.out.println("Avg time : " + avgTime);
+   }
+
+   System.out.println("DataLines Maps Reduces AvgTime");
+   System.out.println(this.dataLines + ", " + this.numMaps + ", " +
+       this.numReduces + ", " + avgTime);
+
+ }
+
+ /** Returns the number of map tasks set on each job configuration. */
+ public int getNumMaps() {
+ return numMaps;
+ }
+
+ /** Sets the number of map tasks set on each job configuration. */
+ public void setNumMaps(int numMaps) {
+ this.numMaps = numMaps;
+ }
+
+ /** Returns the number of reduce tasks set on each job configuration. */
+ public int getNumReduces() {
+ return numReduces;
+ }
+
+ /** Sets the number of reduce tasks set on each job configuration. */
+ public void setNumReduces(int numReduces) {
+ this.numReduces = numReduces;
+ }
+
+ /**
+  * Command line entry point. Parses the options, generates a temporary
+  * local input file with the requested number of lines and ordering, and
+  * runs the benchmark on it. Prints the usage and exits with -1 when no
+  * arguments are given, when an option is missing its value, when a
+  * numeric option is not a number or when the input type is unknown.
+  * Unrecognised options are ignored.
+  *
+  * @param args command line options, see the usage string
+  * @throws IOException if the input file cannot be generated
+  */
+ public static void main (String[] args) throws IOException{
+
+   String version = "MRBenchmark.0.0.1";
+   String usage =
+     "Usage: MultiJobRunner -inputLines noOfLines -jar jarFilePath " +
+     "[-output dfsPath] [-times numJobs] -workDir dfsPath " +
+     "[-inputType (ascending, descending, random)]" +
+     " -maps numMaps -reduces numReduces -ignoreOutput -verbose" ;
+
+   System.out.println(version);
+
+   if (args.length == 0) {
+     System.err.println(usage);
+     System.exit(-1);
+   }
+
+   String output = "";
+   String jarFile = "MRBenchmark.jar" ;
+   int numJobs = 0 ;
+   int numMaps = 2;
+   int numReduces = 1 ;
+   int dataLines = 1 ;
+   int inputType = GenData.RANDOM ;
+   boolean ignoreOutput = false ;
+   boolean verbose = false ;
+
+   for (int i = 0; i < args.length; i++) { // parse command line
+     if (args[i].equals("-output")) {
+       output = optionValue(args, ++i, usage);
+     }else if (args[i].equals("-jar")) {
+       jarFile = optionValue(args, ++i, usage);
+     }else if (args[i].equals("-times")) {
+       numJobs = intOptionValue(args, ++i, usage);
+     }else if(args[i].equals("-workDir")) {
+       context = optionValue(args, ++i, usage);
+     }else if(args[i].equals("-maps")) {
+       numMaps = intOptionValue(args, ++i, usage);
+     }else if(args[i].equals("-reduces")) {
+       numReduces = intOptionValue(args, ++i, usage);
+     }else if(args[i].equals("-inputLines")) {
+       dataLines = intOptionValue(args, ++i, usage);
+     }else if(args[i].equals("-inputType")) {
+       String s = optionValue(args, ++i, usage);
+       if( s.equals("ascending")){
+         inputType = GenData.ASCENDING ;
+       }else if(s.equals("descending")){
+         inputType = GenData.DESCENDING ;
+       }else if(s.equals("random")){
+         inputType = GenData.RANDOM ;
+       }else{
+         // a mistyped ordering should not silently become random input
+         System.err.println("Unknown input type: " + s);
+         System.err.println(usage);
+         System.exit(-1);
+       }
+     }else if(args[i].equals("-ignoreOutput")) {
+       ignoreOutput = true ;
+     }else if(args[i].equals("-verbose")) {
+       verbose = true ;
+     }
+   }
+
+   File inputFile = File.createTempFile("SortedInput_" +
+       new Random().nextInt(),".txt" );
+   GenData.generateText(dataLines, inputFile, inputType);
+
+   // the constructor already records the input path, so no setInput call
+   MultiJobRunner runner = new MultiJobRunner(inputFile.getAbsolutePath(),
+       output, jarFile );
+   runner.setNumMaps(numMaps);
+   runner.setNumReduces(numReduces);
+   runner.setDataLines(dataLines);
+   runner.setIgnoreOutput(ignoreOutput);
+   runner.setVerbose(verbose);
+
+   // 0 means -times was not given; keep the runner's own default then
+   if( 0 != numJobs ){
+     runner.setNumJobs(numJobs);
+   }
+
+   try{
+     runner.run();
+   }catch(IOException e){
+     e.printStackTrace();
+     // report the failure through the exit status as well
+     System.exit(-1);
+   }
+ }
+
+ /**
+  * Returns the argument at index, which is the value of the option just
+  * before it. Prints the usage and exits when the command line ends
+  * before the value.
+  */
+ private static String optionValue(String[] args, int index, String usage) {
+   if (index >= args.length) {
+     System.err.println("Missing value for option " + args[index - 1]);
+     System.err.println(usage);
+     System.exit(-1);
+   }
+   return args[index];
+ }
+
+ /**
+  * Same as optionValue, but parses the value as an int. Prints the usage
+  * and exits when the value is not a valid integer.
+  */
+ private static int intOptionValue(String[] args, int index, String usage) {
+   String raw = optionValue(args, index, usage);
+   try {
+     return Integer.parseInt(raw);
+   } catch (NumberFormatException e) {
+     System.err.println("Not a number: " + raw +
+         " (value of option " + args[index - 1] + ")");
+     System.err.println(usage);
+     System.exit(-1);
+     return 0; // not reached
+   }
+ }
+
+}