Posted to mapreduce-commits@hadoop.apache.org by cd...@apache.org on 2009/08/28 02:12:21 UTC

svn commit: r808686 [1/9] - in /hadoop/mapreduce/trunk: ./ ivy/ src/java/org/apache/hadoop/mapred/ src/test/mapred/org/apache/hadoop/tools/rumen/ src/test/tools/ src/test/tools/data/ src/test/tools/data/rumen/ src/test/tools/data/rumen/histogram-tests/...

Author: cdouglas
Date: Fri Aug 28 00:12:18 2009
New Revision: 808686

URL: http://svn.apache.org/viewvc?rev=808686&view=rev
Log:
MAPREDUCE-751. Add Rumen, a tool for extracting statistics from job tracker
logs and generating job traces for simulation and analysis.
Contributed by Dick King and Guanying Wang
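
For orientation, the sketch below shows one way the new tool can be driven, using only the entry points and flags exercised by the tests added in this commit (TestRumenJobTraces.testSmallTrace and TestZombieJob.setUp). The driver class and the file paths are placeholders, not part of the commit; it is a sketch, not a supported command-line interface.

    package org.apache.hadoop.tools.rumen;

    import java.io.File;
    import java.io.FileReader;

    // Hypothetical driver; the flags and calls mirror the new tests.
    class RumenQuickStart {
      public static void main(String[] argv) throws Exception {
        // 1. Digest a job tracker log, writing a cluster topology and a
        //    job trace, as TestRumenJobTraces does. Paths are placeholders.
        String[] args = { "-v1",
            "-write-topology", "/tmp/topology.json",
            "-write-job-trace", "/tmp/trace.json",
            "/tmp/sample-job-tracker-logs" };
        HadoopLogsAnalyzer.main(args);

        // 2. Read the trace back as simulated jobs, as TestZombieJob does.
        Parser parser = new Parser(new FileReader(new File("/tmp/trace.json")));
        parser.readTopology(new File("/tmp/topology.json"));

        JobStory job = parser.getNextJob();   // one simulated job from the trace
        ZombieJob zombie = (ZombieJob) job;
        System.out.println(job.getNumberMaps() + " maps, "
            + job.getNumberReduces() + " reduces, outcome "
            + zombie.getLoggedJob().getOutcome());
      }
    }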


Added:
    hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/
    hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/HistogramRawTestData.java
    hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/TestHistograms.java
    hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/TestPiecewiseLinearInterpolation.java
    hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenJobTraces.java
    hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/TestZombieJob.java
    hadoop/mapreduce/trunk/src/test/tools/
    hadoop/mapreduce/trunk/src/test/tools/data/
    hadoop/mapreduce/trunk/src/test/tools/data/rumen/
    hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/
    hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/gold-minimal.json
    hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/gold-one-value-many-repeats.json
    hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/gold-only-one-value.json
    hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/gold-three-values.json
    hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/input-minimal.json
    hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/input-one-value-many-repeats.json
    hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/input-only-one-value.json
    hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/input-three-values.json
    hadoop/mapreduce/trunk/src/test/tools/data/rumen/small-trace-test/
    hadoop/mapreduce/trunk/src/test/tools/data/rumen/small-trace-test/job-tracker-logs-topology-output
    hadoop/mapreduce/trunk/src/test/tools/data/rumen/small-trace-test/job-tracker-logs-trace-output
    hadoop/mapreduce/trunk/src/test/tools/data/rumen/small-trace-test/sample-job-tracker-logs
    hadoop/mapreduce/trunk/src/test/tools/data/rumen/zombie/
    hadoop/mapreduce/trunk/src/test/tools/data/rumen/zombie/input-topology.json
    hadoop/mapreduce/trunk/src/test/tools/data/rumen/zombie/input-trace.json
    hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/
    hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/CDFPiecewiseLinearRandomGenerator.java
    hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/CDFRandomGenerator.java
    hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/DeepCompare.java
    hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/DeepInequalityException.java
    hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/HadoopLogsAnalyzer.java
    hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/Histogram.java
    hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/JobStory.java
    hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/LogRecordType.java
    hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/LoggedDiscreteCDF.java
    hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/LoggedJob.java
    hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/LoggedLocation.java
    hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/LoggedNetworkTopology.java
    hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/LoggedSingleRelativeRanking.java
    hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/LoggedTask.java
    hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/LoggedTaskAttempt.java
    hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/MapTaskAttemptInfo.java
    hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/Pair.java
    hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/ParsedConfigFile.java
    hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/ParsedHost.java
    hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/ParsedLine.java
    hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/Parser.java
    hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/ReduceTaskAttemptInfo.java
    hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/TaskAttemptInfo.java
    hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/TaskInfo.java
    hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/TreePath.java
    hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/ZombieJob.java
Modified:
    hadoop/mapreduce/trunk/CHANGES.txt
    hadoop/mapreduce/trunk/build.xml
    hadoop/mapreduce/trunk/ivy.xml
    hadoop/mapreduce/trunk/ivy/libraries.properties
    hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/TaskStatus.java

Modified: hadoop/mapreduce/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/CHANGES.txt?rev=808686&r1=808685&r2=808686&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/CHANGES.txt (original)
+++ hadoop/mapreduce/trunk/CHANGES.txt Fri Aug 28 00:12:18 2009
@@ -81,6 +81,10 @@
     MAPREDUCE-824. Add support for a hierarchy of queues in the capacity 
     scheduler. (Rahul Kumar Singh via yhemanth)
 
+    MAPREDUCE-751. Add Rumen, a tool for extracting statistics from job tracker
+    logs and generating job traces for simulation and analysis. (Dick King via
+    cdouglas)
+
   IMPROVEMENTS
 
     MAPREDUCE-816. Rename "local" mysql import to "direct" in Sqoop.

Modified: hadoop/mapreduce/trunk/build.xml
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/build.xml?rev=808686&r1=808685&r2=808686&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/build.xml (original)
+++ hadoop/mapreduce/trunk/build.xml Fri Aug 28 00:12:18 2009
@@ -112,6 +112,8 @@
   <property name="test.junit.haltonfailure" value="no" />
   <property name="test.junit.maxmemory" value="512m" />
 
+  <property name="test.tools.input.dir" value="${basedir}/src/test/tools/data" />
+
   <property name="test.mapred.build.classes" value="${test.build.dir}/mapred/classes"/>
   <property name="test.mapred.commit.tests.file" value="${test.src.dir}/commit-tests" />
   <property name="test.mapred.all.tests.file" value="${test.src.dir}/all-tests" />
@@ -372,7 +374,7 @@
 
   <target name="compile-core" depends="clover, compile-mapred-classes, compile-c++" description="Compile core only"/> 
 
-  <target name="compile-contrib" depends="compile-core,compile-c++-libhdfs">
+  <target name="compile-contrib" depends="compile-core,tools,compile-c++-libhdfs">
      <subant target="compile">
         <property name="version" value="${version}"/>
         <property name="hadoop-core.version" value="${hadoop-core.version}"/>
@@ -564,6 +566,7 @@
         dir="${basedir}" timeout="${test.timeout}"
         errorProperty="tests.failed" failureProperty="tests.failed">
         <sysproperty key="test.build.data" value="${test.build.data}"/>
+        <sysproperty key="test.tools.input.dir" value = "${test.tools.input.dir}"/>
         <sysproperty key="test.cache.data" value="${test.cache.data}"/>     
         <sysproperty key="test.debug.data" value="${test.debug.data}"/>
         <sysproperty key="hadoop.log.dir" value="${test.log.dir}"/>

Modified: hadoop/mapreduce/trunk/ivy.xml
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/ivy.xml?rev=808686&r1=808685&r2=808686&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/ivy.xml (original)
+++ hadoop/mapreduce/trunk/ivy.xml Fri Aug 28 00:12:18 2009
@@ -275,7 +275,11 @@
       conf="common->default"/>
     <dependency org="org.codehaus.jackson"
       name="jackson-mapper-asl"
-      rev="1.0.1"
+      rev="${jackson.version}"
+      conf="common->default"/>
+    <dependency org="org.codehaus.jackson"
+      name="jackson-core-asl"
+      rev="${jackson.version}"
       conf="common->default"/>
     <dependency org="com.thoughtworks.paranamer"
       name="paranamer"

Modified: hadoop/mapreduce/trunk/ivy/libraries.properties
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/ivy/libraries.properties?rev=808686&r1=808685&r2=808686&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/ivy/libraries.properties (original)
+++ hadoop/mapreduce/trunk/ivy/libraries.properties Fri Aug 28 00:12:18 2009
@@ -49,7 +49,6 @@
 jetty-util.version=6.1.14
 junit.version=4.5
 jdiff.version=1.0.9
-json.version=1.0
 
 kfs.version=0.3
 
@@ -70,3 +69,5 @@
 
 xmlenc.version=0.52
 xerces.version=1.4.4
+
+jackson.version=1.0.1

Modified: hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/TaskStatus.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/TaskStatus.java?rev=808686&r1=808685&r2=808686&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/TaskStatus.java (original)
+++ hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/TaskStatus.java Fri Aug 28 00:12:18 2009
@@ -32,7 +32,7 @@
  * not intended to be a comprehensive piece of data.
  *
  **************************************************/
-abstract class TaskStatus implements Writable, Cloneable {
+public abstract class TaskStatus implements Writable, Cloneable {
   static final Log LOG =
     LogFactory.getLog(TaskStatus.class.getName());
   

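The widened visibility appears to be what lets the new rumen code, which lives outside org.apache.hadoop.mapred, name the nested TaskStatus.State enum; TestZombieJob below, for example, relies on:

    import org.apache.hadoop.mapred.TaskStatus.State;
    // ...
    assertEquals(State.SUCCEEDED, taInfo.getRunState());
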
Added: hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/HistogramRawTestData.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/HistogramRawTestData.java?rev=808686&view=auto
==============================================================================
--- hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/HistogramRawTestData.java (added)
+++ hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/HistogramRawTestData.java Fri Aug 28 00:12:18 2009
@@ -0,0 +1,54 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools.rumen;
+
+import java.util.ArrayList;
+import java.util.List;
+
+class HistogramRawTestData {
+  List<Long> data = new ArrayList<Long>();
+
+  List<Integer> percentiles = new ArrayList<Integer>();
+
+  int scale;
+
+  public List<Integer> getPercentiles() {
+    return percentiles;
+  }
+
+  public void setPercentiles(List<Integer> percentiles) {
+    this.percentiles = percentiles;
+  }
+
+  public int getScale() {
+    return scale;
+  }
+
+  public void setScale(int scale) {
+    this.scale = scale;
+  }
+
+  public List<Long> getData() {
+    return data;
+  }
+
+  public void setData(List<Long> data) {
+    this.data = data;
+  }
+}

Added: hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/TestHistograms.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/TestHistograms.java?rev=808686&view=auto
==============================================================================
--- hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/TestHistograms.java (added)
+++ hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/TestHistograms.java Fri Aug 28 00:12:18 2009
@@ -0,0 +1,186 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools.rumen;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.InputStreamReader;
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.PrintStream;
+
+import junit.framework.TestCase;
+
+import java.util.List;
+
+import org.codehaus.jackson.JsonParseException;
+import org.codehaus.jackson.JsonEncoding;
+import org.codehaus.jackson.JsonGenerator;
+import org.codehaus.jackson.JsonFactory;
+import org.codehaus.jackson.JsonParser;
+import org.codehaus.jackson.map.DeserializationConfig;
+import org.codehaus.jackson.JsonProcessingException;
+import org.codehaus.jackson.map.JsonMappingException;
+import org.codehaus.jackson.map.ObjectMapper;
+
+/**
+ *
+ */
+public class TestHistograms extends TestCase {
+
+  /**
+   * @throws IOException
+   * 
+   *           There should be files in the directory named by
+   *           ${test.tools.input.dir}/rumen/histogram-tests .
+   * 
+   *           There will be pairs of files, inputXxx.json and goldXxx.json .
+   * 
+   *           We read the input file as a HistogramRawTestData in json. Then we
+   *           create a Histogram using the data field, and then a
+   *           LoggedDiscreteCDF using the percentiles and scale field. Finally,
+   *           we read the corresponding goldXxx.json as a LoggedDiscreteCDF and
+   *           deepCompare them.
+   */
+  public void testHistograms() throws IOException {
+    String rootInputDir = System.getProperty("test.tools.input.dir", "");
+
+    File rootInputDirFile = new File(rootInputDir);
+
+    File rootInputFile = new File(rootInputDirFile, "rumen/histogram-tests");
+
+    if (rootInputDir.charAt(rootInputDir.length() - 1) == '/') {
+      rootInputDir = rootInputDir.substring(0, rootInputDir.length() - 1);
+    }
+
+    String[] tests = rootInputFile.list();
+
+    for (int i = 0; i < tests.length; ++i) {
+      if (tests[i].length() > 5 && "input".equals(tests[i].substring(0, 5))) {
+        File inputData = new File(rootInputFile, tests[i]);
+
+        if (!(new File(rootInputFile, "build" + tests[i].substring(5)))
+            .exists()
+            && !(new File(rootInputFile, "gold" + tests[i].substring(5))
+                .exists())
+            && !(new File(rootInputFile, "silver" + tests[i].substring(5))
+                .exists())) {
+          System.out
+              .println("Neither a build nor a gold file exists for the file, "
+                  + inputData.getCanonicalPath());
+
+          continue;
+        }
+
+        LoggedDiscreteCDF newResult = histogramFileToCDF(inputData.getPath());
+
+        if ((new File(rootInputFile, "build" + tests[i].substring(5))).exists()
+            && !(new File(rootInputFile, "gold" + tests[i].substring(5)))
+                .exists()
+            && !(new File(rootInputFile, "silver" + tests[i].substring(5)))
+                .exists()) {
+          try {
+            System.out.println("Building a new gold file for the file, "
+                + inputData.getCanonicalPath());
+            System.out.println("Please inspect it thoroughly and rename it.");
+
+            ObjectMapper mapper = new ObjectMapper();
+            JsonFactory factory = mapper.getJsonFactory();
+            PrintStream ostream = new PrintStream(new File(rootInputFile,
+                "silver" + tests[i].substring(5)));
+            JsonGenerator gen = factory.createJsonGenerator(ostream,
+                JsonEncoding.UTF8);
+            gen.useDefaultPrettyPrinter();
+
+            gen.writeObject(newResult);
+
+            gen.close();
+          } catch (IOException e) {
+            e.printStackTrace();
+          }
+        } else {
+          System.out.println("Testing a Histogram built from the file, "
+              + inputData.getCanonicalPath());
+          File goldCDF = new File(rootInputFile, "gold" + tests[i].substring(5));
+          FileInputStream goldStream = new FileInputStream(goldCDF);
+          BufferedReader goldReader = new BufferedReader(new InputStreamReader(
+              goldStream));
+          ObjectMapper goldMapper = new ObjectMapper();
+          JsonParser goldParser = goldMapper.getJsonFactory().createJsonParser(
+              goldReader);
+          LoggedDiscreteCDF DCDF = goldMapper.readValue(goldParser,
+              LoggedDiscreteCDF.class);
+
+          try {
+            DCDF.deepCompare(newResult, new TreePath(null, "<root>"));
+          } catch (DeepInequalityException e) {
+            String error = e.path.toString();
+
+            assertFalse(error, true);
+          }
+        }
+      }
+    }
+  }
+
+  private static LoggedDiscreteCDF histogramFileToCDF(String filename)
+      throws IOException {
+
+    File inputData = new File(filename);
+
+    FileInputStream dataStream = new FileInputStream(inputData);
+    BufferedReader dataReader = new BufferedReader(new InputStreamReader(
+        dataStream));
+    ObjectMapper dataMapper = new ObjectMapper();
+    dataMapper.configure(
+        DeserializationConfig.Feature.CAN_OVERRIDE_ACCESS_MODIFIERS, true);
+    JsonParser dataParser = dataMapper.getJsonFactory().createJsonParser(
+        dataReader);
+    HistogramRawTestData data = dataMapper.readValue(dataParser,
+        HistogramRawTestData.class);
+
+    Histogram hist = new Histogram();
+
+    List<Long> measurements = data.getData();
+
+    List<Long> typeProbeData = new HistogramRawTestData().getData();
+
+    assertTrue(
+        "The data attribute of a jackson-reconstructed HistogramRawTestData "
+            + " should be a " + typeProbeData.getClass().getName()
+            + ", like a virgin HistogramRawTestData, but it's a "
+            + measurements.getClass().getName(),
+        measurements.getClass() == typeProbeData.getClass());
+
+    for (int j = 0; j < measurements.size(); ++j) {
+      hist.enter(measurements.get(j));
+    }
+
+    LoggedDiscreteCDF result = new LoggedDiscreteCDF();
+    int[] percentiles = new int[data.getPercentiles().size()];
+
+    for (int j = 0; j < data.getPercentiles().size(); ++j) {
+      percentiles[j] = data.getPercentiles().get(j);
+    }
+
+    result.setCDF(hist, percentiles, data.getScale());
+
+    return result;
+  }
+}

Added: hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/TestPiecewiseLinearInterpolation.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/TestPiecewiseLinearInterpolation.java?rev=808686&view=auto
==============================================================================
--- hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/TestPiecewiseLinearInterpolation.java (added)
+++ hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/TestPiecewiseLinearInterpolation.java Fri Aug 28 00:12:18 2009
@@ -0,0 +1,121 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools.rumen;
+
+import java.util.ArrayList;
+
+import junit.framework.TestCase;
+
+public class TestPiecewiseLinearInterpolation extends TestCase {
+
+  static private double maximumRelativeError = 0.002D;
+
+  static private LoggedSingleRelativeRanking makeRR(double ranking, long datum) {
+    LoggedSingleRelativeRanking result = new LoggedSingleRelativeRanking();
+
+    result.setDatum(datum);
+    result.setRelativeRanking(ranking);
+
+    return result;
+  }
+
+  public void testOneRun() {
+    LoggedDiscreteCDF input = new LoggedDiscreteCDF();
+
+    input.setMinimum(100000L);
+    input.setMaximum(1100000L);
+
+    ArrayList<LoggedSingleRelativeRanking> rankings = new ArrayList<LoggedSingleRelativeRanking>();
+
+    rankings.add(makeRR(0.1, 200000L));
+    rankings.add(makeRR(0.5, 800000L));
+    rankings.add(makeRR(0.9, 1000000L));
+
+    input.setRankings(rankings);
+    input.setNumberValues(3);
+
+    CDFRandomGenerator gen = new CDFPiecewiseLinearRandomGenerator(input);
+    Histogram values = new Histogram();
+
+    for (int i = 0; i < 1000000; ++i) {
+      long value = gen.randomValue();
+      values.enter(value);
+    }
+
+    /*
+     * Now we build a percentiles CDF, and compute the sum of the squared
+     * errors between the actual and the predicted percentiles
+     */
+    int[] percentiles = new int[99];
+
+    for (int i = 0; i < 99; ++i) {
+      percentiles[i] = i + 1;
+    }
+
+    long[] result = values.getCDF(100, percentiles);
+    long sumErrorSquares = 0L;
+
+    for (int i = 0; i < 10; ++i) {
+      long error = result[i] - (10000L * i + 100000L);
+      System.out.println("element " + i + ", got " + result[i] + ", expected "
+          + (10000L * i + 100000L) + ", error = " + error);
+      sumErrorSquares += error * error;
+    }
+
+    for (int i = 10; i < 50; ++i) {
+      long error = result[i] - (15000L * i + 50000L);
+      System.out.println("element " + i + ", got " + result[i] + ", expected "
+          + (15000L * i + 50000L) + ", error = " + error);
+      sumErrorSquares += error * error;
+    }
+
+    for (int i = 50; i < 90; ++i) {
+      long error = result[i] - (5000L * i + 550000L);
+      System.out.println("element " + i + ", got " + result[i] + ", expected "
+          + (5000L * i + 550000L) + ", error = " + error);
+      sumErrorSquares += error * error;
+    }
+
+    for (int i = 90; i <= 100; ++i) {
+      long error = result[i] - (10000L * i + 100000L);
+      System.out.println("element " + i + ", got " + result[i] + ", expected "
+          + (10000L * i + 100000L) + ", error = " + error);
+      sumErrorSquares += error * error;
+    }
+
+    // normalize the error
+    double realSumErrorSquares = (double) sumErrorSquares;
+
+    double normalizedError = realSumErrorSquares / 100
+        / rankings.get(1).getDatum() / rankings.get(1).getDatum();
+    double RMSNormalizedError = Math.sqrt(normalizedError);
+
+    System.out.println("sumErrorSquares = " + sumErrorSquares);
+
+    System.out.println("normalizedError: " + normalizedError
+        + ", RMSNormalizedError: " + RMSNormalizedError);
+
+    System.out.println("Cumulative error is " + RMSNormalizedError);
+
+    assertTrue("The RMS relative error per bucket, " + RMSNormalizedError
+        + ", exceeds our tolerance of " + maximumRelativeError,
+        RMSNormalizedError <= maximumRelativeError);
+
+  }
+}

Added: hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenJobTraces.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenJobTraces.java?rev=808686&view=auto
==============================================================================
--- hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenJobTraces.java (added)
+++ hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenJobTraces.java Fri Aug 28 00:12:18 2009
@@ -0,0 +1,336 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools.rumen;
+
+import java.io.BufferedOutputStream;
+import java.io.BufferedReader;
+import java.io.EOFException;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.PrintStream;
+
+import org.codehaus.jackson.JsonParseException;
+import org.codehaus.jackson.JsonParser;
+import org.codehaus.jackson.JsonProcessingException;
+import org.codehaus.jackson.map.JsonMappingException;
+import org.codehaus.jackson.map.ObjectMapper;
+import org.codehaus.jackson.map.DeserializationConfig;
+
+import junit.framework.TestCase;
+
+public class TestRumenJobTraces extends TestCase {
+  public void testSmallTrace() throws IOException {
+    File tempDirectory = new File(System.getProperty("test.build.data", "/tmp"));
+
+    String rootInputDir = System.getProperty("test.tools.input.dir", "");
+    String rootTempDir = System.getProperty("test.build.data", "");
+
+    File rootInputFile = new File(new File(rootInputDir),
+        "rumen/small-trace-test");
+    File tempDirFile = new File(rootTempDir);
+
+    assertFalse("property test.build.data is not defined", ""
+        .equals(rootTempDir));
+    assertFalse("property test.tools.input.dir is not defined", ""
+        .equals(rootInputDir));
+
+    if (rootInputDir.charAt(rootInputDir.length() - 1) == '/') {
+      rootInputDir = rootInputDir.substring(0, rootInputDir.length() - 1);
+    }
+
+    if (rootTempDir.charAt(rootTempDir.length() - 1) == '/') {
+      rootTempDir = rootTempDir.substring(0, rootTempDir.length() - 1);
+    }
+
+    File topologyFile = File.createTempFile("topology", ".json", tempDirFile);
+    File traceFile = File.createTempFile("trace", ".json", tempDirFile);
+
+    File inputFile = new File(rootInputFile, "sample-job-tracker-logs");
+
+    // topologyFile.deleteOnExit();
+    // traceFile.deleteOnExit();
+    System.out.println("topology result file = "
+        + topologyFile.getCanonicalPath());
+    System.out.println("trace result file = " + traceFile.getCanonicalPath());
+
+    String[] args = new String[6];
+
+    args[0] = "-v1";
+
+    args[1] = "-write-topology";
+    args[2] = topologyFile.getPath();
+
+    args[3] = "-write-job-trace";
+    args[4] = traceFile.getPath();
+
+    args[5] = inputFile.getPath();
+
+    assertTrue("The input file " + inputFile.getPath() + " does not exist.",
+        inputFile.canRead());
+    assertTrue("The output topology file " + topologyFile.getPath()
+        + " cannot be written.", topologyFile.canWrite());
+    assertTrue("The output trace file " + traceFile.getPath()
+        + " cannot be written.", traceFile.canWrite());
+
+    PrintStream old_stdout = System.out;
+
+    File stdoutFile = File.createTempFile("stdout", ".text", tempDirFile);
+
+    // stdoutFile.deleteOnExit();
+    System.out.println("stdout file = " + stdoutFile.getCanonicalPath());
+
+    PrintStream enveloped_stdout = new PrintStream(new BufferedOutputStream(
+        new FileOutputStream(stdoutFile)));
+
+    File topologyGoldFile = new File(rootInputFile,
+        "job-tracker-logs-topology-output");
+    File traceGoldFile = new File(rootInputFile,
+        "job-tracker-logs-trace-output");
+
+    try {
+      System.setOut(enveloped_stdout);
+
+      HadoopLogsAnalyzer.main(args);
+
+      enveloped_stdout.close();
+    } finally {
+      System.setOut(old_stdout);
+    }
+
+    jsonFileMatchesGold(topologyFile, topologyGoldFile,
+        new LoggedNetworkTopology(), "topology");
+    jsonFileMatchesGold(traceFile, traceGoldFile, new LoggedJob(), "trace");
+
+    System.out
+        .println("These files have been erased because the tests have succeeded.");
+
+    topologyFile.deleteOnExit();
+    traceFile.deleteOnExit();
+    stdoutFile.deleteOnExit();
+  }
+
+  /*
+   * This block of methods is commented out because its methods require huge
+   * test files to support them meaningfully. We expect to be able to fix this
+   * problem in a future release.
+   * 
+   * public void testBulkFilesJobDistro() throws IOException { String args[] = {
+   * "-v1", "-delays", "-runtimes" }; statisticalTest(args,
+   * "rumen/large-test-inputs/monolithic-files",
+   * "rumen/large-test-inputs/gold-bulk-job-distribution.text", true); }
+   * 
+   * public void testIndividualFilesJobDistro() throws IOException { String
+   * args[] = { "-v1", "-delays", "-runtimes" }; statisticalTest(args,
+   * "rumen/large-test-inputs/individual-files",
+   * "rumen/large-test-inputs/gold-individual-job-distribution.text", true); }
+   * 
+   * public void testSpreadsGZFile() throws IOException { String args[] = {
+   * "-v1", "-delays", "-runtimes", "-spreads", "10", "90",
+   * "-job-digest-spectra", "10", "50", "90" }; statisticalTest( args,
+   * "rumen/large-test-inputs/monolithic-files/jobs-0-99-including-truncations.gz"
+   * , "rumen/large-test-inputs/gold-single-gz-task-distribution.text", false);
+   * }
+   * 
+   * public void testSpreadsSingleFile() throws IOException { String args[] = {
+   * "-v1", "-delays", "-runtimes", "-spreads", "10", "90",
+   * "-job-digest-spectra", "10", "50", "90" }; statisticalTest(args,
+   * "rumen/large-test-inputs/monolithic-files/jobs-100-199",
+   * "rumen/large-test-inputs/gold-single-bulk-task-distribution.text", false);
+   * }
+   */
+
+  /**
+   * 
+   * A test case of HadoopLogsAnalyzer.main consists of a call to this function.
+   * It succeeds by returning, fails by performing a JUnit assertion failure, and
+   * can abend with an I/O error if some of the inputs aren't there or some of
+   * the output cannot be written [due to quota, perhaps, or permissions].
+   * 
+   * 
+   * @param args
+   *          these are the arguments that we eventually supply to
+   *          HadoopLogsAnalyzer.main to test its functionality with regard to
+   *          statistical output
+   * @param inputFname
+   *          this is the file or directory name of the test input, relative
+   *          to the test case data directory.
+   * @param goldFilename
+   *          this is the file name of the expected output, relative to the
+   *          test case data directory.
+   * @param inputIsDirectory
+   *          this states whether the input is an entire directory, or a single
+   *          file.
+   * @throws IOException
+   */
+  private void statisticalTest(String args[], String inputFname,
+      String goldFilename, boolean inputIsDirectory) throws IOException {
+    File tempDirectory = new File(System.getProperty("test.build.data", "/tmp"));
+
+    String rootInputDir = System.getProperty("test.tools.input.dir", "");
+    String rootTempDir = System.getProperty("test.build.data", "");
+
+    File rootInputDirFile = new File(new File(rootInputDir), inputFname);
+    File tempDirFile = new File(rootTempDir);
+
+    assertFalse("property test.build.data is not defined", ""
+        .equals(rootTempDir));
+    assertFalse("property test.tools.input.dir is not defined", ""
+        .equals(rootInputDir));
+
+    if (rootInputDir.charAt(rootInputDir.length() - 1) == '/') {
+      rootInputDir = rootInputDir.substring(0, rootInputDir.length() - 1);
+    }
+
+    if (rootTempDir.charAt(rootTempDir.length() - 1) == '/') {
+      rootTempDir = rootTempDir.substring(0, rootTempDir.length() - 1);
+    }
+
+    File jobDistroGold = new File(new File(rootInputDir), goldFilename);
+
+    String[] newArgs = new String[args.length + 1];
+
+    System.arraycopy(args, 0, newArgs, 0, args.length);
+
+    newArgs[args.length + 1 - 1] = rootInputDirFile.getPath();
+
+    String complaint = inputIsDirectory ? " is not a directory."
+        : " does not exist.";
+
+    boolean okay = inputIsDirectory ? rootInputDirFile.isDirectory()
+        : rootInputDirFile.canRead();
+
+    assertTrue("The input file " + rootInputDirFile.getPath() + complaint, okay);
+
+    PrintStream old_stdout = System.out;
+
+    File stdoutFile = File.createTempFile("stdout", "text", tempDirFile);
+
+    // stdoutFile.deleteOnExit();
+
+    PrintStream enveloped_stdout = new PrintStream(new BufferedOutputStream(
+        new FileOutputStream(stdoutFile)));
+
+    try {
+      System.setOut(enveloped_stdout);
+
+      HadoopLogsAnalyzer.main(newArgs);
+
+      enveloped_stdout.close();
+
+      System.setOut(old_stdout);
+
+      assertFilesMatch(stdoutFile, jobDistroGold);
+    } finally {
+      System.setOut(old_stdout);
+    }
+  }
+
+  static private Object readMapper(ObjectMapper mapper, JsonParser parser,
+      Object obj) throws IOException {
+    try {
+      return mapper.readValue(parser, obj.getClass());
+    } catch (EOFException e) {
+      return null;
+    }
+  }
+
+  static private void assertFilesMatch(File result, File gold)
+      throws IOException {
+    System.out.println("Comparing files: " + result.getPath() + " vs. "
+        + gold.getPath());
+
+    int currentLineNumber = 1;
+    FileInputStream goldStream = new FileInputStream(gold);
+    BufferedReader goldReader = new BufferedReader(new InputStreamReader(
+        goldStream));
+    String currentGoldLine = goldReader.readLine();
+
+    FileInputStream resultStream = new FileInputStream(result);
+    BufferedReader resultReader = new BufferedReader(new InputStreamReader(
+        resultStream));
+    String currentResultLine = resultReader.readLine();
+
+    while (currentGoldLine != null && currentResultLine != null
+        && currentGoldLine.equals(currentResultLine)) {
+      ++currentLineNumber;
+
+      currentGoldLine = goldReader.readLine();
+      currentResultLine = resultReader.readLine();
+    }
+
+    if (currentGoldLine == null && currentResultLine == null) {
+      return;
+    }
+
+    assertFalse("Line number " + currentLineNumber + " disagrees", true);
+  }
+
+  static private void jsonFileMatchesGold(File result, File gold, Object obj,
+      String fileDescription) throws IOException {
+    FileInputStream goldStream = new FileInputStream(gold);
+    BufferedReader goldReader = new BufferedReader(new InputStreamReader(
+        goldStream));
+
+    FileInputStream resultStream = new FileInputStream(result);
+    BufferedReader resultReader = new BufferedReader(new InputStreamReader(
+        resultStream));
+
+    ObjectMapper goldMapper = new ObjectMapper();
+    ObjectMapper resultMapper = new ObjectMapper();
+    goldMapper.configure(
+        DeserializationConfig.Feature.CAN_OVERRIDE_ACCESS_MODIFIERS, true);
+    resultMapper.configure(
+        DeserializationConfig.Feature.CAN_OVERRIDE_ACCESS_MODIFIERS, true);
+
+    JsonParser goldParser = goldMapper.getJsonFactory().createJsonParser(
+        goldReader);
+    JsonParser resultParser = resultMapper.getJsonFactory().createJsonParser(
+        resultReader);
+
+    DeepCompare goldJob = (DeepCompare) readMapper(goldMapper, goldParser, obj);
+    DeepCompare resultJob = (DeepCompare) readMapper(resultMapper,
+        resultParser, obj);
+
+    while (goldJob != null && resultJob != null) {
+      try {
+        resultJob.deepCompare(goldJob, new TreePath(null, "<root>"));
+      } catch (DeepInequalityException e) {
+        String error = e.path.toString();
+
+        assertFalse(fileDescription + " mismatches: " + error, true);
+      }
+
+      goldJob = (DeepCompare) readMapper(goldMapper, goldParser, obj);
+      resultJob = (DeepCompare) readMapper(resultMapper, resultParser, obj);
+    }
+
+    if (goldJob != null) {
+      assertFalse(
+          "The Gold File has more logged jobs than the result of the run", true);
+    }
+
+    if (resultJob != null) {
+      assertFalse("The result file has more logged jobs than the Gold File",
+          true);
+    }
+  }
+}

Added: hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/TestZombieJob.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/TestZombieJob.java?rev=808686&view=auto
==============================================================================
--- hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/TestZombieJob.java (added)
+++ hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/TestZombieJob.java Fri Aug 28 00:12:18 2009
@@ -0,0 +1,336 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools.rumen;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.Vector;
+import java.util.List;
+import java.util.ArrayList;
+
+import org.apache.hadoop.mapred.TaskStatus.State;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.TaskType;
+
+import junit.framework.TestCase;
+
+public class TestZombieJob extends TestCase {
+
+  final double epsilon = 0.01;
+  private final int[] attemptTimesPercentiles = new int[] { 10, 50, 90 };
+  private long[] succeededCDF = new long[] { 5268, 5268, 5268, 5268, 5268 };
+  private long[] failedCDF = new long[] { 18592, 18592, 18592, 18592, 18592 };
+  private double[] expectedPs = new double[] { 0.000001, 0.18707660239708182,
+      0.0013027618551328818, 2.605523710265763E-4 };
+
+  List<LoggedJob> loggedJobs = new ArrayList<LoggedJob>();
+  List<JobStory> jobStories = new ArrayList<JobStory>();
+
+  /*
+   * (non-Javadoc)
+   * 
+   * @see junit.framework.TestCase#setUp()
+   */
+  protected void setUp() throws Exception {
+    String rootTempDir = System.getProperty("test.build.data", "/tmp");
+
+    String rootInputDir = System.getProperty("test.tools.input.dir", "");
+
+    File rootInputFile = new File(new File(rootInputDir), "rumen/zombie");
+    File tempDirFile = new File(rootTempDir);
+
+    Parser parser = new Parser(new FileReader(new File(rootInputFile,
+        "input-trace.json")));
+
+    parser.readTopology(new File(rootInputFile, "input-topology.json"));
+
+    JobStory job = null;
+    for (int i = 0; i < 4; i++) {
+      job = parser.getNextJob();
+      ZombieJob zJob = (ZombieJob) job;
+      LoggedJob loggedJob = zJob.getLoggedJob();
+      System.out.println(i + ":" + job.getNumberMaps() + "m, "
+          + job.getNumberReduces() + "r");
+      System.out
+          .println(loggedJob.getOutcome() + ", " + loggedJob.getJobtype());
+
+      System.out.println("Input Splits -- " + job.getInputSplits().length
+          + ", " + job.getNumberMaps());
+      /*
+       * for (InputSplit split: job.getInputSplits()) {
+       * System.out.print(split.getLength() + ": "); for (String location:
+       * split.getLocations()) { System.out.print(location + ","); }
+       * System.out.println(); }
+       */
+
+      System.out.println("Successful Map CDF -------");
+      for (LoggedDiscreteCDF cdf : loggedJob.getSuccessfulMapAttemptCDFs()) {
+        System.out.println(cdf.getNumberValues() + ": " + cdf.getMinimum()
+            + "--" + cdf.getMaximum());
+        for (LoggedSingleRelativeRanking ranking : cdf.getRankings()) {
+          System.out.println("   " + ranking.getRelativeRanking() + ":"
+              + ranking.getDatum());
+        }
+      }
+      System.out.println("Failed Map CDF -----------");
+      for (LoggedDiscreteCDF cdf : loggedJob.getFailedMapAttemptCDFs()) {
+        System.out.println(cdf.getNumberValues() + ": " + cdf.getMinimum()
+            + "--" + cdf.getMaximum());
+        for (LoggedSingleRelativeRanking ranking : cdf.getRankings()) {
+          System.out.println("   " + ranking.getRelativeRanking() + ":"
+              + ranking.getDatum());
+        }
+      }
+      System.out.println("Successful Reduce CDF ----");
+      LoggedDiscreteCDF cdf = loggedJob.getSuccessfulReduceAttemptCDF();
+      System.out.println(cdf.getNumberValues() + ": " + cdf.getMinimum() + "--"
+          + cdf.getMaximum());
+      for (LoggedSingleRelativeRanking ranking : cdf.getRankings()) {
+        System.out.println("   " + ranking.getRelativeRanking() + ":"
+            + ranking.getDatum());
+      }
+      System.out.println("Failed Reduce CDF --------");
+      cdf = loggedJob.getFailedReduceAttemptCDF();
+      System.out.println(cdf.getNumberValues() + ": " + cdf.getMinimum() + "--"
+          + cdf.getMaximum());
+      for (LoggedSingleRelativeRanking ranking : cdf.getRankings()) {
+        System.out.println("   " + ranking.getRelativeRanking() + ":"
+            + ranking.getDatum());
+      }
+      System.out.print("map attempts to success -- ");
+      for (double p : loggedJob.getMapperTriesToSucceed()) {
+        System.out.print(p + ", ");
+      }
+      System.out.println();
+      System.out.println("===============");
+
+      loggedJobs.add(loggedJob);
+      jobStories.add(job);
+    }
+
+    super.setUp();
+  }
+
+  /*
+   * (non-Javadoc)
+   * 
+   * @see junit.framework.TestCase#tearDown()
+   */
+  protected void tearDown() throws Exception {
+    super.tearDown();
+  }
+
+  public void testFirstJob() throws FileNotFoundException, IOException,
+      InterruptedException {
+    // 20th job seems reasonable: "totalMaps":329,"totalReduces":101
+    // successful map: 80 node-local, 196 rack-local, 53 rack-remote, 2 unknown
+    // failed map: 0-0-0-1
+    // successful reduce: 99 failed reduce: 13
+    // map attempts to success -- 0.9969879518072289, 0.0030120481927710845,
+    JobStory job = jobStories.get(0);
+    assertEquals(1, job.getNumberMaps());
+    assertEquals(1, job.getNumberReduces());
+
+    // get splits
+
+    TaskAttemptInfo taInfo = null;
+    long expectedRuntime = 2423;
+    // get a succeeded map task attempt, expect the exact same task attempt
+    taInfo = job.getMapTaskAttemptInfoAdjusted(14, 0, 1);
+    assertEquals(expectedRuntime, taInfo.getRuntime());
+    assertEquals(State.SUCCEEDED, taInfo.getRunState());
+
+    // get a succeeded map attempt, but reschedule with different locality.
+    taInfo = job.getMapTaskAttemptInfoAdjusted(14, 0, 2);
+    assertEquals(State.SUCCEEDED, taInfo.getRunState());
+    taInfo = job.getMapTaskAttemptInfoAdjusted(14, 0, 0);
+    assertEquals(State.SUCCEEDED, taInfo.getRunState());
+
+    expectedRuntime = 97502;
+    // get a succeeded reduce task attempt, expect the exact same task attempt
+    taInfo = job.getTaskAttemptInfo(TaskType.REDUCE, 14, 0);
+    assertEquals(State.SUCCEEDED, taInfo.getRunState());
+
+    // get a failed reduce task attempt, expect the exact same task attempt
+    taInfo = job.getTaskAttemptInfo(TaskType.REDUCE, 14, 0);
+    assertEquals(State.SUCCEEDED, taInfo.getRunState());
+
+    // get a non-existent reduce task attempt, expect a made-up task attempt
+    // TODO fill in test case
+  }
+
+  public void testSecondJob() throws FileNotFoundException, IOException,
+      InterruptedException {
+    // 7th job has many failed tasks.
+    // 3204 m, 0 r
+    // successful maps 497-586-23-1, failed maps 0-0-0-2714
+    // map attempts to success -- 0.8113600833767587, 0.18707660239708182,
+    // 0.0013027618551328818, 2.605523710265763E-4,
+    JobStory job = jobStories.get(1);
+    assertEquals(20, job.getNumberMaps());
+    assertEquals(1, job.getNumberReduces());
+
+    TaskAttemptInfo taInfo = null;
+    // get a succeeded map task attempt
+    taInfo = job.getMapTaskAttemptInfoAdjusted(17, 1, 1);
+    assertEquals(State.SUCCEEDED, taInfo.getRunState());
+
+    // get a succeeded map task attempt, with different locality
+    taInfo = job.getMapTaskAttemptInfoAdjusted(17, 1, 2);
+    assertEquals(State.SUCCEEDED, taInfo.getRunState());
+    taInfo = job.getMapTaskAttemptInfoAdjusted(17, 1, 0);
+    assertEquals(State.SUCCEEDED, taInfo.getRunState());
+
+    // get a failed map task attempt
+    taInfo = job.getMapTaskAttemptInfoAdjusted(14, 0, 1);
+    assertEquals(1927, taInfo.getRuntime());
+    assertEquals(State.SUCCEEDED, taInfo.getRunState());
+
+    // get a failed map task attempt, with different locality
+    // TODO: this test does not make sense here, because I don't have
+    // available data set.
+  }
+
+  public void testFourthJob() throws FileNotFoundException, IOException,
+      InterruptedException {
+    // 7th job has many failed tasks.
+    // 3204 m, 0 r
+    // successful maps 497-586-23-1, failed maps 0-0-0-2714
+    // map attempts to success -- 0.8113600833767587, 0.18707660239708182,
+    // 0.0013027618551328818, 2.605523710265763E-4,
+    JobStory job = jobStories.get(3);
+    assertEquals(131, job.getNumberMaps());
+    assertEquals(47, job.getNumberReduces());
+
+    TaskAttemptInfo taInfo = null;
+    // get a succeeded map task attempt
+    long runtime = 5268;
+    taInfo = job.getMapTaskAttemptInfoAdjusted(113, 1, 1);
+    assertEquals(State.SUCCEEDED, taInfo.getRunState());
+    assertEquals(runtime, taInfo.getRuntime());
+
+    // get a succeeded map task attempt, with different locality
+    taInfo = job.getMapTaskAttemptInfoAdjusted(113, 1, 2);
+    assertEquals(State.SUCCEEDED, taInfo.getRunState());
+    assertEquals(runtime, taInfo.getRuntime() / 2);
+    taInfo = job.getMapTaskAttemptInfoAdjusted(113, 1, 0);
+    assertEquals(State.SUCCEEDED, taInfo.getRunState());
+    assertEquals((long) (runtime / 1.5), taInfo.getRuntime());
+
+    // get a failed map task attempt
+    taInfo = job.getMapTaskAttemptInfoAdjusted(113, 0, 1);
+    assertEquals(18592, taInfo.getRuntime());
+    assertEquals(State.FAILED, taInfo.getRunState());
+  }
+
+  public void testMakeUpInfo() throws FileNotFoundException, IOException,
+      InterruptedException {
+    // get many non-existent tasks
+    // total 3204 map tasks, 3300 is a non-existent task.
+    checkMakeUpTask(jobStories.get(3), 113, 1);
+  }
+
+  private void checkMakeUpTask(JobStory job, int taskNumber, int locality) {
+    TaskAttemptInfo taInfo = null;
+
+    Histogram sampleSucceeded = new Histogram();
+    Histogram sampleFailed = new Histogram();
+    Vector<Integer> sampleAttempts = new Vector<Integer>();
+    for (int i = 0; i < 100000; i++) {
+      int attemptId = 0;
+      while (true) {
+        taInfo = job.getMapTaskAttemptInfoAdjusted(taskNumber, attemptId, 1);
+        if (taInfo.getRunState() == State.SUCCEEDED) {
+          sampleSucceeded.enter(taInfo.getRuntime());
+          break;
+        }
+        sampleFailed.enter(taInfo.getRuntime());
+        attemptId++;
+      }
+      sampleAttempts.add(attemptId);
+    }
+
+    // check state distribution
+    int[] countTries = new int[] { 0, 0, 0, 0 };
+    for (int attempts : sampleAttempts) {
+      assertTrue(attempts < 4);
+      countTries[attempts]++;
+    }
+    /*
+     * System.out.print("Generated map attempts to success -- "); for (int
+     * count: countTries) { System.out.print((double)count/sampleAttempts.size()
+     * + ", "); } System.out.println(); System.out.println("===============");
+     */
+    for (int i = 0; i < 4; i++) {
+      int count = countTries[i];
+      double p = (double) count / sampleAttempts.size();
+      assertTrue(expectedPs[i] - p < epsilon);
+    }
+
+    // check succeeded attempts runtime distribution
+    long[] expectedCDF = succeededCDF;
+    LoggedDiscreteCDF cdf = new LoggedDiscreteCDF();
+    cdf.setCDF(sampleSucceeded, attemptTimesPercentiles, 100);
+    /*
+     * System.out.println("generated succeeded map runtime distribution");
+     * System.out.println(cdf.getNumberValues() + ": " + cdf.getMinimum() + "--"
+     * + cdf.getMaximum()); for (LoggedSingleRelativeRanking ranking:
+     * cdf.getRankings()) { System.out.println("   " +
+     * ranking.getRelativeRanking() + ":" + ranking.getDatum()); }
+     */
+    assertRuntimeEqual(cdf.getMinimum(), expectedCDF[0]);
+    assertRuntimeEqual(cdf.getMaximum(), expectedCDF[4]);
+    for (int i = 0; i < 3; i++) {
+      LoggedSingleRelativeRanking ranking = cdf.getRankings().get(i);
+      assertRuntimeEqual(expectedCDF[i + 1], ranking.getDatum());
+    }
+
+    // check failed attempts runtime distribution
+    expectedCDF = failedCDF;
+    cdf = new LoggedDiscreteCDF();
+    cdf.setCDF(sampleFailed, attemptTimesPercentiles, 100);
+
+    System.out.println("generated failed map runtime distribution");
+    System.out.println(cdf.getNumberValues() + ": " + cdf.getMinimum() + "--"
+        + cdf.getMaximum());
+    for (LoggedSingleRelativeRanking ranking : cdf.getRankings()) {
+      System.out.println("   " + ranking.getRelativeRanking() + ":"
+          + ranking.getDatum());
+    }
+    assertRuntimeEqual(cdf.getMinimum(), expectedCDF[0]);
+    assertRuntimeEqual(cdf.getMaximum(), expectedCDF[4]);
+    for (int i = 0; i < 3; i++) {
+      LoggedSingleRelativeRanking ranking = cdf.getRankings().get(i);
+      assertRuntimeEqual(expectedCDF[i + 1], ranking.getDatum());
+    }
+  }
+
+  private void assertRuntimeEqual(long expected, long generated) {
+    if (expected == 0) {
+      assertTrue(generated > -1000 && generated < 1000);
+    } else {
+      long epsilon = Math.max(expected / 10, 5000);
+      assertTrue(expected - generated > -epsilon);
+      assertTrue(expected - generated < epsilon);
+    }
+  }
+
+}

Added: hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/gold-minimal.json
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/gold-minimal.json?rev=808686&view=auto
==============================================================================
--- hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/gold-minimal.json (added)
+++ hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/gold-minimal.json Fri Aug 28 00:12:18 2009
@@ -0,0 +1,15 @@
+{
+  "minimum" : 12345,
+  "rankings" : [ {
+    "relativeRanking" : 0.25,
+    "datum" : 12345
+  }, {
+    "relativeRanking" : 0.5,
+    "datum" : 2345678901
+  }, {
+    "relativeRanking" : 0.75,
+    "datum" : 2345678902
+  } ],
+  "maximum" : 23456789012,
+  "numberValues" : 5
+}
\ No newline at end of file

Added: hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/gold-one-value-many-repeats.json
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/gold-one-value-many-repeats.json?rev=808686&view=auto
==============================================================================
--- hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/gold-one-value-many-repeats.json (added)
+++ hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/gold-one-value-many-repeats.json Fri Aug 28 00:12:18 2009
@@ -0,0 +1,15 @@
+{
+  "minimum" : 23456789012,
+  "rankings" : [ {
+    "relativeRanking" : 0.25,
+    "datum" : 23456789012
+  }, {
+    "relativeRanking" : 0.5,
+    "datum" : 23456789012
+  }, {
+    "relativeRanking" : 0.75,
+    "datum" : 23456789012
+  } ],
+  "maximum" : 23456789012,
+  "numberValues" : 64
+}
\ No newline at end of file

Added: hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/gold-only-one-value.json
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/gold-only-one-value.json?rev=808686&view=auto
==============================================================================
--- hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/gold-only-one-value.json (added)
+++ hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/gold-only-one-value.json Fri Aug 28 00:12:18 2009
@@ -0,0 +1,15 @@
+{
+  "minimum" : 23456789012,
+  "rankings" : [ {
+    "relativeRanking" : 0.25,
+    "datum" : 23456789012
+  }, {
+    "relativeRanking" : 0.5,
+    "datum" : 23456789012
+  }, {
+    "relativeRanking" : 0.75,
+    "datum" : 23456789012
+  } ],
+  "maximum" : 23456789012,
+  "numberValues" : 1
+}
\ No newline at end of file

Added: hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/gold-three-values.json
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/gold-three-values.json?rev=808686&view=auto
==============================================================================
--- hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/gold-three-values.json (added)
+++ hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/gold-three-values.json Fri Aug 28 00:12:18 2009
@@ -0,0 +1,15 @@
+{
+  "minimum" : 1,
+  "rankings" : [ {
+    "relativeRanking" : 0.25,
+    "datum" : 1
+  }, {
+    "relativeRanking" : 0.5,
+    "datum" : 1
+  }, {
+    "relativeRanking" : 0.75,
+    "datum" : 23456789012
+  } ],
+  "maximum" : 234567890123,
+  "numberValues" : 3
+}
\ No newline at end of file

Added: hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/input-minimal.json
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/input-minimal.json?rev=808686&view=auto
==============================================================================
--- hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/input-minimal.json (added)
+++ hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/input-minimal.json Fri Aug 28 00:12:18 2009
@@ -0,0 +1,17 @@
+{
+	"data" :
+		[
+			12345,
+			2345678901,
+			23456789012,
+			2345678902,
+			23456789012
+		],
+	"percentiles" :
+		[
+			25,
+			50,
+			75
+		],
+	"scale" : 100
+}
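
Read together with gold-minimal.json above, this pair shows the conversion TestHistograms checks: the five data values go into a Histogram, and LoggedDiscreteCDF.setCDF with the percentiles and scale yields the minimum, rankings and maximum recorded in the gold file. A minimal sketch of that conversion, assuming the classes can be driven directly from the same package outside the test harness (the driver class is hypothetical):

    package org.apache.hadoop.tools.rumen;

    // Hypothetical driver; it reproduces what TestHistograms.histogramFileToCDF
    // does for input-minimal.json, with the JSON values inlined.
    class MinimalHistogramSketch {
      public static void main(String[] argv) {
        // The "data" array from input-minimal.json.
        long[] data = { 12345L, 2345678901L, 23456789012L, 2345678902L, 23456789012L };

        Histogram hist = new Histogram();
        for (long datum : data) {
          hist.enter(datum);
        }

        // The "percentiles" and "scale" fields from input-minimal.json.
        int[] percentiles = { 25, 50, 75 };
        LoggedDiscreteCDF cdf = new LoggedDiscreteCDF();
        cdf.setCDF(hist, percentiles, 100);

        // Expected to agree with gold-minimal.json:
        //   minimum 12345, 0.25 -> 12345, 0.5 -> 2345678901,
        //   0.75 -> 2345678902, maximum 23456789012, numberValues 5
        System.out.println("minimum = " + cdf.getMinimum());
        for (LoggedSingleRelativeRanking rr : cdf.getRankings()) {
          System.out.println(rr.getRelativeRanking() + " -> " + rr.getDatum());
        }
        System.out.println("maximum = " + cdf.getMaximum());
        System.out.println("numberValues = " + cdf.getNumberValues());
      }
    }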

Added: hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/input-one-value-many-repeats.json
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/input-one-value-many-repeats.json?rev=808686&view=auto
==============================================================================
--- hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/input-one-value-many-repeats.json (added)
+++ hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/input-one-value-many-repeats.json Fri Aug 28 00:12:18 2009
@@ -0,0 +1,76 @@
+{
+	"data" :
+		[
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012,
+			23456789012
+		],
+	"percentiles" :
+		[
+			25,
+			50,
+			75
+		],
+	"scale" : 100
+}

Added: hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/input-only-one-value.json
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/input-only-one-value.json?rev=808686&view=auto
==============================================================================
--- hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/input-only-one-value.json (added)
+++ hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/input-only-one-value.json Fri Aug 28 00:12:18 2009
@@ -0,0 +1,13 @@
+{
+	"data" :
+		[
+			23456789012
+		],
+	"percentiles" :
+		[
+			25,
+			50,
+			75
+		],
+	"scale" : 100
+}

Added: hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/input-three-values.json
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/input-three-values.json?rev=808686&view=auto
==============================================================================
--- hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/input-three-values.json (added)
+++ hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/input-three-values.json Fri Aug 28 00:12:18 2009
@@ -0,0 +1,15 @@
+{
+	"data" :
+		[
+			1,
+			23456789012,
+			234567890123
+		],
+	"percentiles" :
+		[
+			25,
+			50,
+			75
+		],
+	"scale" : 100
+}