You are viewing a plain text version of this content. The canonical link for it is here.
Posted to mapreduce-commits@hadoop.apache.org by cd...@apache.org on 2009/08/28 02:12:21 UTC
svn commit: r808686 [1/9] - in /hadoop/mapreduce/trunk: ./ ivy/
src/java/org/apache/hadoop/mapred/
src/test/mapred/org/apache/hadoop/tools/rumen/ src/test/tools/
src/test/tools/data/ src/test/tools/data/rumen/
src/test/tools/data/rumen/histogram-tests/...
Author: cdouglas
Date: Fri Aug 28 00:12:18 2009
New Revision: 808686
URL: http://svn.apache.org/viewvc?rev=808686&view=rev
Log:
MAPREDUCE-751. Add Rumen, a tool for extracting statistics from job tracker
logs and generating job traces for simulation and analysis.
Contributed by Dick King and Guanying Wang
Added:
hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/
hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/HistogramRawTestData.java
hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/TestHistograms.java
hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/TestPiecewiseLinearInterpolation.java
hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenJobTraces.java
hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/TestZombieJob.java
hadoop/mapreduce/trunk/src/test/tools/
hadoop/mapreduce/trunk/src/test/tools/data/
hadoop/mapreduce/trunk/src/test/tools/data/rumen/
hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/
hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/gold-minimal.json
hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/gold-one-value-many-repeats.json
hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/gold-only-one-value.json
hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/gold-three-values.json
hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/input-minimal.json
hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/input-one-value-many-repeats.json
hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/input-only-one-value.json
hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/input-three-values.json
hadoop/mapreduce/trunk/src/test/tools/data/rumen/small-trace-test/
hadoop/mapreduce/trunk/src/test/tools/data/rumen/small-trace-test/job-tracker-logs-topology-output
hadoop/mapreduce/trunk/src/test/tools/data/rumen/small-trace-test/job-tracker-logs-trace-output
hadoop/mapreduce/trunk/src/test/tools/data/rumen/small-trace-test/sample-job-tracker-logs
hadoop/mapreduce/trunk/src/test/tools/data/rumen/zombie/
hadoop/mapreduce/trunk/src/test/tools/data/rumen/zombie/input-topology.json
hadoop/mapreduce/trunk/src/test/tools/data/rumen/zombie/input-trace.json
hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/
hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/CDFPiecewiseLinearRandomGenerator.java
hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/CDFRandomGenerator.java
hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/DeepCompare.java
hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/DeepInequalityException.java
hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/HadoopLogsAnalyzer.java
hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/Histogram.java
hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/JobStory.java
hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/LogRecordType.java
hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/LoggedDiscreteCDF.java
hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/LoggedJob.java
hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/LoggedLocation.java
hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/LoggedNetworkTopology.java
hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/LoggedSingleRelativeRanking.java
hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/LoggedTask.java
hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/LoggedTaskAttempt.java
hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/MapTaskAttemptInfo.java
hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/Pair.java
hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/ParsedConfigFile.java
hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/ParsedHost.java
hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/ParsedLine.java
hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/Parser.java
hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/ReduceTaskAttemptInfo.java
hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/TaskAttemptInfo.java
hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/TaskInfo.java
hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/TreePath.java
hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/ZombieJob.java
Modified:
hadoop/mapreduce/trunk/CHANGES.txt
hadoop/mapreduce/trunk/build.xml
hadoop/mapreduce/trunk/ivy.xml
hadoop/mapreduce/trunk/ivy/libraries.properties
hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/TaskStatus.java
Modified: hadoop/mapreduce/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/CHANGES.txt?rev=808686&r1=808685&r2=808686&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/CHANGES.txt (original)
+++ hadoop/mapreduce/trunk/CHANGES.txt Fri Aug 28 00:12:18 2009
@@ -81,6 +81,10 @@
MAPREDUCE-824. Add support for a hierarchy of queues in the capacity
scheduler. (Rahul Kumar Singh via yhemanth)
+ MAPREDUCE-751. Add Rumen, a tool for extracting statistics from job tracker
+ logs and generating job traces for simulation and analysis. (Dick King via
+ cdouglas)
+
IMPROVEMENTS
MAPREDUCE-816. Rename "local" mysql import to "direct" in Sqoop.
Modified: hadoop/mapreduce/trunk/build.xml
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/build.xml?rev=808686&r1=808685&r2=808686&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/build.xml (original)
+++ hadoop/mapreduce/trunk/build.xml Fri Aug 28 00:12:18 2009
@@ -112,6 +112,8 @@
<property name="test.junit.haltonfailure" value="no" />
<property name="test.junit.maxmemory" value="512m" />
+ <property name="test.tools.input.dir" value="${basedir}/src/test/tools/data" />
+
<property name="test.mapred.build.classes" value="${test.build.dir}/mapred/classes"/>
<property name="test.mapred.commit.tests.file" value="${test.src.dir}/commit-tests" />
<property name="test.mapred.all.tests.file" value="${test.src.dir}/all-tests" />
@@ -372,7 +374,7 @@
<target name="compile-core" depends="clover, compile-mapred-classes, compile-c++" description="Compile core only"/>
- <target name="compile-contrib" depends="compile-core,compile-c++-libhdfs">
+ <target name="compile-contrib" depends="compile-core,tools,compile-c++-libhdfs">
<subant target="compile">
<property name="version" value="${version}"/>
<property name="hadoop-core.version" value="${hadoop-core.version}"/>
@@ -564,6 +566,7 @@
dir="${basedir}" timeout="${test.timeout}"
errorProperty="tests.failed" failureProperty="tests.failed">
<sysproperty key="test.build.data" value="${test.build.data}"/>
+ <sysproperty key="test.tools.input.dir" value = "${test.tools.input.dir}"/>
<sysproperty key="test.cache.data" value="${test.cache.data}"/>
<sysproperty key="test.debug.data" value="${test.debug.data}"/>
<sysproperty key="hadoop.log.dir" value="${test.log.dir}"/>
Modified: hadoop/mapreduce/trunk/ivy.xml
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/ivy.xml?rev=808686&r1=808685&r2=808686&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/ivy.xml (original)
+++ hadoop/mapreduce/trunk/ivy.xml Fri Aug 28 00:12:18 2009
@@ -275,7 +275,11 @@
conf="common->default"/>
<dependency org="org.codehaus.jackson"
name="jackson-mapper-asl"
- rev="1.0.1"
+ rev="${jackson.version}"
+ conf="common->default"/>
+ <dependency org="org.codehaus.jackson"
+ name="jackson-core-asl"
+ rev="${jackson.version}"
conf="common->default"/>
<dependency org="com.thoughtworks.paranamer"
name="paranamer"
Modified: hadoop/mapreduce/trunk/ivy/libraries.properties
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/ivy/libraries.properties?rev=808686&r1=808685&r2=808686&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/ivy/libraries.properties (original)
+++ hadoop/mapreduce/trunk/ivy/libraries.properties Fri Aug 28 00:12:18 2009
@@ -49,7 +49,6 @@
jetty-util.version=6.1.14
junit.version=4.5
jdiff.version=1.0.9
-json.version=1.0
kfs.version=0.3
@@ -70,3 +69,5 @@
xmlenc.version=0.52
xerces.version=1.4.4
+
+jackson.version=1.0.1
Modified: hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/TaskStatus.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/TaskStatus.java?rev=808686&r1=808685&r2=808686&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/TaskStatus.java (original)
+++ hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/TaskStatus.java Fri Aug 28 00:12:18 2009
@@ -32,7 +32,7 @@
* not intended to be a comprehensive piece of data.
*
**************************************************/
-abstract class TaskStatus implements Writable, Cloneable {
+public abstract class TaskStatus implements Writable, Cloneable {
static final Log LOG =
LogFactory.getLog(TaskStatus.class.getName());
Added: hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/HistogramRawTestData.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/HistogramRawTestData.java?rev=808686&view=auto
==============================================================================
--- hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/HistogramRawTestData.java (added)
+++ hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/HistogramRawTestData.java Fri Aug 28 00:12:18 2009
@@ -0,0 +1,54 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools.rumen;
+
+import java.util.ArrayList;
+import java.util.List;
+
+class HistogramRawTestData {
+ List<Long> data = new ArrayList<Long>();
+
+ List<Integer> percentiles = new ArrayList<Integer>();
+
+ int scale;
+
+ public List<Integer> getPercentiles() {
+ return percentiles;
+ }
+
+ public void setPercentiles(List<Integer> percentiles) {
+ this.percentiles = percentiles;
+ }
+
+ public int getScale() {
+ return scale;
+ }
+
+ public void setScale(int scale) {
+ this.scale = scale;
+ }
+
+ public List<Long> getData() {
+ return data;
+ }
+
+ public void setData(List<Long> data) {
+ this.data = data;
+ }
+}
Added: hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/TestHistograms.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/TestHistograms.java?rev=808686&view=auto
==============================================================================
--- hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/TestHistograms.java (added)
+++ hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/TestHistograms.java Fri Aug 28 00:12:18 2009
@@ -0,0 +1,186 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools.rumen;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.InputStreamReader;
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.PrintStream;
+
+import junit.framework.TestCase;
+
+import java.util.List;
+
+import org.codehaus.jackson.JsonParseException;
+import org.codehaus.jackson.JsonEncoding;
+import org.codehaus.jackson.JsonGenerator;
+import org.codehaus.jackson.JsonFactory;
+import org.codehaus.jackson.JsonParser;
+import org.codehaus.jackson.map.DeserializationConfig;
+import org.codehaus.jackson.JsonProcessingException;
+import org.codehaus.jackson.map.JsonMappingException;
+import org.codehaus.jackson.map.ObjectMapper;
+
+/**
+ *
+ */
+public class TestHistograms extends TestCase {
+
+ /**
+ * @throws IOException
+ *
+ * There should be files in the directory named by
+ * ${test.build.data}/rumen/histogram-test .
+ *
+ * There will be pairs of files, inputXxx.json and goldXxx.json .
+ *
+ * We read the input file as a HistogramRawTestData in json. Then we
+ * create a Histogram using the data field, and then a
+ * LoggedDiscreteCDF using the percentiles and scale field. Finally,
+ * we read the corresponding goldXxx.json as a LoggedDiscreteCDF and
+ * deepCompare them.
+ */
+ public void testHistograms() throws IOException {
+ String rootInputDir = System.getProperty("test.tools.input.dir", "");
+
+ File rootInputDirFile = new File(rootInputDir);
+
+ File rootInputFile = new File(rootInputDirFile, "rumen/histogram-tests");
+
+ if (rootInputDir.charAt(rootInputDir.length() - 1) == '/') {
+ rootInputDir = rootInputDir.substring(0, rootInputDir.length() - 1);
+ }
+
+ String[] tests = rootInputFile.list();
+
+ for (int i = 0; i < tests.length; ++i) {
+ if (tests[i].length() > 5 && "input".equals(tests[i].substring(0, 5))) {
+ File inputData = new File(rootInputFile, tests[i]);
+
+ if (!(new File(rootInputFile, "build" + tests[i].substring(5)))
+ .exists()
+ && !(new File(rootInputFile, "gold" + tests[i].substring(5))
+ .exists())
+ && !(new File(rootInputFile, "silver" + tests[i].substring(5))
+ .exists())) {
+ System.out
+ .println("Neither a build nor a gold file exists for the file, "
+ + inputData.getCanonicalPath());
+
+ continue;
+ }
+
+ LoggedDiscreteCDF newResult = histogramFileToCDF(inputData.getPath());
+
+ if ((new File(rootInputFile, "build" + tests[i].substring(5))).exists()
+ && !(new File(rootInputFile, "gold" + tests[i].substring(5)))
+ .exists()
+ && !(new File(rootInputFile, "silver" + tests[i].substring(5)))
+ .exists()) {
+ try {
+ System.out.println("Building a new gold file for the file, "
+ + inputData.getCanonicalPath());
+ System.out.println("Please inspect it thoroughly and rename it.");
+
+ ObjectMapper mapper = new ObjectMapper();
+ JsonFactory factory = mapper.getJsonFactory();
+ PrintStream ostream = new PrintStream(new File(rootInputFile,
+ "silver" + tests[i].substring(5)));
+ JsonGenerator gen = factory.createJsonGenerator(ostream,
+ JsonEncoding.UTF8);
+ gen.useDefaultPrettyPrinter();
+
+ gen.writeObject(newResult);
+
+ gen.close();
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ } else {
+ System.out.println("Testing a Histogram built from the file, "
+ + inputData.getCanonicalPath());
+ File goldCDF = new File(rootInputFile, "gold" + tests[i].substring(5));
+ FileInputStream goldStream = new FileInputStream(goldCDF);
+ BufferedReader goldReader = new BufferedReader(new InputStreamReader(
+ goldStream));
+ ObjectMapper goldMapper = new ObjectMapper();
+ JsonParser goldParser = goldMapper.getJsonFactory().createJsonParser(
+ goldReader);
+ LoggedDiscreteCDF DCDF = goldMapper.readValue(goldParser,
+ LoggedDiscreteCDF.class);
+
+ try {
+ DCDF.deepCompare(newResult, new TreePath(null, "<root>"));
+ } catch (DeepInequalityException e) {
+ String error = e.path.toString();
+
+ assertFalse(error, true);
+ }
+ }
+ }
+ }
+ }
+
+ private static LoggedDiscreteCDF histogramFileToCDF(String filename)
+ throws IOException {
+
+ File inputData = new File(filename);
+
+ FileInputStream dataStream = new FileInputStream(inputData);
+ BufferedReader dataReader = new BufferedReader(new InputStreamReader(
+ dataStream));
+ ObjectMapper dataMapper = new ObjectMapper();
+ dataMapper.configure(
+ DeserializationConfig.Feature.CAN_OVERRIDE_ACCESS_MODIFIERS, true);
+ JsonParser dataParser = dataMapper.getJsonFactory().createJsonParser(
+ dataReader);
+ HistogramRawTestData data = dataMapper.readValue(dataParser,
+ HistogramRawTestData.class);
+
+ Histogram hist = new Histogram();
+
+ List<Long> measurements = data.getData();
+
+ List<Long> typeProbeData = new HistogramRawTestData().getData();
+
+ assertTrue(
+ "The data attribute of a jackson-reconstructed HistogramRawTestData "
+ + " should be a " + typeProbeData.getClass().getName()
+ + ", like a virgin HistogramRawTestData, but it's a "
+ + measurements.getClass().getName(),
+ measurements.getClass() == typeProbeData.getClass());
+
+ for (int j = 0; j < measurements.size(); ++j) {
+ hist.enter(measurements.get(j));
+ }
+
+ LoggedDiscreteCDF result = new LoggedDiscreteCDF();
+ int[] percentiles = new int[data.getPercentiles().size()];
+
+ for (int j = 0; j < data.getPercentiles().size(); ++j) {
+ percentiles[j] = data.getPercentiles().get(j);
+ }
+
+ result.setCDF(hist, percentiles, data.getScale());
+
+ return result;
+ }
+}
Added: hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/TestPiecewiseLinearInterpolation.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/TestPiecewiseLinearInterpolation.java?rev=808686&view=auto
==============================================================================
--- hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/TestPiecewiseLinearInterpolation.java (added)
+++ hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/TestPiecewiseLinearInterpolation.java Fri Aug 28 00:12:18 2009
@@ -0,0 +1,121 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools.rumen;
+
+import java.util.ArrayList;
+
+import junit.framework.TestCase;
+
+public class TestPiecewiseLinearInterpolation extends TestCase {
+
+ static private double maximumRelativeError = 0.002D;
+
+ static private LoggedSingleRelativeRanking makeRR(double ranking, long datum) {
+ LoggedSingleRelativeRanking result = new LoggedSingleRelativeRanking();
+
+ result.setDatum(datum);
+ result.setRelativeRanking(ranking);
+
+ return result;
+ }
+
+ public void testOneRun() {
+ LoggedDiscreteCDF input = new LoggedDiscreteCDF();
+
+ input.setMinimum(100000L);
+ input.setMaximum(1100000L);
+
+ ArrayList<LoggedSingleRelativeRanking> rankings = new ArrayList<LoggedSingleRelativeRanking>();
+
+ rankings.add(makeRR(0.1, 200000L));
+ rankings.add(makeRR(0.5, 800000L));
+ rankings.add(makeRR(0.9, 1000000L));
+
+ input.setRankings(rankings);
+ input.setNumberValues(3);
+
+ CDFRandomGenerator gen = new CDFPiecewiseLinearRandomGenerator(input);
+ Histogram values = new Histogram();
+
+ for (int i = 0; i < 1000000; ++i) {
+ long value = gen.randomValue();
+ values.enter(value);
+ }
+
+ /*
+ * Now we build a percentiles CDF, and compute the sum of the squares of the
+ * actual percentiles vs. the predicted percentiles
+ */
+ int[] percentiles = new int[99];
+
+ for (int i = 0; i < 99; ++i) {
+ percentiles[i] = i + 1;
+ }
+
+ long[] result = values.getCDF(100, percentiles);
+ long sumErrorSquares = 0L;
+
+ for (int i = 0; i < 10; ++i) {
+ long error = result[i] - (10000L * i + 100000L);
+ System.out.println("element " + i + ", got " + result[i] + ", expected "
+ + (10000L * i + 100000L) + ", error = " + error);
+ sumErrorSquares += error * error;
+ }
+
+ for (int i = 10; i < 50; ++i) {
+ long error = result[i] - (15000L * i + 50000L);
+ System.out.println("element " + i + ", got " + result[i] + ", expected "
+ + (15000L * i + 50000L) + ", error = " + error);
+ sumErrorSquares += error * error;
+ }
+
+ for (int i = 50; i < 90; ++i) {
+ long error = result[i] - (5000L * i + 550000L);
+ System.out.println("element " + i + ", got " + result[i] + ", expected "
+ + (5000L * i + 550000L) + ", error = " + error);
+ sumErrorSquares += error * error;
+ }
+
+ for (int i = 90; i <= 100; ++i) {
+ long error = result[i] - (10000L * i + 100000L);
+ System.out.println("element " + i + ", got " + result[i] + ", expected "
+ + (10000L * i + 100000L) + ", error = " + error);
+ sumErrorSquares += error * error;
+ }
+
+ // normalize the error
+ double realSumErrorSquares = (double) sumErrorSquares;
+
+ double normalizedError = realSumErrorSquares / 100
+ / rankings.get(1).getDatum() / rankings.get(1).getDatum();
+ double RMSNormalizedError = Math.sqrt(normalizedError);
+
+ System.out.println("sumErrorSquares = " + sumErrorSquares);
+
+ System.out.println("normalizedError: " + normalizedError
+ + ", RMSNormalizedError: " + RMSNormalizedError);
+
+ System.out.println("Cumulative error is " + RMSNormalizedError);
+
+ assertTrue("The RMS relative error per bucket, " + RMSNormalizedError
+ + ", exceeds our tolerance of " + maximumRelativeError,
+ RMSNormalizedError <= maximumRelativeError);
+
+ }
+}
Added: hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenJobTraces.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenJobTraces.java?rev=808686&view=auto
==============================================================================
--- hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenJobTraces.java (added)
+++ hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenJobTraces.java Fri Aug 28 00:12:18 2009
@@ -0,0 +1,336 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools.rumen;
+
+import java.io.BufferedOutputStream;
+import java.io.BufferedReader;
+import java.io.EOFException;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.PrintStream;
+
+import org.codehaus.jackson.JsonParseException;
+import org.codehaus.jackson.JsonParser;
+import org.codehaus.jackson.JsonProcessingException;
+import org.codehaus.jackson.map.JsonMappingException;
+import org.codehaus.jackson.map.ObjectMapper;
+import org.codehaus.jackson.map.DeserializationConfig;
+
+import junit.framework.TestCase;
+
+public class TestRumenJobTraces extends TestCase {
+ public void testSmallTrace() throws IOException {
+ File tempDirectory = new File(System.getProperty("test.build.data", "/tmp"));
+
+ String rootInputDir = System.getProperty("test.tools.input.dir", "");
+ String rootTempDir = System.getProperty("test.build.data", "");
+
+ File rootInputFile = new File(new File(rootInputDir),
+ "rumen/small-trace-test");
+ File tempDirFile = new File(rootTempDir);
+
+ assertFalse("property test.build.data is not defined", ""
+ .equals(rootTempDir));
+ assertFalse("property test.tools.input.dir is not defined", ""
+ .equals(rootInputDir));
+
+ if (rootInputDir.charAt(rootInputDir.length() - 1) == '/') {
+ rootInputDir = rootInputDir.substring(0, rootInputDir.length() - 1);
+ }
+
+ if (rootTempDir.charAt(rootTempDir.length() - 1) == '/') {
+ rootTempDir = rootTempDir.substring(0, rootTempDir.length() - 1);
+ }
+
+ File topologyFile = File.createTempFile("topology", ".json", tempDirFile);
+ File traceFile = File.createTempFile("trace", ".json", tempDirFile);
+
+ File inputFile = new File(rootInputFile, "sample-job-tracker-logs");
+
+ // topologyFile.deleteOnExit();
+ // traceFile.deleteOnExit();
+ System.out.println("topology result file = "
+ + topologyFile.getCanonicalPath());
+ System.out.println("trace result file = " + traceFile.getCanonicalPath());
+
+ String[] args = new String[6];
+
+ args[0] = "-v1";
+
+ args[1] = "-write-topology";
+ args[2] = topologyFile.getPath();
+
+ args[3] = "-write-job-trace";
+ args[4] = traceFile.getPath();
+
+ args[5] = inputFile.getPath();
+
+ assertTrue("The input file " + inputFile.getPath() + " does not exist.",
+ inputFile.canRead());
+ assertTrue("The output topology file " + topologyFile.getPath()
+ + " cannot be written.", topologyFile.canWrite());
+ assertTrue("The output trace file " + traceFile.getPath()
+ + " cannot be written.", traceFile.canWrite());
+
+ PrintStream old_stdout = System.out;
+
+ File stdoutFile = File.createTempFile("stdout", ".text", tempDirFile);
+
+ // stdoutFile.deleteOnExit();
+ System.out.println("stdout file = " + stdoutFile.getCanonicalPath());
+
+ PrintStream enveloped_stdout = new PrintStream(new BufferedOutputStream(
+ new FileOutputStream(stdoutFile)));
+
+ File topologyGoldFile = new File(rootInputFile,
+ "job-tracker-logs-topology-output");
+ File traceGoldFile = new File(rootInputFile,
+ "job-tracker-logs-trace-output");
+
+ try {
+ System.setOut(enveloped_stdout);
+
+ HadoopLogsAnalyzer.main(args);
+
+ enveloped_stdout.close();
+ } finally {
+ System.setOut(old_stdout);
+ }
+
+ jsonFileMatchesGold(topologyFile, topologyGoldFile,
+ new LoggedNetworkTopology(), "topology");
+ jsonFileMatchesGold(traceFile, traceGoldFile, new LoggedJob(), "trace");
+
+ System.out
+ .println("These files have been erased because the tests have succeeded.");
+
+ topologyFile.deleteOnExit();
+ traceFile.deleteOnExit();
+ stdoutFile.deleteOnExit();
+ }
+
+ /*
+ * This block of methods is commented out because its methods require huge
+ * test files to support them meaningfully. We expect to be able to fix this
+ * problem in a future release.
+ *
+ * public void testBulkFilesJobDistro() throws IOException { String args[] = {
+ * "-v1", "-delays", "-runtimes" }; statisticalTest(args,
+ * "rumen/large-test-inputs/monolithic-files",
+ * "rumen/large-test-inputs/gold-bulk-job-distribution.text", true); }
+ *
+ * public void testIndividualFilesJobDistro() throws IOException { String
+ * args[] = { "-v1", "-delays", "-runtimes" }; statisticalTest(args,
+ * "rumen/large-test-inputs/individual-files",
+ * "rumen/large-test-inputs/gold-individual-job-distribution.text", true); }
+ *
+ * public void testSpreadsGZFile() throws IOException { String args[] = {
+ * "-v1", "-delays", "-runtimes", "-spreads", "10", "90",
+ * "-job-digest-spectra", "10", "50", "90" }; statisticalTest( args,
+ * "rumen/large-test-inputs/monolithic-files/jobs-0-99-including-truncations.gz"
+ * , "rumen/large-test-inputs/gold-single-gz-task-distribution.text", false);
+ * }
+ *
+ * public void testSpreadsSingleFile() throws IOException { String args[] = {
+ * "-v1", "-delays", "-runtimes", "-spreads", "10", "90",
+ * "-job-digest-spectra", "10", "50", "90" }; statisticalTest(args,
+ * "rumen/large-test-inputs/monolithic-files/jobs-100-199",
+ * "rumen/large-test-inputs/gold-single-bulk-task-distribution.text", false);
+ * }
+ */
+
+ /**
+ *
+ * A test case of HadoopLogsAnalyzer.main consists of a call to this function.
+ * It succeeds by returning,fails by performing a junit assertion failure, and
+ * can abend with an I/O error if some of the inputs aren't there or some of
+ * the output cannot be written [due to quota, perhaps, or permissions
+ *
+ *
+ * @param args
+ * these are the arguments that we eventually supply to
+ * HadoopLogsAnalyzer.main to test its functionality with regard to
+ * statistical output
+ * @param inputFname
+ * this is the file name or directory name of the test input
+ * directory relative to the test cases data directory.
+ * @param goldFilename
+ * this is the file name of the expected output relative to the test
+ * cases data directory.
+ * @param inputIsDirectory
+ * this states whether the input is an entire directory, or a single
+ * file.
+ * @throws IOException
+ */
+ private void statisticalTest(String args[], String inputFname,
+ String goldFilename, boolean inputIsDirectory) throws IOException {
+ File tempDirectory = new File(System.getProperty("test.build.data", "/tmp"));
+
+ String rootInputDir = System.getProperty("test.tools.input.dir", "");
+ String rootTempDir = System.getProperty("test.build.data", "");
+
+ File rootInputDirFile = new File(new File(rootInputDir), inputFname);
+ File tempDirFile = new File(rootTempDir);
+
+ assertFalse("property test.build.data is not defined", ""
+ .equals(rootTempDir));
+ assertFalse("property test.tools.input.dir is not defined", ""
+ .equals(rootInputDir));
+
+ if (rootInputDir.charAt(rootInputDir.length() - 1) == '/') {
+ rootInputDir = rootInputDir.substring(0, rootInputDir.length() - 1);
+ }
+
+ if (rootTempDir.charAt(rootTempDir.length() - 1) == '/') {
+ rootTempDir = rootTempDir.substring(0, rootTempDir.length() - 1);
+ }
+
+ File jobDistroGold = new File(new File(rootInputDir), goldFilename);
+
+ String[] newArgs = new String[args.length + 1];
+
+ System.arraycopy(args, 0, newArgs, 0, args.length);
+
+ newArgs[args.length + 1 - 1] = rootInputDirFile.getPath();
+
+ String complaint = inputIsDirectory ? " is not a directory."
+ : " does not exist.";
+
+ boolean okay = inputIsDirectory ? rootInputDirFile.isDirectory()
+ : rootInputDirFile.canRead();
+
+ assertTrue("The input file " + rootInputDirFile.getPath() + complaint, okay);
+
+ PrintStream old_stdout = System.out;
+
+ File stdoutFile = File.createTempFile("stdout", "text", tempDirFile);
+
+ // stdoutFile.deleteOnExit();
+
+ PrintStream enveloped_stdout = new PrintStream(new BufferedOutputStream(
+ new FileOutputStream(stdoutFile)));
+
+ try {
+ System.setOut(enveloped_stdout);
+
+ HadoopLogsAnalyzer.main(newArgs);
+
+ enveloped_stdout.close();
+
+ System.setOut(old_stdout);
+
+ assertFilesMatch(stdoutFile, jobDistroGold);
+ } finally {
+ System.setOut(old_stdout);
+ }
+ }
+
+ static private Object readMapper(ObjectMapper mapper, JsonParser parser,
+ Object obj) throws IOException {
+ try {
+ return mapper.readValue(parser, obj.getClass());
+ } catch (EOFException e) {
+ return null;
+ }
+ }
+
+ static private void assertFilesMatch(File result, File gold)
+ throws IOException {
+ System.out.println("Comparing files: " + result.getPath() + " vrs. "
+ + gold.getPath());
+
+ int currentLineNumber = 1;
+ FileInputStream goldStream = new FileInputStream(gold);
+ BufferedReader goldReader = new BufferedReader(new InputStreamReader(
+ goldStream));
+ String currentGoldLine = goldReader.readLine();
+
+ FileInputStream resultStream = new FileInputStream(result);
+ BufferedReader resultReader = new BufferedReader(new InputStreamReader(
+ resultStream));
+ String currentResultLine = resultReader.readLine();
+
+ while (currentGoldLine != null && currentResultLine != null
+ && currentGoldLine.equals(currentResultLine)) {
+ ++currentLineNumber;
+
+ currentGoldLine = goldReader.readLine();
+ currentResultLine = resultReader.readLine();
+ }
+
+ if (currentGoldLine == null && currentResultLine == null) {
+ return;
+ }
+
+ assertFalse("Line number " + currentLineNumber + " disagrees", true);
+ }
+
+ static private void jsonFileMatchesGold(File result, File gold, Object obj,
+ String fileDescription) throws IOException {
+ FileInputStream goldStream = new FileInputStream(gold);
+ BufferedReader goldReader = new BufferedReader(new InputStreamReader(
+ goldStream));
+
+ FileInputStream resultStream = new FileInputStream(result);
+ BufferedReader resultReader = new BufferedReader(new InputStreamReader(
+ resultStream));
+
+ ObjectMapper goldMapper = new ObjectMapper();
+ ObjectMapper resultMapper = new ObjectMapper();
+ goldMapper.configure(
+ DeserializationConfig.Feature.CAN_OVERRIDE_ACCESS_MODIFIERS, true);
+ resultMapper.configure(
+ DeserializationConfig.Feature.CAN_OVERRIDE_ACCESS_MODIFIERS, true);
+
+ JsonParser goldParser = goldMapper.getJsonFactory().createJsonParser(
+ goldReader);
+ JsonParser resultParser = resultMapper.getJsonFactory().createJsonParser(
+ resultReader);
+
+ DeepCompare goldJob = (DeepCompare) readMapper(goldMapper, goldParser, obj);
+ DeepCompare resultJob = (DeepCompare) readMapper(resultMapper,
+ resultParser, obj);
+
+ while (goldJob != null && resultJob != null) {
+ try {
+ resultJob.deepCompare(goldJob, new TreePath(null, "<root>"));
+ } catch (DeepInequalityException e) {
+ String error = e.path.toString();
+
+ assertFalse(fileDescription + " mismatches: " + error, true);
+ }
+
+ goldJob = (DeepCompare) readMapper(goldMapper, goldParser, obj);
+ resultJob = (DeepCompare) readMapper(resultMapper, resultParser, obj);
+ }
+
+ if (goldJob != null) {
+ assertFalse(
+ "The Gold File has more logged jobs than the result of the run", true);
+ }
+
+ if (resultJob != null) {
+ assertFalse("The result file has more logged jobs than the Gold File",
+ true);
+ }
+ }
+}
Added: hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/TestZombieJob.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/TestZombieJob.java?rev=808686&view=auto
==============================================================================
--- hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/TestZombieJob.java (added)
+++ hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/TestZombieJob.java Fri Aug 28 00:12:18 2009
@@ -0,0 +1,336 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools.rumen;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.Vector;
+import java.util.List;
+import java.util.ArrayList;
+
+import org.apache.hadoop.mapred.TaskStatus.State;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.TaskType;
+
+import junit.framework.TestCase;
+
+public class TestZombieJob extends TestCase {
+
+ final double epsilon = 0.01;
+ private final int[] attemptTimesPercentiles = new int[] { 10, 50, 90 };
+ private long[] succeededCDF = new long[] { 5268, 5268, 5268, 5268, 5268 };
+ private long[] failedCDF = new long[] { 18592, 18592, 18592, 18592, 18592 };
+ private double[] expectedPs = new double[] { 0.000001, 0.18707660239708182,
+ 0.0013027618551328818, 2.605523710265763E-4 };
+
+ List<LoggedJob> loggedJobs = new ArrayList<LoggedJob>();
+ List<JobStory> jobStories = new ArrayList<JobStory>();
+
+ /*
+ * (non-Javadoc)
+ *
+ * @see junit.framework.TestCase#setUp()
+ */
+ protected void setUp() throws Exception {
+ String rootTempDir = System.getProperty("test.build.data", "/tmp");
+
+ String rootInputDir = System.getProperty("test.tools.input.dir", "");
+
+ File rootInputFile = new File(new File(rootInputDir), "rumen/zombie");
+ File tempDirFile = new File(rootTempDir);
+
+ Parser parser = new Parser(new FileReader(new File(rootInputFile,
+ "input-trace.json")));
+
+ parser.readTopology(new File(rootInputFile, "input-topology.json"));
+
+ JobStory job = null;
+ for (int i = 0; i < 4; i++) {
+ job = parser.getNextJob();
+ ZombieJob zJob = (ZombieJob) job;
+ LoggedJob loggedJob = zJob.getLoggedJob();
+ System.out.println(i + ":" + job.getNumberMaps() + "m, "
+ + job.getNumberReduces() + "r");
+ System.out
+ .println(loggedJob.getOutcome() + ", " + loggedJob.getJobtype());
+
+ System.out.println("Input Splits -- " + job.getInputSplits().length
+ + ", " + job.getNumberMaps());
+ /*
+ * for (InputSplit split: job.getInputSplits()) {
+ * System.out.print(split.getLength() + ": "); for (String location:
+ * split.getLocations()) { System.out.print(location + ","); }
+ * System.out.println(); }
+ */
+
+ System.out.println("Successful Map CDF -------");
+ for (LoggedDiscreteCDF cdf : loggedJob.getSuccessfulMapAttemptCDFs()) {
+ System.out.println(cdf.getNumberValues() + ": " + cdf.getMinimum()
+ + "--" + cdf.getMaximum());
+ for (LoggedSingleRelativeRanking ranking : cdf.getRankings()) {
+ System.out.println(" " + ranking.getRelativeRanking() + ":"
+ + ranking.getDatum());
+ }
+ }
+ System.out.println("Failed Map CDF -----------");
+ for (LoggedDiscreteCDF cdf : loggedJob.getFailedMapAttemptCDFs()) {
+ System.out.println(cdf.getNumberValues() + ": " + cdf.getMinimum()
+ + "--" + cdf.getMaximum());
+ for (LoggedSingleRelativeRanking ranking : cdf.getRankings()) {
+ System.out.println(" " + ranking.getRelativeRanking() + ":"
+ + ranking.getDatum());
+ }
+ }
+ System.out.println("Successful Reduce CDF ----");
+ LoggedDiscreteCDF cdf = loggedJob.getSuccessfulReduceAttemptCDF();
+ System.out.println(cdf.getNumberValues() + ": " + cdf.getMinimum() + "--"
+ + cdf.getMaximum());
+ for (LoggedSingleRelativeRanking ranking : cdf.getRankings()) {
+ System.out.println(" " + ranking.getRelativeRanking() + ":"
+ + ranking.getDatum());
+ }
+ System.out.println("Failed Reduce CDF --------");
+ cdf = loggedJob.getFailedReduceAttemptCDF();
+ System.out.println(cdf.getNumberValues() + ": " + cdf.getMinimum() + "--"
+ + cdf.getMaximum());
+ for (LoggedSingleRelativeRanking ranking : cdf.getRankings()) {
+ System.out.println(" " + ranking.getRelativeRanking() + ":"
+ + ranking.getDatum());
+ }
+ System.out.print("map attempts to success -- ");
+ for (double p : loggedJob.getMapperTriesToSucceed()) {
+ System.out.print(p + ", ");
+ }
+ System.out.println();
+ System.out.println("===============");
+
+ loggedJobs.add(loggedJob);
+ jobStories.add(job);
+ }
+
+ super.setUp();
+ }
+
+ /*
+ * (non-Javadoc)
+ *
+ * @see junit.framework.TestCase#tearDown()
+ */
+ protected void tearDown() throws Exception {
+ super.tearDown();
+ }
+
+ public void testFirstJob() throws FileNotFoundException, IOException,
+ InterruptedException {
+ // 20th job seems reasonable: "totalMaps":329,"totalReduces":101
+ // successful map: 80 node-local, 196 rack-local, 53 rack-remote, 2 unknown
+ // failed map: 0-0-0-1
+ // successful reduce: 99 failed reduce: 13
+ // map attempts to success -- 0.9969879518072289, 0.0030120481927710845,
+ JobStory job = jobStories.get(0);
+ assertEquals(1, job.getNumberMaps());
+ assertEquals(1, job.getNumberReduces());
+
+ // get splits
+
+ TaskAttemptInfo taInfo = null;
+ long expectedRuntime = 2423;
+ // get a succeeded map task attempt, expect the exact same task attempt
+ taInfo = job.getMapTaskAttemptInfoAdjusted(14, 0, 1);
+ assertEquals(expectedRuntime, taInfo.getRuntime());
+ assertEquals(State.SUCCEEDED, taInfo.getRunState());
+
+ // get a succeeded map attempt, but reschedule with different locality.
+ taInfo = job.getMapTaskAttemptInfoAdjusted(14, 0, 2);
+ assertEquals(State.SUCCEEDED, taInfo.getRunState());
+ taInfo = job.getMapTaskAttemptInfoAdjusted(14, 0, 0);
+ assertEquals(State.SUCCEEDED, taInfo.getRunState());
+
+ expectedRuntime = 97502;
+ // get a succeeded reduce task attempt, expect the exact same task attempt
+ taInfo = job.getTaskAttemptInfo(TaskType.REDUCE, 14, 0);
+ assertEquals(State.SUCCEEDED, taInfo.getRunState());
+
+ // get a failed reduce task attempt, expect the exact same task attempt
+ taInfo = job.getTaskAttemptInfo(TaskType.REDUCE, 14, 0);
+ assertEquals(State.SUCCEEDED, taInfo.getRunState());
+
+ // get a non-exist reduce task attempt, expect a made-up task attempt
+ // TODO fill in test case
+ }
+
+ public void testSecondJob() throws FileNotFoundException, IOException,
+ InterruptedException {
+ // 7th job has many failed tasks.
+ // 3204 m, 0 r
+ // successful maps 497-586-23-1, failed maps 0-0-0-2714
+ // map attempts to success -- 0.8113600833767587, 0.18707660239708182,
+ // 0.0013027618551328818, 2.605523710265763E-4,
+ JobStory job = jobStories.get(1);
+ assertEquals(20, job.getNumberMaps());
+ assertEquals(1, job.getNumberReduces());
+
+ TaskAttemptInfo taInfo = null;
+ // get a succeeded map task attempt
+ taInfo = job.getMapTaskAttemptInfoAdjusted(17, 1, 1);
+ assertEquals(State.SUCCEEDED, taInfo.getRunState());
+
+ // get a succeeded map task attempt, with different locality
+ taInfo = job.getMapTaskAttemptInfoAdjusted(17, 1, 2);
+ assertEquals(State.SUCCEEDED, taInfo.getRunState());
+ taInfo = job.getMapTaskAttemptInfoAdjusted(17, 1, 0);
+ assertEquals(State.SUCCEEDED, taInfo.getRunState());
+
+ // get a failed map task attempt
+ taInfo = job.getMapTaskAttemptInfoAdjusted(14, 0, 1);
+ assertEquals(1927, taInfo.getRuntime());
+ assertEquals(State.SUCCEEDED, taInfo.getRunState());
+
+ // get a failed map task attempt, with different locality
+ // TODO: this test does not make sense here, because I don't have
+ // available data set.
+ }
+
+ public void testFourthJob() throws FileNotFoundException, IOException,
+ InterruptedException {
+ // 7th job has many failed tasks.
+ // 3204 m, 0 r
+ // successful maps 497-586-23-1, failed maps 0-0-0-2714
+ // map attempts to success -- 0.8113600833767587, 0.18707660239708182,
+ // 0.0013027618551328818, 2.605523710265763E-4,
+ JobStory job = jobStories.get(3);
+ assertEquals(131, job.getNumberMaps());
+ assertEquals(47, job.getNumberReduces());
+
+ TaskAttemptInfo taInfo = null;
+ // get a succeeded map task attempt
+ long runtime = 5268;
+ taInfo = job.getMapTaskAttemptInfoAdjusted(113, 1, 1);
+ assertEquals(State.SUCCEEDED, taInfo.getRunState());
+ assertEquals(runtime, taInfo.getRuntime());
+
+ // get a succeeded map task attempt, with different locality
+ taInfo = job.getMapTaskAttemptInfoAdjusted(113, 1, 2);
+ assertEquals(State.SUCCEEDED, taInfo.getRunState());
+ assertEquals(runtime, taInfo.getRuntime() / 2);
+ taInfo = job.getMapTaskAttemptInfoAdjusted(113, 1, 0);
+ assertEquals(State.SUCCEEDED, taInfo.getRunState());
+ assertEquals((long) (runtime / 1.5), taInfo.getRuntime());
+
+ // get a failed map task attempt
+ taInfo = job.getMapTaskAttemptInfoAdjusted(113, 0, 1);
+ assertEquals(18592, taInfo.getRuntime());
+ assertEquals(State.FAILED, taInfo.getRunState());
+ }
+
+ public void testMakeUpInfo() throws FileNotFoundException, IOException,
+ InterruptedException {
+ // get many non-exist tasks
+ // total 3204 map tasks, 3300 is a non-exist task.
+ checkMakeUpTask(jobStories.get(3), 113, 1);
+ }
+
+ private void checkMakeUpTask(JobStory job, int taskNumber, int locality) {
+ TaskAttemptInfo taInfo = null;
+
+ Histogram sampleSucceeded = new Histogram();
+ Histogram sampleFailed = new Histogram();
+ Vector<Integer> sampleAttempts = new Vector<Integer>();
+ for (int i = 0; i < 100000; i++) {
+ int attemptId = 0;
+ while (true) {
+ taInfo = job.getMapTaskAttemptInfoAdjusted(taskNumber, attemptId, 1);
+ if (taInfo.getRunState() == State.SUCCEEDED) {
+ sampleSucceeded.enter(taInfo.getRuntime());
+ break;
+ }
+ sampleFailed.enter(taInfo.getRuntime());
+ attemptId++;
+ }
+ sampleAttempts.add(attemptId);
+ }
+
+ // check state distribution
+ int[] countTries = new int[] { 0, 0, 0, 0 };
+ for (int attempts : sampleAttempts) {
+ assertTrue(attempts < 4);
+ countTries[attempts]++;
+ }
+ /*
+ * System.out.print("Generated map attempts to success -- "); for (int
+ * count: countTries) { System.out.print((double)count/sampleAttempts.size()
+ * + ", "); } System.out.println(); System.out.println("===============");
+ */
+ for (int i = 0; i < 4; i++) {
+ int count = countTries[i];
+ double p = (double) count / sampleAttempts.size();
+ assertTrue(expectedPs[i] - p < epsilon);
+ }
+
+ // check succeeded attempts runtime distribution
+ long[] expectedCDF = succeededCDF;
+ LoggedDiscreteCDF cdf = new LoggedDiscreteCDF();
+ cdf.setCDF(sampleSucceeded, attemptTimesPercentiles, 100);
+ /*
+ * System.out.println("generated succeeded map runtime distribution");
+ * System.out.println(cdf.getNumberValues() + ": " + cdf.getMinimum() + "--"
+ * + cdf.getMaximum()); for (LoggedSingleRelativeRanking ranking:
+ * cdf.getRankings()) { System.out.println(" " +
+ * ranking.getRelativeRanking() + ":" + ranking.getDatum()); }
+ */
+ assertRuntimeEqual(cdf.getMinimum(), expectedCDF[0]);
+ assertRuntimeEqual(cdf.getMaximum(), expectedCDF[4]);
+ for (int i = 0; i < 3; i++) {
+ LoggedSingleRelativeRanking ranking = cdf.getRankings().get(i);
+ assertRuntimeEqual(expectedCDF[i + 1], ranking.getDatum());
+ }
+
+ // check failed attempts runtime distribution
+ expectedCDF = failedCDF;
+ cdf = new LoggedDiscreteCDF();
+ cdf.setCDF(sampleFailed, attemptTimesPercentiles, 100);
+
+ System.out.println("generated failed map runtime distribution");
+ System.out.println(cdf.getNumberValues() + ": " + cdf.getMinimum() + "--"
+ + cdf.getMaximum());
+ for (LoggedSingleRelativeRanking ranking : cdf.getRankings()) {
+ System.out.println(" " + ranking.getRelativeRanking() + ":"
+ + ranking.getDatum());
+ }
+ assertRuntimeEqual(cdf.getMinimum(), expectedCDF[0]);
+ assertRuntimeEqual(cdf.getMaximum(), expectedCDF[4]);
+ for (int i = 0; i < 3; i++) {
+ LoggedSingleRelativeRanking ranking = cdf.getRankings().get(i);
+ assertRuntimeEqual(expectedCDF[i + 1], ranking.getDatum());
+ }
+ }
+
+ private void assertRuntimeEqual(long expected, long generated) {
+ if (expected == 0) {
+ assertTrue(generated > -1000 && generated < 1000);
+ } else {
+ long epsilon = Math.max(expected / 10, 5000);
+ assertTrue(expected - generated > -epsilon);
+ assertTrue(expected - generated < epsilon);
+ }
+ }
+
+}
Added: hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/gold-minimal.json
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/gold-minimal.json?rev=808686&view=auto
==============================================================================
--- hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/gold-minimal.json (added)
+++ hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/gold-minimal.json Fri Aug 28 00:12:18 2009
@@ -0,0 +1,15 @@
+{
+ "minimum" : 12345,
+ "rankings" : [ {
+ "relativeRanking" : 0.25,
+ "datum" : 12345
+ }, {
+ "relativeRanking" : 0.5,
+ "datum" : 2345678901
+ }, {
+ "relativeRanking" : 0.75,
+ "datum" : 2345678902
+ } ],
+ "maximum" : 23456789012,
+ "numberValues" : 5
+}
\ No newline at end of file
Added: hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/gold-one-value-many-repeats.json
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/gold-one-value-many-repeats.json?rev=808686&view=auto
==============================================================================
--- hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/gold-one-value-many-repeats.json (added)
+++ hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/gold-one-value-many-repeats.json Fri Aug 28 00:12:18 2009
@@ -0,0 +1,15 @@
+{
+ "minimum" : 23456789012,
+ "rankings" : [ {
+ "relativeRanking" : 0.25,
+ "datum" : 23456789012
+ }, {
+ "relativeRanking" : 0.5,
+ "datum" : 23456789012
+ }, {
+ "relativeRanking" : 0.75,
+ "datum" : 23456789012
+ } ],
+ "maximum" : 23456789012,
+ "numberValues" : 64
+}
\ No newline at end of file
Added: hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/gold-only-one-value.json
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/gold-only-one-value.json?rev=808686&view=auto
==============================================================================
--- hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/gold-only-one-value.json (added)
+++ hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/gold-only-one-value.json Fri Aug 28 00:12:18 2009
@@ -0,0 +1,15 @@
+{
+ "minimum" : 23456789012,
+ "rankings" : [ {
+ "relativeRanking" : 0.25,
+ "datum" : 23456789012
+ }, {
+ "relativeRanking" : 0.5,
+ "datum" : 23456789012
+ }, {
+ "relativeRanking" : 0.75,
+ "datum" : 23456789012
+ } ],
+ "maximum" : 23456789012,
+ "numberValues" : 1
+}
\ No newline at end of file
Added: hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/gold-three-values.json
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/gold-three-values.json?rev=808686&view=auto
==============================================================================
--- hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/gold-three-values.json (added)
+++ hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/gold-three-values.json Fri Aug 28 00:12:18 2009
@@ -0,0 +1,15 @@
+{
+ "minimum" : 1,
+ "rankings" : [ {
+ "relativeRanking" : 0.25,
+ "datum" : 1
+ }, {
+ "relativeRanking" : 0.5,
+ "datum" : 1
+ }, {
+ "relativeRanking" : 0.75,
+ "datum" : 23456789012
+ } ],
+ "maximum" : 234567890123,
+ "numberValues" : 3
+}
\ No newline at end of file
Added: hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/input-minimal.json
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/input-minimal.json?rev=808686&view=auto
==============================================================================
--- hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/input-minimal.json (added)
+++ hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/input-minimal.json Fri Aug 28 00:12:18 2009
@@ -0,0 +1,17 @@
+{
+ "data" :
+ [
+ 12345,
+ 2345678901,
+ 23456789012,
+ 2345678902,
+ 23456789012
+ ],
+ "percentiles" :
+ [
+ 25,
+ 50,
+ 75
+ ],
+ "scale" : 100
+}
Added: hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/input-one-value-many-repeats.json
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/input-one-value-many-repeats.json?rev=808686&view=auto
==============================================================================
--- hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/input-one-value-many-repeats.json (added)
+++ hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/input-one-value-many-repeats.json Fri Aug 28 00:12:18 2009
@@ -0,0 +1,76 @@
+{
+ "data" :
+ [
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012,
+ 23456789012
+ ],
+ "percentiles" :
+ [
+ 25,
+ 50,
+ 75
+ ],
+ "scale" : 100
+}
Added: hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/input-only-one-value.json
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/input-only-one-value.json?rev=808686&view=auto
==============================================================================
--- hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/input-only-one-value.json (added)
+++ hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/input-only-one-value.json Fri Aug 28 00:12:18 2009
@@ -0,0 +1,13 @@
+{
+ "data" :
+ [
+ 23456789012
+ ],
+ "percentiles" :
+ [
+ 25,
+ 50,
+ 75
+ ],
+ "scale" : 100
+}
Added: hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/input-three-values.json
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/input-three-values.json?rev=808686&view=auto
==============================================================================
--- hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/input-three-values.json (added)
+++ hadoop/mapreduce/trunk/src/test/tools/data/rumen/histogram-tests/input-three-values.json Fri Aug 28 00:12:18 2009
@@ -0,0 +1,15 @@
+{
+ "data" :
+ [
+ 1,
+ 23456789012,
+ 234567890123
+ ],
+ "percentiles" :
+ [
+ 25,
+ 50,
+ 75
+ ],
+ "scale" : 100
+}