You are viewing a plain text version of this content. The canonical link for it is here.
Posted to mapreduce-commits@hadoop.apache.org by om...@apache.org on 2011/03/08 07:02:35 UTC
svn commit: r1079270 - in /hadoop/mapreduce/branches/yahoo-merge/src: docs/src/documentation/content/xdocs/rumen.xml test/mapred/org/apache/hadoop/tools/rumen/TestRumenFolder.java tools/org/apache/hadoop/tools/rumen/Folder.java

Author: omalley
Date: Tue Mar  8 06:02:35 2011
New Revision: 1079270

URL: http://svn.apache.org/viewvc?rev=1079270&view=rev
Log:
commit 856e0b31712d11b0ad455cf8cec64ac767d64ec6
Author: Rajesh Balamohan <rb...@yahoo-inc.com>
Date:   Thu Feb 24 14:06:49 2011 +0530

    : Feature to instruct rumen-folder utility to skip jobs worth of specific duration

Modified:
    hadoop/mapreduce/branches/yahoo-merge/src/docs/src/documentation/content/xdocs/rumen.xml
    hadoop/mapreduce/branches/yahoo-merge/src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenFolder.java
    hadoop/mapreduce/branches/yahoo-merge/src/tools/org/apache/hadoop/tools/rumen/Folder.java

Modified: hadoop/mapreduce/branches/yahoo-merge/src/docs/src/documentation/content/xdocs/rumen.xml
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/yahoo-merge/src/docs/src/documentation/content/xdocs/rumen.xml?rev=1079270&r1=1079269&r2=1079270&view=diff
==============================================================================
--- hadoop/mapreduce/branches/yahoo-merge/src/docs/src/documentation/content/xdocs/rumen.xml (original)
+++ hadoop/mapreduce/branches/yahoo-merge/src/docs/src/documentation/content/xdocs/rumen.xml Tue Mar  8 06:02:35 2011
@@ -323,6 +323,17 @@
           </td>
         </tr>
         <tr>
+          <td><code>-starts-after</code></td>
+          <td>Specify the time in milliseconds relative to the begining of   
+	      trace, after which this utility should start considering the jobs 
+              from input trace.
+          </td>
+          <td>If this value is specified as 10000, Folder would ignore 
+              first 10000ms worth of jobs in the trace and 
+              start considering the rest of the jobs in the trace for folding.
+          </td>
+        </tr>
+        <tr>
           <td><code>-temp-directory</code></td>
           <td>Temporary directory for the Folder. By default the <strong>output
               folder's parent directory</strong> is used as the scratch space.

Modified: hadoop/mapreduce/branches/yahoo-merge/src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenFolder.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/yahoo-merge/src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenFolder.java?rev=1079270&r1=1079269&r2=1079270&view=diff
==============================================================================
--- hadoop/mapreduce/branches/yahoo-merge/src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenFolder.java (original)
+++ hadoop/mapreduce/branches/yahoo-merge/src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenFolder.java Tue Mar  8 06:02:35 2011
@@ -71,6 +71,73 @@ public class TestRumenFolder {
     TestRumenFolder.<LoggedJob> jsonFileMatchesGold(conf, lfs, foldedTracePath,
         foldedGoldFile, LoggedJob.class, "trace");
   }
+  
+  @Test
+  public void testStartsAfterOption() throws Exception {
+    final Configuration conf = new Configuration();
+    final FileSystem lfs = FileSystem.getLocal(conf);
+
+    @SuppressWarnings("deprecation")
+    final Path rootInputDir =
+      new Path(System.getProperty("test.tools.input.dir", ""))
+            .makeQualified(lfs);
+    @SuppressWarnings("deprecation")
+    final Path rootTempDir =
+      new Path(System.getProperty("test.build.data", "/tmp"))
+            .makeQualified(lfs);
+
+    final Path rootInputFile = new Path(rootInputDir, "rumen/small-trace-test");
+    final Path tempDir = new Path(rootTempDir, "TestRumenJobTraces");
+    lfs.delete(tempDir, true);
+
+    final Path foldedTracePath = new Path(tempDir, "folded-trace.json");
+
+    final Path inputFile =
+      new Path(rootInputFile, "folder-input-trace.json.gz");
+
+    String[] args =
+       { "-input-cycle", "100S", "-output-duration", "300S",
+           "-skew-buffer-length", "1", "-seed", "100", "-starts-after", "30S", "-concentration", "2",
+           inputFile.toString(), foldedTracePath.toString() };
+
+    final Path foldedGoldFile =
+        new Path(rootInputFile, "goldFoldedTrace.json.gz");
+
+    Folder folder = new Folder();
+    int result = ToolRunner.run(folder, args);
+    assertEquals("Non-zero exit", 0, result);
+
+    TestRumenFolder.<LoggedJob> checkValidityAfterSkippingJobs(conf, lfs, foldedTracePath,
+        foldedGoldFile, LoggedJob.class, "trace", 30000);
+  }
+  
+  static private <T extends DeepCompare> void 
+  	checkValidityAfterSkippingJobs(Configuration conf, 
+  			FileSystem lfs, Path result, Path gold,
+      Class<? extends T> clazz, String fileDescription, 
+      long startsAfter) throws IOException {
+  	JsonObjectMapperParser<T> goldParser =
+      new JsonObjectMapperParser<T>(gold, clazz, conf);
+    InputStream resultStream = lfs.open(result);
+    JsonObjectMapperParser<T> resultParser =
+        new JsonObjectMapperParser<T>(resultStream, clazz);
+    try {
+      long submitTime = ((LoggedJob)goldParser.getNext()).getSubmitTime();
+      long target = submitTime + startsAfter;
+      while(true) {
+        DeepCompare resultJob = resultParser.getNext();
+        if (resultJob == null) {
+          break;
+        }
+        LoggedJob job = (LoggedJob) resultJob;
+        assertTrue("job's submit time in the output trace is less " +
+          "than the specified value of starts-after", 
+          (job.getSubmitTime() >= target));
+      }
+    } finally {
+      IOUtils.cleanup(null, goldParser, resultParser);
+    }
+  }
 
   static private <T extends DeepCompare> void jsonFileMatchesGold(
       Configuration conf, FileSystem lfs, Path result, Path gold,

Modified: hadoop/mapreduce/branches/yahoo-merge/src/tools/org/apache/hadoop/tools/rumen/Folder.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/yahoo-merge/src/tools/org/apache/hadoop/tools/rumen/Folder.java?rev=1079270&r1=1079269&r2=1079270&view=diff
==============================================================================
--- hadoop/mapreduce/branches/yahoo-merge/src/tools/org/apache/hadoop/tools/rumen/Folder.java (original)
+++ hadoop/mapreduce/branches/yahoo-merge/src/tools/org/apache/hadoop/tools/rumen/Folder.java Tue Mar  8 06:02:35 2011
@@ -61,6 +61,7 @@ public class Folder extends Configured i
   private boolean debug = false;
   private boolean allowMissorting = false;
   private int skewBufferLength = 0;
+  private long startsAfter = -1;
 
   static final private Log LOG = LogFactory.getLog(Folder.class);
 
@@ -130,8 +131,9 @@ public class Folder extends Configured i
 
     for (int i = 0; i < args.length; ++i) {
       String thisArg = args[i];
-
-      if (thisArg.equalsIgnoreCase("-output-duration")) {
+      if (thisArg.equalsIgnoreCase("-starts-after")) {
+        startsAfter = parseDuration(args[++i]);
+      } else if (thisArg.equalsIgnoreCase("-output-duration")) {
         outputDuration = parseDuration(args[++i]);
       } else if (thisArg.equalsIgnoreCase("-input-cycle")) {
         inputCycle = parseDuration(args[++i]);
@@ -274,6 +276,29 @@ public class Folder extends Configured i
 
         return EMPTY_JOB_TRACE;
       }
+      
+      // If starts-after time is specified, skip the number of jobs till we reach
+      // the starting time limit.
+      if (startsAfter > 0) {
+        LOG.info("starts-after time is specified. Initial job submit time : " + job.getSubmitTime());
+
+        long approximateTime = job.getSubmitTime() + startsAfter;
+        job = reader.nextJob();
+        long skippedCount = 0;
+        while (job != null && job.getSubmitTime() < approximateTime) {
+          job = reader.nextJob();
+          skippedCount++;
+        }
+
+        LOG.debug("Skipped the initial " + startsAfter+ " ms jobs. Total number of skipped Jobs : " 
+            + skippedCount);
+
+        if (job == null) {
+          LOG.error("No more jobs to process in the trace..May be you skipped too many jobs..");
+          return EMPTY_JOB_TRACE;
+        }
+        LOG.info("The first job has a submit time of " + job.getSubmitTime());
+      }
 
       firstJobSubmitTime = job.getSubmitTime();
       long lastJobSubmitTime = firstJobSubmitTime;