You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@crunch.apache.org by ch...@apache.org on 2013/09/12 04:49:32 UTC

git commit: CRUNCH-262: Shorten job name if it is too long (putting "..." to the end)

Updated Branches:
  refs/heads/master 35136cb4e -> fee4d1654


CRUNCH-262: Shorten job name if it is too long (putting "..." to the end)


Project: http://git-wip-us.apache.org/repos/asf/crunch/repo
Commit: http://git-wip-us.apache.org/repos/asf/crunch/commit/fee4d165
Tree: http://git-wip-us.apache.org/repos/asf/crunch/tree/fee4d165
Diff: http://git-wip-us.apache.org/repos/asf/crunch/diff/fee4d165

Branch: refs/heads/master
Commit: fee4d1654f791cf314b83141cef9fe99703b6477
Parents: 35136cb
Author: Chao Shi <ch...@apache.org>
Authored: Thu Sep 12 10:25:00 2013 +0800
Committer: Chao Shi <ch...@apache.org>
Committed: Thu Sep 12 10:40:10 2013 +0800

----------------------------------------------------------------------
 .../crunch/impl/mr/plan/JobNameBuilder.java     | 31 ++++++++++++++++++--
 .../crunch/impl/mr/plan/JobPrototype.java       | 17 ++++++-----
 .../apache/crunch/impl/mr/plan/MSCRPlanner.java |  6 ++--
 .../crunch/impl/mr/plan/PlanningParameters.java |  2 ++
 .../crunch/impl/mr/plan/JobNameBuilderTest.java | 31 ++++++++++++++++----
 5 files changed, 68 insertions(+), 19 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/crunch/blob/fee4d165/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/JobNameBuilder.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/JobNameBuilder.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/JobNameBuilder.java
index 9ad7300..6fac1be 100644
--- a/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/JobNameBuilder.java
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/JobNameBuilder.java
@@ -19,8 +19,10 @@ package org.apache.crunch.impl.mr.plan;
 
 import java.util.List;
 
+import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Joiner;
 import com.google.common.collect.Lists;
+import org.apache.hadoop.conf.Configuration;
 
 /**
  * Visitor that traverses the {@code DoNode} instances in a job and builds a
@@ -30,12 +32,20 @@ class JobNameBuilder {
 
   private static final Joiner JOINER = Joiner.on("+");
   private static final Joiner CHILD_JOINER = Joiner.on("/");
+  private static final int DEFAULT_JOB_NAME_MAX_STACK_LENGTH = 60;
 
-  private String pipelineName;
+  private final String pipelineName;
+  private final int jobID;
+  private final int numOfJobs;
   List<String> rootStack = Lists.newArrayList();
+  private final int maxStackNameLength;
 
-  public JobNameBuilder(final String pipelineName) {
+  public JobNameBuilder(Configuration conf, String pipelineName, int jobID, int numOfJobs) {
     this.pipelineName = pipelineName;
+    this.jobID = jobID;
+    this.numOfJobs = numOfJobs;
+    this.maxStackNameLength = conf.getInt(
+        PlanningParameters.JOB_NAME_MAX_STACK_LENGTH, DEFAULT_JOB_NAME_MAX_STACK_LENGTH);
   }
 
   public void visit(DoNode node) {
@@ -74,6 +84,21 @@ class JobNameBuilder {
   }
 
   public String build() {
-    return String.format("%s: %s", pipelineName, JOINER.join(rootStack));
+    return String.format("%s: %s (%d/%d)",
+        pipelineName,
+        shortenRootStackName(JOINER.join(rootStack), maxStackNameLength),
+        jobID,
+        numOfJobs);
+  }
+
+  private static String shortenRootStackName(String s, int len) {
+    int n = s.length();
+    if (len <= 3) {
+      return "...";
+    }
+    if (n <= len) {
+      return s;
+    }
+    return s.substring(0, len - 3) + "...";
   }
 }

http://git-wip-us.apache.org/repos/asf/crunch/blob/fee4d165/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/JobPrototype.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/JobPrototype.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/JobPrototype.java
index c733323..c9b7111 100644
--- a/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/JobPrototype.java
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/JobPrototype.java
@@ -127,17 +127,19 @@ class JobPrototype {
     this.dependencies.add(dependency);
   }
 
-  public CrunchControlledJob getCrunchJob(Class<?> jarClass, Configuration conf, Pipeline pipeline) throws IOException {
+  public CrunchControlledJob getCrunchJob(
+      Class<?> jarClass, Configuration conf, Pipeline pipeline, int numOfJobs) throws IOException {
     if (job == null) {
-      job = build(jarClass, conf, pipeline);
+      job = build(jarClass, conf, pipeline, numOfJobs);
       for (JobPrototype proto : dependencies) {
-        job.addDependingJob(proto.getCrunchJob(jarClass, conf, pipeline));
+        job.addDependingJob(proto.getCrunchJob(jarClass, conf, pipeline, numOfJobs));
       }
     }
     return job;
   }
 
-  private CrunchControlledJob build(Class<?> jarClass, Configuration conf, Pipeline pipeline) throws IOException {
+  private CrunchControlledJob build(
+      Class<?> jarClass, Configuration conf, Pipeline pipeline, int numOfJobs) throws IOException {
     Job job = new Job(conf);
     conf = job.getConfiguration();
     conf.set(PlanningParameters.CRUNCH_WORKING_DIRECTORY, workingPath.toString());
@@ -220,7 +222,7 @@ class JobPrototype {
       }
       job.setInputFormatClass(CrunchInputFormat.class);
     }
-    job.setJobName(createJobName(pipeline.getName(), inputNodes, reduceNode));
+    job.setJobName(createJobName(conf, pipeline.getName(), inputNodes, reduceNode, numOfJobs));
 
     return new CrunchControlledJob(
         jobID,
@@ -239,8 +241,9 @@ class JobPrototype {
     DistCache.write(conf, path, rtNodes);
   }
 
-  private String createJobName(String pipelineName, List<DoNode> mapNodes, DoNode reduceNode) {
-    JobNameBuilder builder = new JobNameBuilder(pipelineName);
+  private String createJobName(
+      Configuration conf, String pipelineName, List<DoNode> mapNodes, DoNode reduceNode, int numOfJobs) {
+    JobNameBuilder builder = new JobNameBuilder(conf, pipelineName, jobID, numOfJobs);
     builder.visit(mapNodes);
     if (reduceNode != null) {
       builder.visit(reduceNode);

http://git-wip-us.apache.org/repos/asf/crunch/blob/fee4d165/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/MSCRPlanner.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/MSCRPlanner.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/MSCRPlanner.java
index 5ad5ca1..f765313 100644
--- a/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/MSCRPlanner.java
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/MSCRPlanner.java
@@ -113,7 +113,7 @@ public class MSCRPlanner {
       // job prototype a particular GBK is assigned to.
       Multimap<Vertex, JobPrototype> newAssignments = HashMultimap.create();
       for (List<Vertex> component : components) {
-        newAssignments.putAll(constructJobPrototypes(component));
+        newAssignments.putAll(constructJobPrototypes(component, components.size()));
       }
 
       // Add in the job dependency information here.
@@ -154,7 +154,7 @@ public class MSCRPlanner {
     MRExecutor exec = new MRExecutor(jarClass, outputs, toMaterialize);
     for (JobPrototype proto : Sets.newHashSet(assignments.values())) {
       dotfileWriter.addJobPrototype(proto);
-      exec.addJob(proto.getCrunchJob(jarClass, conf, pipeline));
+      exec.addJob(proto.getCrunchJob(jarClass, conf, pipeline, lastJobID));
     }
 
     String planDotFile = dotfileWriter.buildDotfile();
@@ -239,7 +239,7 @@ public class MSCRPlanner {
     return graph;
   }
   
-  private Multimap<Vertex, JobPrototype> constructJobPrototypes(List<Vertex> component) {
+  private Multimap<Vertex, JobPrototype> constructJobPrototypes(List<Vertex> component, int numOfJobs) {
     Multimap<Vertex, JobPrototype> assignment = HashMultimap.create();
     List<Vertex> gbks = Lists.newArrayList();
     for (Vertex v : component) {

http://git-wip-us.apache.org/repos/asf/crunch/blob/fee4d165/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/PlanningParameters.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/PlanningParameters.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/PlanningParameters.java
index b90a911..d0b74b7 100644
--- a/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/PlanningParameters.java
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/PlanningParameters.java
@@ -33,6 +33,8 @@ public class PlanningParameters {
    */
   public static final String PIPELINE_PLAN_DOTFILE = "crunch.planner.dotfile";
 
+  public static final String JOB_NAME_MAX_STACK_LENGTH = "crunch.job.name.max.stack.length";
+
   private PlanningParameters() {
   }
 }

http://git-wip-us.apache.org/repos/asf/crunch/blob/fee4d165/crunch-core/src/test/java/org/apache/crunch/impl/mr/plan/JobNameBuilderTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/impl/mr/plan/JobNameBuilderTest.java b/crunch-core/src/test/java/org/apache/crunch/impl/mr/plan/JobNameBuilderTest.java
index 0a30fa4..3ba2763 100644
--- a/crunch-core/src/test/java/org/apache/crunch/impl/mr/plan/JobNameBuilderTest.java
+++ b/crunch-core/src/test/java/org/apache/crunch/impl/mr/plan/JobNameBuilderTest.java
@@ -17,25 +17,44 @@
  */
 package org.apache.crunch.impl.mr.plan;
 
-import static org.junit.Assert.assertEquals;
-
+import com.google.common.base.Strings;
+import com.google.common.collect.Lists;
 import org.apache.crunch.types.writable.Writables;
+import org.apache.hadoop.conf.Configuration;
 import org.junit.Test;
 
-import com.google.common.collect.Lists;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
 
 public class JobNameBuilderTest {
 
+  private static final Configuration CONF = new Configuration();
+
   @Test
   public void testBuild() {
     final String pipelineName = "PipelineName";
     final String nodeName = "outputNode";
-    DoNode doNode = DoNode.createOutputNode(nodeName, Writables.strings().getConverter(), Writables.strings());
-    JobNameBuilder jobNameBuilder = new JobNameBuilder(pipelineName);
+    DoNode doNode = createDoNode(nodeName);
+    JobNameBuilder jobNameBuilder = new JobNameBuilder(CONF, pipelineName, 1, 1);
+    jobNameBuilder.visit(Lists.newArrayList(doNode));
+    String jobName = jobNameBuilder.build();
+
+    assertEquals(String.format("%s: %s (1/1)", pipelineName, nodeName), jobName);
+  }
+
+  @Test
+  public void testNodeNameTooLong() {
+    final String pipelineName = "PipelineName";
+    final String nodeName = Strings.repeat("very_long_node_name", 100);
+    DoNode doNode = createDoNode(nodeName);
+    JobNameBuilder jobNameBuilder = new JobNameBuilder(CONF, pipelineName, 1, 1);
     jobNameBuilder.visit(Lists.newArrayList(doNode));
     String jobName = jobNameBuilder.build();
 
-    assertEquals(String.format("%s: %s", pipelineName, nodeName), jobName);
+    assertFalse(jobName.contains(nodeName)); // Tests that the very long node name was shortened
   }
 
+  private DoNode createDoNode(String nodeName) {
+    return DoNode.createOutputNode(nodeName, Writables.strings().getConverter(), Writables.strings());
+  }
 }