You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@crunch.apache.org by ch...@apache.org on 2013/09/12 04:49:32 UTC
git commit: CRUNCH-262: Shorten job name if it is too long (appending "..." to the end)
Updated Branches:
refs/heads/master 35136cb4e -> fee4d1654
CRUNCH-262: Shorten job name if it is too long (appending "..." to the end)
Project: http://git-wip-us.apache.org/repos/asf/crunch/repo
Commit: http://git-wip-us.apache.org/repos/asf/crunch/commit/fee4d165
Tree: http://git-wip-us.apache.org/repos/asf/crunch/tree/fee4d165
Diff: http://git-wip-us.apache.org/repos/asf/crunch/diff/fee4d165
Branch: refs/heads/master
Commit: fee4d1654f791cf314b83141cef9fe99703b6477
Parents: 35136cb
Author: Chao Shi <ch...@apache.org>
Authored: Thu Sep 12 10:25:00 2013 +0800
Committer: Chao Shi <ch...@apache.org>
Committed: Thu Sep 12 10:40:10 2013 +0800
----------------------------------------------------------------------
.../crunch/impl/mr/plan/JobNameBuilder.java | 31 ++++++++++++++++++--
.../crunch/impl/mr/plan/JobPrototype.java | 17 ++++++-----
.../apache/crunch/impl/mr/plan/MSCRPlanner.java | 6 ++--
.../crunch/impl/mr/plan/PlanningParameters.java | 2 ++
.../crunch/impl/mr/plan/JobNameBuilderTest.java | 31 ++++++++++++++++----
5 files changed, 68 insertions(+), 19 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/crunch/blob/fee4d165/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/JobNameBuilder.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/JobNameBuilder.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/JobNameBuilder.java
index 9ad7300..6fac1be 100644
--- a/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/JobNameBuilder.java
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/JobNameBuilder.java
@@ -19,8 +19,10 @@ package org.apache.crunch.impl.mr.plan;
import java.util.List;
+import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Joiner;
import com.google.common.collect.Lists;
+import org.apache.hadoop.conf.Configuration;
/**
* Visitor that traverses the {@code DoNode} instances in a job and builds a
@@ -30,12 +32,20 @@ class JobNameBuilder {
private static final Joiner JOINER = Joiner.on("+");
private static final Joiner CHILD_JOINER = Joiner.on("/");
+ private static final int DEFAULT_JOB_NAME_MAX_STACK_LENGTH = 60;
- private String pipelineName;
+ private final String pipelineName;
+ private final int jobID;
+ private final int numOfJobs;
List<String> rootStack = Lists.newArrayList();
+ private final int maxStackNameLength;
- public JobNameBuilder(final String pipelineName) {
+ public JobNameBuilder(Configuration conf, String pipelineName, int jobID, int numOfJobs) {
this.pipelineName = pipelineName;
+ this.jobID = jobID;
+ this.numOfJobs = numOfJobs;
+ this.maxStackNameLength = conf.getInt(
+ PlanningParameters.JOB_NAME_MAX_STACK_LENGTH, DEFAULT_JOB_NAME_MAX_STACK_LENGTH);
}
public void visit(DoNode node) {
@@ -74,6 +84,21 @@ class JobNameBuilder {
}
public String build() {
- return String.format("%s: %s", pipelineName, JOINER.join(rootStack));
+ return String.format("%s: %s (%d/%d)",
+ pipelineName,
+ shortenRootStackName(JOINER.join(rootStack), maxStackNameLength),
+ jobID,
+ numOfJobs);
+ }
+
+ private static String shortenRootStackName(String s, int len) {
+ int n = s.length();
+ if (len <= 3) {
+ return "...";
+ }
+ if (n <= len) {
+ return s;
+ }
+ return s.substring(0, len - 3) + "...";
}
}
http://git-wip-us.apache.org/repos/asf/crunch/blob/fee4d165/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/JobPrototype.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/JobPrototype.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/JobPrototype.java
index c733323..c9b7111 100644
--- a/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/JobPrototype.java
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/JobPrototype.java
@@ -127,17 +127,19 @@ class JobPrototype {
this.dependencies.add(dependency);
}
- public CrunchControlledJob getCrunchJob(Class<?> jarClass, Configuration conf, Pipeline pipeline) throws IOException {
+ public CrunchControlledJob getCrunchJob(
+ Class<?> jarClass, Configuration conf, Pipeline pipeline, int numOfJobs) throws IOException {
if (job == null) {
- job = build(jarClass, conf, pipeline);
+ job = build(jarClass, conf, pipeline, numOfJobs);
for (JobPrototype proto : dependencies) {
- job.addDependingJob(proto.getCrunchJob(jarClass, conf, pipeline));
+ job.addDependingJob(proto.getCrunchJob(jarClass, conf, pipeline, numOfJobs));
}
}
return job;
}
- private CrunchControlledJob build(Class<?> jarClass, Configuration conf, Pipeline pipeline) throws IOException {
+ private CrunchControlledJob build(
+ Class<?> jarClass, Configuration conf, Pipeline pipeline, int numOfJobs) throws IOException {
Job job = new Job(conf);
conf = job.getConfiguration();
conf.set(PlanningParameters.CRUNCH_WORKING_DIRECTORY, workingPath.toString());
@@ -220,7 +222,7 @@ class JobPrototype {
}
job.setInputFormatClass(CrunchInputFormat.class);
}
- job.setJobName(createJobName(pipeline.getName(), inputNodes, reduceNode));
+ job.setJobName(createJobName(conf, pipeline.getName(), inputNodes, reduceNode, numOfJobs));
return new CrunchControlledJob(
jobID,
@@ -239,8 +241,9 @@ class JobPrototype {
DistCache.write(conf, path, rtNodes);
}
- private String createJobName(String pipelineName, List<DoNode> mapNodes, DoNode reduceNode) {
- JobNameBuilder builder = new JobNameBuilder(pipelineName);
+ private String createJobName(
+ Configuration conf, String pipelineName, List<DoNode> mapNodes, DoNode reduceNode, int numOfJobs) {
+ JobNameBuilder builder = new JobNameBuilder(conf, pipelineName, jobID, numOfJobs);
builder.visit(mapNodes);
if (reduceNode != null) {
builder.visit(reduceNode);
http://git-wip-us.apache.org/repos/asf/crunch/blob/fee4d165/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/MSCRPlanner.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/MSCRPlanner.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/MSCRPlanner.java
index 5ad5ca1..f765313 100644
--- a/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/MSCRPlanner.java
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/MSCRPlanner.java
@@ -113,7 +113,7 @@ public class MSCRPlanner {
// job prototype a particular GBK is assigned to.
Multimap<Vertex, JobPrototype> newAssignments = HashMultimap.create();
for (List<Vertex> component : components) {
- newAssignments.putAll(constructJobPrototypes(component));
+ newAssignments.putAll(constructJobPrototypes(component, components.size()));
}
// Add in the job dependency information here.
@@ -154,7 +154,7 @@ public class MSCRPlanner {
MRExecutor exec = new MRExecutor(jarClass, outputs, toMaterialize);
for (JobPrototype proto : Sets.newHashSet(assignments.values())) {
dotfileWriter.addJobPrototype(proto);
- exec.addJob(proto.getCrunchJob(jarClass, conf, pipeline));
+ exec.addJob(proto.getCrunchJob(jarClass, conf, pipeline, lastJobID));
}
String planDotFile = dotfileWriter.buildDotfile();
@@ -239,7 +239,7 @@ public class MSCRPlanner {
return graph;
}
- private Multimap<Vertex, JobPrototype> constructJobPrototypes(List<Vertex> component) {
+ private Multimap<Vertex, JobPrototype> constructJobPrototypes(List<Vertex> component, int numOfJobs) {
Multimap<Vertex, JobPrototype> assignment = HashMultimap.create();
List<Vertex> gbks = Lists.newArrayList();
for (Vertex v : component) {
http://git-wip-us.apache.org/repos/asf/crunch/blob/fee4d165/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/PlanningParameters.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/PlanningParameters.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/PlanningParameters.java
index b90a911..d0b74b7 100644
--- a/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/PlanningParameters.java
+++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/PlanningParameters.java
@@ -33,6 +33,8 @@ public class PlanningParameters {
*/
public static final String PIPELINE_PLAN_DOTFILE = "crunch.planner.dotfile";
+ public static final String JOB_NAME_MAX_STACK_LENGTH = "crunch.job.name.max.stack.length";
+
private PlanningParameters() {
}
}
http://git-wip-us.apache.org/repos/asf/crunch/blob/fee4d165/crunch-core/src/test/java/org/apache/crunch/impl/mr/plan/JobNameBuilderTest.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/test/java/org/apache/crunch/impl/mr/plan/JobNameBuilderTest.java b/crunch-core/src/test/java/org/apache/crunch/impl/mr/plan/JobNameBuilderTest.java
index 0a30fa4..3ba2763 100644
--- a/crunch-core/src/test/java/org/apache/crunch/impl/mr/plan/JobNameBuilderTest.java
+++ b/crunch-core/src/test/java/org/apache/crunch/impl/mr/plan/JobNameBuilderTest.java
@@ -17,25 +17,44 @@
*/
package org.apache.crunch.impl.mr.plan;
-import static org.junit.Assert.assertEquals;
-
+import com.google.common.base.Strings;
+import com.google.common.collect.Lists;
import org.apache.crunch.types.writable.Writables;
+import org.apache.hadoop.conf.Configuration;
import org.junit.Test;
-import com.google.common.collect.Lists;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
public class JobNameBuilderTest {
+ private static final Configuration CONF = new Configuration();
+
@Test
public void testBuild() {
final String pipelineName = "PipelineName";
final String nodeName = "outputNode";
- DoNode doNode = DoNode.createOutputNode(nodeName, Writables.strings().getConverter(), Writables.strings());
- JobNameBuilder jobNameBuilder = new JobNameBuilder(pipelineName);
+ DoNode doNode = createDoNode(nodeName);
+ JobNameBuilder jobNameBuilder = new JobNameBuilder(CONF, pipelineName, 1, 1);
+ jobNameBuilder.visit(Lists.newArrayList(doNode));
+ String jobName = jobNameBuilder.build();
+
+ assertEquals(String.format("%s: %s (1/1)", pipelineName, nodeName), jobName);
+ }
+
+ @Test
+ public void testNodeNameTooLong() {
+ final String pipelineName = "PipelineName";
+ final String nodeName = Strings.repeat("very_long_node_name", 100);
+ DoNode doNode = createDoNode(nodeName);
+ JobNameBuilder jobNameBuilder = new JobNameBuilder(CONF, pipelineName, 1, 1);
jobNameBuilder.visit(Lists.newArrayList(doNode));
String jobName = jobNameBuilder.build();
- assertEquals(String.format("%s: %s", pipelineName, nodeName), jobName);
+ assertFalse(jobName.contains(nodeName)); // Tests that the very long node name was shortened
}
+ private DoNode createDoNode(String nodeName) {
+ return DoNode.createOutputNode(nodeName, Writables.strings().getConverter(), Writables.strings());
+ }
}