You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@giraph.apache.org by ac...@apache.org on 2012/11/30 22:28:10 UTC
svn commit: r1415857 - in /giraph/trunk: ./
giraph/src/main/java/org/apache/giraph/
giraph/src/main/java/org/apache/giraph/benchmark/
giraph/src/main/java/org/apache/giraph/graph/
giraph/src/test/java/org/apache/giraph/ giraph/src/test/java/org/apache/...
Author: aching
Date: Fri Nov 30 21:28:09 2012
New Revision: 1415857
URL: http://svn.apache.org/viewvc?rev=1415857&view=rev
Log:
GIRAPH-438: When checkpointing is disabled, fast fail (aching)
Modified:
giraph/trunk/CHANGELOG
giraph/trunk/giraph/src/main/java/org/apache/giraph/GiraphConfiguration.java
giraph/trunk/giraph/src/main/java/org/apache/giraph/benchmark/RandomMessageBenchmark.java
giraph/trunk/giraph/src/main/java/org/apache/giraph/graph/BspService.java
giraph/trunk/giraph/src/main/java/org/apache/giraph/graph/GiraphJob.java
giraph/trunk/giraph/src/test/java/org/apache/giraph/TestAutoCheckpoint.java
giraph/trunk/giraph/src/test/java/org/apache/giraph/TestManualCheckpoint.java
giraph/trunk/giraph/src/test/java/org/apache/giraph/graph/TestAggregatorsHandling.java
Modified: giraph/trunk/CHANGELOG
URL: http://svn.apache.org/viewvc/giraph/trunk/CHANGELOG?rev=1415857&r1=1415856&r2=1415857&view=diff
==============================================================================
--- giraph/trunk/CHANGELOG (original)
+++ giraph/trunk/CHANGELOG Fri Nov 30 21:28:09 2012
@@ -1,6 +1,8 @@
Giraph Change Log
Release 0.2.0 - unreleased
+ GIRAPH-438: When checkpointing is disabled, fast fail (aching)
+
GIRAPH-437: Missing progress calls when stopping Netty server (majakabiljo)
GIRAPH-439: Fix naming of input superstep counter (apresta)
Modified: giraph/trunk/giraph/src/main/java/org/apache/giraph/GiraphConfiguration.java
URL: http://svn.apache.org/viewvc/giraph/trunk/giraph/src/main/java/org/apache/giraph/GiraphConfiguration.java?rev=1415857&r1=1415856&r2=1415857&view=diff
==============================================================================
--- giraph/trunk/giraph/src/main/java/org/apache/giraph/GiraphConfiguration.java (original)
+++ giraph/trunk/giraph/src/main/java/org/apache/giraph/GiraphConfiguration.java Fri Nov 30 21:28:09 2012
@@ -651,6 +651,13 @@ public class GiraphConfiguration extends
public static final boolean USE_UNSAFE_SERIALIZATION_DEFAULT = true;
/**
+ * Maximum number of attempts a master/worker will retry before killing
+ * the job. This directly maps to the number of map task attempts in
+ * Hadoop.
+ */
+ public static final String MAX_TASK_ATTEMPTS = "mapred.map.max.attempts";
+
+ /**
* Constructor that creates the configuration
*/
public GiraphConfiguration() { }
@@ -1125,4 +1132,42 @@ public class GiraphConfiguration extends
return getBoolean(
USE_MESSAGE_SIZE_ENCODING, USE_MESSAGE_SIZE_ENCODING_DEFAULT);
}
+
+ /**
+ * Set the checkpoint frequency of how many supersteps to wait before
+ * checkpointing
+ *
+ * @param checkpointFrequency How often to checkpoint (0 means never)
+ */
+ public void setCheckpointFrequency(int checkpointFrequency) {
+ setInt(CHECKPOINT_FREQUENCY, checkpointFrequency);
+ }
+
+ /**
+ * Get the checkpoint frequency of how many supersteps to wait
+ * before checkpointing
+ *
+ * @return Checkpoint frequency (0 means never)
+ */
+ public int getCheckpointFrequency() {
+ return getInt(CHECKPOINT_FREQUENCY, CHECKPOINT_FREQUENCY_DEFAULT);
+ }
+
+ /**
+ * Set the max task attempts
+ *
+ * @param maxTaskAttempts Max task attempts to use
+ */
+ public void setMaxTaskAttempts(int maxTaskAttempts) {
+ setInt(MAX_TASK_ATTEMPTS, maxTaskAttempts);
+ }
+
+ /**
+ * Get the max task attempts
+ *
+ * @return Max task attempts or -1, if not set
+ */
+ public int getMaxTaskAttempts() {
+ return getInt(MAX_TASK_ATTEMPTS, -1);
+ }
}
Modified: giraph/trunk/giraph/src/main/java/org/apache/giraph/benchmark/RandomMessageBenchmark.java
URL: http://svn.apache.org/viewvc/giraph/trunk/giraph/src/main/java/org/apache/giraph/benchmark/RandomMessageBenchmark.java?rev=1415857&r1=1415856&r2=1415857&view=diff
==============================================================================
--- giraph/trunk/giraph/src/main/java/org/apache/giraph/benchmark/RandomMessageBenchmark.java (original)
+++ giraph/trunk/giraph/src/main/java/org/apache/giraph/benchmark/RandomMessageBenchmark.java Fri Nov 30 21:28:09 2012
@@ -353,7 +353,6 @@ public class RandomMessageBenchmark impl
}
int workers = Integer.parseInt(cmd.getOptionValue('w'));
GiraphJob job = new GiraphJob(getConf(), getClass().getName());
- job.getConfiguration().setInt(GiraphConfiguration.CHECKPOINT_FREQUENCY, 0);
job.getConfiguration().setVertexClass(RandomMessageVertex.class);
job.getConfiguration().setVertexInputFormatClass(
PseudoRandomVertexInputFormat.class);
Modified: giraph/trunk/giraph/src/main/java/org/apache/giraph/graph/BspService.java
URL: http://svn.apache.org/viewvc/giraph/trunk/giraph/src/main/java/org/apache/giraph/graph/BspService.java?rev=1415857&r1=1415856&r2=1415857&view=diff
==============================================================================
--- giraph/trunk/giraph/src/main/java/org/apache/giraph/graph/BspService.java (original)
+++ giraph/trunk/giraph/src/main/java/org/apache/giraph/graph/BspService.java Fri Nov 30 21:28:09 2012
@@ -298,9 +298,7 @@ public abstract class BspService<I exten
this.hostnamePartitionId = hostname + "_" + getTaskPartition();
this.graphPartitionerFactory = conf.createGraphPartitioner();
- this.checkpointFrequency =
- conf.getInt(GiraphConfiguration.CHECKPOINT_FREQUENCY,
- GiraphConfiguration.CHECKPOINT_FREQUENCY_DEFAULT);
+ this.checkpointFrequency = conf.getCheckpointFrequency();
basePath = ZooKeeperManager.getBasePath(conf) + BASE_DIR + "/" + jobId;
masterJobStatePath = basePath + MASTER_JOB_STATE_NODE;
Modified: giraph/trunk/giraph/src/main/java/org/apache/giraph/graph/GiraphJob.java
URL: http://svn.apache.org/viewvc/giraph/trunk/giraph/src/main/java/org/apache/giraph/graph/GiraphJob.java?rev=1415857&r1=1415856&r2=1415857&view=diff
==============================================================================
--- giraph/trunk/giraph/src/main/java/org/apache/giraph/graph/GiraphJob.java (original)
+++ giraph/trunk/giraph/src/main/java/org/apache/giraph/graph/GiraphJob.java Fri Nov 30 21:28:09 2012
@@ -248,6 +248,19 @@ public class GiraphJob {
giraphConfiguration.setBoolean("mapreduce.user.classpath.first", true);
giraphConfiguration.setBoolean("mapreduce.job.user.classpath.first", true);
+ // If the checkpoint frequency is 0 (no failure handling), set the max
+ // task attempts to 0 to encourage faster failure of unrecoverable jobs
+ if (giraphConfiguration.getCheckpointFrequency() == 0) {
+ int oldMaxTaskAttempts = giraphConfiguration.getMaxTaskAttempts();
+ giraphConfiguration.setMaxTaskAttempts(0);
+ if (LOG.isInfoEnabled()) {
+ LOG.info("run: Since checkpointing is disabled (default), " +
+ "do not allow any task retries (setting " +
+ GiraphConfiguration.MAX_TASK_ATTEMPTS + " = 0, " +
+ "old value = " + oldMaxTaskAttempts + ")");
+ }
+ }
+
// Set the job properties, check them, and submit the job
ImmutableClassesGiraphConfiguration immutableClassesGiraphConfiguration =
new ImmutableClassesGiraphConfiguration(giraphConfiguration);
Modified: giraph/trunk/giraph/src/test/java/org/apache/giraph/TestAutoCheckpoint.java
URL: http://svn.apache.org/viewvc/giraph/trunk/giraph/src/test/java/org/apache/giraph/TestAutoCheckpoint.java?rev=1415857&r1=1415856&r2=1415857&view=diff
==============================================================================
--- giraph/trunk/giraph/src/test/java/org/apache/giraph/TestAutoCheckpoint.java (original)
+++ giraph/trunk/giraph/src/test/java/org/apache/giraph/TestAutoCheckpoint.java Fri Nov 30 21:28:09 2012
@@ -18,18 +18,16 @@
package org.apache.giraph;
-import static org.junit.Assert.assertTrue;
-
import java.io.IOException;
-
import org.apache.giraph.examples.SimpleCheckpointVertex;
import org.apache.giraph.examples.SimpleSuperstepVertex.SimpleSuperstepVertexInputFormat;
import org.apache.giraph.examples.SimpleSuperstepVertex.SimpleSuperstepVertexOutputFormat;
import org.apache.giraph.graph.GiraphJob;
-import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.junit.Test;
+import static org.junit.Assert.assertTrue;
+
/**
* Unit test for automated checkpoint restarting
*/
@@ -64,13 +62,13 @@ public class TestAutoCheckpoint extends
SimpleSuperstepVertexOutputFormat.class,
outputPath);
- Configuration conf = job.getConfiguration();
+ GiraphConfiguration conf = job.getConfiguration();
conf.setBoolean(SimpleCheckpointVertex.ENABLE_FAULT, true);
conf.setInt("mapred.map.max.attempts", 4);
// Trigger failure faster
conf.setInt("mapred.task.timeout", 30000);
conf.setInt(GiraphConfiguration.POLL_MSECS, 5000);
- conf.setInt(GiraphConfiguration.CHECKPOINT_FREQUENCY, 2);
+ conf.setCheckpointFrequency(2);
conf.set(GiraphConfiguration.CHECKPOINT_DIRECTORY,
getTempPath("_singleFaultCheckpoints").toString());
conf.setBoolean(GiraphConfiguration.CLEANUP_CHECKPOINTS_AFTER_SUCCESS, false);
Modified: giraph/trunk/giraph/src/test/java/org/apache/giraph/TestManualCheckpoint.java
URL: http://svn.apache.org/viewvc/giraph/trunk/giraph/src/test/java/org/apache/giraph/TestManualCheckpoint.java?rev=1415857&r1=1415856&r2=1415857&view=diff
==============================================================================
--- giraph/trunk/giraph/src/test/java/org/apache/giraph/TestManualCheckpoint.java (original)
+++ giraph/trunk/giraph/src/test/java/org/apache/giraph/TestManualCheckpoint.java Fri Nov 30 21:28:09 2012
@@ -17,11 +17,7 @@
*/
package org.apache.giraph;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
import java.io.IOException;
-
import org.apache.giraph.examples.SimpleCheckpointVertex;
import org.apache.giraph.examples.SimpleSuperstepVertex.SimpleSuperstepVertexInputFormat;
import org.apache.giraph.examples.SimpleSuperstepVertex.SimpleSuperstepVertexOutputFormat;
@@ -30,6 +26,9 @@ import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.junit.Test;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
/**
* Unit test for manual checkpoint restarting
*/
@@ -61,7 +60,7 @@ public class TestManualCheckpoint extend
checkpointsDir.toString());
job.getConfiguration().setBoolean(
GiraphConfiguration.CLEANUP_CHECKPOINTS_AFTER_SUCCESS, false);
- job.getConfiguration().setInt(GiraphConfiguration.CHECKPOINT_FREQUENCY, 2);
+ job.getConfiguration().setCheckpointFrequency(2);
assertTrue(job.run(true));
Modified: giraph/trunk/giraph/src/test/java/org/apache/giraph/graph/TestAggregatorsHandling.java
URL: http://svn.apache.org/viewvc/giraph/trunk/giraph/src/test/java/org/apache/giraph/graph/TestAggregatorsHandling.java?rev=1415857&r1=1415856&r2=1415857&view=diff
==============================================================================
--- giraph/trunk/giraph/src/test/java/org/apache/giraph/graph/TestAggregatorsHandling.java (original)
+++ giraph/trunk/giraph/src/test/java/org/apache/giraph/graph/TestAggregatorsHandling.java Fri Nov 30 21:28:09 2012
@@ -166,7 +166,7 @@ public class TestAggregatorsHandling ext
checkpointsDir.toString());
job.getConfiguration().setBoolean(
GiraphConfiguration.CLEANUP_CHECKPOINTS_AFTER_SUCCESS, false);
- job.getConfiguration().setInt(GiraphConfiguration.CHECKPOINT_FREQUENCY, 4);
+ job.getConfiguration().setCheckpointFrequency(4);
assertTrue(job.run(true));