You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@giraph.apache.org by ac...@apache.org on 2012/11/30 22:28:10 UTC

svn commit: r1415857 - in /giraph/trunk: ./ giraph/src/main/java/org/apache/giraph/ giraph/src/main/java/org/apache/giraph/benchmark/ giraph/src/main/java/org/apache/giraph/graph/ giraph/src/test/java/org/apache/giraph/ giraph/src/test/java/org/apache/...

Author: aching
Date: Fri Nov 30 21:28:09 2012
New Revision: 1415857

URL: http://svn.apache.org/viewvc?rev=1415857&view=rev
Log:
GIRAPH-438: When checkpointing is disabled, fast fail (aching)

Modified:
    giraph/trunk/CHANGELOG
    giraph/trunk/giraph/src/main/java/org/apache/giraph/GiraphConfiguration.java
    giraph/trunk/giraph/src/main/java/org/apache/giraph/benchmark/RandomMessageBenchmark.java
    giraph/trunk/giraph/src/main/java/org/apache/giraph/graph/BspService.java
    giraph/trunk/giraph/src/main/java/org/apache/giraph/graph/GiraphJob.java
    giraph/trunk/giraph/src/test/java/org/apache/giraph/TestAutoCheckpoint.java
    giraph/trunk/giraph/src/test/java/org/apache/giraph/TestManualCheckpoint.java
    giraph/trunk/giraph/src/test/java/org/apache/giraph/graph/TestAggregatorsHandling.java

Modified: giraph/trunk/CHANGELOG
URL: http://svn.apache.org/viewvc/giraph/trunk/CHANGELOG?rev=1415857&r1=1415856&r2=1415857&view=diff
==============================================================================
--- giraph/trunk/CHANGELOG (original)
+++ giraph/trunk/CHANGELOG Fri Nov 30 21:28:09 2012
@@ -1,6 +1,8 @@
 Giraph Change Log
 
 Release 0.2.0 - unreleased
+  GIRAPH-438: When checkpointing is disabled, fast fail (aching)
+
   GIRAPH-437: Missing progress calls when stopping Netty server (majakabiljo)
 
   GIRAPH-439: Fix naming of input superstep counter (apresta)

Modified: giraph/trunk/giraph/src/main/java/org/apache/giraph/GiraphConfiguration.java
URL: http://svn.apache.org/viewvc/giraph/trunk/giraph/src/main/java/org/apache/giraph/GiraphConfiguration.java?rev=1415857&r1=1415856&r2=1415857&view=diff
==============================================================================
--- giraph/trunk/giraph/src/main/java/org/apache/giraph/GiraphConfiguration.java (original)
+++ giraph/trunk/giraph/src/main/java/org/apache/giraph/GiraphConfiguration.java Fri Nov 30 21:28:09 2012
@@ -651,6 +651,13 @@ public class GiraphConfiguration extends
   public static final boolean USE_UNSAFE_SERIALIZATION_DEFAULT = true;
 
   /**
+   * Maximum number of attempts a master/worker will retry before killing
+   * the job.  This directly maps to the number of map task attempts in
+   * Hadoop.
+   */
+  public static final String MAX_TASK_ATTEMPTS = "mapred.map.max.attempts";
+
+  /**
    * Constructor that creates the configuration
    */
   public GiraphConfiguration() { }
@@ -1125,4 +1132,42 @@ public class GiraphConfiguration extends
     return getBoolean(
         USE_MESSAGE_SIZE_ENCODING, USE_MESSAGE_SIZE_ENCODING_DEFAULT);
   }
+
+  /**
+   * Set the checkpoint frequency of how many supersteps to wait before
+   * checkpointing
+   *
+   * @param checkpointFrequency How often to checkpoint (0 means never)
+   */
+  public void setCheckpointFrequency(int checkpointFrequency) {
+    setInt(CHECKPOINT_FREQUENCY, checkpointFrequency);
+  }
+
+  /**
+   * Get the checkpoint frequency of how many supersteps to wait
+   * before checkpointing
+   *
+   * @return Checkpoint frequency (0 means never)
+   */
+  public int getCheckpointFrequency() {
+    return getInt(CHECKPOINT_FREQUENCY, CHECKPOINT_FREQUENCY_DEFAULT);
+  }
+
+  /**
+   * Set the max task attempts
+   *
+   * @param maxTaskAttempts Max task attempts to use
+   */
+  public void setMaxTaskAttempts(int maxTaskAttempts) {
+    setInt(MAX_TASK_ATTEMPTS, maxTaskAttempts);
+  }
+
+  /**
+   * Get the max task attempts
+   *
+   * @return Max task attempts or -1, if not set
+   */
+  public int getMaxTaskAttempts() {
+    return getInt(MAX_TASK_ATTEMPTS, -1);
+  }
 }

Modified: giraph/trunk/giraph/src/main/java/org/apache/giraph/benchmark/RandomMessageBenchmark.java
URL: http://svn.apache.org/viewvc/giraph/trunk/giraph/src/main/java/org/apache/giraph/benchmark/RandomMessageBenchmark.java?rev=1415857&r1=1415856&r2=1415857&view=diff
==============================================================================
--- giraph/trunk/giraph/src/main/java/org/apache/giraph/benchmark/RandomMessageBenchmark.java (original)
+++ giraph/trunk/giraph/src/main/java/org/apache/giraph/benchmark/RandomMessageBenchmark.java Fri Nov 30 21:28:09 2012
@@ -353,7 +353,6 @@ public class RandomMessageBenchmark impl
     }
     int workers = Integer.parseInt(cmd.getOptionValue('w'));
     GiraphJob job = new GiraphJob(getConf(), getClass().getName());
-    job.getConfiguration().setInt(GiraphConfiguration.CHECKPOINT_FREQUENCY, 0);
     job.getConfiguration().setVertexClass(RandomMessageVertex.class);
     job.getConfiguration().setVertexInputFormatClass(
         PseudoRandomVertexInputFormat.class);

Modified: giraph/trunk/giraph/src/main/java/org/apache/giraph/graph/BspService.java
URL: http://svn.apache.org/viewvc/giraph/trunk/giraph/src/main/java/org/apache/giraph/graph/BspService.java?rev=1415857&r1=1415856&r2=1415857&view=diff
==============================================================================
--- giraph/trunk/giraph/src/main/java/org/apache/giraph/graph/BspService.java (original)
+++ giraph/trunk/giraph/src/main/java/org/apache/giraph/graph/BspService.java Fri Nov 30 21:28:09 2012
@@ -298,9 +298,7 @@ public abstract class BspService<I exten
     this.hostnamePartitionId = hostname + "_" + getTaskPartition();
     this.graphPartitionerFactory = conf.createGraphPartitioner();
 
-    this.checkpointFrequency =
-        conf.getInt(GiraphConfiguration.CHECKPOINT_FREQUENCY,
-            GiraphConfiguration.CHECKPOINT_FREQUENCY_DEFAULT);
+    this.checkpointFrequency = conf.getCheckpointFrequency();
 
     basePath = ZooKeeperManager.getBasePath(conf) + BASE_DIR + "/" + jobId;
     masterJobStatePath = basePath + MASTER_JOB_STATE_NODE;

Modified: giraph/trunk/giraph/src/main/java/org/apache/giraph/graph/GiraphJob.java
URL: http://svn.apache.org/viewvc/giraph/trunk/giraph/src/main/java/org/apache/giraph/graph/GiraphJob.java?rev=1415857&r1=1415856&r2=1415857&view=diff
==============================================================================
--- giraph/trunk/giraph/src/main/java/org/apache/giraph/graph/GiraphJob.java (original)
+++ giraph/trunk/giraph/src/main/java/org/apache/giraph/graph/GiraphJob.java Fri Nov 30 21:28:09 2012
@@ -248,6 +248,19 @@ public class GiraphJob {
     giraphConfiguration.setBoolean("mapreduce.user.classpath.first", true);
     giraphConfiguration.setBoolean("mapreduce.job.user.classpath.first", true);
 
+    // If the checkpoint frequency is 0 (no failure handling), set the max
+    // task attempts to be 0 to encourage faster failure of unrecoverable jobs
+    if (giraphConfiguration.getCheckpointFrequency() == 0) {
+      int oldMaxTaskAttempts = giraphConfiguration.getMaxTaskAttempts();
+      giraphConfiguration.setMaxTaskAttempts(0);
+      if (LOG.isInfoEnabled()) {
+        LOG.info("run: Since checkpointing is disabled (default), " +
+            "do not allow any task retries (setting " +
+            GiraphConfiguration.MAX_TASK_ATTEMPTS + " = 0, " +
+            "old value = " + oldMaxTaskAttempts + ")");
+      }
+    }
+
     // Set the job properties, check them, and submit the job
     ImmutableClassesGiraphConfiguration immutableClassesGiraphConfiguration =
         new ImmutableClassesGiraphConfiguration(giraphConfiguration);

Modified: giraph/trunk/giraph/src/test/java/org/apache/giraph/TestAutoCheckpoint.java
URL: http://svn.apache.org/viewvc/giraph/trunk/giraph/src/test/java/org/apache/giraph/TestAutoCheckpoint.java?rev=1415857&r1=1415856&r2=1415857&view=diff
==============================================================================
--- giraph/trunk/giraph/src/test/java/org/apache/giraph/TestAutoCheckpoint.java (original)
+++ giraph/trunk/giraph/src/test/java/org/apache/giraph/TestAutoCheckpoint.java Fri Nov 30 21:28:09 2012
@@ -18,18 +18,16 @@
 
 package org.apache.giraph;
 
-import static org.junit.Assert.assertTrue;
-
 import java.io.IOException;
-
 import org.apache.giraph.examples.SimpleCheckpointVertex;
 import org.apache.giraph.examples.SimpleSuperstepVertex.SimpleSuperstepVertexInputFormat;
 import org.apache.giraph.examples.SimpleSuperstepVertex.SimpleSuperstepVertexOutputFormat;
 import org.apache.giraph.graph.GiraphJob;
-import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.junit.Test;
 
+import static org.junit.Assert.assertTrue;
+
 /**
  * Unit test for automated checkpoint restarting
  */
@@ -64,13 +62,13 @@ public class TestAutoCheckpoint extends 
         SimpleSuperstepVertexOutputFormat.class,
         outputPath);
 
-    Configuration conf = job.getConfiguration();
+    GiraphConfiguration conf = job.getConfiguration();
     conf.setBoolean(SimpleCheckpointVertex.ENABLE_FAULT, true);
     conf.setInt("mapred.map.max.attempts", 4);
     // Trigger failure faster
     conf.setInt("mapred.task.timeout", 30000);
     conf.setInt(GiraphConfiguration.POLL_MSECS, 5000);
-    conf.setInt(GiraphConfiguration.CHECKPOINT_FREQUENCY, 2);
+    conf.setCheckpointFrequency(2);
     conf.set(GiraphConfiguration.CHECKPOINT_DIRECTORY,
         getTempPath("_singleFaultCheckpoints").toString());
     conf.setBoolean(GiraphConfiguration.CLEANUP_CHECKPOINTS_AFTER_SUCCESS, false);

Modified: giraph/trunk/giraph/src/test/java/org/apache/giraph/TestManualCheckpoint.java
URL: http://svn.apache.org/viewvc/giraph/trunk/giraph/src/test/java/org/apache/giraph/TestManualCheckpoint.java?rev=1415857&r1=1415856&r2=1415857&view=diff
==============================================================================
--- giraph/trunk/giraph/src/test/java/org/apache/giraph/TestManualCheckpoint.java (original)
+++ giraph/trunk/giraph/src/test/java/org/apache/giraph/TestManualCheckpoint.java Fri Nov 30 21:28:09 2012
@@ -17,11 +17,7 @@
  */
 
 package org.apache.giraph;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
 import java.io.IOException;
-
 import org.apache.giraph.examples.SimpleCheckpointVertex;
 import org.apache.giraph.examples.SimpleSuperstepVertex.SimpleSuperstepVertexInputFormat;
 import org.apache.giraph.examples.SimpleSuperstepVertex.SimpleSuperstepVertexOutputFormat;
@@ -30,6 +26,9 @@ import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.Path;
 import org.junit.Test;
 
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
 /**
  * Unit test for manual checkpoint restarting
  */
@@ -61,7 +60,7 @@ public class TestManualCheckpoint extend
         checkpointsDir.toString());
     job.getConfiguration().setBoolean(
         GiraphConfiguration.CLEANUP_CHECKPOINTS_AFTER_SUCCESS, false);
-    job.getConfiguration().setInt(GiraphConfiguration.CHECKPOINT_FREQUENCY, 2);
+    job.getConfiguration().setCheckpointFrequency(2);
 
     assertTrue(job.run(true));
 

Modified: giraph/trunk/giraph/src/test/java/org/apache/giraph/graph/TestAggregatorsHandling.java
URL: http://svn.apache.org/viewvc/giraph/trunk/giraph/src/test/java/org/apache/giraph/graph/TestAggregatorsHandling.java?rev=1415857&r1=1415856&r2=1415857&view=diff
==============================================================================
--- giraph/trunk/giraph/src/test/java/org/apache/giraph/graph/TestAggregatorsHandling.java (original)
+++ giraph/trunk/giraph/src/test/java/org/apache/giraph/graph/TestAggregatorsHandling.java Fri Nov 30 21:28:09 2012
@@ -166,7 +166,7 @@ public class TestAggregatorsHandling ext
         checkpointsDir.toString());
     job.getConfiguration().setBoolean(
         GiraphConfiguration.CLEANUP_CHECKPOINTS_AFTER_SUCCESS, false);
-    job.getConfiguration().setInt(GiraphConfiguration.CHECKPOINT_FREQUENCY, 4);
+    job.getConfiguration().setCheckpointFrequency(4);
 
     assertTrue(job.run(true));