You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@giraph.apache.org by ac...@apache.org on 2011/09/08 20:47:19 UTC
svn commit: r1166854 - in /incubator/giraph/trunk: ./
src/main/java/org/apache/giraph/bsp/ src/main/java/org/apache/giraph/graph/
src/test/java/org/apache/giraph/
Author: aching
Date: Thu Sep 8 18:47:19 2011
New Revision: 1166854
URL: http://svn.apache.org/viewvc?rev=1166854&view=rev
Log:
GIRAPH-25 NPE in BspServiceMaster when failing a job (committed by
aching on behalf of dvryaboy).
Added:
incubator/giraph/trunk/src/test/java/org/apache/giraph/TestNotEnoughMapTasks.java
Modified:
incubator/giraph/trunk/CHANGELOG
incubator/giraph/trunk/src/main/java/org/apache/giraph/bsp/CentralizedServiceMaster.java
incubator/giraph/trunk/src/main/java/org/apache/giraph/graph/BspServiceMaster.java
incubator/giraph/trunk/src/main/java/org/apache/giraph/graph/MasterThread.java
Modified: incubator/giraph/trunk/CHANGELOG
URL: http://svn.apache.org/viewvc/incubator/giraph/trunk/CHANGELOG?rev=1166854&r1=1166853&r2=1166854&view=diff
==============================================================================
--- incubator/giraph/trunk/CHANGELOG (original)
+++ incubator/giraph/trunk/CHANGELOG Thu Sep 8 18:47:19 2011
@@ -1,25 +1,28 @@
Giraph Change Log
Release 0.70.0 - unreleased
+
+ GIRAPH-25 NPE in BspServiceMaster when failing a job (aching on behalf
+ of dvryaboy)
- GIRAPH-24. Job-level statistics reports one superstep greater than
+ GIRAPH-24 Job-level statistics reports one superstep greater than
workers. (jghoman)
- GIRAPH-18. Refactor BspServiceWorker::loadVertices(). (jghoman)
+ GIRAPH-18 Refactor BspServiceWorker::loadVertices(). (jghoman)
- GIRAPH-14. Support for the Facebook Hadoop branch. (aching)
+ GIRAPH-14 Support for the Facebook Hadoop branch. (aching)
- GIRAPH-16. Add Apache RAT to the verify build step. (omalley)
+ GIRAPH-16 Add Apache RAT to the verify build step. (omalley)
- GIRAPH-17 - Giraph doesn't give up properly after the maximum connect
+ GIRAPH-17 Giraph doesn't give up properly after the maximum connect
attempts to ZooKeeper. (aching)
- GIRAPH-2: Make the project homepage. (jghoman)
+ GIRAPH-2 Make the project homepage. (jghoman)
- GIRAPH-9: Change Yahoo License Header to Apache License Header (hyunsik)
+ GIRAPH-9 Change Yahoo License Header to Apache License Header (hyunsik)
- GIRAPH-6. Remove Yahoo-specific code from pom.xml. (jghoman)
+ GIRAPH-6 Remove Yahoo-specific code from pom.xml. (jghoman)
GIRAPH-5 Remove Yahoo directories after svn import from Yahoo! (aching)
- GIRAPH-3. Vertex:sentMsgToAllEdges should be sendMsg. (jghoman)
\ No newline at end of file
+ GIRAPH-3 Vertex:sentMsgToAllEdges should be sendMsg. (jghoman)
\ No newline at end of file
Modified: incubator/giraph/trunk/src/main/java/org/apache/giraph/bsp/CentralizedServiceMaster.java
URL: http://svn.apache.org/viewvc/incubator/giraph/trunk/src/main/java/org/apache/giraph/bsp/CentralizedServiceMaster.java?rev=1166854&r1=1166853&r2=1166854&view=diff
==============================================================================
--- incubator/giraph/trunk/src/main/java/org/apache/giraph/bsp/CentralizedServiceMaster.java (original)
+++ incubator/giraph/trunk/src/main/java/org/apache/giraph/bsp/CentralizedServiceMaster.java Thu Sep 8 18:47:19 2011
@@ -46,7 +46,7 @@ public interface CentralizedServiceMaste
* VertexInputFormat. These InputSplits will be split further into
* partitions by the workers.
*
- * @return number of partitions
+ * @return number of partitions. Returns -1 on failure to create valid input splits.
*/
int createInputSplits();
Modified: incubator/giraph/trunk/src/main/java/org/apache/giraph/graph/BspServiceMaster.java
URL: http://svn.apache.org/viewvc/incubator/giraph/trunk/src/main/java/org/apache/giraph/graph/BspServiceMaster.java?rev=1166854&r1=1166853&r2=1166854&view=diff
==============================================================================
--- incubator/giraph/trunk/src/main/java/org/apache/giraph/graph/BspServiceMaster.java (original)
+++ incubator/giraph/trunk/src/main/java/org/apache/giraph/graph/BspServiceMaster.java Thu Sep 8 18:47:19 2011
@@ -408,15 +408,18 @@ public class BspServiceMaster<
++pollAttempt;
}
if (failJob) {
- LOG.warn("checkWorkers: Did not receive enough processes in " +
- "time (only " + totalResponses + " of " +
- minWorkers + " required)");
+ LOG.error("checkWorkers: Did not receive enough processes in " +
+ "time (only " + totalResponses + " of " +
+ minWorkers + " required). This occurs if you do not " +
+ "have enough map tasks available simultaneously on " +
+ "your Hadoop instance to fulfill the number of " +
+ "requested workers.");
return null;
}
if (healthyWorkerList.size() < minWorkers) {
- LOG.warn("checkWorkers: Only " + healthyWorkerList.size() +
- " available when " + minWorkers + " are required.");
+ LOG.error("checkWorkers: Only " + healthyWorkerList.size() +
+ " available when " + minWorkers + " are required.");
return null;
}
@@ -450,6 +453,7 @@ public class BspServiceMaster<
return workerHostnamePortMap;
}
+ @Override
public int createInputSplits() {
// Only the 'master' should be doing this. Wait until the number of
// processes that have reported health exceeds the minimum percentage.
@@ -481,6 +485,7 @@ public class BspServiceMaster<
Map<String, JSONArray> healthyWorkerHostnamePortMap = checkWorkers();
if (healthyWorkerHostnamePortMap == null) {
setJobState(ApplicationState.FAILED, -1, -1);
+ return -1;
}
List<InputSplit> splitList =
@@ -726,6 +731,7 @@ public class BspServiceMaster<
}
}
+ @Override
public void setup() {
// Might have to manually load a checkpoint.
// In that case, the input splits are not set, they will be faked by
@@ -750,6 +756,7 @@ public class BspServiceMaster<
}
}
+ @Override
public boolean becomeMaster() {
// Create my bid to become the master, then try to become the worker
// or return false.
Modified: incubator/giraph/trunk/src/main/java/org/apache/giraph/graph/MasterThread.java
URL: http://svn.apache.org/viewvc/incubator/giraph/trunk/src/main/java/org/apache/giraph/graph/MasterThread.java?rev=1166854&r1=1166853&r2=1166854&view=diff
==============================================================================
--- incubator/giraph/trunk/src/main/java/org/apache/giraph/graph/MasterThread.java (original)
+++ incubator/giraph/trunk/src/main/java/org/apache/giraph/graph/MasterThread.java Thu Sep 8 18:47:19 2011
@@ -89,61 +89,61 @@ public class MasterThread<I extends Writ
long endMillis = 0;
bspServiceMaster.setup();
if (bspServiceMaster.becomeMaster() == true) {
- if (bspServiceMaster.getRestartedSuperstep() ==
- BspService.UNSET_SUPERSTEP) {
- bspServiceMaster.createInputSplits();
- }
- long setupMillis = (System.currentTimeMillis() - startMillis);
- context.getCounter(GIRAPH_TIMERS_COUNTER_GROUP_NAME,
- "Setup (milliseconds)").
- increment(setupMillis);
- setupSecs = setupMillis / 1000.0d;
- SuperstepState superstepState = SuperstepState.INITIAL;
- long cachedSuperstep = BspService.UNSET_SUPERSTEP;
- while (superstepState != SuperstepState.ALL_SUPERSTEPS_DONE) {
- long startSuperstepMillis = System.currentTimeMillis();
- cachedSuperstep = bspServiceMaster.getSuperstep();
- superstepState = bspServiceMaster.coordinateSuperstep();
- long superstepMillis = System.currentTimeMillis() -
- startSuperstepMillis;
- superstepSecsMap.put(new Long(cachedSuperstep),
- superstepMillis / 1000.0d);
- if (LOG.isInfoEnabled()) {
- LOG.info("masterThread: Coordination of superstep " +
- cachedSuperstep + " took " +
- superstepMillis / 1000.0d +
- " seconds ended with state " + superstepState +
- " and is now on superstep " +
- bspServiceMaster.getSuperstep());
- }
- if (superstepCounterOn) {
- String counterPrefix;
- if (cachedSuperstep == -1) {
- counterPrefix = "Vertex input superstep";
- } else {
- counterPrefix = "Superstep " + cachedSuperstep;
+ // Attempt to create InputSplits if necessary. Bail out if that fails.
+ if (bspServiceMaster.getRestartedSuperstep() != BspService.UNSET_SUPERSTEP
+ || bspServiceMaster.createInputSplits() != -1) {
+ long setupMillis = (System.currentTimeMillis() - startMillis);
+ context.getCounter(GIRAPH_TIMERS_COUNTER_GROUP_NAME,
+ "Setup (milliseconds)").
+ increment(setupMillis);
+ setupSecs = setupMillis / 1000.0d;
+ SuperstepState superstepState = SuperstepState.INITIAL;
+ long cachedSuperstep = BspService.UNSET_SUPERSTEP;
+ while (superstepState != SuperstepState.ALL_SUPERSTEPS_DONE) {
+ long startSuperstepMillis = System.currentTimeMillis();
+ cachedSuperstep = bspServiceMaster.getSuperstep();
+ superstepState = bspServiceMaster.coordinateSuperstep();
+ long superstepMillis = System.currentTimeMillis() -
+ startSuperstepMillis;
+ superstepSecsMap.put(new Long(cachedSuperstep),
+ superstepMillis / 1000.0d);
+ if (LOG.isInfoEnabled()) {
+ LOG.info("masterThread: Coordination of superstep " +
+ cachedSuperstep + " took " +
+ superstepMillis / 1000.0d +
+ " seconds ended with state " + superstepState +
+ " and is now on superstep " +
+ bspServiceMaster.getSuperstep());
+ }
+ if (superstepCounterOn) {
+ String counterPrefix;
+ if (cachedSuperstep == -1) {
+ counterPrefix = "Vertex input superstep";
+ } else {
+ counterPrefix = "Superstep " + cachedSuperstep;
+ }
+ context.getCounter(GIRAPH_TIMERS_COUNTER_GROUP_NAME,
+ counterPrefix +
+ " (milliseconds)").
+ increment(superstepMillis);
}
- context.getCounter(GIRAPH_TIMERS_COUNTER_GROUP_NAME,
- counterPrefix +
- " (milliseconds)").
- increment(superstepMillis);
- }
- // If a worker failed, restart from a known good superstep
- if (superstepState == SuperstepState.WORKER_FAILURE) {
- bspServiceMaster.restartFromCheckpoint(
- bspServiceMaster.getLastGoodCheckpoint());
+ // If a worker failed, restart from a known good superstep
+ if (superstepState == SuperstepState.WORKER_FAILURE) {
+ bspServiceMaster.restartFromCheckpoint(
+ bspServiceMaster.getLastGoodCheckpoint());
+ }
+ endMillis = System.currentTimeMillis();
}
- endMillis = System.currentTimeMillis();
+ bspServiceMaster.setJobState(ApplicationState.FINISHED, -1, -1);
}
- bspServiceMaster.setJobState(ApplicationState.FINISHED, -1, -1);
}
bspServiceMaster.cleanup();
if (!superstepSecsMap.isEmpty()) {
context.getCounter(
- GIRAPH_TIMERS_COUNTER_GROUP_NAME,
- "Shutdown (milliseconds)").
- increment(System.currentTimeMillis() - endMillis);
+ GIRAPH_TIMERS_COUNTER_GROUP_NAME,
+ "Shutdown (milliseconds)").
+ increment(System.currentTimeMillis() - endMillis);
if (LOG.isInfoEnabled()) {
LOG.info("setup: Took " + setupSecs + " seconds.");
}
Added: incubator/giraph/trunk/src/test/java/org/apache/giraph/TestNotEnoughMapTasks.java
URL: http://svn.apache.org/viewvc/incubator/giraph/trunk/src/test/java/org/apache/giraph/TestNotEnoughMapTasks.java?rev=1166854&view=auto
==============================================================================
--- incubator/giraph/trunk/src/test/java/org/apache/giraph/TestNotEnoughMapTasks.java (added)
+++ incubator/giraph/trunk/src/test/java/org/apache/giraph/TestNotEnoughMapTasks.java Thu Sep 8 18:47:19 2011
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.giraph;
+
+import java.io.IOException;
+
+import org.apache.giraph.examples.SimpleCheckpointVertex;
+import org.apache.giraph.examples.SimpleSuperstepVertex.SimpleSuperstepVertexOutputFormat;
+import org.apache.giraph.examples.SimpleSuperstepVertex.SimpleSuperstepVertexInputFormat;
+import org.apache.giraph.graph.GiraphJob;
+import org.apache.hadoop.fs.Path;
+import junit.framework.Test;
+import junit.framework.TestSuite;
+
+/**
+ * Unit test for not enough map tasks
+ */
+public class TestNotEnoughMapTasks extends BspCase {
+ /**
+ * Create the test case
+ *
+ * @param testName name of the test case
+ */
+ public TestNotEnoughMapTasks(String testName) {
+ super(testName);
+ }
+
+ /**
+ * @return the suite of tests being tested
+ */
+ public static Test suite() {
+ return new TestSuite(TestNotEnoughMapTasks.class);
+ }
+
+ /**
+ * This job should always fail gracefully with not enough map tasks.
+ *
+ * @throws IOException
+ * @throws ClassNotFoundException
+ * @throws InterruptedException
+ */
+ public void testNotEnoughMapTasks()
+ throws IOException, InterruptedException, ClassNotFoundException {
+ if (getJobTracker() == null) {
+ System.out.println(
+ "testNotEnoughMapTasks: Ignore this test in local mode.");
+ return;
+ }
+ GiraphJob job = new GiraphJob(getCallingMethodName());
+ setupConfiguration(job);
+ // An unlikely impossible number of workers to achieve
+ final int unlikelyWorkers = Short.MAX_VALUE;
+ job.setWorkerConfiguration(
+ unlikelyWorkers, unlikelyWorkers, 100.0f);
+ // Only one poll attempt of one second to make failure faster
+ job.getConfiguration().setInt(GiraphJob.POLL_ATTEMPTS, 1);
+ job.getConfiguration().setInt(GiraphJob.POLL_MSECS, 1);
+ job.setVertexClass(SimpleCheckpointVertex.class);
+ job.setVertexInputFormatClass(SimpleSuperstepVertexInputFormat.class);
+ job.setVertexOutputFormatClass(SimpleSuperstepVertexOutputFormat.class);
+ Path outputPath = new Path("/tmp/" + getCallingMethodName());
+ removeAndSetOutput(job, outputPath);
+ assertFalse(job.run(false));
+ }
+}