You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@giraph.apache.org by jg...@apache.org on 2011/11/14 22:04:47 UTC
svn commit: r1201899 - in /incubator/giraph/trunk: CHANGELOG bin/ bin/giraph
pom.xml src/main/assembly/ src/main/assembly/assembly.xml
src/main/java/org/apache/giraph/GiraphRunner.java
src/main/java/org/apache/giraph/graph/GraphMapper.java
Author: jghoman
Date: Mon Nov 14 21:04:46 2011
New Revision: 1201899
URL: http://svn.apache.org/viewvc?rev=1201899&view=rev
Log:
GIRAPH-64. Create VertexRunner to make it easier to run users' computations.
Added:
incubator/giraph/trunk/bin/
incubator/giraph/trunk/bin/giraph
incubator/giraph/trunk/src/main/assembly/
incubator/giraph/trunk/src/main/assembly/assembly.xml
incubator/giraph/trunk/src/main/java/org/apache/giraph/GiraphRunner.java
Modified:
incubator/giraph/trunk/CHANGELOG
incubator/giraph/trunk/pom.xml
incubator/giraph/trunk/src/main/java/org/apache/giraph/graph/GraphMapper.java
Modified: incubator/giraph/trunk/CHANGELOG
URL: http://svn.apache.org/viewvc/incubator/giraph/trunk/CHANGELOG?rev=1201899&r1=1201898&r2=1201899&view=diff
==============================================================================
--- incubator/giraph/trunk/CHANGELOG (original)
+++ incubator/giraph/trunk/CHANGELOG Mon Nov 14 21:04:46 2011
@@ -1,8 +1,11 @@
Giraph Change Log
Release 0.70.0 - unreleased
-
- GIRAPH-79. Change the menu layout of the site. (hyunsik via jghoman)
+
+ GIRAPH-64: Create VertexRunner to make it easier to run users'
+ computations. (jghoman)
+
+ GIRAPH-79: Change the menu layout of the site. (hyunsik via jghoman)
GIRAPH-75: Create sections on how to get involved and how
to generate patches on website. (jghoman)
Added: incubator/giraph/trunk/bin/giraph
URL: http://svn.apache.org/viewvc/incubator/giraph/trunk/bin/giraph?rev=1201899&view=auto
==============================================================================
--- incubator/giraph/trunk/bin/giraph (added)
+++ incubator/giraph/trunk/bin/giraph Mon Nov 14 21:04:46 2011
@@ -0,0 +1,87 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# resolve links - $0 may be a softlink
+THIS="$0"
+while [ -h "$THIS" ]; do
+ ls=`ls -ld "$THIS"`
+ link=`expr "$ls" : '.*-> \(.*\)$'`
+ if expr "$link" : '.*/.*' > /dev/null; then
+ THIS="$link"
+ else
+ THIS=`dirname "$THIS"`/"$link"
+ fi
+done
+
+# GIRAPH_HOME is the parent of the directory that holds this script
+THIS_DIR=`dirname "$THIS"`
+GIRAPH_HOME=`cd "$THIS_DIR/.." ; pwd`
+
+# extra properties (leading -D arguments) to send straight to Hadoop; an array keeps values with spaces intact
+HADOOP_PROPERTIES=()
+while [ -n "$1" ] && [ "${1:0:2}" == "-D" ] ; do
+ HADOOP_PROPERTIES=("$1" "${HADOOP_PROPERTIES[@]}")
+ shift
+done
+
+USER_JAR=$1
+shift
+
+if [ ! -e "$USER_JAR" ]; then
+ echo "Can't find user jar to execute."
+ exit 1
+fi
+
+# add user jar to classpath
+CLASSPATH=${USER_JAR}
+
+# so that filenames w/ spaces are handled correctly in loops below
+IFS=
+
+# add release dependencies to CLASSPATH
+for f in $GIRAPH_HOME/lib/*.jar; do
+ CLASSPATH=${CLASSPATH}:$f;
+done
+
+CLASS=org.apache.giraph.GiraphRunner
+# pick the Giraph jar handed to "hadoop jar"; if several match, the last one wins
+for f in $GIRAPH_HOME/lib/giraph*.jar ; do
+ if [ -e "$f" ]; then
+ JAR=$f
+ fi
+done
+
+# restore ordinary behaviour
+unset IFS
+
+if [ "$JAR" = "" ] ; then
+ echo "Can't find Giraph jar."
+ exit 1
+fi
+
+if [ "$HADOOP_CONF_DIR" = "" ] ; then
+ HADOOP_CONF_DIR=$HADOOP_HOME/conf
+ echo "No HADOOP_CONF_DIR set, using $HADOOP_HOME/conf "
+else
+ echo "HADOOP_CONF_DIR=$HADOOP_CONF_DIR"
+fi
+
+# Giraph's jars to pass via -libjars, which takes a comma-separated list rather than a colon-separated one
+GIRAPH_JARS=`echo "${JAR}:${CLASSPATH}" | sed s/:/,/g`
+export HADOOP_CLASSPATH="$HADOOP_CLASSPATH:$CLASSPATH"
+
+exec "$HADOOP_HOME/bin/hadoop" --config "$HADOOP_CONF_DIR" jar "$JAR" $CLASS "${HADOOP_PROPERTIES[@]}" -libjars "$GIRAPH_JARS" "$@"
\ No newline at end of file
Modified: incubator/giraph/trunk/pom.xml
URL: http://svn.apache.org/viewvc/incubator/giraph/trunk/pom.xml?rev=1201899&r1=1201898&r2=1201899&view=diff
==============================================================================
--- incubator/giraph/trunk/pom.xml (original)
+++ incubator/giraph/trunk/pom.xml Mon Nov 14 21:04:46 2011
@@ -163,6 +163,7 @@ under the License.
<build>
<plugins>
+
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-enforcer-plugin</artifactId>
@@ -187,18 +188,38 @@ under the License.
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<version>2.2</version>
- <configuration>
- <outputDirectory>target</outputDirectory>
- <descriptorRefs>
- <descriptorRef>jar-with-dependencies</descriptorRef>
- </descriptorRefs>
- </configuration>
<executions>
<execution>
- <id>make-assembly</id> <!-- this is used for inheritance merges -->
- <phase>compile</phase> <!-- append to the packaging phase. -->
+ <id>build-fat-jar</id>
+ <!-- this is used for inheritance merges -->
+ <phase>compile</phase>
+ <!-- this execution runs in the compile phase. -->
+ <configuration>
+ <descriptorRefs>
+ <descriptorRef>jar-with-dependencies</descriptorRef>
+ </descriptorRefs>
+ </configuration>
+ <goals>
+ <goal>single</goal>
+ <!-- goals == mojos -->
+ </goals>
+ </execution>
+ <execution>
+ <id>make-assembly</id>
+ <!-- this is used for inheritance merges -->
+ <phase>package</phase>
+ <!-- append to the packaging phase. -->
+ <configuration>
+ <!-- Specifies the configuration file of the assembly plugin -->
+ <descriptors>
+ <descriptor>${basedir}/src/main/assembly/assembly.xml
+ </descriptor>
+ </descriptors>
+ <outputDirectory>target</outputDirectory>
+ </configuration>
<goals>
- <goal>single</goal> <!-- goals == mojos -->
+ <goal>single</goal>
+ <!-- goals == mojos -->
</goals>
</execution>
</executions>
Added: incubator/giraph/trunk/src/main/assembly/assembly.xml
URL: http://svn.apache.org/viewvc/incubator/giraph/trunk/src/main/assembly/assembly.xml?rev=1201899&view=auto
==============================================================================
--- incubator/giraph/trunk/src/main/assembly/assembly.xml (added)
+++ incubator/giraph/trunk/src/main/assembly/assembly.xml Mon Nov 14 21:04:46 2011
@@ -0,0 +1,86 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
+ <id>bin</id>
+ <formats>
+ <format>tar.gz</format> <!-- the distribution is a gzip-compressed tarball -->
+ </formats>
+ <moduleSets> <!-- NOTE(review): module sets normally apply to multi-module builds; confirm this section is needed -->
+ <moduleSet>
+ <binaries>
+ <includeDependencies>true</includeDependencies>
+ <outputDirectory>lib</outputDirectory>
+ <unpack>false</unpack>
+ <dependencySets>
+ <dependencySet/>
+ </dependencySets>
+ </binaries>
+ </moduleSet>
+ </moduleSets>
+ <fileSets>
+ <fileSet> <!-- jars from the build directory go into lib/ -->
+ <directory>${project.build.directory}</directory>
+ <outputDirectory>/lib</outputDirectory>
+ <includes>
+ <include>*.jar</include>
+ </includes>
+ <excludes>
+ <exclude>giraph*jar-with-dependencies.jar</exclude> <!-- leave the jar-with-dependencies build out of lib/ -->
+ </excludes>
+ </fileSet>
+
+ <fileSet> <!-- top-level project documents -->
+ <includes>
+ <include>${basedir}/CHANGELOG</include>
+ <include>${basedir}/LICENSE.txt</include>
+ <include>${basedir}/NOTICE</include>
+ <include>${basedir}/README</include>
+ <include>${basedir}/CODE_CONVENTIONS</include>
+ </includes>
+ </fileSet>
+ <fileSet> <!-- the project's pom.xml -->
+ <includes>
+ <include>pom.xml</include>
+ </includes>
+ </fileSet>
+
+ <fileSet> <!-- the src directory -->
+ <directory>src</directory>
+ </fileSet>
+
+ <fileSet> <!-- files from bin/ are packaged with mode 755 -->
+ <directory>bin</directory>
+ <outputDirectory>bin</outputDirectory>
+ <fileMode>755</fileMode>
+ </fileSet>
+ <fileSet> <!-- target/site is copied to docs/ -->
+ <directory>target/site</directory>
+ <outputDirectory>docs</outputDirectory>
+ </fileSet>
+
+ </fileSets>
+ <dependencySets> <!-- runtime-scope dependencies go into /lib without being unpacked -->
+ <dependencySet>
+ <useProjectArtifact>false</useProjectArtifact>
+ <outputDirectory>/lib</outputDirectory>
+ <unpack>false</unpack>
+ <scope>runtime</scope>
+ </dependencySet>
+ </dependencySets>
+</assembly>
Added: incubator/giraph/trunk/src/main/java/org/apache/giraph/GiraphRunner.java
URL: http://svn.apache.org/viewvc/incubator/giraph/trunk/src/main/java/org/apache/giraph/GiraphRunner.java?rev=1201899&view=auto
==============================================================================
--- incubator/giraph/trunk/src/main/java/org/apache/giraph/GiraphRunner.java (added)
+++ incubator/giraph/trunk/src/main/java/org/apache/giraph/GiraphRunner.java Mon Nov 14 21:04:46 2011
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.giraph;
+
+import org.apache.commons.cli.BasicParser;
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Options;
+import org.apache.giraph.graph.GiraphJob;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.log4j.Logger;
+
+/** Command-line {@link Tool} that configures and launches a {@link GiraphJob} for a vertex class. */
+public class GiraphRunner implements Tool {
+ private static final Logger LOG = Logger.getLogger(GiraphRunner.class);
+ private Configuration conf;
+ /** Mandatory options, each paired with the message printed when it is missing. */
+ final String [][] requiredOptions =
+ {{"w", "Need to choose the number of workers (-w)"},
+ {"if", "Need to set inputformat (-if)"}};
+
+ /** @return the command-line options accepted by {@link #run(String[])} */
+ private Options getOptions() {
+ Options options = new Options();
+ options.addOption("h", "help", false, "Help");
+ options.addOption("q", "quiet", false, "Quiet output");
+ options.addOption("w", "workers", true, "Number of workers");
+ options.addOption("if", "inputFormat", true, "Graph inputformat");
+ options.addOption("of", "outputFormat", true, "Graph outputformat");
+ options.addOption("ip", "inputPath", true, "Graph input path");
+ options.addOption("op", "outputPath", true, "Graph output path");
+ return options;
+ }
+
+ @Override
+ public Configuration getConf() {
+ return conf;
+ }
+
+ @Override
+ public void setConf(Configuration conf) {
+ this.conf = conf;
+ }
+
+ /**
+ * Parses the arguments, configures a {@link GiraphJob} for the vertex class and runs it.
+ * @param args vertex class name followed by the options from {@link #getOptions()}
+ * @return 0 if help was printed or the job reported success, -1 otherwise
+ * @throws Exception if an argument is invalid, a class cannot be loaded or the job fails
+ */
+ @Override
+ public int run(String[] args) throws Exception {
+ Options options = getOptions();
+ CommandLine cmd = new BasicParser().parse(options, args);
+ // Print usage for -h/--help as well as for an empty command line.
+ if (args.length == 0 || cmd.hasOption("h")) {
+ new HelpFormatter().printHelp(getClass().getName(), options, true);
+ return 0;
+ }
+ String vertexClassName = args[0];
+ LOG.debug("Attempting to run Vertex: " + vertexClassName);
+ // Verify all the required options have been provided
+ for (String[] requiredOption : requiredOptions) {
+ if (!cmd.hasOption(requiredOption[0])) {
+ System.out.println(requiredOption[1]);
+ return -1;
+ }
+ }
+ int workers = Integer.parseInt(cmd.getOptionValue('w'));
+ GiraphJob job = new GiraphJob(getConf(), "Giraph: " + vertexClassName);
+ job.setVertexClass(Class.forName(vertexClassName));
+ job.setVertexInputFormatClass(Class.forName(cmd.getOptionValue("if")));
+ // -of is optional: load the class only when given, Class.forName(null) would throw.
+ if (cmd.hasOption("of")) {
+ job.setVertexOutputFormatClass(Class.forName(cmd.getOptionValue("of")));
+ }
+ if (cmd.hasOption("ip")) {
+ FileInputFormat.addInputPath(job, new Path(cmd.getOptionValue("ip")));
+ } else {
+ LOG.info("No input path specified. Ensure your InputFormat does not require one.");
+ }
+ if (cmd.hasOption("op")) {
+ FileOutputFormat.setOutputPath(job, new Path(cmd.getOptionValue("op")));
+ } else {
+ LOG.info("No output path specified. Ensure your OutputFormat does not require one.");
+ }
+ job.setWorkerConfiguration(workers, workers, 100.0f);
+ // job.run receives true unless -q (quiet) was given.
+ return job.run(!cmd.hasOption('q')) ? 0 : -1;
+ }
+
+ /** Entry point: runs this tool through {@link ToolRunner} and exits with its return code. */
+ public static void main(String[] args) throws Exception {
+ System.exit(ToolRunner.run(new GiraphRunner(), args));
+ }
+}
Modified: incubator/giraph/trunk/src/main/java/org/apache/giraph/graph/GraphMapper.java
URL: http://svn.apache.org/viewvc/incubator/giraph/trunk/src/main/java/org/apache/giraph/graph/GraphMapper.java?rev=1201899&r1=1201898&r2=1201899&view=diff
==============================================================================
--- incubator/giraph/trunk/src/main/java/org/apache/giraph/graph/GraphMapper.java (original)
+++ incubator/giraph/trunk/src/main/java/org/apache/giraph/graph/GraphMapper.java Mon Nov 14 21:04:46 2011
@@ -25,6 +25,8 @@ import org.apache.giraph.comm.WorkerComm
import org.apache.giraph.utils.ReflectionUtils;
import org.apache.giraph.zk.ZooKeeperManager;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.filecache.DistributedCache;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Mapper;
@@ -408,16 +410,32 @@ public class GraphMapper<I extends Writa
// Do some initial setup (possibly starting up a Zookeeper service)
context.setStatus("setup: Initializing Zookeeper services.");
- String jarFile = context.getJar();
- if (jarFile == null) {
- jarFile = findContainingJar(getClass());
+ Path[] fileClassPaths = DistributedCache.getLocalCacheArchives(conf);
+ String zkClasspath = null;
+ if(fileClassPaths == null) {
+ if(LOG.isInfoEnabled()) {
+ LOG.info("Distributed cache is empty. Assuming fatjar.");
+ }
+ String jarFile = context.getJar();
+ if (jarFile == null) {
+ jarFile = findContainingJar(getClass());
+ }
+ zkClasspath = jarFile.replaceFirst("file:", "");
+ } else {
+ StringBuilder sb = new StringBuilder();
+ sb.append(fileClassPaths[0]);
+
+ for (int i = 1; i < fileClassPaths.length; i++) {
+ sb.append(":");
+ sb.append(fileClassPaths[i]);
+ }
+ zkClasspath = sb.toString();
}
- String trimmedJarFile = jarFile.replaceFirst("file:", "");
+
if (LOG.isInfoEnabled()) {
- LOG.info("setup: jar file @ " + jarFile +
- ", using " + trimmedJarFile);
+ LOG.info("setup: classpath @ " + zkClasspath);
}
- conf.set(GiraphJob.ZOOKEEPER_JAR, trimmedJarFile);
+ conf.set(GiraphJob.ZOOKEEPER_JAR, zkClasspath);
String serverPortList =
conf.get(GiraphJob.ZOOKEEPER_LIST, "");
if (serverPortList == "") {