You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@giraph.apache.org by jg...@apache.org on 2011/11/14 22:04:47 UTC

svn commit: r1201899 - in /incubator/giraph/trunk: CHANGELOG bin/ bin/giraph pom.xml src/main/assembly/ src/main/assembly/assembly.xml src/main/java/org/apache/giraph/GiraphRunner.java src/main/java/org/apache/giraph/graph/GraphMapper.java

Author: jghoman
Date: Mon Nov 14 21:04:46 2011
New Revision: 1201899

URL: http://svn.apache.org/viewvc?rev=1201899&view=rev
Log:
GIRAPH-64. Create VertexRunner to make it easier to run users' computations.

Added:
    incubator/giraph/trunk/bin/
    incubator/giraph/trunk/bin/giraph
    incubator/giraph/trunk/src/main/assembly/
    incubator/giraph/trunk/src/main/assembly/assembly.xml
    incubator/giraph/trunk/src/main/java/org/apache/giraph/GiraphRunner.java
Modified:
    incubator/giraph/trunk/CHANGELOG
    incubator/giraph/trunk/pom.xml
    incubator/giraph/trunk/src/main/java/org/apache/giraph/graph/GraphMapper.java

Modified: incubator/giraph/trunk/CHANGELOG
URL: http://svn.apache.org/viewvc/incubator/giraph/trunk/CHANGELOG?rev=1201899&r1=1201898&r2=1201899&view=diff
==============================================================================
--- incubator/giraph/trunk/CHANGELOG (original)
+++ incubator/giraph/trunk/CHANGELOG Mon Nov 14 21:04:46 2011
@@ -1,8 +1,11 @@
 Giraph Change Log
 
 Release 0.70.0 - unreleased
-  
-  GIRAPH-79. Change the menu layout of the site. (hyunsik via jghoman)
+ 
+  GIRAPH-64: Create VertexRunner to make it easier to run users'
+  computations. (jghoman)
+ 
+  GIRAPH-79: Change the menu layout of the site. (hyunsik via jghoman)
 
   GIRAPH-75: Create sections on how to get involved and how 
   to generate patches on website. (jghoman)

Added: incubator/giraph/trunk/bin/giraph
URL: http://svn.apache.org/viewvc/incubator/giraph/trunk/bin/giraph?rev=1201899&view=auto
==============================================================================
--- incubator/giraph/trunk/bin/giraph (added)
+++ incubator/giraph/trunk/bin/giraph Mon Nov 14 21:04:46 2011
@@ -0,0 +1,99 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Runs org.apache.giraph.GiraphRunner via "hadoop jar".
+#
+# Usage: giraph [-Dname=value ...] <user jar> <GiraphRunner arguments>
+#
+# HADOOP_HOME must be set; HADOOP_CONF_DIR defaults to $HADOOP_HOME/conf.
+
+# resolve links - $0 may be a softlink
+THIS="$0"
+while [ -h "$THIS" ]; do
+  ls=`ls -ld "$THIS"`
+  link=`expr "$ls" : '.*-> \(.*\)$'`
+  if expr "$link" : '.*/.*' > /dev/null; then
+    THIS="$link"
+  else
+    THIS=`dirname "$THIS"`/"$link"
+  fi
+done
+
+# some directories
+THIS_DIR=`dirname "$THIS"`
+GIRAPH_HOME=`cd "$THIS_DIR/.." ; pwd`
+
+# without HADOOP_HOME the exec at the bottom would try to run /bin/hadoop
+if [ "$HADOOP_HOME" = "" ] ; then
+  echo "HADOOP_HOME is not set."
+  exit 1
+fi
+
+# extra properties to send straight to Hadoop; an array keeps them in the
+# order given and keeps values that contain spaces in one piece
+HADOOP_PROPERTIES=()
+while [ $# -gt 0 ] && [ "${1:0:2}" == "-D" ] ; do
+  HADOOP_PROPERTIES[${#HADOOP_PROPERTIES[@]}]="$1"
+  shift
+done
+
+USER_JAR=$1
+shift
+
+if [ ! -e "$USER_JAR" ]; then
+  echo "Can't find user jar to execute."
+  exit 1
+fi
+
+# add user jar to classpath
+CLASSPATH=${USER_JAR}
+
+# add release dependencies to CLASSPATH; the -e test skips the literal
+# pattern the glob leaves behind when lib/ holds no jars
+for f in "$GIRAPH_HOME"/lib/*.jar; do
+  if [ -e "$f" ]; then
+    CLASSPATH=${CLASSPATH}:$f
+  fi
+done
+
+CLASS=org.apache.giraph.GiraphRunner
+
+for f in "$GIRAPH_HOME"/lib/giraph*.jar ; do
+  if [ -e "$f" ]; then
+    JAR=$f
+  fi
+done
+
+if [ "$JAR" = "" ] ; then
+  echo "Can't find Giraph jar."
+  exit 1
+fi
+
+if [ "$HADOOP_CONF_DIR" = "" ] ; then
+  HADOOP_CONF_DIR=$HADOOP_HOME/conf
+  echo "No HADOOP_CONF_DIR set, using $HADOOP_HOME/conf "
+else
+  echo "HADOOP_CONF_DIR=$HADOOP_CONF_DIR"
+fi
+
+# Giraph's jars to add to distributed cache via -libjar, which are csv rather than :sv
+GIRAPH_JARS=`echo "${JAR}:${CLASSPATH}" | sed s/:/,/g`
+# prepend the inherited HADOOP_CLASSPATH only when it is non-empty, so the
+# result never starts with an empty entry
+export HADOOP_CLASSPATH=${HADOOP_CLASSPATH:+$HADOOP_CLASSPATH:}$CLASSPATH
+
+exec "$HADOOP_HOME/bin/hadoop" --config "$HADOOP_CONF_DIR" jar "$JAR" "$CLASS" "${HADOOP_PROPERTIES[@]}" -libjars "$GIRAPH_JARS" "$@"
\ No newline at end of file

Modified: incubator/giraph/trunk/pom.xml
URL: http://svn.apache.org/viewvc/incubator/giraph/trunk/pom.xml?rev=1201899&r1=1201898&r2=1201899&view=diff
==============================================================================
--- incubator/giraph/trunk/pom.xml (original)
+++ incubator/giraph/trunk/pom.xml Mon Nov 14 21:04:46 2011
@@ -163,6 +163,7 @@ under the License.
 
   <build>
     <plugins>
+
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>
         <artifactId>maven-enforcer-plugin</artifactId>
@@ -187,18 +188,38 @@ under the License.
         <groupId>org.apache.maven.plugins</groupId>
         <artifactId>maven-assembly-plugin</artifactId>
         <version>2.2</version>
-        <configuration>
-         <outputDirectory>target</outputDirectory>
-          <descriptorRefs>
-            <descriptorRef>jar-with-dependencies</descriptorRef>
-          </descriptorRefs>
-        </configuration>
         <executions>
           <execution>
-            <id>make-assembly</id> <!-- this is used for inheritance merges -->
-            <phase>compile</phase> <!-- append to the packaging phase. -->
+            <id>build-fat-jar</id>
+            <!-- this is used for inheritance merges -->
+            <phase>compile</phase>
+            <!-- append to the packaging phase. -->
+            <configuration>
+              <descriptorRefs>
+                <descriptorRef>jar-with-dependencies</descriptorRef>
+              </descriptorRefs>
+            </configuration>
+            <goals>
+              <goal>single</goal>
+              <!-- goals == mojos -->
+            </goals>
+          </execution>
+          <execution>
+            <id>make-assembly</id>
+            <!-- this is used for inheritance merges -->
+            <phase>package</phase>
+            <!-- append to the packaging phase. -->
+            <configuration>
+              <!-- Specifies the configuration file of the assembly plugin -->
+              <descriptors>
+                <descriptor>${basedir}/src/main/assembly/assembly.xml
+                </descriptor>
+              </descriptors>
+              <outputDirectory>target</outputDirectory>
+            </configuration>
             <goals>
-              <goal>single</goal> <!-- goals == mojos -->
+              <goal>single</goal>
+              <!-- goals == mojos -->
             </goals>
           </execution>
         </executions>

Added: incubator/giraph/trunk/src/main/assembly/assembly.xml
URL: http://svn.apache.org/viewvc/incubator/giraph/trunk/src/main/assembly/assembly.xml?rev=1201899&view=auto
==============================================================================
--- incubator/giraph/trunk/src/main/assembly/assembly.xml (added)
+++ incubator/giraph/trunk/src/main/assembly/assembly.xml Mon Nov 14 21:04:46 2011
@@ -0,0 +1,86 @@
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
+  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
+  <id>bin</id>
+  <formats>
+    <format>tar.gz</format>
+  </formats>
+  <moduleSets>
+    <moduleSet>
+      <binaries>
+        <includeDependencies>true</includeDependencies>
+        <outputDirectory>lib</outputDirectory>
+        <unpack>false</unpack>
+        <dependencySets>
+          <dependencySet/>
+        </dependencySets>
+      </binaries>
+    </moduleSet>
+  </moduleSets>
+  <fileSets>
+    <fileSet>
+      <directory>${project.build.directory}</directory>
+      <outputDirectory>/lib</outputDirectory>
+      <includes>
+        <include>*.jar</include>
+      </includes>
+      <excludes>
+        <exclude>giraph*jar-with-dependencies.jar</exclude>
+      </excludes>
+    </fileSet>
+
+    <fileSet>
+      <includes>
+        <include>${basedir}/CHANGELOG</include>
+        <include>${basedir}/LICENSE.txt</include>
+        <include>${basedir}/NOTICE</include>
+        <include>${basedir}/README</include>
+        <include>${basedir}/CODE_CONVENTIONS</include>
+      </includes>
+    </fileSet>
+    <fileSet>
+      <includes>
+        <include>pom.xml</include>
+      </includes>
+    </fileSet>
+
+    <fileSet>
+      <directory>src</directory>
+    </fileSet>
+
+    <fileSet>
+      <directory>bin</directory>
+      <outputDirectory>bin</outputDirectory>
+      <fileMode>755</fileMode>
+    </fileSet>
+    <fileSet>
+      <directory>target/site</directory>
+      <outputDirectory>docs</outputDirectory>
+    </fileSet>
+
+  </fileSets>
+  <dependencySets>
+    <dependencySet>
+      <useProjectArtifact>false</useProjectArtifact>
+      <outputDirectory>/lib</outputDirectory>
+      <unpack>false</unpack>
+      <scope>runtime</scope>
+    </dependencySet>
+  </dependencySets>
+</assembly>

Added: incubator/giraph/trunk/src/main/java/org/apache/giraph/GiraphRunner.java
URL: http://svn.apache.org/viewvc/incubator/giraph/trunk/src/main/java/org/apache/giraph/GiraphRunner.java?rev=1201899&view=auto
==============================================================================
--- incubator/giraph/trunk/src/main/java/org/apache/giraph/GiraphRunner.java (added)
+++ incubator/giraph/trunk/src/main/java/org/apache/giraph/GiraphRunner.java Mon Nov 14 21:04:46 2011
@@ -0,0 +1,159 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.giraph;
+
+import org.apache.commons.cli.BasicParser;
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Options;
+import org.apache.giraph.graph.GiraphJob;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.log4j.Logger;
+
+/**
+ * Command line entry point that configures and runs a {@link GiraphJob} for a
+ * vertex class named by the user.  The first argument is the vertex class
+ * name; the remaining arguments are the options from {@link #getOptions()}.
+ */
+public class GiraphRunner implements Tool {
+  private static final Logger LOG = Logger.getLogger(GiraphRunner.class);
+  private Configuration conf;
+
+  /** Options that must be supplied, each with the message printed if absent. */
+  final String [][] requiredOptions =
+      {{"w", "Need to choose the number of workers (-w)"},
+       {"if", "Need to set inputformat (-if)"}};
+
+  /**
+   * Build the options accepted on the command line.
+   *
+   * @return Options understood by {@link #run(String[])}
+   */
+  private Options getOptions() {
+    Options options = new Options();
+    options.addOption("h", "help", false, "Help");
+    options.addOption("q", "quiet", false, "Quiet output");
+    options.addOption("w", "workers", true, "Number of workers");
+    options.addOption("if", "inputFormat", true, "Graph inputformat");
+    options.addOption("of", "outputFormat", true, "Graph outputformat");
+    options.addOption("ip", "inputPath", true, "Graph input path");
+    options.addOption("op", "outputPath", true, "Graph output path");
+    return options;
+  }
+
+  @Override
+  public Configuration getConf() {
+    return conf;
+  }
+
+  @Override
+  public void setConf(Configuration conf) {
+    this.conf = conf;
+  }
+
+  /**
+   * Parse the arguments, configure a {@link GiraphJob} and run it.
+   *
+   * @param args Vertex class name followed by the options
+   * @return 0 if help was printed or the job reported success, -1 if a
+   *         required option is missing or the job reported failure
+   * @throws Exception propagated from option parsing, class loading or the
+   *         job itself
+   */
+  @Override
+  public int run(String[] args) throws Exception {
+    Options options = getOptions();
+    HelpFormatter formatter = new HelpFormatter();
+    if (args.length == 0) {
+      formatter.printHelp(getClass().getName(), options, true);
+      return 0;
+    }
+
+    String vertexClassName = args[0];
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Attempting to run Vertex: " + vertexClassName);
+    }
+
+    CommandLineParser parser = new BasicParser();
+    CommandLine cmd = parser.parse(options, args);
+
+    // -h is a registered option: honour it here, otherwise asking for help
+    // only produces a complaint about a missing required option
+    if (cmd.hasOption("h")) {
+      formatter.printHelp(getClass().getName(), options, true);
+      return 0;
+    }
+
+    // Verify all the options have been provided
+    for (String[] requiredOption : requiredOptions) {
+      if (!cmd.hasOption(requiredOption[0])) {
+        System.out.println(requiredOption[1]);
+        return -1;
+      }
+    }
+
+    int workers = Integer.parseInt(cmd.getOptionValue('w'));
+    GiraphJob job = new GiraphJob(getConf(), "Giraph: " + vertexClassName);
+    job.setVertexClass(Class.forName(vertexClassName));
+    job.setVertexInputFormatClass(Class.forName(cmd.getOptionValue("if")));
+
+    // -of is optional, so only load the class when a name was supplied;
+    // Class.forName(null) would throw a NullPointerException
+    if (cmd.hasOption("of")) {
+      job.setVertexOutputFormatClass(
+          Class.forName(cmd.getOptionValue("of")));
+    } else {
+      LOG.info("No output format specified. Ensure your job does not require one.");
+    }
+
+    if (cmd.hasOption("ip")) {
+      FileInputFormat.addInputPath(job, new Path(cmd.getOptionValue("ip")));
+    } else {
+      LOG.info("No input path specified. Ensure your InputFormat does not require one.");
+    }
+
+    if (cmd.hasOption("op")) {
+      FileOutputFormat.setOutputPath(job, new Path(cmd.getOptionValue("op")));
+    } else {
+      LOG.info("No output path specified. Ensure your OutputFormat does not require one.");
+    }
+
+    job.setWorkerConfiguration(workers, workers, 100.0f);
+
+    // job.run() is handed true unless -q was given
+    boolean verbose = !cmd.hasOption('q');
+
+    return job.run(verbose) ? 0 : -1;
+  }
+
+  /**
+   * Run this tool through {@link ToolRunner} and exit with its return code.
+   *
+   * @param args Command line arguments
+   * @throws Exception propagated from {@link ToolRunner#run}
+   */
+  public static void main(String[] args) throws Exception {
+    System.exit(ToolRunner.run(new GiraphRunner(), args));
+  }
+}

Modified: incubator/giraph/trunk/src/main/java/org/apache/giraph/graph/GraphMapper.java
URL: http://svn.apache.org/viewvc/incubator/giraph/trunk/src/main/java/org/apache/giraph/graph/GraphMapper.java?rev=1201899&r1=1201898&r2=1201899&view=diff
==============================================================================
--- incubator/giraph/trunk/src/main/java/org/apache/giraph/graph/GraphMapper.java (original)
+++ incubator/giraph/trunk/src/main/java/org/apache/giraph/graph/GraphMapper.java Mon Nov 14 21:04:46 2011
@@ -25,6 +25,8 @@ import org.apache.giraph.comm.WorkerComm
 import org.apache.giraph.utils.ReflectionUtils;
 import org.apache.giraph.zk.ZooKeeperManager;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.filecache.DistributedCache;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.mapreduce.Mapper;
@@ -408,16 +410,32 @@ public class GraphMapper<I extends Writa
 
         // Do some initial setup (possibly starting up a Zookeeper service)
         context.setStatus("setup: Initializing Zookeeper services.");
-        String jarFile = context.getJar();
-        if (jarFile == null) {
-            jarFile = findContainingJar(getClass());
+        Path[] fileClassPaths = DistributedCache.getLocalCacheArchives(conf);
+        String zkClasspath = null;
+        if(fileClassPaths == null) {
+            if(LOG.isInfoEnabled()) {
+                LOG.info("Distributed cache is empty. Assuming fatjar.");
+            }
+            String jarFile = context.getJar();
+            if (jarFile == null) {
+               jarFile = findContainingJar(getClass());
+            }
+            zkClasspath = jarFile.replaceFirst("file:", "");
+        } else {
+            StringBuilder sb = new StringBuilder();
+            sb.append(fileClassPaths[0]);
+
+            for (int i = 1; i < fileClassPaths.length; i++) {
+                sb.append(":");
+                sb.append(fileClassPaths[i]);
+            }
+            zkClasspath = sb.toString();
         }
-        String trimmedJarFile = jarFile.replaceFirst("file:", "");
+
         if (LOG.isInfoEnabled()) {
-            LOG.info("setup: jar file @ " + jarFile +
-                     ", using " + trimmedJarFile);
+            LOG.info("setup: classpath @ " + zkClasspath);
         }
-        conf.set(GiraphJob.ZOOKEEPER_JAR, trimmedJarFile);
+        conf.set(GiraphJob.ZOOKEEPER_JAR, zkClasspath);
         String serverPortList =
             conf.get(GiraphJob.ZOOKEEPER_LIST, "");
         if (serverPortList == "") {