You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@tinkerpop.apache.org by ok...@apache.org on 2015/03/04 15:41:56 UTC

[01/20] incubator-tinkerpop git commit: first push on the spark adaptor....crazy dep hell.

Repository: incubator-tinkerpop
Updated Branches:
  refs/heads/master 2eb3dba95 -> e0e08ebaa


first push on the spark adaptor....crazy dep hell.


Project: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/commit/531b86a8
Tree: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/tree/531b86a8
Diff: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/diff/531b86a8

Branch: refs/heads/master
Commit: 531b86a8f82ee683f4c7d853d502ed5a6eeeeb46
Parents: 0d7802d
Author: Marko A. Rodriguez <ok...@gmail.com>
Authored: Mon Mar 2 09:24:46 2015 -0700
Committer: Marko A. Rodriguez <ok...@gmail.com>
Committed: Mon Mar 2 09:24:46 2015 -0700

----------------------------------------------------------------------
 hadoop-gremlin/pom.xml                          | 159 ++++++++++++++++++-
 .../tinkerpop/gremlin/hadoop/Constants.java     |   1 +
 .../computer/spark/SparkGraphComputer.java      | 113 +++++++++++++
 3 files changed, 272 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/531b86a8/hadoop-gremlin/pom.xml
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/pom.xml b/hadoop-gremlin/pom.xml
index 52496d2..84497c9 100644
--- a/hadoop-gremlin/pom.xml
+++ b/hadoop-gremlin/pom.xml
@@ -45,8 +45,55 @@ limitations under the License.
             <groupId>org.apache.hadoop</groupId>
             <artifactId>hadoop-core</artifactId>
             <version>1.2.1</version>
+            <exclusions>
+                <exclusion>
+                    <groupId>net.java.dev.jets3t</groupId>
+                    <artifactId>jets3t</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>commons-net</groupId>
+                    <artifactId>commons-net</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>javax.servlet</groupId>
+                    <artifactId>servlet-api</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>javax.servlet</groupId>
+                    <artifactId>javax.servlet-api</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>javax.servlet</groupId>
+                    <artifactId>jsp-api</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.mortbay.jetty</groupId>
+                    <artifactId>jetty</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.mortbay.jetty</groupId>
+                    <artifactId>jetty-parent</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.mortbay.jetty</groupId>
+                    <artifactId>jetty-sslengine</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.mortbay.jetty</groupId>
+                    <artifactId>jetty-sslengine</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.mortbay.jetty</groupId>
+                    <artifactId>jetty-util</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.eclipse.jetty</groupId>
+                    <artifactId>jetty-parent</artifactId>
+                </exclusion>
+            </exclusions>
             <!--<scope>provided</scope>-->
         </dependency>
+        <!-- GIRAPH GRAPH COMPUTER -->
         <dependency>
             <groupId>org.apache.giraph</groupId>
             <artifactId>giraph-core</artifactId>
@@ -71,12 +118,122 @@ limitations under the License.
                     <groupId>jline</groupId>
                     <artifactId>jline</artifactId>
                 </exclusion>
+                <exclusion>
+                    <groupId>io.netty</groupId>
+                    <artifactId>netty</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.jboss.netty</groupId>
+                    <artifactId>netty</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.codehaus.jackson</groupId>
+                    <artifactId>jackson-core</artifactId>
+                </exclusion>
+                <exclusion>
+                <groupId>org.codehaus.jackson</groupId>
+                <artifactId>jackson-core-asl</artifactId>
+            </exclusion>
+                <exclusion>
+                    <groupId>org.codehaus.jackson</groupId>
+                    <artifactId>jackson-mapper-asl</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.apache.zookeeper</groupId>
+                    <artifactId>zookeeper</artifactId>
+                </exclusion>
             </exclusions>
         </dependency>
-        <!-- consistent dependencies chosen for hadoop-core -->
+        <!-- SPARK GRAPH COMPUTER -->
         <dependency>
+            <groupId>org.apache.spark</groupId>
+            <artifactId>spark-core_2.10</artifactId>
+            <version>1.2.1</version>
+            <exclusions>
+                <exclusion>
+                    <groupId>org.apache.hadoop</groupId>
+                    <artifactId>hadoop-core</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.apache.hadoop</groupId>
+                    <artifactId>hadoop-mapreduce-client-app</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.apache.hadoop</groupId>
+                    <artifactId>hadoop-client</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.slf4j</groupId>
+                    <artifactId>slf4j-api</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.slf4j</groupId>
+                    <artifactId>slf4j-log4j12</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.slf4j</groupId>
+                    <artifactId>jcl-over-slf4j</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>commons-io</groupId>
+                    <artifactId>commons-io</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>com.esotericsoftware.kryo</groupId>
+                    <artifactId>kryo</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>com.fasterxml.jackson.core</groupId>
+                    <artifactId>jackson-databind</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>com.fasterxml.jackson.core</groupId>
+                    <artifactId>jackson-annotations</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>com.fasterxml.jackson.core</groupId>
+                    <artifactId>jackson-core</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.scala-lang</groupId>
+                    <artifactId>scala-library</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>log4j</groupId>
+                    <artifactId>log4j</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>com.google.guava</groupId>
+                    <artifactId>guava</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>jline</groupId>
+                    <artifactId>jline</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>commons</groupId>
+                    <artifactId>commons-lang</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.apache.commons</groupId>
+                    <artifactId>commons-lang3</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>commons-codec</groupId>
+                    <artifactId>commons-codec</artifactId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+        <!-- consistent dependencies chosen for hadoop-core -->
+        <!--<dependency>
             <groupId>commons-httpclient</groupId>
             <artifactId>commons-httpclient</artifactId>
+
+        </dependency>-->
+        <dependency>
+            <groupId>org.scala-lang</groupId>
+            <artifactId>scala-library</artifactId>
+            <version>2.10.3</version>
         </dependency>
         <dependency>
             <groupId>org.apache.tinkerpop</groupId>

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/531b86a8/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/Constants.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/Constants.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/Constants.java
index 46cf993..f229b17 100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/Constants.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/Constants.java
@@ -35,6 +35,7 @@ public class Constants {
     public static final String GREMLIN_HADOOP_JOB_PREFIX = "HadoopGremlin: ";
     public static final String GREMLIN_HADOOP_GIRAPH_JOB_PREFIX = "HadoopGremlin(Giraph): ";
     public static final String GREMLIN_HADOOP_MAP_REDUCE_JOB_PREFIX = "HadoopGremlin(MapReduce): ";
+    public static final String GREMLIN_HADOOP_SPARK_JOB_PREFIX = "HadoopGremlin(Spark): ";
     public static final String HADOOP_GREMLIN_LIBS = "HADOOP_GREMLIN_LIBS";
     public static final String DOT_JAR = ".jar";
     public static final String GREMLIN_HADOOP_DERIVE_MEMORY = "gremlin.hadoop.deriveMemory";

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/531b86a8/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
new file mode 100644
index 0000000..0586a14
--- /dev/null
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.tinkerpop.gremlin.hadoop.process.computer.spark;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.tinkerpop.gremlin.hadoop.Constants;
+import org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph;
+import org.apache.tinkerpop.gremlin.process.computer.ComputerResult;
+import org.apache.tinkerpop.gremlin.process.computer.GraphComputer;
+import org.apache.tinkerpop.gremlin.process.computer.MapReduce;
+import org.apache.tinkerpop.gremlin.process.computer.VertexProgram;
+import org.apache.tinkerpop.gremlin.process.computer.util.GraphComputerHelper;
+import org.apache.tinkerpop.gremlin.structure.util.StringFactory;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.HashSet;
+import java.util.Set;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.Future;
+
+/**
+ * @author Marko A. Rodriguez (http://markorodriguez.com)
+ */
+public class SparkGraphComputer implements GraphComputer {
+
+    public static final Logger LOGGER = LoggerFactory.getLogger(SparkGraphComputer.class);
+
+    protected final SparkConf configuration = new SparkConf();
+
+    protected final HadoopGraph hadoopGraph;
+
+    private boolean executed = false;
+    private final Set<MapReduce> mapReduces = new HashSet<>();
+    private VertexProgram vertexProgram;
+
+    public SparkGraphComputer(final HadoopGraph hadoopGraph) {
+        this.hadoopGraph = hadoopGraph;
+    }
+
+    public static void main(final String[] args) {
+        final SparkConf configuration = new SparkConf();
+        configuration.setAppName(Constants.GREMLIN_HADOOP_SPARK_JOB_PREFIX);
+        configuration.setMaster("local");
+        final JavaSparkContext sc = new JavaSparkContext(configuration);
+        JavaRDD<String> rdd = sc.textFile("religious-traversals.txt");
+        System.out.println(rdd.count());
+    }
+
+
+    @Override
+    public GraphComputer isolation(final Isolation isolation) {
+        if (!isolation.equals(Isolation.BSP))
+            throw GraphComputer.Exceptions.isolationNotSupported(isolation);
+        return this;
+    }
+
+    @Override
+    public GraphComputer program(final VertexProgram vertexProgram) {
+        this.vertexProgram = vertexProgram;
+        return this;
+    }
+
+    @Override
+    public GraphComputer mapReduce(final MapReduce mapReduce) {
+        this.mapReduces.add(mapReduce);
+        return this;
+    }
+
+    @Override
+    public String toString() {
+        return StringFactory.graphComputerString(this);
+    }
+
+    @Override
+    public Future<ComputerResult> submit() {
+        if (this.executed)
+            throw Exceptions.computerHasAlreadyBeenSubmittedAVertexProgram();
+        else
+            this.executed = true;
+
+        // it is not possible to execute a computer if it has neither a vertex program nor mapreducers
+        if (null == this.vertexProgram && this.mapReduces.isEmpty())
+            throw GraphComputer.Exceptions.computerHasNoVertexProgramNorMapReducers();
+        // it is possible to run mapreducers without a vertex program
+        if (null != this.vertexProgram)
+            GraphComputerHelper.validateProgramOnComputer(this, vertexProgram);
+
+        final long startTime = System.currentTimeMillis();
+        return CompletableFuture.<ComputerResult>supplyAsync(() -> {
+            return null;
+        });
+    }
+
+}

[08/20] incubator-tinkerpop git commit: SparkGraphComputer is prim and proper. No longer using public static void main(). The GraphComputer API is legitimately implemented. All that is left --- MapReduce engine (easy) and GraphComputer Memory (hard).

Posted by ok...@apache.org.

SparkGraphComputer is prim and proper. No longer using public static void main(). The GraphComputer API is legitimately implemented.  All that is left --- MapReduce engine (easy) and GraphComputer Memory (hard).


Project: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/commit/051994ae
Tree: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/tree/051994ae
Diff: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/diff/051994ae

Branch: refs/heads/master
Commit: 051994aeac31de6e02213f1de5eb258f798602eb
Parents: c98d5be
Author: Marko A. Rodriguez <ok...@gmail.com>
Authored: Tue Mar 3 09:28:28 2015 -0700
Committer: Marko A. Rodriguez <ok...@gmail.com>
Committed: Tue Mar 3 09:28:28 2015 -0700

----------------------------------------------------------------------
 hadoop-gremlin/conf/giraph-graphson.properties  |  57 ++++++++
 hadoop-gremlin/conf/giraph-kryo.properties      |  31 +++++
 hadoop-gremlin/conf/hadoop-graphson.properties  |  57 --------
 hadoop-gremlin/conf/hadoop-kryo.properties      |  31 -----
 hadoop-gremlin/conf/spark-kryo.properties       |  38 ++++++
 .../computer/giraph/GiraphGraphComputer.java    |   3 +-
 .../computer/spark/GraphComputerRDD.java        |   3 +-
 .../hadoop/process/computer/spark/RDDTools.java |  46 -------
 .../spark/SerializableConfiguration.java        |   2 +-
 .../computer/spark/SparkGraphComputer.java      | 135 +++++++++++++------
 .../process/computer/spark/SparkMessenger.java  |   5 -
 .../process/computer/spark/SparkVertex.java     |  44 +++---
 12 files changed, 248 insertions(+), 204 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/051994ae/hadoop-gremlin/conf/giraph-graphson.properties
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/conf/giraph-graphson.properties b/hadoop-gremlin/conf/giraph-graphson.properties
new file mode 100644
index 0000000..090b0ce
--- /dev/null
+++ b/hadoop-gremlin/conf/giraph-graphson.properties
@@ -0,0 +1,57 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# the graph class
+gremlin.graph=org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph
+# i/o formats for graphs and memory (i.e. computer result)
+gremlin.hadoop.graphInputFormat=org.apache.tinkerpop.gremlin.hadoop.structure.io.graphson.GraphSONInputFormat
+gremlin.hadoop.graphOutputFormat=org.apache.tinkerpop.gremlin.hadoop.structure.io.graphson.GraphSONOutputFormat
+gremlin.hadoop.memoryOutputFormat=org.apache.hadoop.mapreduce.lib.output.TextOutputFormat
+# i/o locations
+gremlin.hadoop.inputLocation=tinkerpop-modern-vertices.ldjson
+gremlin.hadoop.outputLocation=output
+# deriving a complete view of the memory requires an extra mapreduce job and thus, if not needed, should be avoided
+gremlin.hadoop.deriveMemory=false
+# if the job jars are not on the classpath of every hadoop node, then they must be provided to the distributed cache at runtime
+gremlin.hadoop.jarsInDistributedCache=true
+# the vertex program to execute
+gremlin.vertexProgram=org.apache.tinkerpop.gremlin.process.computer.ranking.pagerank.PageRankVertexProgram
+
+# It is possible to provide Giraph configuration parameters for use with GiraphGraphComputer
+############################################################################################
+giraph.minWorkers=2
+giraph.maxWorkers=2
+# giraph.useInputSplitLocality=false
+# giraph.logLevel=debug
+
+# It is possible to provide Hadoop configuration parameters.
+# Note that these parameters are provided to each MapReduce job within the entire Hadoop-Gremlin job pipeline.
+# Some of these parameters may be over written by Hadoop-Gremlin as deemed necessary.
+##############################################################################################################
+# mapred.linerecordreader.maxlength=5242880
+# mapred.map.child.java.opts=-Xmx1024m -Dtinkerpop.profiling=true
+# mapred.reduce.child.java.opts=-Xmx1024m -Dtinkerpop.profiling=true
+# mapred.map.tasks=6
+# mapred.reduce.tasks=3
+# mapred.job.reuse.jvm.num.tasks=-1
+# mapred.task.timeout=5400000
+# mapred.reduce.parallel.copies=50
+# io.sort.factor=100
+# io.sort.mb=200
+
+
+

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/051994ae/hadoop-gremlin/conf/giraph-kryo.properties
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/conf/giraph-kryo.properties b/hadoop-gremlin/conf/giraph-kryo.properties
new file mode 100644
index 0000000..d546da7
--- /dev/null
+++ b/hadoop-gremlin/conf/giraph-kryo.properties
@@ -0,0 +1,31 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+gremlin.graph=org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph
+gremlin.hadoop.graphInputFormat=org.apache.tinkerpop.gremlin.hadoop.structure.io.kryo.KryoInputFormat
+gremlin.hadoop.graphOutputFormat=org.apache.tinkerpop.gremlin.hadoop.structure.io.kryo.KryoOutputFormat
+gremlin.hadoop.memoryOutputFormat=org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat
+gremlin.hadoop.deriveMemory=false
+gremlin.hadoop.jarsInDistributedCache=true
+
+gremlin.hadoop.inputLocation=tinkerpop-modern-vertices.gio
+gremlin.hadoop.outputLocation=output
+#gremlin.vertexProgram=org.apache.tinkerpop.gremlin.process.computer.traversal.TraversalVertexProgram
+#gremlin.traversalVertexProgram.traversalSupplier.type=CLASS
+#gremlin.traversalVertexProgram.traversalSupplier.object=org.apache.tinkerpop.gremlin.hadoop.process.computer.example.TraversalSupplier1
+
+giraph.minWorkers=2
+giraph.maxWorkers=2
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/051994ae/hadoop-gremlin/conf/hadoop-graphson.properties
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/conf/hadoop-graphson.properties b/hadoop-gremlin/conf/hadoop-graphson.properties
deleted file mode 100644
index 090b0ce..0000000
--- a/hadoop-gremlin/conf/hadoop-graphson.properties
+++ /dev/null
@@ -1,57 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#  http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# the graph class
-gremlin.graph=org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph
-# i/o formats for graphs and memory (i.e. computer result)
-gremlin.hadoop.graphInputFormat=org.apache.tinkerpop.gremlin.hadoop.structure.io.graphson.GraphSONInputFormat
-gremlin.hadoop.graphOutputFormat=org.apache.tinkerpop.gremlin.hadoop.structure.io.graphson.GraphSONOutputFormat
-gremlin.hadoop.memoryOutputFormat=org.apache.hadoop.mapreduce.lib.output.TextOutputFormat
-# i/o locations
-gremlin.hadoop.inputLocation=tinkerpop-modern-vertices.ldjson
-gremlin.hadoop.outputLocation=output
-# deriving a complete view of the memory requires an extra mapreduce job and thus, if not needed, should be avoided
-gremlin.hadoop.deriveMemory=false
-# if the job jars are not on the classpath of every hadoop node, then they must be provided to the distributed cache at runtime
-gremlin.hadoop.jarsInDistributedCache=true
-# the vertex program to execute
-gremlin.vertexProgram=org.apache.tinkerpop.gremlin.process.computer.ranking.pagerank.PageRankVertexProgram
-
-# It is possible to provide Giraph configuration parameters for use with GiraphGraphComputer
-############################################################################################
-giraph.minWorkers=2
-giraph.maxWorkers=2
-# giraph.useInputSplitLocality=false
-# giraph.logLevel=debug
-
-# It is possible to provide Hadoop configuration parameters.
-# Note that these parameters are provided to each MapReduce job within the entire Hadoop-Gremlin job pipeline.
-# Some of these parameters may be over written by Hadoop-Gremlin as deemed necessary.
-##############################################################################################################
-# mapred.linerecordreader.maxlength=5242880
-# mapred.map.child.java.opts=-Xmx1024m -Dtinkerpop.profiling=true
-# mapred.reduce.child.java.opts=-Xmx1024m -Dtinkerpop.profiling=true
-# mapred.map.tasks=6
-# mapred.reduce.tasks=3
-# mapred.job.reuse.jvm.num.tasks=-1
-# mapred.task.timeout=5400000
-# mapred.reduce.parallel.copies=50
-# io.sort.factor=100
-# io.sort.mb=200
-
-
-

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/051994ae/hadoop-gremlin/conf/hadoop-kryo.properties
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/conf/hadoop-kryo.properties b/hadoop-gremlin/conf/hadoop-kryo.properties
deleted file mode 100644
index d546da7..0000000
--- a/hadoop-gremlin/conf/hadoop-kryo.properties
+++ /dev/null
@@ -1,31 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#  http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-gremlin.graph=org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph
-gremlin.hadoop.graphInputFormat=org.apache.tinkerpop.gremlin.hadoop.structure.io.kryo.KryoInputFormat
-gremlin.hadoop.graphOutputFormat=org.apache.tinkerpop.gremlin.hadoop.structure.io.kryo.KryoOutputFormat
-gremlin.hadoop.memoryOutputFormat=org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat
-gremlin.hadoop.deriveMemory=false
-gremlin.hadoop.jarsInDistributedCache=true
-
-gremlin.hadoop.inputLocation=tinkerpop-modern-vertices.gio
-gremlin.hadoop.outputLocation=output
-#gremlin.vertexProgram=org.apache.tinkerpop.gremlin.process.computer.traversal.TraversalVertexProgram
-#gremlin.traversalVertexProgram.traversalSupplier.type=CLASS
-#gremlin.traversalVertexProgram.traversalSupplier.object=org.apache.tinkerpop.gremlin.hadoop.process.computer.example.TraversalSupplier1
-
-giraph.minWorkers=2
-giraph.maxWorkers=2
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/051994ae/hadoop-gremlin/conf/spark-kryo.properties
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/conf/spark-kryo.properties b/hadoop-gremlin/conf/spark-kryo.properties
new file mode 100644
index 0000000..de4df3b
--- /dev/null
+++ b/hadoop-gremlin/conf/spark-kryo.properties
@@ -0,0 +1,38 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+gremlin.graph=org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph
+gremlin.hadoop.graphInputFormat=org.apache.tinkerpop.gremlin.hadoop.structure.io.kryo.KryoInputFormat
+gremlin.hadoop.graphOutputFormat=org.apache.tinkerpop.gremlin.hadoop.structure.io.kryo.KryoOutputFormat
+gremlin.hadoop.memoryOutputFormat=org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat
+gremlin.hadoop.deriveMemory=false
+gremlin.hadoop.jarsInDistributedCache=true
+
+gremlin.hadoop.inputLocation=hdfs://localhost:9000/user/marko/tinkerpop-modern-vertices.gio
+gremlin.hadoop.outputLocation=output
+
+# the vertex program to execute
+gremlin.vertexProgram=org.apache.tinkerpop.gremlin.process.computer.ranking.pagerank.PageRankVertexProgram
+
+# It is possible to provide Spark configuration parameters for use with SparkGraphComputer
+##########################################################################################
+spark.master=local[4]
+spark.executor.memory=1024m
+spark.eventLog.enabled=true
+#spark.serializer=org.apache.spark.serializer.KryoSerializer
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/051994ae/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/giraph/GiraphGraphComputer.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/giraph/GiraphGraphComputer.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/giraph/GiraphGraphComputer.java
index e382699..7a5e362 100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/giraph/GiraphGraphComputer.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/giraph/GiraphGraphComputer.java
@@ -222,8 +222,7 @@ public class GiraphGraphComputer extends Configured implements GraphComputer, To
 
     public static void main(final String[] args) throws Exception {
         try {
-            final FileConfiguration configuration = new PropertiesConfiguration();
-            configuration.load(new File(args[0]));
+            final FileConfiguration configuration = new PropertiesConfiguration(args[0]);
             final GiraphGraphComputer computer = new GiraphGraphComputer(HadoopGraph.open(configuration));
             computer.program(VertexProgram.createVertexProgram(configuration)).submit().get();
         } catch (Exception e) {

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/051994ae/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/GraphComputerRDD.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/GraphComputerRDD.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/GraphComputerRDD.java
index abf0ac6..786e5af 100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/GraphComputerRDD.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/GraphComputerRDD.java
@@ -57,7 +57,7 @@ public class GraphComputerRDD<M> extends JavaPairRDD<Object, SparkMessenger<M>>
             });
         });
         // clear all previous incoming messages
-        if(!memory.isInitialIteration()) {
+        if (!memory.isInitialIteration()) {
             current = current.mapValues(messenger -> {
                 messenger.clearIncomingMessages();
                 return messenger;
@@ -97,6 +97,7 @@ public class GraphComputerRDD<M> extends JavaPairRDD<Object, SparkMessenger<M>>
 
     //////////////
 
+    // TODO: What the hell is this for?
     @Override
     public JavaRDD zipPartitions(JavaRDDLike uJavaRDDLike, FlatMapFunction2 iteratorIteratorVFlatMapFunction2) {
         return (JavaRDD) new JavaRDD<>(null, null);

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/051994ae/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/RDDTools.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/RDDTools.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/RDDTools.java
deleted file mode 100644
index cef6040..0000000
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/RDDTools.java
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.tinkerpop.gremlin.hadoop.process.computer.spark;
-
-import org.apache.spark.api.java.JavaPairRDD;
-import org.apache.tinkerpop.gremlin.structure.Vertex;
-import scala.Tuple2;
-
-import java.util.Arrays;
-import java.util.List;
-import java.util.stream.Collectors;
-
-/**
- * @author Marko A. Rodriguez (http://markorodriguez.com)
- */
-public class RDDTools {
-
-    public static <M> void sendMessage(final Tuple2<Vertex, List<M>> tuple, final M message) {
-        tuple._2().add(message);
-    }
-
-    public static <M> Iterable<M> receiveMessages(final Tuple2<Vertex, List<M>> tuple) {
-        return tuple._2();
-    }
-
-    public static <M> JavaPairRDD<Vertex, List<M>> endIteration(final JavaPairRDD<Vertex, List<M>> graph) {
-        return graph.flatMapToPair(tuple -> tuple._2().stream().map(message -> new Tuple2<>(tuple._1(), Arrays.asList(message))).collect(Collectors.toList()));
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/051994ae/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SerializableConfiguration.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SerializableConfiguration.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SerializableConfiguration.java
index a71b456..b4a8005 100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SerializableConfiguration.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SerializableConfiguration.java
@@ -28,7 +28,7 @@ import java.util.Map;
 /**
  * @author Marko A. Rodriguez (http://markorodriguez.com)
  */
-public class SerializableConfiguration extends AbstractConfiguration implements Serializable {
+public final class SerializableConfiguration extends AbstractConfiguration implements Serializable {
 
     private final Map<String, Object> configurations = new HashMap<>();
 

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/051994ae/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
index 107f1bc..774c3c7 100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
@@ -18,20 +18,28 @@
  */
 package org.apache.tinkerpop.gremlin.hadoop.process.computer.spark;
 
+import org.apache.commons.configuration.FileConfiguration;
+import org.apache.commons.configuration.PropertiesConfiguration;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.OutputFormat;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaPairRDD;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.tinkerpop.gremlin.hadoop.Constants;
 import org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph;
 import org.apache.tinkerpop.gremlin.hadoop.structure.io.VertexWritable;
-import org.apache.tinkerpop.gremlin.hadoop.structure.io.kryo.KryoInputFormat;
+import org.apache.tinkerpop.gremlin.hadoop.structure.util.ConfUtil;
+import org.apache.tinkerpop.gremlin.hadoop.structure.util.HadoopHelper;
 import org.apache.tinkerpop.gremlin.process.computer.ComputerResult;
 import org.apache.tinkerpop.gremlin.process.computer.GraphComputer;
 import org.apache.tinkerpop.gremlin.process.computer.MapReduce;
 import org.apache.tinkerpop.gremlin.process.computer.VertexProgram;
-import org.apache.tinkerpop.gremlin.process.computer.ranking.pagerank.PageRankVertexProgram;
+import org.apache.tinkerpop.gremlin.process.computer.util.DefaultComputerResult;
 import org.apache.tinkerpop.gremlin.process.computer.util.GraphComputerHelper;
 import org.apache.tinkerpop.gremlin.structure.util.StringFactory;
 import org.apache.tinkerpop.gremlin.tinkergraph.structure.TinkerVertex;
@@ -57,46 +65,14 @@ public class SparkGraphComputer implements GraphComputer {
     protected final SparkConf configuration = new SparkConf();
 
     protected final HadoopGraph hadoopGraph;
-
     private boolean executed = false;
-    private final Set<MapReduce> mapReduces = new HashSet<>();
+    private final Set<MapReduce> mapReducers = new HashSet<>();
     private VertexProgram vertexProgram;
 
     public SparkGraphComputer(final HadoopGraph hadoopGraph) {
         this.hadoopGraph = hadoopGraph;
     }
 
-    public static void main(final String[] args) throws IOException {
-        final SparkConf configuration = new SparkConf();
-        configuration.setAppName(Constants.GREMLIN_HADOOP_SPARK_JOB_PREFIX);
-        configuration.setMaster("local");
-        final JavaSparkContext sc = new JavaSparkContext(configuration);
-        //JavaRDD<String> rdd = sc.textFile("hdfs://localhost:9000/user/marko/religious-traversals.txt");
-        final Configuration conf = new Configuration();
-        conf.set("mapred.input.dir", "hdfs://localhost:9000/user/marko/grateful-dead-vertices.gio");
-        JavaPairRDD<NullWritable, VertexWritable> rdd = sc.newAPIHadoopRDD(conf, KryoInputFormat.class, NullWritable.class, VertexWritable.class);
-        JavaPairRDD<Object, SparkMessenger<Double>> rdd2 = rdd.mapToPair(tuple -> new Tuple2<>(tuple._2().get().id(), new SparkMessenger<>(new SparkVertex((TinkerVertex) tuple._2().get()), new ArrayList<>())));
-
-        GraphComputerRDD<Double> g = GraphComputerRDD.of(rdd2);
-
-        final org.apache.commons.configuration.Configuration vertexProgram = new SerializableConfiguration();
-        final PageRankVertexProgram pageRankVertexProgram = PageRankVertexProgram.build().create();
-        pageRankVertexProgram.storeState(vertexProgram);
-        final SparkMemory memory = new SparkMemory(Collections.emptySet());
-
-        while (!pageRankVertexProgram.terminate(memory)) {
-            g = g.execute(vertexProgram, memory);
-            g.foreachPartition(iterator -> doNothing());
-            memory.incrIteration();
-        }
-        g.foreach(t -> System.out.println(t._2().vertex.property(PageRankVertexProgram.PAGE_RANK) + "-->" + t._2().vertex.value("name")));
-        System.out.println(g.count());
-    }
-
-    private static final void doNothing() {
-    }
-
-
     @Override
     public GraphComputer isolation(final Isolation isolation) {
         if (!isolation.equals(Isolation.BSP))
@@ -112,7 +88,7 @@ public class SparkGraphComputer implements GraphComputer {
 
     @Override
     public GraphComputer mapReduce(final MapReduce mapReduce) {
-        this.mapReduces.add(mapReduce);
+        this.mapReducers.add(mapReduce);
         return this;
     }
 
@@ -129,16 +105,95 @@ public class SparkGraphComputer implements GraphComputer {
             this.executed = true;
 
         // it is not possible execute a computer if it has no vertex program nor mapreducers
-        if (null == this.vertexProgram && this.mapReduces.isEmpty())
+        if (null == this.vertexProgram && this.mapReducers.isEmpty())
             throw GraphComputer.Exceptions.computerHasNoVertexProgramNorMapReducers();
         // it is possible to run mapreducers without a vertex program
         if (null != this.vertexProgram)
             GraphComputerHelper.validateProgramOnComputer(this, vertexProgram);
 
-        final long startTime = System.currentTimeMillis();
+        final Configuration hadoopConfiguration = ConfUtil.makeHadoopConfiguration(this.hadoopGraph.configuration());
+        final SparkMemory memory = new SparkMemory(Collections.emptySet());
+
         return CompletableFuture.<ComputerResult>supplyAsync(() -> {
-            return null;
-        });
+                    final long startTime = System.currentTimeMillis();
+                    // load the graph
+                    if (null != this.vertexProgram) {
+                        final SparkConf sparkConfiguration = new SparkConf();
+                        sparkConfiguration.setAppName(Constants.GREMLIN_HADOOP_SPARK_JOB_PREFIX + this.vertexProgram);
+                        hadoopConfiguration.forEach(entry -> sparkConfiguration.set(entry.getKey(), entry.getValue()));
+                        if (FileInputFormat.class.isAssignableFrom(hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_INPUT_FORMAT, InputFormat.class)))
+                            hadoopConfiguration.set("mapred.input.dir", hadoopConfiguration.get(Constants.GREMLIN_HADOOP_INPUT_LOCATION));
+
+                        // set up the input format
+                        final JavaSparkContext sc = new JavaSparkContext(sparkConfiguration);
+                        final JavaPairRDD<NullWritable, VertexWritable> rdd = sc.newAPIHadoopRDD(hadoopConfiguration,
+                                (Class<InputFormat<NullWritable, VertexWritable>>) hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_INPUT_FORMAT, InputFormat.class),
+                                NullWritable.class,
+                                VertexWritable.class);
+                        final JavaPairRDD<Object, SparkMessenger<Double>> rdd2 = rdd.mapToPair(tuple -> new Tuple2<>(tuple._2().get().id(), new SparkMessenger<>(new SparkVertex((TinkerVertex) tuple._2().get()), new ArrayList<>())));
+                        GraphComputerRDD<Double> g = GraphComputerRDD.of(rdd2);
+
+                        // set up the vertex program
+                        this.vertexProgram.setup(memory);
+                        final org.apache.commons.configuration.Configuration vertexProgramConfiguration = new SerializableConfiguration();
+                        this.vertexProgram.storeState(vertexProgramConfiguration);
+
+                        // execute the vertex program
+                        while (true) {
+                            g = g.execute(vertexProgramConfiguration, memory);
+                            g.foreachPartition(iterator -> doNothing());
+                            memory.incrIteration();
+                            if (this.vertexProgram.terminate(memory))
+                                break;
+                        }
+                        // write the output graph back to disk
+                        final String outputLocation = hadoopConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION);
+                        if (null != outputLocation) {
+                            try {
+                                FileSystem.get(hadoopConfiguration).delete(new Path(hadoopConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION)), true);
+                            } catch (final IOException e) {
+                                throw new IllegalStateException(e.getMessage(), e);
+                            }
+                            // map back to a <nullwritable,vertexwritable> stream for output
+                            g.mapToPair(tuple -> new Tuple2<>(NullWritable.get(), new VertexWritable<>(tuple._2().vertex)))
+                                    .saveAsNewAPIHadoopFile(outputLocation + "/" + Constants.SYSTEM_G,
+                                            NullWritable.class,
+                                            VertexWritable.class,
+                                            (Class<OutputFormat<NullWritable, VertexWritable>>) hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_OUTPUT_FORMAT, OutputFormat.class));
+                        }
+                    }
+
+                    // execute mapreduce jobs
+                    for (final MapReduce mapReduce : this.mapReducers) {
+                        //TODO
+                       /* g.mapValues(messenger -> {
+                            mapReduce.map(messenger.vertex, null);
+                            return messenger;
+                        }).combine().reduce();*/
+                    }
+                    // update runtime and return the newly computed graph
+                    memory.setRuntime(System.currentTimeMillis() - startTime);
+                    memory.complete();
+                    return new DefaultComputerResult(HadoopHelper.getOutputGraph(this.hadoopGraph), memory.asImmutable());
+                }
+        );
+    }
+
+    private static final void doNothing() {
+        // a cheap action
     }
 
+    /////////////////
+
+    public static void main(final String[] args) throws Exception {
+        final FileConfiguration configuration = new PropertiesConfiguration("/Users/marko/software/tinkerpop/tinkerpop3/hadoop-gremlin/conf/spark-kryo.properties");
+        // TODO: final FileConfiguration configuration = new PropertiesConfiguration(args[0]);
+        final HadoopGraph graph = HadoopGraph.open(configuration);
+        final ComputerResult result = new SparkGraphComputer(graph).program(VertexProgram.createVertexProgram(configuration)).submit().get();
+        System.out.println(result);
+        //result.graph().configuration().getKeys().forEachRemaining(key -> System.out.println(key + "-->" + result.graph().configuration().getString(key)));
+        result.graph().V().valueMap().forEachRemaining(System.out::println);
+    }
+
+
 }

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/051994ae/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMessenger.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMessenger.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMessenger.java
index b18940a..cc170c4 100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMessenger.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMessenger.java
@@ -64,11 +64,6 @@ public class SparkMessenger<M> implements Serializable, Messenger<M> {
     }
 
     @Override
-    public String toString() {
-        return "messageBox[incoming(" + this.incoming.size() + "):outgoing(" + this.outgoing.size() + ")]";
-    }
-
-    @Override
     public Iterable<M> receiveMessages(final MessageScope messageScope) {
         return this.incoming;
     }

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/051994ae/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkVertex.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkVertex.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkVertex.java
index 5a81017..38f8a61 100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkVertex.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkVertex.java
@@ -31,6 +31,7 @@ import org.apache.tinkerpop.gremlin.tinkergraph.structure.TinkerVertex;
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
+import java.io.ObjectInputStream;
 import java.io.ObjectOutputStream;
 import java.io.Serializable;
 import java.util.Iterator;
@@ -38,54 +39,47 @@ import java.util.Iterator;
 /**
  * @author Marko A. Rodriguez (http://markorodriguez.com)
  */
-public class SparkVertex implements Vertex, Vertex.Iterators, Serializable {
+public final class SparkVertex implements Vertex, Vertex.Iterators, Serializable {
 
     private static KryoWriter KRYO_WRITER = KryoWriter.build().create();
     private static KryoReader KRYO_READER = KryoReader.build().create();
     private static final String VERTEX_ID = Graph.Hidden.hide("giraph.gremlin.vertexId");
 
     private transient TinkerVertex vertex;
-    private byte[] serializedForm;
+    private byte[] vertexBytes;
 
     public SparkVertex(final TinkerVertex vertex) {
         this.vertex = vertex;
         this.vertex.graph().variables().set(VERTEX_ID, this.vertex.id());
-        this.deflateVertex();
     }
 
     @Override
-    public Edge addEdge(String label, Vertex inVertex, Object... keyValues) {
-        inflateVertex();
+    public Edge addEdge(final String label, final Vertex inVertex, final Object... keyValues) {
         return this.vertex.addEdge(label, inVertex, keyValues);
     }
 
     @Override
     public Object id() {
-        inflateVertex();
         return this.vertex.id();
     }
 
     @Override
     public String label() {
-        inflateVertex();
         return this.vertex.label();
     }
 
     @Override
     public Graph graph() {
-        inflateVertex();
         return this.vertex.graph();
     }
 
     @Override
-    public <V> VertexProperty<V> property(String key, V value) {
-        inflateVertex();
+    public <V> VertexProperty<V> property(final String key, final V value) {
         return this.vertex.property(key, value);
     }
 
     @Override
     public void remove() {
-        inflateVertex();
         this.vertex.remove();
     }
 
@@ -95,51 +89,59 @@ public class SparkVertex implements Vertex, Vertex.Iterators, Serializable {
     }
 
     @Override
-    public Iterator<Edge> edgeIterator(Direction direction, String... edgeLabels) {
-        inflateVertex();
+    public Iterator<Edge> edgeIterator(final Direction direction, final String... edgeLabels) {
         return this.vertex.iterators().edgeIterator(direction, edgeLabels);
     }
 
     @Override
-    public Iterator<Vertex> vertexIterator(Direction direction, String... edgeLabels) {
-        inflateVertex();
+    public Iterator<Vertex> vertexIterator(final Direction direction, final String... edgeLabels) {
         return this.vertex.iterators().vertexIterator(direction, edgeLabels);
     }
 
     @Override
-    public <V> Iterator<VertexProperty<V>> propertyIterator(String... propertyKeys) {
-        inflateVertex();
+    public <V> Iterator<VertexProperty<V>> propertyIterator(final String... propertyKeys) {
         return this.vertex.iterators().propertyIterator(propertyKeys);
     }
 
+    ///////////////////////////////
+
     private void writeObject(final ObjectOutputStream outputStream) throws IOException {
-        this.inflateVertex();
         this.deflateVertex();
         outputStream.defaultWriteObject();
     }
 
+    private void readObject(final ObjectInputStream inputStream) throws IOException, ClassNotFoundException {
+        inputStream.defaultReadObject();
+        this.inflateVertex();
+    }
+
     private final void inflateVertex() {
         if (null != this.vertex)
             return;
 
         try {
-            final ByteArrayInputStream bis = new ByteArrayInputStream(this.serializedForm);
+            final ByteArrayInputStream bis = new ByteArrayInputStream(this.vertexBytes);
             final TinkerGraph tinkerGraph = TinkerGraph.open();
             KRYO_READER.readGraph(bis, tinkerGraph);
             bis.close();
+            this.vertexBytes = null;
             this.vertex = (TinkerVertex) tinkerGraph.iterators().vertexIterator(tinkerGraph.variables().get(VERTEX_ID).get()).next();
-        } catch (final Exception e) {
+        } catch (final IOException e) {
             throw new IllegalStateException(e.getMessage(), e);
         }
     }
 
     private final void deflateVertex() {
+        if (null != this.vertexBytes)
+            return;
+
         try {
             final ByteArrayOutputStream bos = new ByteArrayOutputStream();
             KRYO_WRITER.writeGraph(bos, this.vertex.graph());
             bos.flush();
             bos.close();
-            this.serializedForm = bos.toByteArray();
+            this.vertex = null;
+            this.vertexBytes = bos.toByteArray();
         } catch (final IOException e) {
             throw new IllegalStateException(e.getMessage(), e);
         }

[07/20] incubator-tinkerpop git commit: Merge branch 'master' into spark

Posted by ok...@apache.org.

Merge branch 'master' into spark


Project: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/commit/c98d5be3
Tree: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/tree/c98d5be3
Diff: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/diff/c98d5be3

Branch: refs/heads/master
Commit: c98d5be3714d62a24fa4f00966a046c59b11695b
Parents: 84be267 5f77614
Author: Marko A. Rodriguez <ok...@gmail.com>
Authored: Mon Mar 2 19:23:20 2015 -0700
Committer: Marko A. Rodriguez <ok...@gmail.com>
Committed: Mon Mar 2 19:23:20 2015 -0700

----------------------------------------------------------------------
 .../graph/traversal/step/sideEffect/ProfileStep.java  |  3 +++
 .../graph/traversal/strategy/ProfileStrategy.java     |  3 +++
 .../graph/traversal/step/sideEffect/ProfileTest.java  | 14 ++++++++++++++
 3 files changed, 20 insertions(+)
----------------------------------------------------------------------

[13/20] incubator-tinkerpop git commit: SparkGraphComputer implemented. There is still lots of cleanup and some optimizations that can be added, but the semantics are correct and 90 percent of the test cases are passing. Having some weird serialization issues on some random tests---don't get why.

Posted by ok...@apache.org.

SparkGraphComputer implemented. There is still lots of cleanup and some optimizations that can be added, but the semantics are correct and 90 percent of the test cases are passing. Having some weird serialization issues on some random tests---don't get why.


Project: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/commit/8246ee6d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/tree/8246ee6d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/diff/8246ee6d

Branch: refs/heads/master
Commit: 8246ee6d52c1520fb66796ec04c6e969d6d72d3d
Parents: 96ffd77
Author: Marko A. Rodriguez <ok...@gmail.com>
Authored: Tue Mar 3 13:50:08 2015 -0700
Committer: Marko A. Rodriguez <ok...@gmail.com>
Committed: Tue Mar 3 13:50:08 2015 -0700

----------------------------------------------------------------------
 .../process/computer/util/DefaultMemory.java    | 109 +++++++++++
 .../structure/util/detached/DetachedVertex.java |   2 +-
 hadoop-gremlin/conf/spark-gryo.properties       |  42 +++++
 hadoop-gremlin/conf/spark-kryo.properties       |  38 ----
 .../tinkerpop/gremlin/hadoop/Constants.java     |   1 +
 .../computer/example/TraversalSupplier1.java    |   7 +-
 .../process/computer/spark/RuleAccumulator.java |  45 +++++
 .../spark/SerializableConfiguration.java        |  10 +
 .../computer/spark/SparkGraphComputer.java      | 187 ++++++++++---------
 .../process/computer/spark/SparkMapEmitter.java |   2 +-
 .../process/computer/spark/SparkMemory.java     |  70 +++----
 .../process/computer/spark/SparkVertex.java     |  17 ++
 .../hadoop/process/computer/util/Rule.java      |  77 ++++++++
 .../hadoop/structure/HadoopConfiguration.java   |  20 +-
 .../gremlin/hadoop/structure/HadoopGraph.java   |   8 +-
 .../gremlin/hadoop/HadoopGraphProvider.java     |  16 +-
 ...HadoopGraphProcessComputerIntegrateTest.java |  32 ----
 ...GiraphGraphComputerProcessIntegrateTest.java |  32 ++++
 .../computer/giraph/GiraphGraphProvider.java    |  58 ++++++
 ...GraphComputerGroovyProcessIntegrateTest.java |  33 ++++
 .../SparkGraphComputerProcessIntegrateTest.java |  31 +++
 .../computer/spark/SparkGraphProvider.java      |  52 ++++++
 ...GraphComputerGroovyProcessIntegrateTest.java |  34 ++++
 ...GraphGroovyProcessComputerIntegrateTest.java |  33 ----
 24 files changed, 707 insertions(+), 249 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/8246ee6d/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/computer/util/DefaultMemory.java
----------------------------------------------------------------------
diff --git a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/computer/util/DefaultMemory.java b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/computer/util/DefaultMemory.java
new file mode 100644
index 0000000..9fee439
--- /dev/null
+++ b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/computer/util/DefaultMemory.java
@@ -0,0 +1,109 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.tinkerpop.gremlin.process.computer.util;
+
+import org.apache.tinkerpop.gremlin.process.computer.Memory;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * @author Marko A. Rodriguez (http://markorodriguez.com)
+ */
+public class DefaultMemory implements Memory.Admin {
+
+    private Map<String, Object> memory = new HashMap<>();
+    private int iteration = 0;
+    private long runtime = 0l;
+
+    public DefaultMemory() {
+
+    }
+
+    public DefaultMemory(final Memory.Admin copyMemory) {
+        this.iteration = copyMemory.getIteration();
+        this.runtime = copyMemory.getRuntime();
+        copyMemory.keys().forEach(key -> this.memory.put(key, copyMemory.get(key)));
+    }
+
+    @Override
+    public void setIteration(final int iteration) {
+        this.iteration = iteration;
+    }
+
+    @Override
+    public void setRuntime(final long runtime) {
+        this.runtime = runtime;
+    }
+
+    @Override
+    public Set<String> keys() {
+        return Collections.unmodifiableSet(this.memory.keySet());
+    }
+
+    @Override
+    public <R> R get(final String key) throws IllegalArgumentException {
+        final R r = (R) this.memory.get(key);
+        if (null == r)
+            throw Memory.Exceptions.memoryDoesNotExist(key);
+        else
+            return r;
+    }
+
+    @Override
+    public void set(final String key, final Object value) {
+        this.memory.put(key, value);
+    }
+
+    @Override
+    public int getIteration() {
+        return this.iteration;
+    }
+
+    @Override
+    public long getRuntime() {
+        return this.runtime;
+    }
+
+    @Override
+    public long incr(final String key, final long delta) {
+        final Long value = (Long) this.memory.get(key);
+        final Long newValue = (null == value) ? delta : delta + value;
+        this.memory.put(key, newValue);
+        return newValue;
+    }
+
+    @Override
+    public boolean and(final String key, final boolean bool) {
+        final Boolean value = (Boolean) this.memory.get(key);
+        final Boolean newValue = (null == value) ? bool : bool && value;
+        this.memory.put(key, newValue);
+        return newValue;
+    }
+
+    @Override
+    public boolean or(final String key, final boolean bool) {
+        final Boolean value = (Boolean) this.memory.get(key);
+        final Boolean newValue = (null == value) ? bool : bool || value;
+        this.memory.put(key, newValue);
+        return newValue;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/8246ee6d/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/structure/util/detached/DetachedVertex.java
----------------------------------------------------------------------
diff --git a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/structure/util/detached/DetachedVertex.java b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/structure/util/detached/DetachedVertex.java
index 2517986..6469a62 100644
--- a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/structure/util/detached/DetachedVertex.java
+++ b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/structure/util/detached/DetachedVertex.java
@@ -118,7 +118,7 @@ public class DetachedVertex extends DetachedElement<Vertex> implements Vertex, V
         if (hostVertex.equals(this))
             return hostVertex;
         else
-            throw new IllegalStateException("The host vertex must be the detached vertex to attach: " + this);
+            throw new IllegalStateException("The host vertex must be the detached vertex to attach: " + this + "!=" + hostVertex);
     }
 
     @Override

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/8246ee6d/hadoop-gremlin/conf/spark-gryo.properties
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/conf/spark-gryo.properties b/hadoop-gremlin/conf/spark-gryo.properties
new file mode 100644
index 0000000..a25482a
--- /dev/null
+++ b/hadoop-gremlin/conf/spark-gryo.properties
@@ -0,0 +1,42 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+gremlin.graph=org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph
+gremlin.hadoop.defaultGraphComputer=org.apache.tinkerpop.gremlin.hadoop.process.computer.spark.SparkGraphComputer
+gremlin.hadoop.graphInputFormat=org.apache.tinkerpop.gremlin.hadoop.structure.io.gryo.GryoInputFormat
+gremlin.hadoop.graphOutputFormat=org.apache.tinkerpop.gremlin.hadoop.structure.io.gryo.GryoOutputFormat
+gremlin.hadoop.memoryOutputFormat=org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat
+gremlin.hadoop.deriveMemory=false
+gremlin.hadoop.jarsInDistributedCache=false
+
+gremlin.hadoop.inputLocation=hdfs://localhost:9000/user/marko/tinkerpop-modern-vertices.kryo
+gremlin.hadoop.outputLocation=output
+
+# the vertex program to execute
+# gremlin.vertexProgram=org.apache.tinkerpop.gremlin.process.computer.ranking.pagerank.PageRankVertexProgram
+gremlin.vertexProgram=org.apache.tinkerpop.gremlin.process.computer.traversal.TraversalVertexProgram
+gremlin.traversalVertexProgram.traversalSupplier.type=CLASS
+gremlin.traversalVertexProgram.traversalSupplier.object=org.apache.tinkerpop.gremlin.hadoop.process.computer.example.TraversalSupplier1
+
+# It is possible to provide Spark configuration parameters for use with SparkGraphComputer
+##########################################################################################
+spark.master=local[4]
+spark.executor.memory=1024m
+spark.eventLog.enabled=true
+spark.serializer=org.apache.spark.serializer.JavaSerializer
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/8246ee6d/hadoop-gremlin/conf/spark-kryo.properties
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/conf/spark-kryo.properties b/hadoop-gremlin/conf/spark-kryo.properties
deleted file mode 100644
index 85426a0..0000000
--- a/hadoop-gremlin/conf/spark-kryo.properties
+++ /dev/null
@@ -1,38 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-
-gremlin.graph=org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph
-gremlin.hadoop.graphInputFormat=org.apache.tinkerpop.gremlin.hadoop.structure.io.gryo.GryoInputFormat
-gremlin.hadoop.graphOutputFormat=org.apache.tinkerpop.gremlin.hadoop.structure.io.gryo.GryoOutputFormat
-gremlin.hadoop.memoryOutputFormat=org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat
-gremlin.hadoop.deriveMemory=false
-gremlin.hadoop.jarsInDistributedCache=false
-
-gremlin.hadoop.inputLocation=hdfs://localhost:9000/user/marko/tinkerpop-modern-vertices.kryo
-gremlin.hadoop.outputLocation=output
-
-# the vertex program to execute
-gremlin.vertexProgram=org.apache.tinkerpop.gremlin.process.computer.ranking.pagerank.PageRankVertexProgram
-
-# It is possible to provide Spark configuration parameters for use with SparkGraphComputer
-##########################################################################################
-spark.master=local[4]
-spark.executor.memory=1024m
-spark.eventLog.enabled=true
-spark.serializer=org.apache.spark.serializer.JavaSerializer
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/8246ee6d/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/Constants.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/Constants.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/Constants.java
index bf06fcc..60ef636 100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/Constants.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/Constants.java
@@ -30,6 +30,7 @@ public class Constants {
     public static final String GREMLIN_HADOOP_GRAPH_INPUT_FORMAT = "gremlin.hadoop.graphInputFormat";
     public static final String GREMLIN_HADOOP_GRAPH_OUTPUT_FORMAT = "gremlin.hadoop.graphOutputFormat";
     public static final String GREMLIN_HADOOP_MEMORY_OUTPUT_FORMAT = "gremlin.hadoop.memoryOutputFormat";
+    public static final String GREMLIN_HADOOP_DEFAULT_GRAPH_COMPUTER = "gremlin.hadoop.defaultGraphComputer";
 
     public static final String GREMLIN_HADOOP_JARS_IN_DISTRIBUTED_CACHE = "gremlin.hadoop.jarsInDistributedCache";
     public static final String SYSTEM_G = Graph.Hidden.hide("g");

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/8246ee6d/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/example/TraversalSupplier1.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/example/TraversalSupplier1.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/example/TraversalSupplier1.java
index 1779809..dd9b5b7 100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/example/TraversalSupplier1.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/example/TraversalSupplier1.java
@@ -20,6 +20,9 @@ package org.apache.tinkerpop.gremlin.hadoop.process.computer.example;
 
 import org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph;
 import org.apache.tinkerpop.gremlin.process.Traversal;
+import org.apache.tinkerpop.gremlin.process.TraversalEngine;
+import org.apache.tinkerpop.gremlin.process.traversal.engine.ComputerTraversalEngine;
+import org.apache.tinkerpop.gremlin.structure.Graph;
 
 import java.util.function.Supplier;
 
@@ -29,6 +32,8 @@ import java.util.function.Supplier;
 public class TraversalSupplier1 implements Supplier<Traversal> {
     @Override
     public Traversal get() {
-        return HadoopGraph.open().V().out().out().values("name");
+        final Graph graph = HadoopGraph.open();
+        graph.engine(ComputerTraversalEngine.computer);
+        return graph.V().out().out().values("name");
     }
 }

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/8246ee6d/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/RuleAccumulator.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/RuleAccumulator.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/RuleAccumulator.java
new file mode 100644
index 0000000..59da2f4
--- /dev/null
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/RuleAccumulator.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.tinkerpop.gremlin.hadoop.process.computer.spark;
+
+import org.apache.spark.AccumulatorParam;
+import org.apache.tinkerpop.gremlin.hadoop.process.computer.util.Rule;
+
+/**
+ * Spark {@link AccumulatorParam} that merges {@link Rule}s; the NO_OP rule from {@link #zero} is the identity element.
+ * @author Marko A. Rodriguez (http://markorodriguez.com)
+ */
+public class RuleAccumulator implements AccumulatorParam<Rule> {
+
+    @Override
+    public Rule addAccumulator(final Rule a, final Rule b) {
+        return this.addInPlace(a, b); // adding one rule and merging two partial results follow the same logic
+    }
+
+    @Override
+    public Rule addInPlace(final Rule a, final Rule b) {
+        if (Rule.Operation.NO_OP.equals(a.operation)) return b; // a is zero(): nothing accumulated on that side yet
+        return Rule.Operation.NO_OP.equals(b.operation) ? a : new Rule(b.operation, b.operation.compute(a.object, b.object));
+    }
+
+    @Override
+    public Rule zero(final Rule rule) {
+        return new Rule(Rule.Operation.NO_OP, null); // the initial value is ignored; NO_OP marks "no update"
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/8246ee6d/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SerializableConfiguration.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SerializableConfiguration.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SerializableConfiguration.java
index b4a8005..73e3d08 100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SerializableConfiguration.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SerializableConfiguration.java
@@ -19,6 +19,8 @@
 package org.apache.tinkerpop.gremlin.hadoop.process.computer.spark;
 
 import org.apache.commons.configuration.AbstractConfiguration;
+import org.apache.commons.configuration.Configuration;
+import org.apache.commons.configuration.ConfigurationUtils;
 
 import java.io.Serializable;
 import java.util.HashMap;
@@ -32,6 +34,14 @@ public final class SerializableConfiguration extends AbstractConfiguration imple
 
     private final Map<String, Object> configurations = new HashMap<>();
 
+    public SerializableConfiguration() {
+        // nothing to copy: the backing map is initialised empty at its declaration
+    }
+
+    public SerializableConfiguration(final Configuration configuration) {
+        ConfigurationUtils.copy(configuration, this); // copies the given configuration's properties into this instance
+    }
+
     @Override
     protected void addPropertyDirect(final String key, final Object value) {
         this.configurations.put(key, value);

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/8246ee6d/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
index 7cace20..946d2af 100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
@@ -18,6 +18,7 @@
  */
 package org.apache.tinkerpop.gremlin.hadoop.process.computer.spark;
 
+import org.apache.commons.configuration.ConfigurationUtils;
 import org.apache.commons.configuration.FileConfiguration;
 import org.apache.commons.configuration.PropertiesConfiguration;
 import org.apache.hadoop.conf.Configuration;
@@ -41,9 +42,10 @@ import org.apache.tinkerpop.gremlin.hadoop.structure.util.HadoopHelper;
 import org.apache.tinkerpop.gremlin.process.computer.ComputerResult;
 import org.apache.tinkerpop.gremlin.process.computer.GraphComputer;
 import org.apache.tinkerpop.gremlin.process.computer.MapReduce;
+import org.apache.tinkerpop.gremlin.process.computer.Memory;
 import org.apache.tinkerpop.gremlin.process.computer.VertexProgram;
-import org.apache.tinkerpop.gremlin.process.computer.ranking.pagerank.PageRankMapReduce;
 import org.apache.tinkerpop.gremlin.process.computer.util.DefaultComputerResult;
+import org.apache.tinkerpop.gremlin.process.computer.util.DefaultMemory;
 import org.apache.tinkerpop.gremlin.process.computer.util.GraphComputerHelper;
 import org.apache.tinkerpop.gremlin.structure.util.StringFactory;
 import org.apache.tinkerpop.gremlin.tinkergraph.structure.TinkerVertex;
@@ -54,10 +56,8 @@ import scala.Tuple2;
 import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.Collections;
 import java.util.Comparator;
 import java.util.HashSet;
-import java.util.Iterator;
 import java.util.Set;
 import java.util.concurrent.CompletableFuture;
 import java.util.concurrent.Future;
@@ -119,11 +119,12 @@ public class SparkGraphComputer implements GraphComputer {
         if (null != this.vertexProgram)
             GraphComputerHelper.validateProgramOnComputer(this, vertexProgram);
 
+        final org.apache.commons.configuration.Configuration apacheConfiguration = this.hadoopGraph.configuration();
         final Configuration hadoopConfiguration = ConfUtil.makeHadoopConfiguration(this.hadoopGraph.configuration());
-        final SparkMemory memory = new SparkMemory(Collections.emptySet());
 
         return CompletableFuture.<ComputerResult>supplyAsync(() -> {
                     final long startTime = System.currentTimeMillis();
+                    SparkMemory memory = null;
                     // load the graph
                     if (null != this.vertexProgram) {
                         final SparkConf sparkConfiguration = new SparkConf();
@@ -135,49 +136,57 @@ public class SparkGraphComputer implements GraphComputer {
                         // set up the input format
                         final JavaSparkContext sparkContext = new JavaSparkContext(sparkConfiguration);
                         SparkGraphComputer.loadJars(sparkContext, hadoopConfiguration);
-                        final JavaPairRDD<NullWritable, VertexWritable> rdd = sparkContext.newAPIHadoopRDD(hadoopConfiguration,
-                                (Class<InputFormat<NullWritable, VertexWritable>>) hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_INPUT_FORMAT, InputFormat.class),
-                                NullWritable.class,
-                                VertexWritable.class);
-                        final JavaPairRDD<Object, SparkMessenger<Object>> rdd2 = rdd.mapToPair(tuple -> new Tuple2<>(tuple._2().get().id(), new SparkMessenger<>(new SparkVertex((TinkerVertex) tuple._2().get()), new ArrayList<>())));
-                        GraphComputerRDD<Object> g = GraphComputerRDD.of(rdd2);
+                        ///
+                        try {
+                            final JavaPairRDD<NullWritable, VertexWritable> rdd = sparkContext.newAPIHadoopRDD(hadoopConfiguration,
+                                    (Class<InputFormat<NullWritable, VertexWritable>>) hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_INPUT_FORMAT, InputFormat.class),
+                                    NullWritable.class,
+                                    VertexWritable.class);
+                            final JavaPairRDD<Object, SparkMessenger<Object>> rdd2 = rdd.mapToPair(tuple -> new Tuple2<>(tuple._2().get().id(), new SparkMessenger<>(new SparkVertex((TinkerVertex) tuple._2().get()), new ArrayList<>())));
+                            GraphComputerRDD<Object> g = GraphComputerRDD.of(rdd2);
 
-                        // set up the vertex program
-                        this.vertexProgram.setup(memory);
-                        final org.apache.commons.configuration.Configuration vertexProgramConfiguration = new SerializableConfiguration();
-                        this.vertexProgram.storeState(vertexProgramConfiguration);
-
-                        // execute the vertex program
-                        while (true) {
-                            g = g.execute(vertexProgramConfiguration, memory);
-                            g.foreachPartition(iterator -> doNothing());
-                            memory.incrIteration();
-                            if (this.vertexProgram.terminate(memory))
-                                break;
-                        }
-                        // write the output graph back to disk
-                        final String outputLocation = hadoopConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION);
-                        if (null != outputLocation) {
-                            try {
-                                FileSystem.get(hadoopConfiguration).delete(new Path(hadoopConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION)), true);
-                            } catch (final IOException e) {
-                                throw new IllegalStateException(e.getMessage(), e);
+                            // set up the vertex program
+                            memory = new SparkMemory(this.vertexProgram, this.mapReducers, sparkContext);
+                            this.vertexProgram.setup(memory);
+                            final SerializableConfiguration vertexProgramConfiguration = new SerializableConfiguration();
+                            this.vertexProgram.storeState(vertexProgramConfiguration);
+                            this.mapReducers.addAll(this.vertexProgram.getMapReducers());
+                            ConfUtil.mergeApacheIntoHadoopConfiguration(vertexProgramConfiguration, hadoopConfiguration);
+                            ConfigurationUtils.copy(vertexProgramConfiguration, apacheConfiguration);
+                            // execute the vertex program
+                            while (true) {
+                                g = g.execute(vertexProgramConfiguration, memory);
+                                g.foreachPartition(iterator -> doNothing());
+                                memory.incrIteration();
+                                if (this.vertexProgram.terminate(memory))
+                                    break;
+                            }
+                            // write the output graph back to disk
+                            final String outputLocation = hadoopConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION);
+                            if (null != outputLocation) {
+                                try {
+                                    FileSystem.get(hadoopConfiguration).delete(new Path(hadoopConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION)), true);
+                                } catch (final IOException e) {
+                                    throw new IllegalStateException(e.getMessage(), e);
+                                }
+                                // map back to a <nullwritable,vertexwritable> stream for output
+                                g.mapToPair(tuple -> new Tuple2<>(NullWritable.get(), new VertexWritable<>(tuple._2().vertex)))
+                                        .saveAsNewAPIHadoopFile(outputLocation + "/" + Constants.SYSTEM_G,
+                                                NullWritable.class,
+                                                VertexWritable.class,
+                                                (Class<OutputFormat<NullWritable, VertexWritable>>) hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_OUTPUT_FORMAT, OutputFormat.class));
                             }
-                            // map back to a <nullwritable,vertexwritable> stream for output
-                            g.mapToPair(tuple -> new Tuple2<>(NullWritable.get(), new VertexWritable<>(tuple._2().vertex)))
-                                    .saveAsNewAPIHadoopFile(outputLocation + "/" + Constants.SYSTEM_G,
-                                            NullWritable.class,
-                                            VertexWritable.class,
-                                            (Class<OutputFormat<NullWritable, VertexWritable>>) hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_OUTPUT_FORMAT, OutputFormat.class));
+                        } finally {
+                            sparkContext.close();
                         }
-                        sparkContext.close();
                     }
 
+                    final Memory.Admin finalMemory = null == memory ? new DefaultMemory() : new DefaultMemory(memory);
                     // execute mapreduce jobs
                     for (final MapReduce mapReduce : this.mapReducers) {
                         // set up the map reduce job
-                        final org.apache.commons.configuration.Configuration mapReduceConfiguration = new SerializableConfiguration();
-                        mapReduce.storeState(mapReduceConfiguration);
+                        final SerializableConfiguration newConfiguration = new SerializableConfiguration(apacheConfiguration);
+                        mapReduce.storeState(newConfiguration);
 
                         // set up spark job
                         final SparkConf sparkConfiguration = new SparkConf();
@@ -188,59 +197,61 @@ public class SparkGraphComputer implements GraphComputer {
                         // set up the input format
                         final JavaSparkContext sparkContext = new JavaSparkContext(sparkConfiguration);
                         SparkGraphComputer.loadJars(sparkContext, hadoopConfiguration);
-                        final JavaPairRDD<NullWritable, VertexWritable> g = sparkContext.newAPIHadoopRDD(hadoopConfiguration,
-                                (Class<InputFormat<NullWritable, VertexWritable>>) hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_INPUT_FORMAT, InputFormat.class),
-                                NullWritable.class,
-                                VertexWritable.class);
+                        try {
+                            final JavaPairRDD<NullWritable, VertexWritable> g = sparkContext.newAPIHadoopRDD(hadoopConfiguration,
+                                    (Class<InputFormat<NullWritable, VertexWritable>>) hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_INPUT_FORMAT, InputFormat.class),
+                                    NullWritable.class,
+                                    VertexWritable.class);
 
-                        // map
-                        JavaPairRDD<?, ?> mapRDD = g.flatMapToPair(tuple -> {
-                            final MapReduce m = MapReduce.createMapReduce(mapReduceConfiguration);
-                            final SparkMapEmitter mapEmitter = new SparkMapEmitter();
-                            m.map(tuple._2().get(), mapEmitter);
-                            return mapEmitter.getEmissions();
-                        });
-                        if (mapReduce.getMapKeySort().isPresent())
-                            mapRDD = mapRDD.sortByKey((Comparator) mapReduce.getMapKeySort().get());
-                        // todo: combine
-                        // reduce
-                        JavaPairRDD<?, ?> reduceRDD = null;
-                        if (mapReduce.doStage(MapReduce.Stage.REDUCE)) {
-                            reduceRDD = mapRDD.groupByKey().flatMapToPair(tuple -> {
-                                final MapReduce m = MapReduce.createMapReduce(mapReduceConfiguration);
-                                final SparkReduceEmitter reduceEmitter = new SparkReduceEmitter();
-                                m.reduce(tuple._1(), tuple._2().iterator(), reduceEmitter);
-                                return reduceEmitter.getEmissions();
+                            // map
+                            JavaPairRDD<?, ?> mapRDD = g.flatMapToPair(tuple -> {
+                                final MapReduce m = MapReduce.createMapReduce(newConfiguration);
+                                final SparkMapEmitter mapEmitter = new SparkMapEmitter();
+                                m.map(tuple._2().get(), mapEmitter);
+                                return mapEmitter.getEmissions();
                             });
-                            if (mapReduce.getReduceKeySort().isPresent())
-                                reduceRDD = reduceRDD.sortByKey((Comparator) mapReduce.getReduceKeySort().get());
-                        }
-                        // write the output graph back to disk
-                        final String outputLocation = hadoopConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION);
-                        if (null != outputLocation) {
-                            // map back to a <nullwritable,vertexwritable> stream for output
-                            ((null == reduceRDD) ? mapRDD : reduceRDD).mapToPair(tuple -> new Tuple2<>(new ObjectWritable<>(tuple._1()), new ObjectWritable<>(tuple._2()))).saveAsNewAPIHadoopFile(outputLocation + "/" + mapReduce.getMemoryKey(),
-                                    ObjectWritable.class,
-                                    ObjectWritable.class,
-                                    (Class<OutputFormat<ObjectWritable, ObjectWritable>>) hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_MEMORY_OUTPUT_FORMAT, OutputFormat.class));
-                            // if its not a SequenceFile there is no certain way to convert to necessary Java objects.
-                            // to get results you have to look through HDFS directory structure. Oh the horror.
-                            try {
-                                if (hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_MEMORY_OUTPUT_FORMAT, SequenceFileOutputFormat.class, OutputFormat.class).equals(SequenceFileOutputFormat.class))
-                                    mapReduce.addResultToMemory(memory, new ObjectWritableIterator(hadoopConfiguration, new Path(outputLocation + "/" + mapReduce.getMemoryKey())));
-                                else
-                                    HadoopGraph.LOGGER.warn(Constants.SEQUENCE_WARNING);
-                            } catch (final IOException e) {
-                                throw new IllegalStateException(e.getMessage(), e);
+                            if (mapReduce.getMapKeySort().isPresent())
+                                mapRDD = mapRDD.sortByKey((Comparator) mapReduce.getMapKeySort().get());
+                            // todo: combine
+                            // reduce
+                            JavaPairRDD<?, ?> reduceRDD = null;
+                            if (mapReduce.doStage(MapReduce.Stage.REDUCE)) {
+                                reduceRDD = mapRDD.groupByKey().flatMapToPair(tuple -> {
+                                    final MapReduce m = MapReduce.createMapReduce(newConfiguration);
+                                    final SparkReduceEmitter reduceEmitter = new SparkReduceEmitter();
+                                    m.reduce(tuple._1(), tuple._2().iterator(), reduceEmitter);
+                                    return reduceEmitter.getEmissions();
+                                });
+                                if (mapReduce.getReduceKeySort().isPresent())
+                                    reduceRDD = reduceRDD.sortByKey((Comparator) mapReduce.getReduceKeySort().get());
+                            }
+                            // write the output graph back to disk
+                            final String outputLocation = hadoopConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION);
+                            if (null != outputLocation) {
+                                // map back to a Hadoop stream for output
+                                ((null == reduceRDD) ? mapRDD : reduceRDD).mapToPair(tuple -> new Tuple2<>(new ObjectWritable<>(tuple._1()), new ObjectWritable<>(tuple._2()))).saveAsNewAPIHadoopFile(outputLocation + "/" + mapReduce.getMemoryKey(),
+                                        ObjectWritable.class,
+                                        ObjectWritable.class,
+                                        (Class<OutputFormat<ObjectWritable, ObjectWritable>>) hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_MEMORY_OUTPUT_FORMAT, OutputFormat.class));
+                                // if its not a SequenceFile there is no certain way to convert to necessary Java objects.
+                                // to get results you have to look through HDFS directory structure. Oh the horror.
+                                try {
+                                    if (hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_MEMORY_OUTPUT_FORMAT, SequenceFileOutputFormat.class, OutputFormat.class).equals(SequenceFileOutputFormat.class))
+                                        mapReduce.addResultToMemory(finalMemory, new ObjectWritableIterator(hadoopConfiguration, new Path(outputLocation + "/" + mapReduce.getMemoryKey())));
+                                    else
+                                        HadoopGraph.LOGGER.warn(Constants.SEQUENCE_WARNING);
+                                } catch (final IOException e) {
+                                    throw new IllegalStateException(e.getMessage(), e);
+                                }
                             }
+                        } finally {
+                            sparkContext.close();
                         }
-                        sparkContext.close();
                     }
 
                     // update runtime and return the newly computed graph
-                    memory.setRuntime(System.currentTimeMillis() - startTime);
-                    memory.complete();
-                    return new DefaultComputerResult(HadoopHelper.getOutputGraph(this.hadoopGraph), memory.asImmutable());
+                    finalMemory.setRuntime(System.currentTimeMillis() - startTime);
+                    return new DefaultComputerResult(HadoopHelper.getOutputGraph(this.hadoopGraph), finalMemory.asImmutable());
                 }
         );
     }
@@ -270,13 +281,13 @@ public class SparkGraphComputer implements GraphComputer {
     /////////////////
 
     public static void main(final String[] args) throws Exception {
-        final FileConfiguration configuration = new PropertiesConfiguration("/Users/marko/software/tinkerpop/tinkerpop3/hadoop-gremlin/conf/spark-kryo.properties");
+        final FileConfiguration configuration = new PropertiesConfiguration("/Users/marko/software/tinkerpop/tinkerpop3/hadoop-gremlin/conf/spark-gryo.properties");
         // TODO: final FileConfiguration configuration = new PropertiesConfiguration(args[0]);
         final HadoopGraph graph = HadoopGraph.open(configuration);
-        final ComputerResult result = new SparkGraphComputer(graph).program(VertexProgram.createVertexProgram(configuration)).mapReduce(PageRankMapReduce.build().create()).submit().get();
+        final ComputerResult result = new SparkGraphComputer(graph).program(VertexProgram.createVertexProgram(configuration)).submit().get();
         // TODO: remove everything below
         System.out.println(result);
-        result.memory().<Iterator>get(PageRankMapReduce.DEFAULT_MEMORY_KEY).forEachRemaining(System.out::println);
+        //result.memory().<Iterator>get(PageRankMapReduce.DEFAULT_MEMORY_KEY).forEachRemaining(System.out::println);
         //result.graph().configuration().getKeys().forEachRemaining(key -> System.out.println(key + "-->" + result.graph().configuration().getString(key)));
         result.graph().V().valueMap().forEachRemaining(System.out::println);
     }

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/8246ee6d/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMapEmitter.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMapEmitter.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMapEmitter.java
index 3a4a424..0f5acc1 100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMapEmitter.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMapEmitter.java
@@ -33,7 +33,7 @@ public class SparkMapEmitter<K, V> implements MapReduce.MapEmitter<K, V> {
 
     @Override
     public void emit(final K key, final V value) {
-        emissions.add(new Tuple2<>(key, value));
+        this.emissions.add(new Tuple2<>(key, value));
     }
 
     public Iterable<Tuple2<K, V>> getEmissions() {

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/8246ee6d/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMemory.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMemory.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMemory.java
index eb2af7f..b277e83 100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMemory.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMemory.java
@@ -18,18 +18,22 @@
  */
 package org.apache.tinkerpop.gremlin.hadoop.process.computer.spark;
 
+import org.apache.spark.Accumulator;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.tinkerpop.gremlin.hadoop.process.computer.util.Rule;
 import org.apache.tinkerpop.gremlin.process.computer.GraphComputer;
 import org.apache.tinkerpop.gremlin.process.computer.MapReduce;
 import org.apache.tinkerpop.gremlin.process.computer.Memory;
-import org.apache.tinkerpop.gremlin.process.computer.ranking.pagerank.PageRankVertexProgram;
+import org.apache.tinkerpop.gremlin.process.computer.VertexProgram;
 import org.apache.tinkerpop.gremlin.process.computer.util.MemoryHelper;
 import org.apache.tinkerpop.gremlin.structure.util.StringFactory;
 
 import java.io.Serializable;
+import java.util.Collections;
+import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Map;
 import java.util.Set;
-import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicLong;
 
@@ -39,28 +43,34 @@ import java.util.concurrent.atomic.AtomicLong;
 public final class SparkMemory implements Memory.Admin, Serializable {
 
     public final Set<String> memoryKeys = new HashSet<>();
-    public Map<String, Object> previousMap;
-    public Map<String, Object> currentMap;
     private final AtomicInteger iteration = new AtomicInteger(0);
     private final AtomicLong runtime = new AtomicLong(0l);
-
-    public SparkMemory(final Set<MapReduce> mapReducers) {
-        this.currentMap = new ConcurrentHashMap<>();
-        this.previousMap = new ConcurrentHashMap<>();
-        //if (null != vertexProgram) {
-        for (final String key : (Set<String>) PageRankVertexProgram.build().create().getMemoryComputeKeys()) {
-            MemoryHelper.validateKey(key);
-            this.memoryKeys.add(key);
+    private final Map<String, Accumulator<Rule>> memory = new HashMap<>();
+
+    public SparkMemory(final VertexProgram<?> vertexProgram, final Set<MapReduce> mapReducers, final JavaSparkContext sparkContext) {
+        if (null != vertexProgram) {
+            for (final String key : vertexProgram.getMemoryComputeKeys()) {
+                MemoryHelper.validateKey(key);
+                this.memoryKeys.add(key);
+            }
         }
-        //}
         for (final MapReduce mapReduce : mapReducers) {
             this.memoryKeys.add(mapReduce.getMemoryKey());
         }
+        for (final String key : this.memoryKeys) {
+            this.memory.put(key, sparkContext.accumulator(new Rule(Rule.Operation.NO_OP, null), new RuleAccumulator()));
+        }
+
     }
 
     @Override
     public Set<String> keys() {
-        return this.previousMap.keySet();
+        final Set<String> trueKeys = new HashSet<>();
+        this.memory.forEach((key, value) -> {
+            if (value.value().object != null)
+                trueKeys.add(key);
+        });
+        return Collections.unmodifiableSet(trueKeys);
     }
 
     @Override
@@ -88,16 +98,6 @@ public final class SparkMemory implements Memory.Admin, Serializable {
         return this.runtime.get();
     }
 
-    protected void complete() {
-        this.iteration.decrementAndGet();
-        this.previousMap = this.currentMap;
-    }
-
-    protected void completeSubRound() {
-        this.previousMap = new ConcurrentHashMap<>(this.currentMap);
-
-    }
-
     @Override
     public boolean isInitialIteration() {
         return this.getIteration() == 0;
@@ -105,7 +105,7 @@ public final class SparkMemory implements Memory.Admin, Serializable {
 
     @Override
     public <R> R get(final String key) throws IllegalArgumentException {
-        final R r = (R) this.previousMap.get(key);
+        final R r = (R) this.memory.get(key).value().object;
         if (null == r)
             throw Memory.Exceptions.memoryDoesNotExist(key);
         else
@@ -115,28 +115,28 @@ public final class SparkMemory implements Memory.Admin, Serializable {
     @Override
     public long incr(final String key, final long delta) {
         checkKeyValue(key, delta);
-        this.currentMap.compute(key, (k, v) -> null == v ? delta : delta + (Long) v);
-        return (Long) this.previousMap.getOrDefault(key, 0l) + delta;
+        this.memory.get(key).add(new Rule(Rule.Operation.INCR, delta));
+        return (Long) this.memory.get(key).value().object + delta;
     }
 
     @Override
     public boolean and(final String key, final boolean bool) {
         checkKeyValue(key, bool);
-        this.currentMap.compute(key, (k, v) -> null == v ? bool : bool && (Boolean) v);
-        return (Boolean) this.previousMap.getOrDefault(key, true) && bool;
+        this.memory.get(key).add(new Rule(Rule.Operation.AND, bool));
+        return bool;
     }
 
     @Override
     public boolean or(final String key, final boolean bool) {
         checkKeyValue(key, bool);
-        this.currentMap.compute(key, (k, v) -> null == v ? bool : bool || (Boolean) v);
-        return (Boolean) this.previousMap.getOrDefault(key, true) || bool;
+        this.memory.get(key).add(new Rule(Rule.Operation.OR, bool));
+        return bool;
     }
 
     @Override
     public void set(final String key, final Object value) {
         checkKeyValue(key, value);
-        this.currentMap.put(key, value);
+        this.memory.get(key).add(new Rule(Rule.Operation.SET, value));
     }
 
     @Override
@@ -145,8 +145,8 @@ public final class SparkMemory implements Memory.Admin, Serializable {
     }
 
     private void checkKeyValue(final String key, final Object value) {
-        //if (!this.memoryKeys.contains(key))
-        //    throw GraphComputer.Exceptions.providedKeyIsNotAMemoryComputeKey(key);
-        //MemoryHelper.validateValue(value);
+        if (!this.memoryKeys.contains(key))
+            throw GraphComputer.Exceptions.providedKeyIsNotAMemoryComputeKey(key);
+        MemoryHelper.validateValue(value);
     }
 }

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/8246ee6d/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkVertex.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkVertex.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkVertex.java
index 0a02156..fc6c4f2 100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkVertex.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkVertex.java
@@ -25,6 +25,8 @@ import org.apache.tinkerpop.gremlin.structure.Vertex;
 import org.apache.tinkerpop.gremlin.structure.VertexProperty;
 import org.apache.tinkerpop.gremlin.structure.io.gryo.GryoReader;
 import org.apache.tinkerpop.gremlin.structure.io.gryo.GryoWriter;
+import org.apache.tinkerpop.gremlin.structure.util.ElementHelper;
+import org.apache.tinkerpop.gremlin.structure.util.StringFactory;
 import org.apache.tinkerpop.gremlin.tinkergraph.structure.TinkerGraph;
 import org.apache.tinkerpop.gremlin.tinkergraph.structure.TinkerVertex;
 
@@ -103,6 +105,21 @@ public final class SparkVertex implements Vertex, Vertex.Iterators, Serializable
         return this.vertex.iterators().propertyIterator(propertyKeys);
     }
 
+    @Override
+    public String toString() {
+        return StringFactory.vertexString(this);
+    }
+
+    @Override
+    public int hashCode() {
+        return ElementHelper.hashCode(this);
+    }
+
+    @Override
+    public boolean equals(final Object other) {
+        return ElementHelper.areEqual(this, other);
+    }
+
     ///////////////////////////////
 
     private void writeObject(final ObjectOutputStream outputStream) throws IOException {

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/8246ee6d/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/util/Rule.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/util/Rule.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/util/Rule.java
new file mode 100644
index 0000000..16e2189
--- /dev/null
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/util/Rule.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.tinkerpop.gremlin.hadoop.process.computer.util;
+
+import java.io.Serializable;
+
+/**
+ * @author Marko A. Rodriguez (http://markorodriguez.com)
+ */
+public class Rule implements Serializable {
+
+    public enum Operation {
+        OR {
+            public Boolean compute(final Object first, final Object second) {
+                if (null == first)
+                    return (Boolean) second;
+                else if (null == second)
+                    return (Boolean) first;
+                else
+                    return (Boolean) first || (Boolean) second;
+            }
+        }, AND {
+            public Boolean compute(final Object first, final Object second) {
+                if (null == first)
+                    return (Boolean) second;
+                else if (null == second)
+                    return (Boolean) first;
+                else
+                    return (Boolean) first && (Boolean) second;
+            }
+        }, INCR {
+            public Long compute(final Object first, final Object second) {
+                if (null == first)
+                    return (Long) second;
+                else if (null == second)
+                    return (Long) first;
+                else
+                    return (Long) first + (Long) second;
+
+            }
+        }, SET {
+            public Object compute(final Object first, final Object second) {
+                return second;
+            }
+        }, NO_OP {
+            public Object compute(final Object first, final Object second) {
+                return null == first ? second : first;
+            }
+        };
+
+        public abstract Object compute(final Object first, final Object second);
+    }
+
+    public final Operation operation;
+    public final Object object;
+
+    public Rule(final Operation operation, final Object object) {
+        this.operation = operation;
+        this.object = object;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/8246ee6d/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/structure/HadoopConfiguration.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/structure/HadoopConfiguration.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/structure/HadoopConfiguration.java
index 40b786e..76636cd 100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/structure/HadoopConfiguration.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/structure/HadoopConfiguration.java
@@ -18,14 +18,16 @@
  */
 package org.apache.tinkerpop.gremlin.hadoop.structure;
 
-import org.apache.tinkerpop.gremlin.hadoop.Constants;
-import org.apache.tinkerpop.gremlin.hadoop.structure.io.VertexWritable;
-import org.apache.tinkerpop.gremlin.util.StreamFactory;
 import org.apache.commons.configuration.BaseConfiguration;
 import org.apache.commons.configuration.Configuration;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.mapred.OutputFormat;
 import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.tinkerpop.gremlin.hadoop.Constants;
+import org.apache.tinkerpop.gremlin.hadoop.process.computer.giraph.GiraphGraphComputer;
+import org.apache.tinkerpop.gremlin.hadoop.structure.io.VertexWritable;
+import org.apache.tinkerpop.gremlin.process.computer.GraphComputer;
+import org.apache.tinkerpop.gremlin.util.StreamFactory;
 import org.javatuples.Pair;
 
 import java.io.Serializable;
@@ -84,6 +86,18 @@ public class HadoopConfiguration extends BaseConfiguration implements Serializab
         this.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, outputLocation);
     }
 
+    public Class<? extends GraphComputer> getGraphComputer() {
+        if (!this.containsKey(Constants.GREMLIN_HADOOP_DEFAULT_GRAPH_COMPUTER))
+            return GiraphGraphComputer.class;
+        else {
+            try {
+                return (Class) Class.forName(this.getString(Constants.GREMLIN_HADOOP_DEFAULT_GRAPH_COMPUTER));
+            } catch (final ClassNotFoundException e) {
+                throw new RuntimeException(e.getMessage(), e);
+            }
+        }
+    }
+
     @Override
     public Iterator iterator() {
         return StreamFactory.stream(this.getKeys()).map(k -> new Pair(k, this.getProperty(k))).iterator();

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/8246ee6d/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/structure/HadoopGraph.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/structure/HadoopGraph.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/structure/HadoopGraph.java
index 7feb9e3..8154888 100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/structure/HadoopGraph.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/structure/HadoopGraph.java
@@ -24,6 +24,7 @@ import org.apache.hadoop.mapred.OutputFormat;
 import org.apache.hadoop.mapreduce.InputFormat;
 import org.apache.tinkerpop.gremlin.hadoop.Constants;
 import org.apache.tinkerpop.gremlin.hadoop.process.computer.giraph.GiraphGraphComputer;
+import org.apache.tinkerpop.gremlin.hadoop.process.computer.spark.SparkGraphComputer;
 import org.apache.tinkerpop.gremlin.hadoop.process.graph.traversal.strategy.HadoopElementStepStrategy;
 import org.apache.tinkerpop.gremlin.hadoop.structure.hdfs.HadoopEdgeIterator;
 import org.apache.tinkerpop.gremlin.hadoop.structure.hdfs.HadoopVertexIterator;
@@ -141,9 +142,11 @@ public class HadoopGraph implements Graph, Graph.Iterators {
 
     protected final HadoopConfiguration configuration;
     private TraversalEngine traversalEngine = StandardTraversalEngine.standard;
+    private Class<? extends GraphComputer> graphComputerClass = GiraphGraphComputer.class;
 
     private HadoopGraph(final Configuration configuration) {
         this.configuration = new HadoopConfiguration(configuration);
+        this.graphComputerClass = this.configuration.getGraphComputer();
     }
 
     public static HadoopGraph open() {
@@ -161,13 +164,14 @@ public class HadoopGraph implements Graph, Graph.Iterators {
 
     @Override
     public void compute(final Class<? extends GraphComputer> graphComputerClass) {
-        if (!graphComputerClass.equals(GiraphGraphComputer.class))
+        if (!graphComputerClass.equals(GiraphGraphComputer.class) && !graphComputerClass.equals(SparkGraphComputer.class)) // reject only when neither supported computer matches
             throw Graph.Exceptions.graphDoesNotSupportProvidedGraphComputer(graphComputerClass);
+        this.graphComputerClass = graphComputerClass;
     }
 
     @Override
     public GraphComputer compute() {
-        return new GiraphGraphComputer(this);
+        return this.graphComputerClass.equals(GiraphGraphComputer.class) ? new GiraphGraphComputer(this) : new SparkGraphComputer(this);
     }
 
     @Override

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/8246ee6d/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/HadoopGraphProvider.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/HadoopGraphProvider.java b/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/HadoopGraphProvider.java
index 300def1..69c0b36 100644
--- a/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/HadoopGraphProvider.java
+++ b/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/HadoopGraphProvider.java
@@ -19,12 +19,10 @@
 package org.apache.tinkerpop.gremlin.hadoop;
 
 import org.apache.commons.configuration.Configuration;
-import org.apache.giraph.conf.GiraphConstants;
 import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
 import org.apache.tinkerpop.gremlin.AbstractGraphProvider;
 import org.apache.tinkerpop.gremlin.LoadGraphWith;
 import org.apache.tinkerpop.gremlin.TestHelper;
-import org.apache.tinkerpop.gremlin.hadoop.process.computer.giraph.GiraphGraphComputer;
 import org.apache.tinkerpop.gremlin.hadoop.structure.HadoopEdge;
 import org.apache.tinkerpop.gremlin.hadoop.structure.HadoopElement;
 import org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph;
@@ -96,23 +94,11 @@ public class HadoopGraphProvider extends AbstractGraphProvider {
     @Override
     public Map<String, Object> getBaseConfiguration(final String graphName, final Class<?> test, final String testMethodName) {
         return new HashMap<String, Object>() {{
-            put("gremlin.graph", HadoopGraph.class.getName());
+            put(Graph.GRAPH, HadoopGraph.class.getName());
             put(Constants.GREMLIN_HADOOP_GRAPH_INPUT_FORMAT, GryoInputFormat.class.getCanonicalName());
             put(Constants.GREMLIN_HADOOP_GRAPH_OUTPUT_FORMAT, GryoOutputFormat.class.getCanonicalName());
-            //put(Constants.GREMLIN_GIRAPH_MEMORY_OUTPUT_FORMAT_CLASS, TextOutputFormat.class.getCanonicalName());
             put(Constants.GREMLIN_HADOOP_MEMORY_OUTPUT_FORMAT, SequenceFileOutputFormat.class.getCanonicalName());
-            put(GiraphConstants.MIN_WORKERS, 1);
-            put(GiraphConstants.MAX_WORKERS, 1);
-            put(GiraphConstants.SPLIT_MASTER_WORKER.getKey(), false);
-            //put("giraph.localTestMode", true);
-            put(GiraphConstants.ZOOKEEPER_JAR, GiraphGraphComputer.class.getResource("zookeeper-3.3.3.jar").getPath());
-            put("giraph.zkServerPort", "2181");  // you must have a local zookeeper running on this port
-            put("giraph.nettyServerUseExecutionHandler", false); // this prevents so many integration tests running out of threads
-            put("giraph.nettyClientUseExecutionHandler", false); // this prevents so many integration tests running out of threads
-            //put(Constants.GREMLIN_GIRAPH_INPUT_LOCATION, GryoInputFormat.class.getResource("tinkerpop-classic-vertices.kryo").getPath());
             put(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, "hadoop-gremlin/target/test-output");
-            put(Constants.GREMLIN_HADOOP_DERIVE_MEMORY, true);
-            put(Constants.GREMLIN_HADOOP_JARS_IN_DISTRIBUTED_CACHE, false);
         }};
     }
 

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/8246ee6d/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/process/HadoopGraphProcessComputerIntegrateTest.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/process/HadoopGraphProcessComputerIntegrateTest.java b/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/process/HadoopGraphProcessComputerIntegrateTest.java
deleted file mode 100644
index 653a713..0000000
--- a/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/process/HadoopGraphProcessComputerIntegrateTest.java
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.tinkerpop.gremlin.hadoop.process;
-
-import org.apache.tinkerpop.gremlin.hadoop.HadoopGraphProvider;
-import org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph;
-import org.apache.tinkerpop.gremlin.process.ProcessComputerSuite;
-import org.junit.runner.RunWith;
-
-/**
- * @author Marko A. Rodriguez (http://markorodriguez.com)
- */
-@RunWith(ProcessComputerSuite.class)
-@ProcessComputerSuite.GraphProviderClass(provider = HadoopGraphProvider.class, graph = HadoopGraph.class)
-public class HadoopGraphProcessComputerIntegrateTest {
-}

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/8246ee6d/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/giraph/GiraphGraphComputerProcessIntegrateTest.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/giraph/GiraphGraphComputerProcessIntegrateTest.java b/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/giraph/GiraphGraphComputerProcessIntegrateTest.java
new file mode 100644
index 0000000..7c9eae7
--- /dev/null
+++ b/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/giraph/GiraphGraphComputerProcessIntegrateTest.java
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.tinkerpop.gremlin.hadoop.process.computer.giraph;
+
+import org.apache.tinkerpop.gremlin.hadoop.HadoopGraphProvider;
+import org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph;
+import org.apache.tinkerpop.gremlin.process.ProcessComputerSuite;
+import org.junit.runner.RunWith;
+
+/**
+ * @author Marko A. Rodriguez (http://markorodriguez.com)
+ */
+@RunWith(ProcessComputerSuite.class)
+@ProcessComputerSuite.GraphProviderClass(provider = GiraphGraphProvider.class, graph = HadoopGraph.class)
+public class GiraphGraphComputerProcessIntegrateTest {
+}

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/8246ee6d/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/giraph/GiraphGraphProvider.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/giraph/GiraphGraphProvider.java b/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/giraph/GiraphGraphProvider.java
new file mode 100644
index 0000000..5650ae5
--- /dev/null
+++ b/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/giraph/GiraphGraphProvider.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.tinkerpop.gremlin.hadoop.process.computer.giraph;
+
+import org.apache.giraph.conf.GiraphConstants;
+import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
+import org.apache.tinkerpop.gremlin.hadoop.Constants;
+import org.apache.tinkerpop.gremlin.hadoop.HadoopGraphProvider;
+import org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph;
+import org.apache.tinkerpop.gremlin.hadoop.structure.io.gryo.GryoInputFormat;
+import org.apache.tinkerpop.gremlin.hadoop.structure.io.gryo.GryoOutputFormat;
+import org.apache.tinkerpop.gremlin.structure.Graph;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * @author Marko A. Rodriguez (http://markorodriguez.com)
+ */
+public final class GiraphGraphProvider extends HadoopGraphProvider {
+
+    @Override
+    public Map<String, Object> getBaseConfiguration(final String graphName, final Class<?> test, final String testMethodName) {
+        return new HashMap<String, Object>() {{
+            put(Graph.GRAPH, HadoopGraph.class.getName());
+            put(Constants.GREMLIN_HADOOP_GRAPH_INPUT_FORMAT, GryoInputFormat.class.getCanonicalName());
+            put(Constants.GREMLIN_HADOOP_GRAPH_OUTPUT_FORMAT, GryoOutputFormat.class.getCanonicalName());
+            put(Constants.GREMLIN_HADOOP_MEMORY_OUTPUT_FORMAT, SequenceFileOutputFormat.class.getCanonicalName());
+            put(GiraphConstants.MIN_WORKERS, 1);
+            put(GiraphConstants.MAX_WORKERS, 1);
+            put(GiraphConstants.SPLIT_MASTER_WORKER.getKey(), false);
+            //put("giraph.localTestMode", true);
+            put(GiraphConstants.ZOOKEEPER_JAR, GiraphGraphComputer.class.getResource("zookeeper-3.3.3.jar").getPath());
+            put("giraph.zkServerPort", "2181");  // you must have a local zookeeper running on this port
+            put("giraph.nettyServerUseExecutionHandler", false); // this prevents so many integration tests running out of threads
+            put("giraph.nettyClientUseExecutionHandler", false); // this prevents so many integration tests running out of threads
+            put(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, "hadoop-gremlin/target/test-output");
+            put(Constants.GREMLIN_HADOOP_DERIVE_MEMORY, true);
+            put(Constants.GREMLIN_HADOOP_JARS_IN_DISTRIBUTED_CACHE, false);
+        }};
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/8246ee6d/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/giraph/groovy/GiraphGraphComputerGroovyProcessIntegrateTest.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/giraph/groovy/GiraphGraphComputerGroovyProcessIntegrateTest.java b/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/giraph/groovy/GiraphGraphComputerGroovyProcessIntegrateTest.java
new file mode 100644
index 0000000..3e909b4
--- /dev/null
+++ b/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/giraph/groovy/GiraphGraphComputerGroovyProcessIntegrateTest.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.tinkerpop.gremlin.hadoop.process.computer.giraph.groovy;
+
+import org.apache.tinkerpop.gremlin.hadoop.process.computer.giraph.GiraphGraphProvider;
+import org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph;
+import org.apache.tinkerpop.gremlin.process.GroovyProcessComputerSuite;
+import org.apache.tinkerpop.gremlin.process.ProcessComputerSuite;
+import org.junit.runner.RunWith;
+
+/** JUnit entry point: runs {@link GroovyProcessComputerSuite} with {@link HadoopGraph} as the graph and {@link GiraphGraphProvider} as its provider; the class body is intentionally empty.
+ * @author Marko A. Rodriguez (http://markorodriguez.com)
+ */
+@RunWith(GroovyProcessComputerSuite.class)
+@ProcessComputerSuite.GraphProviderClass(provider = GiraphGraphProvider.class, graph = HadoopGraph.class)
+public class GiraphGraphComputerGroovyProcessIntegrateTest {
+}

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/8246ee6d/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputerProcessIntegrateTest.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputerProcessIntegrateTest.java b/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputerProcessIntegrateTest.java
new file mode 100644
index 0000000..91a7fc5
--- /dev/null
+++ b/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputerProcessIntegrateTest.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.tinkerpop.gremlin.hadoop.process.computer.spark;
+
+import org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph;
+import org.apache.tinkerpop.gremlin.process.ProcessComputerSuite;
+import org.junit.runner.RunWith;
+
+/** JUnit entry point: runs {@link ProcessComputerSuite} with {@link HadoopGraph} as the graph and the same-package {@link SparkGraphProvider} as its provider; the class body is intentionally empty.
+ * @author Marko A. Rodriguez (http://markorodriguez.com)
+ */
+@RunWith(ProcessComputerSuite.class)
+@ProcessComputerSuite.GraphProviderClass(provider = SparkGraphProvider.class, graph = HadoopGraph.class)
+public class SparkGraphComputerProcessIntegrateTest {
+}

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/8246ee6d/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphProvider.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphProvider.java b/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphProvider.java
new file mode 100644
index 0000000..255b0a5
--- /dev/null
+++ b/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphProvider.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.tinkerpop.gremlin.hadoop.process.computer.spark;
+
+import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
+import org.apache.tinkerpop.gremlin.hadoop.Constants;
+import org.apache.tinkerpop.gremlin.hadoop.HadoopGraphProvider;
+import org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph;
+import org.apache.tinkerpop.gremlin.hadoop.structure.io.gryo.GryoInputFormat;
+import org.apache.tinkerpop.gremlin.hadoop.structure.io.gryo.GryoOutputFormat;
+import org.apache.tinkerpop.gremlin.structure.Graph;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/** {@link HadoopGraphProvider} subclass whose base configuration names {@link SparkGraphComputer} as the default graph computer.
+ * @author Marko A. Rodriguez (http://markorodriguez.com)
+ */
+public final class SparkGraphProvider extends HadoopGraphProvider {
+    /** Returns a fresh map of fixed settings; {@code graphName}, {@code test} and {@code testMethodName} are not consulted. */
+    @Override
+    public Map<String, Object> getBaseConfiguration(final String graphName, final Class<?> test, final String testMethodName) {
+        return new HashMap<String, Object>() {{
+            put(Graph.GRAPH, HadoopGraph.class.getName());
+            put(Constants.GREMLIN_HADOOP_GRAPH_INPUT_FORMAT, GryoInputFormat.class.getCanonicalName());
+            put(Constants.GREMLIN_HADOOP_GRAPH_OUTPUT_FORMAT, GryoOutputFormat.class.getCanonicalName());
+            put(Constants.GREMLIN_HADOOP_MEMORY_OUTPUT_FORMAT, SequenceFileOutputFormat.class.getCanonicalName());
+            put(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, "hadoop-gremlin/target/test-output");
+            put(Constants.GREMLIN_HADOOP_DERIVE_MEMORY, true);
+            put(Constants.GREMLIN_HADOOP_JARS_IN_DISTRIBUTED_CACHE, false);
+            // Spark-specific settings: default graph computer class and the Spark master URL
+            put(Constants.GREMLIN_HADOOP_DEFAULT_GRAPH_COMPUTER, SparkGraphComputer.class.getCanonicalName());
+            put("spark.master", "local[4]"); // Spark master URL form local[N]: run locally with 4 worker threads
+        }};
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/8246ee6d/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/groovy/SparkGraphComputerGroovyProcessIntegrateTest.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/groovy/SparkGraphComputerGroovyProcessIntegrateTest.java b/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/groovy/SparkGraphComputerGroovyProcessIntegrateTest.java
new file mode 100644
index 0000000..e7b843a
--- /dev/null
+++ b/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/groovy/SparkGraphComputerGroovyProcessIntegrateTest.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.tinkerpop.gremlin.hadoop.process.computer.spark.groovy;
+
+import org.apache.tinkerpop.gremlin.hadoop.HadoopGraphProvider;
+import org.apache.tinkerpop.gremlin.hadoop.process.computer.spark.SparkGraphProvider;
+import org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph;
+import org.apache.tinkerpop.gremlin.process.GroovyProcessComputerSuite;
+import org.apache.tinkerpop.gremlin.process.ProcessComputerSuite;
+import org.junit.runner.RunWith;
+
+/** JUnit entry point: runs {@link GroovyProcessComputerSuite} with {@link HadoopGraph} as the graph and {@link SparkGraphProvider} as its provider; the class body is intentionally empty.
+ * @author Marko A. Rodriguez (http://markorodriguez.com)
+ */
+@RunWith(GroovyProcessComputerSuite.class)
+@ProcessComputerSuite.GraphProviderClass(provider = SparkGraphProvider.class, graph = HadoopGraph.class)
+public class SparkGraphComputerGroovyProcessIntegrateTest {
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/8246ee6d/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/process/groovy/HadoopGraphGroovyProcessComputerIntegrateTest.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/process/groovy/HadoopGraphGroovyProcessComputerIntegrateTest.java b/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/process/groovy/HadoopGraphGroovyProcessComputerIntegrateTest.java
deleted file mode 100644
index 3827a6f..0000000
--- a/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/process/groovy/HadoopGraphGroovyProcessComputerIntegrateTest.java
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.tinkerpop.gremlin.hadoop.process.groovy;
-
-import org.apache.tinkerpop.gremlin.hadoop.HadoopGraphProvider;
-import org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph;
-import org.apache.tinkerpop.gremlin.process.GroovyProcessComputerSuite;
-import org.apache.tinkerpop.gremlin.process.ProcessComputerSuite;
-import org.junit.runner.RunWith;
-
-/**
- * @author Marko A. Rodriguez (http://markorodriguez.com)
- */
-@RunWith(GroovyProcessComputerSuite.class)
-@ProcessComputerSuite.GraphProviderClass(provider = HadoopGraphProvider.class, graph = HadoopGraph.class)
-public class HadoopGraphGroovyProcessComputerIntegrateTest {
-}

[14/20] incubator-tinkerpop git commit: lots of clean up and organization. SparkGraphComputer is now really clean with all the dirty work being done by SparkHelper.

Posted by ok...@apache.org.

lots of clean up and organization. SparkGraphComputer is now really clean with all the dirty work being done by SparkHelper.


Project: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/commit/b6133ae7
Tree: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/tree/b6133ae7
Diff: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/diff/b6133ae7

Branch: refs/heads/master
Commit: b6133ae75e4f8ebb29f0da042c37e9ce09d92ca4
Parents: 8246ee6
Author: Marko A. Rodriguez <ok...@gmail.com>
Authored: Tue Mar 3 15:32:05 2015 -0700
Committer: Marko A. Rodriguez <ok...@gmail.com>
Committed: Tue Mar 3 15:32:05 2015 -0700

----------------------------------------------------------------------
 .../computer/giraph/GiraphGraphComputer.java    |  10 +-
 .../computer/spark/GraphComputerRDD.java        | 106 -----------
 .../process/computer/spark/RuleAccumulator.java |   2 +-
 .../computer/spark/SparkGraphComputer.java      | 158 ++++++-----------
 .../process/computer/spark/SparkMapEmitter.java |   2 +-
 .../process/computer/spark/SparkMemory.java     |   1 -
 .../computer/spark/SparkMemoryAccumulator.java  |   2 +-
 .../process/computer/spark/SparkMessenger.java  |  31 +++-
 .../computer/spark/SparkReduceEmitter.java      |   2 +-
 .../process/computer/spark/ToyVertex.java       | 114 ------------
 .../computer/spark/util/SparkHelper.java        | 177 +++++++++++++++++++
 .../hadoop/structure/HadoopConfiguration.java   |   5 +-
 .../gremlin/hadoop/structure/HadoopGraph.java   |   2 +-
 13 files changed, 267 insertions(+), 345 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/b6133ae7/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/giraph/GiraphGraphComputer.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/giraph/GiraphGraphComputer.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/giraph/GiraphGraphComputer.java
index 589c22c..56d029c 100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/giraph/GiraphGraphComputer.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/giraph/GiraphGraphComputer.java
@@ -221,14 +221,8 @@ public class GiraphGraphComputer extends Configured implements GraphComputer, To
     }
 
     public static void main(final String[] args) throws Exception {
-        try {
-            final FileConfiguration configuration = new PropertiesConfiguration(args[0]);
-            final GiraphGraphComputer computer = new GiraphGraphComputer(HadoopGraph.open(configuration));
-            computer.program(VertexProgram.createVertexProgram(configuration)).submit().get();
-        } catch (Exception e) {
-            e.printStackTrace();
-            throw e;
-        }
+        final FileConfiguration configuration = new PropertiesConfiguration(args[0]);
+        new GiraphGraphComputer(HadoopGraph.open(configuration)).program(VertexProgram.createVertexProgram(configuration)).submit().get();
     }
 
     @Override

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/b6133ae7/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/GraphComputerRDD.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/GraphComputerRDD.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/GraphComputerRDD.java
deleted file mode 100644
index 786e5af..0000000
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/GraphComputerRDD.java
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.tinkerpop.gremlin.hadoop.process.computer.spark;
-
-import org.apache.commons.configuration.Configuration;
-import org.apache.spark.api.java.JavaPairRDD;
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.api.java.JavaRDDLike;
-import org.apache.spark.api.java.function.FlatMapFunction2;
-import org.apache.spark.rdd.RDD;
-import org.apache.tinkerpop.gremlin.process.computer.VertexProgram;
-import org.apache.tinkerpop.gremlin.util.iterator.IteratorUtils;
-import scala.Tuple2;
-import scala.reflect.ManifestFactory;
-
-import java.util.List;
-import java.util.stream.Collectors;
-
-
-/**
- * @author Marko A. Rodriguez (http://markorodriguez.com)
- */
-public class GraphComputerRDD<M> extends JavaPairRDD<Object, SparkMessenger<M>> {
-
-    public GraphComputerRDD(final RDD<Tuple2<Object, SparkMessenger<M>>> rdd) {
-        super(rdd, ManifestFactory.classType(Object.class), ManifestFactory.classType(SparkMessenger.class));
-    }
-
-    public GraphComputerRDD(final JavaPairRDD<Object, SparkMessenger<M>> rdd) {
-        super(rdd.rdd(), ManifestFactory.classType(Object.class), ManifestFactory.classType(SparkMessenger.class));
-    }
-
-    public GraphComputerRDD execute(final Configuration configuration, final SparkMemory memory) {
-        JavaPairRDD<Object, SparkMessenger<M>> current = this;
-        // execute vertex program
-        current = current.mapPartitionsToPair(iterator -> {
-            final VertexProgram<M> vertexProgram = VertexProgram.createVertexProgram(configuration);
-            return () -> IteratorUtils.<Tuple2<Object, SparkMessenger<M>>, Tuple2<Object, SparkMessenger<M>>>map(iterator, tuple -> {
-                vertexProgram.execute(tuple._2().vertex, tuple._2(), memory);
-                return tuple;
-            });
-        });
-        // clear all previous incoming messages
-        if (!memory.isInitialIteration()) {
-            current = current.mapValues(messenger -> {
-                messenger.clearIncomingMessages();
-                return messenger;
-            });
-        }
-        // emit messages
-        current = current.<Object, SparkMessenger<M>>flatMapToPair(tuple -> {
-            final List<Tuple2<Object, SparkMessenger<M>>> list = tuple._2().outgoing.entrySet()
-                    .stream()
-                    .map(entry -> new Tuple2<>(entry.getKey(), new SparkMessenger<>(new ToyVertex(entry.getKey()), entry.getValue())))
-                    .collect(Collectors.toList());          // the message vertices
-            list.add(new Tuple2<>(tuple._1(), tuple._2())); // the raw vertex
-            return list;
-        });
-        // "message pass" via reduction
-        current = current.reduceByKey((a, b) -> {
-            if (a.vertex instanceof ToyVertex && !(b.vertex instanceof ToyVertex))
-                a.vertex = b.vertex;
-            a.incoming.addAll(b.incoming);
-            return a;
-        });
-        // clear all previous outgoing messages
-        current = current.mapValues(messenger -> {
-            messenger.clearOutgoingMessages();
-            return messenger;
-        });
-        return GraphComputerRDD.of(current);
-    }
-
-    public static <M> GraphComputerRDD<M> of(final JavaPairRDD<Object, SparkMessenger<M>> javaPairRDD) {
-        return new GraphComputerRDD<>(javaPairRDD);
-    }
-
-    public static <M> GraphComputerRDD<M> of(final JavaRDD<Tuple2<Object, SparkMessenger<M>>> javaRDD) {
-        return new GraphComputerRDD<>(javaRDD.rdd());
-    }
-
-    //////////////
-
-    // TODO: What the hell is this for?
-    @Override
-    public JavaRDD zipPartitions(JavaRDDLike uJavaRDDLike, FlatMapFunction2 iteratorIteratorVFlatMapFunction2) {
-        return (JavaRDD) new JavaRDD<>(null, null);
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/b6133ae7/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/RuleAccumulator.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/RuleAccumulator.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/RuleAccumulator.java
index 59da2f4..446dbdb 100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/RuleAccumulator.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/RuleAccumulator.java
@@ -24,7 +24,7 @@ import org.apache.tinkerpop.gremlin.hadoop.process.computer.util.Rule;
 /**
  * @author Marko A. Rodriguez (http://markorodriguez.com)
  */
-public class RuleAccumulator implements AccumulatorParam<Rule> {
+public final class RuleAccumulator implements AccumulatorParam<Rule> {
 
     @Override
     public Rule addAccumulator(final Rule a, final Rule b) {

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/b6133ae7/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
index 946d2af..dd004bc 100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
@@ -22,20 +22,15 @@ import org.apache.commons.configuration.ConfigurationUtils;
 import org.apache.commons.configuration.FileConfiguration;
 import org.apache.commons.configuration.PropertiesConfiguration;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.mapreduce.InputFormat;
-import org.apache.hadoop.mapreduce.OutputFormat;
 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaPairRDD;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.tinkerpop.gremlin.hadoop.Constants;
+import org.apache.tinkerpop.gremlin.hadoop.process.computer.spark.util.SparkHelper;
 import org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph;
-import org.apache.tinkerpop.gremlin.hadoop.structure.io.ObjectWritable;
-import org.apache.tinkerpop.gremlin.hadoop.structure.io.ObjectWritableIterator;
 import org.apache.tinkerpop.gremlin.hadoop.structure.io.VertexWritable;
 import org.apache.tinkerpop.gremlin.hadoop.structure.util.ConfUtil;
 import org.apache.tinkerpop.gremlin.hadoop.structure.util.HadoopHelper;
@@ -54,9 +49,6 @@ import org.slf4j.LoggerFactory;
 import scala.Tuple2;
 
 import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Comparator;
 import java.util.HashSet;
 import java.util.Set;
 import java.util.concurrent.CompletableFuture;
@@ -66,12 +58,11 @@ import java.util.stream.Stream;
 /**
  * @author Marko A. Rodriguez (http://markorodriguez.com)
  */
-public class SparkGraphComputer implements GraphComputer {
+public final class SparkGraphComputer implements GraphComputer {
 
     public static final Logger LOGGER = LoggerFactory.getLogger(SparkGraphComputer.class);
 
     protected final SparkConf configuration = new SparkConf();
-
     protected final HadoopGraph hadoopGraph;
     private boolean executed = false;
     private final Set<MapReduce> mapReducers = new HashSet<>();
@@ -116,137 +107,99 @@ public class SparkGraphComputer implements GraphComputer {
         if (null == this.vertexProgram && this.mapReducers.isEmpty())
             throw GraphComputer.Exceptions.computerHasNoVertexProgramNorMapReducers();
         // it is possible to run mapreducers without a vertex program
-        if (null != this.vertexProgram)
+        if (null != this.vertexProgram) {
             GraphComputerHelper.validateProgramOnComputer(this, vertexProgram);
-
+            this.mapReducers.addAll(this.vertexProgram.getMapReducers());
+        }
+        // apache and hadoop configurations that are used throughout
         final org.apache.commons.configuration.Configuration apacheConfiguration = this.hadoopGraph.configuration();
         final Configuration hadoopConfiguration = ConfUtil.makeHadoopConfiguration(this.hadoopGraph.configuration());
 
         return CompletableFuture.<ComputerResult>supplyAsync(() -> {
                     final long startTime = System.currentTimeMillis();
                     SparkMemory memory = null;
-                    // load the graph
+                    SparkHelper.deleteOutputDirectory(hadoopConfiguration);
+                    ////////////////////////////////
+                    // process the vertex program //
+                    ////////////////////////////////
                     if (null != this.vertexProgram) {
+                        // set up the spark job
                         final SparkConf sparkConfiguration = new SparkConf();
                         sparkConfiguration.setAppName(Constants.GREMLIN_HADOOP_SPARK_JOB_PREFIX + this.vertexProgram);
                         hadoopConfiguration.forEach(entry -> sparkConfiguration.set(entry.getKey(), entry.getValue()));
                         if (FileInputFormat.class.isAssignableFrom(hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_INPUT_FORMAT, InputFormat.class)))
-                            hadoopConfiguration.set("mapred.input.dir", hadoopConfiguration.get(Constants.GREMLIN_HADOOP_INPUT_LOCATION));
-
-                        // set up the input format
+                            hadoopConfiguration.set("mapred.input.dir", hadoopConfiguration.get(Constants.GREMLIN_HADOOP_INPUT_LOCATION)); // necessary for Spark and newAPIHadoopRDD
                         final JavaSparkContext sparkContext = new JavaSparkContext(sparkConfiguration);
                         SparkGraphComputer.loadJars(sparkContext, hadoopConfiguration);
                         ///
                         try {
-                            final JavaPairRDD<NullWritable, VertexWritable> rdd = sparkContext.newAPIHadoopRDD(hadoopConfiguration,
+                            // create a message-passing friendly rdd from the hadoop input format
+                            JavaPairRDD<Object, SparkMessenger<Object>> graphRDD = sparkContext.newAPIHadoopRDD(hadoopConfiguration,
                                     (Class<InputFormat<NullWritable, VertexWritable>>) hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_INPUT_FORMAT, InputFormat.class),
                                     NullWritable.class,
-                                    VertexWritable.class);
-                            final JavaPairRDD<Object, SparkMessenger<Object>> rdd2 = rdd.mapToPair(tuple -> new Tuple2<>(tuple._2().get().id(), new SparkMessenger<>(new SparkVertex((TinkerVertex) tuple._2().get()), new ArrayList<>())));
-                            GraphComputerRDD<Object> g = GraphComputerRDD.of(rdd2);
+                                    VertexWritable.class)
+                                    .mapToPair(tuple -> new Tuple2<>(tuple._2().get().id(), new SparkMessenger<>(new SparkVertex((TinkerVertex) tuple._2().get()))));
 
-                            // set up the vertex program
+                            // set up the vertex program and wire up configurations
                             memory = new SparkMemory(this.vertexProgram, this.mapReducers, sparkContext);
                             this.vertexProgram.setup(memory);
                             final SerializableConfiguration vertexProgramConfiguration = new SerializableConfiguration();
                             this.vertexProgram.storeState(vertexProgramConfiguration);
-                            this.mapReducers.addAll(this.vertexProgram.getMapReducers());
                             ConfUtil.mergeApacheIntoHadoopConfiguration(vertexProgramConfiguration, hadoopConfiguration);
                             ConfigurationUtils.copy(vertexProgramConfiguration, apacheConfiguration);
+
                             // execute the vertex program
-                            while (true) {
-                                g = g.execute(vertexProgramConfiguration, memory);
-                                g.foreachPartition(iterator -> doNothing());
+                            do {
+                                graphRDD = SparkHelper.executeStep(graphRDD, this.vertexProgram, memory, vertexProgramConfiguration);
+                                graphRDD.foreachPartition(iterator -> doNothing()); // i think this is a fast way to execute the rdd
+                                graphRDD.cache(); // TODO: learn about persistence and caching
                                 memory.incrIteration();
-                                if (this.vertexProgram.terminate(memory))
-                                    break;
-                            }
+                            } while (!this.vertexProgram.terminate(memory));
+
                             // write the output graph back to disk
-                            final String outputLocation = hadoopConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION);
-                            if (null != outputLocation) {
-                                try {
-                                    FileSystem.get(hadoopConfiguration).delete(new Path(hadoopConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION)), true);
-                                } catch (final IOException e) {
-                                    throw new IllegalStateException(e.getMessage(), e);
-                                }
-                                // map back to a <nullwritable,vertexwritable> stream for output
-                                g.mapToPair(tuple -> new Tuple2<>(NullWritable.get(), new VertexWritable<>(tuple._2().vertex)))
-                                        .saveAsNewAPIHadoopFile(outputLocation + "/" + Constants.SYSTEM_G,
-                                                NullWritable.class,
-                                                VertexWritable.class,
-                                                (Class<OutputFormat<NullWritable, VertexWritable>>) hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_OUTPUT_FORMAT, OutputFormat.class));
-                            }
+                            SparkHelper.saveVertexProgramRDD(graphRDD, hadoopConfiguration);
                         } finally {
+                            // must close the context or bad things happen
                             sparkContext.close();
                         }
+                        sparkContext.close(); // why not try again todo
                     }
 
+                    //////////////////////////////
+                    // process the map reducers //
+                    //////////////////////////////
                     final Memory.Admin finalMemory = null == memory ? new DefaultMemory() : new DefaultMemory(memory);
-                    // execute mapreduce jobs
                     for (final MapReduce mapReduce : this.mapReducers) {
-                        // set up the map reduce job
-                        final SerializableConfiguration newConfiguration = new SerializableConfiguration(apacheConfiguration);
-                        mapReduce.storeState(newConfiguration);
-
-                        // set up spark job
+                        // set up the spark job
                         final SparkConf sparkConfiguration = new SparkConf();
                         sparkConfiguration.setAppName(Constants.GREMLIN_HADOOP_SPARK_JOB_PREFIX + mapReduce);
                         hadoopConfiguration.forEach(entry -> sparkConfiguration.set(entry.getKey(), entry.getValue()));
                         if (FileInputFormat.class.isAssignableFrom(hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_INPUT_FORMAT, InputFormat.class)))
                             hadoopConfiguration.set("mapred.input.dir", hadoopConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION) + "/" + Constants.SYSTEM_G);
-                        // set up the input format
                         final JavaSparkContext sparkContext = new JavaSparkContext(sparkConfiguration);
                         SparkGraphComputer.loadJars(sparkContext, hadoopConfiguration);
+                        // execute the map reduce job
                         try {
-                            final JavaPairRDD<NullWritable, VertexWritable> g = sparkContext.newAPIHadoopRDD(hadoopConfiguration,
+                            final JavaPairRDD<NullWritable, VertexWritable> hadoopGraphRDD = sparkContext.newAPIHadoopRDD(hadoopConfiguration,
                                     (Class<InputFormat<NullWritable, VertexWritable>>) hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_INPUT_FORMAT, InputFormat.class),
                                     NullWritable.class,
                                     VertexWritable.class);
 
+                            final SerializableConfiguration newApacheConfiguration = new SerializableConfiguration(apacheConfiguration);
+                            mapReduce.storeState(newApacheConfiguration);
                             // map
-                            JavaPairRDD<?, ?> mapRDD = g.flatMapToPair(tuple -> {
-                                final MapReduce m = MapReduce.createMapReduce(newConfiguration);
-                                final SparkMapEmitter mapEmitter = new SparkMapEmitter();
-                                m.map(tuple._2().get(), mapEmitter);
-                                return mapEmitter.getEmissions();
-                            });
-                            if (mapReduce.getMapKeySort().isPresent())
-                                mapRDD = mapRDD.sortByKey((Comparator) mapReduce.getMapKeySort().get());
-                            // todo: combine
+                            final JavaPairRDD mapRDD = SparkHelper.executeMap(hadoopGraphRDD, mapReduce, newApacheConfiguration);
+                            // combine todo
                             // reduce
-                            JavaPairRDD<?, ?> reduceRDD = null;
-                            if (mapReduce.doStage(MapReduce.Stage.REDUCE)) {
-                                reduceRDD = mapRDD.groupByKey().flatMapToPair(tuple -> {
-                                    final MapReduce m = MapReduce.createMapReduce(newConfiguration);
-                                    final SparkReduceEmitter reduceEmitter = new SparkReduceEmitter();
-                                    m.reduce(tuple._1(), tuple._2().iterator(), reduceEmitter);
-                                    return reduceEmitter.getEmissions();
-                                });
-                                if (mapReduce.getReduceKeySort().isPresent())
-                                    reduceRDD = reduceRDD.sortByKey((Comparator) mapReduce.getReduceKeySort().get());
-                            }
-                            // write the output graph back to disk
-                            final String outputLocation = hadoopConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION);
-                            if (null != outputLocation) {
-                                // map back to a Hadoop stream for output
-                                ((null == reduceRDD) ? mapRDD : reduceRDD).mapToPair(tuple -> new Tuple2<>(new ObjectWritable<>(tuple._1()), new ObjectWritable<>(tuple._2()))).saveAsNewAPIHadoopFile(outputLocation + "/" + mapReduce.getMemoryKey(),
-                                        ObjectWritable.class,
-                                        ObjectWritable.class,
-                                        (Class<OutputFormat<ObjectWritable, ObjectWritable>>) hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_MEMORY_OUTPUT_FORMAT, OutputFormat.class));
-                                // if its not a SequenceFile there is no certain way to convert to necessary Java objects.
-                                // to get results you have to look through HDFS directory structure. Oh the horror.
-                                try {
-                                    if (hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_MEMORY_OUTPUT_FORMAT, SequenceFileOutputFormat.class, OutputFormat.class).equals(SequenceFileOutputFormat.class))
-                                        mapReduce.addResultToMemory(finalMemory, new ObjectWritableIterator(hadoopConfiguration, new Path(outputLocation + "/" + mapReduce.getMemoryKey())));
-                                    else
-                                        HadoopGraph.LOGGER.warn(Constants.SEQUENCE_WARNING);
-                                } catch (final IOException e) {
-                                    throw new IllegalStateException(e.getMessage(), e);
-                                }
-                            }
+                            final JavaPairRDD reduceRDD = (mapReduce.doStage(MapReduce.Stage.REDUCE)) ? SparkHelper.executeReduce(mapRDD, mapReduce, newApacheConfiguration) : null;
+
+                            // write the map reduce output back to disk (memory)
+                            SparkHelper.saveMapReduceRDD(null == reduceRDD ? mapRDD : reduceRDD, mapReduce, finalMemory, hadoopConfiguration);
                         } finally {
+                            // must close the context or bad things happen
                             sparkContext.close();
                         }
+                        sparkContext.close(); // TODO: second close attempt on top of the one in the finally block above -- confirm whether it is needed
                     }
 
                     // update runtime and return the newly computed graph
@@ -256,6 +209,8 @@ public class SparkGraphComputer implements GraphComputer {
         );
     }
 
+    /////////////////
+
     private static final void doNothing() {
         // a cheap action
     }
@@ -278,19 +233,18 @@ public class SparkGraphComputer implements GraphComputer {
         }
     }
 
-    /////////////////
-
     public static void main(final String[] args) throws Exception {
-        final FileConfiguration configuration = new PropertiesConfiguration("/Users/marko/software/tinkerpop/tinkerpop3/hadoop-gremlin/conf/spark-gryo.properties");
-        // TODO: final FileConfiguration configuration = new PropertiesConfiguration(args[0]);
-        final HadoopGraph graph = HadoopGraph.open(configuration);
-        final ComputerResult result = new SparkGraphComputer(graph).program(VertexProgram.createVertexProgram(configuration)).submit().get();
-        // TODO: remove everything below
-        System.out.println(result);
-        //result.memory().<Iterator>get(PageRankMapReduce.DEFAULT_MEMORY_KEY).forEachRemaining(System.out::println);
-        //result.graph().configuration().getKeys().forEachRemaining(key -> System.out.println(key + "-->" + result.graph().configuration().getString(key)));
-        result.graph().V().valueMap().forEachRemaining(System.out::println);
+        final FileConfiguration configuration = new PropertiesConfiguration(args[0]);
+        new SparkGraphComputer(HadoopGraph.open(configuration)).program(VertexProgram.createVertexProgram(configuration)).submit().get();
     }
 
-
+    @Override
+    public Features features() {
+        return new Features() {
+            @Override
+            public boolean supportsNonSerializableObjects() {
+                return true;  // TODO
+            }
+        };
+    }
 }

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/b6133ae7/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMapEmitter.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMapEmitter.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMapEmitter.java
index 0f5acc1..6cd8885 100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMapEmitter.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMapEmitter.java
@@ -27,7 +27,7 @@ import java.util.List;
 /**
  * @author Marko A. Rodriguez (http://markorodriguez.com)
  */
-public class SparkMapEmitter<K, V> implements MapReduce.MapEmitter<K, V> {
+public final class SparkMapEmitter<K, V> implements MapReduce.MapEmitter<K, V> {
 
     private final List<Tuple2<K, V>> emissions = new ArrayList<>();
 

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/b6133ae7/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMemory.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMemory.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMemory.java
index b277e83..90bc73a 100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMemory.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMemory.java
@@ -60,7 +60,6 @@ public final class SparkMemory implements Memory.Admin, Serializable {
         for (final String key : this.memoryKeys) {
             this.memory.put(key, sparkContext.accumulator(new Rule(Rule.Operation.NO_OP, null), new RuleAccumulator()));
         }
-
     }
 
     @Override

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/b6133ae7/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMemoryAccumulator.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMemoryAccumulator.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMemoryAccumulator.java
index 470774a..10b9525 100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMemoryAccumulator.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMemoryAccumulator.java
@@ -23,7 +23,7 @@ import org.apache.spark.AccumulatorParam;
 /**
  * @author Marko A. Rodriguez (http://markorodriguez.com)
  */
-public class SparkMemoryAccumulator implements AccumulatorParam<SparkMemory> {
+public final class SparkMemoryAccumulator implements AccumulatorParam<SparkMemory> {
     @Override
     public SparkMemory addAccumulator(final SparkMemory first, final SparkMemory second) {
         return first;

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/b6133ae7/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMessenger.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMessenger.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMessenger.java
index cc170c4..812bdd3 100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMessenger.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMessenger.java
@@ -28,28 +28,31 @@ import org.apache.tinkerpop.gremlin.structure.Direction;
 import org.apache.tinkerpop.gremlin.structure.Edge;
 import org.apache.tinkerpop.gremlin.structure.Vertex;
 
-import java.io.IOException;
-import java.io.ObjectInputStream;
-import java.io.ObjectOutputStream;
 import java.io.Serializable;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 
 /**
  * @author Marko A. Rodriguez (http://markorodriguez.com)
  */
 public class SparkMessenger<M> implements Serializable, Messenger<M> {
 
-    protected Vertex vertex;
-    protected List<M> incoming;
-    protected Map<Object, List<M>> outgoing = new HashMap<>();
+    private Vertex vertex;
+    private List<M> incoming;
+    private Map<Object, List<M>> outgoing = new HashMap<>();
 
     public SparkMessenger() {
 
     }
 
+    public SparkMessenger(final Vertex vertex) {
+        this.vertex = vertex;
+        this.incoming = new ArrayList<>();
+    }
+
     public SparkMessenger(final Vertex vertex, final List<M> incomingMessages) {
         this.vertex = vertex;
         this.incoming = incomingMessages;
@@ -63,6 +66,22 @@ public class SparkMessenger<M> implements Serializable, Messenger<M> {
         this.outgoing.clear();
     }
 
+    public Vertex getVertex() {
+        return this.vertex;
+    }
+
+    public void setVertex(final Vertex vertex) {
+        this.vertex = vertex;
+    }
+
+    public void addIncomingMessages(final SparkMessenger<M> otherMessenger) {
+        this.incoming.addAll(otherMessenger.incoming);
+    }
+
+    public Set<Map.Entry<Object, List<M>>> getOutgoingMessages() {
+        return this.outgoing.entrySet();
+    }
+
     @Override
     public Iterable<M> receiveMessages(final MessageScope messageScope) {
         return this.incoming;

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/b6133ae7/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkReduceEmitter.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkReduceEmitter.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkReduceEmitter.java
index b9f056c..77e7072 100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkReduceEmitter.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkReduceEmitter.java
@@ -27,7 +27,7 @@ import java.util.List;
 /**
  * @author Marko A. Rodriguez (http://markorodriguez.com)
  */
-public class SparkReduceEmitter<OK, OV> implements MapReduce.ReduceEmitter<OK, OV> {
+public final class SparkReduceEmitter<OK, OV> implements MapReduce.ReduceEmitter<OK, OV> {
 
     private final List<Tuple2<OK, OV>> emissions = new ArrayList<>();
 

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/b6133ae7/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/ToyVertex.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/ToyVertex.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/ToyVertex.java
deleted file mode 100644
index 121ae2d..0000000
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/ToyVertex.java
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.tinkerpop.gremlin.hadoop.process.computer.spark;
-
-import org.apache.tinkerpop.gremlin.structure.Direction;
-import org.apache.tinkerpop.gremlin.structure.Edge;
-import org.apache.tinkerpop.gremlin.structure.Graph;
-import org.apache.tinkerpop.gremlin.structure.Vertex;
-import org.apache.tinkerpop.gremlin.structure.VertexProperty;
-import org.apache.tinkerpop.gremlin.structure.util.ElementHelper;
-import org.apache.tinkerpop.gremlin.structure.util.StringFactory;
-import org.apache.tinkerpop.gremlin.structure.util.empty.EmptyGraph;
-
-import java.io.Serializable;
-import java.util.Collections;
-import java.util.Iterator;
-
-/**
-* @author Marko A. Rodriguez (http://markorodriguez.com)
-*/
-public final class ToyVertex implements Vertex, Vertex.Iterators, Serializable {
-
-    private final Object id;
-    private static final String TOY_VERTEX = "toyVertex";
-
-    public ToyVertex(final Object id) {
-        this.id = id;
-    }
-
-    ToyVertex() {
-        this.id = null;
-    }
-
-    @Override
-    public Edge addEdge(final String label, final Vertex inVertex, final Object... keyValues) {
-        throw new UnsupportedOperationException();
-    }
-
-    @Override
-    public Object id() {
-        return this.id;
-    }
-
-    @Override
-    public String label() {
-        return TOY_VERTEX;
-    }
-
-    @Override
-    public Graph graph() {
-        return EmptyGraph.instance();
-    }
-
-    @Override
-    public <V> VertexProperty<V> property(final String key, final V value) {
-        throw new UnsupportedOperationException();
-    }
-
-    @Override
-    public void remove() {
-        throw new UnsupportedOperationException();
-    }
-
-    @Override
-    public Iterators iterators() {
-        return this;
-    }
-
-    @Override
-    public Iterator<Edge> edgeIterator(Direction direction, String... edgeLabels) {
-        return Collections.emptyIterator();
-    }
-
-    @Override
-    public Iterator<Vertex> vertexIterator(Direction direction, String... edgeLabels) {
-        return Collections.emptyIterator();
-    }
-
-    @Override
-    public <V> Iterator<VertexProperty<V>> propertyIterator(String... propertyKeys) {
-        return Collections.emptyIterator();
-    }
-
-    @Override
-    public int hashCode() {
-        return ElementHelper.hashCode(this);
-    }
-
-    @Override
-    public boolean equals(final Object other) {
-        return ElementHelper.areEqual(this, other);
-    }
-
-    @Override
-    public String toString() {
-        return StringFactory.vertexString(this);
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/b6133ae7/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/util/SparkHelper.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/util/SparkHelper.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/util/SparkHelper.java
new file mode 100644
index 0000000..ece9d7c
--- /dev/null
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/util/SparkHelper.java
@@ -0,0 +1,177 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.tinkerpop.gremlin.hadoop.process.computer.spark.util;
+
+import org.apache.commons.configuration.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.OutputFormat;
+import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.tinkerpop.gremlin.hadoop.Constants;
+import org.apache.tinkerpop.gremlin.hadoop.process.computer.spark.SparkMapEmitter;
+import org.apache.tinkerpop.gremlin.hadoop.process.computer.spark.SparkMemory;
+import org.apache.tinkerpop.gremlin.hadoop.process.computer.spark.SparkMessenger;
+import org.apache.tinkerpop.gremlin.hadoop.process.computer.spark.SparkReduceEmitter;
+import org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph;
+import org.apache.tinkerpop.gremlin.hadoop.structure.io.ObjectWritable;
+import org.apache.tinkerpop.gremlin.hadoop.structure.io.ObjectWritableIterator;
+import org.apache.tinkerpop.gremlin.hadoop.structure.io.VertexWritable;
+import org.apache.tinkerpop.gremlin.process.computer.MapReduce;
+import org.apache.tinkerpop.gremlin.process.computer.Memory;
+import org.apache.tinkerpop.gremlin.process.computer.VertexProgram;
+import org.apache.tinkerpop.gremlin.structure.Vertex;
+import org.apache.tinkerpop.gremlin.structure.util.detached.DetachedVertex;
+import org.apache.tinkerpop.gremlin.util.iterator.IteratorUtils;
+import scala.Tuple2;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.List;
+import java.util.stream.Collectors;
+
+/**
+ * @author Marko A. Rodriguez (http://markorodriguez.com)
+ */
+public final class SparkHelper {
+
+    private SparkHelper() {
+    }
+
+    public static <M> JavaPairRDD<Object, SparkMessenger<M>> executeStep(final JavaPairRDD<Object, SparkMessenger<M>> graphRDD, final VertexProgram<M> globalVertexProgram, final SparkMemory memory, final Configuration apacheConfiguration) {
+        JavaPairRDD<Object, SparkMessenger<M>> current = graphRDD;
+        // execute vertex program
+        current = current.mapPartitionsToPair(iterator -> {     // each partition has a copy of the vertex program
+            final VertexProgram<M> vertexProgram = VertexProgram.<VertexProgram<M>>createVertexProgram(apacheConfiguration);
+            return () -> IteratorUtils.<Tuple2<Object, SparkMessenger<M>>, Tuple2<Object, SparkMessenger<M>>>map(iterator, tuple -> {
+                vertexProgram.execute(tuple._2().getVertex(), tuple._2(), memory);
+                return tuple;
+            });
+        });
+        // clear all previous incoming messages
+        if (!memory.isInitialIteration()) {
+            current = current.mapValues(messenger -> {
+                messenger.clearIncomingMessages();
+                return messenger;
+            });
+        }
+        // emit messages
+        current = current.<Object, SparkMessenger<M>>flatMapToPair(tuple -> {
+            final List<Tuple2<Object, SparkMessenger<M>>> list = tuple._2().getOutgoingMessages()
+                    .stream()
+                    .map(entry -> new Tuple2<>(entry.getKey(), new SparkMessenger<>(new DetachedVertex(entry.getKey(), Vertex.DEFAULT_LABEL, Collections.emptyMap()), entry.getValue()))) // maybe go back to toy vertex if label is expensive
+                    .collect(Collectors.toList());          // the message vertices
+            list.add(new Tuple2<>(tuple._1(), tuple._2())); // the raw vertex
+            return list;
+        });
+
+        // TODO: local message combiner
+        if (globalVertexProgram.getMessageCombiner().isPresent()) {
+           /* current = current.combineByKey(messenger -> {
+                return messenger;
+            });*/
+        }
+
+        // "message pass" via reduction
+        current = current.reduceByKey((a, b) -> {
+            if (a.getVertex() instanceof DetachedVertex && !(b.getVertex() instanceof DetachedVertex))
+                a.setVertex(b.getVertex());
+            a.addIncomingMessages(b);
+            return a;
+        });
+
+        // clear all previous outgoing messages
+        current = current.mapValues(messenger -> {
+            messenger.clearOutgoingMessages();
+            return messenger;
+        });
+        return current;
+    }
+
+    public static <K, V> JavaPairRDD<K, V> executeMap(final JavaPairRDD<NullWritable, VertexWritable> hadoopGraphRDD, final MapReduce<K, V, ?, ?, ?> mapReduce, final Configuration apacheConfiguration) {
+        JavaPairRDD<K, V> mapRDD = hadoopGraphRDD.flatMapToPair(tuple -> {
+            final MapReduce<K, V, ?, ?, ?> m = MapReduce.createMapReduce(apacheConfiguration);    // todo create only for each partition
+            final SparkMapEmitter<K, V> mapEmitter = new SparkMapEmitter<>();
+            m.map(tuple._2().get(), mapEmitter);
+            return mapEmitter.getEmissions();
+        });
+        if (mapReduce.getMapKeySort().isPresent())
+            mapRDD = mapRDD.sortByKey(mapReduce.getMapKeySort().get());
+        return mapRDD;
+    }
+
+    // TODO: public static executeCombine()
+
+    public static <K, V, OK, OV> JavaPairRDD<OK, OV> executeReduce(final JavaPairRDD<K, V> mapRDD, final MapReduce<K, V, OK, OV, ?> mapReduce, final Configuration apacheConfiguration) {
+        JavaPairRDD<OK, OV> reduceRDD = mapRDD.groupByKey().flatMapToPair(tuple -> {
+            final MapReduce<K, V, OK, OV, ?> m = MapReduce.createMapReduce(apacheConfiguration);     // todo create only for each partition
+            final SparkReduceEmitter<OK, OV> reduceEmitter = new SparkReduceEmitter<>();
+            m.reduce(tuple._1(), tuple._2().iterator(), reduceEmitter);
+            return reduceEmitter.getEmissions();
+        });
+        if (mapReduce.getReduceKeySort().isPresent())
+            reduceRDD = reduceRDD.sortByKey(mapReduce.getReduceKeySort().get());
+        return reduceRDD;
+    }
+
+    public static void deleteOutputDirectory(final org.apache.hadoop.conf.Configuration hadoopConfiguration) {
+        final String outputLocation = hadoopConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION);
+        if (null != outputLocation) {
+            try {
+                FileSystem.get(hadoopConfiguration).delete(new Path(hadoopConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION)), true);
+            } catch (final IOException e) {
+                throw new IllegalStateException(e.getMessage(), e);
+            }
+        }
+    }
+
+    public static <M> void saveVertexProgramRDD(final JavaPairRDD<Object, SparkMessenger<M>> graphRDD, final org.apache.hadoop.conf.Configuration hadoopConfiguration) {
+        final String outputLocation = hadoopConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION);
+        if (null != outputLocation) {
+            // map back to a <nullwritable,vertexwritable> stream for output
+            graphRDD.mapToPair(tuple -> new Tuple2<>(NullWritable.get(), new VertexWritable<>(tuple._2().getVertex())))
+                    .saveAsNewAPIHadoopFile(outputLocation + "/" + Constants.SYSTEM_G,
+                            NullWritable.class,
+                            VertexWritable.class,
+                            (Class<OutputFormat<NullWritable, VertexWritable>>) hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_OUTPUT_FORMAT, OutputFormat.class));
+        }
+    }
+
+    public static void saveMapReduceRDD(final JavaPairRDD<Object, Object> mapReduceRDD, final MapReduce mapReduce, final Memory.Admin memory, final org.apache.hadoop.conf.Configuration hadoopConfiguration) {
+        final String outputLocation = hadoopConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION);
+        if (null != outputLocation) {
+            // map back to a Hadoop stream for output
+            mapReduceRDD.mapToPair(tuple -> new Tuple2<>(new ObjectWritable<>(tuple._1()), new ObjectWritable<>(tuple._2()))).saveAsNewAPIHadoopFile(outputLocation + "/" + mapReduce.getMemoryKey(),
+                    ObjectWritable.class,
+                    ObjectWritable.class,
+                    (Class<OutputFormat<ObjectWritable, ObjectWritable>>) hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_MEMORY_OUTPUT_FORMAT, OutputFormat.class));
+            // if it's not a SequenceFile, there is no certain way to convert the output to the necessary Java objects.
+            // to get results you have to look through HDFS directory structure. Oh the horror.
+            try {
+                if (hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_MEMORY_OUTPUT_FORMAT, SequenceFileOutputFormat.class, OutputFormat.class).equals(SequenceFileOutputFormat.class))
+                    mapReduce.addResultToMemory(memory, new ObjectWritableIterator(hadoopConfiguration, new Path(outputLocation + "/" + mapReduce.getMemoryKey())));
+                else
+                    HadoopGraph.LOGGER.warn(Constants.SEQUENCE_WARNING);
+            } catch (final IOException e) {
+                throw new IllegalStateException(e.getMessage(), e);
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/b6133ae7/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/structure/HadoopConfiguration.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/structure/HadoopConfiguration.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/structure/HadoopConfiguration.java
index 76636cd..0fb2e9f 100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/structure/HadoopConfiguration.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/structure/HadoopConfiguration.java
@@ -24,7 +24,6 @@ import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.mapred.OutputFormat;
 import org.apache.hadoop.mapreduce.InputFormat;
 import org.apache.tinkerpop.gremlin.hadoop.Constants;
-import org.apache.tinkerpop.gremlin.hadoop.process.computer.giraph.GiraphGraphComputer;
 import org.apache.tinkerpop.gremlin.hadoop.structure.io.VertexWritable;
 import org.apache.tinkerpop.gremlin.process.computer.GraphComputer;
 import org.apache.tinkerpop.gremlin.util.StreamFactory;
@@ -86,9 +85,9 @@ public class HadoopConfiguration extends BaseConfiguration implements Serializab
         this.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, outputLocation);
     }
 
-    public Class<? extends GraphComputer> getGraphComputer() {
+    public Class<? extends GraphComputer> getGraphComputer(final Class<? extends GraphComputer> defaultGraphComputer) {
         if (!this.containsKey(Constants.GREMLIN_HADOOP_DEFAULT_GRAPH_COMPUTER))
-            return GiraphGraphComputer.class;
+            return defaultGraphComputer;
         else {
             try {
                 return (Class) Class.forName(this.getString(Constants.GREMLIN_HADOOP_DEFAULT_GRAPH_COMPUTER));

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/b6133ae7/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/structure/HadoopGraph.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/structure/HadoopGraph.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/structure/HadoopGraph.java
index 8154888..990fc77 100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/structure/HadoopGraph.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/structure/HadoopGraph.java
@@ -146,7 +146,7 @@ public class HadoopGraph implements Graph, Graph.Iterators {
 
     private HadoopGraph(final Configuration configuration) {
         this.configuration = new HadoopConfiguration(configuration);
-        this.graphComputerClass = this.configuration.getGraphComputer();
+        this.graphComputerClass = this.configuration.getGraphComputer(GiraphGraphComputer.class);
     }
 
     public static HadoopGraph open() {

[12/20] incubator-tinkerpop git commit: fixed up spark/ branch now that it is Gryo and .kryo. @spmallette

Posted by ok...@apache.org.

fixed up spark/ branch now that it is Gryo and .kryo. @spmallette


Project: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/commit/96ffd77c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/tree/96ffd77c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/diff/96ffd77c

Branch: refs/heads/master
Commit: 96ffd77cc1b33ea9b054e0a32920e7079ecdfd6a
Parents: 70fc529 929a288
Author: Marko A. Rodriguez <ok...@gmail.com>
Authored: Tue Mar 3 11:10:39 2015 -0700
Committer: Marko A. Rodriguez <ok...@gmail.com>
Committed: Tue Mar 3 11:10:39 2015 -0700

----------------------------------------------------------------------
 CHANGELOG.asciidoc                              |   1 +
 data/grateful-dead-vertices.gio                 | Bin 1028298 -> 0 bytes
 data/grateful-dead-vertices.kryo                | Bin 0 -> 1028298 bytes
 data/grateful-dead.gio                          | Bin 515409 -> 0 bytes
 data/grateful-dead.kryo                         | Bin 0 -> 515409 bytes
 data/tinkerpop-classic-vertices.gio             | Bin 1323 -> 0 bytes
 data/tinkerpop-classic-vertices.kryo            | Bin 0 -> 1323 bytes
 data/tinkerpop-classic.gio                      | Bin 817 -> 0 bytes
 data/tinkerpop-classic.kryo                     | Bin 0 -> 817 bytes
 data/tinkerpop-crew-vertices.gio                | Bin 2743 -> 0 bytes
 data/tinkerpop-crew-vertices.kryo               | Bin 0 -> 2743 bytes
 data/tinkerpop-crew.gio                         | Bin 1916 -> 0 bytes
 data/tinkerpop-crew.kryo                        | Bin 0 -> 1916 bytes
 data/tinkerpop-modern-vertices.gio              | Bin 1439 -> 0 bytes
 data/tinkerpop-modern-vertices.kryo             | Bin 0 -> 1439 bytes
 data/tinkerpop-modern.gio                       | Bin 877 -> 0 bytes
 data/tinkerpop-modern.kryo                      | Bin 0 -> 877 bytes
 docs/src/implementations.asciidoc               |  14 +-
 docs/src/the-graph.asciidoc                     |  18 +-
 gremlin-console/conf/remote-objects.yaml        |   2 +-
 gremlin-console/conf/remote.yaml                |   2 +-
 .../plugin/gremlin-server-integration.yaml      |   4 +-
 .../gremlin/console/groovy/plugin/remote.yaml   |   2 +-
 .../util/metric/DependantMutableMetrics.java    |   2 +-
 .../process/util/metric/MutableMetrics.java     |   2 +-
 .../tinkerpop/gremlin/structure/Graph.java      |  50 +--
 .../gremlin/structure/io/DefaultIo.java         |   8 +-
 .../gremlin/structure/io/GraphMigrator.java     |  18 +-
 .../structure/io/gryo/EdgeTerminator.java       |  48 +++
 .../structure/io/gryo/EntrySerializer.java      |  43 ++
 .../structure/io/gryo/GraphSerializer.java      | 164 ++++++++
 .../structure/io/gryo/GremlinClassResolver.java | 206 ++++++++++
 .../gremlin/structure/io/gryo/GryoMapper.java   | 406 +++++++++++++++++++
 .../gremlin/structure/io/gryo/GryoReader.java   | 400 ++++++++++++++++++
 .../gremlin/structure/io/gryo/GryoWriter.java   | 180 ++++++++
 .../structure/io/gryo/URISerializer.java        |  46 +++
 .../structure/io/gryo/UUIDSerializer.java       |  46 +++
 .../io/gryo/VertexByteArrayInputStream.java     |  74 ++++
 .../structure/io/gryo/VertexTerminator.java     |  53 +++
 .../structure/io/kryo/EdgeTerminator.java       |  48 ---
 .../structure/io/kryo/EntrySerializer.java      |  43 --
 .../structure/io/kryo/GraphSerializer.java      | 164 --------
 .../structure/io/kryo/GremlinClassResolver.java | 206 ----------
 .../gremlin/structure/io/kryo/KryoMapper.java   | 406 -------------------
 .../gremlin/structure/io/kryo/KryoReader.java   | 400 ------------------
 .../gremlin/structure/io/kryo/KryoWriter.java   | 180 --------
 .../structure/io/kryo/URISerializer.java        |  46 ---
 .../structure/io/kryo/UUIDSerializer.java       |  46 ---
 .../io/kryo/VertexByteArrayInputStream.java     |  74 ----
 .../structure/io/kryo/VertexTerminator.java     |  53 ---
 .../tinkerpop/gremlin/driver/Cluster.java       |   2 +-
 .../driver/ser/GryoMessageSerializerV1d0.java   | 307 ++++++++++++++
 .../driver/ser/JsonBuilderGryoSerializer.java   |  45 ++
 .../driver/ser/JsonBuilderKryoSerializer.java   |  45 --
 .../driver/ser/KryoMessageSerializerV1d0.java   | 307 --------------
 .../tinkerpop/gremlin/driver/ser/SerTokens.java |   2 +-
 .../gremlin/driver/ser/Serializers.java         |   6 +-
 .../gremlin/driver/simple/NioClient.java        |   4 +-
 .../gremlin/driver/simple/WebSocketClient.java  |   4 +-
 .../ser/GryoMessageSerializerV1D0Test.java      | 294 ++++++++++++++
 .../ser/KryoMessageSerializerV1d0Test.java      | 294 --------------
 .../jsr223/GremlinGroovyScriptEngineTest.java   |  81 ++--
 .../AbstractImportCustomizerProvider.java       |   4 +-
 gremlin-server/conf/gremlin-server-classic.yaml |   4 +-
 gremlin-server/conf/gremlin-server-modern.yaml  |   4 +-
 gremlin-server/conf/gremlin-server-neo4j.yaml   |   4 +-
 gremlin-server/conf/gremlin-server.yaml         |   4 +-
 .../server/GremlinAdditionPerformanceTest.java  |   2 +-
 .../server/GremlinDriverIntegrateTest.java      |  10 +-
 .../server/gremlin-server-integration.yaml      |   4 +-
 .../server/gremlin-server-performance.yaml      |   2 +-
 .../gremlin/AbstractGraphProvider.java          |   8 +-
 .../apache/tinkerpop/gremlin/LoadGraphWith.java |  10 +-
 .../structure/GraphWritePerformanceTest.java    |   6 +-
 .../tinkerpop/gremlin/structure/IoTest.java     | 180 ++++----
 .../gremlin/structure/SerializationTest.java    |  27 +-
 .../structure/io/gryo/GryoResourceAccess.java   |  27 ++
 .../structure/io/kryo/KryoResourceAccess.java   |  27 --
 .../io/gryo/grateful-dead-vertices.kryo         | Bin 0 -> 1028298 bytes
 .../structure/io/gryo/grateful-dead.kryo        | Bin 0 -> 515409 bytes
 .../io/gryo/tinkerpop-classic-vertices.kryo     | Bin 0 -> 1323 bytes
 .../structure/io/gryo/tinkerpop-classic.kryo    | Bin 0 -> 817 bytes
 .../io/gryo/tinkerpop-crew-vertices.kryo        | Bin 0 -> 2743 bytes
 .../structure/io/gryo/tinkerpop-crew.kryo       | Bin 0 -> 1916 bytes
 .../io/gryo/tinkerpop-modern-vertices.kryo      | Bin 0 -> 1439 bytes
 .../structure/io/gryo/tinkerpop-modern.kryo     | Bin 0 -> 877 bytes
 .../io/kryo/grateful-dead-vertices.gio          | Bin 1028298 -> 0 bytes
 .../gremlin/structure/io/kryo/grateful-dead.gio | Bin 515409 -> 0 bytes
 .../io/kryo/tinkerpop-classic-vertices.gio      | Bin 1323 -> 0 bytes
 .../structure/io/kryo/tinkerpop-classic.gio     | Bin 817 -> 0 bytes
 .../io/kryo/tinkerpop-crew-vertices.gio         | Bin 2743 -> 0 bytes
 .../structure/io/kryo/tinkerpop-crew.gio        | Bin 1916 -> 0 bytes
 .../io/kryo/tinkerpop-modern-vertices.gio       | Bin 1439 -> 0 bytes
 .../structure/io/kryo/tinkerpop-modern.gio      | Bin 877 -> 0 bytes
 .../structure/io/gryo/GryoMapperTest.java       |  34 ++
 .../structure/io/kryo/KryoMapperTest.java       |  34 --
 hadoop-gremlin/conf/giraph-gryo.properties      |  31 ++
 hadoop-gremlin/conf/giraph-kryo.properties      |  31 --
 hadoop-gremlin/conf/spark-kryo.properties       |   6 +-
 .../groovy/plugin/HadoopGremlinPlugin.java      |   4 +-
 .../computer/giraph/GiraphComputeVertex.java    |   8 +-
 .../process/computer/giraph/RuleWritable.java   |   4 +-
 .../process/computer/spark/SparkVertex.java     |  12 +-
 .../hadoop/structure/io/ObjectWritable.java     |   4 +-
 .../hadoop/structure/io/VertexWritable.java     |   8 +-
 .../structure/io/gryo/GryoInputFormat.java      |  63 +++
 .../structure/io/gryo/GryoOutputFormat.java     |  43 ++
 .../structure/io/gryo/GryoRecordReader.java     | 113 ++++++
 .../structure/io/gryo/GryoRecordWriter.java     |  54 +++
 .../structure/io/gryo/VertexStreamIterator.java | 147 +++++++
 .../structure/io/kryo/KryoInputFormat.java      |  63 ---
 .../structure/io/kryo/KryoOutputFormat.java     |  43 --
 .../structure/io/kryo/KryoRecordReader.java     | 113 ------
 .../structure/io/kryo/KryoRecordWriter.java     |  54 ---
 .../structure/io/kryo/VertexStreamIterator.java | 147 -------
 .../gremlin/hadoop/HadoopGraphProvider.java     |  30 +-
 .../io/graphson/VertexStreamIteratorTest.java   |   6 +-
 .../io/gryo/GryoRecordReaderWriterTest.java     | 113 ++++++
 .../io/kryo/KryoRecordReaderWriterTest.java     | 113 ------
 pom.xml                                         |   2 +-
 tinkergraph-gremlin/pom.xml                     |   6 +-
 .../tinkergraph/structure/TinkerGraphTest.java  |  55 ++-
 122 files changed, 3266 insertions(+), 3244 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/96ffd77c/hadoop-gremlin/conf/giraph-gryo.properties
----------------------------------------------------------------------
diff --cc hadoop-gremlin/conf/giraph-gryo.properties
index 0000000,0000000..7813ad7
new file mode 100644
--- /dev/null
+++ b/hadoop-gremlin/conf/giraph-gryo.properties
@@@ -1,0 -1,0 +1,31 @@@
++# Licensed to the Apache Software Foundation (ASF) under one
++# or more contributor license agreements.  See the NOTICE file
++# distributed with this work for additional information
++# regarding copyright ownership.  The ASF licenses this file
++# to you under the Apache License, Version 2.0 (the
++# "License"); you may not use this file except in compliance
++# with the License.  You may obtain a copy of the License at
++#
++#  http://www.apache.org/licenses/LICENSE-2.0
++#
++# Unless required by applicable law or agreed to in writing,
++# software distributed under the License is distributed on an
++# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
++# KIND, either express or implied.  See the License for the
++# specific language governing permissions and limitations
++# under the License.
++gremlin.graph=org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph
++gremlin.hadoop.graphInputFormat=org.apache.tinkerpop.gremlin.hadoop.structure.io.gryo.GryoInputFormat
++gremlin.hadoop.graphOutputFormat=org.apache.tinkerpop.gremlin.hadoop.structure.io.gryo.GryoOutputFormat
++gremlin.hadoop.memoryOutputFormat=org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat
++gremlin.hadoop.deriveMemory=false
++gremlin.hadoop.jarsInDistributedCache=true
++
++gremlin.hadoop.inputLocation=tinkerpop-modern-vertices.kryo
++gremlin.hadoop.outputLocation=output
++#gremlin.vertexProgram=org.apache.tinkerpop.gremlin.process.computer.traversal.TraversalVertexProgram
++#gremlin.traversalVertexProgram.traversalSupplier.type=CLASS
++#gremlin.traversalVertexProgram.traversalSupplier.object=org.apache.tinkerpop.gremlin.hadoop.process.computer.example.TraversalSupplier1
++
++giraph.minWorkers=2
++giraph.maxWorkers=2

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/96ffd77c/hadoop-gremlin/conf/spark-kryo.properties
----------------------------------------------------------------------
diff --cc hadoop-gremlin/conf/spark-kryo.properties
index 483349f,0000000..85426a0
mode 100644,000000..100644
--- a/hadoop-gremlin/conf/spark-kryo.properties
+++ b/hadoop-gremlin/conf/spark-kryo.properties
@@@ -1,38 -1,0 +1,38 @@@
 +#
 +# Licensed to the Apache Software Foundation (ASF) under one
 +# or more contributor license agreements.  See the NOTICE file
 +# distributed with this work for additional information
 +# regarding copyright ownership.  The ASF licenses this file
 +# to you under the Apache License, Version 2.0 (the
 +# "License"); you may not use this file except in compliance
 +# with the License.  You may obtain a copy of the License at
 +#
 +# http://www.apache.org/licenses/LICENSE-2.0
 +#
 +# Unless required by applicable law or agreed to in writing,
 +# software distributed under the License is distributed on an
 +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 +# KIND, either express or implied.  See the License for the
 +# specific language governing permissions and limitations
 +# under the License.
 +#
 +
 +gremlin.graph=org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph
- gremlin.hadoop.graphInputFormat=org.apache.tinkerpop.gremlin.hadoop.structure.io.kryo.KryoInputFormat
- gremlin.hadoop.graphOutputFormat=org.apache.tinkerpop.gremlin.hadoop.structure.io.kryo.KryoOutputFormat
++gremlin.hadoop.graphInputFormat=org.apache.tinkerpop.gremlin.hadoop.structure.io.gryo.GryoInputFormat
++gremlin.hadoop.graphOutputFormat=org.apache.tinkerpop.gremlin.hadoop.structure.io.gryo.GryoOutputFormat
 +gremlin.hadoop.memoryOutputFormat=org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat
 +gremlin.hadoop.deriveMemory=false
 +gremlin.hadoop.jarsInDistributedCache=false
 +
- gremlin.hadoop.inputLocation=hdfs://localhost:9000/user/marko/tinkerpop-modern-vertices.gio
++gremlin.hadoop.inputLocation=hdfs://localhost:9000/user/marko/tinkerpop-modern-vertices.kryo
 +gremlin.hadoop.outputLocation=output
 +
 +# the vertex program to execute
 +gremlin.vertexProgram=org.apache.tinkerpop.gremlin.process.computer.ranking.pagerank.PageRankVertexProgram
 +
 +# It is possible to provide Spark configuration parameters for use with SparkGraphComputer
 +##########################################################################################
 +spark.master=local[4]
 +spark.executor.memory=1024m
 +spark.eventLog.enabled=true
 +spark.serializer=org.apache.spark.serializer.JavaSerializer

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/96ffd77c/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkVertex.java
----------------------------------------------------------------------
diff --cc hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkVertex.java
index 38f8a61,0000000..0a02156
mode 100644,000000..100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkVertex.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkVertex.java
@@@ -1,149 -1,0 +1,149 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + * http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 +package org.apache.tinkerpop.gremlin.hadoop.process.computer.spark;
 +
 +import org.apache.tinkerpop.gremlin.structure.Direction;
 +import org.apache.tinkerpop.gremlin.structure.Edge;
 +import org.apache.tinkerpop.gremlin.structure.Graph;
 +import org.apache.tinkerpop.gremlin.structure.Vertex;
 +import org.apache.tinkerpop.gremlin.structure.VertexProperty;
- import org.apache.tinkerpop.gremlin.structure.io.kryo.KryoReader;
- import org.apache.tinkerpop.gremlin.structure.io.kryo.KryoWriter;
++import org.apache.tinkerpop.gremlin.structure.io.gryo.GryoReader;
++import org.apache.tinkerpop.gremlin.structure.io.gryo.GryoWriter;
 +import org.apache.tinkerpop.gremlin.tinkergraph.structure.TinkerGraph;
 +import org.apache.tinkerpop.gremlin.tinkergraph.structure.TinkerVertex;
 +
 +import java.io.ByteArrayInputStream;
 +import java.io.ByteArrayOutputStream;
 +import java.io.IOException;
 +import java.io.ObjectInputStream;
 +import java.io.ObjectOutputStream;
 +import java.io.Serializable;
 +import java.util.Iterator;
 +
 +/**
 + * @author Marko A. Rodriguez (http://markorodriguez.com)
 + */
 +public final class SparkVertex implements Vertex, Vertex.Iterators, Serializable {
 +
-     private static KryoWriter KRYO_WRITER = KryoWriter.build().create();
-     private static KryoReader KRYO_READER = KryoReader.build().create();
++    private static GryoWriter GRYO_WRITER = GryoWriter.build().create();
++    private static GryoReader GRYO_READER = GryoReader.build().create();
 +    private static final String VERTEX_ID = Graph.Hidden.hide("giraph.gremlin.vertexId");
 +
 +    private transient TinkerVertex vertex;
 +    private byte[] vertexBytes;
 +
 +    public SparkVertex(final TinkerVertex vertex) {
 +        this.vertex = vertex;
 +        this.vertex.graph().variables().set(VERTEX_ID, this.vertex.id());
 +    }
 +
 +    @Override
 +    public Edge addEdge(final String label, final Vertex inVertex, final Object... keyValues) {
 +        return this.vertex.addEdge(label, inVertex, keyValues);
 +    }
 +
 +    @Override
 +    public Object id() {
 +        return this.vertex.id();
 +    }
 +
 +    @Override
 +    public String label() {
 +        return this.vertex.label();
 +    }
 +
 +    @Override
 +    public Graph graph() {
 +        return this.vertex.graph();
 +    }
 +
 +    @Override
 +    public <V> VertexProperty<V> property(final String key, final V value) {
 +        return this.vertex.property(key, value);
 +    }
 +
 +    @Override
 +    public void remove() {
 +        this.vertex.remove();
 +    }
 +
 +    @Override
 +    public Iterators iterators() {
 +        return this;
 +    }
 +
 +    @Override
 +    public Iterator<Edge> edgeIterator(final Direction direction, final String... edgeLabels) {
 +        return this.vertex.iterators().edgeIterator(direction, edgeLabels);
 +    }
 +
 +    @Override
 +    public Iterator<Vertex> vertexIterator(final Direction direction, final String... edgeLabels) {
 +        return this.vertex.iterators().vertexIterator(direction, edgeLabels);
 +    }
 +
 +    @Override
 +    public <V> Iterator<VertexProperty<V>> propertyIterator(final String... propertyKeys) {
 +        return this.vertex.iterators().propertyIterator(propertyKeys);
 +    }
 +
 +    ///////////////////////////////
 +
 +    private void writeObject(final ObjectOutputStream outputStream) throws IOException {
 +        this.deflateVertex();
 +        outputStream.defaultWriteObject();
 +    }
 +
 +    private void readObject(final ObjectInputStream inputStream) throws IOException, ClassNotFoundException {
 +        inputStream.defaultReadObject();
 +        this.inflateVertex();
 +    }
 +
 +    private final void inflateVertex() {
 +        if (null != this.vertex)
 +            return;
 +
 +        try {
 +            final ByteArrayInputStream bis = new ByteArrayInputStream(this.vertexBytes);
 +            final TinkerGraph tinkerGraph = TinkerGraph.open();
-             KRYO_READER.readGraph(bis, tinkerGraph);
++            GRYO_READER.readGraph(bis, tinkerGraph);
 +            bis.close();
 +            this.vertexBytes = null;
 +            this.vertex = (TinkerVertex) tinkerGraph.iterators().vertexIterator(tinkerGraph.variables().get(VERTEX_ID).get()).next();
 +        } catch (final IOException e) {
 +            throw new IllegalStateException(e.getMessage(), e);
 +        }
 +    }
 +
 +    private final void deflateVertex() {
 +        if (null != this.vertexBytes)
 +            return;
 +
 +        try {
 +            final ByteArrayOutputStream bos = new ByteArrayOutputStream();
-             KRYO_WRITER.writeGraph(bos, this.vertex.graph());
++            GRYO_WRITER.writeGraph(bos, this.vertex.graph());
 +            bos.flush();
 +            bos.close();
 +            this.vertex = null;
 +            this.vertexBytes = bos.toByteArray();
 +        } catch (final IOException e) {
 +            throw new IllegalStateException(e.getMessage(), e);
 +        }
 +    }
 +}

[06/20] incubator-tinkerpop git commit: Some really cool lazy optimizations to SparkGraphComputer. I 'get it' now. Easy peasy lemon squeezy.

Posted by ok...@apache.org.

Some really cool lazy optimizations to SparkGraphComputer. I 'get it' now. Easy peasy lemon squeezy.


Project: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/commit/84be267a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/tree/84be267a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/diff/84be267a

Branch: refs/heads/master
Commit: 84be267aaf4a3ec2c3700af4c10586212cf894c8
Parents: 1020fd2
Author: Marko A. Rodriguez <ok...@gmail.com>
Authored: Mon Mar 2 19:23:12 2015 -0700
Committer: Marko A. Rodriguez <ok...@gmail.com>
Committed: Mon Mar 2 19:23:12 2015 -0700

----------------------------------------------------------------------
 .../computer/spark/GraphComputerRDD.java        | 39 +++++++++-----------
 .../computer/spark/SparkGraphComputer.java      | 14 ++-----
 2 files changed, 22 insertions(+), 31 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/84be267a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/GraphComputerRDD.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/GraphComputerRDD.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/GraphComputerRDD.java
index c99b108..abf0ac6 100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/GraphComputerRDD.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/GraphComputerRDD.java
@@ -25,6 +25,7 @@ import org.apache.spark.api.java.JavaRDDLike;
 import org.apache.spark.api.java.function.FlatMapFunction2;
 import org.apache.spark.rdd.RDD;
 import org.apache.tinkerpop.gremlin.process.computer.VertexProgram;
+import org.apache.tinkerpop.gremlin.util.iterator.IteratorUtils;
 import scala.Tuple2;
 import scala.reflect.ManifestFactory;
 
@@ -45,20 +46,30 @@ public class GraphComputerRDD<M> extends JavaPairRDD<Object, SparkMessenger<M>>
         super(rdd.rdd(), ManifestFactory.classType(Object.class), ManifestFactory.classType(SparkMessenger.class));
     }
 
-    public GraphComputerRDD completeIteration() {
+    public GraphComputerRDD execute(final Configuration configuration, final SparkMemory memory) {
         JavaPairRDD<Object, SparkMessenger<M>> current = this;
-        // clear all previous incoming messages
-        current = current.mapValues(messenger -> {
-            messenger.clearIncomingMessages();
-            return messenger;
+        // execute vertex program
+        current = current.mapPartitionsToPair(iterator -> {
+            final VertexProgram<M> vertexProgram = VertexProgram.createVertexProgram(configuration);
+            return () -> IteratorUtils.<Tuple2<Object, SparkMessenger<M>>, Tuple2<Object, SparkMessenger<M>>>map(iterator, tuple -> {
+                vertexProgram.execute(tuple._2().vertex, tuple._2(), memory);
+                return tuple;
+            });
         });
+        // clear all previous incoming messages
+        if(!memory.isInitialIteration()) {
+            current = current.mapValues(messenger -> {
+                messenger.clearIncomingMessages();
+                return messenger;
+            });
+        }
         // emit messages
         current = current.<Object, SparkMessenger<M>>flatMapToPair(tuple -> {
             final List<Tuple2<Object, SparkMessenger<M>>> list = tuple._2().outgoing.entrySet()
                     .stream()
                     .map(entry -> new Tuple2<>(entry.getKey(), new SparkMessenger<>(new ToyVertex(entry.getKey()), entry.getValue())))
-                    .collect(Collectors.toList());
-            list.add(new Tuple2<>(tuple._1(), tuple._2()));
+                    .collect(Collectors.toList());          // the message vertices
+            list.add(new Tuple2<>(tuple._1(), tuple._2())); // the raw vertex
             return list;
         });
         // "message pass" via reduction
@@ -73,20 +84,6 @@ public class GraphComputerRDD<M> extends JavaPairRDD<Object, SparkMessenger<M>>
             messenger.clearOutgoingMessages();
             return messenger;
         });
-        current.count(); // TODO: necessary for BSP?
-        return GraphComputerRDD.of(current);
-    }
-
-    private static void doNothing() {
-
-    }
-
-    public GraphComputerRDD execute(final Configuration configuration, final SparkMemory memory) {
-        JavaPairRDD<Object, SparkMessenger<M>> current = this;
-        current = current.mapValues(messenger -> {
-            VertexProgram.createVertexProgram(configuration).execute(messenger.vertex, messenger, memory);
-            return messenger;
-        });
         return GraphComputerRDD.of(current);
     }
 

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/84be267a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
index 7478998..107f1bc 100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
@@ -18,12 +18,10 @@
  */
 package org.apache.tinkerpop.gremlin.hadoop.process.computer.spark;
 
-import org.apache.commons.io.FileUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaPairRDD;
-import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.tinkerpop.gremlin.hadoop.Constants;
 import org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph;
@@ -41,7 +39,6 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import scala.Tuple2;
 
-import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
@@ -81,8 +78,6 @@ public class SparkGraphComputer implements GraphComputer {
         JavaPairRDD<Object, SparkMessenger<Double>> rdd2 = rdd.mapToPair(tuple -> new Tuple2<>(tuple._2().get().id(), new SparkMessenger<>(new SparkVertex((TinkerVertex) tuple._2().get()), new ArrayList<>())));
 
         GraphComputerRDD<Double> g = GraphComputerRDD.of(rdd2);
-        FileUtils.deleteDirectory(new File("/tmp/test"));
-        g.saveAsObjectFile("/tmp/test");
 
         final org.apache.commons.configuration.Configuration vertexProgram = new SerializableConfiguration();
         final PageRankVertexProgram pageRankVertexProgram = PageRankVertexProgram.build().create();
@@ -90,18 +85,17 @@ public class SparkGraphComputer implements GraphComputer {
         final SparkMemory memory = new SparkMemory(Collections.emptySet());
 
         while (!pageRankVertexProgram.terminate(memory)) {
-            g = GraphComputerRDD.of((JavaRDD) sc.objectFile("/tmp/test"));
             g = g.execute(vertexProgram, memory);
-            g = g.completeIteration();
+            g.foreachPartition(iterator -> doNothing());
             memory.incrIteration();
-            FileUtils.deleteDirectory(new File("/tmp/test"));
-            g.saveAsObjectFile("/tmp/test");
-
         }
         g.foreach(t -> System.out.println(t._2().vertex.property(PageRankVertexProgram.PAGE_RANK) + "-->" + t._2().vertex.value("name")));
         System.out.println(g.count());
     }
 
+    private static final void doNothing() {
+    }
+
 
     @Override
     public GraphComputer isolation(final Isolation isolation) {

[15/20] incubator-tinkerpop git commit: Merge branch 'master' into spark

Posted by ok...@apache.org.

Merge branch 'master' into spark


Project: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/commit/b045e618
Tree: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/tree/b045e618
Diff: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/diff/b045e618

Branch: refs/heads/master
Commit: b045e618527087efefa8d60b2f384ffac51b287d
Parents: b6133ae 332f812
Author: Marko A. Rodriguez <ok...@gmail.com>
Authored: Tue Mar 3 15:34:51 2015 -0700
Committer: Marko A. Rodriguez <ok...@gmail.com>
Committed: Tue Mar 3 15:34:51 2015 -0700

----------------------------------------------------------------------
 .../server/op/AbstractEvalOpProcessor.java      | 44 +++++-----
 .../server/op/session/SessionOpProcessor.java   | 53 +++++++++++-
 .../gremlin/server/op/session/SessionOps.java   | 85 --------------------
 .../server/op/standard/StandardOpProcessor.java | 25 +++++-
 .../gremlin/server/op/standard/StandardOps.java | 53 ------------
 .../tinkergraph/structure/TinkerGraphTest.java  |  7 --
 6 files changed, 100 insertions(+), 167 deletions(-)
----------------------------------------------------------------------

[05/20] incubator-tinkerpop git commit: Added SparkVertex which lazily generates a serialized Vertex using transient and byte[]... this would be good in core, but for now, only used by SparkGraphComputer.

Posted by ok...@apache.org.

Added SparkVertex which lazily generates a serialized Vertex using transient and byte[]... this would be good in core, but for now, only used by SparkGraphComputer.


Project: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/commit/1020fd28
Tree: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/tree/1020fd28
Diff: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/diff/1020fd28

Branch: refs/heads/master
Commit: 1020fd281602b4e1a01177987057238df5f4b6c2
Parents: 3a32aa2
Author: Marko A. Rodriguez <ok...@gmail.com>
Authored: Mon Mar 2 17:43:33 2015 -0700
Committer: Marko A. Rodriguez <ok...@gmail.com>
Committed: Mon Mar 2 17:43:33 2015 -0700

----------------------------------------------------------------------
 .../computer/spark/SparkGraphComputer.java      |   3 +-
 .../process/computer/spark/SparkVertex.java     | 147 +++++++++++++++++++
 2 files changed, 149 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/1020fd28/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
index 1b1fc3d..7478998 100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
@@ -36,6 +36,7 @@ import org.apache.tinkerpop.gremlin.process.computer.VertexProgram;
 import org.apache.tinkerpop.gremlin.process.computer.ranking.pagerank.PageRankVertexProgram;
 import org.apache.tinkerpop.gremlin.process.computer.util.GraphComputerHelper;
 import org.apache.tinkerpop.gremlin.structure.util.StringFactory;
+import org.apache.tinkerpop.gremlin.tinkergraph.structure.TinkerVertex;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import scala.Tuple2;
@@ -77,7 +78,7 @@ public class SparkGraphComputer implements GraphComputer {
         final Configuration conf = new Configuration();
         conf.set("mapred.input.dir", "hdfs://localhost:9000/user/marko/grateful-dead-vertices.gio");
         JavaPairRDD<NullWritable, VertexWritable> rdd = sc.newAPIHadoopRDD(conf, KryoInputFormat.class, NullWritable.class, VertexWritable.class);
-        JavaPairRDD<Object, SparkMessenger<Double>> rdd2 = rdd.mapToPair(tuple -> new Tuple2<>(tuple._2().get().id(), new SparkMessenger<>(tuple._2().get(), new ArrayList<>())));
+        JavaPairRDD<Object, SparkMessenger<Double>> rdd2 = rdd.mapToPair(tuple -> new Tuple2<>(tuple._2().get().id(), new SparkMessenger<>(new SparkVertex((TinkerVertex) tuple._2().get()), new ArrayList<>())));
 
         GraphComputerRDD<Double> g = GraphComputerRDD.of(rdd2);
         FileUtils.deleteDirectory(new File("/tmp/test"));

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/1020fd28/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkVertex.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkVertex.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkVertex.java
new file mode 100644
index 0000000..5a81017
--- /dev/null
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkVertex.java
@@ -0,0 +1,147 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.tinkerpop.gremlin.hadoop.process.computer.spark;
+
+import org.apache.tinkerpop.gremlin.structure.Direction;
+import org.apache.tinkerpop.gremlin.structure.Edge;
+import org.apache.tinkerpop.gremlin.structure.Graph;
+import org.apache.tinkerpop.gremlin.structure.Vertex;
+import org.apache.tinkerpop.gremlin.structure.VertexProperty;
+import org.apache.tinkerpop.gremlin.structure.io.kryo.KryoReader;
+import org.apache.tinkerpop.gremlin.structure.io.kryo.KryoWriter;
+import org.apache.tinkerpop.gremlin.tinkergraph.structure.TinkerGraph;
+import org.apache.tinkerpop.gremlin.tinkergraph.structure.TinkerVertex;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.ObjectOutputStream;
+import java.io.Serializable;
+import java.util.Iterator;
+
+/**
+ * @author Marko A. Rodriguez (http://markorodriguez.com)
+ */
+public class SparkVertex implements Vertex, Vertex.Iterators, Serializable {
+
+    private static KryoWriter KRYO_WRITER = KryoWriter.build().create();
+    private static KryoReader KRYO_READER = KryoReader.build().create();
+    private static final String VERTEX_ID = Graph.Hidden.hide("giraph.gremlin.vertexId");
+
+    private transient TinkerVertex vertex;
+    private byte[] serializedForm;
+
+    public SparkVertex(final TinkerVertex vertex) {
+        this.vertex = vertex;
+        this.vertex.graph().variables().set(VERTEX_ID, this.vertex.id());
+        this.deflateVertex();
+    }
+
+    @Override
+    public Edge addEdge(String label, Vertex inVertex, Object... keyValues) {
+        inflateVertex();
+        return this.vertex.addEdge(label, inVertex, keyValues);
+    }
+
+    @Override
+    public Object id() {
+        inflateVertex();
+        return this.vertex.id();
+    }
+
+    @Override
+    public String label() {
+        inflateVertex();
+        return this.vertex.label();
+    }
+
+    @Override
+    public Graph graph() {
+        inflateVertex();
+        return this.vertex.graph();
+    }
+
+    @Override
+    public <V> VertexProperty<V> property(String key, V value) {
+        inflateVertex();
+        return this.vertex.property(key, value);
+    }
+
+    @Override
+    public void remove() {
+        inflateVertex();
+        this.vertex.remove();
+    }
+
+    @Override
+    public Iterators iterators() {
+        return this;
+    }
+
+    @Override
+    public Iterator<Edge> edgeIterator(Direction direction, String... edgeLabels) {
+        inflateVertex();
+        return this.vertex.iterators().edgeIterator(direction, edgeLabels);
+    }
+
+    @Override
+    public Iterator<Vertex> vertexIterator(Direction direction, String... edgeLabels) {
+        inflateVertex();
+        return this.vertex.iterators().vertexIterator(direction, edgeLabels);
+    }
+
+    @Override
+    public <V> Iterator<VertexProperty<V>> propertyIterator(String... propertyKeys) {
+        inflateVertex();
+        return this.vertex.iterators().propertyIterator(propertyKeys);
+    }
+
+    private void writeObject(final ObjectOutputStream outputStream) throws IOException {
+        this.inflateVertex();
+        this.deflateVertex();
+        outputStream.defaultWriteObject();
+    }
+
+    private final void inflateVertex() {
+        if (null != this.vertex)
+            return;
+
+        try {
+            final ByteArrayInputStream bis = new ByteArrayInputStream(this.serializedForm);
+            final TinkerGraph tinkerGraph = TinkerGraph.open();
+            KRYO_READER.readGraph(bis, tinkerGraph);
+            bis.close();
+            this.vertex = (TinkerVertex) tinkerGraph.iterators().vertexIterator(tinkerGraph.variables().get(VERTEX_ID).get()).next();
+        } catch (final Exception e) {
+            throw new IllegalStateException(e.getMessage(), e);
+        }
+    }
+
+    private final void deflateVertex() {
+        try {
+            final ByteArrayOutputStream bos = new ByteArrayOutputStream();
+            KRYO_WRITER.writeGraph(bos, this.vertex.graph());
+            bos.flush();
+            bos.close();
+            this.serializedForm = bos.toByteArray();
+        } catch (final IOException e) {
+            throw new IllegalStateException(e.getMessage(), e);
+        }
+    }
+}

[17/20] incubator-tinkerpop git commit: lots of cleanups and optimizations. a few bugs in Spark around the nitty gritty semantics of GraphComputer (incr, and, or...). Will fix up tomorrow.

Posted by ok...@apache.org.

lots of cleanups and optimizations. a few bugs in Spark around the nitty gritty semantics of GraphComputer (incr, and, or...). Will fix up tomorrow.


Project: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/commit/406dd681
Tree: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/tree/406dd681
Diff: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/diff/406dd681

Branch: refs/heads/master
Commit: 406dd68164f2e17abea3c9ab0447d82c357de886
Parents: a9d0cf1
Author: Marko A. Rodriguez <ok...@gmail.com>
Authored: Tue Mar 3 17:01:37 2015 -0700
Committer: Marko A. Rodriguez <ok...@gmail.com>
Committed: Tue Mar 3 17:01:37 2015 -0700

----------------------------------------------------------------------
 .../process/GroovyProcessComputerSuite.java     |  3 +-
 .../process/computer/GraphComputerTest.java     |  3 +-
 .../process/computer/spark/SparkMemory.java     |  6 +--
 .../computer/spark/SparkMemoryAccumulator.java  | 41 --------------------
 .../process/computer/spark/SparkVertex.java     |  2 +
 .../computer/spark/util/SparkHelper.java        | 12 +++---
 .../process/computer/util/MapReduceHelper.java  |  1 +
 .../process/computer/util/MemoryMapReduce.java  |  2 +-
 .../hadoop/process/computer/util/Rule.java      |  2 +-
 .../gremlin/hadoop/structure/util/ConfUtil.java | 14 +------
 10 files changed, 19 insertions(+), 67 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/406dd681/gremlin-groovy-test/src/main/java/org/apache/tinkerpop/gremlin/process/GroovyProcessComputerSuite.java
----------------------------------------------------------------------
diff --git a/gremlin-groovy-test/src/main/java/org/apache/tinkerpop/gremlin/process/GroovyProcessComputerSuite.java b/gremlin-groovy-test/src/main/java/org/apache/tinkerpop/gremlin/process/GroovyProcessComputerSuite.java
index 3f8d926..57288bd 100644
--- a/gremlin-groovy-test/src/main/java/org/apache/tinkerpop/gremlin/process/GroovyProcessComputerSuite.java
+++ b/gremlin-groovy-test/src/main/java/org/apache/tinkerpop/gremlin/process/GroovyProcessComputerSuite.java
@@ -22,6 +22,7 @@ import org.apache.tinkerpop.gremlin.AbstractGremlinTest;
 import org.apache.tinkerpop.gremlin.GraphManager;
 import org.apache.tinkerpop.gremlin.groovy.loaders.SugarLoader;
 import org.apache.tinkerpop.gremlin.groovy.util.SugarTestHelper;
+import org.apache.tinkerpop.gremlin.process.computer.GroovyGraphComputerTest;
 import org.apache.tinkerpop.gremlin.process.computer.ranking.PageRankVertexProgramTest;
 import org.apache.tinkerpop.gremlin.process.graph.traversal.step.branch.GroovyBranchTest;
 import org.apache.tinkerpop.gremlin.process.graph.traversal.step.branch.GroovyChooseTest;
@@ -80,7 +81,7 @@ public class GroovyProcessComputerSuite extends ProcessComputerSuite {
      */
     private static final Class<?>[] testsToExecute = new Class<?>[]{
 
-            //GroovyGraphComputerTest.ComputerTest.class,
+            GroovyGraphComputerTest.ComputerTraversals.class,
 
             //branch
             GroovyBranchTest.ComputerTraversals.class,

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/406dd681/gremlin-test/src/main/java/org/apache/tinkerpop/gremlin/process/computer/GraphComputerTest.java
----------------------------------------------------------------------
diff --git a/gremlin-test/src/main/java/org/apache/tinkerpop/gremlin/process/computer/GraphComputerTest.java b/gremlin-test/src/main/java/org/apache/tinkerpop/gremlin/process/computer/GraphComputerTest.java
index bc36db4..d687734 100644
--- a/gremlin-test/src/main/java/org/apache/tinkerpop/gremlin/process/computer/GraphComputerTest.java
+++ b/gremlin-test/src/main/java/org/apache/tinkerpop/gremlin/process/computer/GraphComputerTest.java
@@ -487,7 +487,8 @@ public abstract class GraphComputerTest extends AbstractGremlinProcessTest {
         @Override
         public GraphComputer get_g_compute_programXTraversalVertexProgram_build_traversalXg_V_both_hasXlabel_personX_age_groupCountXaXX_create() {
             return g.compute().program(TraversalVertexProgram.build().
-                    traversal("GraphFactory.open(['gremlin.graph':'" + g.getClass().getCanonicalName() + "']).V().both().has(label,'person').values('age').groupCount('a')").
+                    // TODO: need to set the engine to be computer
+                            traversal("GraphFactory.open(['gremlin.graph':'" + g.getClass().getCanonicalName() + "']).V().both().has(label,'person').values('age').groupCount('a')").
                     create());
         }
 

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/406dd681/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMemory.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMemory.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMemory.java
index 90bc73a..402f2d3 100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMemory.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMemory.java
@@ -115,21 +115,21 @@ public final class SparkMemory implements Memory.Admin, Serializable {
     public long incr(final String key, final long delta) {
         checkKeyValue(key, delta);
         this.memory.get(key).add(new Rule(Rule.Operation.INCR, delta));
-        return (Long) this.memory.get(key).value().object + delta;
+        return (Long) this.memory.get(key).localValue().object + delta;
     }
 
     @Override
     public boolean and(final String key, final boolean bool) {
         checkKeyValue(key, bool);
         this.memory.get(key).add(new Rule(Rule.Operation.AND, bool));
-        return bool;
+        return (Boolean) this.memory.get(key).localValue().object && bool;
     }
 
     @Override
     public boolean or(final String key, final boolean bool) {
         checkKeyValue(key, bool);
         this.memory.get(key).add(new Rule(Rule.Operation.OR, bool));
-        return bool;
+        return (Boolean) this.memory.get(key).localValue().object || bool;
     }
 
     @Override

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/406dd681/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMemoryAccumulator.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMemoryAccumulator.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMemoryAccumulator.java
deleted file mode 100644
index 10b9525..0000000
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMemoryAccumulator.java
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.tinkerpop.gremlin.hadoop.process.computer.spark;
-
-import org.apache.spark.AccumulatorParam;
-
-/**
- * @author Marko A. Rodriguez (http://markorodriguez.com)
- */
-public final class SparkMemoryAccumulator implements AccumulatorParam<SparkMemory> {
-    @Override
-    public SparkMemory addAccumulator(final SparkMemory first, final SparkMemory second) {
-        return first;
-    }
-
-    @Override
-    public SparkMemory addInPlace(final SparkMemory first, final SparkMemory second) {
-        return first;
-    }
-
-    @Override
-    public SparkMemory zero(SparkMemory sparkMemory) {
-        return null;
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/406dd681/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkVertex.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkVertex.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkVertex.java
index fc6c4f2..c7bd37a 100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkVertex.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkVertex.java
@@ -47,6 +47,8 @@ public final class SparkVertex implements Vertex, Vertex.Iterators, Serializable
     private static GryoReader GRYO_READER = GryoReader.build().create();
     private static final String VERTEX_ID = Graph.Hidden.hide("giraph.gremlin.vertexId");
 
+    // TODO: Wrapped vertex -- need VertexProgram in partition (broadcast variable?)
+
     private transient TinkerVertex vertex;
     private byte[] vertexBytes;
 

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/406dd681/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/util/SparkHelper.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/util/SparkHelper.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/util/SparkHelper.java
index ece9d7c..205c0d6 100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/util/SparkHelper.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/util/SparkHelper.java
@@ -106,10 +106,10 @@ public final class SparkHelper {
     }
 
     public static <K, V> JavaPairRDD<K, V> executeMap(final JavaPairRDD<NullWritable, VertexWritable> hadoopGraphRDD, final MapReduce<K, V, ?, ?, ?> mapReduce, final Configuration apacheConfiguration) {
-        JavaPairRDD<K, V> mapRDD = hadoopGraphRDD.flatMapToPair(tuple -> {
-            final MapReduce<K, V, ?, ?, ?> m = MapReduce.createMapReduce(apacheConfiguration);    // todo create only for each partition
+        JavaPairRDD<K, V> mapRDD = hadoopGraphRDD.mapPartitionsToPair(iterator -> {
+            final MapReduce<K, V, ?, ?, ?> m = MapReduce.createMapReduce(apacheConfiguration);
             final SparkMapEmitter<K, V> mapEmitter = new SparkMapEmitter<>();
-            m.map(tuple._2().get(), mapEmitter);
+            iterator.forEachRemaining(tuple -> m.map(tuple._2().get(), mapEmitter));
             return mapEmitter.getEmissions();
         });
         if (mapReduce.getMapKeySort().isPresent())
@@ -120,10 +120,10 @@ public final class SparkHelper {
     // TODO: public static executeCombine()
 
     public static <K, V, OK, OV> JavaPairRDD<OK, OV> executeReduce(final JavaPairRDD<K, V> mapRDD, final MapReduce<K, V, OK, OV, ?> mapReduce, final Configuration apacheConfiguration) {
-        JavaPairRDD<OK, OV> reduceRDD = mapRDD.groupByKey().flatMapToPair(tuple -> {
-            final MapReduce<K, V, OK, OV, ?> m = MapReduce.createMapReduce(apacheConfiguration);     // todo create only for each partition
+        JavaPairRDD<OK, OV> reduceRDD = mapRDD.groupByKey().mapPartitionsToPair(iterator -> {
+            final MapReduce<K, V, OK, OV, ?> m = MapReduce.createMapReduce(apacheConfiguration);
             final SparkReduceEmitter<OK, OV> reduceEmitter = new SparkReduceEmitter<>();
-            m.reduce(tuple._1(), tuple._2().iterator(), reduceEmitter);
+            iterator.forEachRemaining(tuple -> m.reduce(tuple._1(), tuple._2().iterator(), reduceEmitter));
             return reduceEmitter.getEmissions();
         });
         if (mapReduce.getReduceKeySort().isPresent())

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/406dd681/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/util/MapReduceHelper.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/util/MapReduceHelper.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/util/MapReduceHelper.java
index 89a1abf..14f44c1 100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/util/MapReduceHelper.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/util/MapReduceHelper.java
@@ -59,6 +59,7 @@ public final class MapReduceHelper {
     public static void executeMapReduceJob(final MapReduce mapReduce, final Memory.Admin memory, final Configuration configuration) throws IOException, ClassNotFoundException, InterruptedException {
         final Configuration newConfiguration = new Configuration(configuration);
         final BaseConfiguration apacheConfiguration = new BaseConfiguration();
+        apacheConfiguration.setDelimiterParsingDisabled(true);
         mapReduce.storeState(apacheConfiguration);
         ConfUtil.mergeApacheIntoHadoopConfiguration(apacheConfiguration, newConfiguration);
         if (!mapReduce.doStage(MapReduce.Stage.MAP)) {

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/406dd681/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/util/MemoryMapReduce.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/util/MemoryMapReduce.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/util/MemoryMapReduce.java
index 0db931f..8583f44 100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/util/MemoryMapReduce.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/util/MemoryMapReduce.java
@@ -38,7 +38,7 @@ import java.util.Set;
 /**
  * @author Marko A. Rodriguez (http://markorodriguez.com)
  */
-public class MemoryMapReduce extends StaticMapReduce<MapReduce.NullObject, MapMemory, MapReduce.NullObject, MapMemory, MapMemory> {
+public final class MemoryMapReduce extends StaticMapReduce<MapReduce.NullObject, MapMemory, MapReduce.NullObject, MapMemory, MapMemory> {
 
     public Set<String> memoryKeys = new HashSet<>();
 

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/406dd681/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/util/Rule.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/util/Rule.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/util/Rule.java
index 16e2189..1e9198b 100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/util/Rule.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/util/Rule.java
@@ -23,7 +23,7 @@ import java.io.Serializable;
 /**
  * @author Marko A. Rodriguez (http://markorodriguez.com)
  */
-public class Rule implements Serializable {
+public final class Rule implements Serializable {
 
     public enum Operation {
         OR {

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/406dd681/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/structure/util/ConfUtil.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/structure/util/ConfUtil.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/structure/util/ConfUtil.java
index 3c2d721..efb17f5 100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/structure/util/ConfUtil.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/structure/util/ConfUtil.java
@@ -28,6 +28,7 @@ public class ConfUtil {
 
     public static org.apache.commons.configuration.Configuration makeApacheConfiguration(final Configuration hadoopConfiguration) {
         final BaseConfiguration apacheConfiguration = new BaseConfiguration();
+        apacheConfiguration.setDelimiterParsingDisabled(true);
         hadoopConfiguration.iterator().forEachRemaining(e -> apacheConfiguration.setProperty(e.getKey(), e.getValue()));
         return apacheConfiguration;
     }
@@ -47,17 +48,4 @@ public class ConfUtil {
             hadoopConfiguration.set(key, object.toString());
         });
     }
-
-    /*public static HadoopGraph getOutputGraph(final HadoopGraph hadoopGraph) {
-        final BaseConfiguration newConfiguration = new BaseConfiguration();
-        newConfiguration.copy(hadoopGraph.configuration());
-        if (hadoopGraph.configuration().containsKey(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION)) {
-            newConfiguration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, hadoopGraph.configuration().getOutputLocation() + "/" + Constants.SYSTEM_G);
-            newConfiguration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, hadoopGraph.configuration().getOutputLocation() + "_");
-        }
-        if (hadoopGraph.configuration().containsKey(Constants.GREMLIN_HADOOP_GRAPH_OUTPUT_FORMAT)) {
-            newConfiguration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_INPUT_FORMAT, hadoopGraph.configuration().getString(Constants.GREMLIN_HADOOP_GRAPH_OUTPUT_FORMAT).replace("OutputFormat", "InputFormat"));
-        }
-        return HadoopGraph.open(newConfiguration);
-    }*/
 }

[11/20] incubator-tinkerpop git commit: Spark MapReduce engine built. It was more complex than I suspected. Will definitely need to break up SparkGraphComputer into various XXXHelper classes with static methods... it's pretty beefy right now. All that is le

Posted by ok...@apache.org.

Spark MapReduce engine built. It was more complex than I suspected. Will definitely need to break up SparkGraphComputer into various XXXHelper classes with static methods... it's pretty beefy right now. All that is left is Memory using Spark Aggregators (going to be painful).


Project: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/commit/70fc529b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/tree/70fc529b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/diff/70fc529b

Branch: refs/heads/master
Commit: 70fc529be06cbe45c7800af900abc4e65dbd3a11
Parents: 3ed0fa6
Author: Marko A. Rodriguez <ok...@gmail.com>
Authored: Tue Mar 3 11:05:27 2015 -0700
Committer: Marko A. Rodriguez <ok...@gmail.com>
Committed: Tue Mar 3 11:05:27 2015 -0700

----------------------------------------------------------------------
 hadoop-gremlin/conf/spark-kryo.properties       |  2 +-
 .../tinkerpop/gremlin/hadoop/Constants.java     |  5 ++
 .../computer/spark/SparkGraphComputer.java      | 81 ++++++++++++++++++--
 .../process/computer/spark/SparkMapEmitter.java | 42 ++++++++++
 .../process/computer/spark/SparkMemory.java     |  6 +-
 .../computer/spark/SparkReduceEmitter.java      | 42 ++++++++++
 .../process/computer/util/MapReduceHelper.java  | 32 ++++----
 7 files changed, 180 insertions(+), 30 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/70fc529b/hadoop-gremlin/conf/spark-kryo.properties
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/conf/spark-kryo.properties b/hadoop-gremlin/conf/spark-kryo.properties
index ec8b393..483349f 100644
--- a/hadoop-gremlin/conf/spark-kryo.properties
+++ b/hadoop-gremlin/conf/spark-kryo.properties
@@ -22,7 +22,7 @@ gremlin.hadoop.graphInputFormat=org.apache.tinkerpop.gremlin.hadoop.structure.io
 gremlin.hadoop.graphOutputFormat=org.apache.tinkerpop.gremlin.hadoop.structure.io.kryo.KryoOutputFormat
 gremlin.hadoop.memoryOutputFormat=org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat
 gremlin.hadoop.deriveMemory=false
-gremlin.hadoop.jarsInDistributedCache=true
+gremlin.hadoop.jarsInDistributedCache=false
 
 gremlin.hadoop.inputLocation=hdfs://localhost:9000/user/marko/tinkerpop-modern-vertices.gio
 gremlin.hadoop.outputLocation=output

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/70fc529b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/Constants.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/Constants.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/Constants.java
index f229b17..bf06fcc 100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/Constants.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/Constants.java
@@ -18,6 +18,7 @@
  */
 package org.apache.tinkerpop.gremlin.hadoop;
 
+import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
 import org.apache.tinkerpop.gremlin.structure.Graph;
 
 /**
@@ -46,4 +47,8 @@ public class Constants {
     public static final String GREMLIN_HADOOP_MAP_REDUCE_CLASS = "gremlin.hadoop.mapReduceClass";
     public static final String GREMLIN_HADOOP_HALT = "gremlin.hadoop.halt";
     public static final String MAP_MEMORY = "gremlin.hadoop.mapMemory";
+
+    public static final String SEQUENCE_WARNING = "The " + Constants.GREMLIN_HADOOP_MEMORY_OUTPUT_FORMAT
+            + " is not " + SequenceFileOutputFormat.class.getCanonicalName()
+            + " and thus, graph computer memory can not be converted to Java objects";
 }

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/70fc529b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
index 4b30e16..7cace20 100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
@@ -27,11 +27,14 @@ import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.mapreduce.InputFormat;
 import org.apache.hadoop.mapreduce.OutputFormat;
 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaPairRDD;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.tinkerpop.gremlin.hadoop.Constants;
 import org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph;
+import org.apache.tinkerpop.gremlin.hadoop.structure.io.ObjectWritable;
+import org.apache.tinkerpop.gremlin.hadoop.structure.io.ObjectWritableIterator;
 import org.apache.tinkerpop.gremlin.hadoop.structure.io.VertexWritable;
 import org.apache.tinkerpop.gremlin.hadoop.structure.util.ConfUtil;
 import org.apache.tinkerpop.gremlin.hadoop.structure.util.HadoopHelper;
@@ -39,6 +42,7 @@ import org.apache.tinkerpop.gremlin.process.computer.ComputerResult;
 import org.apache.tinkerpop.gremlin.process.computer.GraphComputer;
 import org.apache.tinkerpop.gremlin.process.computer.MapReduce;
 import org.apache.tinkerpop.gremlin.process.computer.VertexProgram;
+import org.apache.tinkerpop.gremlin.process.computer.ranking.pagerank.PageRankMapReduce;
 import org.apache.tinkerpop.gremlin.process.computer.util.DefaultComputerResult;
 import org.apache.tinkerpop.gremlin.process.computer.util.GraphComputerHelper;
 import org.apache.tinkerpop.gremlin.structure.util.StringFactory;
@@ -51,7 +55,9 @@ import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
+import java.util.Comparator;
 import java.util.HashSet;
+import java.util.Iterator;
 import java.util.Set;
 import java.util.concurrent.CompletableFuture;
 import java.util.concurrent.Future;
@@ -133,8 +139,8 @@ public class SparkGraphComputer implements GraphComputer {
                                 (Class<InputFormat<NullWritable, VertexWritable>>) hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_INPUT_FORMAT, InputFormat.class),
                                 NullWritable.class,
                                 VertexWritable.class);
-                        final JavaPairRDD<Object, SparkMessenger<Double>> rdd2 = rdd.mapToPair(tuple -> new Tuple2<>(tuple._2().get().id(), new SparkMessenger<>(new SparkVertex((TinkerVertex) tuple._2().get()), new ArrayList<>())));
-                        GraphComputerRDD<Double> g = GraphComputerRDD.of(rdd2);
+                        final JavaPairRDD<Object, SparkMessenger<Object>> rdd2 = rdd.mapToPair(tuple -> new Tuple2<>(tuple._2().get().id(), new SparkMessenger<>(new SparkVertex((TinkerVertex) tuple._2().get()), new ArrayList<>())));
+                        GraphComputerRDD<Object> g = GraphComputerRDD.of(rdd2);
 
                         // set up the vertex program
                         this.vertexProgram.setup(memory);
@@ -164,16 +170,73 @@ public class SparkGraphComputer implements GraphComputer {
                                             VertexWritable.class,
                                             (Class<OutputFormat<NullWritable, VertexWritable>>) hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_OUTPUT_FORMAT, OutputFormat.class));
                         }
+                        sparkContext.close();
                     }
 
                     // execute mapreduce jobs
                     for (final MapReduce mapReduce : this.mapReducers) {
-                        //TODO
-                       /* g.mapValues(messenger -> {
-                            mapReduce.map(messenger.vertex, null);
-                            return messenger;
-                        }).combine().reduce();*/
+                        // set up the map reduce job
+                        final org.apache.commons.configuration.Configuration mapReduceConfiguration = new SerializableConfiguration();
+                        mapReduce.storeState(mapReduceConfiguration);
+
+                        // set up spark job
+                        final SparkConf sparkConfiguration = new SparkConf();
+                        sparkConfiguration.setAppName(Constants.GREMLIN_HADOOP_SPARK_JOB_PREFIX + mapReduce);
+                        hadoopConfiguration.forEach(entry -> sparkConfiguration.set(entry.getKey(), entry.getValue()));
+                        if (FileInputFormat.class.isAssignableFrom(hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_INPUT_FORMAT, InputFormat.class)))
+                            hadoopConfiguration.set("mapred.input.dir", hadoopConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION) + "/" + Constants.SYSTEM_G);
+                        // set up the input format
+                        final JavaSparkContext sparkContext = new JavaSparkContext(sparkConfiguration);
+                        SparkGraphComputer.loadJars(sparkContext, hadoopConfiguration);
+                        final JavaPairRDD<NullWritable, VertexWritable> g = sparkContext.newAPIHadoopRDD(hadoopConfiguration,
+                                (Class<InputFormat<NullWritable, VertexWritable>>) hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_INPUT_FORMAT, InputFormat.class),
+                                NullWritable.class,
+                                VertexWritable.class);
+
+                        // map
+                        JavaPairRDD<?, ?> mapRDD = g.flatMapToPair(tuple -> {
+                            final MapReduce m = MapReduce.createMapReduce(mapReduceConfiguration);
+                            final SparkMapEmitter mapEmitter = new SparkMapEmitter();
+                            m.map(tuple._2().get(), mapEmitter);
+                            return mapEmitter.getEmissions();
+                        });
+                        if (mapReduce.getMapKeySort().isPresent())
+                            mapRDD = mapRDD.sortByKey((Comparator) mapReduce.getMapKeySort().get());
+                        // todo: combine
+                        // reduce
+                        JavaPairRDD<?, ?> reduceRDD = null;
+                        if (mapReduce.doStage(MapReduce.Stage.REDUCE)) {
+                            reduceRDD = mapRDD.groupByKey().flatMapToPair(tuple -> {
+                                final MapReduce m = MapReduce.createMapReduce(mapReduceConfiguration);
+                                final SparkReduceEmitter reduceEmitter = new SparkReduceEmitter();
+                                m.reduce(tuple._1(), tuple._2().iterator(), reduceEmitter);
+                                return reduceEmitter.getEmissions();
+                            });
+                            if (mapReduce.getReduceKeySort().isPresent())
+                                reduceRDD = reduceRDD.sortByKey((Comparator) mapReduce.getReduceKeySort().get());
+                        }
+                        // write the map reduce results back to disk
+                        final String outputLocation = hadoopConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION);
+                        if (null != outputLocation) {
+                            // wrap keys and values as an <ObjectWritable,ObjectWritable> stream for output
+                            ((null == reduceRDD) ? mapRDD : reduceRDD).mapToPair(tuple -> new Tuple2<>(new ObjectWritable<>(tuple._1()), new ObjectWritable<>(tuple._2()))).saveAsNewAPIHadoopFile(outputLocation + "/" + mapReduce.getMemoryKey(),
+                                    ObjectWritable.class,
+                                    ObjectWritable.class,
+                                    (Class<OutputFormat<ObjectWritable, ObjectWritable>>) hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_MEMORY_OUTPUT_FORMAT, OutputFormat.class));
+                            // if it's not a SequenceFile, there is no reliable way to convert the output to the necessary Java objects.
+                            // to get the results you have to look through the HDFS directory structure. Oh the horror.
+                            try {
+                                if (hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_MEMORY_OUTPUT_FORMAT, SequenceFileOutputFormat.class, OutputFormat.class).equals(SequenceFileOutputFormat.class))
+                                    mapReduce.addResultToMemory(memory, new ObjectWritableIterator(hadoopConfiguration, new Path(outputLocation + "/" + mapReduce.getMemoryKey())));
+                                else
+                                    HadoopGraph.LOGGER.warn(Constants.SEQUENCE_WARNING);
+                            } catch (final IOException e) {
+                                throw new IllegalStateException(e.getMessage(), e);
+                            }
+                        }
+                        sparkContext.close();
                     }
+
                     // update runtime and return the newly computed graph
                     memory.setRuntime(System.currentTimeMillis() - startTime);
                     memory.complete();
@@ -210,8 +273,10 @@ public class SparkGraphComputer implements GraphComputer {
         final FileConfiguration configuration = new PropertiesConfiguration("/Users/marko/software/tinkerpop/tinkerpop3/hadoop-gremlin/conf/spark-kryo.properties");
         // TODO: final FileConfiguration configuration = new PropertiesConfiguration(args[0]);
         final HadoopGraph graph = HadoopGraph.open(configuration);
-        final ComputerResult result = new SparkGraphComputer(graph).program(VertexProgram.createVertexProgram(configuration)).submit().get();
+        final ComputerResult result = new SparkGraphComputer(graph).program(VertexProgram.createVertexProgram(configuration)).mapReduce(PageRankMapReduce.build().create()).submit().get();
+        // TODO: remove everything below
         System.out.println(result);
+        result.memory().<Iterator>get(PageRankMapReduce.DEFAULT_MEMORY_KEY).forEachRemaining(System.out::println);
         //result.graph().configuration().getKeys().forEachRemaining(key -> System.out.println(key + "-->" + result.graph().configuration().getString(key)));
         result.graph().V().valueMap().forEachRemaining(System.out::println);
     }

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/70fc529b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMapEmitter.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMapEmitter.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMapEmitter.java
new file mode 100644
index 0000000..3a4a424
--- /dev/null
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMapEmitter.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.tinkerpop.gremlin.hadoop.process.computer.spark;
+
+import org.apache.tinkerpop.gremlin.process.computer.MapReduce;
+import scala.Tuple2;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * @author Marko A. Rodriguez (http://markorodriguez.com)
+ */
+public class SparkMapEmitter<K, V> implements MapReduce.MapEmitter<K, V> {
+
+    private final List<Tuple2<K, V>> emissions = new ArrayList<>();
+
+    @Override
+    public void emit(final K key, final V value) {
+        emissions.add(new Tuple2<>(key, value));
+    }
+
+    public Iterable<Tuple2<K, V>> getEmissions() {
+        return this.emissions;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/70fc529b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMemory.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMemory.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMemory.java
index 88b046e..eb2af7f 100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMemory.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMemory.java
@@ -145,8 +145,8 @@ public final class SparkMemory implements Memory.Admin, Serializable {
     }
 
     private void checkKeyValue(final String key, final Object value) {
-        if (!this.memoryKeys.contains(key))
-            throw GraphComputer.Exceptions.providedKeyIsNotAMemoryComputeKey(key);
-        MemoryHelper.validateValue(value);
+        //if (!this.memoryKeys.contains(key))
+        //    throw GraphComputer.Exceptions.providedKeyIsNotAMemoryComputeKey(key);
+        //MemoryHelper.validateValue(value);
     }
 }

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/70fc529b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkReduceEmitter.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkReduceEmitter.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkReduceEmitter.java
new file mode 100644
index 0000000..b9f056c
--- /dev/null
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkReduceEmitter.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.tinkerpop.gremlin.hadoop.process.computer.spark;
+
+import org.apache.tinkerpop.gremlin.process.computer.MapReduce;
+import scala.Tuple2;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * @author Marko A. Rodriguez (http://markorodriguez.com)
+ */
+public class SparkReduceEmitter<OK, OV> implements MapReduce.ReduceEmitter<OK, OV> {
+
+    private final List<Tuple2<OK, OV>> emissions = new ArrayList<>();
+
+    @Override
+    public void emit(final OK key, final OV value) {
+        this.emissions.add(new Tuple2<>(key, value));
+    }
+
+    public List<Tuple2<OK, OV>> getEmissions() {
+        return this.emissions;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/70fc529b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/util/MapReduceHelper.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/util/MapReduceHelper.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/util/MapReduceHelper.java
index cd49a91..89a1abf 100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/util/MapReduceHelper.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/util/MapReduceHelper.java
@@ -18,18 +18,6 @@
  */
 package org.apache.tinkerpop.gremlin.hadoop.process.computer.util;
 
-import org.apache.tinkerpop.gremlin.hadoop.Constants;
-import org.apache.tinkerpop.gremlin.hadoop.process.computer.HadoopCombine;
-import org.apache.tinkerpop.gremlin.hadoop.process.computer.HadoopMap;
-import org.apache.tinkerpop.gremlin.hadoop.process.computer.HadoopReduce;
-import org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph;
-import org.apache.tinkerpop.gremlin.hadoop.structure.io.ObjectWritable;
-import org.apache.tinkerpop.gremlin.hadoop.structure.io.ObjectWritableComparator;
-import org.apache.tinkerpop.gremlin.hadoop.structure.io.ObjectWritableIterator;
-import org.apache.tinkerpop.gremlin.hadoop.structure.util.ConfUtil;
-import org.apache.tinkerpop.gremlin.process.computer.MapReduce;
-import org.apache.tinkerpop.gremlin.process.computer.Memory;
-import org.apache.tinkerpop.gremlin.process.computer.VertexProgram;
 import org.apache.commons.configuration.BaseConfiguration;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
@@ -43,6 +31,18 @@ import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
 import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
 import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
+import org.apache.tinkerpop.gremlin.hadoop.Constants;
+import org.apache.tinkerpop.gremlin.hadoop.process.computer.HadoopCombine;
+import org.apache.tinkerpop.gremlin.hadoop.process.computer.HadoopMap;
+import org.apache.tinkerpop.gremlin.hadoop.process.computer.HadoopReduce;
+import org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph;
+import org.apache.tinkerpop.gremlin.hadoop.structure.io.ObjectWritable;
+import org.apache.tinkerpop.gremlin.hadoop.structure.io.ObjectWritableComparator;
+import org.apache.tinkerpop.gremlin.hadoop.structure.io.ObjectWritableIterator;
+import org.apache.tinkerpop.gremlin.hadoop.structure.util.ConfUtil;
+import org.apache.tinkerpop.gremlin.process.computer.MapReduce;
+import org.apache.tinkerpop.gremlin.process.computer.Memory;
+import org.apache.tinkerpop.gremlin.process.computer.VertexProgram;
 
 import java.io.IOException;
 import java.util.Comparator;
@@ -56,10 +56,6 @@ public final class MapReduceHelper {
     private MapReduceHelper() {
     }
 
-    private static final String SEQUENCE_WARNING = "The " + Constants.GREMLIN_HADOOP_MEMORY_OUTPUT_FORMAT
-            + " is not " + SequenceFileOutputFormat.class.getCanonicalName()
-            + " and thus, graph computer memory can not be converted to Java objects";
-
     public static void executeMapReduceJob(final MapReduce mapReduce, final Memory.Admin memory, final Configuration configuration) throws IOException, ClassNotFoundException, InterruptedException {
         final Configuration newConfiguration = new Configuration(configuration);
         final BaseConfiguration apacheConfiguration = new BaseConfiguration();
@@ -70,7 +66,7 @@ public final class MapReduceHelper {
             if (newConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_OUTPUT_FORMAT, SequenceFileOutputFormat.class, OutputFormat.class).equals(SequenceFileOutputFormat.class))
                 mapReduce.addResultToMemory(memory, new ObjectWritableIterator(configuration, memoryPath));
             else
-                HadoopGraph.LOGGER.warn(SEQUENCE_WARNING);
+                HadoopGraph.LOGGER.warn(Constants.SEQUENCE_WARNING);
         } else {
             final Optional<Comparator<?>> mapSort = mapReduce.getMapKeySort();
             final Optional<Comparator<?>> reduceSort = mapReduce.getReduceKeySort();
@@ -137,7 +133,7 @@ public final class MapReduceHelper {
             if (newConfiguration.getClass(Constants.GREMLIN_HADOOP_MEMORY_OUTPUT_FORMAT, SequenceFileOutputFormat.class, OutputFormat.class).equals(SequenceFileOutputFormat.class))
                 mapReduce.addResultToMemory(memory, new ObjectWritableIterator(configuration, memoryPath));
             else
-                HadoopGraph.LOGGER.warn(SEQUENCE_WARNING);
+                HadoopGraph.LOGGER.warn(Constants.SEQUENCE_WARNING);
         }
     }
 }

[02/20] incubator-tinkerpop git commit: finally made it through the dependency maze. Spark and Giraph both cleanly work in hadoop-gremlin. Need to go deeper into the SparkGraphComputer implementation to make sure it's perfect, but I think we have the pom.

Posted by ok...@apache.org.

finally made it through the dependency maze. Spark and Giraph both cleanly work in hadoop-gremlin. Need to go deeper into the SparkGraphComputer implementation to make sure it's perfect, but I think we have the pom.xml right.


Project: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/commit/0f0b60ba
Tree: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/tree/0f0b60ba
Diff: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/diff/0f0b60ba

Branch: refs/heads/master
Commit: 0f0b60ba71d90d5422b83bdf4c8bc38a0bc91a69
Parents: 531b86a
Author: Marko A. Rodriguez <ok...@gmail.com>
Authored: Mon Mar 2 10:57:40 2015 -0700
Committer: Marko A. Rodriguez <ok...@gmail.com>
Committed: Mon Mar 2 10:57:40 2015 -0700

----------------------------------------------------------------------
 hadoop-gremlin/pom.xml                          | 135 ++++++-------------
 .../computer/spark/SparkGraphComputer.java      |   4 +-
 2 files changed, 45 insertions(+), 94 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/0f0b60ba/hadoop-gremlin/pom.xml
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/pom.xml b/hadoop-gremlin/pom.xml
index 84497c9..21970d8 100644
--- a/hadoop-gremlin/pom.xml
+++ b/hadoop-gremlin/pom.xml
@@ -51,20 +51,12 @@ limitations under the License.
                     <artifactId>jets3t</artifactId>
                 </exclusion>
                 <exclusion>
-                    <groupId>commons-net</groupId>
-                    <artifactId>commons-net</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>javax.servlet</groupId>
-                    <artifactId>servlet-api</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>javax.servlet</groupId>
-                    <artifactId>javax.servlet-api</artifactId>
+                    <groupId>commons-io</groupId>
+                    <artifactId>commons-io</artifactId>
                 </exclusion>
                 <exclusion>
-                    <groupId>javax.servlet</groupId>
-                    <artifactId>jsp-api</artifactId>
+                    <groupId>commons-net</groupId>
+                    <artifactId>commons-net</artifactId>
                 </exclusion>
                 <exclusion>
                     <groupId>org.mortbay.jetty</groupId>
@@ -72,23 +64,7 @@ limitations under the License.
                 </exclusion>
                 <exclusion>
                     <groupId>org.mortbay.jetty</groupId>
-                    <artifactId>jetty-parent</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>org.mortbay.jetty</groupId>
-                    <artifactId>jetty-sslengine</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>org.mortbay.jetty</groupId>
-                    <artifactId>jetty-sslengine</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>org.mortbay.jetty</groupId>
-                    <artifactId>jetty-util</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>org.eclipse.jetty</groupId>
-                    <artifactId>jetty-parent</artifactId>
+                    <artifactId>jsp-api-2.1</artifactId>
                 </exclusion>
             </exclusions>
             <!--<scope>provided</scope>-->
@@ -99,7 +75,7 @@ limitations under the License.
             <artifactId>giraph-core</artifactId>
             <version>1.0.0</version>
             <exclusions>
-                <!-- self-conflict -->
+                <!-- self conflicts -->
                 <exclusion>
                     <groupId>log4j</groupId>
                     <artifactId>log4j</artifactId>
@@ -118,25 +94,14 @@ limitations under the License.
                     <groupId>jline</groupId>
                     <artifactId>jline</artifactId>
                 </exclusion>
+                <!-- spark conflicts -->
                 <exclusion>
                     <groupId>io.netty</groupId>
                     <artifactId>netty</artifactId>
                 </exclusion>
                 <exclusion>
-                    <groupId>org.jboss.netty</groupId>
-                    <artifactId>netty</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>org.codehaus.jackson</groupId>
-                    <artifactId>jackson-core</artifactId>
-                </exclusion>
-                <exclusion>
-                <groupId>org.codehaus.jackson</groupId>
-                <artifactId>jackson-core-asl</artifactId>
-            </exclusion>
-                <exclusion>
-                    <groupId>org.codehaus.jackson</groupId>
-                    <artifactId>jackson-mapper-asl</artifactId>
+                    <groupId>commons-io</groupId>
+                    <artifactId>commons-io</artifactId>
                 </exclusion>
                 <exclusion>
                     <groupId>org.apache.zookeeper</groupId>
@@ -150,92 +115,78 @@ limitations under the License.
             <artifactId>spark-core_2.10</artifactId>
             <version>1.2.1</version>
             <exclusions>
+                <!-- self conflicts -->
                 <exclusion>
-                    <groupId>org.apache.hadoop</groupId>
-                    <artifactId>hadoop-core</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>org.apache.hadoop</groupId>
-                    <artifactId>hadoop-mapreduce-client-app</artifactId>
+                    <groupId>com.fasterxml.jackson.core</groupId>
+                    <artifactId>jackson-databind</artifactId>
                 </exclusion>
                 <exclusion>
-                    <groupId>org.apache.hadoop</groupId>
-                    <artifactId>hadoop-client</artifactId>
+                    <groupId>org.scala-lang</groupId>
+                    <artifactId>scala-library</artifactId>
                 </exclusion>
                 <exclusion>
-                    <groupId>org.slf4j</groupId>
-                    <artifactId>slf4j-api</artifactId>
+                    <groupId>log4j</groupId>
+                    <artifactId>log4j</artifactId>
                 </exclusion>
                 <exclusion>
-                    <groupId>org.slf4j</groupId>
-                    <artifactId>slf4j-log4j12</artifactId>
+                    <groupId>com.google.guava</groupId>
+                    <artifactId>guava</artifactId>
                 </exclusion>
                 <exclusion>
-                    <groupId>org.slf4j</groupId>
-                    <artifactId>jcl-over-slf4j</artifactId>
+                    <groupId>org.apache.commons</groupId>
+                    <artifactId>commons-lang3</artifactId>
                 </exclusion>
                 <exclusion>
-                    <groupId>commons-io</groupId>
-                    <artifactId>commons-io</artifactId>
+                    <groupId>commons-codec</groupId>
+                    <artifactId>commons-codec</artifactId>
                 </exclusion>
+                <!-- gremlin-core conflicts -->
                 <exclusion>
                     <groupId>com.esotericsoftware.kryo</groupId>
                     <artifactId>kryo</artifactId>
                 </exclusion>
                 <exclusion>
-                    <groupId>com.fasterxml.jackson.core</groupId>
-                    <artifactId>jackson-databind</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>com.fasterxml.jackson.core</groupId>
-                    <artifactId>jackson-annotations</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>com.fasterxml.jackson.core</groupId>
-                    <artifactId>jackson-core</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>org.scala-lang</groupId>
-                    <artifactId>scala-library</artifactId>
+                    <groupId>org.slf4j</groupId>
+                    <artifactId>slf4j-api</artifactId>
                 </exclusion>
                 <exclusion>
-                    <groupId>log4j</groupId>
-                    <artifactId>log4j</artifactId>
+                    <groupId>org.slf4j</groupId>
+                    <artifactId>slf4j-log4j12</artifactId>
                 </exclusion>
                 <exclusion>
-                    <groupId>com.google.guava</groupId>
-                    <artifactId>guava</artifactId>
+                    <groupId>org.slf4j</groupId>
+                    <artifactId>jcl-over-slf4j</artifactId>
                 </exclusion>
+                <!-- gremlin-groovy conflicts -->
                 <exclusion>
                     <groupId>jline</groupId>
                     <artifactId>jline</artifactId>
                 </exclusion>
+                <!-- hadoop conflicts -->
                 <exclusion>
-                    <groupId>commons</groupId>
-                    <artifactId>commons-lang</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>org.apache.commons</groupId>
-                    <artifactId>commons-lang3</artifactId>
+                    <groupId>org.apache.hadoop</groupId>
+                    <artifactId>hadoop-client</artifactId>
                 </exclusion>
+                <!-- giraph conflicts -->
                 <exclusion>
-                    <groupId>commons-codec</groupId>
-                    <artifactId>commons-codec</artifactId>
+                    <groupId>io.netty</groupId>
+                    <artifactId>netty</artifactId>
                 </exclusion>
             </exclusions>
         </dependency>
-        <!-- consistent dependencies chosen for hadoop-core -->
-        <!--<dependency>
-            <groupId>commons-httpclient</groupId>
-            <artifactId>commons-httpclient</artifactId>
-
-        </dependency>-->
+        <!-- consistent dependencies -->
         <dependency>
             <groupId>org.scala-lang</groupId>
             <artifactId>scala-library</artifactId>
             <version>2.10.3</version>
         </dependency>
         <dependency>
+            <groupId>io.netty</groupId>
+            <artifactId>netty</artifactId>
+            <version>3.5.13.Final</version>
+        </dependency>
+        <!-- TEST -->
+        <dependency>
             <groupId>org.apache.tinkerpop</groupId>
             <artifactId>gremlin-test</artifactId>
             <version>${project.version}</version>

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/0f0b60ba/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
index 0586a14..fc6ad88 100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
@@ -61,8 +61,8 @@ public class SparkGraphComputer implements GraphComputer {
         configuration.setAppName(Constants.GREMLIN_HADOOP_SPARK_JOB_PREFIX);
         configuration.setMaster("local");
         final JavaSparkContext sc = new JavaSparkContext(configuration);
-        JavaRDD<String> rdd = sc.textFile("religious-traversals.txt");
-        System.out.println(rdd.count());
+        JavaRDD<String> rdd = sc.textFile("README.asciidoc");
+        System.out.println("You made it: " + rdd.count());
     }

[19/20] incubator-tinkerpop git commit: random tweaks. about to merge master. will work from there now.

Posted by ok...@apache.org.

random tweaks. about to merge master. will work from there now.


Project: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/commit/76a75ee1
Tree: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/tree/76a75ee1
Diff: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/diff/76a75ee1

Branch: refs/heads/master
Commit: 76a75ee147d8664ae557ac3e6005db742bbc2f55
Parents: 3b3ddb4
Author: Marko A. Rodriguez <ok...@gmail.com>
Authored: Wed Mar 4 07:41:18 2015 -0700
Committer: Marko A. Rodriguez <ok...@gmail.com>
Committed: Wed Mar 4 07:41:18 2015 -0700

----------------------------------------------------------------------
 hadoop-gremlin/conf/spark-gryo.properties                       | 5 +----
 .../hadoop/process/computer/spark/SparkGraphProvider.java       | 1 +
 2 files changed, 2 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/76a75ee1/hadoop-gremlin/conf/spark-gryo.properties
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/conf/spark-gryo.properties b/hadoop-gremlin/conf/spark-gryo.properties
index a25482a..bfaa6ee 100644
--- a/hadoop-gremlin/conf/spark-gryo.properties
+++ b/hadoop-gremlin/conf/spark-gryo.properties
@@ -29,10 +29,7 @@ gremlin.hadoop.inputLocation=hdfs://localhost:9000/user/marko/tinkerpop-modern-v
 gremlin.hadoop.outputLocation=output
 
 # the vertex program to execute
-# gremlin.vertexProgram=org.apache.tinkerpop.gremlin.process.computer.ranking.pagerank.PageRankVertexProgram
-gremlin.vertexProgram=org.apache.tinkerpop.gremlin.process.computer.traversal.TraversalVertexProgram
-gremlin.traversalVertexProgram.traversalSupplier.type=CLASS
-gremlin.traversalVertexProgram.traversalSupplier.object=org.apache.tinkerpop.gremlin.hadoop.process.computer.example.TraversalSupplier1
+gremlin.vertexProgram=org.apache.tinkerpop.gremlin.process.computer.ranking.pagerank.PageRankVertexProgram
 
 # It is possible to provide Spark configuration parameters for use with SparkGraphComputer
 ##########################################################################################

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/76a75ee1/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphProvider.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphProvider.java b/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphProvider.java
index 255b0a5..6a1b47f 100644
--- a/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphProvider.java
+++ b/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphProvider.java
@@ -47,6 +47,7 @@ public final class SparkGraphProvider extends HadoopGraphProvider {
             ///////////
             put(Constants.GREMLIN_HADOOP_DEFAULT_GRAPH_COMPUTER, SparkGraphComputer.class.getCanonicalName());
             put("spark.master", "local[4]");
+            put("spark.serializer","org.apache.spark.serializer.JavaSerializer");
         }};
     }
 }

[04/20] incubator-tinkerpop git commit: Have PageRankVertexProgram working properly over SparkGraphComputer -- hard coded style in a public static main(). Lots of things to do tomorrow to generalize it and clean it all up.

Posted by ok...@apache.org.

Have PageRankVertexProgram working properly over SparkGraphComputer -- hard coded style in a public static main(). Lots of things to do tomorrow to generalize it and clean it all up.


Project: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/commit/3a32aa2e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/tree/3a32aa2e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/diff/3a32aa2e

Branch: refs/heads/master
Commit: 3a32aa2e9441dca955df78d624f69cc8186fc300
Parents: e56e70c
Author: Marko A. Rodriguez <ok...@gmail.com>
Authored: Mon Mar 2 17:27:30 2015 -0700
Committer: Marko A. Rodriguez <ok...@gmail.com>
Committed: Mon Mar 2 17:27:30 2015 -0700

----------------------------------------------------------------------
 .../computer/spark/GraphComputerRDD.java        | 108 +++++++++++++
 .../hadoop/process/computer/spark/GraphRDD.java |  80 ----------
 .../process/computer/spark/MessageBox.java      |  64 --------
 .../spark/SerializableConfiguration.java        |  59 +++++++
 .../computer/spark/SparkGraphComputer.java      |  46 +++---
 .../process/computer/spark/SparkMemory.java     | 152 +++++++++++++++++++
 .../computer/spark/SparkMemoryAccumulator.java  |  41 +++++
 .../process/computer/spark/SparkMessenger.java  | 115 ++++++++++++++
 .../process/computer/spark/ToyVertex.java       | 114 ++++++++++++++
 9 files changed, 617 insertions(+), 162 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/3a32aa2e/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/GraphComputerRDD.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/GraphComputerRDD.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/GraphComputerRDD.java
new file mode 100644
index 0000000..c99b108
--- /dev/null
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/GraphComputerRDD.java
@@ -0,0 +1,108 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.tinkerpop.gremlin.hadoop.process.computer.spark;
+
+import org.apache.commons.configuration.Configuration;
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaRDDLike;
+import org.apache.spark.api.java.function.FlatMapFunction2;
+import org.apache.spark.rdd.RDD;
+import org.apache.tinkerpop.gremlin.process.computer.VertexProgram;
+import scala.Tuple2;
+import scala.reflect.ManifestFactory;
+
+import java.util.List;
+import java.util.stream.Collectors;
+
+
+/**
+ * @author Marko A. Rodriguez (http://markorodriguez.com)
+ */
+public class GraphComputerRDD<M> extends JavaPairRDD<Object, SparkMessenger<M>> {
+
+    public GraphComputerRDD(final RDD<Tuple2<Object, SparkMessenger<M>>> rdd) {
+        super(rdd, ManifestFactory.classType(Object.class), ManifestFactory.classType(SparkMessenger.class));
+    }
+
+    public GraphComputerRDD(final JavaPairRDD<Object, SparkMessenger<M>> rdd) {
+        super(rdd.rdd(), ManifestFactory.classType(Object.class), ManifestFactory.classType(SparkMessenger.class));
+    }
+
+    public GraphComputerRDD completeIteration() {
+        JavaPairRDD<Object, SparkMessenger<M>> current = this;
+        // clear all previous incoming messages
+        current = current.mapValues(messenger -> {
+            messenger.clearIncomingMessages();
+            return messenger;
+        });
+        // emit messages
+        current = current.<Object, SparkMessenger<M>>flatMapToPair(tuple -> {
+            final List<Tuple2<Object, SparkMessenger<M>>> list = tuple._2().outgoing.entrySet()
+                    .stream()
+                    .map(entry -> new Tuple2<>(entry.getKey(), new SparkMessenger<>(new ToyVertex(entry.getKey()), entry.getValue())))
+                    .collect(Collectors.toList());
+            list.add(new Tuple2<>(tuple._1(), tuple._2()));
+            return list;
+        });
+        // "message pass" via reduction
+        current = current.reduceByKey((a, b) -> {
+            if (a.vertex instanceof ToyVertex && !(b.vertex instanceof ToyVertex))
+                a.vertex = b.vertex;
+            a.incoming.addAll(b.incoming);
+            return a;
+        });
+        // clear all previous outgoing messages
+        current = current.mapValues(messenger -> {
+            messenger.clearOutgoingMessages();
+            return messenger;
+        });
+        current.count(); // TODO: necessary for BSP?
+        return GraphComputerRDD.of(current);
+    }
+
+    private static void doNothing() {
+
+    }
+
+    public GraphComputerRDD execute(final Configuration configuration, final SparkMemory memory) {
+        JavaPairRDD<Object, SparkMessenger<M>> current = this;
+        current = current.mapValues(messenger -> {
+            VertexProgram.createVertexProgram(configuration).execute(messenger.vertex, messenger, memory);
+            return messenger;
+        });
+        return GraphComputerRDD.of(current);
+    }
+
+    public static <M> GraphComputerRDD<M> of(final JavaPairRDD<Object, SparkMessenger<M>> javaPairRDD) {
+        return new GraphComputerRDD<>(javaPairRDD);
+    }
+
+    public static <M> GraphComputerRDD<M> of(final JavaRDD<Tuple2<Object, SparkMessenger<M>>> javaRDD) {
+        return new GraphComputerRDD<>(javaRDD.rdd());
+    }
+
+    //////////////
+
+    @Override
+    public JavaRDD zipPartitions(JavaRDDLike uJavaRDDLike, FlatMapFunction2 iteratorIteratorVFlatMapFunction2) {
+        return (JavaRDD) new JavaRDD<>(null, null);
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/3a32aa2e/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/GraphRDD.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/GraphRDD.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/GraphRDD.java
deleted file mode 100644
index 1c1daa6..0000000
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/GraphRDD.java
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.tinkerpop.gremlin.hadoop.process.computer.spark;
-
-import org.apache.spark.api.java.JavaPairRDD;
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.api.java.JavaRDDLike;
-import org.apache.spark.api.java.function.FlatMapFunction2;
-import org.apache.spark.rdd.RDD;
-import org.apache.tinkerpop.gremlin.structure.Vertex;
-import org.apache.tinkerpop.gremlin.structure.util.detached.DetachedVertex;
-import scala.Tuple2;
-import scala.reflect.ManifestFactory;
-
-import java.util.Collections;
-import java.util.List;
-import java.util.stream.Collectors;
-
-
-/**
- * @author Marko A. Rodriguez (http://markorodriguez.com)
- */
-public class GraphRDD<M> extends JavaPairRDD<Vertex, MessageBox<M>> {
-
-    public GraphRDD(final RDD<Tuple2<Vertex, MessageBox<M>>> rdd) {
-        super(rdd, ManifestFactory.classType(Vertex.class), ManifestFactory.classType(MessageBox.class));
-    }
-
-    public GraphRDD(final JavaPairRDD<Vertex, MessageBox<M>> rdd) {
-        super(rdd.rdd(), ManifestFactory.classType(Vertex.class), ManifestFactory.classType(MessageBox.class));
-    }
-
-    public GraphRDD completeIteration() {
-        JavaPairRDD<Vertex, MessageBox<M>> current = this;
-        current = current.mapToPair(tuple -> {
-            tuple._2().clearIncomingMessages();
-            return tuple;
-        });
-        current = current.<Vertex, MessageBox<M>>flatMapToPair(tuple -> {
-            final List<Tuple2<Vertex, MessageBox<M>>> list = tuple._2().outgoing.entrySet().stream().map(entry -> {
-                final Vertex toVertex = new DetachedVertex(entry.getKey(), "vertex", Collections.emptyMap());
-                return new Tuple2<>(toVertex, new MessageBox<>(entry.getValue()));
-            }).collect(Collectors.toList());
-            list.add(new Tuple2<>(tuple._1(), new MessageBox<>()));
-            return list;
-        });
-        current = current.reduceByKey((a, b) -> {
-            a.incoming.addAll(b.incoming);
-            return a;
-        });
-        return new GraphRDD<>(current.rdd());
-    }
-
-    public static <M> GraphRDD<M> of(final JavaPairRDD<Vertex, MessageBox<M>> javaPairRDD) {
-        return new GraphRDD<>(javaPairRDD);
-    }
-
-    //////////////
-
-    @Override
-    public JavaRDD zipPartitions(JavaRDDLike uJavaRDDLike, FlatMapFunction2 iteratorIteratorVFlatMapFunction2) {
-        return (JavaRDD) new JavaRDD<>(null, null);
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/3a32aa2e/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/MessageBox.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/MessageBox.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/MessageBox.java
deleted file mode 100644
index 248960b..0000000
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/MessageBox.java
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.tinkerpop.gremlin.hadoop.process.computer.spark;
-
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-/**
- * @author Marko A. Rodriguez (http://markorodriguez.com)
- */
-public class MessageBox<M> implements Serializable {
-
-    protected final List<M> incoming;
-    protected final Map<Object, List<M>> outgoing = new HashMap<>();
-
-    public MessageBox() {
-        this(new ArrayList<>());
-    }
-
-    public MessageBox(final List<M> incomingMessages) {
-        this.incoming = incomingMessages;
-    }
-
-    public void sendMessage(final Object vertexId, final M message) {
-        List<M> messages = this.outgoing.get(vertexId);
-        if (null == messages) {
-            messages = new ArrayList<>();
-            this.outgoing.put(vertexId, messages);
-        }
-        messages.add(message);
-    }
-
-    public List<M> receiveMessages() {
-        return this.incoming;
-    }
-
-    public void clearIncomingMessages() {
-        this.incoming.clear();
-    }
-
-    @Override
-    public String toString() {
-        return "messageBox[incoming(" + this.incoming.size() + "):outgoing(" + this.outgoing.size() + ")]";
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/3a32aa2e/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SerializableConfiguration.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SerializableConfiguration.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SerializableConfiguration.java
new file mode 100644
index 0000000..a71b456
--- /dev/null
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SerializableConfiguration.java
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.tinkerpop.gremlin.hadoop.process.computer.spark;
+
+import org.apache.commons.configuration.AbstractConfiguration;
+
+import java.io.Serializable;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+
+/**
+ * @author Marko A. Rodriguez (http://markorodriguez.com)
+ */
+public class SerializableConfiguration extends AbstractConfiguration implements Serializable {
+
+    private final Map<String, Object> configurations = new HashMap<>();
+
+    @Override
+    protected void addPropertyDirect(final String key, final Object value) {
+        this.configurations.put(key, value);
+    }
+
+    @Override
+    public boolean isEmpty() {
+        return this.configurations.isEmpty();
+    }
+
+    @Override
+    public boolean containsKey(final String key) {
+        return this.configurations.containsKey(key);
+    }
+
+    @Override
+    public Object getProperty(final String key) {
+        return this.configurations.get(key);
+    }
+
+    @Override
+    public Iterator<String> getKeys() {
+        return this.configurations.keySet().iterator();
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/3a32aa2e/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
index 66261ab..1b1fc3d 100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
@@ -18,6 +18,7 @@
  */
 package org.apache.tinkerpop.gremlin.hadoop.process.computer.spark;
 
+import org.apache.commons.io.FileUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.spark.SparkConf;
@@ -32,14 +33,17 @@ import org.apache.tinkerpop.gremlin.process.computer.ComputerResult;
 import org.apache.tinkerpop.gremlin.process.computer.GraphComputer;
 import org.apache.tinkerpop.gremlin.process.computer.MapReduce;
 import org.apache.tinkerpop.gremlin.process.computer.VertexProgram;
+import org.apache.tinkerpop.gremlin.process.computer.ranking.pagerank.PageRankVertexProgram;
 import org.apache.tinkerpop.gremlin.process.computer.util.GraphComputerHelper;
-import org.apache.tinkerpop.gremlin.structure.Vertex;
 import org.apache.tinkerpop.gremlin.structure.util.StringFactory;
-import org.apache.tinkerpop.gremlin.structure.util.detached.DetachedFactory;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import scala.Tuple2;
 
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
 import java.util.HashSet;
 import java.util.Set;
 import java.util.concurrent.CompletableFuture;
@@ -64,7 +68,7 @@ public class SparkGraphComputer implements GraphComputer {
         this.hadoopGraph = hadoopGraph;
     }
 
-    public static void main(final String[] args) {
+    public static void main(final String[] args) throws IOException {
         final SparkConf configuration = new SparkConf();
         configuration.setAppName(Constants.GREMLIN_HADOOP_SPARK_JOB_PREFIX);
         configuration.setMaster("local");
@@ -73,21 +77,27 @@ public class SparkGraphComputer implements GraphComputer {
         final Configuration conf = new Configuration();
         conf.set("mapred.input.dir", "hdfs://localhost:9000/user/marko/grateful-dead-vertices.gio");
         JavaPairRDD<NullWritable, VertexWritable> rdd = sc.newAPIHadoopRDD(conf, KryoInputFormat.class, NullWritable.class, VertexWritable.class);
-        JavaRDD<Tuple2<Vertex, MessageBox<String>>> rdd2 = rdd.map(tuple -> new Tuple2<>(DetachedFactory.detach(tuple._2().get(), true), new MessageBox<>()));
-
-        GraphRDD<String> g = new GraphRDD<>(rdd2.rdd());
-        g = GraphRDD.of(g.mapToPair(tuple -> {
-            tuple._2().sendMessage(1, "hello");
-            return tuple;
-        }));
-
-        g = g.completeIteration();
-        /*g = g.union(g);
-        g = g.<List<String>>reduceByKey((a, b) -> {
-            a.addAll(b);
-            return a;
-        });*/
-        g.foreach(t -> System.out.println(t));
+        JavaPairRDD<Object, SparkMessenger<Double>> rdd2 = rdd.mapToPair(tuple -> new Tuple2<>(tuple._2().get().id(), new SparkMessenger<>(tuple._2().get(), new ArrayList<>())));
+
+        GraphComputerRDD<Double> g = GraphComputerRDD.of(rdd2);
+        FileUtils.deleteDirectory(new File("/tmp/test"));
+        g.saveAsObjectFile("/tmp/test");
+
+        final org.apache.commons.configuration.Configuration vertexProgram = new SerializableConfiguration();
+        final PageRankVertexProgram pageRankVertexProgram = PageRankVertexProgram.build().create();
+        pageRankVertexProgram.storeState(vertexProgram);
+        final SparkMemory memory = new SparkMemory(Collections.emptySet());
+
+        while (!pageRankVertexProgram.terminate(memory)) {
+            g = GraphComputerRDD.of((JavaRDD) sc.objectFile("/tmp/test"));
+            g = g.execute(vertexProgram, memory);
+            g = g.completeIteration();
+            memory.incrIteration();
+            FileUtils.deleteDirectory(new File("/tmp/test"));
+            g.saveAsObjectFile("/tmp/test");
+
+        }
+        g.foreach(t -> System.out.println(t._2().vertex.property(PageRankVertexProgram.PAGE_RANK) + "-->" + t._2().vertex.value("name")));
         System.out.println(g.count());
     }
 

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/3a32aa2e/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMemory.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMemory.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMemory.java
new file mode 100644
index 0000000..88b046e
--- /dev/null
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMemory.java
@@ -0,0 +1,152 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.tinkerpop.gremlin.hadoop.process.computer.spark;
+
+import org.apache.tinkerpop.gremlin.process.computer.GraphComputer;
+import org.apache.tinkerpop.gremlin.process.computer.MapReduce;
+import org.apache.tinkerpop.gremlin.process.computer.Memory;
+import org.apache.tinkerpop.gremlin.process.computer.ranking.pagerank.PageRankVertexProgram;
+import org.apache.tinkerpop.gremlin.process.computer.util.MemoryHelper;
+import org.apache.tinkerpop.gremlin.structure.util.StringFactory;
+
+import java.io.Serializable;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicLong;
+
+/**
+ * @author Marko A. Rodriguez (http://markorodriguez.com)
+ */
+public final class SparkMemory implements Memory.Admin, Serializable {
+
+    public final Set<String> memoryKeys = new HashSet<>();
+    public Map<String, Object> previousMap;
+    public Map<String, Object> currentMap;
+    private final AtomicInteger iteration = new AtomicInteger(0);
+    private final AtomicLong runtime = new AtomicLong(0l);
+
+    public SparkMemory(final Set<MapReduce> mapReducers) {
+        this.currentMap = new ConcurrentHashMap<>();
+        this.previousMap = new ConcurrentHashMap<>();
+        //if (null != vertexProgram) {
+        for (final String key : (Set<String>) PageRankVertexProgram.build().create().getMemoryComputeKeys()) {
+            MemoryHelper.validateKey(key);
+            this.memoryKeys.add(key);
+        }
+        //}
+        for (final MapReduce mapReduce : mapReducers) {
+            this.memoryKeys.add(mapReduce.getMemoryKey());
+        }
+    }
+
+    @Override
+    public Set<String> keys() {
+        return this.previousMap.keySet();
+    }
+
+    @Override
+    public void incrIteration() {
+        this.iteration.getAndIncrement();
+    }
+
+    @Override
+    public void setIteration(final int iteration) {
+        this.iteration.set(iteration);
+    }
+
+    @Override
+    public int getIteration() {
+        return this.iteration.get();
+    }
+
+    @Override
+    public void setRuntime(final long runTime) {
+        this.runtime.set(runTime);
+    }
+
+    @Override
+    public long getRuntime() {
+        return this.runtime.get();
+    }
+
+    protected void complete() {
+        this.iteration.decrementAndGet();
+        this.previousMap = this.currentMap;
+    }
+
+    protected void completeSubRound() {
+        this.previousMap = new ConcurrentHashMap<>(this.currentMap);
+
+    }
+
+    @Override
+    public boolean isInitialIteration() {
+        return this.getIteration() == 0;
+    }
+
+    @Override
+    public <R> R get(final String key) throws IllegalArgumentException {
+        final R r = (R) this.previousMap.get(key);
+        if (null == r)
+            throw Memory.Exceptions.memoryDoesNotExist(key);
+        else
+            return r;
+    }
+
+    @Override
+    public long incr(final String key, final long delta) {
+        checkKeyValue(key, delta);
+        this.currentMap.compute(key, (k, v) -> null == v ? delta : delta + (Long) v);
+        return (Long) this.previousMap.getOrDefault(key, 0l) + delta;
+    }
+
+    @Override
+    public boolean and(final String key, final boolean bool) {
+        checkKeyValue(key, bool);
+        this.currentMap.compute(key, (k, v) -> null == v ? bool : bool && (Boolean) v);
+        return (Boolean) this.previousMap.getOrDefault(key, true) && bool;
+    }
+
+    @Override
+    public boolean or(final String key, final boolean bool) {
+        checkKeyValue(key, bool);
+        this.currentMap.compute(key, (k, v) -> null == v ? bool : bool || (Boolean) v);
+        return (Boolean) this.previousMap.getOrDefault(key, true) || bool;
+    }
+
+    @Override
+    public void set(final String key, final Object value) {
+        checkKeyValue(key, value);
+        this.currentMap.put(key, value);
+    }
+
+    @Override
+    public String toString() {
+        return StringFactory.memoryString(this);
+    }
+
+    private void checkKeyValue(final String key, final Object value) {
+        if (!this.memoryKeys.contains(key))
+            throw GraphComputer.Exceptions.providedKeyIsNotAMemoryComputeKey(key);
+        MemoryHelper.validateValue(value);
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/3a32aa2e/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMemoryAccumulator.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMemoryAccumulator.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMemoryAccumulator.java
new file mode 100644
index 0000000..470774a
--- /dev/null
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMemoryAccumulator.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.tinkerpop.gremlin.hadoop.process.computer.spark;
+
+import org.apache.spark.AccumulatorParam;
+
+/** {@link AccumulatorParam} for {@link SparkMemory} whose add operations keep the first operand and ignore the second.
+ * @author Marko A. Rodriguez (http://markorodriguez.com)
+ */
+public class SparkMemoryAccumulator implements AccumulatorParam<SparkMemory> {
+    @Override
+    public SparkMemory addAccumulator(final SparkMemory first, final SparkMemory second) {
+        return first; // second is ignored; nothing is merged here
+    }
+
+    @Override
+    public SparkMemory addInPlace(final SparkMemory first, final SparkMemory second) {
+        return first; // second is ignored; nothing is merged here
+    }
+
+    @Override
+    public SparkMemory zero(SparkMemory sparkMemory) {
+        return null; // NOTE(review): the zero value is null -- confirm Spark never dereferences it
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/3a32aa2e/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMessenger.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMessenger.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMessenger.java
new file mode 100644
index 0000000..b18940a
--- /dev/null
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMessenger.java
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.tinkerpop.gremlin.hadoop.process.computer.spark;
+
+import org.apache.tinkerpop.gremlin.process.Traversal;
+import org.apache.tinkerpop.gremlin.process.computer.MessageScope;
+import org.apache.tinkerpop.gremlin.process.computer.Messenger;
+import org.apache.tinkerpop.gremlin.process.graph.traversal.step.map.VertexStep;
+import org.apache.tinkerpop.gremlin.process.graph.traversal.step.sideEffect.StartStep;
+import org.apache.tinkerpop.gremlin.process.traversal.util.TraversalHelper;
+import org.apache.tinkerpop.gremlin.structure.Direction;
+import org.apache.tinkerpop.gremlin.structure.Edge;
+import org.apache.tinkerpop.gremlin.structure.Vertex;
+
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/** {@link Messenger} that hands out a list of incoming messages and buffers outgoing ones keyed by target vertex id.
+ * @author Marko A. Rodriguez (http://markorodriguez.com)
+ */
+public class SparkMessenger<M> implements Serializable, Messenger<M> {
+
+    protected Vertex vertex; // start vertex for the incident traversal used by local-scope sends
+    protected List<M> incoming; // messages returned by receiveMessages
+    protected Map<Object, List<M>> outgoing = new HashMap<>(); // target vertex id -> messages queued for that vertex
+
+    public SparkMessenger() {
+        this.incoming = new ArrayList<>(); // start empty rather than null: toString() and clearIncomingMessages() dereference it
+    }
+
+    public SparkMessenger(final Vertex vertex, final List<M> incomingMessages) {
+        this.vertex = vertex;
+        this.incoming = incomingMessages; // the caller's list is kept by reference, not copied
+    }
+
+    public void clearIncomingMessages() {
+        this.incoming.clear();
+    }
+
+    public void clearOutgoingMessages() {
+        this.outgoing.clear();
+    }
+
+    @Override
+    public String toString() {
+        return "messageBox[incoming(" + this.incoming.size() + "):outgoing(" + this.outgoing.size() + ")]";
+    }
+
+    @Override
+    public Iterable<M> receiveMessages(final MessageScope messageScope) {
+        return this.incoming; // messageScope is not consulted; every incoming message is returned
+    }
+
+    @Override
+    public void sendMessage(final MessageScope messageScope, final M message) {
+        if (messageScope instanceof MessageScope.Local) { // local scope: one copy of message per incident edge
+            final MessageScope.Local<M> localMessageScope = (MessageScope.Local) messageScope;
+            final Traversal.Admin<Vertex, Edge> incidentTraversal = SparkMessenger.setVertexStart(localMessageScope.getIncidentTraversal().get(), this.vertex);
+            final Direction direction = SparkMessenger.getOppositeDirection(incidentTraversal);
+            incidentTraversal.forEachRemaining(edge -> {
+                final Object otherVertexId = edge.iterators().vertexIterator(direction).next().id(); // vertex on the opposite-direction end of the edge
+                List<M> messages = this.outgoing.get(otherVertexId);
+                if (null == messages) {
+                    messages = new ArrayList<>();
+                    this.outgoing.put(otherVertexId, messages);
+                }
+                messages.add(message);
+            });
+        } else { // anything that is not Local is cast to Global: one copy per listed vertex
+            ((MessageScope.Global) messageScope).vertices().forEach(v -> {
+                List<M> messages = this.outgoing.get(v.id());
+                if (null == messages) {
+                    messages = new ArrayList<>();
+                    this.outgoing.put(v.id(), messages);
+                }
+                messages.add(message);
+            });
+        }
+    }
+
+    ///////////
+
+    private static <T extends Traversal.Admin<Vertex, Edge>> T setVertexStart(final Traversal<Vertex, Edge> incidentTraversal, final Vertex vertex) {
+        incidentTraversal.asAdmin().addStep(0, new StartStep<>(incidentTraversal.asAdmin(), vertex)); // prepend a start step holding vertex
+        return (T) incidentTraversal; // unchecked cast: the same traversal instance is returned
+    }
+
+    private static Direction getOppositeDirection(final Traversal.Admin<Vertex, Edge> incidentTraversal) {
+        final VertexStep step = TraversalHelper.getLastStepOfAssignableClass(VertexStep.class, incidentTraversal).get(); // get() assumes a VertexStep is present
+        return step.getDirection().opposite();
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/3a32aa2e/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/ToyVertex.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/ToyVertex.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/ToyVertex.java
new file mode 100644
index 0000000..121ae2d
--- /dev/null
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/ToyVertex.java
@@ -0,0 +1,114 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.tinkerpop.gremlin.hadoop.process.computer.spark;
+
+import org.apache.tinkerpop.gremlin.structure.Direction;
+import org.apache.tinkerpop.gremlin.structure.Edge;
+import org.apache.tinkerpop.gremlin.structure.Graph;
+import org.apache.tinkerpop.gremlin.structure.Vertex;
+import org.apache.tinkerpop.gremlin.structure.VertexProperty;
+import org.apache.tinkerpop.gremlin.structure.util.ElementHelper;
+import org.apache.tinkerpop.gremlin.structure.util.StringFactory;
+import org.apache.tinkerpop.gremlin.structure.util.empty.EmptyGraph;
+
+import java.io.Serializable;
+import java.util.Collections;
+import java.util.Iterator;
+
+/** Serializable {@link Vertex} that carries only an id: its iterators are empty and its mutators are unsupported.
+* @author Marko A. Rodriguez (http://markorodriguez.com)
+*/
+public final class ToyVertex implements Vertex, Vertex.Iterators, Serializable {
+
+    private final Object id; // the only instance state; null when built through the no-arg constructor
+    private static final String TOY_VERTEX = "toyVertex"; // fixed label returned by label()
+
+    public ToyVertex(final Object id) {
+        this.id = id;
+    }
+
+    ToyVertex() { // package-private; NOTE(review): presumably for serialization frameworks -- confirm
+        this.id = null;
+    }
+
+    @Override
+    public Edge addEdge(final String label, final Vertex inVertex, final Object... keyValues) {
+        throw new UnsupportedOperationException(); // edges cannot be added
+    }
+
+    @Override
+    public Object id() {
+        return this.id;
+    }
+
+    @Override
+    public String label() {
+        return TOY_VERTEX; // every instance reports the same label
+    }
+
+    @Override
+    public Graph graph() {
+        return EmptyGraph.instance(); // no backing graph; the EmptyGraph instance is returned
+    }
+
+    @Override
+    public <V> VertexProperty<V> property(final String key, final V value) {
+        throw new UnsupportedOperationException(); // properties cannot be set
+    }
+
+    @Override
+    public void remove() {
+        throw new UnsupportedOperationException(); // removal is not supported
+    }
+
+    @Override
+    public Iterators iterators() {
+        return this; // this class implements Vertex.Iterators itself
+    }
+
+    @Override
+    public Iterator<Edge> edgeIterator(Direction direction, String... edgeLabels) {
+        return Collections.emptyIterator(); // always empty, whatever the arguments
+    }
+
+    @Override
+    public Iterator<Vertex> vertexIterator(Direction direction, String... edgeLabels) {
+        return Collections.emptyIterator(); // always empty, whatever the arguments
+    }
+
+    @Override
+    public <V> Iterator<VertexProperty<V>> propertyIterator(String... propertyKeys) {
+        return Collections.emptyIterator(); // always empty, whatever the arguments
+    }
+
+    @Override
+    public int hashCode() {
+        return ElementHelper.hashCode(this); // delegated to the ElementHelper utility
+    }
+
+    @Override
+    public boolean equals(final Object other) {
+        return ElementHelper.areEqual(this, other); // delegated to the ElementHelper utility
+    }
+
+    @Override
+    public String toString() {
+        return StringFactory.vertexString(this);
+    }
+}

[20/20] incubator-tinkerpop git commit: Merge branch 'master' into spark

Posted by ok...@apache.org.

Merge branch 'master' into spark


Project: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/commit/e0e08eba
Tree: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/tree/e0e08eba
Diff: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/diff/e0e08eba

Branch: refs/heads/master
Commit: e0e08ebaaa2b0713f5c42f5cc1f742b97d6821ce
Parents: 76a75ee 2eb3dba
Author: Marko A. Rodriguez <ok...@gmail.com>
Authored: Wed Mar 4 07:41:32 2015 -0700
Committer: Marko A. Rodriguez <ok...@gmail.com>
Committed: Wed Mar 4 07:41:32 2015 -0700

----------------------------------------------------------------------
 .../server/op/AbstractEvalOpProcessor.java      | 46 ++++++++++++++------
 .../server/op/session/SessionOpProcessor.java   |  2 +-
 2 files changed, 33 insertions(+), 15 deletions(-)
----------------------------------------------------------------------

[09/20] incubator-tinkerpop git commit: Merge branch 'master' into spark

Posted by ok...@apache.org.

Merge branch 'master' into spark


Project: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/commit/3855bdc8
Tree: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/tree/3855bdc8
Diff: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/diff/3855bdc8

Branch: refs/heads/master
Commit: 3855bdc8c22e3001cca5777d08b00a81d25493ea
Parents: 051994a a7af852
Author: Marko A. Rodriguez <ok...@gmail.com>
Authored: Tue Mar 3 09:31:38 2015 -0700
Committer: Marko A. Rodriguez <ok...@gmail.com>
Committed: Tue Mar 3 09:31:38 2015 -0700

----------------------------------------------------------------------
 .../step/sideEffect/GroovyGroupTest.groovy      | 16 ++++++---
 .../step/sideEffect/GroovyTreeTest.groovy       | 20 ++++++-----
 .../traversal/step/sideEffect/GroupTest.java    | 35 ++++++++++----------
 .../traversal/step/sideEffect/TreeTest.java     | 16 +++++----
 4 files changed, 50 insertions(+), 37 deletions(-)
----------------------------------------------------------------------

[03/20] incubator-tinkerpop git commit: We now have a basic message passing engine implemented in Spark.

Posted by ok...@apache.org.

We now have a basic message passing engine implemented in Spark.


Project: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/commit/e56e70ce
Tree: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/tree/e56e70ce
Diff: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/diff/e56e70ce

Branch: refs/heads/master
Commit: e56e70ce0c866bc20b410fc1cc1ac17d6ee8538d
Parents: 0f0b60b
Author: Marko A. Rodriguez <ok...@gmail.com>
Authored: Mon Mar 2 14:56:38 2015 -0700
Committer: Marko A. Rodriguez <ok...@gmail.com>
Committed: Mon Mar 2 14:56:38 2015 -0700

----------------------------------------------------------------------
 .../gremlin/structure/util/ElementHelper.java   |  6 +-
 .../hadoop/process/computer/spark/GraphRDD.java | 80 ++++++++++++++++++++
 .../process/computer/spark/MessageBox.java      | 64 ++++++++++++++++
 .../hadoop/process/computer/spark/RDDTools.java | 46 +++++++++++
 .../computer/spark/SparkGraphComputer.java      | 30 +++++++-
 5 files changed, 220 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/e56e70ce/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/structure/util/ElementHelper.java
----------------------------------------------------------------------
diff --git a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/structure/util/ElementHelper.java b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/structure/util/ElementHelper.java
index e3d8799..4bef2db 100644
--- a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/structure/util/ElementHelper.java
+++ b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/structure/util/ElementHelper.java
@@ -310,10 +310,8 @@ public final class ElementHelper {
      * @throws IllegalArgumentException if either argument is null
      */
     public static boolean areEqual(final Element a, final Object b) {
-        if (null == a)
-            throw Graph.Exceptions.argumentCanNotBeNull("a");
-        if (null == b)
-            throw Graph.Exceptions.argumentCanNotBeNull("b");
+        if (null == b || null == a)
+            return false;
 
         if (a == b)
             return true;

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/e56e70ce/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/GraphRDD.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/GraphRDD.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/GraphRDD.java
new file mode 100644
index 0000000..1c1daa6
--- /dev/null
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/GraphRDD.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.tinkerpop.gremlin.hadoop.process.computer.spark;
+
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaRDDLike;
+import org.apache.spark.api.java.function.FlatMapFunction2;
+import org.apache.spark.rdd.RDD;
+import org.apache.tinkerpop.gremlin.structure.Vertex;
+import org.apache.tinkerpop.gremlin.structure.util.detached.DetachedVertex;
+import scala.Tuple2;
+import scala.reflect.ManifestFactory;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.stream.Collectors;
+
+
+/** Pair RDD of (vertex, message box) with a step that routes each box's outgoing messages to the target vertices' incoming lists.
+ * @author Marko A. Rodriguez (http://markorodriguez.com)
+ */
+public class GraphRDD<M> extends JavaPairRDD<Vertex, MessageBox<M>> {
+
+    public GraphRDD(final RDD<Tuple2<Vertex, MessageBox<M>>> rdd) {
+        super(rdd, ManifestFactory.classType(Vertex.class), ManifestFactory.classType(MessageBox.class));
+    }
+
+    public GraphRDD(final JavaPairRDD<Vertex, MessageBox<M>> rdd) {
+        super(rdd.rdd(), ManifestFactory.classType(Vertex.class), ManifestFactory.classType(MessageBox.class)); // unwrap to the underlying RDD
+    }
+
+    public GraphRDD<M> completeIteration() { // returns GraphRDD<M> (was raw GraphRDD) so callers keep the message type
+        JavaPairRDD<Vertex, MessageBox<M>> current = this;
+        current = current.mapToPair(tuple -> { // step 1: drop the messages that were delivered for this iteration
+            tuple._2().clearIncomingMessages();
+            return tuple;
+        });
+        current = current.<Vertex, MessageBox<M>>flatMapToPair(tuple -> { // step 2: one pair per outgoing target, plus the vertex itself
+            final List<Tuple2<Vertex, MessageBox<M>>> list = tuple._2().outgoing.entrySet().stream().map(entry -> {
+                final Vertex toVertex = new DetachedVertex(entry.getKey(), "vertex", Collections.emptyMap()); // id-only stand-in; NOTE(review): presumably equal to the real vertex by id -- confirm
+                return new Tuple2<>(toVertex, new MessageBox<>(entry.getValue())); // queued messages become the target's incoming list
+            }).collect(Collectors.toList());
+            list.add(new Tuple2<>(tuple._1(), new MessageBox<>())); // re-emit the source vertex with a fresh, empty box
+            return list;
+        });
+        current = current.reduceByKey((a, b) -> { // step 3: merge the incoming lists of boxes that share a key
+            a.incoming.addAll(b.incoming);
+            return a;
+        });
+        return new GraphRDD<>(current.rdd());
+    }
+
+    public static <M> GraphRDD<M> of(final JavaPairRDD<Vertex, MessageBox<M>> javaPairRDD) {
+        return new GraphRDD<>(javaPairRDD);
+    }
+
+    //////////////
+
+    @Override
+    public JavaRDD zipPartitions(JavaRDDLike uJavaRDDLike, FlatMapFunction2 iteratorIteratorVFlatMapFunction2) {
+        return (JavaRDD) new JavaRDD<>(null, null); // NOTE(review): placeholder that ignores both arguments -- confirm it is never called
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/e56e70ce/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/MessageBox.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/MessageBox.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/MessageBox.java
new file mode 100644
index 0000000..248960b
--- /dev/null
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/MessageBox.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.tinkerpop.gremlin.hadoop.process.computer.spark;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/** Serializable holder for a list of incoming messages and a map of outgoing messages keyed by target vertex id.
+ * @author Marko A. Rodriguez (http://markorodriguez.com)
+ */
+public class MessageBox<M> implements Serializable {
+
+    protected final List<M> incoming; // messages handed out by receiveMessages
+    protected final Map<Object, List<M>> outgoing = new HashMap<>(); // target vertex id -> messages queued for that vertex
+
+    public MessageBox() {
+        this(new ArrayList<>()); // start with an empty, mutable incoming list
+    }
+
+    public MessageBox(final List<M> incomingMessages) {
+        this.incoming = incomingMessages; // the caller's list is kept by reference, not copied
+    }
+
+    public void sendMessage(final Object vertexId, final M message) {
+        List<M> messages = this.outgoing.get(vertexId);
+        if (null == messages) { // first message for this target: create its list
+            messages = new ArrayList<>();
+            this.outgoing.put(vertexId, messages);
+        }
+        messages.add(message);
+    }
+
+    public List<M> receiveMessages() {
+        return this.incoming; // the live internal list, not a copy
+    }
+
+    public void clearIncomingMessages() {
+        this.incoming.clear(); // clears the same list instance that receiveMessages returns
+    }
+
+    @Override
+    public String toString() {
+        return "messageBox[incoming(" + this.incoming.size() + "):outgoing(" + this.outgoing.size() + ")]";
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/e56e70ce/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/RDDTools.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/RDDTools.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/RDDTools.java
new file mode 100644
index 0000000..cef6040
--- /dev/null
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/RDDTools.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.tinkerpop.gremlin.hadoop.process.computer.spark;
+
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.tinkerpop.gremlin.structure.Vertex;
+import scala.Tuple2;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.Collectors;
+
+/** Static helpers that treat a (vertex, message list) tuple as a mailbox.
+ * @author Marko A. Rodriguez (http://markorodriguez.com)
+ */
+public class RDDTools {
+
+    public static <M> void sendMessage(final Tuple2<Vertex, List<M>> tuple, final M message) {
+        tuple._2().add(message); // appends in place, so the tuple's list must be mutable
+    }
+
+    public static <M> Iterable<M> receiveMessages(final Tuple2<Vertex, List<M>> tuple) {
+        return tuple._2(); // the tuple's own list, not a copy
+    }
+
+    public static <M> JavaPairRDD<Vertex, List<M>> endIteration(final JavaPairRDD<Vertex, List<M>> graph) { // one (vertex, [message]) pair per message; each list is a mutable ArrayList because Arrays.asList alone is fixed-size and would make sendMessage throw
+        return graph.flatMapToPair(tuple -> tuple._2().stream().map(message -> new Tuple2<Vertex, List<M>>(tuple._1(), new java.util.ArrayList<>(Arrays.asList(message)))).collect(Collectors.toList()));
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/e56e70ce/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
index fc6ad88..66261ab 100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
@@ -18,19 +18,27 @@
  */
 package org.apache.tinkerpop.gremlin.hadoop.process.computer.spark;
 
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.NullWritable;
 import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaPairRDD;
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.tinkerpop.gremlin.hadoop.Constants;
 import org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph;
+import org.apache.tinkerpop.gremlin.hadoop.structure.io.VertexWritable;
+import org.apache.tinkerpop.gremlin.hadoop.structure.io.kryo.KryoInputFormat;
 import org.apache.tinkerpop.gremlin.process.computer.ComputerResult;
 import org.apache.tinkerpop.gremlin.process.computer.GraphComputer;
 import org.apache.tinkerpop.gremlin.process.computer.MapReduce;
 import org.apache.tinkerpop.gremlin.process.computer.VertexProgram;
 import org.apache.tinkerpop.gremlin.process.computer.util.GraphComputerHelper;
+import org.apache.tinkerpop.gremlin.structure.Vertex;
 import org.apache.tinkerpop.gremlin.structure.util.StringFactory;
+import org.apache.tinkerpop.gremlin.structure.util.detached.DetachedFactory;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+import scala.Tuple2;
 
 import java.util.HashSet;
 import java.util.Set;
@@ -61,8 +69,26 @@ public class SparkGraphComputer implements GraphComputer {
         configuration.setAppName(Constants.GREMLIN_HADOOP_SPARK_JOB_PREFIX);
         configuration.setMaster("local");
         final JavaSparkContext sc = new JavaSparkContext(configuration);
-        JavaRDD<String> rdd = sc.textFile("README.asciidoc");
-        System.out.println("You made it: " + rdd.count());
+        //JavaRDD<String> rdd = sc.textFile("hdfs://localhost:9000/user/marko/religious-traversals.txt");
+        final Configuration conf = new Configuration();
+        conf.set("mapred.input.dir", "hdfs://localhost:9000/user/marko/grateful-dead-vertices.gio");
+        JavaPairRDD<NullWritable, VertexWritable> rdd = sc.newAPIHadoopRDD(conf, KryoInputFormat.class, NullWritable.class, VertexWritable.class);
+        JavaRDD<Tuple2<Vertex, MessageBox<String>>> rdd2 = rdd.map(tuple -> new Tuple2<>(DetachedFactory.detach(tuple._2().get(), true), new MessageBox<>()));
+
+        GraphRDD<String> g = new GraphRDD<>(rdd2.rdd());
+        g = GraphRDD.of(g.mapToPair(tuple -> {
+            tuple._2().sendMessage(1, "hello");
+            return tuple;
+        }));
+
+        g = g.completeIteration();
+        /*g = g.union(g);
+        g = g.<List<String>>reduceByKey((a, b) -> {
+            a.addAll(b);
+            return a;
+        });*/
+        g.foreach(t -> System.out.println(t));
+        System.out.println(g.count());
     }

[16/20] incubator-tinkerpop git commit: OMG -- Apache Configuration is the wooooorst --- the auto , deliminator ... total hole for the last hour.

Posted by ok...@apache.org.

OMG -- Apache Configuration is the wooooorst --- the auto , deliminator ... total hole for the last hour.


Project: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/commit/a9d0cf15
Tree: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/tree/a9d0cf15
Diff: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/diff/a9d0cf15

Branch: refs/heads/master
Commit: a9d0cf15602eaecf89b556e9c2783fb60ca0c847
Parents: b045e61
Author: Marko A. Rodriguez <ok...@gmail.com>
Authored: Tue Mar 3 16:26:00 2015 -0700
Committer: Marko A. Rodriguez <ok...@gmail.com>
Committed: Tue Mar 3 16:26:00 2015 -0700

----------------------------------------------------------------------
 .../computer/util/VertexProgramHelper.java      | 18 +++--
 .../computer/giraph/GiraphGraphComputer.java    |  1 -
 .../spark/SerializableConfiguration.java        | 69 -------------------
 .../computer/spark/SparkGraphComputer.java      | 16 ++---
 .../spark/util/SApacheConfiguration.java        | 70 ++++++++++++++++++++
 5 files changed, 89 insertions(+), 85 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/a9d0cf15/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/computer/util/VertexProgramHelper.java
----------------------------------------------------------------------
diff --git a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/computer/util/VertexProgramHelper.java b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/computer/util/VertexProgramHelper.java
index c7c37b8..88c22b4 100644
--- a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/computer/util/VertexProgramHelper.java
+++ b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/computer/util/VertexProgramHelper.java
@@ -18,13 +18,14 @@
  */
 package org.apache.tinkerpop.gremlin.process.computer.util;
 
+import org.apache.commons.configuration.AbstractConfiguration;
+import org.apache.commons.configuration.Configuration;
 import org.apache.tinkerpop.gremlin.process.Traversal;
 import org.apache.tinkerpop.gremlin.process.traversal.util.TraversalHelper;
 import org.apache.tinkerpop.gremlin.util.Serializer;
-import org.apache.commons.configuration.Configuration;
 
 import java.io.IOException;
-import java.util.List;
+import java.util.Arrays;
 
 /**
  * @author Marko A. Rodriguez (http://markorodriguez.com)
@@ -35,8 +36,11 @@ public final class VertexProgramHelper {
     }
 
     public static void serialize(final Object object, final Configuration configuration, final String key) {
+        if (configuration instanceof AbstractConfiguration)
+            ((AbstractConfiguration) configuration).setDelimiterParsingDisabled(true);
         try {
-            configuration.setProperty(key, Serializer.serializeObject(object));
+            final String byteString = Arrays.toString(Serializer.serializeObject(object));
+            configuration.setProperty(key, byteString.substring(1, byteString.length() - 1));
         } catch (final IOException e) {
             throw new IllegalArgumentException(e.getMessage(), e);
         }
@@ -44,10 +48,10 @@ public final class VertexProgramHelper {
 
     public static <T> T deserialize(final Configuration configuration, final String key) {
         try {
-            final List byteList = configuration.getList(key);
-            byte[] bytes = new byte[byteList.size()];
-            for (int i = 0; i < byteList.size(); i++) {
-                bytes[i] = Byte.valueOf(byteList.get(i).toString().replace("[", "").replace("]", ""));
+            final String[] stringBytes = configuration.getString(key).split(",");
+            byte[] bytes = new byte[stringBytes.length];
+            for (int i = 0; i < stringBytes.length; i++) {
+                bytes[i] = Byte.valueOf(stringBytes[i].trim());
             }
             return (T) Serializer.deserializeObject(bytes);
         } catch (final IOException | ClassNotFoundException e) {

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/a9d0cf15/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/giraph/GiraphGraphComputer.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/giraph/GiraphGraphComputer.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/giraph/GiraphGraphComputer.java
index 56d029c..52d606c 100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/giraph/GiraphGraphComputer.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/giraph/GiraphGraphComputer.java
@@ -140,7 +140,6 @@ public class GiraphGraphComputer extends Configured implements GraphComputer, To
                 this.loadJars(fs);
                 fs.delete(new Path(this.giraphConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION)), true);
                 ToolRunner.run(this, new String[]{});
-                // memory.keys().forEach(k -> LOGGER.error(k + "---" + memory.get(k)));
             } catch (Exception e) {
                 //e.printStackTrace();
                 throw new IllegalStateException(e.getMessage(), e);

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/a9d0cf15/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SerializableConfiguration.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SerializableConfiguration.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SerializableConfiguration.java
deleted file mode 100644
index 73e3d08..0000000
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SerializableConfiguration.java
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.tinkerpop.gremlin.hadoop.process.computer.spark;
-
-import org.apache.commons.configuration.AbstractConfiguration;
-import org.apache.commons.configuration.Configuration;
-import org.apache.commons.configuration.ConfigurationUtils;
-
-import java.io.Serializable;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.Map;
-
-/**
- * @author Marko A. Rodriguez (http://markorodriguez.com)
- */
-public final class SerializableConfiguration extends AbstractConfiguration implements Serializable {
-
-    private final Map<String, Object> configurations = new HashMap<>();
-
-    public SerializableConfiguration() {
-
-    }
-
-    public SerializableConfiguration(final Configuration configuration) {
-        ConfigurationUtils.copy(configuration, this);
-    }
-
-    @Override
-    protected void addPropertyDirect(final String key, final Object value) {
-        this.configurations.put(key, value);
-    }
-
-    @Override
-    public boolean isEmpty() {
-        return this.configurations.isEmpty();
-    }
-
-    @Override
-    public boolean containsKey(final String key) {
-        return this.configurations.containsKey(key);
-    }
-
-    @Override
-    public Object getProperty(final String key) {
-        return this.configurations.get(key);
-    }
-
-    @Override
-    public Iterator<String> getKeys() {
-        return this.configurations.keySet().iterator();
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/a9d0cf15/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
index dd004bc..c089c57 100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
@@ -29,6 +29,7 @@ import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaPairRDD;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.tinkerpop.gremlin.hadoop.Constants;
+import org.apache.tinkerpop.gremlin.hadoop.process.computer.spark.util.SApacheConfiguration;
 import org.apache.tinkerpop.gremlin.hadoop.process.computer.spark.util.SparkHelper;
 import org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph;
 import org.apache.tinkerpop.gremlin.hadoop.structure.io.VertexWritable;
@@ -112,8 +113,8 @@ public final class SparkGraphComputer implements GraphComputer {
             this.mapReducers.addAll(this.vertexProgram.getMapReducers());
         }
         // apache and hadoop configurations that are used throughout
-        final org.apache.commons.configuration.Configuration apacheConfiguration = this.hadoopGraph.configuration();
-        final Configuration hadoopConfiguration = ConfUtil.makeHadoopConfiguration(this.hadoopGraph.configuration());
+        final org.apache.commons.configuration.Configuration apacheConfiguration = new SApacheConfiguration(this.hadoopGraph.configuration());
+        final Configuration hadoopConfiguration = ConfUtil.makeHadoopConfiguration(apacheConfiguration);
 
         return CompletableFuture.<ComputerResult>supplyAsync(() -> {
                     final long startTime = System.currentTimeMillis();
@@ -143,15 +144,14 @@ public final class SparkGraphComputer implements GraphComputer {
                             // set up the vertex program and wire up configurations
                             memory = new SparkMemory(this.vertexProgram, this.mapReducers, sparkContext);
                             this.vertexProgram.setup(memory);
-                            final SerializableConfiguration vertexProgramConfiguration = new SerializableConfiguration();
+                            final SApacheConfiguration vertexProgramConfiguration = new SApacheConfiguration();
                             this.vertexProgram.storeState(vertexProgramConfiguration);
-                            ConfUtil.mergeApacheIntoHadoopConfiguration(vertexProgramConfiguration, hadoopConfiguration);
                             ConfigurationUtils.copy(vertexProgramConfiguration, apacheConfiguration);
-
+                            ConfUtil.mergeApacheIntoHadoopConfiguration(vertexProgramConfiguration, hadoopConfiguration);
                             // execute the vertex program
                             do {
                                 graphRDD = SparkHelper.executeStep(graphRDD, this.vertexProgram, memory, vertexProgramConfiguration);
-                                graphRDD.foreachPartition(iterator -> doNothing()); // i think this is a fast way to execute the rdd
+                                graphRDD.foreachPartition(iterator -> doNothing()); // TODO: i think this is a fast way to execute the rdd
                                 graphRDD.cache(); // TODO: learn about persistence and caching
                                 memory.incrIteration();
                             } while (!this.vertexProgram.terminate(memory));
@@ -185,7 +185,7 @@ public final class SparkGraphComputer implements GraphComputer {
                                     NullWritable.class,
                                     VertexWritable.class);
 
-                            final SerializableConfiguration newApacheConfiguration = new SerializableConfiguration(apacheConfiguration);
+                            final SApacheConfiguration newApacheConfiguration = new SApacheConfiguration(apacheConfiguration);
                             mapReduce.storeState(newApacheConfiguration);
                             // map
                             final JavaPairRDD mapRDD = SparkHelper.executeMap(hadoopGraphRDD, mapReduce, newApacheConfiguration);
@@ -243,7 +243,7 @@ public final class SparkGraphComputer implements GraphComputer {
         return new Features() {
             @Override
             public boolean supportsNonSerializableObjects() {
-                return true;  // TODO
+                return false;
             }
         };
     }

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/a9d0cf15/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/util/SApacheConfiguration.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/util/SApacheConfiguration.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/util/SApacheConfiguration.java
new file mode 100644
index 0000000..dbd468a
--- /dev/null
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/util/SApacheConfiguration.java
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.tinkerpop.gremlin.hadoop.process.computer.spark.util;
+
+import org.apache.commons.configuration.AbstractConfiguration;
+import org.apache.commons.configuration.Configuration;
+import org.apache.commons.configuration.ConfigurationUtils;
+
+import java.io.Serializable;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+
+/**
+ * @author Marko A. Rodriguez (http://markorodriguez.com)
+ */
+public final class SApacheConfiguration extends AbstractConfiguration implements Serializable {
+
+    private final Map<String, Object> configurations = new HashMap<>();
+
+    public SApacheConfiguration() {
+        this.setDelimiterParsingDisabled(true); // gets me everytime (what a stupid default behavior)
+    }
+
+    public SApacheConfiguration(final Configuration configuration) {
+        this();
+        ConfigurationUtils.copy(configuration, this);
+    }
+
+    @Override
+    protected void addPropertyDirect(final String key, final Object value) {
+        this.configurations.put(key, value);
+    }
+
+    @Override
+    public boolean isEmpty() {
+        return this.configurations.isEmpty();
+    }
+
+    @Override
+    public boolean containsKey(final String key) {
+        return this.configurations.containsKey(key);
+    }
+
+    @Override
+    public Object getProperty(final String key) {
+        return this.configurations.get(key);
+    }
+
+    @Override
+    public Iterator<String> getKeys() {
+        return this.configurations.keySet().iterator();
+    }
+}

[18/20] incubator-tinkerpop git commit: fixed an iteration offset bug in SparkGraphComputer.

Posted by ok...@apache.org.

fixed an iteration offset bug in SparkGraphComputer.


Project: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/commit/3b3ddb4d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/tree/3b3ddb4d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/diff/3b3ddb4d

Branch: refs/heads/master
Commit: 3b3ddb4d4a8f6aba0e93cb7c725067c15feab92a
Parents: 406dd68
Author: Marko A. Rodriguez <ok...@gmail.com>
Authored: Wed Mar 4 07:24:49 2015 -0700
Committer: Marko A. Rodriguez <ok...@gmail.com>
Committed: Wed Mar 4 07:24:49 2015 -0700

----------------------------------------------------------------------
 .../tinkerpop/gremlin/hadoop/Constants.java     |  2 ++
 .../computer/spark/SparkGraphComputer.java      | 22 ++++++++++++++------
 .../process/computer/spark/SparkMemory.java     | 15 ++++++++-----
 .../process/computer/TinkerGraphComputer.java   |  4 ++--
 4 files changed, 30 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/3b3ddb4d/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/Constants.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/Constants.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/Constants.java
index 60ef636..697cab0 100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/Constants.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/Constants.java
@@ -49,6 +49,8 @@ public class Constants {
     public static final String GREMLIN_HADOOP_HALT = "gremlin.hadoop.halt";
     public static final String MAP_MEMORY = "gremlin.hadoop.mapMemory";
 
+    public static final String MAPRED_INPUT_DIR = "mapred.input.dir";
+
     public static final String SEQUENCE_WARNING = "The " + Constants.GREMLIN_HADOOP_MEMORY_OUTPUT_FORMAT
             + " is not " + SequenceFileOutputFormat.class.getCanonicalName()
             + " and thus, graph computer memory can not be converted to Java objects";

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/3b3ddb4d/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
index c089c57..6df82e4 100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
@@ -129,7 +129,7 @@ public final class SparkGraphComputer implements GraphComputer {
                         sparkConfiguration.setAppName(Constants.GREMLIN_HADOOP_SPARK_JOB_PREFIX + this.vertexProgram);
                         hadoopConfiguration.forEach(entry -> sparkConfiguration.set(entry.getKey(), entry.getValue()));
                         if (FileInputFormat.class.isAssignableFrom(hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_INPUT_FORMAT, InputFormat.class)))
-                            hadoopConfiguration.set("mapred.input.dir", hadoopConfiguration.get(Constants.GREMLIN_HADOOP_INPUT_LOCATION)); // necessary for Spark and newAPIHadoopRDD
+                            hadoopConfiguration.set(Constants.MAPRED_INPUT_DIR, hadoopConfiguration.get(Constants.GREMLIN_HADOOP_INPUT_LOCATION)); // necessary for Spark and newAPIHadoopRDD
                         final JavaSparkContext sparkContext = new JavaSparkContext(sparkConfiguration);
                         SparkGraphComputer.loadJars(sparkContext, hadoopConfiguration);
                         ///
@@ -143,18 +143,25 @@ public final class SparkGraphComputer implements GraphComputer {
 
                             // set up the vertex program and wire up configurations
                             memory = new SparkMemory(this.vertexProgram, this.mapReducers, sparkContext);
-                            this.vertexProgram.setup(memory);
+                            this.vertexProgram.setup(memory); // TODO: setup variables are not being broadcasted on first call
                             final SApacheConfiguration vertexProgramConfiguration = new SApacheConfiguration();
                             this.vertexProgram.storeState(vertexProgramConfiguration);
                             ConfigurationUtils.copy(vertexProgramConfiguration, apacheConfiguration);
                             ConfUtil.mergeApacheIntoHadoopConfiguration(vertexProgramConfiguration, hadoopConfiguration);
+
                             // execute the vertex program
-                            do {
+                            while (true) {
+                                memory.setInTask(true);
                                 graphRDD = SparkHelper.executeStep(graphRDD, this.vertexProgram, memory, vertexProgramConfiguration);
                                 graphRDD.foreachPartition(iterator -> doNothing()); // TODO: i think this is a fast way to execute the rdd
                                 graphRDD.cache(); // TODO: learn about persistence and caching
-                                memory.incrIteration();
-                            } while (!this.vertexProgram.terminate(memory));
+                                memory.setInTask(false);
+                                if (this.vertexProgram.terminate(memory)) {
+                                    memory.incrIteration();
+                                    break;
+                                } else
+                                    memory.incrIteration();
+                            }
 
                             // write the output graph back to disk
                             SparkHelper.saveVertexProgramRDD(graphRDD, hadoopConfiguration);
@@ -175,7 +182,10 @@ public final class SparkGraphComputer implements GraphComputer {
                         sparkConfiguration.setAppName(Constants.GREMLIN_HADOOP_SPARK_JOB_PREFIX + mapReduce);
                         hadoopConfiguration.forEach(entry -> sparkConfiguration.set(entry.getKey(), entry.getValue()));
                         if (FileInputFormat.class.isAssignableFrom(hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_INPUT_FORMAT, InputFormat.class)))
-                            hadoopConfiguration.set("mapred.input.dir", hadoopConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION) + "/" + Constants.SYSTEM_G);
+                            hadoopConfiguration.set(Constants.MAPRED_INPUT_DIR, null == this.vertexProgram ?
+                                    hadoopConfiguration.get(Constants.GREMLIN_HADOOP_INPUT_LOCATION) : // if no vertex program grab the graph from the input location
+                                    hadoopConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION) + "/" + Constants.SYSTEM_G);
+
                         final JavaSparkContext sparkContext = new JavaSparkContext(sparkConfiguration);
                         SparkGraphComputer.loadJars(sparkContext, hadoopConfiguration);
                         // execute the map reduce job

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/3b3ddb4d/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMemory.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMemory.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMemory.java
index 402f2d3..b21f752 100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMemory.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkMemory.java
@@ -46,6 +46,7 @@ public final class SparkMemory implements Memory.Admin, Serializable {
     private final AtomicInteger iteration = new AtomicInteger(0);
     private final AtomicLong runtime = new AtomicLong(0l);
     private final Map<String, Accumulator<Rule>> memory = new HashMap<>();
+    private boolean inTask = false;
 
     public SparkMemory(final VertexProgram<?> vertexProgram, final Set<MapReduce> mapReducers, final JavaSparkContext sparkContext) {
         if (null != vertexProgram) {
@@ -104,7 +105,7 @@ public final class SparkMemory implements Memory.Admin, Serializable {
 
     @Override
     public <R> R get(final String key) throws IllegalArgumentException {
-        final R r = (R) this.memory.get(key).value().object;
+        final R r = (R) (this.inTask ? this.memory.get(key).localValue() : this.memory.get(key).value()).object;
         if (null == r)
             throw Memory.Exceptions.memoryDoesNotExist(key);
         else
@@ -115,27 +116,27 @@ public final class SparkMemory implements Memory.Admin, Serializable {
     public long incr(final String key, final long delta) {
         checkKeyValue(key, delta);
         this.memory.get(key).add(new Rule(Rule.Operation.INCR, delta));
-        return (Long) this.memory.get(key).localValue().object + delta;
+        return (Long) (this.inTask ? this.memory.get(key).localValue() : this.memory.get(key).value()).object + delta;
     }
 
     @Override
     public boolean and(final String key, final boolean bool) {
         checkKeyValue(key, bool);
         this.memory.get(key).add(new Rule(Rule.Operation.AND, bool));
-        return (Boolean) this.memory.get(key).localValue().object && bool;
+        return (Boolean) (this.inTask ? this.memory.get(key).localValue() : this.memory.get(key).value()).object && bool;
     }
 
     @Override
     public boolean or(final String key, final boolean bool) {
         checkKeyValue(key, bool);
         this.memory.get(key).add(new Rule(Rule.Operation.OR, bool));
-        return (Boolean) this.memory.get(key).localValue().object || bool;
+        return (Boolean) (this.inTask ? this.memory.get(key).localValue() : this.memory.get(key).value()).object || bool;
     }
 
     @Override
     public void set(final String key, final Object value) {
         checkKeyValue(key, value);
-        this.memory.get(key).add(new Rule(Rule.Operation.SET, value));
+        this.memory.get(key).setValue(new Rule(Rule.Operation.SET, value));
     }
 
     @Override
@@ -143,6 +144,10 @@ public final class SparkMemory implements Memory.Admin, Serializable {
         return StringFactory.memoryString(this);
     }
 
+    public void setInTask(final boolean inTask) {
+        this.inTask = inTask;
+    }
+
     private void checkKeyValue(final String key, final Object value) {
         if (!this.memoryKeys.contains(key))
             throw GraphComputer.Exceptions.providedKeyIsNotAMemoryComputeKey(key);

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/3b3ddb4d/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/process/computer/TinkerGraphComputer.java
----------------------------------------------------------------------
diff --git a/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/process/computer/TinkerGraphComputer.java b/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/process/computer/TinkerGraphComputer.java
index 4132dd9..013bd36 100644
--- a/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/process/computer/TinkerGraphComputer.java
+++ b/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/process/computer/TinkerGraphComputer.java
@@ -125,8 +125,8 @@ public class TinkerGraphComputer implements GraphComputer {
                             this.memory.completeSubRound();
                         }
                     }
-                } catch (Exception ex) {
-                    throw new RuntimeException(ex);
+                } catch (final Exception ex) {
+                    throw new IllegalStateException(ex.getMessage(), ex);
                 }
             }

[10/20] incubator-tinkerpop git commit: Spark jar cache supported for sending HADOOP_GREMLIN_LIBS jars to the cluster.

Posted by ok...@apache.org.

Spark jar cache supported for sending HADOOP_GREMLIN_LIBS jars to the cluster.


Project: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/commit/3ed0fa6c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/tree/3ed0fa6c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/diff/3ed0fa6c

Branch: refs/heads/master
Commit: 3ed0fa6cfcfb00e68e288059da188ee34337caf4
Parents: 3855bdc
Author: Marko A. Rodriguez <ok...@gmail.com>
Authored: Tue Mar 3 09:46:38 2015 -0700
Committer: Marko A. Rodriguez <ok...@gmail.com>
Committed: Tue Mar 3 09:46:38 2015 -0700

----------------------------------------------------------------------
 hadoop-gremlin/conf/spark-kryo.properties       |  2 +-
 .../computer/giraph/GiraphGraphComputer.java    |  6 ++---
 .../computer/spark/SparkGraphComputer.java      | 25 ++++++++++++++++++--
 3 files changed, 27 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/3ed0fa6c/hadoop-gremlin/conf/spark-kryo.properties
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/conf/spark-kryo.properties b/hadoop-gremlin/conf/spark-kryo.properties
index de4df3b..ec8b393 100644
--- a/hadoop-gremlin/conf/spark-kryo.properties
+++ b/hadoop-gremlin/conf/spark-kryo.properties
@@ -35,4 +35,4 @@ gremlin.vertexProgram=org.apache.tinkerpop.gremlin.process.computer.ranking.page
 spark.master=local[4]
 spark.executor.memory=1024m
 spark.eventLog.enabled=true
-#spark.serializer=org.apache.spark.serializer.KryoSerializer
\ No newline at end of file
+spark.serializer=org.apache.spark.serializer.JavaSerializer
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/3ed0fa6c/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/giraph/GiraphGraphComputer.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/giraph/GiraphGraphComputer.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/giraph/GiraphGraphComputer.java
index 7a5e362..589c22c 100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/giraph/GiraphGraphComputer.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/giraph/GiraphGraphComputer.java
@@ -191,11 +191,11 @@ public class GiraphGraphComputer extends Configured implements GraphComputer, To
     private void loadJars(final FileSystem fs) {
         final String hadoopGremlinLibsRemote = "hadoop-gremlin-libs";
         if (this.giraphConfiguration.getBoolean(Constants.GREMLIN_HADOOP_JARS_IN_DISTRIBUTED_CACHE, true)) {
-            final String giraphGremlinLibsLocal = System.getenv(Constants.HADOOP_GREMLIN_LIBS);
-            if (null == giraphGremlinLibsLocal)
+            final String hadoopGremlinLocalLibs = System.getenv(Constants.HADOOP_GREMLIN_LIBS);
+            if (null == hadoopGremlinLocalLibs)
                 LOGGER.warn(Constants.HADOOP_GREMLIN_LIBS + " is not set -- proceeding regardless");
             else {
-                final String[] paths = giraphGremlinLibsLocal.split(":");
+                final String[] paths = hadoopGremlinLocalLibs.split(":");
                 for (final String path : paths) {
                     final File file = new File(path);
                     if (file.exists()) {

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/3ed0fa6c/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
index 774c3c7..4b30e16 100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
@@ -47,6 +47,7 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import scala.Tuple2;
 
+import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
@@ -54,6 +55,7 @@ import java.util.HashSet;
 import java.util.Set;
 import java.util.concurrent.CompletableFuture;
 import java.util.concurrent.Future;
+import java.util.stream.Stream;
 
 /**
  * @author Marko A. Rodriguez (http://markorodriguez.com)
@@ -125,8 +127,9 @@ public class SparkGraphComputer implements GraphComputer {
                             hadoopConfiguration.set("mapred.input.dir", hadoopConfiguration.get(Constants.GREMLIN_HADOOP_INPUT_LOCATION));
 
                         // set up the input format
-                        final JavaSparkContext sc = new JavaSparkContext(sparkConfiguration);
-                        final JavaPairRDD<NullWritable, VertexWritable> rdd = sc.newAPIHadoopRDD(hadoopConfiguration,
+                        final JavaSparkContext sparkContext = new JavaSparkContext(sparkConfiguration);
+                        SparkGraphComputer.loadJars(sparkContext, hadoopConfiguration);
+                        final JavaPairRDD<NullWritable, VertexWritable> rdd = sparkContext.newAPIHadoopRDD(hadoopConfiguration,
                                 (Class<InputFormat<NullWritable, VertexWritable>>) hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_INPUT_FORMAT, InputFormat.class),
                                 NullWritable.class,
                                 VertexWritable.class);
@@ -183,6 +186,24 @@ public class SparkGraphComputer implements GraphComputer {
         // a cheap action
     }
 
+    private static void loadJars(final JavaSparkContext sparkContext, final Configuration hadoopConfiguration) {
+        if (hadoopConfiguration.getBoolean(Constants.GREMLIN_HADOOP_JARS_IN_DISTRIBUTED_CACHE, true)) {
+            final String hadoopGremlinLocalLibs = System.getenv(Constants.HADOOP_GREMLIN_LIBS);
+            if (null == hadoopGremlinLocalLibs)
+                LOGGER.warn(Constants.HADOOP_GREMLIN_LIBS + " is not set -- proceeding regardless");
+            else {
+                final String[] paths = hadoopGremlinLocalLibs.split(":");
+                for (final String path : paths) {
+                    final File file = new File(path);
+                    if (file.exists())
+                        Stream.of(file.listFiles()).filter(f -> f.getName().endsWith(Constants.DOT_JAR)).forEach(f -> sparkContext.addJar(f.getAbsolutePath()));
+                    else
+                        LOGGER.warn(path + " does not reference a valid directory -- proceeding regardless");
+                }
+            }
+        }
+    }
+
     /////////////////
 
     public static void main(final String[] args) throws Exception {