You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tinkerpop.apache.org by ok...@apache.org on 2015/05/27 00:16:32 UTC
incubator-tinkerpop git commit: used multi-catch in SparkGraphComputer and added JavaDoc to InputRDD. Added note about InputRDD to asciidocs.

Repository: incubator-tinkerpop
Updated Branches:
  refs/heads/master 715d7ff05 -> cf614e770


used multi-catch in SparkGraphComputer and added JavaDoc to InputRDD. Added note about InputRDD to asciidocs.


Project: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/commit/cf614e77
Tree: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/tree/cf614e77
Diff: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/diff/cf614e77

Branch: refs/heads/master
Commit: cf614e7707caca335e2cccb3f7d49cbdc5c0c575
Parents: 715d7ff
Author: Marko A. Rodriguez <ok...@gmail.com>
Authored: Tue May 26 16:16:37 2015 -0600
Committer: Marko A. Rodriguez <ok...@gmail.com>
Committed: Tue May 26 16:16:37 2015 -0600

----------------------------------------------------------------------
 docs/src/implementations.asciidoc                           | 2 ++
 .../hadoop/process/computer/spark/SparkGraphComputer.java   | 2 +-
 .../gremlin/hadoop/process/computer/spark/io/InputRDD.java  | 9 +++++++++
 3 files changed, 12 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/cf614e77/docs/src/implementations.asciidoc
----------------------------------------------------------------------
diff --git a/docs/src/implementations.asciidoc b/docs/src/implementations.asciidoc
index 18be6b1..990a170 100644
--- a/docs/src/implementations.asciidoc
+++ b/docs/src/implementations.asciidoc
@@ -756,6 +756,8 @@ The `SparkGraphComputer` algorithm leverages Spark's caching abilities to reduce
 
 image::spark-algorithm.png[width=775]
 
+IMPORTANT: If a vendor or user wishes to bypass Hadoop `InputFormats` when pulling data from their graph system, they can use Spark's RDD constructs directly. The `gremlin.hadoop.inputRDD` configuration property references a `Class<? extends InputRDD>`. `InputRDD` has one method that must be implemented: `JavaPairRDD<Object, VertexWritable> readGraphRDD(Configuration configuration, JavaSparkContext sparkContext)`. Because the Spark context is provided to the `InputRDD`, data is pulled from the graph system through direct RDD manipulation rather than by converting a Hadoop `InputFormat` into an RDD. Note that `InputFormatRDD` is a type of `InputRDD` that simply uses Hadoop's `InputFormat` to generate the `graphRDD`.
+
 [[mapreducegraphcomputer]]
 MapReduceGraphComputer
 ^^^^^^^^^^^^^^^^^^^^^^

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/cf614e77/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
index 69d2411..1a097ff 100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
@@ -102,7 +102,7 @@ public final class SparkGraphComputer extends AbstractHadoopGraphComputer {
                                     .readGraphRDD(apacheConfiguration, sparkContext)
                                     .setName("graphRDD")
                                     .cache();
-                        } catch (final Exception e) {
+                        } catch (final InstantiationException | IllegalAccessException e) {
                             throw new IllegalStateException(e.getMessage(), e);
                         }
                         JavaPairRDD<Object, ViewIncomingPayload<Object>> viewIncomingRDD = null;

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/cf614e77/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/io/InputRDD.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/io/InputRDD.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/io/InputRDD.java
index 2cd5af5..9fd7129 100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/io/InputRDD.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/io/InputRDD.java
@@ -27,9 +27,18 @@ import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.tinkerpop.gremlin.hadoop.structure.io.VertexWritable;
 
 /**
+ * An InputRDD is used to read data from the underlying graph system and yield the respective adjacency list.
+ * Note that {@link InputFormatRDD} is a type of InputRDD that simply uses the specified {@link org.apache.hadoop.mapreduce.InputFormat} to generate the respective graphRDD.
+ *
  * @author Marko A. Rodriguez (http://markorodriguez.com)
  */
 public interface InputRDD {
 
+    /**
+     * Read the graphRDD from the underlying graph system.
+     * @param configuration the configuration for the {@link org.apache.tinkerpop.gremlin.hadoop.process.computer.spark.SparkGraphComputer}.
+     * @param sparkContext the Spark context with the requisite methods for generating a {@link JavaPairRDD}.
+     * @return an adjacency list representation of the underlying graph system.
+     */
     public JavaPairRDD<Object, VertexWritable> readGraphRDD(final Configuration configuration, final JavaSparkContext sparkContext);
 }