You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tinkerpop.apache.org by ok...@apache.org on 2016/01/29 21:19:44 UTC

incubator-tinkerpop git commit: Added ComputerGraph.* classes to the Sugar registry so they will be unloaded and not stay around. For the HadoopGremlinPluginCheck test, I close the context just to be safe. Updated docs around Input/OutputRDD. Added Highl

Repository: incubator-tinkerpop
Updated Branches:
  refs/heads/master 09a5d288c -> b50bb3f74


Added ComputerGraph.* classes to the Sugar registry so they will be unloaded and not stay around. For the HadoopGremlinPluginCheck test, I close the context just to be safe. Updated docs around Input/OutputRDD. Added HighlyCompressedMapStatus.class to Gryo registry (shows up when you have numerous partitions).


Project: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/commit/b50bb3f7
Tree: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/tree/b50bb3f7
Diff: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/diff/b50bb3f7

Branch: refs/heads/master
Commit: b50bb3f7449993bab08f0c0e1014baea8c3afa63
Parents: 09a5d28
Author: Marko A. Rodriguez <ok...@gmail.com>
Authored: Fri Jan 29 13:19:39 2016 -0700
Committer: Marko A. Rodriguez <ok...@gmail.com>
Committed: Fri Jan 29 13:19:39 2016 -0700

----------------------------------------------------------------------
 docs/src/reference/implementations.asciidoc       | 18 ++++++++++++++----
 .../process/computer/util/ComputerGraph.java      | 12 ++++++------
 .../gremlin/hadoop/HadoopGraphProvider.java       |  8 ++++++++
 .../groovy/plugin/HadoopGremlinPluginCheck.java   |  2 ++
 .../spark/structure/io/gryo/GryoSerializer.java   |  2 ++
 .../computer/SparkHadoopGraphProvider.java        | 10 ++++++++++
 6 files changed, 42 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/b50bb3f7/docs/src/reference/implementations.asciidoc
----------------------------------------------------------------------
diff --git a/docs/src/reference/implementations.asciidoc b/docs/src/reference/implementations.asciidoc
index d713cf5..439123d 100644
--- a/docs/src/reference/implementations.asciidoc
+++ b/docs/src/reference/implementations.asciidoc
@@ -1172,13 +1172,13 @@ specified in `HADOOP_GREMLIN_LIBS`.
 [source,shell]
 export HADOOP_GREMLIN_LIBS=$HADOOP_GREMLIN_LIBS:/usr/local/gremlin-console/ext/spark-gremlin/lib
 
-Furthermore the lib directory should be distributed across all servers in the Spark cluster. For this purpose TinkerPop
-provides a helper script, which takes the Spark installation directory and the the spark servers as input:
+Furthermore the `lib/` directory should be distributed across all machines in the SparkServer cluster. For this purpose TinkerPop
+provides a helper script, which takes the Spark installation directory and the Spark machines as input:
 
 [source,shell]
 bin/init-tp-spark.sh /usr/local/spark spark@10.0.0.1 spark@10.0.0.2 spark@10.0.0.3
 
-Once the `lib` directory is distributed, `SparkGraphComputer` can be used as follows.
+Once the `lib/` directory is distributed, `SparkGraphComputer` can be used as follows.
 
 [gremlin-groovy]
 ----
@@ -1224,11 +1224,21 @@ image::spark-algorithm.png[width=775]
 |gremlin.spark.persistStorageLevel |What `StorageLevel` to use when persisted RDDs via `PersistedOutputRDD` (default `MEMORY_ONLY`).
 |========================================================
 
-If the provider/user wishes to not use Hadoop `InputFormats`, it is possible to leverage Spark's RDD
+InputRDD and OutputRDD
+++++++++++++++++++++++
+
+If the provider/user does not want to use Hadoop `InputFormats`, it is possible to leverage Spark's RDD
 constructs directly. There is a `gremlin.spark.graphInputRDD` configuration that references a `Class<? extends
 InputRDD>`. An `InputRDD` provides a read method that takes a `SparkContext` and returns a graphRDD. Likewise, use
 `gremlin.spark.graphOutputRDD` and the respective `OutputRDD`.
 
+If the graph system provider uses an `InputRDD`, the RDD should maintain an associated `org.apache.spark.Partitioner`. By doing so,
+`SparkGraphComputer` will not partition the loaded graph across the cluster as it has already been partitioned by the graph system provider.
+This can save a significant amount of time and space resources.
+If the `InputRDD` does not have a registered partitioner, `SparkGraphComputer` will partition the graph using
+an `org.apache.spark.HashPartitioner` with the number of partitions being either the number of existing partitions in the input (e.g. input splits)
+or the user-specified number of `GraphComputer.workers()`.
+
 Using a Persisted Context
 +++++++++++++++++++++++++
 

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/b50bb3f7/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/computer/util/ComputerGraph.java
----------------------------------------------------------------------
diff --git a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/computer/util/ComputerGraph.java b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/computer/util/ComputerGraph.java
index a8e715c..8c818f1 100644
--- a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/computer/util/ComputerGraph.java
+++ b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/computer/util/ComputerGraph.java
@@ -118,7 +118,7 @@ public final class ComputerGraph implements Graph {
         throw new UnsupportedOperationException();
     }
 
-    private class ComputerElement implements Element, WrappedElement<Element> {
+    public class ComputerElement implements Element, WrappedElement<Element> {
         private final Element element;
 
         public ComputerElement(final Element element) {
@@ -200,7 +200,7 @@ public final class ComputerGraph implements Graph {
 
     ///////////////////////////////////
 
-    private class ComputerVertex extends ComputerElement implements Vertex, WrappedVertex<Vertex> {
+    public class ComputerVertex extends ComputerElement implements Vertex, WrappedVertex<Vertex> {
 
 
         public ComputerVertex(final Vertex vertex) {
@@ -273,7 +273,7 @@ public final class ComputerGraph implements Graph {
 
     ////////////////////////////
 
-    private class ComputerEdge extends ComputerElement implements Edge, WrappedEdge<Edge> {
+    public class ComputerEdge extends ComputerElement implements Edge, WrappedEdge<Edge> {
 
         public ComputerEdge(final Edge edge) {
             super(edge);
@@ -313,7 +313,7 @@ public final class ComputerGraph implements Graph {
 
     ///////////////////////////
 
-    private class ComputerVertexProperty<V> extends ComputerElement implements VertexProperty<V>, WrappedVertexProperty<VertexProperty<V>> {
+    public class ComputerVertexProperty<V> extends ComputerElement implements VertexProperty<V>, WrappedVertexProperty<VertexProperty<V>> {
         public ComputerVertexProperty(final VertexProperty<V> vertexProperty) {
             super(vertexProperty);
         }
@@ -351,7 +351,7 @@ public final class ComputerGraph implements Graph {
 
     ///////////////////////////
 
-    private class ComputerProperty<V> implements Property<V>, WrappedProperty<Property<V>> {
+    public class ComputerProperty<V> implements Property<V>, WrappedProperty<Property<V>> {
 
         private final Property<V> property;
 
@@ -413,7 +413,7 @@ public final class ComputerGraph implements Graph {
 
     ///////////////////////////
 
-    private class ComputerAdjacentVertex implements Vertex, WrappedVertex<Vertex> {
+    public class ComputerAdjacentVertex implements Vertex, WrappedVertex<Vertex> {
 
         private final Vertex adjacentVertex;
 

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/b50bb3f7/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/HadoopGraphProvider.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/HadoopGraphProvider.java b/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/HadoopGraphProvider.java
index 90f5132..9600dad 100644
--- a/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/HadoopGraphProvider.java
+++ b/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/HadoopGraphProvider.java
@@ -31,6 +31,7 @@ import org.apache.tinkerpop.gremlin.hadoop.structure.HadoopVertexProperty;
 import org.apache.tinkerpop.gremlin.hadoop.structure.io.graphson.GraphSONInputFormat;
 import org.apache.tinkerpop.gremlin.hadoop.structure.io.gryo.GryoInputFormat;
 import org.apache.tinkerpop.gremlin.hadoop.structure.io.gryo.GryoOutputFormat;
+import org.apache.tinkerpop.gremlin.process.computer.util.ComputerGraph;
 import org.apache.tinkerpop.gremlin.structure.Graph;
 import org.apache.tinkerpop.gremlin.structure.io.graphson.GraphSONResourceAccess;
 import org.apache.tinkerpop.gremlin.structure.io.gryo.GryoResourceAccess;
@@ -62,6 +63,13 @@ public class HadoopGraphProvider extends AbstractGraphProvider {
         add(HadoopProperty.class);
         add(HadoopVertex.class);
         add(HadoopVertexProperty.class);
+        add(ComputerGraph.class);
+        add(ComputerGraph.ComputerElement.class);
+        add(ComputerGraph.ComputerVertex.class);
+        add(ComputerGraph.ComputerEdge.class);
+        add(ComputerGraph.ComputerVertexProperty.class);
+        add(ComputerGraph.ComputerAdjacentVertex.class);
+        add(ComputerGraph.ComputerProperty.class);
     }};
 
     static {

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/b50bb3f7/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/groovy/plugin/HadoopGremlinPluginCheck.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/groovy/plugin/HadoopGremlinPluginCheck.java b/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/groovy/plugin/HadoopGremlinPluginCheck.java
index 711f63e..0639609 100644
--- a/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/groovy/plugin/HadoopGremlinPluginCheck.java
+++ b/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/groovy/plugin/HadoopGremlinPluginCheck.java
@@ -23,6 +23,7 @@ import org.apache.tinkerpop.gremlin.AbstractGremlinTest;
 import org.apache.tinkerpop.gremlin.LoadGraphWith;
 import org.apache.tinkerpop.gremlin.TestHelper;
 import org.apache.tinkerpop.gremlin.groovy.plugin.RemoteAcceptor;
+import org.apache.tinkerpop.gremlin.groovy.util.SugarTestHelper;
 import org.apache.tinkerpop.gremlin.groovy.util.TestableConsolePluginAcceptor;
 import org.apache.tinkerpop.gremlin.hadoop.Constants;
 import org.apache.tinkerpop.gremlin.hadoop.HadoopGremlinSuite;
@@ -80,6 +81,7 @@ public class HadoopGremlinPluginCheck extends AbstractGremlinTest {
     @Test
     @LoadGraphWith(LoadGraphWith.GraphData.MODERN)
     public void shouldSupportRemoteSugarTraversal() throws Exception {
+        SugarTestHelper.clearRegistry(this.graphProvider);
         this.console.addBinding("graph", this.graph);
         this.console.addBinding("g", this.g);
         this.remote.connect(Arrays.asList("graph"));

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/b50bb3f7/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/structure/io/gryo/GryoSerializer.java
----------------------------------------------------------------------
diff --git a/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/structure/io/gryo/GryoSerializer.java b/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/structure/io/gryo/GryoSerializer.java
index 564d9d7..a66b146 100644
--- a/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/structure/io/gryo/GryoSerializer.java
+++ b/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/structure/io/gryo/GryoSerializer.java
@@ -28,6 +28,7 @@ import org.apache.spark.api.python.PythonBroadcast;
 import org.apache.spark.broadcast.HttpBroadcast;
 import org.apache.spark.network.util.ByteUnit;
 import org.apache.spark.scheduler.CompressedMapStatus;
+import org.apache.spark.scheduler.HighlyCompressedMapStatus;
 import org.apache.spark.serializer.Serializer;
 import org.apache.spark.serializer.SerializerInstance;
 import org.apache.spark.util.SerializableConfiguration;
@@ -88,6 +89,7 @@ public final class GryoSerializer extends Serializer {
                                 .addCustom(CompactBuffer.class, new JavaSerializer())
                                 .addCustom(CompactBuffer[].class, new JavaSerializer())
                                 .addCustom(CompressedMapStatus.class, new JavaSerializer())
+                                .addCustom(HighlyCompressedMapStatus.class, new JavaSerializer())
                                 .addCustom(HttpBroadcast.class, new JavaSerializer())
                                 .addCustom(PythonBroadcast.class, new JavaSerializer())
                                 .addCustom(BoxedUnit.class, new JavaSerializer())

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/b50bb3f7/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkHadoopGraphProvider.java
----------------------------------------------------------------------
diff --git a/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkHadoopGraphProvider.java b/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkHadoopGraphProvider.java
index 19085da..f384063 100644
--- a/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkHadoopGraphProvider.java
+++ b/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkHadoopGraphProvider.java
@@ -20,11 +20,14 @@ package org.apache.tinkerpop.gremlin.spark.process.computer;
 
 import org.apache.tinkerpop.gremlin.GraphProvider;
 import org.apache.tinkerpop.gremlin.LoadGraphWith;
+import org.apache.tinkerpop.gremlin.groovy.util.SugarTestHelper;
 import org.apache.tinkerpop.gremlin.hadoop.Constants;
 import org.apache.tinkerpop.gremlin.hadoop.HadoopGraphProvider;
+import org.apache.tinkerpop.gremlin.hadoop.groovy.plugin.HadoopGremlinPluginCheck;
 import org.apache.tinkerpop.gremlin.hadoop.structure.io.FileSystemStorageCheck;
 import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversalSource;
 import org.apache.tinkerpop.gremlin.process.traversal.engine.ComputerTraversalEngine;
+import org.apache.tinkerpop.gremlin.spark.structure.Spark;
 import org.apache.tinkerpop.gremlin.spark.structure.io.PersistedOutputRDD;
 import org.apache.tinkerpop.gremlin.spark.structure.io.SparkContextStorageCheck;
 import org.apache.tinkerpop.gremlin.spark.structure.io.ToyGraphInputRDD;
@@ -55,6 +58,13 @@ public final class SparkHadoopGraphProvider extends HadoopGraphProvider {
             config.put(Constants.GREMLIN_SPARK_GRAPH_INPUT_RDD, ToyGraphInputRDD.class.getCanonicalName());
             config.put(Constants.GREMLIN_SPARK_GRAPH_OUTPUT_RDD, PersistedOutputRDD.class.getCanonicalName());
         }
+
+        // sugar plugin causes meta-method issues with a persisted context
+        if (test.equals(HadoopGremlinPluginCheck.class)) {
+            Spark.close();
+            SugarTestHelper.clearRegistry(this);
+        }
+
         /// spark configuration
         config.put("spark.master", "local[4]");
         config.put("spark.serializer", GryoSerializer.class.getCanonicalName());