You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tinkerpop.apache.org by ok...@apache.org on 2016/01/29 21:19:44 UTC
incubator-tinkerpop git commit: Added ComputerGraph.* classes to the
Sugar registry so they will be unloaded and not stay around. For the
HadoopGremlinPluginCheck test,
I close the context just to be safe. Updated docs around Input/OutputRDD.
Added Highl
Repository: incubator-tinkerpop
Updated Branches:
refs/heads/master 09a5d288c -> b50bb3f74
Added ComputerGraph.* classes to the Sugar registry so they will be unloaded and not stay around. For the HadoopGremlinPluginCheck test, I close the context just to be safe. Updated docs around Input/OutputRDD. Added HighlyCompressedMapStatus.class to Gryo registry (shows up when you have numerous partitions).
Project: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/commit/b50bb3f7
Tree: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/tree/b50bb3f7
Diff: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/diff/b50bb3f7
Branch: refs/heads/master
Commit: b50bb3f7449993bab08f0c0e1014baea8c3afa63
Parents: 09a5d28
Author: Marko A. Rodriguez <ok...@gmail.com>
Authored: Fri Jan 29 13:19:39 2016 -0700
Committer: Marko A. Rodriguez <ok...@gmail.com>
Committed: Fri Jan 29 13:19:39 2016 -0700
----------------------------------------------------------------------
docs/src/reference/implementations.asciidoc | 18 ++++++++++++++----
.../process/computer/util/ComputerGraph.java | 12 ++++++------
.../gremlin/hadoop/HadoopGraphProvider.java | 8 ++++++++
.../groovy/plugin/HadoopGremlinPluginCheck.java | 2 ++
.../spark/structure/io/gryo/GryoSerializer.java | 2 ++
.../computer/SparkHadoopGraphProvider.java | 10 ++++++++++
6 files changed, 42 insertions(+), 10 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/b50bb3f7/docs/src/reference/implementations.asciidoc
----------------------------------------------------------------------
diff --git a/docs/src/reference/implementations.asciidoc b/docs/src/reference/implementations.asciidoc
index d713cf5..439123d 100644
--- a/docs/src/reference/implementations.asciidoc
+++ b/docs/src/reference/implementations.asciidoc
@@ -1172,13 +1172,13 @@ specified in `HADOOP_GREMLIN_LIBS`.
[source,shell]
export HADOOP_GREMLIN_LIBS=$HADOOP_GREMLIN_LIBS:/usr/local/gremlin-console/ext/spark-gremlin/lib
-Furthermore the lib directory should be distributed across all servers in the Spark cluster. For this purpose TinkerPop
-provides a helper script, which takes the Spark installation directory and the the spark servers as input:
+Furthermore, the `lib/` directory should be distributed across all machines in the Spark cluster. For this purpose TinkerPop
+provides a helper script, which takes the Spark installation directory and the Spark machines as input:
[source,shell]
bin/init-tp-spark.sh /usr/local/spark spark@10.0.0.1 spark@10.0.0.2 spark@10.0.0.3
-Once the `lib` directory is distributed, `SparkGraphComputer` can be used as follows.
+Once the `lib/` directory is distributed, `SparkGraphComputer` can be used as follows.
[gremlin-groovy]
----
@@ -1224,11 +1224,21 @@ image::spark-algorithm.png[width=775]
|gremlin.spark.persistStorageLevel |What `StorageLevel` to use when persisting RDDs via `PersistedOutputRDD` (default `MEMORY_ONLY`).
|========================================================
-If the provider/user wishes to not use Hadoop `InputFormats`, it is possible to leverage Spark's RDD
+InputRDD and OutputRDD
+++++++++++++++++++++++
+
+If the provider/user does not want to use Hadoop `InputFormats`, it is possible to leverage Spark's RDD
constructs directly. There is a `gremlin.spark.graphInputRDD` configuration that references a `Class<? extends
InputRDD>`. An `InputRDD` provides a read method that takes a `SparkContext` and returns a graphRDD. Likewise, use
`gremlin.spark.graphOutputRDD` and the respective `OutputRDD`.
+If the graph system provider uses an `InputRDD`, the RDD should maintain an associated `org.apache.spark.Partitioner`. By doing so,
+`SparkGraphComputer` will not partition the loaded graph across the cluster as it has already been partitioned by the graph system provider.
+This can save a significant amount of time and space resources.
+If the `InputRDD` does not have a registered partitioner, `SparkGraphComputer` will partition the graph using
+an `org.apache.spark.HashPartitioner` with the number of partitions being either the number of existing partitions in the input (e.g. input splits)
+or the user-specified number of `GraphComputer.workers()`.
+
Using a Persisted Context
+++++++++++++++++++++++++
http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/b50bb3f7/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/computer/util/ComputerGraph.java
----------------------------------------------------------------------
diff --git a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/computer/util/ComputerGraph.java b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/computer/util/ComputerGraph.java
index a8e715c..8c818f1 100644
--- a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/computer/util/ComputerGraph.java
+++ b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/computer/util/ComputerGraph.java
@@ -118,7 +118,7 @@ public final class ComputerGraph implements Graph {
throw new UnsupportedOperationException();
}
- private class ComputerElement implements Element, WrappedElement<Element> {
+ public class ComputerElement implements Element, WrappedElement<Element> {
private final Element element;
public ComputerElement(final Element element) {
@@ -200,7 +200,7 @@ public final class ComputerGraph implements Graph {
///////////////////////////////////
- private class ComputerVertex extends ComputerElement implements Vertex, WrappedVertex<Vertex> {
+ public class ComputerVertex extends ComputerElement implements Vertex, WrappedVertex<Vertex> {
public ComputerVertex(final Vertex vertex) {
@@ -273,7 +273,7 @@ public final class ComputerGraph implements Graph {
////////////////////////////
- private class ComputerEdge extends ComputerElement implements Edge, WrappedEdge<Edge> {
+ public class ComputerEdge extends ComputerElement implements Edge, WrappedEdge<Edge> {
public ComputerEdge(final Edge edge) {
super(edge);
@@ -313,7 +313,7 @@ public final class ComputerGraph implements Graph {
///////////////////////////
- private class ComputerVertexProperty<V> extends ComputerElement implements VertexProperty<V>, WrappedVertexProperty<VertexProperty<V>> {
+ public class ComputerVertexProperty<V> extends ComputerElement implements VertexProperty<V>, WrappedVertexProperty<VertexProperty<V>> {
public ComputerVertexProperty(final VertexProperty<V> vertexProperty) {
super(vertexProperty);
}
@@ -351,7 +351,7 @@ public final class ComputerGraph implements Graph {
///////////////////////////
- private class ComputerProperty<V> implements Property<V>, WrappedProperty<Property<V>> {
+ public class ComputerProperty<V> implements Property<V>, WrappedProperty<Property<V>> {
private final Property<V> property;
@@ -413,7 +413,7 @@ public final class ComputerGraph implements Graph {
///////////////////////////
- private class ComputerAdjacentVertex implements Vertex, WrappedVertex<Vertex> {
+ public class ComputerAdjacentVertex implements Vertex, WrappedVertex<Vertex> {
private final Vertex adjacentVertex;
http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/b50bb3f7/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/HadoopGraphProvider.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/HadoopGraphProvider.java b/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/HadoopGraphProvider.java
index 90f5132..9600dad 100644
--- a/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/HadoopGraphProvider.java
+++ b/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/HadoopGraphProvider.java
@@ -31,6 +31,7 @@ import org.apache.tinkerpop.gremlin.hadoop.structure.HadoopVertexProperty;
import org.apache.tinkerpop.gremlin.hadoop.structure.io.graphson.GraphSONInputFormat;
import org.apache.tinkerpop.gremlin.hadoop.structure.io.gryo.GryoInputFormat;
import org.apache.tinkerpop.gremlin.hadoop.structure.io.gryo.GryoOutputFormat;
+import org.apache.tinkerpop.gremlin.process.computer.util.ComputerGraph;
import org.apache.tinkerpop.gremlin.structure.Graph;
import org.apache.tinkerpop.gremlin.structure.io.graphson.GraphSONResourceAccess;
import org.apache.tinkerpop.gremlin.structure.io.gryo.GryoResourceAccess;
@@ -62,6 +63,13 @@ public class HadoopGraphProvider extends AbstractGraphProvider {
add(HadoopProperty.class);
add(HadoopVertex.class);
add(HadoopVertexProperty.class);
+ add(ComputerGraph.class);
+ add(ComputerGraph.ComputerElement.class);
+ add(ComputerGraph.ComputerVertex.class);
+ add(ComputerGraph.ComputerEdge.class);
+ add(ComputerGraph.ComputerVertexProperty.class);
+ add(ComputerGraph.ComputerAdjacentVertex.class);
+ add(ComputerGraph.ComputerProperty.class);
}};
static {
http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/b50bb3f7/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/groovy/plugin/HadoopGremlinPluginCheck.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/groovy/plugin/HadoopGremlinPluginCheck.java b/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/groovy/plugin/HadoopGremlinPluginCheck.java
index 711f63e..0639609 100644
--- a/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/groovy/plugin/HadoopGremlinPluginCheck.java
+++ b/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/groovy/plugin/HadoopGremlinPluginCheck.java
@@ -23,6 +23,7 @@ import org.apache.tinkerpop.gremlin.AbstractGremlinTest;
import org.apache.tinkerpop.gremlin.LoadGraphWith;
import org.apache.tinkerpop.gremlin.TestHelper;
import org.apache.tinkerpop.gremlin.groovy.plugin.RemoteAcceptor;
+import org.apache.tinkerpop.gremlin.groovy.util.SugarTestHelper;
import org.apache.tinkerpop.gremlin.groovy.util.TestableConsolePluginAcceptor;
import org.apache.tinkerpop.gremlin.hadoop.Constants;
import org.apache.tinkerpop.gremlin.hadoop.HadoopGremlinSuite;
@@ -80,6 +81,7 @@ public class HadoopGremlinPluginCheck extends AbstractGremlinTest {
@Test
@LoadGraphWith(LoadGraphWith.GraphData.MODERN)
public void shouldSupportRemoteSugarTraversal() throws Exception {
+ SugarTestHelper.clearRegistry(this.graphProvider);
this.console.addBinding("graph", this.graph);
this.console.addBinding("g", this.g);
this.remote.connect(Arrays.asList("graph"));
http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/b50bb3f7/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/structure/io/gryo/GryoSerializer.java
----------------------------------------------------------------------
diff --git a/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/structure/io/gryo/GryoSerializer.java b/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/structure/io/gryo/GryoSerializer.java
index 564d9d7..a66b146 100644
--- a/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/structure/io/gryo/GryoSerializer.java
+++ b/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/structure/io/gryo/GryoSerializer.java
@@ -28,6 +28,7 @@ import org.apache.spark.api.python.PythonBroadcast;
import org.apache.spark.broadcast.HttpBroadcast;
import org.apache.spark.network.util.ByteUnit;
import org.apache.spark.scheduler.CompressedMapStatus;
+import org.apache.spark.scheduler.HighlyCompressedMapStatus;
import org.apache.spark.serializer.Serializer;
import org.apache.spark.serializer.SerializerInstance;
import org.apache.spark.util.SerializableConfiguration;
@@ -88,6 +89,7 @@ public final class GryoSerializer extends Serializer {
.addCustom(CompactBuffer.class, new JavaSerializer())
.addCustom(CompactBuffer[].class, new JavaSerializer())
.addCustom(CompressedMapStatus.class, new JavaSerializer())
+ .addCustom(HighlyCompressedMapStatus.class, new JavaSerializer())
.addCustom(HttpBroadcast.class, new JavaSerializer())
.addCustom(PythonBroadcast.class, new JavaSerializer())
.addCustom(BoxedUnit.class, new JavaSerializer())
http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/b50bb3f7/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkHadoopGraphProvider.java
----------------------------------------------------------------------
diff --git a/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkHadoopGraphProvider.java b/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkHadoopGraphProvider.java
index 19085da..f384063 100644
--- a/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkHadoopGraphProvider.java
+++ b/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkHadoopGraphProvider.java
@@ -20,11 +20,14 @@ package org.apache.tinkerpop.gremlin.spark.process.computer;
import org.apache.tinkerpop.gremlin.GraphProvider;
import org.apache.tinkerpop.gremlin.LoadGraphWith;
+import org.apache.tinkerpop.gremlin.groovy.util.SugarTestHelper;
import org.apache.tinkerpop.gremlin.hadoop.Constants;
import org.apache.tinkerpop.gremlin.hadoop.HadoopGraphProvider;
+import org.apache.tinkerpop.gremlin.hadoop.groovy.plugin.HadoopGremlinPluginCheck;
import org.apache.tinkerpop.gremlin.hadoop.structure.io.FileSystemStorageCheck;
import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversalSource;
import org.apache.tinkerpop.gremlin.process.traversal.engine.ComputerTraversalEngine;
+import org.apache.tinkerpop.gremlin.spark.structure.Spark;
import org.apache.tinkerpop.gremlin.spark.structure.io.PersistedOutputRDD;
import org.apache.tinkerpop.gremlin.spark.structure.io.SparkContextStorageCheck;
import org.apache.tinkerpop.gremlin.spark.structure.io.ToyGraphInputRDD;
@@ -55,6 +58,13 @@ public final class SparkHadoopGraphProvider extends HadoopGraphProvider {
config.put(Constants.GREMLIN_SPARK_GRAPH_INPUT_RDD, ToyGraphInputRDD.class.getCanonicalName());
config.put(Constants.GREMLIN_SPARK_GRAPH_OUTPUT_RDD, PersistedOutputRDD.class.getCanonicalName());
}
+
+ // sugar plugin causes meta-method issues with a persisted context
+ if (test.equals(HadoopGremlinPluginCheck.class)) {
+ Spark.close();
+ SugarTestHelper.clearRegistry(this);
+ }
+
/// spark configuration
config.put("spark.master", "local[4]");
config.put("spark.serializer", GryoSerializer.class.getCanonicalName());