You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tinkerpop.apache.org by dk...@apache.org on 2015/11/04 20:09:09 UTC

[7/9] incubator-tinkerpop git commit: fixed LocalPropertyTest to use KryoSerializer instead of GryoSerializer.

Fixed LocalPropertyTest to use KryoSerializer instead of GryoSerializer.


Project: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/commit/2edefa60
Tree: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/tree/2edefa60
Diff: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/diff/2edefa60

Branch: refs/heads/TINKERPOP3-904
Commit: 2edefa604c30c5f93d51f0f624241e4e80ac0725
Parents: ebec095 caf0e2a
Author: Marko A. Rodriguez <ok...@gmail.com>
Authored: Wed Nov 4 11:39:13 2015 -0700
Committer: Marko A. Rodriguez <ok...@gmail.com>
Committed: Wed Nov 4 11:39:13 2015 -0700

----------------------------------------------------------------------
 CHANGELOG.asciidoc                              |   1 +
 docs/src/implementations.asciidoc               |   9 ++
 .../process/computer/SparkGraphComputer.java    |  32 ++++++
 .../process/computer/LocalPropertyTest.java     | 100 +++++++++++++++++++
 4 files changed, 142 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/2edefa60/CHANGELOG.asciidoc
----------------------------------------------------------------------
diff --cc CHANGELOG.asciidoc
index 31671c0,331ad93..085bed7
--- a/CHANGELOG.asciidoc
+++ b/CHANGELOG.asciidoc
@@@ -25,9 -25,7 +25,10 @@@ image::https://raw.githubusercontent.co
  TinkerPop 3.1.0 (NOT OFFICIALLY RELEASED YET)
  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  
 -* Added the ability to set thread local properties in `SparkGraphComputer` when using a persistent context
 +* Integrated `NumberHelper` in `SumStep`, `MinStep`, `MaxStep` and `MeanStep` (local and global step variants).
 +* `CountMatchAlgorithm`, in OLAP, now biases traversal selection towards those traversals that start at the current traverser location to reduce message passing.
 +* Fixed a file stream bug in Hadoop OLTP that showed up if the streamed file was more than 2G of data.
++* Added the ability to set thread local properties in `SparkGraphComputer` when using a persistent context.
  * Bumped to Neo4j 2.3.0.
  * Added `PersistedInputRDD` and `PersistedOutputRDD` which enables `SparkGraphComputer` to store the graph RDD in the context between jobs (no HDFS serialization required).
  * Renamed the `public static String` configuration variable names of TinkerGraph (deprecated old variables).

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/2edefa60/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/process/computer/LocalPropertyTest.java
----------------------------------------------------------------------
diff --cc spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/process/computer/LocalPropertyTest.java
index 0000000,57c3526..e0fe796
mode 000000,100644..100644
--- a/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/process/computer/LocalPropertyTest.java
+++ b/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/process/computer/LocalPropertyTest.java
@@@ -1,0 -1,102 +1,100 @@@
+ /*
+  * Licensed to the Apache Software Foundation (ASF) under one
+  * or more contributor license agreements.  See the NOTICE file
+  * distributed with this work for additional information
+  * regarding copyright ownership.  The ASF licenses this file
+  * to you under the Apache License, Version 2.0 (the
+  * "License"); you may not use this file except in compliance
+  * with the License.  You may obtain a copy of the License at
+  *
+  * http://www.apache.org/licenses/LICENSE-2.0
+  *
+  * Unless required by applicable law or agreed to in writing,
+  * software distributed under the License is distributed on an
+  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  * KIND, either express or implied.  See the License for the
+  * specific language governing permissions and limitations
+  * under the License.
+  */
+ 
+ package org.apache.tinkerpop.gremlin.spark.process.computer;
+ 
+ import org.apache.commons.configuration.BaseConfiguration;
+ import org.apache.commons.configuration.Configuration;
+ import org.apache.spark.SparkConf;
+ import org.apache.spark.SparkContext;
+ import org.apache.spark.api.java.JavaSparkContext;
+ import org.apache.spark.api.java.JavaSparkStatusTracker;
++import org.apache.spark.serializer.KryoSerializer;
+ import org.apache.tinkerpop.gremlin.hadoop.Constants;
+ import org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph;
+ import org.apache.tinkerpop.gremlin.hadoop.structure.io.gryo.GryoInputFormat;
+ import org.apache.tinkerpop.gremlin.hadoop.structure.util.ConfUtil;
+ import org.apache.tinkerpop.gremlin.process.computer.GraphComputer;
+ import org.apache.tinkerpop.gremlin.process.computer.traversal.TraversalVertexProgram;
+ import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversalSource;
+ import org.apache.tinkerpop.gremlin.process.traversal.engine.ComputerTraversalEngine;
+ import org.apache.tinkerpop.gremlin.spark.structure.io.PersistedInputRDD;
+ import org.apache.tinkerpop.gremlin.spark.structure.io.PersistedOutputRDD;
 -import org.apache.tinkerpop.gremlin.spark.structure.io.gryo.GryoSerializer;
+ import org.apache.tinkerpop.gremlin.structure.Graph;
+ import org.apache.tinkerpop.gremlin.structure.util.GraphFactory;
+ import org.junit.Test;
+ 
+ import java.util.UUID;
+ 
+ import static org.junit.Assert.assertTrue;
+ 
 -public class LocalPropertyTest
 -{
++public class LocalPropertyTest {
+ 
+ 
+     @Test
 -    public void shouldSetThreadLocalProperties() throws Exception
 -    {
++    public void shouldSetThreadLocalProperties() throws Exception {
+         final String testName = "ThreadLocalProperties";
+         final String rddName = "target/test-output/" + UUID.randomUUID();
+         final Configuration configuration = new BaseConfiguration();
+         configuration.setProperty("spark.master", "local[4]");
 -        configuration.setProperty("spark.serializer", GryoSerializer.class.getCanonicalName());
++        configuration.setProperty("spark.serializer", KryoSerializer.class.getCanonicalName());
+         configuration.setProperty(Graph.GRAPH, HadoopGraph.class.getName());
+         configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, SparkHadoopGraphProvider.PATHS.get("tinkerpop-modern.kryo"));
+         configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_INPUT_FORMAT, GryoInputFormat.class.getCanonicalName());
+         configuration.setProperty(Constants.GREMLIN_SPARK_GRAPH_OUTPUT_RDD, PersistedOutputRDD.class.getCanonicalName());
+         configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, rddName);
+         configuration.setProperty(Constants.GREMLIN_HADOOP_JARS_IN_DISTRIBUTED_CACHE, false);
+         configuration.setProperty(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, true);
+         configuration.setProperty("spark.jobGroup.id", "22");
+         Graph graph = GraphFactory.open(configuration);
+         graph.compute(SparkGraphComputer.class)
+                 .result(GraphComputer.ResultGraph.NEW)
+                 .persist(GraphComputer.Persist.EDGES)
+                 .program(TraversalVertexProgram.build()
+                         .traversal(GraphTraversalSource.build().engine(ComputerTraversalEngine.build().computer(SparkGraphComputer.class)),
+                                 "gremlin-groovy",
+                                 "g.V()").create(graph)).submit().get();
+         ////////
+         SparkConf sparkConfiguration = new SparkConf();
+         sparkConfiguration.setAppName(testName);
+         ConfUtil.makeHadoopConfiguration(configuration).forEach(entry -> sparkConfiguration.set(entry.getKey(), entry.getValue()));
+         JavaSparkContext sparkContext = new JavaSparkContext(SparkContext.getOrCreate(sparkConfiguration));
+         JavaSparkStatusTracker statusTracker = sparkContext.statusTracker();
+         assertTrue(statusTracker.getJobIdsForGroup("22").length >= 1);
+         assertTrue(PersistedInputRDD.getPersistedRDD(sparkContext, rddName).isPresent());
+         ///////
+         configuration.setProperty(Constants.GREMLIN_SPARK_GRAPH_INPUT_RDD, PersistedInputRDD.class.getCanonicalName());
+         configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, rddName);
+         configuration.setProperty(Constants.GREMLIN_SPARK_GRAPH_OUTPUT_RDD, null);
+         configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, null);
+         configuration.setProperty(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, false);
+         configuration.setProperty("spark.jobGroup.id", "44");
+         graph = GraphFactory.open(configuration);
+         graph.compute(SparkGraphComputer.class)
+                 .result(GraphComputer.ResultGraph.NEW)
+                 .persist(GraphComputer.Persist.NOTHING)
+                 .program(TraversalVertexProgram.build()
+                         .traversal(GraphTraversalSource.build().engine(ComputerTraversalEngine.build().computer(SparkGraphComputer.class)),
+                                 "gremlin-groovy",
+                                 "g.V()").create(graph)).submit().get();
+         ///////
+         assertTrue(statusTracker.getJobIdsForGroup("44").length >= 1);
+     }
+ }
+