You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tinkerpop.apache.org by ok...@apache.org on 2015/09/09 22:20:41 UTC

[11/18] incubator-tinkerpop git commit: Spark 1.4.1 working with TP3 (Spark's dependency scene is a mess).

Spark 1.4.1 working with TP3 (Spark's dependency scene is a mess).


Project: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/commit/8de580e6
Tree: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/tree/8de580e6
Diff: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/diff/8de580e6

Branch: refs/heads/master
Commit: 8de580e663f9fd8c04f8c413df3cb66a42e5a577
Parents: 1303acf
Author: Marko A. Rodriguez <ok...@gmail.com>
Authored: Tue Sep 8 12:17:32 2015 -0600
Committer: Marko A. Rodriguez <ok...@gmail.com>
Committed: Tue Sep 8 12:17:32 2015 -0600

----------------------------------------------------------------------
 .../gremlin/hadoop/HadoopGraphProvider.java     | 19 --------
 spark-gremlin/pom.xml                           | 40 ++++++++++++++--
 spark-gremlin/src/assembly/hadoop-job.xml       | 39 +++++++++++++++
 spark-gremlin/src/assembly/standalone.xml       | 48 +++++++++++++++++++
 .../spark/groovy/plugin/SparkGremlinPlugin.java | 50 +++++++++++++++++++-
 .../spark/process/HadoopGraphProvider.java      |  3 --
 6 files changed, 172 insertions(+), 27 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/8de580e6/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/HadoopGraphProvider.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/HadoopGraphProvider.java b/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/HadoopGraphProvider.java
index ba0e75e..c05d2ad 100644
--- a/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/HadoopGraphProvider.java
+++ b/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/HadoopGraphProvider.java
@@ -43,7 +43,6 @@ import java.util.List;
 import java.util.Map;
 import java.util.Random;
 import java.util.Set;
-import java.util.concurrent.TimeUnit;
 
 /**
  * @author Marko A. Rodriguez (http://markorodriguez.com)
@@ -109,24 +108,6 @@ public class HadoopGraphProvider extends AbstractGraphProvider {
             put(Constants.GREMLIN_HADOOP_GRAPH_OUTPUT_FORMAT, GryoOutputFormat.class.getCanonicalName());
             put(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, "hadoop-gremlin/target/test-output");
             put(Constants.GREMLIN_HADOOP_JARS_IN_DISTRIBUTED_CACHE, false);
-            /// giraph configuration
-            //put(GiraphConstants.MIN_WORKERS, 1);
-            //put(GiraphConstants.MAX_WORKERS, 1);
-            //put(GiraphConstants.SPLIT_MASTER_WORKER.getKey(), false);
-            //put(GiraphConstants.ZOOKEEPER_SERVER_PORT.getKey(), 2181);  // you must have a local zookeeper running on this port
-            //put(GiraphConstants.NETTY_SERVER_USE_EXECUTION_HANDLER.getKey(), false); // this prevents so many integration tests running out of threads
-            //put(GiraphConstants.NETTY_CLIENT_USE_EXECUTION_HANDLER.getKey(), false); // this prevents so many integration tests running out of threads
-            //put(GiraphConstants.NUM_INPUT_THREADS.getKey(), 3);
-            //put(GiraphConstants.NUM_COMPUTE_THREADS.getKey(), 3);
-            //put(GiraphConstants.MAX_MASTER_SUPERSTEP_WAIT_MSECS.getKey(), TimeUnit.MINUTES.toMillis(60L));
-            //put("mapred.reduce.tasks", 4);
-            //put("giraph.vertexOutputFormatThreadSafe", false);
-            //put("giraph.numOutputThreads", 3);
-
-            /// spark configuration
-            put("spark.master", "local[4]");
-            put("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
-            // put("spark.kryo.registrationRequired",true);
         }};
     }
 

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/8de580e6/spark-gremlin/pom.xml
----------------------------------------------------------------------
diff --git a/spark-gremlin/pom.xml b/spark-gremlin/pom.xml
index f1c696e..a0bd25e 100644
--- a/spark-gremlin/pom.xml
+++ b/spark-gremlin/pom.xml
@@ -17,7 +17,6 @@
   ~ specific language governing permissions and limitations
   ~ under the License.
   -->
-
 <project xmlns="http://maven.apache.org/POM/4.0.0"
          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
@@ -34,6 +33,20 @@
             <groupId>org.apache.tinkerpop</groupId>
             <artifactId>gremlin-core</artifactId>
             <version>${project.version}</version>
+            <exclusions>
+                <exclusion>
+                    <groupId>com.fasterxml.jackson.core</groupId>
+                    <artifactId>jackson-databind</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>com.fasterxml.jackson.core</groupId>
+                    <artifactId>jackson-core</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>com.fasterxml.jackson.core</groupId>
+                    <artifactId>jackson-annotations</artifactId>
+                </exclusion>
+            </exclusions>
         </dependency>
         <dependency>
             <groupId>org.apache.tinkerpop</groupId>
@@ -65,13 +78,21 @@
                     <groupId>org.mortbay.jetty</groupId>
                     <artifactId>jsp-api-2.1</artifactId>
                 </exclusion>
+                <exclusion>
+                    <groupId>com.sun.jersey</groupId>
+                    <artifactId>jersey-server</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>com.sun.jersey</groupId>
+                    <artifactId>jersey-core</artifactId>
+                </exclusion>
             </exclusions>
         </dependency>
         <!-- SPARK GRAPH COMPUTER -->
         <dependency>
             <groupId>org.apache.spark</groupId>
             <artifactId>spark-core_2.10</artifactId>
-            <version>1.2.1</version>
+            <version>1.4.1</version>
             <exclusions>
                 <!-- self conflicts -->
                 <exclusion>
@@ -83,6 +104,10 @@
                     <artifactId>scala-library</artifactId>
                 </exclusion>
                 <exclusion>
+                    <groupId>org.scala-lang</groupId>
+                    <artifactId>scala-reflect</artifactId>
+                </exclusion>
+                <exclusion>
                     <groupId>log4j</groupId>
                     <artifactId>log4j</artifactId>
                 </exclusion>
@@ -107,6 +132,10 @@
                     <groupId>org.slf4j</groupId>
                     <artifactId>jcl-over-slf4j</artifactId>
                 </exclusion>
+                <exclusion>
+                    <groupId>org.apache.ivy</groupId>
+                    <artifactId>ivy</artifactId>
+                </exclusion>
                 <!-- gremlin-groovy conflicts -->
                 <exclusion>
                     <groupId>jline</groupId>
@@ -120,7 +149,7 @@
                 <!-- lgpl conflicts -->
                 <exclusion>
                     <groupId>com.google.code.findbugs</groupId>
-                    <artifactId>findbugs</artifactId>
+                    <artifactId>jsr305</artifactId>
                 </exclusion>
             </exclusions>
         </dependency>
@@ -130,6 +159,11 @@
             <artifactId>scala-library</artifactId>
             <version>2.10.3</version>
         </dependency>
+        <dependency>
+            <groupId>com.fasterxml.jackson.core</groupId>
+            <artifactId>jackson-databind</artifactId>
+            <version>2.4.4</version>
+        </dependency>
         <!-- TEST -->
         <dependency>
             <groupId>org.apache.tinkerpop</groupId>

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/8de580e6/spark-gremlin/src/assembly/hadoop-job.xml
----------------------------------------------------------------------
diff --git a/spark-gremlin/src/assembly/hadoop-job.xml b/spark-gremlin/src/assembly/hadoop-job.xml
new file mode 100644
index 0000000..3093016
--- /dev/null
+++ b/spark-gremlin/src/assembly/hadoop-job.xml
@@ -0,0 +1,39 @@
+<!--
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements.  See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+-->
+<assembly> <!-- Maven assembly descriptor for the "job" assembly -->
+    <id>job</id>
+    <formats>
+        <format>jar</format> <!-- the assembly is produced as a jar -->
+    </formats>
+    <includeBaseDirectory>false</includeBaseDirectory>
+    <dependencySets>
+        <dependencySet> <!-- runtime-scope dependencies, kept as packed jars under lib -->
+            <unpack>false</unpack>
+            <scope>runtime</scope>
+            <outputDirectory>lib</outputDirectory>
+            <excludes>
+                <exclude>${groupId}:${artifactId}</exclude> <!-- presumably resolves to this module's own artifact; TODO confirm -->
+            </excludes>
+        </dependencySet>
+        <dependencySet> <!-- the artifact excluded above is included here and unpacked instead -->
+            <unpack>true</unpack>
+            <includes>
+                <include>${groupId}:${artifactId}</include>
+            </includes>
+        </dependencySet>
+    </dependencySets>
+</assembly>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/8de580e6/spark-gremlin/src/assembly/standalone.xml
----------------------------------------------------------------------
diff --git a/spark-gremlin/src/assembly/standalone.xml b/spark-gremlin/src/assembly/standalone.xml
new file mode 100644
index 0000000..fa7cef1
--- /dev/null
+++ b/spark-gremlin/src/assembly/standalone.xml
@@ -0,0 +1,48 @@
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one
+  ~ or more contributor license agreements.  See the NOTICE file
+  ~ distributed with this work for additional information
+  ~ regarding copyright ownership.  The ASF licenses this file
+  ~ to you under the Apache License, Version 2.0 (the
+  ~ "License"); you may not use this file except in compliance
+  ~ with the License.  You may obtain a copy of the License at
+  ~
+  ~ http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing,
+  ~ software distributed under the License is distributed on an
+  ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  ~ KIND, either express or implied.  See the License for the
+  ~ specific language governing permissions and limitations
+  ~ under the License.
+  -->
+<assembly> <!-- Maven assembly descriptor for the "standalone" assembly -->
+    <id>standalone</id>
+    <formats>
+        <format>dir</format> <!-- output is a directory rather than an archive -->
+    </formats>
+    <includeBaseDirectory>false</includeBaseDirectory>
+    <fileSets>
+        <fileSet> <!-- files from src/main/bin are copied to /bin with mode 0755 -->
+            <directory>src/main/bin</directory>
+            <outputDirectory>/bin</outputDirectory>
+            <fileMode>0755</fileMode>
+        </fileSet>
+        <fileSet>
+            <directory>target/*.jar</directory> <!-- NOTE(review): glob inside "directory"; patterns normally go in "includes", so confirm this matches anything -->
+            <outputDirectory>/lib</outputDirectory>
+        </fileSet>
+    </fileSets>
+    <dependencySets>
+        <dependencySet> <!-- compile-scope dependencies, copied packed into /lib -->
+            <outputDirectory>/lib</outputDirectory>
+            <unpack>false</unpack>
+            <scope>compile</scope>
+        </dependencySet>
+        <dependencySet> <!-- provided-scope dependencies, also copied packed into /lib -->
+            <outputDirectory>/lib</outputDirectory>
+            <unpack>false</unpack>
+            <scope>provided</scope>
+        </dependencySet>
+    </dependencySets>
+</assembly>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/8de580e6/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/groovy/plugin/SparkGremlinPlugin.java
----------------------------------------------------------------------
diff --git a/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/groovy/plugin/SparkGremlinPlugin.java b/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/groovy/plugin/SparkGremlinPlugin.java
index 0b04300..51a88b3 100644
--- a/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/groovy/plugin/SparkGremlinPlugin.java
+++ b/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/groovy/plugin/SparkGremlinPlugin.java
@@ -19,8 +19,54 @@
 
 package org.apache.tinkerpop.gremlin.spark.groovy.plugin;
 
+import org.apache.tinkerpop.gremlin.groovy.plugin.AbstractGremlinPlugin;
+import org.apache.tinkerpop.gremlin.groovy.plugin.IllegalEnvironmentException;
+import org.apache.tinkerpop.gremlin.groovy.plugin.PluginAcceptor;
+import org.apache.tinkerpop.gremlin.groovy.plugin.PluginInitializationException;
+import org.apache.tinkerpop.gremlin.groovy.plugin.RemoteAcceptor;
+import org.apache.tinkerpop.gremlin.spark.process.computer.SparkGraphComputer;
+
+import java.util.HashSet;
+import java.util.Optional;
+import java.util.Set;
+
 /**
  * @author Marko A. Rodriguez (http://markorodriguez.com)
  */
-public class SparkGremlinPlugin {
-}
+public class SparkGremlinPlugin extends AbstractGremlinPlugin { // plugin that adds the SparkGraphComputer package import to a PluginAcceptor
+
+    protected static String NAME = "tinkerpop.spark"; // value returned by getName(); NOTE(review): static but not final, so it can be reassigned - confirm intent
+
+    protected static final Set<String> IMPORTS = new HashSet<String>() {{ // import strings passed to PluginAcceptor.addImports
+        add(IMPORT_SPACE + SparkGraphComputer.class.getPackage().getName() + DOT_STAR); // presumably a wildcard import of that package; IMPORT_SPACE and DOT_STAR are defined outside this view
+    }};
+
+    public SparkGremlinPlugin() {
+        super(true); // meaning of the flag is defined by AbstractGremlinPlugin (not visible here)
+    }
+
+    @Override
+    public String getName() {
+        return NAME;
+    }
+
+    @Override
+    public void afterPluginTo(final PluginAcceptor pluginAcceptor) throws PluginInitializationException, IllegalEnvironmentException {
+        pluginAcceptor.addImports(IMPORTS); // register the imports before evaluating any script
+        try {
+            pluginAcceptor.eval(String.format("Logger.getLogger(%s).setLevel(Level.INFO)", SparkGraphComputer.class.getName())); // evaluated script sets the SparkGraphComputer logger level to INFO
+        } catch (final Exception e) {
+            throw new PluginInitializationException(e.getMessage(), e); // any eval failure is rethrown with the original exception as cause
+        }
+    }
+
+    @Override
+    public boolean requireRestart() {
+        return true; // always reports that a restart is required
+    }
+
+    @Override
+    public Optional<RemoteAcceptor> remoteAcceptor() {
+        return Optional.empty(); // this plugin supplies no RemoteAcceptor
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/8de580e6/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/process/HadoopGraphProvider.java
----------------------------------------------------------------------
diff --git a/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/process/HadoopGraphProvider.java b/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/process/HadoopGraphProvider.java
index ebda863..cdc4a0b 100644
--- a/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/process/HadoopGraphProvider.java
+++ b/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/process/HadoopGraphProvider.java
@@ -109,9 +109,6 @@ public class HadoopGraphProvider extends AbstractGraphProvider {
             put(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, "hadoop-gremlin/target/test-output");
             put(Constants.GREMLIN_HADOOP_JARS_IN_DISTRIBUTED_CACHE, false);
             put("mapred.reduce.tasks", 4);
-            //put("giraph.vertexOutputFormatThreadSafe", false);
-            //put("giraph.numOutputThreads", 3);
-
             /// spark configuration
             put("spark.master", "local[4]");
             put("spark.serializer", "org.apache.spark.serializer.KryoSerializer");