You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tinkerpop.apache.org by ok...@apache.org on 2015/03/02 17:24:44 UTC
incubator-tinkerpop git commit: first push on the spark
adaptor....crazy dep hell.
Repository: incubator-tinkerpop
Updated Branches:
refs/heads/spark [created] 531b86a8f
first push on the spark adaptor....crazy dep hell.
Project: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/commit/531b86a8
Tree: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/tree/531b86a8
Diff: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/diff/531b86a8
Branch: refs/heads/spark
Commit: 531b86a8f82ee683f4c7d853d502ed5a6eeeeb46
Parents: 0d7802d
Author: Marko A. Rodriguez <ok...@gmail.com>
Authored: Mon Mar 2 09:24:46 2015 -0700
Committer: Marko A. Rodriguez <ok...@gmail.com>
Committed: Mon Mar 2 09:24:46 2015 -0700
----------------------------------------------------------------------
hadoop-gremlin/pom.xml | 159 ++++++++++++++++++-
.../tinkerpop/gremlin/hadoop/Constants.java | 1 +
.../computer/spark/SparkGraphComputer.java | 113 +++++++++++++
3 files changed, 272 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/531b86a8/hadoop-gremlin/pom.xml
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/pom.xml b/hadoop-gremlin/pom.xml
index 52496d2..84497c9 100644
--- a/hadoop-gremlin/pom.xml
+++ b/hadoop-gremlin/pom.xml
@@ -45,8 +45,55 @@ limitations under the License.
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-core</artifactId>
<version>1.2.1</version>
+ <exclusions>
+ <exclusion>
+ <groupId>net.java.dev.jets3t</groupId>
+ <artifactId>jets3t</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>commons-net</groupId>
+ <artifactId>commons-net</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>javax.servlet</groupId>
+ <artifactId>servlet-api</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>javax.servlet</groupId>
+ <artifactId>javax.servlet-api</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>javax.servlet</groupId>
+ <artifactId>jsp-api</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.mortbay.jetty</groupId>
+ <artifactId>jetty</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.mortbay.jetty</groupId>
+ <artifactId>jetty-parent</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.mortbay.jetty</groupId>
+ <artifactId>jetty-sslengine</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.mortbay.jetty</groupId>
+ <artifactId>jetty-sslengine</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.mortbay.jetty</groupId>
+ <artifactId>jetty-util</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.eclipse.jetty</groupId>
+ <artifactId>jetty-parent</artifactId>
+ </exclusion>
+ </exclusions>
<!--<scope>provided</scope>-->
</dependency>
+ <!-- GIRAPH GRAPH COMPUTER -->
<dependency>
<groupId>org.apache.giraph</groupId>
<artifactId>giraph-core</artifactId>
@@ -71,12 +118,122 @@ limitations under the License.
<groupId>jline</groupId>
<artifactId>jline</artifactId>
</exclusion>
+ <exclusion>
+ <groupId>io.netty</groupId>
+ <artifactId>netty</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.jboss.netty</groupId>
+ <artifactId>netty</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.codehaus.jackson</groupId>
+ <artifactId>jackson-core</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.codehaus.jackson</groupId>
+ <artifactId>jackson-core-asl</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.codehaus.jackson</groupId>
+ <artifactId>jackson-mapper-asl</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.zookeeper</groupId>
+ <artifactId>zookeeper</artifactId>
+ </exclusion>
</exclusions>
</dependency>
- <!-- consistent dependencies chosen for hadoop-core -->
+ <!-- SPARK GRAPH COMPUTER -->
<dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-core_2.10</artifactId>
+ <version>1.2.1</version>
+ <exclusions>
+ <exclusion>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-core</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-mapreduce-client-app</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-client</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-api</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-log4j12</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.slf4j</groupId>
+ <artifactId>jcl-over-slf4j</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>commons-io</groupId>
+ <artifactId>commons-io</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.esotericsoftware.kryo</groupId>
+ <artifactId>kryo</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>jackson-databind</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>jackson-annotations</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>jackson-core</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.scala-lang</groupId>
+ <artifactId>scala-library</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>log4j</groupId>
+ <artifactId>log4j</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>jline</groupId>
+ <artifactId>jline</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>commons-lang</groupId>
+ <artifactId>commons-lang</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-lang3</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>commons-codec</groupId>
+ <artifactId>commons-codec</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <!-- consistent dependencies chosen for hadoop-core -->
+ <!--<dependency>
<groupId>commons-httpclient</groupId>
<artifactId>commons-httpclient</artifactId>
+
+ </dependency>-->
+ <dependency>
+ <groupId>org.scala-lang</groupId>
+ <artifactId>scala-library</artifactId>
+ <version>2.10.3</version>
</dependency>
<dependency>
<groupId>org.apache.tinkerpop</groupId>
http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/531b86a8/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/Constants.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/Constants.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/Constants.java
index 46cf993..f229b17 100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/Constants.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/Constants.java
@@ -35,6 +35,7 @@ public class Constants {
public static final String GREMLIN_HADOOP_JOB_PREFIX = "HadoopGremlin: ";
public static final String GREMLIN_HADOOP_GIRAPH_JOB_PREFIX = "HadoopGremlin(Giraph): ";
public static final String GREMLIN_HADOOP_MAP_REDUCE_JOB_PREFIX = "HadoopGremlin(MapReduce): ";
+ public static final String GREMLIN_HADOOP_SPARK_JOB_PREFIX = "HadoopGremlin(Spark): ";
public static final String HADOOP_GREMLIN_LIBS = "HADOOP_GREMLIN_LIBS";
public static final String DOT_JAR = ".jar";
public static final String GREMLIN_HADOOP_DERIVE_MEMORY = "gremlin.hadoop.deriveMemory";
http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/531b86a8/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
new file mode 100644
index 0000000..0586a14
--- /dev/null
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/process/computer/spark/SparkGraphComputer.java
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.tinkerpop.gremlin.hadoop.process.computer.spark;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.tinkerpop.gremlin.hadoop.Constants;
+import org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph;
+import org.apache.tinkerpop.gremlin.process.computer.ComputerResult;
+import org.apache.tinkerpop.gremlin.process.computer.GraphComputer;
+import org.apache.tinkerpop.gremlin.process.computer.MapReduce;
+import org.apache.tinkerpop.gremlin.process.computer.VertexProgram;
+import org.apache.tinkerpop.gremlin.process.computer.util.GraphComputerHelper;
+import org.apache.tinkerpop.gremlin.structure.util.StringFactory;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.HashSet;
+import java.util.Set;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.Future;
+
+/**
+ * @author Marko A. Rodriguez (http://markorodriguez.com)
+ */
+/**
+ * A {@link GraphComputer} for {@link HadoopGraph} that is intended to execute on Apache Spark.
+ * <p>
+ * This is a work in progress: {@link #submit()} currently performs the standard pre-flight validation and
+ * returns a future that completes with {@code null}; no Spark job is launched by it yet.
+ */
+public class SparkGraphComputer implements GraphComputer {
+
+    public static final Logger LOGGER = LoggerFactory.getLogger(SparkGraphComputer.class);
+
+    /** Spark configuration held by this computer (not yet read by any method of this class). */
+    protected final SparkConf configuration = new SparkConf();
+
+    /** The graph this computer was created for. */
+    protected final HadoopGraph hadoopGraph;
+
+    /** Set on the first call to {@link #submit()}; a computer may only be submitted once. */
+    private boolean executed = false;
+    private final Set<MapReduce> mapReduces = new HashSet<>();
+    private VertexProgram vertexProgram;
+
+    /**
+     * Creates a computer bound to the provided graph.
+     *
+     * @param hadoopGraph the graph to compute over
+     */
+    public SparkGraphComputer(final HadoopGraph hadoopGraph) {
+        this.hadoopGraph = hadoopGraph;
+    }
+
+    /**
+     * Manual smoke test of the Spark dependencies: starts a {@code local} Spark context, reads the text file
+     * {@code religious-traversals.txt} and prints the number of elements in the resulting RDD.
+     *
+     * @param args ignored
+     */
+    public static void main(final String[] args) {
+        final SparkConf configuration = new SparkConf();
+        configuration.setAppName(Constants.GREMLIN_HADOOP_SPARK_JOB_PREFIX);
+        configuration.setMaster("local");
+        final JavaSparkContext sc = new JavaSparkContext(configuration);
+        try {
+            final JavaRDD<String> rdd = sc.textFile("religious-traversals.txt");
+            System.out.println(rdd.count());
+        } finally {
+            // always release the context, even when the file is missing or the job fails
+            sc.stop();
+        }
+    }
+
+    /**
+     * Accepts only {@link Isolation#BSP}.
+     *
+     * @param isolation the requested isolation
+     * @return this computer
+     * @throws RuntimeException as produced by {@code GraphComputer.Exceptions.isolationNotSupported} for any
+     *                          other isolation
+     */
+    @Override
+    public GraphComputer isolation(final Isolation isolation) {
+        if (!isolation.equals(Isolation.BSP))
+            throw GraphComputer.Exceptions.isolationNotSupported(isolation);
+        return this;
+    }
+
+    /**
+     * Sets the vertex program to run, replacing any previously set program.
+     *
+     * @param vertexProgram the vertex program
+     * @return this computer
+     */
+    @Override
+    public GraphComputer program(final VertexProgram vertexProgram) {
+        this.vertexProgram = vertexProgram;
+        return this;
+    }
+
+    /**
+     * Registers an additional map-reduce job.
+     *
+     * @param mapReduce the job to add
+     * @return this computer
+     */
+    @Override
+    public GraphComputer mapReduce(final MapReduce mapReduce) {
+        this.mapReduces.add(mapReduce);
+        return this;
+    }
+
+    @Override
+    public String toString() {
+        return StringFactory.graphComputerString(this);
+    }
+
+    /**
+     * Validates the state of this computer and returns a future for the result.
+     * <p>
+     * The computer is marked as executed before validation, so a second call always fails, even if the
+     * first one was rejected. The returned future currently completes with {@code null}.
+     *
+     * @return a future for the computation result
+     */
+    @Override
+    public Future<ComputerResult> submit() {
+        if (this.executed)
+            throw Exceptions.computerHasAlreadyBeenSubmittedAVertexProgram();
+        this.executed = true;
+
+        // it is not possible execute a computer if it has no vertex program nor mapreducers
+        if (null == this.vertexProgram && this.mapReduces.isEmpty())
+            throw GraphComputer.Exceptions.computerHasNoVertexProgramNorMapReducers();
+        // it is possible to run mapreducers without a vertex program
+        if (null != this.vertexProgram)
+            GraphComputerHelper.validateProgramOnComputer(this, this.vertexProgram);
+
+        // placeholder until the Spark execution is implemented
+        return CompletableFuture.<ComputerResult>supplyAsync(() -> null);
+    }
+
+}