You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tinkerpop.apache.org by ok...@apache.org on 2015/05/19 16:31:14 UTC
incubator-tinkerpop git commit: old text in docs removed.
MapReduce.NullObject pattern copied from Hadoop's NullWritable.
CountMapReduce, Max,
Min assume a full reduction on key -- was a bug in the past. Can't seem to
reproduce it anymore. HadoopPools wi
Repository: incubator-tinkerpop
Updated Branches:
refs/heads/master 597d98f16 -> befabe20a
old text in docs removed. MapReduce.NullObject pattern copied from Hadoop's NullWritable. CountMapReduce, Max, Min assume a full reduction on key -- was a bug in the past. Can't seem to reproduce it anymore. HadoopPools will try to initialize; if not, a default initialization is provided.
Project: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/commit/befabe20
Tree: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/tree/befabe20
Diff: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/diff/befabe20
Branch: refs/heads/master
Commit: befabe20aad9a0b626db7c322d172979391fec62
Parents: 597d98f
Author: Marko A. Rodriguez <ok...@gmail.com>
Authored: Tue May 19 08:31:11 2015 -0600
Committer: Marko A. Rodriguez <ok...@gmail.com>
Committed: Tue May 19 08:31:21 2015 -0600
----------------------------------------------------------------------
docs/src/implementations.asciidoc | 2 --
.../gremlin/process/computer/MapReduce.java | 22 +++++++-------------
.../traversal/step/map/CountGlobalStep.java | 6 +-----
.../traversal/step/map/MaxGlobalStep.java | 16 +++++---------
.../traversal/step/map/MinGlobalStep.java | 9 +-------
.../hadoop/structure/io/HadoopPools.java | 17 ++++++++++-----
.../gremlin/hadoop/HadoopGraphProvider.java | 17 +++++++++++++--
7 files changed, 41 insertions(+), 48 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/befabe20/docs/src/implementations.asciidoc
----------------------------------------------------------------------
diff --git a/docs/src/implementations.asciidoc b/docs/src/implementations.asciidoc
index 73093d1..bfec9cd 100644
--- a/docs/src/implementations.asciidoc
+++ b/docs/src/implementations.asciidoc
@@ -667,8 +667,6 @@ WARNING: Giraph uses a large number of Hadoop counters. The default for Hadoop i
WARNING: The maximum number of workers can be no larger than the number of map-slots in the Hadoop cluster minus 1. For example, if the Hadoop cluster has 4 map slots, then `giraph.maxWorkers` can not be larger than 3. One map-slot is reserved for the master compute node and all other slots can be allocated as workers to execute the VertexPrograms on the vertices of the graph.
-IMPORTANT: `GiraphGraphComputer` is the default graph computer for `HadoopGraph`. If another graph computer is desired, then it can be set using `HadoopGraph.compute(...)` or via the properties file `gremlin.hadoop.defaultGraphComputer` setting.
-
[source,text]
gremlin> g = graph.traversal(computer()) // GiraphGraphComputer is the default graph computer when no class is specified
==>graphtraversalsource[hadoopgraph[gryoinputformat->gryooutputformat], giraphgraphcomputer]
http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/befabe20/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/computer/MapReduce.java
----------------------------------------------------------------------
diff --git a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/computer/MapReduce.java b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/computer/MapReduce.java
index 4852a9c..a75c4c8 100644
--- a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/computer/MapReduce.java
+++ b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/computer/MapReduce.java
@@ -22,9 +22,6 @@ import org.apache.commons.configuration.Configuration;
import org.apache.tinkerpop.gremlin.structure.Graph;
import org.apache.tinkerpop.gremlin.structure.Vertex;
-import java.io.IOException;
-import java.io.ObjectInputStream;
-import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.lang.reflect.Constructor;
import java.util.Comparator;
@@ -274,7 +271,7 @@ public interface MapReduce<MK, MV, RK, RV, R> extends Cloneable {
/**
* A convenience singleton when a single key is needed so that all emitted values converge to the same combiner/reducer.
*/
- public static class NullObject implements Comparable<NullObject>, Serializable {
+ public static class NullObject implements Comparable, Serializable {
private static final NullObject INSTANCE = new NullObject();
private static final String NULL_OBJECT = "";
@@ -284,7 +281,7 @@ public interface MapReduce<MK, MV, RK, RV, R> extends Cloneable {
@Override
public int hashCode() {
- return 666666666;
+ return 0;
}
@Override
@@ -293,21 +290,16 @@ public interface MapReduce<MK, MV, RK, RV, R> extends Cloneable {
}
@Override
- public int compareTo(final NullObject nullObject) {
- return 0;
+ public int compareTo(final Object object) {
+ if (object instanceof NullObject)
+ return 0;
+ else
+ throw new IllegalArgumentException("The " + NullObject.class.getSimpleName() + " can not be compared with " + object.getClass().getSimpleName());
}
@Override
public String toString() {
return NULL_OBJECT;
}
-
- private void readObject(final ObjectInputStream inputStream) throws ClassNotFoundException, IOException {
-
- }
-
- private void writeObject(final ObjectOutputStream outputStream) throws IOException {
-
- }
}
}
http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/befabe20/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/CountGlobalStep.java
----------------------------------------------------------------------
diff --git a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/CountGlobalStep.java b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/CountGlobalStep.java
index 843de07..409c445 100644
--- a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/CountGlobalStep.java
+++ b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/CountGlobalStep.java
@@ -122,11 +122,7 @@ public final class CountGlobalStep<S> extends ReducingBarrierStep<S, Long> imple
@Override
public Long generateFinalResult(final Iterator<KeyValue<NullObject, Long>> keyValues) {
- long count = 0l;
- while (keyValues.hasNext()) {
- count = count + keyValues.next().getValue();
- }
- return count;
+ return keyValues.hasNext() ? keyValues.next().getValue() : 0L;
}
public static final CountGlobalMapReduce instance() {
http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/befabe20/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/MaxGlobalStep.java
----------------------------------------------------------------------
diff --git a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/MaxGlobalStep.java b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/MaxGlobalStep.java
index 8646ca6..d7dc34d 100644
--- a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/MaxGlobalStep.java
+++ b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/MaxGlobalStep.java
@@ -18,14 +18,14 @@
*/
package org.apache.tinkerpop.gremlin.process.traversal.step.map;
-import org.apache.tinkerpop.gremlin.process.traversal.Traversal;
-import org.apache.tinkerpop.gremlin.process.traversal.Traverser;
import org.apache.tinkerpop.gremlin.process.computer.KeyValue;
import org.apache.tinkerpop.gremlin.process.computer.MapReduce;
import org.apache.tinkerpop.gremlin.process.computer.traversal.TraversalVertexProgram;
import org.apache.tinkerpop.gremlin.process.computer.util.StaticMapReduce;
-import org.apache.tinkerpop.gremlin.process.traversal.step.util.ReducingBarrierStep;
+import org.apache.tinkerpop.gremlin.process.traversal.Traversal;
+import org.apache.tinkerpop.gremlin.process.traversal.Traverser;
import org.apache.tinkerpop.gremlin.process.traversal.step.MapReducer;
+import org.apache.tinkerpop.gremlin.process.traversal.step.util.ReducingBarrierStep;
import org.apache.tinkerpop.gremlin.process.traversal.traverser.TraverserRequirement;
import org.apache.tinkerpop.gremlin.process.traversal.traverser.util.TraverserSet;
import org.apache.tinkerpop.gremlin.structure.Vertex;
@@ -123,14 +123,8 @@ public final class MaxGlobalStep<S extends Number> extends ReducingBarrierStep<S
@Override
public Number generateFinalResult(final Iterator<KeyValue<NullObject, Number>> keyValues) {
- if(!keyValues.hasNext()) return Double.NaN;
- Number max = -Double.MAX_VALUE;
- while (keyValues.hasNext()) {
- final Number value = keyValues.next().getValue();
- if (value.doubleValue() > max.doubleValue())
- max = value;
- }
- return max;
+ return keyValues.hasNext() ? keyValues.next().getValue() : Double.NaN;
+
}
public static final MaxGlobalMapReduce instance() {
http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/befabe20/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/MinGlobalStep.java
----------------------------------------------------------------------
diff --git a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/MinGlobalStep.java b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/MinGlobalStep.java
index a8b1ee3..6a3ba10 100644
--- a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/MinGlobalStep.java
+++ b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/MinGlobalStep.java
@@ -123,14 +123,7 @@ public final class MinGlobalStep<S extends Number> extends ReducingBarrierStep<S
@Override
public Number generateFinalResult(final Iterator<KeyValue<NullObject, Number>> keyValues) {
- if(!keyValues.hasNext()) return Double.NaN;
- Number min = Double.MAX_VALUE;
- while (keyValues.hasNext()) {
- final Number value = keyValues.next().getValue();
- if (value.doubleValue() < min.doubleValue())
- min = value;
- }
- return min;
+ return keyValues.hasNext() ? keyValues.next().getValue() : Double.NaN;
}
public static final MinGlobalMapReduce instance() {
http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/befabe20/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/structure/io/HadoopPools.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/structure/io/HadoopPools.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/structure/io/HadoopPools.java
index bd2d1cf..bff3960 100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/structure/io/HadoopPools.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/structure/io/HadoopPools.java
@@ -21,32 +21,39 @@ package org.apache.tinkerpop.gremlin.hadoop.structure.io;
import org.apache.commons.configuration.Configuration;
import org.apache.tinkerpop.gremlin.hadoop.structure.util.ConfUtil;
import org.apache.tinkerpop.gremlin.structure.io.gryo.GryoPool;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
* @author Marko A. Rodriguez (http://markorodriguez.com)
*/
public final class HadoopPools {
+ private static final Logger LOGGER = LoggerFactory.getLogger(HadoopPools.class);
+
private HadoopPools() {
}
- private static GryoPool GRYO_POOL;
+ private static GryoPool GRYO_POOL = new GryoPool(256);
+ private static boolean INITIALIZED = false;
public synchronized static void initialize(final Configuration configuration) {
- if (null == GRYO_POOL) {
+ if (!INITIALIZED) {
+ INITIALIZED = true;
GRYO_POOL = new GryoPool(configuration);
}
}
public synchronized static void initialize(final org.apache.hadoop.conf.Configuration configuration) {
- if (null == GRYO_POOL) {
+ if (!INITIALIZED) {
+ INITIALIZED = true;
GRYO_POOL = new GryoPool(ConfUtil.makeApacheConfiguration(configuration));
}
}
public static GryoPool getGryoPool() {
- if (null == GRYO_POOL)
- throw new IllegalStateException("The GryoPool has not been initialized");
+ if (!INITIALIZED)
+ LOGGER.info("The " + HadoopPools.class.getSimpleName() + " has not be initialized, using the default pool"); // TODO: this is necessary because we can't get the pool intialized in the Merger code of the Hadoop process.
return GRYO_POOL;
}
}
http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/befabe20/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/HadoopGraphProvider.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/HadoopGraphProvider.java b/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/HadoopGraphProvider.java
index 1aeb6dd..1e04fac 100644
--- a/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/HadoopGraphProvider.java
+++ b/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/HadoopGraphProvider.java
@@ -24,7 +24,13 @@ import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.tinkerpop.gremlin.AbstractGraphProvider;
import org.apache.tinkerpop.gremlin.LoadGraphWith;
import org.apache.tinkerpop.gremlin.TestHelper;
-import org.apache.tinkerpop.gremlin.hadoop.structure.*;
+import org.apache.tinkerpop.gremlin.hadoop.structure.HadoopEdge;
+import org.apache.tinkerpop.gremlin.hadoop.structure.HadoopElement;
+import org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph;
+import org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraphVariables;
+import org.apache.tinkerpop.gremlin.hadoop.structure.HadoopProperty;
+import org.apache.tinkerpop.gremlin.hadoop.structure.HadoopVertex;
+import org.apache.tinkerpop.gremlin.hadoop.structure.HadoopVertexProperty;
import org.apache.tinkerpop.gremlin.hadoop.structure.io.graphson.GraphSONInputFormat;
import org.apache.tinkerpop.gremlin.hadoop.structure.io.gryo.GryoInputFormat;
import org.apache.tinkerpop.gremlin.hadoop.structure.io.gryo.GryoOutputFormat;
@@ -33,7 +39,13 @@ import org.apache.tinkerpop.gremlin.structure.io.graphson.GraphSONResourceAccess
import org.apache.tinkerpop.gremlin.structure.io.gryo.GryoResourceAccess;
import org.apache.tinkerpop.gremlin.structure.io.script.ScriptResourceAccess;
-import java.util.*;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.Set;
/**
* @author Marko A. Rodriguez (http://markorodriguez.com)
@@ -111,6 +123,7 @@ public class HadoopGraphProvider extends AbstractGraphProvider {
put(GiraphConstants.NETTY_CLIENT_USE_EXECUTION_HANDLER.getKey(), false); // this prevents so many integration tests running out of threads
put(GiraphConstants.NUM_INPUT_THREADS.getKey(), 3);
put(GiraphConstants.NUM_COMPUTE_THREADS.getKey(), 3);
+ put("mapred.reduce.tasks", 4);
//put("giraph.vertexOutputFormatThreadSafe", false);
//put("giraph.numOutputThreads", 3);