You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tinkerpop.apache.org by ok...@apache.org on 2015/05/19 16:31:14 UTC

incubator-tinkerpop git commit: old text in docs removed. MapReduce.NullObject pattern copied from Hadoop's NullWritable. CountMapReduce, Max, Min assume a full reduction on key -- was a bug in the past. Can't seem to reproduce it anymore. HadoopPools wi

Repository: incubator-tinkerpop
Updated Branches:
  refs/heads/master 597d98f16 -> befabe20a


Old text in docs removed. MapReduce.NullObject pattern copied from Hadoop's NullWritable. CountMapReduce, Max, Min assume a full reduction on key -- was a bug in the past. Can't seem to reproduce it anymore. HadoopPools will try to initialize; if it cannot, a default initialization is provided.


Project: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/commit/befabe20
Tree: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/tree/befabe20
Diff: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/diff/befabe20

Branch: refs/heads/master
Commit: befabe20aad9a0b626db7c322d172979391fec62
Parents: 597d98f
Author: Marko A. Rodriguez <ok...@gmail.com>
Authored: Tue May 19 08:31:11 2015 -0600
Committer: Marko A. Rodriguez <ok...@gmail.com>
Committed: Tue May 19 08:31:21 2015 -0600

----------------------------------------------------------------------
 docs/src/implementations.asciidoc               |  2 --
 .../gremlin/process/computer/MapReduce.java     | 22 +++++++-------------
 .../traversal/step/map/CountGlobalStep.java     |  6 +-----
 .../traversal/step/map/MaxGlobalStep.java       | 16 +++++---------
 .../traversal/step/map/MinGlobalStep.java       |  9 +-------
 .../hadoop/structure/io/HadoopPools.java        | 17 ++++++++++-----
 .../gremlin/hadoop/HadoopGraphProvider.java     | 17 +++++++++++++--
 7 files changed, 41 insertions(+), 48 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/befabe20/docs/src/implementations.asciidoc
----------------------------------------------------------------------
diff --git a/docs/src/implementations.asciidoc b/docs/src/implementations.asciidoc
index 73093d1..bfec9cd 100644
--- a/docs/src/implementations.asciidoc
+++ b/docs/src/implementations.asciidoc
@@ -667,8 +667,6 @@ WARNING: Giraph uses a large number of Hadoop counters. The default for Hadoop i
 
 WARNING: The maximum number of workers can be no larger than the number of map-slots in the Hadoop cluster minus 1. For example, if the Hadoop cluster has 4 map slots, then `giraph.maxWorkers` can not be larger than 3. One map-slot is reserved for the master compute node and all other slots can be allocated as workers to execute the VertexPrograms on the vertices of the graph.
 
-IMPORTANT: `GiraphGraphComputer` is the default graph computer for `HadoopGraph`. If another graph computer is desired, then it can be set using `HadoopGraph.compute(...)` or via the properties file `gremlin.hadoop.defaultGraphComputer` setting.
-
 [source,text]
 gremlin> g = graph.traversal(computer()) // GiraphGraphComputer is the default graph computer when no class is specified
 ==>graphtraversalsource[hadoopgraph[gryoinputformat->gryooutputformat], giraphgraphcomputer]

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/befabe20/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/computer/MapReduce.java
----------------------------------------------------------------------
diff --git a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/computer/MapReduce.java b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/computer/MapReduce.java
index 4852a9c..a75c4c8 100644
--- a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/computer/MapReduce.java
+++ b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/computer/MapReduce.java
@@ -22,9 +22,6 @@ import org.apache.commons.configuration.Configuration;
 import org.apache.tinkerpop.gremlin.structure.Graph;
 import org.apache.tinkerpop.gremlin.structure.Vertex;
 
-import java.io.IOException;
-import java.io.ObjectInputStream;
-import java.io.ObjectOutputStream;
 import java.io.Serializable;
 import java.lang.reflect.Constructor;
 import java.util.Comparator;
@@ -274,7 +271,7 @@ public interface MapReduce<MK, MV, RK, RV, R> extends Cloneable {
     /**
      * A convenience singleton when a single key is needed so that all emitted values converge to the same combiner/reducer.
      */
-    public static class NullObject implements Comparable<NullObject>, Serializable {
+    public static class NullObject implements Comparable, Serializable {
         private static final NullObject INSTANCE = new NullObject();
         private static final String NULL_OBJECT = "";
 
@@ -284,7 +281,7 @@ public interface MapReduce<MK, MV, RK, RV, R> extends Cloneable {
 
         @Override
         public int hashCode() {
-            return 666666666;
+            return 0;
         }
 
         @Override
@@ -293,21 +290,16 @@ public interface MapReduce<MK, MV, RK, RV, R> extends Cloneable {
         }
 
         @Override
-        public int compareTo(final NullObject nullObject) {
-            return 0;
+        public int compareTo(final Object object) {
+            if (object instanceof NullObject)
+                return 0;
+            else
+                throw new IllegalArgumentException("The " + NullObject.class.getSimpleName() + " can not be compared with " + object.getClass().getSimpleName());
         }
 
         @Override
         public String toString() {
             return NULL_OBJECT;
         }
-
-        private void readObject(final ObjectInputStream inputStream) throws ClassNotFoundException, IOException {
-
-        }
-
-        private void writeObject(final ObjectOutputStream outputStream) throws IOException {
-
-        }
     }
 }

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/befabe20/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/CountGlobalStep.java
----------------------------------------------------------------------
diff --git a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/CountGlobalStep.java b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/CountGlobalStep.java
index 843de07..409c445 100644
--- a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/CountGlobalStep.java
+++ b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/CountGlobalStep.java
@@ -122,11 +122,7 @@ public final class CountGlobalStep<S> extends ReducingBarrierStep<S, Long> imple
 
         @Override
         public Long generateFinalResult(final Iterator<KeyValue<NullObject, Long>> keyValues) {
-            long count = 0l;
-            while (keyValues.hasNext()) {
-                count = count + keyValues.next().getValue();
-            }
-            return count;
+            return keyValues.hasNext() ? keyValues.next().getValue() : 0L;
         }
 
         public static final CountGlobalMapReduce instance() {

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/befabe20/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/MaxGlobalStep.java
----------------------------------------------------------------------
diff --git a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/MaxGlobalStep.java b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/MaxGlobalStep.java
index 8646ca6..d7dc34d 100644
--- a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/MaxGlobalStep.java
+++ b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/MaxGlobalStep.java
@@ -18,14 +18,14 @@
  */
 package org.apache.tinkerpop.gremlin.process.traversal.step.map;
 
-import org.apache.tinkerpop.gremlin.process.traversal.Traversal;
-import org.apache.tinkerpop.gremlin.process.traversal.Traverser;
 import org.apache.tinkerpop.gremlin.process.computer.KeyValue;
 import org.apache.tinkerpop.gremlin.process.computer.MapReduce;
 import org.apache.tinkerpop.gremlin.process.computer.traversal.TraversalVertexProgram;
 import org.apache.tinkerpop.gremlin.process.computer.util.StaticMapReduce;
-import org.apache.tinkerpop.gremlin.process.traversal.step.util.ReducingBarrierStep;
+import org.apache.tinkerpop.gremlin.process.traversal.Traversal;
+import org.apache.tinkerpop.gremlin.process.traversal.Traverser;
 import org.apache.tinkerpop.gremlin.process.traversal.step.MapReducer;
+import org.apache.tinkerpop.gremlin.process.traversal.step.util.ReducingBarrierStep;
 import org.apache.tinkerpop.gremlin.process.traversal.traverser.TraverserRequirement;
 import org.apache.tinkerpop.gremlin.process.traversal.traverser.util.TraverserSet;
 import org.apache.tinkerpop.gremlin.structure.Vertex;
@@ -123,14 +123,8 @@ public final class MaxGlobalStep<S extends Number> extends ReducingBarrierStep<S
 
         @Override
         public Number generateFinalResult(final Iterator<KeyValue<NullObject, Number>> keyValues) {
-            if(!keyValues.hasNext()) return Double.NaN;
-            Number max = -Double.MAX_VALUE;
-            while (keyValues.hasNext()) {
-                final Number value = keyValues.next().getValue();
-                if (value.doubleValue() > max.doubleValue())
-                    max = value;
-            }
-            return max;
+            return keyValues.hasNext() ? keyValues.next().getValue() : Double.NaN;
+
         }
 
         public static final MaxGlobalMapReduce instance() {

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/befabe20/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/MinGlobalStep.java
----------------------------------------------------------------------
diff --git a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/MinGlobalStep.java b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/MinGlobalStep.java
index a8b1ee3..6a3ba10 100644
--- a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/MinGlobalStep.java
+++ b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/MinGlobalStep.java
@@ -123,14 +123,7 @@ public final class MinGlobalStep<S extends Number> extends ReducingBarrierStep<S
 
         @Override
         public Number generateFinalResult(final Iterator<KeyValue<NullObject, Number>> keyValues) {
-            if(!keyValues.hasNext()) return Double.NaN;
-            Number min = Double.MAX_VALUE;
-            while (keyValues.hasNext()) {
-                final Number value = keyValues.next().getValue();
-                if (value.doubleValue() < min.doubleValue())
-                    min = value;
-            }
-            return min;
+            return keyValues.hasNext() ? keyValues.next().getValue() : Double.NaN;
         }
 
         public static final MinGlobalMapReduce instance() {

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/befabe20/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/structure/io/HadoopPools.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/structure/io/HadoopPools.java b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/structure/io/HadoopPools.java
index bd2d1cf..bff3960 100644
--- a/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/structure/io/HadoopPools.java
+++ b/hadoop-gremlin/src/main/java/org/apache/tinkerpop/gremlin/hadoop/structure/io/HadoopPools.java
@@ -21,32 +21,39 @@ package org.apache.tinkerpop.gremlin.hadoop.structure.io;
 import org.apache.commons.configuration.Configuration;
 import org.apache.tinkerpop.gremlin.hadoop.structure.util.ConfUtil;
 import org.apache.tinkerpop.gremlin.structure.io.gryo.GryoPool;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * @author Marko A. Rodriguez (http://markorodriguez.com)
  */
 public final class HadoopPools {
 
+    private static final Logger LOGGER = LoggerFactory.getLogger(HadoopPools.class);
+
     private HadoopPools() {
     }
 
-    private static GryoPool GRYO_POOL;
+    private static GryoPool GRYO_POOL = new GryoPool(256);
+    private static boolean INITIALIZED = false;
 
     public synchronized static void initialize(final Configuration configuration) {
-        if (null == GRYO_POOL) {
+        if (!INITIALIZED) {
+            INITIALIZED = true;
             GRYO_POOL = new GryoPool(configuration);
         }
     }
 
     public synchronized static void initialize(final org.apache.hadoop.conf.Configuration configuration) {
-        if (null == GRYO_POOL) {
+        if (!INITIALIZED) {
+            INITIALIZED = true;
             GRYO_POOL = new GryoPool(ConfUtil.makeApacheConfiguration(configuration));
         }
     }
 
     public static GryoPool getGryoPool() {
-        if (null == GRYO_POOL)
-            throw new IllegalStateException("The GryoPool has not been initialized");
+        if (!INITIALIZED)
+            LOGGER.info("The " + HadoopPools.class.getSimpleName() + " has not be initialized, using the default pool");     // TODO: this is necessary because we can't get the pool intialized in the Merger code of the Hadoop process.
         return GRYO_POOL;
     }
 }

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/befabe20/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/HadoopGraphProvider.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/HadoopGraphProvider.java b/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/HadoopGraphProvider.java
index 1aeb6dd..1e04fac 100644
--- a/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/HadoopGraphProvider.java
+++ b/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/HadoopGraphProvider.java
@@ -24,7 +24,13 @@ import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
 import org.apache.tinkerpop.gremlin.AbstractGraphProvider;
 import org.apache.tinkerpop.gremlin.LoadGraphWith;
 import org.apache.tinkerpop.gremlin.TestHelper;
-import org.apache.tinkerpop.gremlin.hadoop.structure.*;
+import org.apache.tinkerpop.gremlin.hadoop.structure.HadoopEdge;
+import org.apache.tinkerpop.gremlin.hadoop.structure.HadoopElement;
+import org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph;
+import org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraphVariables;
+import org.apache.tinkerpop.gremlin.hadoop.structure.HadoopProperty;
+import org.apache.tinkerpop.gremlin.hadoop.structure.HadoopVertex;
+import org.apache.tinkerpop.gremlin.hadoop.structure.HadoopVertexProperty;
 import org.apache.tinkerpop.gremlin.hadoop.structure.io.graphson.GraphSONInputFormat;
 import org.apache.tinkerpop.gremlin.hadoop.structure.io.gryo.GryoInputFormat;
 import org.apache.tinkerpop.gremlin.hadoop.structure.io.gryo.GryoOutputFormat;
@@ -33,7 +39,13 @@ import org.apache.tinkerpop.gremlin.structure.io.graphson.GraphSONResourceAccess
 import org.apache.tinkerpop.gremlin.structure.io.gryo.GryoResourceAccess;
 import org.apache.tinkerpop.gremlin.structure.io.script.ScriptResourceAccess;
 
-import java.util.*;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.Set;
 
 /**
  * @author Marko A. Rodriguez (http://markorodriguez.com)
@@ -111,6 +123,7 @@ public class HadoopGraphProvider extends AbstractGraphProvider {
             put(GiraphConstants.NETTY_CLIENT_USE_EXECUTION_HANDLER.getKey(), false); // this prevents so many integration tests running out of threads
             put(GiraphConstants.NUM_INPUT_THREADS.getKey(), 3);
             put(GiraphConstants.NUM_COMPUTE_THREADS.getKey(), 3);
+            put("mapred.reduce.tasks", 4);
             //put("giraph.vertexOutputFormatThreadSafe", false);
             //put("giraph.numOutputThreads", 3);