You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@giraph.apache.org by ni...@apache.org on 2013/03/28 19:26:00 UTC

git commit: GIRAPH-594: auto set reusing objects (nitay)

Updated Branches:
  refs/heads/trunk f4deb5ca8 -> 2a6c9d563


GIRAPH-594: auto set reusing objects (nitay)


Project: http://git-wip-us.apache.org/repos/asf/giraph/repo
Commit: http://git-wip-us.apache.org/repos/asf/giraph/commit/2a6c9d56
Tree: http://git-wip-us.apache.org/repos/asf/giraph/tree/2a6c9d56
Diff: http://git-wip-us.apache.org/repos/asf/giraph/diff/2a6c9d56

Branch: refs/heads/trunk
Commit: 2a6c9d5630508439c9fc4cee175617ff63d59a7d
Parents: f4deb5c
Author: Nitay Joffe <ni...@apache.org>
Authored: Thu Mar 28 14:09:35 2013 -0400
Committer: Nitay Joffe <ni...@apache.org>
Committed: Thu Mar 28 14:10:00 2013 -0400

----------------------------------------------------------------------
 CHANGELOG                                          |    2 +
 .../java/org/apache/giraph/conf/GiraphClasses.java |    2 +-
 .../apache/giraph/conf/GiraphConfiguration.java    |   48 +++++++++++++++
 .../conf/ImmutableClassesGiraphConfiguration.java  |   31 ++--------
 .../giraph/partition/ByteArrayPartition.java       |   10 ++-
 .../giraph/partition/ReusesObjectsPartition.java   |   36 +++++++++++
 .../org/apache/giraph/hive/HiveGiraphRunner.java   |    2 +-
 .../hive/input/vertex/SimpleHiveToVertex.java      |    6 +--
 8 files changed, 100 insertions(+), 37 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/giraph/blob/2a6c9d56/CHANGELOG
----------------------------------------------------------------------
diff --git a/CHANGELOG b/CHANGELOG
index 8da6a12..ab59833 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,6 +1,8 @@
 Giraph Change Log
 
 Release 0.2.0 - unreleased
+  GIRAPH-594: auto set reusing objects (nitay)
+
   GIRAPH-597: Don't reuse vertex by default in SimpleHiveToVertex (majakabiljo)
 
   GIRAPH-588: More flexible Hive input (majakabiljo)

http://git-wip-us.apache.org/repos/asf/giraph/blob/2a6c9d56/giraph-core/src/main/java/org/apache/giraph/conf/GiraphClasses.java
----------------------------------------------------------------------
diff --git a/giraph-core/src/main/java/org/apache/giraph/conf/GiraphClasses.java b/giraph-core/src/main/java/org/apache/giraph/conf/GiraphClasses.java
index 23dab79..e03859a 100644
--- a/giraph-core/src/main/java/org/apache/giraph/conf/GiraphClasses.java
+++ b/giraph-core/src/main/java/org/apache/giraph/conf/GiraphClasses.java
@@ -147,7 +147,7 @@ public class GiraphClasses<I extends WritableComparable,
    *
    * @param conf Configuration to read from.
    */
-  public void readFromConf(Configuration conf) {
+  private void readFromConf(Configuration conf) {
     // set pre-validated generic parameter types into Configuration
     vertexClass = (Class<? extends Vertex<I, V, E, M>>) VERTEX_CLASS.get(conf);
     List<Class<?>> classList = ReflectionUtils.getTypeArguments(Vertex.class,

http://git-wip-us.apache.org/repos/asf/giraph/blob/2a6c9d56/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConfiguration.java
----------------------------------------------------------------------
diff --git a/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConfiguration.java b/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConfiguration.java
index dee8e98..963b82a 100644
--- a/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConfiguration.java
+++ b/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConfiguration.java
@@ -20,6 +20,7 @@ package org.apache.giraph.conf;
 
 import org.apache.giraph.aggregators.AggregatorWriter;
 import org.apache.giraph.combiner.Combiner;
+import org.apache.giraph.edge.ReuseObjectsVertexEdges;
 import org.apache.giraph.edge.VertexEdges;
 import org.apache.giraph.graph.Vertex;
 import org.apache.giraph.graph.VertexResolver;
@@ -33,6 +34,7 @@ import org.apache.giraph.master.MasterObserver;
 import org.apache.giraph.partition.GraphPartitionerFactory;
 import org.apache.giraph.partition.Partition;
 import org.apache.giraph.partition.PartitionContext;
+import org.apache.giraph.partition.ReusesObjectsPartition;
 import org.apache.giraph.worker.WorkerContext;
 import org.apache.giraph.worker.WorkerObserver;
 import org.apache.hadoop.conf.Configuration;
@@ -85,6 +87,15 @@ public class GiraphConfiguration extends Configuration
   }
 
   /**
+   * Get the vertex edges class
+   *
+   * @return vertex edges class
+   */
+  public Class<? extends VertexEdges> getVertexEdgesClass() {
+    return VERTEX_EDGES_CLASS.get(this);
+  }
+
+  /**
    * Set the vertex edges class
    *
    * @param vertexEdgesClass Determines the way edges are stored
@@ -106,6 +117,43 @@ public class GiraphConfiguration extends Configuration
   }
 
   /**
+   * True if the {@link VertexEdges} implementation copies the passed edges
+   * to its own data structure, i.e. it doesn't keep references to Edge
+   * objects, target vertex ids or edge values passed to add() or
+   * initialize().
+   * This makes it possible to reuse edge objects passed to the data
+   * structure, to minimize object instantiation (see for example
+   * EdgeStore#addPartitionEdges()).
+   *
+   * @return True iff we can reuse the edge objects
+   */
+  public boolean reuseEdgeObjects() {
+    return ReuseObjectsVertexEdges.class.isAssignableFrom(
+        getVertexEdgesClass());
+  }
+
+  /**
+   * True if the {@link Partition} implementation copies the passed vertices
+   * to its own data structure, i.e. it doesn't keep references to Vertex
+   * objects passed to it.
+   * This makes it possible to reuse vertex objects passed to the data
+   * structure, to minimize object instantiation.
+   *
+   * @return True iff we can reuse the vertex objects
+   */
+  public boolean reuseVertexObjects() {
+    return ReusesObjectsPartition.class.isAssignableFrom(getPartitionClass());
+  }
+
+  /**
+   * Get Partition class used
+   * @return Partition class
+   */
+  public Class<? extends Partition> getPartitionClass() {
+    return PARTITION_CLASS.get(this);
+  }
+
+  /**
    * Set the vertex input format class (required)
    *
    * @param vertexInputFormatClass Determines how graph is input

http://git-wip-us.apache.org/repos/asf/giraph/blob/2a6c9d56/giraph-core/src/main/java/org/apache/giraph/conf/ImmutableClassesGiraphConfiguration.java
----------------------------------------------------------------------
diff --git a/giraph-core/src/main/java/org/apache/giraph/conf/ImmutableClassesGiraphConfiguration.java b/giraph-core/src/main/java/org/apache/giraph/conf/ImmutableClassesGiraphConfiguration.java
index 0af8b97..76f6105 100644
--- a/giraph-core/src/main/java/org/apache/giraph/conf/ImmutableClassesGiraphConfiguration.java
+++ b/giraph-core/src/main/java/org/apache/giraph/conf/ImmutableClassesGiraphConfiguration.java
@@ -20,9 +20,12 @@ package org.apache.giraph.conf;
 
 import org.apache.giraph.aggregators.AggregatorWriter;
 import org.apache.giraph.combiner.Combiner;
+import org.apache.giraph.edge.Edge;
+import org.apache.giraph.edge.EdgeFactory;
 import org.apache.giraph.edge.ReusableEdge;
-import org.apache.giraph.edge.ReuseObjectsVertexEdges;
+import org.apache.giraph.edge.VertexEdges;
 import org.apache.giraph.graph.GraphState;
+import org.apache.giraph.graph.Vertex;
 import org.apache.giraph.graph.VertexResolver;
 import org.apache.giraph.graph.VertexValueFactory;
 import org.apache.giraph.io.EdgeInputFormat;
@@ -41,10 +44,6 @@ import org.apache.giraph.utils.ExtendedDataOutput;
 import org.apache.giraph.utils.ReflectionUtils;
 import org.apache.giraph.utils.UnsafeByteArrayInputStream;
 import org.apache.giraph.utils.UnsafeByteArrayOutputStream;
-import org.apache.giraph.edge.Edge;
-import org.apache.giraph.edge.EdgeFactory;
-import org.apache.giraph.graph.Vertex;
-import org.apache.giraph.edge.VertexEdges;
 import org.apache.giraph.worker.WorkerContext;
 import org.apache.giraph.worker.WorkerObserver;
 import org.apache.hadoop.conf.Configuration;
@@ -566,11 +565,7 @@ public class ImmutableClassesGiraphConfiguration<I extends WritableComparable,
     }
   }
 
-  /**
-   * Get the user's subclassed {@link VertexEdges}
-   *
-   * @return User's vertex edges class
-   */
+  @Override
   public Class<? extends VertexEdges<I, E>> getVertexEdgesClass() {
     return classes.getVertexEdgesClass();
   }
@@ -595,22 +590,6 @@ public class ImmutableClassesGiraphConfiguration<I extends WritableComparable,
   }
 
   /**
-   * True if the {@link VertexEdges} implementation copies the passed edges
-   * to its own data structure, i.e. it doesn't keep references to Edge
-   * objects, target vertex ids or edge values passed to add() or
-   * initialize().
-   * This makes it possible to reuse edge objects passed to the data
-   * structure, to minimize object instantiation (see for example
-   * EdgeStore#addPartitionEdges()).
-   *
-   * @return True iff we can reuse the edge objects
-   */
-  public boolean reuseEdgeObjects() {
-    return ReuseObjectsVertexEdges.class.isAssignableFrom(
-        getVertexEdgesClass());
-  }
-
-  /**
    * Create a user {@link VertexEdges}
    *
    * @return Instantiated user VertexEdges

http://git-wip-us.apache.org/repos/asf/giraph/blob/2a6c9d56/giraph-core/src/main/java/org/apache/giraph/partition/ByteArrayPartition.java
----------------------------------------------------------------------
diff --git a/giraph-core/src/main/java/org/apache/giraph/partition/ByteArrayPartition.java b/giraph-core/src/main/java/org/apache/giraph/partition/ByteArrayPartition.java
index 12aa6fb..dd8c974 100644
--- a/giraph-core/src/main/java/org/apache/giraph/partition/ByteArrayPartition.java
+++ b/giraph-core/src/main/java/org/apache/giraph/partition/ByteArrayPartition.java
@@ -17,15 +17,16 @@
  */
 package org.apache.giraph.partition;
 
-import com.google.common.collect.MapMaker;
-import com.google.common.primitives.Ints;
+import org.apache.giraph.graph.Vertex;
 import org.apache.giraph.utils.UnsafeByteArrayInputStream;
 import org.apache.giraph.utils.WritableUtils;
-import org.apache.giraph.graph.Vertex;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.util.Progressable;
 
+import com.google.common.collect.MapMaker;
+import com.google.common.primitives.Ints;
+
 import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
@@ -45,7 +46,8 @@ import java.util.concurrent.ConcurrentMap;
  */
 public class ByteArrayPartition<I extends WritableComparable,
     V extends Writable, E extends Writable, M extends Writable>
-    extends BasicPartition<I, V, E, M> {
+    extends BasicPartition<I, V, E, M>
+    implements ReusesObjectsPartition<I, V, E, M> {
   /**
    * Vertex map for this range (keyed by index).  Note that the byte[] is a
    * serialized vertex with the first four bytes as the length of the vertex

http://git-wip-us.apache.org/repos/asf/giraph/blob/2a6c9d56/giraph-core/src/main/java/org/apache/giraph/partition/ReusesObjectsPartition.java
----------------------------------------------------------------------
diff --git a/giraph-core/src/main/java/org/apache/giraph/partition/ReusesObjectsPartition.java b/giraph-core/src/main/java/org/apache/giraph/partition/ReusesObjectsPartition.java
new file mode 100644
index 0000000..4153d55
--- /dev/null
+++ b/giraph-core/src/main/java/org/apache/giraph/partition/ReusesObjectsPartition.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.giraph.partition;
+
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+
+/**
+ * Empty interface to characterize {@link Partition} implementations that
+ * don't keep references to the Vertex objects they are passed.
+ * The Giraph infrastructure can exploit this characteristic by reusing Vertex
+ * objects.
+ *
+ * @param <I> Vertex id
+ * @param <V> Vertex value
+ * @param <E> Edge value
+ * @param <M> Message value
+ */
+public interface ReusesObjectsPartition<I extends WritableComparable,
+    V extends Writable, E extends Writable, M extends Writable>
+    extends Partition<I, V, E, M> { }

http://git-wip-us.apache.org/repos/asf/giraph/blob/2a6c9d56/giraph-hive/src/main/java/org/apache/giraph/hive/HiveGiraphRunner.java
----------------------------------------------------------------------
diff --git a/giraph-hive/src/main/java/org/apache/giraph/hive/HiveGiraphRunner.java b/giraph-hive/src/main/java/org/apache/giraph/hive/HiveGiraphRunner.java
index 0039dd6..63e9f95 100644
--- a/giraph-hive/src/main/java/org/apache/giraph/hive/HiveGiraphRunner.java
+++ b/giraph-hive/src/main/java/org/apache/giraph/hive/HiveGiraphRunner.java
@@ -26,6 +26,7 @@ import org.apache.commons.cli.Options;
 import org.apache.commons.cli.ParseException;
 import org.apache.giraph.conf.GiraphClasses;
 import org.apache.giraph.conf.GiraphConfiguration;
+import org.apache.giraph.graph.Vertex;
 import org.apache.giraph.hive.common.HiveProfiles;
 import org.apache.giraph.hive.input.edge.HiveEdgeInputFormat;
 import org.apache.giraph.hive.input.edge.HiveEdgeReader;
@@ -37,7 +38,6 @@ import org.apache.giraph.hive.output.HiveVertexOutputFormat;
 import org.apache.giraph.hive.output.HiveVertexWriter;
 import org.apache.giraph.hive.output.VertexToHive;
 import org.apache.giraph.job.GiraphJob;
-import org.apache.giraph.graph.Vertex;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.util.Tool;

http://git-wip-us.apache.org/repos/asf/giraph/blob/2a6c9d56/giraph-hive/src/main/java/org/apache/giraph/hive/input/vertex/SimpleHiveToVertex.java
----------------------------------------------------------------------
diff --git a/giraph-hive/src/main/java/org/apache/giraph/hive/input/vertex/SimpleHiveToVertex.java b/giraph-hive/src/main/java/org/apache/giraph/hive/input/vertex/SimpleHiveToVertex.java
index 651aefd..a4acd2f 100644
--- a/giraph-hive/src/main/java/org/apache/giraph/hive/input/vertex/SimpleHiveToVertex.java
+++ b/giraph-hive/src/main/java/org/apache/giraph/hive/input/vertex/SimpleHiveToVertex.java
@@ -18,7 +18,6 @@
 
 package org.apache.giraph.hive.input.vertex;
 
-import org.apache.giraph.conf.BooleanConfOption;
 import org.apache.giraph.conf.ImmutableClassesGiraphConfiguration;
 import org.apache.giraph.edge.Edge;
 import org.apache.giraph.graph.Vertex;
@@ -42,9 +41,6 @@ import java.util.Iterator;
 public abstract class SimpleHiveToVertex<I extends WritableComparable,
     V extends Writable, E extends Writable, M extends Writable>
     extends AbstractHiveToVertex<I, V, E, M> {
-  /** Configuration option for whether to reuse vertex */
-  public static final BooleanConfOption REUSE_VERTEX_KEY =
-      new BooleanConfOption("giraph.hive.reuse.vertex", false);
   /** Hive records which we are reading from */
   private Iterator<HiveRecord> records;
 
@@ -78,7 +74,7 @@ public abstract class SimpleHiveToVertex<I extends WritableComparable,
   @Override
   public void setConf(ImmutableClassesGiraphConfiguration<I, V, E, M> conf) {
     super.setConf(conf);
-    if (REUSE_VERTEX_KEY.get(conf)) {
+    if (conf.reuseVertexObjects()) {
       reusableVertex = getConf().createVertex();
     }
   }