You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@giraph.apache.org by ni...@apache.org on 2013/03/28 19:26:00 UTC
git commit: GIRAPH-594: auto set reusing objects (nitay)
Updated Branches:
refs/heads/trunk f4deb5ca8 -> 2a6c9d563
GIRAPH-594: auto set reusing objects (nitay)
Project: http://git-wip-us.apache.org/repos/asf/giraph/repo
Commit: http://git-wip-us.apache.org/repos/asf/giraph/commit/2a6c9d56
Tree: http://git-wip-us.apache.org/repos/asf/giraph/tree/2a6c9d56
Diff: http://git-wip-us.apache.org/repos/asf/giraph/diff/2a6c9d56
Branch: refs/heads/trunk
Commit: 2a6c9d5630508439c9fc4cee175617ff63d59a7d
Parents: f4deb5c
Author: Nitay Joffe <ni...@apache.org>
Authored: Thu Mar 28 14:09:35 2013 -0400
Committer: Nitay Joffe <ni...@apache.org>
Committed: Thu Mar 28 14:10:00 2013 -0400
----------------------------------------------------------------------
CHANGELOG | 2 +
.../java/org/apache/giraph/conf/GiraphClasses.java | 2 +-
.../apache/giraph/conf/GiraphConfiguration.java | 48 +++++++++++++++
.../conf/ImmutableClassesGiraphConfiguration.java | 31 ++--------
.../giraph/partition/ByteArrayPartition.java | 10 ++-
.../giraph/partition/ReusesObjectsPartition.java | 36 +++++++++++
.../org/apache/giraph/hive/HiveGiraphRunner.java | 2 +-
.../hive/input/vertex/SimpleHiveToVertex.java | 6 +--
8 files changed, 100 insertions(+), 37 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/giraph/blob/2a6c9d56/CHANGELOG
----------------------------------------------------------------------
diff --git a/CHANGELOG b/CHANGELOG
index 8da6a12..ab59833 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,6 +1,8 @@
Giraph Change Log
Release 0.2.0 - unreleased
+ GIRAPH-594: auto set reusing objects (nitay)
+
GIRAPH-597: Don't reuse vertex by default in SimpleHiveToVertex (majakabiljo)
GIRAPH-588: More flexible Hive input (majakabiljo)
http://git-wip-us.apache.org/repos/asf/giraph/blob/2a6c9d56/giraph-core/src/main/java/org/apache/giraph/conf/GiraphClasses.java
----------------------------------------------------------------------
diff --git a/giraph-core/src/main/java/org/apache/giraph/conf/GiraphClasses.java b/giraph-core/src/main/java/org/apache/giraph/conf/GiraphClasses.java
index 23dab79..e03859a 100644
--- a/giraph-core/src/main/java/org/apache/giraph/conf/GiraphClasses.java
+++ b/giraph-core/src/main/java/org/apache/giraph/conf/GiraphClasses.java
@@ -147,7 +147,7 @@ public class GiraphClasses<I extends WritableComparable,
*
* @param conf Configuration to read from.
*/
- public void readFromConf(Configuration conf) {
+ private void readFromConf(Configuration conf) {
// set pre-validated generic parameter types into Configuration
vertexClass = (Class<? extends Vertex<I, V, E, M>>) VERTEX_CLASS.get(conf);
List<Class<?>> classList = ReflectionUtils.getTypeArguments(Vertex.class,
http://git-wip-us.apache.org/repos/asf/giraph/blob/2a6c9d56/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConfiguration.java
----------------------------------------------------------------------
diff --git a/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConfiguration.java b/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConfiguration.java
index dee8e98..963b82a 100644
--- a/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConfiguration.java
+++ b/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConfiguration.java
@@ -20,6 +20,7 @@ package org.apache.giraph.conf;
import org.apache.giraph.aggregators.AggregatorWriter;
import org.apache.giraph.combiner.Combiner;
+import org.apache.giraph.edge.ReuseObjectsVertexEdges;
import org.apache.giraph.edge.VertexEdges;
import org.apache.giraph.graph.Vertex;
import org.apache.giraph.graph.VertexResolver;
@@ -33,6 +34,7 @@ import org.apache.giraph.master.MasterObserver;
import org.apache.giraph.partition.GraphPartitionerFactory;
import org.apache.giraph.partition.Partition;
import org.apache.giraph.partition.PartitionContext;
+import org.apache.giraph.partition.ReusesObjectsPartition;
import org.apache.giraph.worker.WorkerContext;
import org.apache.giraph.worker.WorkerObserver;
import org.apache.hadoop.conf.Configuration;
@@ -85,6 +87,15 @@ public class GiraphConfiguration extends Configuration
}
/**
+ * Get the vertex edges class
+ *
+ * @return vertex edges class
+ */
+ public Class<? extends VertexEdges> getVertexEdgesClass() {
+ return VERTEX_EDGES_CLASS.get(this);
+ }
+
+ /**
* Set the vertex edges class
*
* @param vertexEdgesClass Determines the way edges are stored
@@ -106,6 +117,43 @@ public class GiraphConfiguration extends Configuration
}
/**
+ * True if the {@link VertexEdges} implementation copies the passed edges
+ * to its own data structure, i.e. it doesn't keep references to Edge
+ * objects, target vertex ids or edge values passed to add() or
+ * initialize().
+ * This makes it possible to reuse edge objects passed to the data
+ * structure, to minimize object instantiation (see for example
+ * EdgeStore#addPartitionEdges()).
+ *
+ * @return True iff we can reuse the edge objects
+ */
+ public boolean reuseEdgeObjects() {
+ return ReuseObjectsVertexEdges.class.isAssignableFrom(
+ getVertexEdgesClass());
+ }
+
+ /**
+ * True if the {@link Partition} implementation copies the passed vertices
+ * to its own data structure, i.e. it doesn't keep references to Vertex
+ * objects passed to it.
+ * This makes it possible to reuse vertex objects passed to the data
+ * structure, to minimize object instantiation.
+ *
+ * @return True iff we can reuse the vertex objects
+ */
+ public boolean reuseVertexObjects() {
+ return ReusesObjectsPartition.class.isAssignableFrom(getPartitionClass());
+ }
+
+ /**
+ * Get Partition class used
+ * @return Partition class
+ */
+ public Class<? extends Partition> getPartitionClass() {
+ return PARTITION_CLASS.get(this);
+ }
+
+ /**
* Set the vertex input format class (required)
*
* @param vertexInputFormatClass Determines how graph is input
http://git-wip-us.apache.org/repos/asf/giraph/blob/2a6c9d56/giraph-core/src/main/java/org/apache/giraph/conf/ImmutableClassesGiraphConfiguration.java
----------------------------------------------------------------------
diff --git a/giraph-core/src/main/java/org/apache/giraph/conf/ImmutableClassesGiraphConfiguration.java b/giraph-core/src/main/java/org/apache/giraph/conf/ImmutableClassesGiraphConfiguration.java
index 0af8b97..76f6105 100644
--- a/giraph-core/src/main/java/org/apache/giraph/conf/ImmutableClassesGiraphConfiguration.java
+++ b/giraph-core/src/main/java/org/apache/giraph/conf/ImmutableClassesGiraphConfiguration.java
@@ -20,9 +20,12 @@ package org.apache.giraph.conf;
import org.apache.giraph.aggregators.AggregatorWriter;
import org.apache.giraph.combiner.Combiner;
+import org.apache.giraph.edge.Edge;
+import org.apache.giraph.edge.EdgeFactory;
import org.apache.giraph.edge.ReusableEdge;
-import org.apache.giraph.edge.ReuseObjectsVertexEdges;
+import org.apache.giraph.edge.VertexEdges;
import org.apache.giraph.graph.GraphState;
+import org.apache.giraph.graph.Vertex;
import org.apache.giraph.graph.VertexResolver;
import org.apache.giraph.graph.VertexValueFactory;
import org.apache.giraph.io.EdgeInputFormat;
@@ -41,10 +44,6 @@ import org.apache.giraph.utils.ExtendedDataOutput;
import org.apache.giraph.utils.ReflectionUtils;
import org.apache.giraph.utils.UnsafeByteArrayInputStream;
import org.apache.giraph.utils.UnsafeByteArrayOutputStream;
-import org.apache.giraph.edge.Edge;
-import org.apache.giraph.edge.EdgeFactory;
-import org.apache.giraph.graph.Vertex;
-import org.apache.giraph.edge.VertexEdges;
import org.apache.giraph.worker.WorkerContext;
import org.apache.giraph.worker.WorkerObserver;
import org.apache.hadoop.conf.Configuration;
@@ -566,11 +565,7 @@ public class ImmutableClassesGiraphConfiguration<I extends WritableComparable,
}
}
- /**
- * Get the user's subclassed {@link VertexEdges}
- *
- * @return User's vertex edges class
- */
+ @Override
public Class<? extends VertexEdges<I, E>> getVertexEdgesClass() {
return classes.getVertexEdgesClass();
}
@@ -595,22 +590,6 @@ public class ImmutableClassesGiraphConfiguration<I extends WritableComparable,
}
/**
- * True if the {@link VertexEdges} implementation copies the passed edges
- * to its own data structure, i.e. it doesn't keep references to Edge
- * objects, target vertex ids or edge values passed to add() or
- * initialize().
- * This makes it possible to reuse edge objects passed to the data
- * structure, to minimize object instantiation (see for example
- * EdgeStore#addPartitionEdges()).
- *
- * @return True iff we can reuse the edge objects
- */
- public boolean reuseEdgeObjects() {
- return ReuseObjectsVertexEdges.class.isAssignableFrom(
- getVertexEdgesClass());
- }
-
- /**
* Create a user {@link VertexEdges}
*
* @return Instantiated user VertexEdges
http://git-wip-us.apache.org/repos/asf/giraph/blob/2a6c9d56/giraph-core/src/main/java/org/apache/giraph/partition/ByteArrayPartition.java
----------------------------------------------------------------------
diff --git a/giraph-core/src/main/java/org/apache/giraph/partition/ByteArrayPartition.java b/giraph-core/src/main/java/org/apache/giraph/partition/ByteArrayPartition.java
index 12aa6fb..dd8c974 100644
--- a/giraph-core/src/main/java/org/apache/giraph/partition/ByteArrayPartition.java
+++ b/giraph-core/src/main/java/org/apache/giraph/partition/ByteArrayPartition.java
@@ -17,15 +17,16 @@
*/
package org.apache.giraph.partition;
-import com.google.common.collect.MapMaker;
-import com.google.common.primitives.Ints;
+import org.apache.giraph.graph.Vertex;
import org.apache.giraph.utils.UnsafeByteArrayInputStream;
import org.apache.giraph.utils.WritableUtils;
-import org.apache.giraph.graph.Vertex;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.util.Progressable;
+import com.google.common.collect.MapMaker;
+import com.google.common.primitives.Ints;
+
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
@@ -45,7 +46,8 @@ import java.util.concurrent.ConcurrentMap;
*/
public class ByteArrayPartition<I extends WritableComparable,
V extends Writable, E extends Writable, M extends Writable>
- extends BasicPartition<I, V, E, M> {
+ extends BasicPartition<I, V, E, M>
+ implements ReusesObjectsPartition<I, V, E, M> {
/**
* Vertex map for this range (keyed by index). Note that the byte[] is a
* serialized vertex with the first four bytes as the length of the vertex
http://git-wip-us.apache.org/repos/asf/giraph/blob/2a6c9d56/giraph-core/src/main/java/org/apache/giraph/partition/ReusesObjectsPartition.java
----------------------------------------------------------------------
diff --git a/giraph-core/src/main/java/org/apache/giraph/partition/ReusesObjectsPartition.java b/giraph-core/src/main/java/org/apache/giraph/partition/ReusesObjectsPartition.java
new file mode 100644
index 0000000..4153d55
--- /dev/null
+++ b/giraph-core/src/main/java/org/apache/giraph/partition/ReusesObjectsPartition.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.giraph.partition;
+
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+
+/**
+ * Empty interface to characterize {@link Partition} implementations that
+ * don't keep references to the Vertex objects they are passed.
+ * The Giraph infrastructure can exploit this characteristic by reusing Vertex
+ * objects.
+ *
+ * @param <I> Vertex id
+ * @param <V> Vertex value
+ * @param <E> Edge value
+ * @param <M> Message value
+ */
+public interface ReusesObjectsPartition<I extends WritableComparable,
+ V extends Writable, E extends Writable, M extends Writable>
+ extends Partition<I, V, E, M> { }
http://git-wip-us.apache.org/repos/asf/giraph/blob/2a6c9d56/giraph-hive/src/main/java/org/apache/giraph/hive/HiveGiraphRunner.java
----------------------------------------------------------------------
diff --git a/giraph-hive/src/main/java/org/apache/giraph/hive/HiveGiraphRunner.java b/giraph-hive/src/main/java/org/apache/giraph/hive/HiveGiraphRunner.java
index 0039dd6..63e9f95 100644
--- a/giraph-hive/src/main/java/org/apache/giraph/hive/HiveGiraphRunner.java
+++ b/giraph-hive/src/main/java/org/apache/giraph/hive/HiveGiraphRunner.java
@@ -26,6 +26,7 @@ import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.giraph.conf.GiraphClasses;
import org.apache.giraph.conf.GiraphConfiguration;
+import org.apache.giraph.graph.Vertex;
import org.apache.giraph.hive.common.HiveProfiles;
import org.apache.giraph.hive.input.edge.HiveEdgeInputFormat;
import org.apache.giraph.hive.input.edge.HiveEdgeReader;
@@ -37,7 +38,6 @@ import org.apache.giraph.hive.output.HiveVertexOutputFormat;
import org.apache.giraph.hive.output.HiveVertexWriter;
import org.apache.giraph.hive.output.VertexToHive;
import org.apache.giraph.job.GiraphJob;
-import org.apache.giraph.graph.Vertex;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.util.Tool;
http://git-wip-us.apache.org/repos/asf/giraph/blob/2a6c9d56/giraph-hive/src/main/java/org/apache/giraph/hive/input/vertex/SimpleHiveToVertex.java
----------------------------------------------------------------------
diff --git a/giraph-hive/src/main/java/org/apache/giraph/hive/input/vertex/SimpleHiveToVertex.java b/giraph-hive/src/main/java/org/apache/giraph/hive/input/vertex/SimpleHiveToVertex.java
index 651aefd..a4acd2f 100644
--- a/giraph-hive/src/main/java/org/apache/giraph/hive/input/vertex/SimpleHiveToVertex.java
+++ b/giraph-hive/src/main/java/org/apache/giraph/hive/input/vertex/SimpleHiveToVertex.java
@@ -18,7 +18,6 @@
package org.apache.giraph.hive.input.vertex;
-import org.apache.giraph.conf.BooleanConfOption;
import org.apache.giraph.conf.ImmutableClassesGiraphConfiguration;
import org.apache.giraph.edge.Edge;
import org.apache.giraph.graph.Vertex;
@@ -42,9 +41,6 @@ import java.util.Iterator;
public abstract class SimpleHiveToVertex<I extends WritableComparable,
V extends Writable, E extends Writable, M extends Writable>
extends AbstractHiveToVertex<I, V, E, M> {
- /** Configuration option for whether to reuse vertex */
- public static final BooleanConfOption REUSE_VERTEX_KEY =
- new BooleanConfOption("giraph.hive.reuse.vertex", false);
/** Hive records which we are reading from */
private Iterator<HiveRecord> records;
@@ -78,7 +74,7 @@ public abstract class SimpleHiveToVertex<I extends WritableComparable,
@Override
public void setConf(ImmutableClassesGiraphConfiguration<I, V, E, M> conf) {
super.setConf(conf);
- if (REUSE_VERTEX_KEY.get(conf)) {
+ if (conf.reuseVertexObjects()) {
reusableVertex = getConf().createVertex();
}
}