You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@giraph.apache.org by ni...@apache.org on 2013/02/18 00:14:11 UTC
git commit: GIRAPH-520: ReverseEdgeDuplicator (nitay)
Updated Branches:
refs/heads/trunk 06d64a29d -> 89bba9534
GIRAPH-520: ReverseEdgeDuplicator (nitay)
Project: http://git-wip-us.apache.org/repos/asf/giraph/repo
Commit: http://git-wip-us.apache.org/repos/asf/giraph/commit/89bba953
Tree: http://git-wip-us.apache.org/repos/asf/giraph/tree/89bba953
Diff: http://git-wip-us.apache.org/repos/asf/giraph/diff/89bba953
Branch: refs/heads/trunk
Commit: 89bba9534b092d85027c8de81bf7f174760bc24f
Parents: 06d64a2
Author: Nitay Joffe <ni...@apache.org>
Authored: Sat Feb 16 23:28:38 2013 -0500
Committer: Nitay Joffe <ni...@apache.org>
Committed: Sun Feb 17 18:11:31 2013 -0500
----------------------------------------------------------------------
CHANGELOG | 2 +
.../apache/giraph/graph/ReverseEdgeDuplicator.java | 115 +++++++++++++++
.../formats/IntNullReverseTextEdgeInputFormat.java | 47 ++++++
.../java/org/apache/giraph/io/TestEdgeInput.java | 34 ++++-
4 files changed, 197 insertions(+), 1 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/giraph/blob/89bba953/CHANGELOG
----------------------------------------------------------------------
diff --git a/CHANGELOG b/CHANGELOG
index 2fb626e..5204023 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,6 +1,8 @@
Giraph Change Log
Release 0.2.0 - unreleased
+ GIRAPH-520: ReverseEdgeDuplicator (nitay)
+
GIRAPH-522: JMap Dumper (nitay)
GIRAPH-517: Use stable hcatalog 0.5.0-incubating (nitay)
http://git-wip-us.apache.org/repos/asf/giraph/blob/89bba953/giraph-core/src/main/java/org/apache/giraph/graph/ReverseEdgeDuplicator.java
----------------------------------------------------------------------
diff --git a/giraph-core/src/main/java/org/apache/giraph/graph/ReverseEdgeDuplicator.java b/giraph-core/src/main/java/org/apache/giraph/graph/ReverseEdgeDuplicator.java
new file mode 100644
index 0000000..4415cc2
--- /dev/null
+++ b/giraph-core/src/main/java/org/apache/giraph/graph/ReverseEdgeDuplicator.java
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.giraph.graph;
+
+import org.apache.giraph.io.EdgeReader;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+
+import java.io.IOException;
+
+/**
+ * An EdgeReader that creates the opposite direction edge for each edge read.
+ * Used to create an undirected graph from a directed input.
+ * This class is a decorator around any other EdgeReader.
+ *
+ * @param <I> Vertex ID
+ * @param <E> Edge Value
+ */
+public class ReverseEdgeDuplicator<I extends WritableComparable,
+ E extends Writable> implements EdgeReader<I, E> {
+ /** The underlying EdgeReader to wrap */
+ private final EdgeReader<I, E> baseReader;
+
+ /** Whether the reverse edge stored currently is valid */
+ private boolean haveReverseEdge = true;
+ /** Reverse of the edge last read */
+ private Edge<I, E> reverseEdge;
+ /** Reverse source of last edge, in other words last edge's target */
+ private I reverseSourceId;
+
+ /**
+ * Constructor
+ * @param baseReader EdgeReader to wrap
+ */
+ public ReverseEdgeDuplicator(EdgeReader<I, E> baseReader) {
+ this.baseReader = baseReader;
+ }
+
+ /**
+ * Get wrapped EdgeReader
+ * @return EdgeReader
+ */
+ public EdgeReader<I, E> getBaseReader() {
+ return baseReader;
+ }
+
+ @Override
+ public void initialize(InputSplit inputSplit, TaskAttemptContext context)
+ throws IOException, InterruptedException {
+ baseReader.initialize(inputSplit, context);
+ haveReverseEdge = true;
+ }
+
+ @Override
+ public boolean nextEdge() throws IOException, InterruptedException {
+ boolean result = true;
+ if (haveReverseEdge) {
+ result = baseReader.nextEdge();
+ haveReverseEdge = false;
+ } else {
+ Edge<I, E> currentEdge = baseReader.getCurrentEdge();
+ reverseSourceId = currentEdge.getTargetVertexId();
+ reverseEdge = EdgeFactory.create(baseReader.getCurrentSourceId(),
+ currentEdge.getValue());
+ haveReverseEdge = true;
+ }
+ return result;
+ }
+
+ @Override
+ public I getCurrentSourceId() throws IOException, InterruptedException {
+ if (haveReverseEdge) {
+ return reverseSourceId;
+ } else {
+ return baseReader.getCurrentSourceId();
+ }
+ }
+
+ @Override
+ public Edge<I, E> getCurrentEdge() throws IOException, InterruptedException {
+ if (haveReverseEdge) {
+ return reverseEdge;
+ } else {
+ return baseReader.getCurrentEdge();
+ }
+ }
+
+ @Override
+ public void close() throws IOException {
+ baseReader.close();
+ }
+
+ @Override
+ public float getProgress() throws IOException, InterruptedException {
+ return baseReader.getProgress();
+ }
+}
http://git-wip-us.apache.org/repos/asf/giraph/blob/89bba953/giraph-core/src/main/java/org/apache/giraph/io/formats/IntNullReverseTextEdgeInputFormat.java
----------------------------------------------------------------------
diff --git a/giraph-core/src/main/java/org/apache/giraph/io/formats/IntNullReverseTextEdgeInputFormat.java b/giraph-core/src/main/java/org/apache/giraph/io/formats/IntNullReverseTextEdgeInputFormat.java
new file mode 100644
index 0000000..1e3b643
--- /dev/null
+++ b/giraph-core/src/main/java/org/apache/giraph/io/formats/IntNullReverseTextEdgeInputFormat.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.giraph.io.formats;
+
+import org.apache.giraph.graph.ReverseEdgeDuplicator;
+import org.apache.giraph.io.EdgeReader;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+
+import java.io.IOException;
+
+/**
+ * Simple text-based {@link org.apache.giraph.io.EdgeInputFormat} for
+ * unweighted graphs with int ids.
+ *
+ * Each line consists of: source_vertex, target_vertex
+ *
+ * This version also creates the reverse edges.
+ */
+public class IntNullReverseTextEdgeInputFormat
+ extends IntNullTextEdgeInputFormat {
+ @Override
+ public EdgeReader<IntWritable, NullWritable> createEdgeReader(
+ InputSplit split, TaskAttemptContext context) throws IOException {
+ EdgeReader<IntWritable, NullWritable> edgeReader =
+ super.createEdgeReader(split, context);
+ return new ReverseEdgeDuplicator<IntWritable, NullWritable>(edgeReader);
+ }
+}
http://git-wip-us.apache.org/repos/asf/giraph/blob/89bba953/giraph-core/src/test/java/org/apache/giraph/io/TestEdgeInput.java
----------------------------------------------------------------------
diff --git a/giraph-core/src/test/java/org/apache/giraph/io/TestEdgeInput.java b/giraph-core/src/test/java/org/apache/giraph/io/TestEdgeInput.java
index 112a267..c080622 100644
--- a/giraph-core/src/test/java/org/apache/giraph/io/TestEdgeInput.java
+++ b/giraph-core/src/test/java/org/apache/giraph/io/TestEdgeInput.java
@@ -20,11 +20,12 @@ package org.apache.giraph.io;
import org.apache.giraph.BspCase;
import org.apache.giraph.conf.GiraphClasses;
-import org.apache.giraph.vertex.EdgeListVertex;
import org.apache.giraph.io.formats.IdWithValueTextOutputFormat;
import org.apache.giraph.io.formats.IntIntTextVertexValueInputFormat;
+import org.apache.giraph.io.formats.IntNullReverseTextEdgeInputFormat;
import org.apache.giraph.io.formats.IntNullTextEdgeInputFormat;
import org.apache.giraph.utils.InternalVertexRunner;
+import org.apache.giraph.vertex.EdgeListVertex;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.junit.Test;
@@ -75,6 +76,37 @@ public class TestEdgeInput extends BspCase {
assertEquals(1, (int) values.get(4));
}
+ // It should be able to build a graph starting from the edges only.
+ // Using ReverseEdgeDuplicator it should also create the reverse edges.
+ // Vertices should be implicitly created with default values.
+ @Test
+ public void testEdgesOnlyWithReverse() throws Exception {
+ String[] edges = new String[] {
+ "1 2",
+ "2 3",
+ "2 4",
+ "4 1"
+ };
+
+ GiraphClasses classes = new GiraphClasses();
+ classes.setVertexClass(TestVertexWithNumEdges.class);
+ classes.setEdgeInputFormatClass(IntNullReverseTextEdgeInputFormat.class);
+ classes.setVertexOutputFormatClass(IdWithValueTextOutputFormat.class);
+ Map<String, String> params = ImmutableMap.of();
+ Iterable<String> results = InternalVertexRunner.run(classes, params,
+ null, edges);
+
+ Map<Integer, Integer> values = parseResults(results);
+
+ // Check that all vertices with outgoing edges have been created
+ assertEquals(4, values.size());
+ // Check the number of edges for each vertex
+ assertEquals(2, (int) values.get(1));
+ assertEquals(3, (int) values.get(2));
+ assertEquals(1, (int) values.get(3));
+ assertEquals(2, (int) values.get(4));
+ }
+
// It should be able to build a graph by specifying vertex data and edges
// as separate input formats.
@Test