You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@giraph.apache.org by ni...@apache.org on 2013/02/18 00:14:11 UTC

git commit: GIRAPH-520: ReverseEdgeDuplicator (nitay)

Updated Branches:
  refs/heads/trunk 06d64a29d -> 89bba9534


GIRAPH-520: ReverseEdgeDuplicator (nitay)


Project: http://git-wip-us.apache.org/repos/asf/giraph/repo
Commit: http://git-wip-us.apache.org/repos/asf/giraph/commit/89bba953
Tree: http://git-wip-us.apache.org/repos/asf/giraph/tree/89bba953
Diff: http://git-wip-us.apache.org/repos/asf/giraph/diff/89bba953

Branch: refs/heads/trunk
Commit: 89bba9534b092d85027c8de81bf7f174760bc24f
Parents: 06d64a2
Author: Nitay Joffe <ni...@apache.org>
Authored: Sat Feb 16 23:28:38 2013 -0500
Committer: Nitay Joffe <ni...@apache.org>
Committed: Sun Feb 17 18:11:31 2013 -0500

----------------------------------------------------------------------
 CHANGELOG                                          |    2 +
 .../apache/giraph/graph/ReverseEdgeDuplicator.java |  115 +++++++++++++++
 .../formats/IntNullReverseTextEdgeInputFormat.java |   47 ++++++
 .../java/org/apache/giraph/io/TestEdgeInput.java   |   34 ++++-
 4 files changed, 197 insertions(+), 1 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/giraph/blob/89bba953/CHANGELOG
----------------------------------------------------------------------
diff --git a/CHANGELOG b/CHANGELOG
index 2fb626e..5204023 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,6 +1,8 @@
 Giraph Change Log
 
 Release 0.2.0 - unreleased
+  GIRAPH-520: ReverseEdgeDuplicator (nitay)
+
   GIRAPH-522: JMap Dumper (nitay)
 
   GIRAPH-517: Use stable hcatalog 0.5.0-incubating (nitay)

http://git-wip-us.apache.org/repos/asf/giraph/blob/89bba953/giraph-core/src/main/java/org/apache/giraph/graph/ReverseEdgeDuplicator.java
----------------------------------------------------------------------
diff --git a/giraph-core/src/main/java/org/apache/giraph/graph/ReverseEdgeDuplicator.java b/giraph-core/src/main/java/org/apache/giraph/graph/ReverseEdgeDuplicator.java
new file mode 100644
index 0000000..4415cc2
--- /dev/null
+++ b/giraph-core/src/main/java/org/apache/giraph/graph/ReverseEdgeDuplicator.java
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.giraph.graph;
+
+import org.apache.giraph.io.EdgeReader;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+
+import java.io.IOException;
+
+/**
+ * An EdgeReader that creates the opposite direction edge for each edge read.
+ * Used to create an undirected graph from a directed input.
+ * This class is a decorator around any other EdgeReader.
+ *
+ * @param <I> Vertex ID
+ * @param <E> Edge Value
+ */
+public class ReverseEdgeDuplicator<I extends WritableComparable,
+    E extends Writable> implements EdgeReader<I, E> {
+  /** The underlying EdgeReader to wrap */
+  private final EdgeReader<I, E> baseReader;
+
+  /** Whether the reverse edge stored currently is valid */
+  private boolean haveReverseEdge = true;
+  /** Reverse of the edge last read */
+  private Edge<I, E> reverseEdge;
+  /** Reverse source of last edge, in other words last edge's target */
+  private I reverseSourceId;
+
+  /**
+   * Constructor
+   * @param baseReader EdgeReader to wrap
+   */
+  public ReverseEdgeDuplicator(EdgeReader<I, E> baseReader) {
+    this.baseReader = baseReader;
+  }
+
+  /**
+   * Get wrapped EdgeReader
+   * @return EdgeReader
+   */
+  public EdgeReader<I, E> getBaseReader() {
+    return baseReader;
+  }
+
+  @Override
+  public void initialize(InputSplit inputSplit, TaskAttemptContext context)
+    throws IOException, InterruptedException {
+    baseReader.initialize(inputSplit, context);
+    haveReverseEdge = true;
+  }
+
+  @Override
+  public boolean nextEdge() throws IOException, InterruptedException {
+    boolean result = true;
+    if (haveReverseEdge) {
+      result = baseReader.nextEdge();
+      haveReverseEdge = false;
+    } else {
+      Edge<I, E> currentEdge = baseReader.getCurrentEdge();
+      reverseSourceId = currentEdge.getTargetVertexId();
+      reverseEdge = EdgeFactory.create(baseReader.getCurrentSourceId(),
+          currentEdge.getValue());
+      haveReverseEdge = true;
+    }
+    return result;
+  }
+
+  @Override
+  public I getCurrentSourceId() throws IOException, InterruptedException {
+    if (haveReverseEdge) {
+      return reverseSourceId;
+    } else {
+      return baseReader.getCurrentSourceId();
+    }
+  }
+
+  @Override
+  public Edge<I, E> getCurrentEdge() throws IOException, InterruptedException {
+    if (haveReverseEdge) {
+      return reverseEdge;
+    } else {
+      return baseReader.getCurrentEdge();
+    }
+  }
+
+  @Override
+  public void close() throws IOException {
+    baseReader.close();
+  }
+
+  @Override
+  public float getProgress() throws IOException, InterruptedException {
+    return baseReader.getProgress();
+  }
+}

http://git-wip-us.apache.org/repos/asf/giraph/blob/89bba953/giraph-core/src/main/java/org/apache/giraph/io/formats/IntNullReverseTextEdgeInputFormat.java
----------------------------------------------------------------------
diff --git a/giraph-core/src/main/java/org/apache/giraph/io/formats/IntNullReverseTextEdgeInputFormat.java b/giraph-core/src/main/java/org/apache/giraph/io/formats/IntNullReverseTextEdgeInputFormat.java
new file mode 100644
index 0000000..1e3b643
--- /dev/null
+++ b/giraph-core/src/main/java/org/apache/giraph/io/formats/IntNullReverseTextEdgeInputFormat.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.giraph.io.formats;
+
+import org.apache.giraph.graph.ReverseEdgeDuplicator;
+import org.apache.giraph.io.EdgeReader;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+
+import java.io.IOException;
+
+/**
+ * Simple text-based {@link org.apache.giraph.io.EdgeInputFormat} for
+ * unweighted graphs with int ids.
+ *
+ * Each line consists of: source_vertex, target_vertex
+ *
+ * This version also creates the reverse edges.
+ */
+public class IntNullReverseTextEdgeInputFormat
+    extends IntNullTextEdgeInputFormat {
+  @Override
+  public EdgeReader<IntWritable, NullWritable> createEdgeReader(
+      InputSplit split, TaskAttemptContext context) throws IOException {
+    EdgeReader<IntWritable, NullWritable> edgeReader =
+        super.createEdgeReader(split, context);
+    return new ReverseEdgeDuplicator<IntWritable, NullWritable>(edgeReader);
+  }
+}

http://git-wip-us.apache.org/repos/asf/giraph/blob/89bba953/giraph-core/src/test/java/org/apache/giraph/io/TestEdgeInput.java
----------------------------------------------------------------------
diff --git a/giraph-core/src/test/java/org/apache/giraph/io/TestEdgeInput.java b/giraph-core/src/test/java/org/apache/giraph/io/TestEdgeInput.java
index 112a267..c080622 100644
--- a/giraph-core/src/test/java/org/apache/giraph/io/TestEdgeInput.java
+++ b/giraph-core/src/test/java/org/apache/giraph/io/TestEdgeInput.java
@@ -20,11 +20,12 @@ package org.apache.giraph.io;
 
 import org.apache.giraph.BspCase;
 import org.apache.giraph.conf.GiraphClasses;
-import org.apache.giraph.vertex.EdgeListVertex;
 import org.apache.giraph.io.formats.IdWithValueTextOutputFormat;
 import org.apache.giraph.io.formats.IntIntTextVertexValueInputFormat;
+import org.apache.giraph.io.formats.IntNullReverseTextEdgeInputFormat;
 import org.apache.giraph.io.formats.IntNullTextEdgeInputFormat;
 import org.apache.giraph.utils.InternalVertexRunner;
+import org.apache.giraph.vertex.EdgeListVertex;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.NullWritable;
 import org.junit.Test;
@@ -75,6 +76,37 @@ public class TestEdgeInput extends BspCase {
     assertEquals(1, (int) values.get(4));
   }
 
+  // It should be able to build a graph starting from the edges only.
+  // Using ReverseEdgeDuplicator it should also create the reverse edges.
+  // Vertices should be implicitly created with default values.
+  @Test
+  public void testEdgesOnlyWithReverse() throws Exception {
+    String[] edges = new String[] {
+        "1 2",
+        "2 3",
+        "2 4",
+        "4 1"
+    };
+
+    GiraphClasses classes = new GiraphClasses();
+    classes.setVertexClass(TestVertexWithNumEdges.class);
+    classes.setEdgeInputFormatClass(IntNullReverseTextEdgeInputFormat.class);
+    classes.setVertexOutputFormatClass(IdWithValueTextOutputFormat.class);
+    Map<String, String> params = ImmutableMap.of();
+    Iterable<String> results = InternalVertexRunner.run(classes, params,
+        null, edges);
+
+    Map<Integer, Integer> values = parseResults(results);
+
+    // Check that all vertices with outgoing edges have been created
+    assertEquals(4, values.size());
+    // Check the number of edges for each vertex
+    assertEquals(2, (int) values.get(1));
+    assertEquals(3, (int) values.get(2));
+    assertEquals(1, (int) values.get(3));
+    assertEquals(2, (int) values.get(4));
+  }
+
   // It should be able to build a graph by specifying vertex data and edges
   // as separate input formats.
   @Test