You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@giraph.apache.org by ma...@apache.org on 2014/01/09 00:22:52 UTC

git commit: updated refs/heads/trunk to 28f4d1b

Updated Branches:
  refs/heads/trunk ca442deeb -> 28f4d1b09


GIRAPH-820: add a configuration option to skip creating source vertices present only in edge input (pavanka via majakabiljo)


Project: http://git-wip-us.apache.org/repos/asf/giraph/repo
Commit: http://git-wip-us.apache.org/repos/asf/giraph/commit/28f4d1b0
Tree: http://git-wip-us.apache.org/repos/asf/giraph/tree/28f4d1b0
Diff: http://git-wip-us.apache.org/repos/asf/giraph/diff/28f4d1b0

Branch: refs/heads/trunk
Commit: 28f4d1b09b712c02c2003a413a1118fd218030cf
Parents: ca442de
Author: Maja Kabiljo <ma...@fb.com>
Authored: Wed Jan 8 15:21:40 2014 -0800
Committer: Maja Kabiljo <ma...@fb.com>
Committed: Wed Jan 8 15:21:40 2014 -0800

----------------------------------------------------------------------
 CHANGELOG                                       |   3 +
 .../apache/giraph/conf/GiraphConfiguration.java |  16 ++
 .../org/apache/giraph/conf/GiraphConstants.java |   9 ++
 .../java/org/apache/giraph/edge/EdgeStore.java  |  13 +-
 .../giraph/io/TestCreateSourceVertex.java       | 156 +++++++++++++++++++
 5 files changed, 193 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/giraph/blob/28f4d1b0/CHANGELOG
----------------------------------------------------------------------
diff --git a/CHANGELOG b/CHANGELOG
index 6d77b5b..16a8777 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,6 +1,9 @@
 Giraph Change Log
 
 Release 1.1.0 - unreleased
+  GIRAPH-820: add a configuration option to skip creating source vertices present only 
+  in edge input (pavanka via majakabiljo)
+
   GIRAPH-742: Worker task finishSuperstep status reporting the wrong superstep number
   (cmuchins via majakabiljo)
 

http://git-wip-us.apache.org/repos/asf/giraph/blob/28f4d1b0/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConfiguration.java
----------------------------------------------------------------------
diff --git a/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConfiguration.java b/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConfiguration.java
index d066513..c8b7d36 100644
--- a/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConfiguration.java
+++ b/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConfiguration.java
@@ -1161,4 +1161,20 @@ public class GiraphConfiguration extends Configuration
   public boolean isOneToAllMsgSendingEnabled() {
     return ONE_TO_ALL_MSG_SENDING.isTrue(this);
   }
+
+  /**
+   * Get option whether to create a source vertex present only in edge input
+   * @return CREATE_EDGE_SOURCE_VERTICES option
+   */
+  public boolean getCreateSourceVertex() {
+    return CREATE_EDGE_SOURCE_VERTICES.get(this);
+  }
+
+  /**
+   * Set option whether to create a source vertex present only in edge input
+   * @param createVertex create source vertex option
+   */
+  public void setCreateSourceVertex(boolean createVertex) {
+    CREATE_EDGE_SOURCE_VERTICES.set(this, createVertex);
+  }
 }

http://git-wip-us.apache.org/repos/asf/giraph/blob/28f4d1b0/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConstants.java
----------------------------------------------------------------------
diff --git a/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConstants.java b/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConstants.java
index 3f379f1..63f38df 100644
--- a/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConstants.java
+++ b/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConstants.java
@@ -970,6 +970,15 @@ public interface GiraphConstants {
         "one-to-all message sending strategy");
 
   /**
+   * This option can be used to specify if a source vertex present in edge
+   * input but not in vertex input can be created
+   */
+  BooleanConfOption CREATE_EDGE_SOURCE_VERTICES =
+      new BooleanConfOption("giraph.createEdgeSourceVertices", true,
+          "Create a source vertex if present in edge input but not " +
+          "necessarily in vertex input");
+
+  /**
    * This counter group will contain one counter whose name is the ZooKeeper
    * server:port which this job is using
    */

http://git-wip-us.apache.org/repos/asf/giraph/blob/28f4d1b0/giraph-core/src/main/java/org/apache/giraph/edge/EdgeStore.java
----------------------------------------------------------------------
diff --git a/giraph-core/src/main/java/org/apache/giraph/edge/EdgeStore.java b/giraph-core/src/main/java/org/apache/giraph/edge/EdgeStore.java
index 1694d36..dd8f2a3 100644
--- a/giraph-core/src/main/java/org/apache/giraph/edge/EdgeStore.java
+++ b/giraph-core/src/main/java/org/apache/giraph/edge/EdgeStore.java
@@ -156,6 +156,8 @@ public class EdgeStore<I extends WritableComparable,
    * Note: this method is not thread-safe.
    */
   public void moveEdgesToVertices() {
+    final boolean createSourceVertex = configuration.
+        getCreateSourceVertex();
     if (transientEdges.isEmpty()) {
       if (LOG.isInfoEnabled()) {
         LOG.info("moveEdgesToVertices: No edges to move");
@@ -191,10 +193,13 @@ public class EdgeStore<I extends WritableComparable,
                 // If the source vertex doesn't exist, create it. Otherwise,
                 // just set the edges.
                 if (vertex == null) {
-                  vertex = configuration.createVertex();
-                  vertex.initialize(vertexId, configuration.createVertexValue(),
-                      outEdges);
-                  partition.putVertex(vertex);
+                  if (createSourceVertex) {
+                    // createVertex only if it is allowed by configuration
+                    vertex = configuration.createVertex();
+                    vertex.initialize(vertexId,
+                        configuration.createVertexValue(), outEdges);
+                    partition.putVertex(vertex);
+                  }
                 } else {
                   // A vertex may exist with or without edges initially
                   // and optimize the case of no initial edges

http://git-wip-us.apache.org/repos/asf/giraph/blob/28f4d1b0/giraph-core/src/test/java/org/apache/giraph/io/TestCreateSourceVertex.java
----------------------------------------------------------------------
diff --git a/giraph-core/src/test/java/org/apache/giraph/io/TestCreateSourceVertex.java b/giraph-core/src/test/java/org/apache/giraph/io/TestCreateSourceVertex.java
new file mode 100644
index 0000000..039e975
--- /dev/null
+++ b/giraph-core/src/test/java/org/apache/giraph/io/TestCreateSourceVertex.java
@@ -0,0 +1,156 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.giraph.io;
+
+import com.google.common.collect.Maps;
+import org.apache.giraph.conf.GiraphConfiguration;
+import org.apache.giraph.edge.ByteArrayEdges;
+import org.apache.giraph.io.formats.IdWithValueTextOutputFormat;
+import org.apache.giraph.io.formats.IntIntNullTextVertexInputFormat;
+import org.apache.giraph.io.formats.IntNullTextEdgeInputFormat;
+import org.apache.giraph.utils.ComputationCountEdges;
+import org.apache.giraph.utils.IntIntNullNoOpComputation;
+import org.apache.giraph.utils.InternalVertexRunner;
+import org.junit.Test;
+
+import java.util.Map;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Test createSourceVertex configuration option
+ */
+public class TestCreateSourceVertex {
+  @Test
+  public void testPositiveCreateSourceVertex() throws Exception {
+    String [] vertices = new String[] {
+        "1 0",
+        "2 0",
+        "3 0",
+        "4 0",
+    };
+    String [] edges = new String[] {
+        "1 2",
+        "1 5",
+        "2 4",
+        "2 1",
+        "3 4",
+        "4 1",
+        "4 5",
+        "6 2",
+        "7 8",
+        "4 8",
+    };
+
+    GiraphConfiguration conf = getConf();
+    conf.setCreateSourceVertex(false);
+
+    Iterable<String> results = InternalVertexRunner.run(conf, vertices, edges);
+    Map<Integer, Integer> values = parseResults(results);
+
+    // Check that only vertices from vertex input are present in output graph
+    assertEquals(4, values.size());
+    // Check that the ids of vertices in output graph exactly match vertex input
+    assertTrue(values.containsKey(1));
+    assertTrue(values.containsKey(2));
+    assertTrue(values.containsKey(3));
+    assertTrue(values.containsKey(4));
+
+    conf.setComputationClass(ComputationCountEdges.class);
+    results = InternalVertexRunner.run(conf, vertices, edges);
+    values = parseResults(results);
+
+    // Check the number of edges of each vertex
+    assertEquals(2, (int) values.get(1));
+    assertEquals(2, (int) values.get(2));
+    assertEquals(1, (int) values.get(3));
+    assertEquals(3, (int) values.get(4));
+  }
+
+  @Test
+  public void testNegativeCreateSourceVertex() throws Exception {
+    String [] vertices = new String[] {
+        "1 0",
+        "2 0",
+        "3 0",
+        "4 0",
+    };
+    String [] edges = new String[] {
+        "1 2",
+        "1 5",
+        "2 4",
+        "2 1",
+        "3 4",
+        "4 1",
+        "4 5",
+        "6 2",
+        "7 8",
+        "4 8",
+    };
+
+    GiraphConfiguration conf = getConf();
+
+    Iterable<String> results = InternalVertexRunner.run(conf, vertices, edges);
+    Map<Integer, Integer> values = parseResults(results);
+
+    // Check that edge-only source vertices are created alongside vertex input
+    assertEquals(6, values.size());
+    // Check that output ids are the vertex input ids plus edge sources 6 and 7
+    assertTrue(values.containsKey(1));
+    assertTrue(values.containsKey(2));
+    assertTrue(values.containsKey(3));
+    assertTrue(values.containsKey(4));
+    assertTrue(values.containsKey(6));
+    assertTrue(values.containsKey(7));
+
+    conf.setComputationClass(ComputationCountEdges.class);
+    results = InternalVertexRunner.run(conf, vertices, edges);
+    values = parseResults(results);
+
+    // Check the number of edges of each vertex
+    assertEquals(2, (int) values.get(1));
+    assertEquals(2, (int) values.get(2));
+    assertEquals(1, (int) values.get(3));
+    assertEquals(3, (int) values.get(4));
+    assertEquals(1, (int) values.get(6));
+    assertEquals(1, (int) values.get(7));
+  }
+
+  private GiraphConfiguration getConf() {
+    GiraphConfiguration conf = new GiraphConfiguration();
+    conf.setComputationClass(IntIntNullNoOpComputation.class);
+    conf.setOutEdgesClass(ByteArrayEdges.class);
+    conf.setVertexInputFormatClass(IntIntNullTextVertexInputFormat.class);
+    conf.setEdgeInputFormatClass(IntNullTextEdgeInputFormat.class);
+    conf.setVertexOutputFormatClass(IdWithValueTextOutputFormat.class);
+    return conf;
+  }
+
+  private static Map<Integer, Integer> parseResults(Iterable<String> results) {
+    Map<Integer, Integer> values = Maps.newHashMap();
+    for (String line : results) {
+      String[] tokens = line.split("\\s+");
+      int id = Integer.valueOf(tokens[0]);
+      int value = Integer.valueOf(tokens[1]);
+      values.put(id, value);
+    }
+    return values;
+  }
+}