You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@giraph.apache.org by ma...@apache.org on 2014/07/16 19:22:46 UTC

git commit: updated refs/heads/trunk to 0a90177

Repository: giraph
Updated Branches:
  refs/heads/trunk 25d912cd9 -> 0a9017716


GIRAPH-921: Create ByteValueVertex to store vertex values as bytes without object instance (akyrola via majakabiljo)


Project: http://git-wip-us.apache.org/repos/asf/giraph/repo
Commit: http://git-wip-us.apache.org/repos/asf/giraph/commit/0a901771
Tree: http://git-wip-us.apache.org/repos/asf/giraph/tree/0a901771
Diff: http://git-wip-us.apache.org/repos/asf/giraph/diff/0a901771

Branch: refs/heads/trunk
Commit: 0a90177161d65ff2802d3926599561adc985dc65
Parents: 25d912c
Author: Maja Kabiljo <ma...@fb.com>
Authored: Wed Jul 16 10:21:22 2014 -0700
Committer: Maja Kabiljo <ma...@fb.com>
Committed: Wed Jul 16 10:21:22 2014 -0700

----------------------------------------------------------------------
 CHANGELOG                                       |   2 +
 .../org/apache/giraph/conf/GiraphClasses.java   |  16 ++-
 .../apache/giraph/conf/GiraphConfiguration.java |  11 ++
 .../org/apache/giraph/conf/GiraphConstants.java |  12 +-
 .../org/apache/giraph/conf/GiraphTypes.java     |  11 ++
 .../ImmutableClassesGiraphConfiguration.java    |  11 +-
 .../apache/giraph/graph/ByteValueVertex.java    | 114 +++++++++++++++++++
 .../apache/giraph/utils/ConfigurationUtils.java |   1 +
 .../giraph/graph/TestByteValueVertex.java       |  85 ++++++++++++++
 .../apache/giraph/graph/TestVertexAndEdges.java |   4 +-
 10 files changed, 255 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/giraph/blob/0a901771/CHANGELOG
----------------------------------------------------------------------
diff --git a/CHANGELOG b/CHANGELOG
index e1c1227..7287490 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,6 +1,8 @@
 Giraph Change Log
 
 Release 1.1.0 - unreleased
+  GIRAPH-921: Create ByteValueVertex to store vertex values as bytes without object instance (akyrola via majakabiljo)
+
   GIRAPH-929: setIfUnset for EnumConfOption (pavanka)  
 
   GIRAPH-912: Support succinct representation of messages in messagestores (pavanka)

http://git-wip-us.apache.org/repos/asf/giraph/blob/0a901771/giraph-core/src/main/java/org/apache/giraph/conf/GiraphClasses.java
----------------------------------------------------------------------
diff --git a/giraph-core/src/main/java/org/apache/giraph/conf/GiraphClasses.java b/giraph-core/src/main/java/org/apache/giraph/conf/GiraphClasses.java
index e7b18aa..7fe85f1 100644
--- a/giraph-core/src/main/java/org/apache/giraph/conf/GiraphClasses.java
+++ b/giraph-core/src/main/java/org/apache/giraph/conf/GiraphClasses.java
@@ -25,10 +25,11 @@ import org.apache.giraph.edge.OutEdges;
 import org.apache.giraph.factories.ComputationFactory;
 import org.apache.giraph.factories.DefaultComputationFactory;
 import org.apache.giraph.graph.Computation;
-import org.apache.giraph.graph.DefaultVertexValueCombiner;
 import org.apache.giraph.graph.DefaultVertexResolver;
-import org.apache.giraph.graph.VertexValueCombiner;
 import org.apache.giraph.graph.VertexResolver;
+import org.apache.giraph.graph.Vertex;
+import org.apache.giraph.graph.VertexValueCombiner;
+import org.apache.giraph.graph.DefaultVertexValueCombiner;
 import org.apache.giraph.io.EdgeInputFormat;
 import org.apache.giraph.io.EdgeOutputFormat;
 import org.apache.giraph.io.MappingInputFormat;
@@ -236,6 +237,17 @@ public class GiraphClasses<I extends WritableComparable,
     return giraphTypes.getVertexIdClass();
   }
 
+
+  /**
+   * Get Vertex implementation class
+   *
+   * @return Vertex implementation class
+   */
+  public Class<? extends Vertex> getVertexClass() {
+    return giraphTypes.getVertexClass();
+  }
+
+
   /**
    * Get Vertex Value class
    *

http://git-wip-us.apache.org/repos/asf/giraph/blob/0a901771/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConfiguration.java
----------------------------------------------------------------------
diff --git a/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConfiguration.java b/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConfiguration.java
index 953f49f..f35fbc1 100644
--- a/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConfiguration.java
+++ b/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConfiguration.java
@@ -23,6 +23,7 @@ import org.apache.giraph.combiner.MessageCombiner;
 import org.apache.giraph.edge.OutEdges;
 import org.apache.giraph.edge.ReuseObjectsOutEdges;
 import org.apache.giraph.factories.ComputationFactory;
+import org.apache.giraph.graph.Vertex;
 import org.apache.giraph.graph.VertexValueCombiner;
 import org.apache.giraph.graph.VertexResolver;
 import org.apache.giraph.factories.VertexValueFactory;
@@ -173,6 +174,16 @@ public class GiraphConfiguration extends Configuration
   }
 
   /**
+   * Set the vertex implementation class
+   *
+   * @param vertexClass class of the vertex implementation
+   */
+  public final void setVertexClass(Class<? extends Vertex> vertexClass) {
+    VERTEX_CLASS.set(this, vertexClass);
+  }
+
+
+  /**
    * Set the vertex edges class used during edge-based input (if different
    * from the one used during computation)
    *

http://git-wip-us.apache.org/repos/asf/giraph/blob/0a901771/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConstants.java
----------------------------------------------------------------------
diff --git a/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConstants.java b/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConstants.java
index ab0570f..81c0e0b 100644
--- a/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConstants.java
+++ b/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConstants.java
@@ -39,11 +39,13 @@ import org.apache.giraph.factories.MessageValueFactory;
 import org.apache.giraph.factories.VertexIdFactory;
 import org.apache.giraph.factories.VertexValueFactory;
 import org.apache.giraph.graph.Computation;
-import org.apache.giraph.graph.DefaultVertexValueCombiner;
+import org.apache.giraph.graph.DefaultVertex;
 import org.apache.giraph.graph.DefaultVertexResolver;
+import org.apache.giraph.graph.DefaultVertexValueCombiner;
 import org.apache.giraph.graph.Language;
-import org.apache.giraph.graph.VertexValueCombiner;
+import org.apache.giraph.graph.Vertex;
 import org.apache.giraph.graph.VertexResolver;
+import org.apache.giraph.graph.VertexValueCombiner;
 import org.apache.giraph.io.EdgeInputFormat;
 import org.apache.giraph.io.EdgeOutputFormat;
 import org.apache.giraph.io.MappingInputFormat;
@@ -274,7 +276,11 @@ public interface GiraphConstants {
       ClassConfOption.create("giraph.vertexInputFilterClass",
           DefaultVertexInputFilter.class, VertexInputFilter.class,
           "VertexInputFilter class");
-
+  /** Vertex class */
+  ClassConfOption<Vertex> VERTEX_CLASS =
+      ClassConfOption.create("giraph.vertexClass",
+          DefaultVertex.class, Vertex.class,
+          "Vertex class");
   /** VertexOutputFormat class */
   ClassConfOption<VertexOutputFormat> VERTEX_OUTPUT_FORMAT_CLASS =
       ClassConfOption.create("giraph.vertexOutputFormatClass", null,

http://git-wip-us.apache.org/repos/asf/giraph/blob/0a901771/giraph-core/src/main/java/org/apache/giraph/conf/GiraphTypes.java
----------------------------------------------------------------------
diff --git a/giraph-core/src/main/java/org/apache/giraph/conf/GiraphTypes.java b/giraph-core/src/main/java/org/apache/giraph/conf/GiraphTypes.java
index 0727270..6c854f3 100644
--- a/giraph-core/src/main/java/org/apache/giraph/conf/GiraphTypes.java
+++ b/giraph-core/src/main/java/org/apache/giraph/conf/GiraphTypes.java
@@ -17,6 +17,8 @@
  */
 package org.apache.giraph.conf;
 
+import org.apache.giraph.graph.DefaultVertex;
+import org.apache.giraph.graph.Vertex;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.WritableComparable;
@@ -28,6 +30,7 @@ import static org.apache.giraph.conf.GiraphConstants.INCOMING_MESSAGE_VALUE_CLAS
 import static org.apache.giraph.conf.GiraphConstants.OUTGOING_MESSAGE_VALUE_CLASS;
 import static org.apache.giraph.conf.GiraphConstants.VERTEX_ID_CLASS;
 import static org.apache.giraph.conf.GiraphConstants.VERTEX_VALUE_CLASS;
+import static org.apache.giraph.conf.GiraphConstants.VERTEX_CLASS;
 import static org.apache.giraph.utils.ConfigurationUtils.getTypesHolderClass;
 import static org.apache.giraph.utils.ReflectionUtils.getTypeArguments;
 
@@ -50,6 +53,9 @@ public class GiraphTypes<I extends WritableComparable, V extends Writable,
   private Class<? extends Writable> incomingMessageValueClass;
   /** Outgoing message value class */
   private Class<? extends Writable> outgoingMessageValueClass;
+  /** Vertex implementation class */
+  private Class<? extends Vertex> vertexClass = DefaultVertex.class;
+
 
   /**
    * Empty Constructor
@@ -128,6 +134,7 @@ public class GiraphTypes<I extends WritableComparable, V extends Writable,
     edgeValueClass = (Class<E>) EDGE_VALUE_CLASS.get(conf);
     incomingMessageValueClass = INCOMING_MESSAGE_VALUE_CLASS.get(conf);
     outgoingMessageValueClass = OUTGOING_MESSAGE_VALUE_CLASS.get(conf);
+    vertexClass = VERTEX_CLASS.get(conf);
   }
 
   /**
@@ -189,6 +196,10 @@ public class GiraphTypes<I extends WritableComparable, V extends Writable,
     return vertexValueClass;
   }
 
+  public Class<? extends Vertex> getVertexClass() {
+    return vertexClass;
+  }
+
   public void setEdgeValueClass(Class<E> edgeValueClass) {
     this.edgeValueClass = edgeValueClass;
   }

http://git-wip-us.apache.org/repos/asf/giraph/blob/0a901771/giraph-core/src/main/java/org/apache/giraph/conf/ImmutableClassesGiraphConfiguration.java
----------------------------------------------------------------------
diff --git a/giraph-core/src/main/java/org/apache/giraph/conf/ImmutableClassesGiraphConfiguration.java b/giraph-core/src/main/java/org/apache/giraph/conf/ImmutableClassesGiraphConfiguration.java
index 3121fa8..32d08f8 100644
--- a/giraph-core/src/main/java/org/apache/giraph/conf/ImmutableClassesGiraphConfiguration.java
+++ b/giraph-core/src/main/java/org/apache/giraph/conf/ImmutableClassesGiraphConfiguration.java
@@ -39,7 +39,6 @@ import org.apache.giraph.factories.ValueFactories;
 import org.apache.giraph.factories.VertexIdFactory;
 import org.apache.giraph.factories.VertexValueFactory;
 import org.apache.giraph.graph.Computation;
-import org.apache.giraph.graph.DefaultVertex;
 import org.apache.giraph.graph.Language;
 import org.apache.giraph.graph.Vertex;
 import org.apache.giraph.graph.VertexResolver;
@@ -90,6 +89,7 @@ import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.hadoop.util.Progressable;
 
+
 /**
  * The classes set here are immutable, the remaining configuration is mutable.
  * Classes are immutable and final to provide the best performance for
@@ -113,6 +113,7 @@ public class ImmutableClassesGiraphConfiguration<I extends WritableComparable,
   /** Whether values (IVEMM) need Jython wrappers */
   private final PerGraphTypeBoolean valueNeedsWrappers;
 
+
   /**
    * Use unsafe serialization? Cached for fast access to instantiate the
    * extended data input/output classes
@@ -675,12 +676,12 @@ public class ImmutableClassesGiraphConfiguration<I extends WritableComparable,
    * @return Instantiated vertex
    */
   public Vertex<I, V, E> createVertex() {
-    Vertex<I, V, E> vertex = new DefaultVertex<I, V, E>();
-    vertex.setConf(this);
-    return vertex;
+    Class vertexClass = classes.getVertexClass();
+    return (Vertex<I, V, E>) ReflectionUtils.newInstance(vertexClass, this);
   }
 
-  /**
+
+ /**
    * Get the user's subclassed vertex index class.
    *
    * @return User's vertex index class

http://git-wip-us.apache.org/repos/asf/giraph/blob/0a901771/giraph-core/src/main/java/org/apache/giraph/graph/ByteValueVertex.java
----------------------------------------------------------------------
diff --git a/giraph-core/src/main/java/org/apache/giraph/graph/ByteValueVertex.java b/giraph-core/src/main/java/org/apache/giraph/graph/ByteValueVertex.java
new file mode 100644
index 0000000..fdabc81
--- /dev/null
+++ b/giraph-core/src/main/java/org/apache/giraph/graph/ByteValueVertex.java
@@ -0,0 +1,114 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.giraph.graph;
+
+import org.apache.giraph.edge.Edge;
+import org.apache.giraph.utils.UnsafeByteArrayInputStream;
+import org.apache.giraph.utils.UnsafeByteArrayOutputStream;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+
+import java.io.DataInput;
+import java.io.IOException;
+
+
+/**
+ * Special version of vertex that holds the value in raw byte form to save
+ * memory.
+ *
+ * @param <I> Vertex id
+ * @param <V> Vertex data
+ * @param <E> Edge data
+ */
+public class ByteValueVertex<I extends WritableComparable,
+        V extends Writable, E extends Writable>
+        extends DefaultVertex<I, V, E> {
+
+  /** Vertex value stored as raw bytes */
+  private byte[] valueBytes;
+  /** Value as a cached object that is only valid during the vertex update */
+  private V cachedValue = null;
+
+  @Override
+  public V getValue() {
+    if (cachedValue != null) {
+      return cachedValue; // Always return the same instance
+    }
+    DataInput dis = new UnsafeByteArrayInputStream(valueBytes);
+    cachedValue = getConf().createVertexValue();
+    try {
+      cachedValue.readFields(dis);
+    } catch (IOException ioe) {
+      throw new RuntimeException("Could not deserialize vertex value", ioe);
+    }
+    // Forget the serialized data, because we have cached the object
+    valueBytes = null;
+    return cachedValue;
+  }
+
+  /**
+   * Serializes the value to bytes, stored in field valueBytes
+   * @param value new vertex value
+   */
+  private void setSerializedValue(V value) {
+    UnsafeByteArrayOutputStream bos = new UnsafeByteArrayOutputStream();
+    try {
+      value.write(bos);
+      bos.close();
+    } catch (IOException ioe) {
+      throw new RuntimeException("Could not serialize vertex value", ioe);
+    }
+    this.valueBytes = bos.toByteArray();
+    cachedValue = null;
+  }
+
+  @Override
+  public void setValue(V value) {
+    if (cachedValue != null) {
+      cachedValue = value;
+    } else {
+      setSerializedValue(value);
+    }
+  }
+
+  @Override
+  public void initialize(I id, V value, Iterable<Edge<I, E>> edges) {
+    // Set the parent's value to null, and instead use our own setter
+    super.initialize(id, null, edges);
+    setValue(value);
+  }
+
+  @Override
+  public void initialize(I id, V value) {
+    super.initialize(id, null);
+    setValue(value);
+  }
+
+  @Override
+  public void unwrapMutableEdges() {
+    // This method is called always after compute(vertex), so
+    // we use this to commit the vertex value.
+    if (cachedValue != null) {
+      // This means the value has been requested from vertex
+      // and possibly mutated -- so we need to update the byte array
+      setSerializedValue(cachedValue);
+      cachedValue = null;  // Uncache the value
+    }
+    super.unwrapMutableEdges();
+  }
+}

http://git-wip-us.apache.org/repos/asf/giraph/blob/0a901771/giraph-core/src/main/java/org/apache/giraph/utils/ConfigurationUtils.java
----------------------------------------------------------------------
diff --git a/giraph-core/src/main/java/org/apache/giraph/utils/ConfigurationUtils.java b/giraph-core/src/main/java/org/apache/giraph/utils/ConfigurationUtils.java
index da4b98e..917f881 100644
--- a/giraph-core/src/main/java/org/apache/giraph/utils/ConfigurationUtils.java
+++ b/giraph-core/src/main/java/org/apache/giraph/utils/ConfigurationUtils.java
@@ -433,6 +433,7 @@ public final class ConfigurationUtils {
         }
       }
     }
+
     // YARN-ONLY OPTIONS
     if (cmd.hasOption("yj")) {
       conf.setYarnLibJars(cmd.getOptionValue("yj"));

http://git-wip-us.apache.org/repos/asf/giraph/blob/0a901771/giraph-core/src/test/java/org/apache/giraph/graph/TestByteValueVertex.java
----------------------------------------------------------------------
diff --git a/giraph-core/src/test/java/org/apache/giraph/graph/TestByteValueVertex.java b/giraph-core/src/test/java/org/apache/giraph/graph/TestByteValueVertex.java
new file mode 100644
index 0000000..740e944
--- /dev/null
+++ b/giraph-core/src/test/java/org/apache/giraph/graph/TestByteValueVertex.java
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.giraph.graph;
+
+import org.apache.giraph.conf.GiraphConfiguration;
+import org.apache.giraph.conf.ImmutableClassesGiraphConfiguration;
+import org.apache.giraph.edge.ArrayListEdges;
+import org.apache.giraph.edge.OutEdges;
+import org.apache.hadoop.io.DoubleWritable;
+import org.apache.hadoop.io.FloatWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.junit.Test;
+
+import static org.junit.Assert.*;
+
+/**
+ * Tests whether ByteValueVertex works -- same test as for DefaultVertex
+ * but with different factory method for vertices.
+ */
+public class TestByteValueVertex extends TestVertexAndEdges {
+
+    protected Vertex<LongWritable, FloatWritable, DoubleWritable>
+    instantiateVertex(Class<? extends OutEdges> edgesClass) {
+      GiraphConfiguration giraphConfiguration = new GiraphConfiguration();
+      giraphConfiguration.setComputationClass(TestComputation.class);
+      giraphConfiguration.setOutEdgesClass(edgesClass);
+      giraphConfiguration.setVertexClass(ByteValueVertex.class);
+
+      ImmutableClassesGiraphConfiguration immutableClassesGiraphConfiguration =
+                new ImmutableClassesGiraphConfiguration(giraphConfiguration);
+      Vertex bv = immutableClassesGiraphConfiguration.createVertex();
+      assertTrue(bv instanceof ByteValueVertex);
+      return bv;
+    }
+
+    @Test
+    public void testCachedValue() {
+      ByteValueVertex<LongWritable, FloatWritable, DoubleWritable> byteValueVertex =
+        (ByteValueVertex<LongWritable, FloatWritable, DoubleWritable>)
+          instantiateVertex(ArrayListEdges.class);
+
+      FloatWritable origValue = new FloatWritable(492.2f);
+      byteValueVertex.setValue(origValue);
+
+      // Check value is correct
+      assertEquals(492.2f, byteValueVertex.getValue().get(), 0.0f);
+
+      // Change value and see it is reflected correctly
+      FloatWritable gotValue = byteValueVertex.getValue();
+      gotValue.set(33.3f);
+      assertEquals(33.3f, byteValueVertex.getValue().get(), 0.0f);
+
+      // Change the object and see that the cached value also changes
+      FloatWritable newValue = new FloatWritable(99.9f);
+      byteValueVertex.setValue(newValue);
+      assertEquals(99.9f, byteValueVertex.getValue().get(), 0.0f);
+
+      // Reference should be now newValue
+      assertTrue(newValue == byteValueVertex.getValue());
+
+      // Commit the changes... (called after vertex update)
+      byteValueVertex.unwrapMutableEdges();
+
+      // Now the value reference should be new
+      assertFalse(newValue == byteValueVertex.getValue());
+
+      // But value data should be correct
+      assertEquals(99.9f, byteValueVertex.getValue().get(), 0.0f);
+    }
+}

http://git-wip-us.apache.org/repos/asf/giraph/blob/0a901771/giraph-core/src/test/java/org/apache/giraph/graph/TestVertexAndEdges.java
----------------------------------------------------------------------
diff --git a/giraph-core/src/test/java/org/apache/giraph/graph/TestVertexAndEdges.java b/giraph-core/src/test/java/org/apache/giraph/graph/TestVertexAndEdges.java
index 86d75a3..01c2613 100644
--- a/giraph-core/src/test/java/org/apache/giraph/graph/TestVertexAndEdges.java
+++ b/giraph-core/src/test/java/org/apache/giraph/graph/TestVertexAndEdges.java
@@ -161,7 +161,7 @@ public class TestVertexAndEdges {
     edgesClasses.add(LongDoubleHashMapEdges.class);
   }
 
-  private Vertex<LongWritable, FloatWritable, DoubleWritable>
+  protected Vertex<LongWritable, FloatWritable, DoubleWritable>
   instantiateVertex(Class<? extends OutEdges> edgesClass) {
     GiraphConfiguration giraphConfiguration = new GiraphConfiguration();
     giraphConfiguration.setComputationClass(TestComputation.class);
@@ -359,7 +359,7 @@ public class TestVertexAndEdges {
     }
   }
 
-  private Vertex<LongWritable, FloatWritable, DoubleWritable>
+  protected Vertex<LongWritable, FloatWritable, DoubleWritable>
   buildVertex(Class<? extends OutEdges> edgesClass) {
     Vertex<LongWritable, FloatWritable, DoubleWritable> vertex =
         instantiateVertex(edgesClass);