You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@giraph.apache.org by jg...@apache.org on 2011/10/28 21:19:45 UTC
svn commit: r1190510 - in /incubator/giraph/trunk: CHANGELOG
src/main/java/org/apache/giraph/lib/TextDoubleDoubleAdjacencyListVertexInputFormat.java
src/test/java/org/apache/giraph/lib/TestTextDoubleDoubleAdjacencyListVertexInputFormat.java
Author: jghoman
Date: Fri Oct 28 19:19:44 2011
New Revision: 1190510
URL: http://svn.apache.org/viewvc?rev=1190510&view=rev
Log:
GIRAPH-67. Provide AdjacencyList InputFormat for Ids of Strings and double values.
Added:
incubator/giraph/trunk/src/main/java/org/apache/giraph/lib/TextDoubleDoubleAdjacencyListVertexInputFormat.java
incubator/giraph/trunk/src/test/java/org/apache/giraph/lib/TestTextDoubleDoubleAdjacencyListVertexInputFormat.java
Modified:
incubator/giraph/trunk/CHANGELOG
Modified: incubator/giraph/trunk/CHANGELOG
URL: http://svn.apache.org/viewvc/incubator/giraph/trunk/CHANGELOG?rev=1190510&r1=1190509&r2=1190510&view=diff
==============================================================================
--- incubator/giraph/trunk/CHANGELOG (original)
+++ incubator/giraph/trunk/CHANGELOG Fri Oct 28 19:19:44 2011
@@ -2,6 +2,9 @@ Giraph Change Log
Release 0.70.0 - unreleased
+ GIRAPH-67. Provide AdjacencyList InputFormat for Ids of Strings and
+ double values. (jghoman)
+
GIRAPH-56. Create a CSV TextOutputFormat. (jghoman)
GIRAPH-66: Add presentations section to website. (jghoman)
Added: incubator/giraph/trunk/src/main/java/org/apache/giraph/lib/TextDoubleDoubleAdjacencyListVertexInputFormat.java
URL: http://svn.apache.org/viewvc/incubator/giraph/trunk/src/main/java/org/apache/giraph/lib/TextDoubleDoubleAdjacencyListVertexInputFormat.java?rev=1190510&view=auto
==============================================================================
--- incubator/giraph/trunk/src/main/java/org/apache/giraph/lib/TextDoubleDoubleAdjacencyListVertexInputFormat.java (added)
+++ incubator/giraph/trunk/src/main/java/org/apache/giraph/lib/TextDoubleDoubleAdjacencyListVertexInputFormat.java Fri Oct 28 19:19:44 2011
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.giraph.lib;
+
+import org.apache.giraph.graph.Edge;
+import org.apache.hadoop.io.DoubleWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+
+import java.io.IOException;
+
+/**
+ * Input format for graphs stored as adjacency lists whose ids are kept as
+ * Strings and whose vertex and edge values are doubles. This is a good input
+ * format when the id types do not matter and can be stashed in a String.
+ */
+public class TextDoubleDoubleAdjacencyListVertexInputFormat
+    extends TextVertexInputFormat<Text, DoubleWritable, DoubleWritable> {
+  /** Decodes the String tokens of a line into Text ids and double values. */
+  static class VertexReader extends AdjacencyListVertexReader<Text,
+      DoubleWritable, DoubleWritable> {
+    /** Wraps {@code lineRecordReader} without supplying a LineSanitizer. */
+    VertexReader(RecordReader<LongWritable, Text> lineRecordReader) {
+      super(lineRecordReader);
+    }
+    /** Wraps {@code lineRecordReader} and passes {@code sanitizer} upward. */
+    VertexReader(RecordReader<LongWritable, Text> lineRecordReader,
+        LineSanitizer sanitizer) {
+      super(lineRecordReader, sanitizer);
+    }
+    /** Stores the id token {@code s} in {@code id} as-is. */
+    @Override
+    public void decodeId(String s, Text id) {
+      id.set(s);
+    }
+    /** Parses {@code s} as a double (NumberFormatException if malformed). */
+    @Override
+    public void decodeValue(String s, DoubleWritable value) {
+      value.set(Double.parseDouble(s)); // parseDouble avoids boxing a Double
+    }
+    /** Sets target id {@code s1} and the double parsed from {@code s2}. */
+    @Override
+    public void decodeEdge(String s1, String s2,
+        Edge<Text, DoubleWritable> edge) {
+      edge.setDestVertexId(new Text(s1));
+      edge.setEdgeValue(new DoubleWritable(Double.parseDouble(s2)));
+    }
+  }
+  /** Builds a VertexReader over the text record reader for {@code split}. */
+  @Override
+  public org.apache.giraph.graph.VertexReader createVertexReader(InputSplit split,
+      TaskAttemptContext context) throws IOException {
+    return new VertexReader(textInputFormat.createRecordReader(split, context));
+  }
+
+}
Added: incubator/giraph/trunk/src/test/java/org/apache/giraph/lib/TestTextDoubleDoubleAdjacencyListVertexInputFormat.java
URL: http://svn.apache.org/viewvc/incubator/giraph/trunk/src/test/java/org/apache/giraph/lib/TestTextDoubleDoubleAdjacencyListVertexInputFormat.java?rev=1190510&view=auto
==============================================================================
--- incubator/giraph/trunk/src/test/java/org/apache/giraph/lib/TestTextDoubleDoubleAdjacencyListVertexInputFormat.java (added)
+++ incubator/giraph/trunk/src/test/java/org/apache/giraph/lib/TestTextDoubleDoubleAdjacencyListVertexInputFormat.java Fri Oct 28 19:19:44 2011
@@ -0,0 +1,158 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.giraph.lib;
+
+
+import junit.framework.TestCase;
+import org.apache.giraph.graph.GiraphJob;
+import org.apache.giraph.graph.MutableVertex;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.BooleanWritable;
+import org.apache.hadoop.io.DoubleWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+
+import java.io.IOException;
+
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.verifyNoMoreInteractions;
+import static org.mockito.Mockito.when;
+
+public class TestTextDoubleDoubleAdjacencyListVertexInputFormat extends TestCase {
+
+  private RecordReader<LongWritable, Text> rr;
+  private Configuration conf;
+  private TaskAttemptContext tac;
+
+  /** Prepares a reader mock holding one record and a context exposing conf. */
+  public void setUp() throws IOException, InterruptedException {
+    conf = new Configuration();
+    conf.setClass(GiraphJob.VERTEX_INDEX_CLASS, Text.class, Writable.class);
+    conf.setClass(GiraphJob.VERTEX_VALUE_CLASS, DoubleWritable.class, Writable.class);
+    tac = mock(TaskAttemptContext.class);
+    when(tac.getConfiguration()).thenReturn(conf);
+    rr = mock(RecordReader.class);
+    // The first nextKeyValue() call reports a record, every later call none.
+    when(rr.nextKeyValue()).thenReturn(true).thenReturn(false);
+  }
+
+  /** A line holding only an id must be rejected. */
+  public void testIndexMustHaveValue() throws IOException, InterruptedException {
+    assertLineDoesNotSplit("hi");
+  }
+
+  /** An edge target without a paired edge value must be rejected. */
+  public void testEdgesMustHaveValues() throws IOException, InterruptedException {
+    assertLineDoesNotSplit("index\t55.66\tindex2");
+  }
+
+  /** Expects next() on {@code line} to fail with the split error message. */
+  private void assertLineDoesNotSplit(String line)
+      throws IOException, InterruptedException {
+    when(rr.getCurrentValue()).thenReturn(new Text(line));
+    TextDoubleDoubleAdjacencyListVertexInputFormat.VertexReader reader =
+        new TextDoubleDoubleAdjacencyListVertexInputFormat.VertexReader(rr);
+    reader.initialize(null, tac);
+
+    MutableVertex<Text, DoubleWritable, DoubleWritable, BooleanWritable>
+        vertex = mock(MutableVertex.class);
+    try {
+      reader.next(vertex);
+      fail("Should have thrown an IllegalArgumentException");
+    } catch (IllegalArgumentException expected) {
+      String message = expected.getMessage();
+      assertTrue(message.startsWith("Line did not split correctly: "));
+    }
+  }
+
+  /** Reads a tab-separated line with three edges and checks each vertex call. */
+  public void testHappyPath() throws IOException, InterruptedException {
+    Text line = new Text("Hi\t0\tCiao\t1.123\tBomdia\t2.234\tOla\t3.345");
+    when(rr.getCurrentValue()).thenReturn(line);
+
+    TextDoubleDoubleAdjacencyListVertexInputFormat.VertexReader reader =
+        new TextDoubleDoubleAdjacencyListVertexInputFormat.VertexReader(rr);
+    reader.initialize(null, tac);
+
+    MutableVertex<Text, DoubleWritable, DoubleWritable, BooleanWritable>
+        vertex = mock(MutableVertex.class);
+    boolean read = reader.next(vertex);
+
+    assertTrue("Should have been able to read vertex", read);
+    verify(vertex).setVertexId(new Text("Hi"));
+    verify(vertex).setVertexValue(new DoubleWritable(0));
+    verify(vertex).addEdge(new Text("Ciao"), new DoubleWritable(1.123d));
+    verify(vertex).addEdge(new Text("Bomdia"), new DoubleWritable(2.234d));
+    verify(vertex).addEdge(new Text("Ola"), new DoubleWritable(3.345d));
+    verifyNoMoreInteractions(vertex);
+  }
+
+  /** Passes an upper-casing LineSanitizer and expects upper-cased ids. */
+  public void testLineSanitizer() throws IOException, InterruptedException {
+    Text line = new Text("Bye\t0.01\tCiao\t1.001\tTchau\t2.0001\tAdios\t3.00001");
+    when(rr.getCurrentValue()).thenReturn(line);
+
+    AdjacencyListVertexReader.LineSanitizer upperCaser =
+        new AdjacencyListVertexReader.LineSanitizer() {
+          @Override
+          public String sanitize(String s) {
+            return s.toUpperCase();
+          }
+        };
+    TextDoubleDoubleAdjacencyListVertexInputFormat.VertexReader reader =
+        new TextDoubleDoubleAdjacencyListVertexInputFormat.VertexReader(rr, upperCaser);
+    reader.initialize(null, tac);
+
+    MutableVertex<Text, DoubleWritable, DoubleWritable, BooleanWritable>
+        vertex = mock(MutableVertex.class);
+    boolean read = reader.next(vertex);
+
+    assertTrue("Should have been able to read vertex", read);
+    verify(vertex).setVertexId(new Text("BYE"));
+    verify(vertex).setVertexValue(new DoubleWritable(0.01d));
+    verify(vertex).addEdge(new Text("CIAO"), new DoubleWritable(1.001d));
+    verify(vertex).addEdge(new Text("TCHAU"), new DoubleWritable(2.0001d));
+    verify(vertex).addEdge(new Text("ADIOS"), new DoubleWritable(3.00001d));
+    verifyNoMoreInteractions(vertex);
+  }
+
+  /** Sets ":" as the token separator in conf and reads a ":"-delimited line. */
+  public void testDifferentSeparators() throws IOException, InterruptedException {
+    conf.set(AdjacencyListVertexReader.LINE_TOKENIZE_VALUE, ":");
+    when(rr.getCurrentValue()).thenReturn(new Text("alpha:42:beta:99"));
+
+    TextDoubleDoubleAdjacencyListVertexInputFormat.VertexReader reader =
+        new TextDoubleDoubleAdjacencyListVertexInputFormat.VertexReader(rr);
+    reader.initialize(null, tac);
+
+    MutableVertex<Text, DoubleWritable, DoubleWritable, BooleanWritable>
+        vertex = mock(MutableVertex.class);
+    boolean read = reader.next(vertex);
+
+    assertTrue("Should have been able to read vertex", read);
+    verify(vertex).setVertexId(new Text("alpha"));
+    verify(vertex).setVertexValue(new DoubleWritable(42));
+    verify(vertex).addEdge(new Text("beta"), new DoubleWritable(99));
+    verifyNoMoreInteractions(vertex);
+  }
+
+}