You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@giraph.apache.org by ni...@apache.org on 2013/01/02 20:04:01 UTC

[2/4] GIRAPH-458: split formats module into accumulo, hbase, hcatalog (nitay)

http://git-wip-us.apache.org/repos/asf/giraph/blob/57ea5561/giraph-formats/src/test/java/org/apache/giraph/io/accumulo/edgemarker/AccumuloEdgeInputFormat.java
----------------------------------------------------------------------
diff --git a/giraph-formats/src/test/java/org/apache/giraph/io/accumulo/edgemarker/AccumuloEdgeInputFormat.java b/giraph-formats/src/test/java/org/apache/giraph/io/accumulo/edgemarker/AccumuloEdgeInputFormat.java
deleted file mode 100644
index b670144..0000000
--- a/giraph-formats/src/test/java/org/apache/giraph/io/accumulo/edgemarker/AccumuloEdgeInputFormat.java
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.giraph.io.accumulo.edgemarker;
-
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.Value;
-import org.apache.giraph.graph.Edge;
-import org.apache.giraph.graph.Vertex;
-import org.apache.giraph.graph.VertexReader;
-import org.apache.giraph.io.accumulo.AccumuloVertexInputFormat;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-
-import com.google.common.collect.Lists;
-
-import java.io.IOException;
-import java.util.List;
-import java.util.regex.Pattern;
-
-/**
- * Example subclass which reads in Key/Value pairs to construct vertex objects.
- */
-public class AccumuloEdgeInputFormat
-        extends AccumuloVertexInputFormat<Text, Text, Text, Text> {
-
-    /** Shared empty edge value; this format does not carry edge data. */
-    private static final Text uselessEdgeValue = new Text();
-
-    /** Wraps the Accumulo record reader for the split in an edge vertex reader. */
-    public VertexReader<Text, Text, Text, Text>
-    createVertexReader(InputSplit split, TaskAttemptContext context)
-            throws IOException {
-        try {
-            return new AccumuloEdgeVertexReader(
-                    accumuloInputFormat.createRecordReader(split, context));
-        } catch (InterruptedException e) {
-            Thread.currentThread().interrupt(); // keep the interrupt visible to callers
-            throw new IOException(e);
-        }
-    }
-    /**
-     * Reader takes Key/Value pairs from the underlying input format.
-     */
-    public static class AccumuloEdgeVertexReader
-            extends AccumuloVertexReader<Text, Text, Text, Text> {
-
-        public static final Pattern commaPattern = Pattern.compile("[,]");
-
-        public AccumuloEdgeVertexReader(RecordReader<Key, Value> recordReader) {
-            super(recordReader);
-        }
-
-        /** Advances the wrapped record reader to its next Key/Value pair. */
-        public boolean nextVertex() throws IOException, InterruptedException {
-            return getRecordReader().nextKeyValue();
-        }
-
-        /**
-         * Builds a vertex from the current Key/Value: the key's row is the vertex
-         * id and the value bytes name the target of its single out-edge.
-         */
-        public Vertex<Text, Text, Text, Text> getCurrentVertex()
-                throws IOException, InterruptedException {
-            Key key = getRecordReader().getCurrentKey();
-            Value value = getRecordReader().getCurrentValue();
-            Vertex<Text, Text, Text, Text> vertex =
-                    getConfiguration().createVertex();
-            Text vertexId = key.getRow();
-            List<Edge<Text, Text>> edges = Lists.newLinkedList();
-            // Wrap the raw bytes directly; no platform-charset String round trip.
-            Text edgeId = new Text(value.get());
-            edges.add(new Edge<Text, Text>(edgeId, uselessEdgeValue));
-            vertex.initialize(vertexId, new Text(), edges);
-            return vertex;
-        }
-    }
-}

http://git-wip-us.apache.org/repos/asf/giraph/blob/57ea5561/giraph-formats/src/test/java/org/apache/giraph/io/accumulo/edgemarker/AccumuloEdgeOutputFormat.java
----------------------------------------------------------------------
diff --git a/giraph-formats/src/test/java/org/apache/giraph/io/accumulo/edgemarker/AccumuloEdgeOutputFormat.java b/giraph-formats/src/test/java/org/apache/giraph/io/accumulo/edgemarker/AccumuloEdgeOutputFormat.java
deleted file mode 100644
index ff00fd6..0000000
--- a/giraph-formats/src/test/java/org/apache/giraph/io/accumulo/edgemarker/AccumuloEdgeOutputFormat.java
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.giraph.io.accumulo.edgemarker;
-
-import org.apache.accumulo.core.data.Mutation;
-import org.apache.accumulo.core.data.Value;
-import org.apache.giraph.io.accumulo.AccumuloVertexOutputFormat;
-import org.apache.giraph.graph.Vertex;
-import org.apache.giraph.graph.VertexWriter;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.RecordWriter;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-
-import java.io.IOException;
-
-/**
- * Example subclass for writing vertices back to Accumulo.
- */
-public class AccumuloEdgeOutputFormat
-        extends AccumuloVertexOutputFormat<Text, Text, Text> {
-    /** Checks that an output table is configured, then wraps the Accumulo record writer. */
-    public VertexWriter<Text, Text, Text>
-    createVertexWriter(TaskAttemptContext context)
-            throws IOException, InterruptedException {
-        String tableName = context.getConfiguration().get(OUTPUT_TABLE);
-        if (tableName == null) {
-            throw new IOException("Forgot to set table name " +
-                    "using AccumuloVertexOutputFormat.OUTPUT_TABLE");
-        }
-        return new AccumuloEdgeVertexWriter(
-                accumuloOutputFormat.getRecordWriter(context), tableName);
-    }
-
-    /**
-     * Wraps RecordWriter for writing Mutations back to the configured Accumulo Table.
-     */
-    public static class AccumuloEdgeVertexWriter
-            extends AccumuloVertexWriter<Text, Text, Text> {
-
-        private final Text CF = new Text("cf");
-        private final Text PARENT =  new Text("parent");
-        private final Text tableName;
-
-        public AccumuloEdgeVertexWriter(
-                RecordWriter<Text, Mutation> writer, String tableName) {
-            super(writer);
-            this.tableName = new Text(tableName);
-        }
-        /**
-         * Write back a mutation that adds a qualifier for 'parent' containing the vertex value
-         * as the cell value. Assume the vertex ID corresponds to a key.
-         */
-        public void writeVertex(Vertex<Text, Text, Text, ?> vertex)
-                throws IOException, InterruptedException {
-            Mutation mt = new Mutation(vertex.getId());
-            // Encode explicitly as UTF-8 rather than with the platform default charset.
-            mt.put(CF, PARENT, new Value(
-                    vertex.getValue().toString().getBytes("UTF-8")));
-            getRecordWriter().write(tableName, mt);
-        }
-    }
-}

http://git-wip-us.apache.org/repos/asf/giraph/blob/57ea5561/giraph-formats/src/test/java/org/apache/giraph/io/hbase/TestHBaseRootMarkerVertextFormat.java
----------------------------------------------------------------------
diff --git a/giraph-formats/src/test/java/org/apache/giraph/io/hbase/TestHBaseRootMarkerVertextFormat.java b/giraph-formats/src/test/java/org/apache/giraph/io/hbase/TestHBaseRootMarkerVertextFormat.java
deleted file mode 100644
index c09913d..0000000
--- a/giraph-formats/src/test/java/org/apache/giraph/io/hbase/TestHBaseRootMarkerVertextFormat.java
+++ /dev/null
@@ -1,190 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.giraph.io.hbase;
-
-
-import org.apache.giraph.BspCase;
-import org.apache.giraph.conf.GiraphConfiguration;
-import org.apache.giraph.graph.EdgeListVertex;
-import org.apache.giraph.graph.GiraphJob;
-import org.apache.giraph.io.hbase.edgemarker.TableEdgeInputFormat;
-import org.apache.giraph.io.hbase.edgemarker.TableEdgeOutputFormat;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.HColumnDescriptor;
-import org.apache.hadoop.hbase.HTableDescriptor;
-import org.apache.hadoop.hbase.MiniHBaseCluster;
-import org.apache.hadoop.hbase.client.Get;
-import org.apache.hadoop.hbase.client.HBaseAdmin;
-import org.apache.hadoop.hbase.client.HTable;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.mapreduce.ImportTsv;
-import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
-import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.util.GenericOptionsParser;
-import org.apache.log4j.Logger;
-import org.junit.Test;
-
-import java.io.File;
-import java.io.IOException;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-/**
- * Test case for reading and writing vertices against an HBase instance.
- */
-public class TestHBaseRootMarkerVertextFormat extends BspCase {
-
-    /**
-     * Utility that owns the in-process HBase mini cluster for this test;
-     * it is started in {@link #testHBaseInputOutput()} and shut down
-     * again in that method's finally block.
-     */
-    private HBaseTestingUtility testUtil = new HBaseTestingUtility();
-    private final Logger log = Logger.getLogger(TestHBaseRootMarkerVertextFormat.class);
-
-    private final String TABLE_NAME = "simple_graph";
-    private final String FAMILY = "cf";
-    private final String QUALIFER = "children";
-    private final String OUTPUT_FIELD = "parent";
-
-    public TestHBaseRootMarkerVertextFormat() {
-        super(TestHBaseRootMarkerVertextFormat.class.getName());
-    }
-
-    /**
-     * Loads a five-row graph with ImportTsv, runs {@link EdgeNotification}
-     * over it, and checks that row 0002 was written back with parent 0001.
-     */
-    @Test
-    public void testHBaseInputOutput() throws Exception{
-        if (System.getProperty("prop.mapred.job.tracker") != null) {
-            log.info("testHBaseInputOutput: Ignore this test if not local mode.");
-            return;
-        }
-
-        String jarLocation = System.getProperty("prop.jarLocation");
-        if (jarLocation == null || !new File(jarLocation).exists()) {
-            fail("Could not find Giraph jar at " +
-                    "location specified by 'prop.jarLocation'. " +
-                    "Make sure you built the main Giraph artifact?.");
-        }
-
-        String INPUT_FILE = "graph.csv";
-        //First let's load some data using ImportTsv into our mock table.
-        String[] args = new String[] {
-                "-Dimporttsv.columns=HBASE_ROW_KEY,cf:"+QUALIFER,
-                "-Dimporttsv.separator=" + "\u002c",
-                TABLE_NAME,
-                INPUT_FILE
-        };
-
-        MiniHBaseCluster cluster = testUtil.startMiniCluster();
-        try {
-            GenericOptionsParser opts =
-                    new GenericOptionsParser(cluster.getConfiguration(), args);
-            Configuration conf = opts.getConfiguration();
-            args = opts.getRemainingArgs();
-
-            FileSystem fs = FileSystem.get(conf);
-            FSDataOutputStream op = fs.create(new Path(INPUT_FILE), true);
-            String line1 = "0001,0002\n";
-            String line2 = "0002,0004\n";
-            String line3 = "0003,0005\n";
-            String line4 = "0004,-1\n";
-            String line5 = "0005,-1\n";
-            op.write(line1.getBytes());
-            op.write(line2.getBytes());
-            op.write(line3.getBytes());
-            op.write(line4.getBytes());
-            op.write(line5.getBytes());
-            op.close();
-
-            final byte[] FAM = Bytes.toBytes(FAMILY);
-            final byte[] TAB = Bytes.toBytes(TABLE_NAME);
-            HTableDescriptor desc = new HTableDescriptor(TAB);
-            desc.addFamily(new HColumnDescriptor(FAM));
-            new HBaseAdmin(conf).createTable(desc);
-
-            Job job = ImportTsv.createSubmittableJob(conf, args);
-            job.waitForCompletion(false);
-            assertTrue(job.isSuccessful());
-            log.info("ImportTsv successful. Running HBase Giraph job.");
-
-            //now operate over HBase using Vertex I/O formats
-            conf.set(TableInputFormat.INPUT_TABLE, TABLE_NAME);
-            conf.set(TableOutputFormat.OUTPUT_TABLE, TABLE_NAME);
-
-            GiraphJob giraphJob = new GiraphJob(conf, getCallingMethodName());
-            GiraphConfiguration giraphConf = giraphJob.getConfiguration();
-            giraphConf.setZooKeeperConfiguration(
-                    cluster.getMaster().getZooKeeper().getQuorum());
-            setupConfiguration(giraphJob);
-            giraphConf.setVertexClass(EdgeNotification.class);
-            giraphConf.setVertexInputFormatClass(TableEdgeInputFormat.class);
-            giraphConf.setVertexOutputFormatClass(TableEdgeOutputFormat.class);
-
-            assertTrue(giraphJob.run(true));
-            log.info("Giraph job successful. Checking output qualifier.");
-
-            //Do a get on row 0002, it should have a parent of 0001
-            //if the outputFormat worked.
-            HTable table = new HTable(conf, TABLE_NAME);
-            byte[] parentBytes;
-            try {
-                Result result = table.get(new Get(Bytes.toBytes("0002")));
-                parentBytes = result.getValue(Bytes.toBytes(FAMILY),
-                        Bytes.toBytes(OUTPUT_FIELD));
-            } finally {
-                table.close();
-            }
-            assertNotNull(parentBytes);
-            assertTrue(parentBytes.length > 0);
-            assertEquals("0001", Bytes.toString(parentBytes));
-        } finally {
-            testUtil.shutdownMiniCluster(); // counterpart of startMiniCluster()
-        }
-    }
-
-    /**
-     * Test compute method that sends each edge a notification of its parents.
-     * The test set only has a 1-1 parent-to-child ratio for this unit test.
-     */
-    public static class EdgeNotification
-            extends EdgeListVertex<Text, Text, Text, Text> {
-        @Override
-        public void compute(Iterable<Text> messages) throws IOException {
-          for (Text message : messages) {
-            getValue().set(message);
-          }
-          if(getSuperstep() == 0) {
-            sendMessageToAllEdges(getId());
-          }
-          voteToHalt();
-        }
-    }
-}

http://git-wip-us.apache.org/repos/asf/giraph/blob/57ea5561/giraph-formats/src/test/java/org/apache/giraph/io/hbase/edgemarker/TableEdgeInputFormat.java
----------------------------------------------------------------------
diff --git a/giraph-formats/src/test/java/org/apache/giraph/io/hbase/edgemarker/TableEdgeInputFormat.java b/giraph-formats/src/test/java/org/apache/giraph/io/hbase/edgemarker/TableEdgeInputFormat.java
deleted file mode 100644
index e4e08d6..0000000
--- a/giraph-formats/src/test/java/org/apache/giraph/io/hbase/edgemarker/TableEdgeInputFormat.java
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.giraph.io.hbase.edgemarker;
-
-import org.apache.giraph.graph.Edge;
-import org.apache.giraph.graph.Vertex;
-import org.apache.giraph.graph.VertexReader;
-import org.apache.giraph.io.hbase.HBaseVertexInputFormat;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.log4j.Logger;
-
-import com.google.common.collect.Lists;
-
-import java.io.IOException;
-import java.util.List;
-
-/**
- * Test subclass for HBaseVertexInputFormat. Reads a simple
- * children qualifier to create an edge.
- */
-public class TableEdgeInputFormat extends
-        HBaseVertexInputFormat<Text, Text, Text, Text> {
-
-    /** Shared empty edge value; this format does not carry edge data. */
-    private static final Text uselessEdgeValue = new Text();
-
-    /** Creates a TableEdgeVertexReader for the given split and task context. */
-    public VertexReader<Text, Text, Text, Text>
-            createVertexReader(InputSplit split,
-                               TaskAttemptContext context) throws IOException {
-        return new TableEdgeVertexReader(split, context);
-    }
-
-    /**
-     * Uses the RecordReader to return HBase rows as vertices.
-     */
-    public static class TableEdgeVertexReader
-            extends HBaseVertexReader<Text, Text, Text, Text> {
-
-        private final byte[] CF = Bytes.toBytes("cf");
-        private final byte[] CHILDREN = Bytes.toBytes("children");
-
-        public TableEdgeVertexReader(InputSplit split, TaskAttemptContext context) throws IOException {
-            super(split, context);
-        }
-
-        /** Advances the underlying record reader to the next row. */
-        public boolean nextVertex() throws IOException,
-                InterruptedException {
-            return getRecordReader().nextKeyValue();
-        }
-
-        /**
-         * For each row, create a vertex with the row ID as a text,
-         * and its 'children' qualifier as a single edge. A row without
-         * that qualifier becomes a vertex with no out-edges.
-         */
-        public Vertex<Text, Text, Text, Text>
-                    getCurrentVertex()
-                throws IOException, InterruptedException {
-            Result row = getRecordReader().getCurrentValue();
-            Vertex<Text, Text, Text, Text> vertex =
-                getConfiguration().createVertex();
-            Text vertexId = new Text(Bytes.toString(row.getRow()));
-            List<Edge<Text, Text>> edges = Lists.newLinkedList();
-            // Result.getValue() is null for a missing cell; new Text(null) would throw.
-            byte[] children = row.getValue(CF, CHILDREN);
-            if (children != null) {
-                edges.add(new Edge<Text, Text>(
-                        new Text(Bytes.toString(children)), uselessEdgeValue));
-            }
-            vertex.initialize(vertexId, new Text(), edges);
-            return vertex;
-        }
-    }
-}

http://git-wip-us.apache.org/repos/asf/giraph/blob/57ea5561/giraph-formats/src/test/java/org/apache/giraph/io/hbase/edgemarker/TableEdgeOutputFormat.java
----------------------------------------------------------------------
diff --git a/giraph-formats/src/test/java/org/apache/giraph/io/hbase/edgemarker/TableEdgeOutputFormat.java b/giraph-formats/src/test/java/org/apache/giraph/io/hbase/edgemarker/TableEdgeOutputFormat.java
deleted file mode 100644
index 169fd88..0000000
--- a/giraph-formats/src/test/java/org/apache/giraph/io/hbase/edgemarker/TableEdgeOutputFormat.java
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.giraph.io.hbase.edgemarker;
-
-import org.apache.giraph.io.hbase.HBaseVertexOutputFormat;
-import org.apache.giraph.graph.Vertex;
-import org.apache.giraph.graph.VertexWriter;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapreduce.RecordWriter;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-
-import java.io.IOException;
-/**
- * Test subclass for HBaseVertexOutputFormat.
- */
-public class TableEdgeOutputFormat
-        extends HBaseVertexOutputFormat<Text, Text, Text> {
-
-    /** Creates a TableEdgeVertexWriter for the given task attempt context. */
-    public VertexWriter<Text, Text, Text>
-    createVertexWriter(TaskAttemptContext context)
-            throws IOException, InterruptedException {
-        return new TableEdgeVertexWriter(context);
-    }
-
-    /**
-     * For each vertex, write back to the configured table using
-     * the vertex id as the row key bytes.
-     */
-    public static class TableEdgeVertexWriter
-            extends HBaseVertexWriter<Text, Text, Text> {
-
-        private final byte[] CF = Bytes.toBytes("cf");
-        private final byte[] PARENT =  Bytes.toBytes("parent");
-
-        public TableEdgeVertexWriter(TaskAttemptContext context)
-          throws IOException, InterruptedException  {
-            super(context);
-        }
-
-        /** Records a non-empty vertex value as the value of a new 'parent' qualifier. */
-        public void writeVertex(
-                Vertex<Text, Text, Text, ?> vertex)
-                throws IOException, InterruptedException {
-            Text id = vertex.getId();
-            Text value = vertex.getValue();
-            if (value.getLength() > 0) {
-                // Text.getBytes() is the backing array; only getLength() bytes are valid.
-                byte[] rowBytes = java.util.Arrays.copyOf(id.getBytes(), id.getLength());
-                Put put = new Put(rowBytes);
-                put.add(CF, PARENT, java.util.Arrays.copyOf(value.getBytes(), value.getLength()));
-                getRecordWriter().write(new ImmutableBytesWritable(rowBytes), put);
-            }
-        }
-    }
-}

http://git-wip-us.apache.org/repos/asf/giraph/blob/57ea5561/giraph-formats/src/test/java/org/apache/giraph/io/hcatalog/TestHiveUtils.java
----------------------------------------------------------------------
diff --git a/giraph-formats/src/test/java/org/apache/giraph/io/hcatalog/TestHiveUtils.java b/giraph-formats/src/test/java/org/apache/giraph/io/hcatalog/TestHiveUtils.java
deleted file mode 100644
index 421cc28..0000000
--- a/giraph-formats/src/test/java/org/apache/giraph/io/hcatalog/TestHiveUtils.java
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.giraph.io.hcatalog;
-
-import junit.framework.TestCase;
-
-import java.util.Map;
-import org.junit.Test;
-/** Checks that HiveUtils.parsePartitionValues turns "k=v, k=v" text into a map. */
-public class TestHiveUtils extends TestCase {
-  @Test
-  public void testParsePartition() {
-    Map<String, String> parsed = HiveUtils.parsePartitionValues(
-        "feature1=2012-10-09, feature2=a1+b2, feature3=ff-gg");
-    assertEquals(3, parsed.size());
-    assertEquals("2012-10-09", parsed.get("feature1"));
-    assertEquals("a1+b2", parsed.get("feature2"));
-    assertEquals("ff-gg", parsed.get("feature3"));
-  }
-}

http://git-wip-us.apache.org/repos/asf/giraph/blob/57ea5561/giraph-hbase/pom.xml
----------------------------------------------------------------------
diff --git a/giraph-hbase/pom.xml b/giraph-hbase/pom.xml
new file mode 100644
index 0000000..49e41b3
--- /dev/null
+++ b/giraph-hbase/pom.xml
@@ -0,0 +1,225 @@
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <groupId>org.apache.giraph</groupId>
+    <artifactId>giraph-parent</artifactId>
+    <version>0.2-SNAPSHOT</version>
+  </parent>
+  <artifactId>giraph-hbase</artifactId>
+  <packaging>jar</packaging>
+
+  <name>Apache Giraph HBase I/O</name>
+
+  <properties>
+    <top.dir>${project.basedir}/..</top.dir>
+  </properties>
+
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-checkstyle-plugin</artifactId>
+        <version>2.9</version>
+        <configuration>
+          <configLocation>${top.dir}/checkstyle.xml</configLocation>
+          <headerLocation>${top.dir}/license-header.txt</headerLocation>
+          <enableRulesSummary>false</enableRulesSummary>
+          <failOnError>true</failOnError>
+          <includeTestSourceDirectory>false</includeTestSourceDirectory>
+        </configuration>
+        <executions>
+          <execution>
+            <phase>verify</phase>
+            <goals>
+               <goal>check</goal>
+             </goals>
+          </execution>
+        </executions>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-surefire-plugin</artifactId>
+        <version>2.6</version>
+        <configuration>
+          <systemProperties>
+            <property>
+              <name>prop.jarLocation</name>
+              <value>${top.dir}/giraph-core/target/giraph-${project.version}-${forHadoop}-jar-with-dependencies.jar</value>
+            </property>
+          </systemProperties>
+        </configuration>
+      </plugin>
+      <plugin>
+        <groupId>org.codehaus.mojo</groupId>
+        <artifactId>findbugs-maven-plugin</artifactId>
+        <version>2.5.1</version>
+        <configuration>
+          <xmlOutput>true</xmlOutput>
+          <findbugsXmlOutput>false</findbugsXmlOutput>
+          <excludeFilterFile>${top.dir}/findbugs-exclude.xml</excludeFilterFile>
+        </configuration>
+        <executions>
+          <execution>
+            <phase>verify</phase>
+            <goals>
+              <goal>check</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+    </plugins>
+  </build>
+
+  <repositories>
+    <!-- This is the main maven repository. Normally we wouldn't need to put
+       it here when it's the only one being used, but since we need to add
+       special repositories to get hcatalog we need to mention this one
+       specifically otherwise it won't be included. -->
+    <repository>
+      <id>central</id>
+      <name>Maven Repository</name>
+      <url>https://repo1.maven.org/maven2</url> <!-- Central no longer serves plain HTTP -->
+      <releases>
+        <enabled>true</enabled>
+      </releases>
+    </repository>
+    <!-- This is necessary for hcatalog. -->
+    <repository>
+      <id>apache</id>
+      <name>Apache Repository</name>
+      <url>https://repository.apache.org/content/repositories/snapshots</url>
+      <snapshots>
+        <enabled>true</enabled>
+      </snapshots>
+    </repository>
+    <!-- This is necessary for hive-metastore dependencies for hcatalog. -->
+    <repository>
+      <id>datanucleus</id>
+      <name>datanucleus maven repository</name>
+      <url>http://www.datanucleus.org/downloads/maven2</url>
+      <layout>default</layout>
+      <releases>
+        <enabled>true</enabled>
+        <checksumPolicy>warn</checksumPolicy>
+      </releases>
+    </repository>
+  </repositories>
+
+  <profiles>
+    <profile>
+      <id>hadoop_0.20.203</id>
+      <activation>
+        <activeByDefault>true</activeByDefault>
+      </activation>
+      <dependencies>
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-test</artifactId>
+          <version>${hadoop.version}</version>
+          <scope>test</scope>
+        </dependency>
+      </dependencies>
+    </profile>
+
+    <profile>
+      <id>hadoop_1.0</id>
+      <dependencies>
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-test</artifactId>
+          <version>${hadoop.version}</version>
+          <scope>test</scope>
+        </dependency>
+      </dependencies>
+    </profile>
+
+    <profile>
+      <id>hadoop_non_secure</id>
+      <dependencies>
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-test</artifactId>
+          <version>${hadoop.version}</version>
+          <scope>test</scope>
+        </dependency>
+      </dependencies>
+    </profile>
+
+    <profile>
+      <id>hadoop_facebook</id>
+      <dependencies>
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-test</artifactId>
+          <version>${hadoop.version}</version>
+          <scope>system</scope>
+          <systemPath>${lib.dir}/facebook-hadoop-0.20-test.jar</systemPath>
+        </dependency>
+      </dependencies>
+    </profile>
+  </profiles>
+
+  <dependencies>
+    <!-- compile dependencies. sorted lexicographically. -->
+    <dependency>
+      <groupId>commons-cli</groupId>
+      <artifactId>commons-cli</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>commons-io</groupId>
+      <artifactId>commons-io</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.giraph</groupId>
+      <artifactId>giraph-core</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.zookeeper</groupId>
+      <artifactId>zookeeper</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.giraph</groupId>
+      <artifactId>giraph</artifactId>
+      <version>0.2-SNAPSHOT</version>
+      <type>test-jar</type>
+    </dependency>
+
+    <!-- test dependencies. sorted lexicographically. -->
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase</artifactId>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+</project>

http://git-wip-us.apache.org/repos/asf/giraph/blob/57ea5561/giraph-hbase/src/main/assembly/compile.xml
----------------------------------------------------------------------
diff --git a/giraph-hbase/src/main/assembly/compile.xml b/giraph-hbase/src/main/assembly/compile.xml
new file mode 100644
index 0000000..0f7678c
--- /dev/null
+++ b/giraph-hbase/src/main/assembly/compile.xml
@@ -0,0 +1,39 @@
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
+  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
+  <id>jar-with-dependencies</id>
+   <formats>
+    <format>jar</format>
+  </formats>
+  <includeBaseDirectory>false</includeBaseDirectory>
+
+  <dependencySets>
+    <dependencySet>
+      <useProjectArtifact>true</useProjectArtifact>
+      <outputDirectory>/</outputDirectory>
+      <unpackOptions>
+          <excludes>
+              <exclude>META-INF/LICENSE</exclude>
+          </excludes>
+      </unpackOptions>
+      <unpack>false</unpack>
+      <scope>runtime</scope>
+    </dependencySet>
+  </dependencySets>
+</assembly>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/giraph/blob/57ea5561/giraph-hbase/src/main/java/org/apache/giraph/io/hbase/HBaseVertexInputFormat.java
----------------------------------------------------------------------
diff --git a/giraph-hbase/src/main/java/org/apache/giraph/io/hbase/HBaseVertexInputFormat.java b/giraph-hbase/src/main/java/org/apache/giraph/io/hbase/HBaseVertexInputFormat.java
new file mode 100644
index 0000000..cf87035
--- /dev/null
+++ b/giraph-hbase/src/main/java/org/apache/giraph/io/hbase/HBaseVertexInputFormat.java
@@ -0,0 +1,193 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.giraph.io.hbase;
+
+import org.apache.giraph.conf.ImmutableClassesGiraphConfiguration;
+import org.apache.giraph.graph.VertexInputFormat;
+import org.apache.giraph.graph.VertexReader;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.log4j.Logger;
+
+import java.io.IOException;
+import java.util.List;
+
+/**
+ *
+ * Base class that wraps an HBase TableInputFormat and underlying Scan object
+ * to help instantiate vertices from an HBase table. All
+ * the static TableInputFormat properties necessary to configure
+ * an HBase job are available.
+ *
+ * For example, setting conf.set(TableInputFormat.INPUT_TABLE, "in_table");
+ * from the job setup routine will properly delegate to the
+ * TableInputFormat instance. The Configurable interface prevents specific
+ * wrapper methods from having to be called.
+ *
+ * Works with {@link HBaseVertexOutputFormat}
+ *
+ * @param <I> Vertex index value
+ * @param <V> Vertex value
+ * @param <E> Edge value
+ * @param <M> Message data
+ */
+@SuppressWarnings("rawtypes")
+public abstract class HBaseVertexInputFormat<
+    I extends WritableComparable,
+    V extends Writable,
+    E extends Writable,
+    M extends Writable>
+    extends VertexInputFormat<I, V, E, M>  {
+
+
+   /**
+   * delegate HBase table input format
+   */
+  protected static final TableInputFormat BASE_FORMAT =
+          new TableInputFormat();
+  /**
+  * logger
+  */
+  private static final Logger LOG =
+          Logger.getLogger(HBaseVertexInputFormat.class);
+
+  /**
+   * Base vertex reader that wraps the RecordReader created by the shared
+   * BASE_FORMAT, which yields HBase row-key / Result records.  Subclasses
+   * can focus on vertex instantiation details without worrying about
+   * connection semantics. Subclasses are expected to implement nextVertex()
+   * and getCurrentVertex().
+   *
+   * The Giraph configuration and the task context are only assigned in
+   * initialize(); getConfiguration() and getContext() return null before
+   * that call.
+   *
+   * @param <I> Vertex index value
+   * @param <V> Vertex value
+   * @param <E> Edge value
+   * @param <M> Message data
+   */
+  public abstract static class HBaseVertexReader<
+          I extends WritableComparable,
+          V extends Writable,
+          E extends Writable, M extends Writable>
+          implements VertexReader<I, V, E, M> {
+    /** Giraph configuration, built from the context in initialize() */
+    private ImmutableClassesGiraphConfiguration<I, V, E, M> configuration;
+    /** Reader instance obtained from BASE_FORMAT in the constructor */
+    private final RecordReader<ImmutableBytesWritable, Result> reader;
+    /** Context passed to initialize */
+    private TaskAttemptContext context;
+
+    /**
+     * Applies the task's Hadoop configuration to the shared BASE_FORMAT and
+     * asks it to create a record reader for the split. The reader is not
+     * initialized here; that happens in initialize().
+     *
+     * @param split InputSplit the record reader is created for
+     * @param context Context whose configuration is handed to BASE_FORMAT
+     * @throws IOException propagated from BASE_FORMAT.createRecordReader()
+     */
+    public HBaseVertexReader(InputSplit split, TaskAttemptContext context)
+      throws IOException {
+      BASE_FORMAT.setConf(context.getConfiguration());
+      this.reader = BASE_FORMAT.createRecordReader(split, context);
+    }
+
+    public ImmutableClassesGiraphConfiguration<I, V, E, M> getConfiguration() {
+      return configuration;
+    }
+
+    /**
+     * Initializes the wrapped record reader, then stores the context and
+     * builds the Giraph configuration from the context's configuration.
+     *
+     * @param inputSplit Input split to be used for reading vertices.
+     * @param context Context from the task.
+     * @throws IOException
+     * @throws InterruptedException
+     */
+    public void initialize(InputSplit inputSplit,
+                           TaskAttemptContext context)
+      throws IOException,
+      InterruptedException {
+      reader.initialize(inputSplit, context);
+      this.context = context;
+      this.configuration = new ImmutableClassesGiraphConfiguration<I, V, E, M>(
+          context.getConfiguration());
+    }
+
+    /**
+     * Closes the wrapped record reader.
+     *
+     * @throws IOException
+     */
+    public void close() throws IOException {
+      reader.close();
+    }
+
+    /**
+     * Reports the progress of the wrapped record reader.
+     *
+     * @return progress as reported by the record reader
+     * @throws IOException
+     * @throws InterruptedException
+     */
+    public float getProgress() throws
+      IOException, InterruptedException {
+      return reader.getProgress();
+    }
+
+    /**
+     * Gives subclasses access to the wrapped record reader.
+     *
+     * @return Record reader to be used for reading.
+     */
+    protected RecordReader<ImmutableBytesWritable,
+      Result> getRecordReader() {
+      return reader;
+    }
+
+   /**
+    * Gives subclasses access to the task context.
+    *
+    * @return Context passed to initialize, or null if initialize() has not
+    *         been called yet.
+    */
+    protected TaskAttemptContext getContext() {
+      return context;
+    }
+
+  }
+
+  /**
+   * Applies the job configuration to the shared BASE_FORMAT and returns
+   * the input splits it computes.
+   *
+   * @param context Context of the job
+   * @param numWorkers Number of workers used for this job (not used by
+   *                   this implementation)
+   * @return Input splits produced by the wrapped TableInputFormat
+   * @throws IOException
+   * @throws InterruptedException
+   */
+  public List<InputSplit> getSplits(
+  JobContext context, int numWorkers)
+    throws IOException, InterruptedException {
+    BASE_FORMAT.setConf(context.getConfiguration());
+    return BASE_FORMAT.getSplits(context);
+  }
+}

http://git-wip-us.apache.org/repos/asf/giraph/blob/57ea5561/giraph-hbase/src/main/java/org/apache/giraph/io/hbase/HBaseVertexOutputFormat.java
----------------------------------------------------------------------
diff --git a/giraph-hbase/src/main/java/org/apache/giraph/io/hbase/HBaseVertexOutputFormat.java b/giraph-hbase/src/main/java/org/apache/giraph/io/hbase/HBaseVertexOutputFormat.java
new file mode 100644
index 0000000..2a27b63
--- /dev/null
+++ b/giraph-hbase/src/main/java/org/apache/giraph/io/hbase/HBaseVertexOutputFormat.java
@@ -0,0 +1,173 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.giraph.io.hbase;
+
+import org.apache.giraph.graph.VertexOutputFormat;
+import org.apache.giraph.graph.VertexWriter;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.OutputCommitter;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+
+import java.io.IOException;
+
+/**
+ *
+ * Base class for writing Vertex mutations back to specific
+ * rows in an HBase table. This class wraps an instance of TableOutputFormat
+ * for easy configuration with the existing properties.
+ *
+ * Setting conf.set(TableOutputFormat.OUTPUT_TABLE, "out_table");
+ * will properly delegate to the TableOutputFormat instance contained
+ * in this class. The Configurable interface prevents specific
+ * wrapper methods from having to be called.
+ *
+ * Works with {@link HBaseVertexInputFormat}
+ *
+ * @param <I> Vertex index value
+ * @param <V> Vertex value
+ * @param <E> Edge value
+ */
+@SuppressWarnings("rawtypes")
+public abstract class HBaseVertexOutputFormat<
+        I extends WritableComparable,
+        V extends Writable,
+        E extends Writable>
+        extends VertexOutputFormat
+                <I, V, E> {
+
+  /**
+   * delegate output format that writes to HBase
+   */
+  protected static final TableOutputFormat<ImmutableBytesWritable>
+  BASE_FORMAT = new TableOutputFormat<ImmutableBytesWritable>();
+
+  /**
+   *   Base vertex writer.
+   *
+   *   Simple class which holds an instance of RecordWriter
+   *   over Writable objects, obtained from the shared BASE_FORMAT.
+   *   Subclasses are expected to implement writeVertex()
+   *
+   * @param <I> Vertex index value
+   * @param <V> Vertex value
+   * @param <E> Edge value
+   */
+  public abstract static class HBaseVertexWriter<
+          I extends WritableComparable,
+          V extends Writable,
+          E extends Writable>
+          implements VertexWriter<I, V, E> {
+
+    /**
+     * context passed to initialize(); null until then
+     */
+    private TaskAttemptContext context;
+
+    /**
+     * record writer instance obtained from BASE_FORMAT in the constructor
+     */
+    private RecordWriter<ImmutableBytesWritable,
+              Writable> recordWriter;
+
+   /**
+    * Sets up base table output format and creates a record writer.
+    * The context's configuration is applied to the shared BASE_FORMAT
+    * before the record writer is requested from it.
+    *
+    * @param context task attempt context
+    * @throws IOException
+    * @throws InterruptedException
+    */
+    public HBaseVertexWriter(TaskAttemptContext context)
+      throws IOException, InterruptedException {
+      BASE_FORMAT.setConf(context.getConfiguration());
+      this.recordWriter = BASE_FORMAT.getRecordWriter(context);
+    }
+
+    /**
+     * Stores the context so that it can be retrieved with getContext().
+     *
+     * @param context Context used to write the vertices.
+     * @throws IOException
+     */
+    public void initialize(TaskAttemptContext context)
+      throws IOException {
+      this.context = context;
+    }
+
+    /**
+     * Closes the wrapped record writer.
+     *
+     * @param context the context of the task
+     * @throws IOException
+     * @throws InterruptedException
+     */
+    public void close(TaskAttemptContext context)
+      throws IOException, InterruptedException {
+      recordWriter.close(context);
+    }
+
+    /**
+     * Get the table record writer.
+     *
+     * @return Record writer to be used for writing.
+     */
+    public RecordWriter<ImmutableBytesWritable,
+            Writable> getRecordWriter() {
+      return recordWriter;
+    }
+
+    /**
+     * Get the task context.
+     *
+     * @return Context passed to initialize, or null if initialize() has not
+     *         been called yet.
+     */
+    public TaskAttemptContext getContext() {
+      return context;
+    }
+
+  }
+
+  /**
+   * Delegates the output specification check to the shared BASE_FORMAT.
+   * Unlike getOutputCommitter(), this method does not call
+   * BASE_FORMAT.setConf() with the job configuration first.
+   *
+   * @param context information about the job
+   * @throws IOException
+   * @throws InterruptedException
+   */
+  public void checkOutputSpecs(JobContext context)
+    throws IOException, InterruptedException {
+    BASE_FORMAT.checkOutputSpecs(context);
+  }
+
+  /**
+   * Applies the task configuration to the shared BASE_FORMAT and returns
+   * the output committer it provides.
+   *
+   * @param context the task context
+   * @return OutputCommitter obtained from the wrapped TableOutputFormat
+   * @throws IOException
+   * @throws InterruptedException
+   */
+  public OutputCommitter getOutputCommitter(
+    TaskAttemptContext context)
+    throws IOException, InterruptedException {
+    BASE_FORMAT.setConf(context.getConfiguration());
+    return BASE_FORMAT.getOutputCommitter(context);
+  }
+}

http://git-wip-us.apache.org/repos/asf/giraph/blob/57ea5561/giraph-hbase/src/main/java/org/apache/giraph/io/hbase/package-info.java
----------------------------------------------------------------------
diff --git a/giraph-hbase/src/main/java/org/apache/giraph/io/hbase/package-info.java b/giraph-hbase/src/main/java/org/apache/giraph/io/hbase/package-info.java
new file mode 100644
index 0000000..9179cee
--- /dev/null
+++ b/giraph-hbase/src/main/java/org/apache/giraph/io/hbase/package-info.java
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * HBase Input/Output for Giraph.
+ */
+package org.apache.giraph.io.hbase;

http://git-wip-us.apache.org/repos/asf/giraph/blob/57ea5561/giraph-hbase/src/test/java/org/apache/giraph/io/hbase/TestHBaseRootMarkerVertextFormat.java
----------------------------------------------------------------------
diff --git a/giraph-hbase/src/test/java/org/apache/giraph/io/hbase/TestHBaseRootMarkerVertextFormat.java b/giraph-hbase/src/test/java/org/apache/giraph/io/hbase/TestHBaseRootMarkerVertextFormat.java
new file mode 100644
index 0000000..ea4bed1
--- /dev/null
+++ b/giraph-hbase/src/test/java/org/apache/giraph/io/hbase/TestHBaseRootMarkerVertextFormat.java
@@ -0,0 +1,191 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.giraph.io.hbase;
+
+
+import org.apache.giraph.BspCase;
+import org.apache.giraph.conf.GiraphConfiguration;
+import org.apache.giraph.graph.EdgeListVertex;
+import org.apache.giraph.graph.GiraphJob;
+import org.apache.giraph.io.hbase.edgemarker.TableEdgeInputFormat;
+import org.apache.giraph.io.hbase.edgemarker.TableEdgeOutputFormat;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.MiniHBaseCluster;
+import org.apache.hadoop.hbase.client.Get;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.mapreduce.ImportTsv;
+import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
+import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.util.GenericOptionsParser;
+import org.apache.log4j.Logger;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.io.File;
+import java.io.IOException;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+/*
+Test case for HBase reading/writing vertices from an HBase instance.
+*/
+public class TestHBaseRootMarkerVertextFormat extends BspCase {
+
+    /**
+     * Test fixtures: the HBase testing utility that starts the mini
+     * cluster, the logger, and the table / column names used by
+     * testHBaseInputOutput().
+     */
+    private HBaseTestingUtility testUtil = new HBaseTestingUtility();
+    private final Logger log = Logger.getLogger(TestHBaseRootMarkerVertextFormat.class);
+
+    private final String TABLE_NAME = "simple_graph";
+    private final String FAMILY = "cf";
+    private final String QUALIFER = "children";
+    private final String OUTPUT_FIELD = "parent";
+
+    public TestHBaseRootMarkerVertextFormat() {
+        super(TestHBaseRootMarkerVertextFormat.class.getName());
+    }
+
+    /**
+     * Loads a five-row graph into an HBase mini cluster with ImportTsv,
+     * runs a Giraph job that reads and writes the same table through
+     * TableEdgeInputFormat / TableEdgeOutputFormat, and verifies that row
+     * 0002 received 0001 as its 'parent' qualifier.
+     *
+     * The test returns early when 'prop.mapred.job.tracker' is set (i.e.
+     * not local mode) and fails when the jar named by 'prop.jarLocation'
+     * is missing.
+     *
+     * @throws Exception on any failure while setting up or running the jobs
+     */
+    @Test
+    public void testHBaseInputOutput() throws Exception{
+
+        if (System.getProperty("prop.mapred.job.tracker") != null) {
+            if(log.isInfoEnabled())
+                log.info("testHBaseInputOutput: Ignore this test if not local mode.");
+            return;
+        }
+
+        // A missing property must produce the explanatory failure below,
+        // not a NullPointerException from new File(null).
+        String jarLocation = System.getProperty("prop.jarLocation");
+        if (jarLocation == null || !new File(jarLocation).exists()) {
+            fail("Could not find Giraph jar at " +
+                    "location specified by 'prop.jarLocation'. " +
+                    "Make sure you built the main Giraph artifact?.");
+        }
+
+        String INPUT_FILE = "graph.csv";
+        //First let's load some data using ImportTsv into our mock table.
+        String[] args = new String[] {
+                "-Dimporttsv.columns=HBASE_ROW_KEY," + FAMILY + ":" + QUALIFER,
+                "-Dimporttsv.separator=" + "\u002c",
+                TABLE_NAME,
+                INPUT_FILE
+        };
+
+
+        MiniHBaseCluster cluster = testUtil.startMiniCluster();
+        HTable table = null;
+
+        // Everything after the cluster start is inside the try so that the
+        // cluster is shut down even if option parsing or setup fails.
+        try {
+
+            GenericOptionsParser opts =
+                    new GenericOptionsParser(cluster.getConfiguration(), args);
+            Configuration conf = opts.getConfiguration();
+            args = opts.getRemainingArgs();
+
+            FileSystem fs = FileSystem.get(conf);
+            FSDataOutputStream op = fs.create(new Path(INPUT_FILE), true);
+            try {
+                // Bytes.toBytes encodes as UTF-8 regardless of the
+                // platform default charset.
+                op.write(Bytes.toBytes("0001,0002\n"));
+                op.write(Bytes.toBytes("0002,0004\n"));
+                op.write(Bytes.toBytes("0003,0005\n"));
+                op.write(Bytes.toBytes("0004,-1\n"));
+                op.write(Bytes.toBytes("0005,-1\n"));
+            } finally {
+                op.close();
+            }
+
+            final byte[] FAM = Bytes.toBytes(FAMILY);
+            final byte[] TAB = Bytes.toBytes(TABLE_NAME);
+
+            HTableDescriptor desc = new HTableDescriptor(TAB);
+            desc.addFamily(new HColumnDescriptor(FAM));
+            new HBaseAdmin(conf).createTable(desc);
+
+            Job job = ImportTsv.createSubmittableJob(conf, args);
+            job.waitForCompletion(false);
+            assertTrue(job.isSuccessful());
+            if(log.isInfoEnabled())
+                log.info("ImportTsv successful. Running HBase Giraph job.");
+
+            //now operate over HBase using Vertex I/O formats
+            conf.set(TableInputFormat.INPUT_TABLE, TABLE_NAME);
+            conf.set(TableOutputFormat.OUTPUT_TABLE, TABLE_NAME);
+
+            GiraphJob giraphJob = new GiraphJob(conf, BspCase.getCallingMethodName());
+            GiraphConfiguration giraphConf = giraphJob.getConfiguration();
+            giraphConf.setZooKeeperConfiguration(
+                    cluster.getMaster().getZooKeeper().getQuorum());
+            setupConfiguration(giraphJob);
+            giraphConf.setVertexClass(EdgeNotification.class);
+            giraphConf.setVertexInputFormatClass(TableEdgeInputFormat.class);
+            giraphConf.setVertexOutputFormatClass(TableEdgeOutputFormat.class);
+
+            assertTrue(giraphJob.run(true));
+            if(log.isInfoEnabled())
+                log.info("Giraph job successful. Checking output qualifier.");
+
+            //Do a get on row 0002, it should have a parent of 0001
+            //if the outputFormat worked.
+            table = new HTable(conf, TABLE_NAME);
+            Result result = table.get(new Get(Bytes.toBytes("0002")));
+            byte[] parentBytes = result.getValue(FAM,
+                    Bytes.toBytes(OUTPUT_FIELD));
+            assertNotNull(parentBytes);
+            assertTrue(parentBytes.length > 0);
+            Assert.assertEquals("0001", Bytes.toString(parentBytes));
+
+        }   finally {
+            // Release the table handle first, then always stop the cluster.
+            try {
+                if (table != null) {
+                    table.close();
+                }
+            } finally {
+                cluster.shutdown();
+            }
+        }
+    }
+
+    /*
+    Test vertex whose compute method notifies children of their parent:
+    in superstep 0 every vertex sends its own id along all of its edges;
+    every received message overwrites the vertex value; the vertex then
+    votes to halt.
+    The test set only has a 1-1 parent-to-child ratio for this unit test.
+     */
+    public static class EdgeNotification
+            extends EdgeListVertex<Text, Text, Text, Text> {
+        @Override
+        public void compute(Iterable<Text> messages) throws IOException {
+          for (Text message : messages) {
+            getValue().set(message);
+          }
+          if(getSuperstep() == 0) {
+            sendMessageToAllEdges(getId());
+          }
+          voteToHalt();
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/giraph/blob/57ea5561/giraph-hbase/src/test/java/org/apache/giraph/io/hbase/edgemarker/TableEdgeInputFormat.java
----------------------------------------------------------------------
diff --git a/giraph-hbase/src/test/java/org/apache/giraph/io/hbase/edgemarker/TableEdgeInputFormat.java b/giraph-hbase/src/test/java/org/apache/giraph/io/hbase/edgemarker/TableEdgeInputFormat.java
new file mode 100644
index 0000000..e4e08d6
--- /dev/null
+++ b/giraph-hbase/src/test/java/org/apache/giraph/io/hbase/edgemarker/TableEdgeInputFormat.java
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.giraph.io.hbase.edgemarker;
+
+import org.apache.giraph.graph.Edge;
+import org.apache.giraph.graph.Vertex;
+import org.apache.giraph.graph.VertexReader;
+import org.apache.giraph.io.hbase.HBaseVertexInputFormat;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.log4j.Logger;
+
+import com.google.common.collect.Lists;
+
+import java.io.IOException;
+import java.util.List;
+
+/*
+  Test subclass for HBaseVertexInputFormat. Reads a simple
+  children qualifier to create an edge.
+ */
+public class TableEdgeInputFormat extends
+        HBaseVertexInputFormat<Text, Text, Text, Text> {
+
+    private static final Logger log =
+            Logger.getLogger(TableEdgeInputFormat.class);
+    private static final Text uselessEdgeValue = new Text();
+
+    public VertexReader<Text, Text, Text, Text>
+            createVertexReader(InputSplit split,
+                               TaskAttemptContext context) throws IOException {
+
+        return new TableEdgeVertexReader(split, context);
+
+    }
+
+    /*
+     Uses the RecordReader to return Hbase rows
+     */
+    public static class TableEdgeVertexReader
+            extends HBaseVertexReader<Text, Text, Text, Text> {
+
+        private final byte[] CF = Bytes.toBytes("cf");
+        private final byte[] CHILDREN = Bytes.toBytes("children");
+
+        public TableEdgeVertexReader(InputSplit split, TaskAttemptContext context) throws IOException {
+            super(split, context);
+        }
+
+
+        public boolean nextVertex() throws IOException,
+                InterruptedException {
+            return getRecordReader().nextKeyValue();
+        }
+
+        /*
+         For each row, create a vertex with the row ID as a text,
+         and its 'children' qualifier as a single edge. A row that has
+         no 'children' cell yields a vertex without edges, because
+         Result.getValue() returns null in that case and building a
+         Text from it would throw a NullPointerException.
+         */
+        public Vertex<Text, Text, Text, Text>
+                    getCurrentVertex()
+                throws IOException, InterruptedException {
+            Result row = getRecordReader().getCurrentValue();
+            Vertex<Text, Text, Text, Text> vertex =
+                getConfiguration().createVertex();
+            Text vertexId = new Text(Bytes.toString(row.getRow()));
+            List<Edge<Text, Text>> edges = Lists.newLinkedList();
+            byte[] children = row.getValue(CF, CHILDREN);
+            if (children != null) {
+                Text edgeId = new Text(Bytes.toString(children));
+                edges.add(new Edge<Text, Text>(edgeId, uselessEdgeValue));
+            }
+            Text vertexValue = new Text();
+            vertex.initialize(vertexId, vertexValue, edges);
+
+            return vertex;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/giraph/blob/57ea5561/giraph-hbase/src/test/java/org/apache/giraph/io/hbase/edgemarker/TableEdgeOutputFormat.java
----------------------------------------------------------------------
diff --git a/giraph-hbase/src/test/java/org/apache/giraph/io/hbase/edgemarker/TableEdgeOutputFormat.java b/giraph-hbase/src/test/java/org/apache/giraph/io/hbase/edgemarker/TableEdgeOutputFormat.java
new file mode 100644
index 0000000..169fd88
--- /dev/null
+++ b/giraph-hbase/src/test/java/org/apache/giraph/io/hbase/edgemarker/TableEdgeOutputFormat.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.giraph.io.hbase.edgemarker;
+
+import org.apache.giraph.io.hbase.HBaseVertexOutputFormat;
+import org.apache.giraph.graph.Vertex;
+import org.apache.giraph.graph.VertexWriter;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+
+import java.io.IOException;
+/*
+ Test subclass for HBaseVertexOutputFormat
+ */
+public class TableEdgeOutputFormat
+        extends HBaseVertexOutputFormat<Text, Text, Text> {
+
+
+    public VertexWriter<Text, Text, Text>
+    createVertexWriter(TaskAttemptContext context)
+            throws IOException, InterruptedException {
+        return new TableEdgeVertexWriter(context);
+    }
+
+    /*
+     For each vertex, write back to the configured table using
+     the vertex id as the row key bytes.
+     */
+    public static class TableEdgeVertexWriter
+            extends HBaseVertexWriter<Text, Text, Text> {
+
+        private final byte[] CF = Bytes.toBytes("cf");
+        private final byte[] PARENT =  Bytes.toBytes("parent");
+
+        public TableEdgeVertexWriter(TaskAttemptContext context)
+          throws IOException, InterruptedException  {
+            super(context);
+        }
+        /*
+         Record the vertex value as the value for a new qualifier 'parent'.
+         Vertices with an empty value are not written.
+
+         Text.getBytes() exposes the backing array, which is only valid up
+         to getLength() and may carry stale trailing bytes, so the row key
+         and the cell value are re-encoded from the String form instead.
+         */
+        public void writeVertex(
+                Vertex<Text, Text, Text, ?> vertex)
+                throws IOException, InterruptedException {
+              RecordWriter<ImmutableBytesWritable, Writable> writer = getRecordWriter();
+              byte[] rowBytes = Bytes.toBytes(vertex.getId().toString());
+              Put put = new Put(rowBytes);
+              String value = vertex.getValue().toString();
+              if(value.length() > 0)   {
+                 put.add(CF, PARENT, Bytes.toBytes(value));
+                 writer.write(new ImmutableBytesWritable(rowBytes), put);
+              }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/giraph/blob/57ea5561/giraph-hcatalog/pom.xml
----------------------------------------------------------------------
diff --git a/giraph-hcatalog/pom.xml b/giraph-hcatalog/pom.xml
new file mode 100644
index 0000000..48a5133
--- /dev/null
+++ b/giraph-hcatalog/pom.xml
@@ -0,0 +1,233 @@
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <groupId>org.apache.giraph</groupId>
+    <artifactId>giraph-parent</artifactId>
+    <version>0.2-SNAPSHOT</version>
+  </parent>
+  <artifactId>giraph-hcatalog</artifactId>
+  <packaging>jar</packaging>
+
+  <name>Apache Giraph HCatalog I/O</name>
+
+  <properties>
+    <top.dir>${project.basedir}/..</top.dir>
+  </properties>
+
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-checkstyle-plugin</artifactId>
+        <version>2.9</version>
+        <configuration>
+          <configLocation>${top.dir}/checkstyle.xml</configLocation>
+          <headerLocation>${top.dir}/license-header.txt</headerLocation>
+          <enableRulesSummary>false</enableRulesSummary>
+          <failOnError>true</failOnError>
+          <includeTestSourceDirectory>false</includeTestSourceDirectory>
+        </configuration>
+        <executions>
+          <execution>
+            <phase>verify</phase>
+            <goals>
+               <goal>check</goal>
+             </goals>
+          </execution>
+        </executions>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-surefire-plugin</artifactId>
+        <version>2.6</version>
+        <configuration>
+          <systemProperties>
+            <property>
+              <name>prop.jarLocation</name>
+              <value>${top.dir}/giraph-core/target/giraph-${project.version}-${forHadoop}-jar-with-dependencies.jar</value>
+            </property>
+          </systemProperties>
+        </configuration>
+      </plugin>
+      <plugin>
+        <groupId>org.codehaus.mojo</groupId>
+        <artifactId>findbugs-maven-plugin</artifactId>
+        <version>2.5.1</version>
+        <configuration>
+          <xmlOutput>true</xmlOutput>
+          <findbugsXmlOutput>false</findbugsXmlOutput>
+          <excludeFilterFile>${top.dir}/findbugs-exclude.xml</excludeFilterFile>
+        </configuration>
+        <executions>
+          <execution>
+            <phase>verify</phase>
+            <goals>
+              <goal>check</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+    </plugins>
+  </build>
+
+  <repositories>
+    <!-- This is the main maven repository. Normally we wouldn't need to list
+       it here when it's the only one being used, but since we add special
+       repositories to get hcatalog, we must mention this one explicitly;
+       otherwise it won't be included. -->
+    <repository>
+      <id>central</id>
+      <name>Maven Repository</name>
+      <url>http://repo1.maven.org/maven2</url>
+      <releases>
+        <enabled>true</enabled>
+      </releases>
+    </repository>
+    <!-- This is necessary for hcatalog. -->
+    <repository>
+      <id>apache</id>
+      <name>Apache Repository</name>
+      <url>https://repository.apache.org/content/repositories/snapshots</url>
+      <snapshots>
+        <enabled>true</enabled>
+      </snapshots>
+    </repository>
+    <!-- This is necessary for hive-metastore dependencies for hcatalog. -->
+    <repository>
+      <id>datanucleus</id>
+      <name>datanucleus maven repository</name>
+      <url>http://www.datanucleus.org/downloads/maven2</url>
+      <layout>default</layout>
+      <releases>
+        <enabled>true</enabled>
+        <checksumPolicy>warn</checksumPolicy>
+      </releases>
+    </repository>
+  </repositories>
+
+  <profiles>
+    <profile>
+      <id>hadoop_0.20.203</id>
+      <activation>
+        <activeByDefault>true</activeByDefault>
+      </activation>
+      <dependencies>
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-test</artifactId>
+          <version>${hadoop.version}</version>
+          <scope>test</scope>
+        </dependency>
+      </dependencies>
+    </profile>
+
+    <profile>
+      <id>hadoop_1.0</id>
+      <dependencies>
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-test</artifactId>
+          <version>${hadoop.version}</version>
+          <scope>test</scope>
+        </dependency>
+      </dependencies>
+    </profile>
+
+    <profile>
+      <id>hadoop_non_secure</id>
+      <dependencies>
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-test</artifactId>
+          <version>${hadoop.version}</version>
+          <scope>test</scope>
+        </dependency>
+      </dependencies>
+    </profile>
+
+    <profile>
+      <id>hadoop_facebook</id>
+      <dependencies>
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-test</artifactId>
+          <version>${hadoop.version}</version>
+          <scope>system</scope>
+          <systemPath>${lib.dir}/facebook-hadoop-0.20-test.jar</systemPath>
+        </dependency>
+      </dependencies>
+    </profile>
+  </profiles>
+
+  <dependencies>
+    <!-- compile dependencies. sorted lexicographically. -->
+    <dependency>
+      <groupId>commons-cli</groupId>
+      <artifactId>commons-cli</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>commons-io</groupId>
+      <artifactId>commons-io</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.giraph</groupId>
+      <artifactId>giraph-core</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hcatalog</groupId>
+      <artifactId>hcatalog-core</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hive</groupId>
+      <artifactId>hive-common</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hive</groupId>
+      <artifactId>hive-exec</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hive</groupId>
+      <artifactId>hive-metastore</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.giraph</groupId>
+      <artifactId>giraph</artifactId>
+      <version>0.2-SNAPSHOT</version>
+      <type>test-jar</type>
+    </dependency>
+
+    <!-- test dependencies. sorted lexicographically. -->
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase</artifactId>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+</project>

http://git-wip-us.apache.org/repos/asf/giraph/blob/57ea5561/giraph-hcatalog/src/main/assembly/compile.xml
----------------------------------------------------------------------
diff --git a/giraph-hcatalog/src/main/assembly/compile.xml b/giraph-hcatalog/src/main/assembly/compile.xml
new file mode 100644
index 0000000..0f7678c
--- /dev/null
+++ b/giraph-hcatalog/src/main/assembly/compile.xml
@@ -0,0 +1,39 @@
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
+  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
+  <id>jar-with-dependencies</id>
+   <formats>
+    <format>jar</format>
+  </formats>
+  <includeBaseDirectory>false</includeBaseDirectory>
+
+  <dependencySets>
+    <dependencySet>
+      <useProjectArtifact>true</useProjectArtifact>
+      <outputDirectory>/</outputDirectory>
+      <unpackOptions>
+          <excludes>
+              <exclude>META-INF/LICENSE</exclude>
+          </excludes>
+      </unpackOptions>
+      <unpack>false</unpack>
+      <scope>runtime</scope>
+    </dependencySet>
+  </dependencySets>
+</assembly>
\ No newline at end of file