You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@giraph.apache.org by ni...@apache.org on 2013/01/02 20:04:01 UTC
[2/4] GIRAPH-458: split formats module into accumulo, hbase,
hcatalog (nitay)
http://git-wip-us.apache.org/repos/asf/giraph/blob/57ea5561/giraph-formats/src/test/java/org/apache/giraph/io/accumulo/edgemarker/AccumuloEdgeInputFormat.java
----------------------------------------------------------------------
diff --git a/giraph-formats/src/test/java/org/apache/giraph/io/accumulo/edgemarker/AccumuloEdgeInputFormat.java b/giraph-formats/src/test/java/org/apache/giraph/io/accumulo/edgemarker/AccumuloEdgeInputFormat.java
deleted file mode 100644
index b670144..0000000
--- a/giraph-formats/src/test/java/org/apache/giraph/io/accumulo/edgemarker/AccumuloEdgeInputFormat.java
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.giraph.io.accumulo.edgemarker;
-
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.Value;
-import org.apache.giraph.graph.Edge;
-import org.apache.giraph.graph.Vertex;
-import org.apache.giraph.graph.VertexReader;
-import org.apache.giraph.io.accumulo.AccumuloVertexInputFormat;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-
-import com.google.common.collect.Lists;
-
-import java.io.IOException;
-import java.util.List;
-import java.util.regex.Pattern;
-
-/*
- Example subclass which reads in Key/Value pairs to construct vertex objects.
- */
-public class AccumuloEdgeInputFormat
- extends AccumuloVertexInputFormat<Text, Text, Text, Text> {
-
- private static final Text uselessEdgeValue = new Text();
- private Configuration conf;
- public VertexReader<Text, Text, Text, Text>
- createVertexReader(InputSplit split, TaskAttemptContext context)
- throws IOException {
- try {
-
- return new AccumuloEdgeVertexReader(
- accumuloInputFormat.createRecordReader(split, context)) {
- };
- } catch (InterruptedException e) {
- throw new IOException(e);
- }
-
- }
- /*
- Reader takes Key/Value pairs from the underlying input format.
- */
- public static class AccumuloEdgeVertexReader
- extends AccumuloVertexReader<Text, Text, Text, Text> {
-
- public static final Pattern commaPattern = Pattern.compile("[,]");
-
- public AccumuloEdgeVertexReader(RecordReader<Key, Value> recordReader) {
- super(recordReader);
- }
-
-
- public boolean nextVertex() throws IOException, InterruptedException {
- return getRecordReader().nextKeyValue();
- }
-
- /*
- Each Key/Value contains the information needed to construct the vertices.
- */
- public Vertex<Text, Text, Text, Text> getCurrentVertex()
- throws IOException, InterruptedException {
- Key key = getRecordReader().getCurrentKey();
- Value value = getRecordReader().getCurrentValue();
- Vertex<Text, Text, Text, Text> vertex =
- getConfiguration().createVertex();
- Text vertexId = key.getRow();
- List<Edge<Text, Text>> edges = Lists.newLinkedList();
- String edge = new String(value.get());
- Text edgeId = new Text(edge);
- edges.add(new Edge<Text, Text>(edgeId, uselessEdgeValue));
- vertex.initialize(vertexId, new Text(), edges);
-
- return vertex;
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/giraph/blob/57ea5561/giraph-formats/src/test/java/org/apache/giraph/io/accumulo/edgemarker/AccumuloEdgeOutputFormat.java
----------------------------------------------------------------------
diff --git a/giraph-formats/src/test/java/org/apache/giraph/io/accumulo/edgemarker/AccumuloEdgeOutputFormat.java b/giraph-formats/src/test/java/org/apache/giraph/io/accumulo/edgemarker/AccumuloEdgeOutputFormat.java
deleted file mode 100644
index ff00fd6..0000000
--- a/giraph-formats/src/test/java/org/apache/giraph/io/accumulo/edgemarker/AccumuloEdgeOutputFormat.java
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.giraph.io.accumulo.edgemarker;
-
-import org.apache.accumulo.core.data.Mutation;
-import org.apache.accumulo.core.data.Value;
-import org.apache.giraph.io.accumulo.AccumuloVertexOutputFormat;
-import org.apache.giraph.graph.Vertex;
-import org.apache.giraph.graph.VertexWriter;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.RecordWriter;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-
-import java.io.IOException;
-
-/*
- Example subclass for writing vertices back to Accumulo.
- */
-public class AccumuloEdgeOutputFormat
- extends AccumuloVertexOutputFormat<Text, Text, Text> {
-
- public VertexWriter<Text, Text, Text>
- createVertexWriter(TaskAttemptContext context)
- throws IOException, InterruptedException {
- RecordWriter<Text, Mutation> writer =
- accumuloOutputFormat.getRecordWriter(context);
- String tableName = context.getConfiguration().get(OUTPUT_TABLE);
- if(tableName == null)
- throw new IOException("Forgot to set table name " +
- "using AccumuloVertexOutputFormat.OUTPUT_TABLE");
- return new AccumuloEdgeVertexWriter(writer, tableName);
- }
-
- /*
- Wraps RecordWriter for writing Mutations back to the configured Accumulo Table.
- */
- public static class AccumuloEdgeVertexWriter
- extends AccumuloVertexWriter<Text, Text, Text> {
-
- private final Text CF = new Text("cf");
- private final Text PARENT = new Text("parent");
- private Text tableName;
-
- public AccumuloEdgeVertexWriter(
- RecordWriter<Text, Mutation> writer, String tableName) {
- super(writer);
- this.tableName = new Text(tableName);
- }
- /*
- Write back a mutation that adds a qualifier for 'parent' containing the vertex value
- as the cell value. Assume the vertex ID corresponds to a key.
- */
- public void writeVertex(Vertex<Text, Text, Text, ?> vertex)
- throws IOException, InterruptedException {
- RecordWriter<Text, Mutation> writer = getRecordWriter();
- Mutation mt = new Mutation(vertex.getId());
- mt.put(CF, PARENT, new Value(
- vertex.getValue().toString().getBytes()));
- writer.write(tableName, mt);
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/giraph/blob/57ea5561/giraph-formats/src/test/java/org/apache/giraph/io/hbase/TestHBaseRootMarkerVertextFormat.java
----------------------------------------------------------------------
diff --git a/giraph-formats/src/test/java/org/apache/giraph/io/hbase/TestHBaseRootMarkerVertextFormat.java b/giraph-formats/src/test/java/org/apache/giraph/io/hbase/TestHBaseRootMarkerVertextFormat.java
deleted file mode 100644
index c09913d..0000000
--- a/giraph-formats/src/test/java/org/apache/giraph/io/hbase/TestHBaseRootMarkerVertextFormat.java
+++ /dev/null
@@ -1,190 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.giraph.io.hbase;
-
-
-import org.apache.giraph.BspCase;
-import org.apache.giraph.conf.GiraphConfiguration;
-import org.apache.giraph.graph.EdgeListVertex;
-import org.apache.giraph.graph.GiraphJob;
-import org.apache.giraph.io.hbase.edgemarker.TableEdgeInputFormat;
-import org.apache.giraph.io.hbase.edgemarker.TableEdgeOutputFormat;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.HColumnDescriptor;
-import org.apache.hadoop.hbase.HTableDescriptor;
-import org.apache.hadoop.hbase.MiniHBaseCluster;
-import org.apache.hadoop.hbase.client.Get;
-import org.apache.hadoop.hbase.client.HBaseAdmin;
-import org.apache.hadoop.hbase.client.HTable;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.mapreduce.ImportTsv;
-import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
-import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.util.GenericOptionsParser;
-import org.apache.log4j.Logger;
-import org.junit.Test;
-
-import java.io.File;
-import java.io.IOException;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-/*
-Test case for HBase reading/writing vertices from an HBase instance.
-*/
-public class TestHBaseRootMarkerVertextFormat extends BspCase {
-
- /**
- * Create the test case
- *
- * @param testName name of the test case
- */
- private HBaseTestingUtility testUtil = new HBaseTestingUtility();
- private final Logger log = Logger.getLogger(TestHBaseRootMarkerVertextFormat.class);
-
- private final String TABLE_NAME = "simple_graph";
- private final String FAMILY = "cf";
- private final String QUALIFER = "children";
- private final String OUTPUT_FIELD = "parent";
-
- public TestHBaseRootMarkerVertextFormat() {
- super(TestHBaseRootMarkerVertextFormat.class.getName());
- }
-
- @Test
- public void testHBaseInputOutput() throws Exception{
-
- if (System.getProperty("prop.mapred.job.tracker") != null) {
- if(log.isInfoEnabled())
- log.info("testHBaseInputOutput: Ignore this test if not local mode.");
- return;
- }
-
- File jarTest = new File(System.getProperty("prop.jarLocation"));
- if(!jarTest.exists()) {
- fail("Could not find Giraph jar at " +
- "location specified by 'prop.jarLocation'. " +
- "Make sure you built the main Giraph artifact?.");
- }
-
- String INPUT_FILE = "graph.csv";
- //First let's load some data using ImportTsv into our mock table.
- String[] args = new String[] {
- "-Dimporttsv.columns=HBASE_ROW_KEY,cf:"+QUALIFER,
- "-Dimporttsv.separator=" + "\u002c",
- TABLE_NAME,
- INPUT_FILE
- };
-
-
- MiniHBaseCluster cluster = testUtil.startMiniCluster();
-
- GenericOptionsParser opts =
- new GenericOptionsParser(cluster.getConfiguration(), args);
- Configuration conf = opts.getConfiguration();
- args = opts.getRemainingArgs();
-
- try {
-
- FileSystem fs = FileSystem.get(conf);
- FSDataOutputStream op = fs.create(new Path(INPUT_FILE), true);
- String line1 = "0001,0002\n";
- String line2 = "0002,0004\n";
- String line3 = "0003,0005\n";
- String line4 = "0004,-1\n";
- String line5 = "0005,-1\n";
- op.write(line1.getBytes());
- op.write(line2.getBytes());
- op.write(line3.getBytes());
- op.write(line4.getBytes());
- op.write(line5.getBytes());
- op.close();
-
- final byte[] FAM = Bytes.toBytes(FAMILY);
- final byte[] TAB = Bytes.toBytes(TABLE_NAME);
-
- HTableDescriptor desc = new HTableDescriptor(TAB);
- desc.addFamily(new HColumnDescriptor(FAM));
- new HBaseAdmin(conf).createTable(desc);
-
- Job job = ImportTsv.createSubmittableJob(conf, args);
- job.waitForCompletion(false);
- assertTrue(job.isSuccessful());
- if(log.isInfoEnabled())
- log.info("ImportTsv successful. Running HBase Giraph job.");
-
- //now operate over HBase using Vertex I/O formats
- conf.set(TableInputFormat.INPUT_TABLE, TABLE_NAME);
- conf.set(TableOutputFormat.OUTPUT_TABLE, TABLE_NAME);
-
- GiraphJob giraphJob = new GiraphJob(conf, getCallingMethodName());
- GiraphConfiguration giraphConf = giraphJob.getConfiguration();
- giraphConf.setZooKeeperConfiguration(
- cluster.getMaster().getZooKeeper().getQuorum());
- setupConfiguration(giraphJob);
- giraphConf.setVertexClass(EdgeNotification.class);
- giraphConf.setVertexInputFormatClass(TableEdgeInputFormat.class);
- giraphConf.setVertexOutputFormatClass(TableEdgeOutputFormat.class);
-
- assertTrue(giraphJob.run(true));
- if(log.isInfoEnabled())
- log.info("Giraph job successful. Checking output qualifier.");
-
- //Do a get on row 0002, it should have a parent of 0001
- //if the outputFormat worked.
- HTable table = new HTable(conf, TABLE_NAME);
- Result result = table.get(new Get("0002".getBytes()));
- byte[] parentBytes = result.getValue(FAMILY.getBytes(),
- OUTPUT_FIELD.getBytes());
- assertNotNull(parentBytes);
- assertTrue(parentBytes.length > 0);
- assertEquals("0001", Bytes.toString(parentBytes));
-
- } finally {
- cluster.shutdown();
- }
- }
-
- /*
- Test compute method that sends each edge a notification of its parents.
- The test set only has a 1-1 parent-to-child ratio for this unit test.
- */
- public static class EdgeNotification
- extends EdgeListVertex<Text, Text, Text, Text> {
- @Override
- public void compute(Iterable<Text> messages) throws IOException {
- for (Text message : messages) {
- getValue().set(message);
- }
- if(getSuperstep() == 0) {
- sendMessageToAllEdges(getId());
- }
- voteToHalt();
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/giraph/blob/57ea5561/giraph-formats/src/test/java/org/apache/giraph/io/hbase/edgemarker/TableEdgeInputFormat.java
----------------------------------------------------------------------
diff --git a/giraph-formats/src/test/java/org/apache/giraph/io/hbase/edgemarker/TableEdgeInputFormat.java b/giraph-formats/src/test/java/org/apache/giraph/io/hbase/edgemarker/TableEdgeInputFormat.java
deleted file mode 100644
index e4e08d6..0000000
--- a/giraph-formats/src/test/java/org/apache/giraph/io/hbase/edgemarker/TableEdgeInputFormat.java
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.giraph.io.hbase.edgemarker;
-
-import org.apache.giraph.graph.Edge;
-import org.apache.giraph.graph.Vertex;
-import org.apache.giraph.graph.VertexReader;
-import org.apache.giraph.io.hbase.HBaseVertexInputFormat;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.log4j.Logger;
-
-import com.google.common.collect.Lists;
-
-import java.io.IOException;
-import java.util.List;
-
-/*
- Test subclass for HBaseVertexInputFormat. Reads a simple
- children qualifier to create an edge.
- */
-public class TableEdgeInputFormat extends
- HBaseVertexInputFormat<Text, Text, Text, Text> {
-
- private static final Logger log =
- Logger.getLogger(TableEdgeInputFormat.class);
- private static final Text uselessEdgeValue = new Text();
-
- public VertexReader<Text, Text, Text, Text>
- createVertexReader(InputSplit split,
- TaskAttemptContext context) throws IOException {
-
- return new TableEdgeVertexReader(split, context);
-
- }
-
- /*
- Uses the RecordReader to return Hbase rows
- */
- public static class TableEdgeVertexReader
- extends HBaseVertexReader<Text, Text, Text, Text> {
-
- private final byte[] CF = Bytes.toBytes("cf");
- private final byte[] CHILDREN = Bytes.toBytes("children");
-
- public TableEdgeVertexReader(InputSplit split, TaskAttemptContext context) throws IOException {
- super(split, context);
- }
-
-
- public boolean nextVertex() throws IOException,
- InterruptedException {
- return getRecordReader().nextKeyValue();
- }
-
- /*
- For each row, create a vertex with the row ID as a text,
- and it's 'children' qualifier as a single edge.
- */
- public Vertex<Text, Text, Text, Text>
- getCurrentVertex()
- throws IOException, InterruptedException {
- Result row = getRecordReader().getCurrentValue();
- Vertex<Text, Text, Text, Text> vertex =
- getConfiguration().createVertex();
- Text vertexId = new Text(Bytes.toString(row.getRow()));
- List<Edge<Text, Text>> edges = Lists.newLinkedList();
- String edge = Bytes.toString(row.getValue(CF, CHILDREN));
- Text vertexValue = new Text();
- Text edgeId = new Text(edge);
- edges.add(new Edge<Text, Text>(edgeId, uselessEdgeValue));
- vertex.initialize(vertexId, vertexValue, edges);
-
- return vertex;
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/giraph/blob/57ea5561/giraph-formats/src/test/java/org/apache/giraph/io/hbase/edgemarker/TableEdgeOutputFormat.java
----------------------------------------------------------------------
diff --git a/giraph-formats/src/test/java/org/apache/giraph/io/hbase/edgemarker/TableEdgeOutputFormat.java b/giraph-formats/src/test/java/org/apache/giraph/io/hbase/edgemarker/TableEdgeOutputFormat.java
deleted file mode 100644
index 169fd88..0000000
--- a/giraph-formats/src/test/java/org/apache/giraph/io/hbase/edgemarker/TableEdgeOutputFormat.java
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.giraph.io.hbase.edgemarker;
-
-import org.apache.giraph.io.hbase.HBaseVertexOutputFormat;
-import org.apache.giraph.graph.Vertex;
-import org.apache.giraph.graph.VertexWriter;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapreduce.RecordWriter;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-
-import java.io.IOException;
-/*
- Test subclass for HBaseVertexOutputFormat
- */
-public class TableEdgeOutputFormat
- extends HBaseVertexOutputFormat<Text, Text, Text> {
-
-
- public VertexWriter<Text, Text, Text>
- createVertexWriter(TaskAttemptContext context)
- throws IOException, InterruptedException {
- return new TableEdgeVertexWriter(context);
- }
-
- /*
- For each vertex, write back to the configured table using
- the vertex id as the row key bytes.
- */
- public static class TableEdgeVertexWriter
- extends HBaseVertexWriter<Text, Text, Text> {
-
- private final byte[] CF = Bytes.toBytes("cf");
- private final byte[] PARENT = Bytes.toBytes("parent");
-
- public TableEdgeVertexWriter(TaskAttemptContext context)
- throws IOException, InterruptedException {
- super(context);
- }
- /*
- Record the vertex value as a the value for a new qualifier 'parent'.
- */
- public void writeVertex(
- Vertex<Text, Text, Text, ?> vertex)
- throws IOException, InterruptedException {
- RecordWriter<ImmutableBytesWritable, Writable> writer = getRecordWriter();
- byte[] rowBytes = vertex.getId().getBytes();
- Put put = new Put(rowBytes);
- Text value = vertex.getValue();
- if(value.toString().length() > 0) {
- put.add(CF, PARENT, value.getBytes());
- writer.write(new ImmutableBytesWritable(rowBytes), put);
- }
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/giraph/blob/57ea5561/giraph-formats/src/test/java/org/apache/giraph/io/hcatalog/TestHiveUtils.java
----------------------------------------------------------------------
diff --git a/giraph-formats/src/test/java/org/apache/giraph/io/hcatalog/TestHiveUtils.java b/giraph-formats/src/test/java/org/apache/giraph/io/hcatalog/TestHiveUtils.java
deleted file mode 100644
index 421cc28..0000000
--- a/giraph-formats/src/test/java/org/apache/giraph/io/hcatalog/TestHiveUtils.java
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.giraph.io.hcatalog;
-
-import junit.framework.TestCase;
-
-import java.util.Map;
-import org.junit.Test;
-
-public class TestHiveUtils extends TestCase {
- @Test
- public void testParsePartition() {
- String partitionStr = "feature1=2012-10-09, feature2=a1+b2, feature3=ff-gg";
- Map<String, String> partition = HiveUtils.parsePartitionValues(partitionStr);
- assertEquals(3, partition.size());
- assertEquals("2012-10-09", partition.get("feature1"));
- assertEquals("a1+b2", partition.get("feature2"));
- assertEquals("ff-gg", partition.get("feature3"));
- }
-}
http://git-wip-us.apache.org/repos/asf/giraph/blob/57ea5561/giraph-hbase/pom.xml
----------------------------------------------------------------------
diff --git a/giraph-hbase/pom.xml b/giraph-hbase/pom.xml
new file mode 100644
index 0000000..49e41b3
--- /dev/null
+++ b/giraph-hbase/pom.xml
@@ -0,0 +1,225 @@
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <groupId>org.apache.giraph</groupId>
+ <artifactId>giraph-parent</artifactId>
+ <version>0.2-SNAPSHOT</version>
+ </parent>
+ <artifactId>giraph-hbase</artifactId>
+ <packaging>jar</packaging>
+
+ <name>Apache Giraph HBase I/O</name>
+
+ <properties>
+ <top.dir>${project.basedir}/..</top.dir>
+ </properties>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-checkstyle-plugin</artifactId>
+ <version>2.9</version>
+ <configuration>
+ <configLocation>${top.dir}/checkstyle.xml</configLocation>
+ <headerLocation>${top.dir}/license-header.txt</headerLocation>
+ <enableRulesSummary>false</enableRulesSummary>
+ <failOnError>true</failOnError>
+ <includeTestSourceDirectory>false</includeTestSourceDirectory>
+ </configuration>
+ <executions>
+ <execution>
+ <phase>verify</phase>
+ <goals>
+ <goal>check</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-surefire-plugin</artifactId>
+ <version>2.6</version>
+ <configuration>
+ <systemProperties>
+ <property>
+ <name>prop.jarLocation</name>
+ <value>${top.dir}/giraph-core/target/giraph-${project.version}-${forHadoop}-jar-with-dependencies.jar</value>
+ </property>
+ </systemProperties>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>findbugs-maven-plugin</artifactId>
+ <version>2.5.1</version>
+ <configuration>
+ <xmlOutput>true</xmlOutput>
+ <findbugsXmlOutput>false</findbugsXmlOutput>
+ <excludeFilterFile>${top.dir}/findbugs-exclude.xml</excludeFilterFile>
+ </configuration>
+ <executions>
+ <execution>
+ <phase>verify</phase>
+ <goals>
+ <goal>check</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+
+ <repositories>
+ <!-- This is the main maven repository. Normally we wouldn't need to put
+ it here when it's the only one being used, but since we need to add
+ special repositories to get hcatalog we need to mention this one
+ specifically otherwise it won't be included. -->
+ <repository>
+ <id>central</id>
+ <name>Maven Repository</name>
+ <url>http://repo1.maven.org/maven2</url>
+ <releases>
+ <enabled>true</enabled>
+ </releases>
+ </repository>
+ <!-- This is necessary for hcatalog. -->
+ <repository>
+ <id>apache</id>
+ <name>Apache Repository</name>
+ <url>https://repository.apache.org/content/repositories/snapshots</url>
+ <snapshots>
+ <enabled>true</enabled>
+ </snapshots>
+ </repository>
+ <!-- This is necessary for hive-metastore dependencies for hcatalog. -->
+ <repository>
+ <id>datanucleus</id>
+ <name>datanucleus maven repository</name>
+ <url>http://www.datanucleus.org/downloads/maven2</url>
+ <layout>default</layout>
+ <releases>
+ <enabled>true</enabled>
+ <checksumPolicy>warn</checksumPolicy>
+ </releases>
+ </repository>
+ </repositories>
+
+ <profiles>
+ <profile>
+ <id>hadoop_0.20.203</id>
+ <activation>
+ <activeByDefault>true</activeByDefault>
+ </activation>
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-test</artifactId>
+ <version>${hadoop.version}</version>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+ </profile>
+
+ <profile>
+ <id>hadoop_1.0</id>
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-test</artifactId>
+ <version>${hadoop.version}</version>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+ </profile>
+
+ <profile>
+ <id>hadoop_non_secure</id>
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-test</artifactId>
+ <version>${hadoop.version}</version>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+ </profile>
+
+ <profile>
+ <id>hadoop_facebook</id>
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-test</artifactId>
+ <version>${hadoop.version}</version>
+ <scope>system</scope>
+ <systemPath>${lib.dir}/facebook-hadoop-0.20-test.jar</systemPath>
+ </dependency>
+ </dependencies>
+ </profile>
+ </profiles>
+
+ <dependencies>
+ <!-- compile dependencies. sorted lexicographically. -->
+ <dependency>
+ <groupId>commons-cli</groupId>
+ <artifactId>commons-cli</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>commons-io</groupId>
+ <artifactId>commons-io</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.giraph</groupId>
+ <artifactId>giraph-core</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.zookeeper</groupId>
+ <artifactId>zookeeper</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.giraph</groupId>
+ <artifactId>giraph</artifactId>
+ <version>0.2-SNAPSHOT</version>
+ <type>test-jar</type>
+ </dependency>
+
+ <!-- test dependencies. sorted lexicographically. -->
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+</project>
http://git-wip-us.apache.org/repos/asf/giraph/blob/57ea5561/giraph-hbase/src/main/assembly/compile.xml
----------------------------------------------------------------------
diff --git a/giraph-hbase/src/main/assembly/compile.xml b/giraph-hbase/src/main/assembly/compile.xml
new file mode 100644
index 0000000..0f7678c
--- /dev/null
+++ b/giraph-hbase/src/main/assembly/compile.xml
@@ -0,0 +1,39 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
+ <id>jar-with-dependencies</id> <!-- Assembly id; this descriptor produces a single archive in the format(s) listed below. -->
+ <formats>
+ <format>jar</format>
+ </formats>
+ <includeBaseDirectory>false</includeBaseDirectory> <!-- Entries go to the archive root rather than under a base directory. -->
+
+ <dependencySets>
+ <dependencySet>
+ <useProjectArtifact>true</useProjectArtifact> <!-- Include this module's own artifact in the set. -->
+ <outputDirectory>/</outputDirectory>
+ <unpackOptions> <!-- NOTE(review): these excludes presumably only take effect when dependencies are unpacked;
+      with unpack set to false below they look ineffective. Confirm against the Maven Assembly descriptor reference. -->
+ <excludes>
+ <exclude>META-INF/LICENSE</exclude>
+ </excludes>
+ </unpackOptions>
+ <unpack>false</unpack> <!-- Dependencies are added as-is, not exploded into the archive. -->
+ <scope>runtime</scope> <!-- Only runtime-scope dependencies are collected. -->
+ </dependencySet>
+ </dependencySets>
+</assembly>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/giraph/blob/57ea5561/giraph-hbase/src/main/java/org/apache/giraph/io/hbase/HBaseVertexInputFormat.java
----------------------------------------------------------------------
diff --git a/giraph-hbase/src/main/java/org/apache/giraph/io/hbase/HBaseVertexInputFormat.java b/giraph-hbase/src/main/java/org/apache/giraph/io/hbase/HBaseVertexInputFormat.java
new file mode 100644
index 0000000..cf87035
--- /dev/null
+++ b/giraph-hbase/src/main/java/org/apache/giraph/io/hbase/HBaseVertexInputFormat.java
@@ -0,0 +1,193 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.giraph.io.hbase;
+
+import org.apache.giraph.conf.ImmutableClassesGiraphConfiguration;
+import org.apache.giraph.graph.VertexInputFormat;
+import org.apache.giraph.graph.VertexReader;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.log4j.Logger;
+
+import java.io.IOException;
+import java.util.List;
+
+/**
+ *
+ * Base class that wraps an HBase TableInputFormat and underlying Scan object
+ * to help instantiate vertices from an HBase table. All
+ * the static TableInputFormat properties necessary to configure
+ * an HBase job are available.
+ *
+ * For example, setting conf.set(TableInputFormat.INPUT_TABLE, "in_table");
+ * from the job setup routine will properly delegate to the
+ * TableInputFormat instance. The Configurable interface prevents specific
+ * wrapper methods from having to be called.
+ *
+ * Works with {@link HBaseVertexOutputFormat}
+ *
+ * @param <I> Vertex index value
+ * @param <V> Vertex value
+ * @param <E> Edge value
+ * @param <M> Message data
+ */
+@SuppressWarnings("rawtypes")
+public abstract class HBaseVertexInputFormat<
+ I extends WritableComparable,
+ V extends Writable,
+ E extends Writable,
+ M extends Writable>
+ extends VertexInputFormat<I, V, E, M> {
+
+
+ /**
+ * delegate HBase table input format
+ */
+ protected static final TableInputFormat BASE_FORMAT =
+ new TableInputFormat();
+ /**
+ * logger
+ */
+ private static final Logger LOG =
+ Logger.getLogger(HBaseVertexInputFormat.class);
+
+ /**
+ * Takes an instance of RecordReader that supports
+ * HBase row-key, result records. Subclasses can focus on
+ * vertex instantiation details without worrying about connection
+ * semantics. Subclasses are expected to implement nextVertex() and
+ * getCurrentVertex()
+ *
+ *
+ *
+ * @param <I> Vertex index value
+ * @param <V> Vertex value
+ * @param <E> Edge value
+ * @param <M> Message data
+ */
+ public abstract static class HBaseVertexReader<
+ I extends WritableComparable,
+ V extends Writable,
+ E extends Writable, M extends Writable>
+ implements VertexReader<I, V, E, M> {
+ /** Giraph configuration */
+ private ImmutableClassesGiraphConfiguration<I, V, E, M> configuration;
+ /** Reader instance */
+ private final RecordReader<ImmutableBytesWritable, Result> reader;
+ /** Context passed to initialize */
+ private TaskAttemptContext context;
+
+ /**
+ * Sets the base TableInputFormat and creates a record reader.
+ *
+ * @param split InputSplit
+ * @param context Context
+ * @throws IOException
+ */
+ public HBaseVertexReader(InputSplit split, TaskAttemptContext context)
+ throws IOException {
+ BASE_FORMAT.setConf(context.getConfiguration());
+ this.reader = BASE_FORMAT.createRecordReader(split, context);
+ }
+
+ public ImmutableClassesGiraphConfiguration<I, V, E, M> getConfiguration() {
+ return configuration;
+ }
+
+ /**
+ * initialize
+ *
+ * @param inputSplit Input split to be used for reading vertices.
+ * @param context Context from the task.
+ * @throws IOException
+ * @throws InterruptedException
+ */
+ public void initialize(InputSplit inputSplit,
+ TaskAttemptContext context)
+ throws IOException,
+ InterruptedException {
+ reader.initialize(inputSplit, context);
+ this.context = context;
+ this.configuration = new ImmutableClassesGiraphConfiguration<I, V, E, M>(
+ context.getConfiguration());
+ }
+
+ /**
+ * close
+ * @throws IOException
+ */
+ public void close() throws IOException {
+ reader.close();
+ }
+
+ /**
+ * getProgress
+ *
+ * @return progress
+ * @throws IOException
+ * @throws InterruptedException
+ */
+ public float getProgress() throws
+ IOException, InterruptedException {
+ return reader.getProgress();
+ }
+
+ /**
+ * getRecordReader
+ *
+ * @return Record reader to be used for reading.
+ */
+ protected RecordReader<ImmutableBytesWritable,
+ Result> getRecordReader() {
+ return reader;
+ }
+
+ /**
+ * getContext
+ *
+ * @return Context passed to initialize.
+ */
+ protected TaskAttemptContext getContext() {
+ return context;
+ }
+
+ }
+
+ /**
+ * getSplits
+ *
+ * @param context Context of the job
+ * @param numWorkers Number of workers used for this job
+ * @return HBase region splits
+ * @throws IOException
+ * @throws InterruptedException
+ */
+ public List<InputSplit> getSplits(
+ JobContext context, int numWorkers)
+ throws IOException, InterruptedException {
+ BASE_FORMAT.setConf(context.getConfiguration());
+ return BASE_FORMAT.getSplits(context);
+ }
+}
http://git-wip-us.apache.org/repos/asf/giraph/blob/57ea5561/giraph-hbase/src/main/java/org/apache/giraph/io/hbase/HBaseVertexOutputFormat.java
----------------------------------------------------------------------
diff --git a/giraph-hbase/src/main/java/org/apache/giraph/io/hbase/HBaseVertexOutputFormat.java b/giraph-hbase/src/main/java/org/apache/giraph/io/hbase/HBaseVertexOutputFormat.java
new file mode 100644
index 0000000..2a27b63
--- /dev/null
+++ b/giraph-hbase/src/main/java/org/apache/giraph/io/hbase/HBaseVertexOutputFormat.java
@@ -0,0 +1,173 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.giraph.io.hbase;
+
+import org.apache.giraph.graph.VertexOutputFormat;
+import org.apache.giraph.graph.VertexWriter;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.OutputCommitter;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+
+import java.io.IOException;
+
+/**
+ * Abstract vertex output format that writes vertex mutations back to rows
+ * of an HBase table by delegating to one shared TableOutputFormat instance.
+ *
+ * The regular TableOutputFormat keys can be set directly during job setup,
+ * for example conf.set(TableOutputFormat.OUTPUT_TABLE, "out_table"); the
+ * shared instance is configured from the task context before a record
+ * writer or an output committer is requested, so no wrapper methods are
+ * needed on this class.
+ *
+ * Works with {@link HBaseVertexInputFormat}
+ *
+ * @param <I> Vertex index value
+ * @param <V> Vertex value
+ * @param <E> Edge value
+ */
+@SuppressWarnings("rawtypes")
+public abstract class HBaseVertexOutputFormat<
+    I extends WritableComparable,
+    V extends Writable,
+    E extends Writable>
+    extends VertexOutputFormat<I, V, E> {
+
+  /** Shared HBase table output format that every call is delegated to */
+  protected static final TableOutputFormat<ImmutableBytesWritable>
+  BASE_FORMAT = new TableOutputFormat<ImmutableBytesWritable>();
+
+  /**
+   * Delegates the output specification check to the shared
+   * TableOutputFormat. Unlike getOutputCommitter(), the shared format is
+   * not configured from the context before the call.
+   *
+   * @param context information about the job
+   * @throws IOException
+   * @throws InterruptedException
+   */
+  public void checkOutputSpecs(JobContext context)
+    throws IOException, InterruptedException {
+    BASE_FORMAT.checkOutputSpecs(context);
+  }
+
+  /**
+   * Configures the shared TableOutputFormat from the task context and
+   * returns its output committer.
+   *
+   * @param context the task context
+   * @return OutputCommitter of the underlying table output format
+   * @throws IOException
+   * @throws InterruptedException
+   */
+  public OutputCommitter getOutputCommitter(
+      TaskAttemptContext context)
+    throws IOException, InterruptedException {
+    BASE_FORMAT.setConf(context.getConfiguration());
+    OutputCommitter committer = BASE_FORMAT.getOutputCommitter(context);
+    return committer;
+  }
+
+  /**
+   * Vertex writer skeleton that owns a record writer obtained from the
+   * shared TableOutputFormat. Subclasses are expected to implement
+   * writeVertex() on top of {@link #getRecordWriter()}.
+   *
+   * @param <I> Vertex index value
+   * @param <V> Vertex value
+   * @param <E> Edge value
+   */
+  public abstract static class HBaseVertexWriter<
+      I extends WritableComparable,
+      V extends Writable,
+      E extends Writable>
+      implements VertexWriter<I, V, E> {
+
+    /** Task context handed to initialize() */
+    private TaskAttemptContext context;
+
+    /** HBase record writer, created in the constructor */
+    private RecordWriter<ImmutableBytesWritable, Writable> recordWriter;
+
+    /**
+     * Configures the shared TableOutputFormat from the task context and
+     * asks it for a record writer.
+     *
+     * @param context task attempt context
+     * @throws IOException
+     * @throws InterruptedException
+     */
+    public HBaseVertexWriter(TaskAttemptContext context)
+      throws IOException, InterruptedException {
+      HBaseVertexOutputFormat.BASE_FORMAT.setConf(context.getConfiguration());
+      RecordWriter<ImmutableBytesWritable, Writable> tableWriter =
+          HBaseVertexOutputFormat.BASE_FORMAT.getRecordWriter(context);
+      this.recordWriter = tableWriter;
+    }
+
+    /**
+     * Remembers the task context for later retrieval via getContext().
+     *
+     * @param context Context used to write the vertices.
+     * @throws IOException
+     */
+    public void initialize(TaskAttemptContext context)
+      throws IOException {
+      this.context = context;
+    }
+
+    /**
+     * Closes the underlying record writer.
+     *
+     * @param context the context of the task
+     * @throws IOException
+     * @throws InterruptedException
+     */
+    public void close(TaskAttemptContext context)
+      throws IOException, InterruptedException {
+      recordWriter.close(context);
+    }
+
+    /**
+     * Get the task context.
+     *
+     * @return Context passed to initialize.
+     */
+    public TaskAttemptContext getContext() {
+      return context;
+    }
+
+    /**
+     * Get the table record writer.
+     *
+     * @return Record writer to be used for writing.
+     */
+    public RecordWriter<ImmutableBytesWritable, Writable> getRecordWriter() {
+      return recordWriter;
+    }
+  }
+}
http://git-wip-us.apache.org/repos/asf/giraph/blob/57ea5561/giraph-hbase/src/main/java/org/apache/giraph/io/hbase/package-info.java
----------------------------------------------------------------------
diff --git a/giraph-hbase/src/main/java/org/apache/giraph/io/hbase/package-info.java b/giraph-hbase/src/main/java/org/apache/giraph/io/hbase/package-info.java
new file mode 100644
index 0000000..9179cee
--- /dev/null
+++ b/giraph-hbase/src/main/java/org/apache/giraph/io/hbase/package-info.java
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * HBase Input/Output for Giraph.
+ */
+package org.apache.giraph.io.hbase;
http://git-wip-us.apache.org/repos/asf/giraph/blob/57ea5561/giraph-hbase/src/test/java/org/apache/giraph/io/hbase/TestHBaseRootMarkerVertextFormat.java
----------------------------------------------------------------------
diff --git a/giraph-hbase/src/test/java/org/apache/giraph/io/hbase/TestHBaseRootMarkerVertextFormat.java b/giraph-hbase/src/test/java/org/apache/giraph/io/hbase/TestHBaseRootMarkerVertextFormat.java
new file mode 100644
index 0000000..ea4bed1
--- /dev/null
+++ b/giraph-hbase/src/test/java/org/apache/giraph/io/hbase/TestHBaseRootMarkerVertextFormat.java
@@ -0,0 +1,191 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.giraph.io.hbase;
+
+
+import org.apache.giraph.BspCase;
+import org.apache.giraph.conf.GiraphConfiguration;
+import org.apache.giraph.graph.EdgeListVertex;
+import org.apache.giraph.graph.GiraphJob;
+import org.apache.giraph.io.hbase.edgemarker.TableEdgeInputFormat;
+import org.apache.giraph.io.hbase.edgemarker.TableEdgeOutputFormat;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.MiniHBaseCluster;
+import org.apache.hadoop.hbase.client.Get;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.mapreduce.ImportTsv;
+import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
+import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.util.GenericOptionsParser;
+import org.apache.log4j.Logger;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.io.File;
+import java.io.IOException;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+/**
+ * Test case for reading vertices from, and writing vertices back to, an
+ * HBase table through the Giraph HBase vertex input/output formats.
+ */
+public class TestHBaseRootMarkerVertextFormat extends BspCase {
+
+  /** Name of the table that is loaded, read and written by the test */
+  private static final String TABLE_NAME = "simple_graph";
+  /** Column family used for both input and output cells */
+  private static final String FAMILY = "cf";
+  /** Qualifier holding the child vertex id of each row */
+  private static final String QUALIFIER = "children";
+  /** Qualifier the output format is expected to write the parent id to */
+  private static final String OUTPUT_FIELD = "parent";
+  /** File that ImportTsv loads into the table */
+  private static final String INPUT_FILE = "graph.csv";
+  /** Rows of the input file, one "rowKey,child" pair per line */
+  private static final String[] INPUT_LINES = {
+    "0001,0002\n",
+    "0002,0004\n",
+    "0003,0005\n",
+    "0004,-1\n",
+    "0005,-1\n"
+  };
+
+  /** Utility that starts and stops the mini HBase cluster */
+  private final HBaseTestingUtility testUtil = new HBaseTestingUtility();
+  /** Logger */
+  private final Logger log =
+      Logger.getLogger(TestHBaseRootMarkerVertextFormat.class);
+
+  /**
+   * Create the test case, named after this class.
+   */
+  public TestHBaseRootMarkerVertextFormat() {
+    super(TestHBaseRootMarkerVertextFormat.class.getName());
+  }
+
+  /**
+   * Loads a small parent/child table with ImportTsv, runs a Giraph job over
+   * it with the HBase vertex formats and checks that row 0002 got its
+   * parent (0001) written back.
+   *
+   * @throws Exception if any step of the test fails
+   */
+  @Test
+  public void testHBaseInputOutput() throws Exception {
+
+    if (System.getProperty("prop.mapred.job.tracker") != null) {
+      if (log.isInfoEnabled()) {
+        log.info("testHBaseInputOutput: Ignore this test if not local mode.");
+      }
+      return;
+    }
+
+    // An unset property is reported the same way as a missing jar instead
+    // of surfacing as a NullPointerException from the File constructor.
+    String jarLocation = System.getProperty("prop.jarLocation");
+    if (jarLocation == null || !new File(jarLocation).exists()) {
+      fail("Could not find Giraph jar at " +
+          "location specified by 'prop.jarLocation'. " +
+          "Make sure you built the main Giraph artifact?.");
+    }
+
+    //First let's load some data using ImportTsv into our mock table.
+    String[] args = new String[] {
+      "-Dimporttsv.columns=HBASE_ROW_KEY,cf:" + QUALIFIER,
+      "-Dimporttsv.separator=" + "\u002c",
+      TABLE_NAME,
+      INPUT_FILE
+    };
+
+    MiniHBaseCluster cluster = testUtil.startMiniCluster();
+
+    // Everything after the cluster start is inside the try so that the
+    // cluster is always torn down, even when option parsing fails.
+    try {
+      GenericOptionsParser opts =
+          new GenericOptionsParser(cluster.getConfiguration(), args);
+      Configuration conf = opts.getConfiguration();
+      args = opts.getRemainingArgs();
+
+      writeInputFile(FileSystem.get(conf));
+
+      final byte[] familyBytes = Bytes.toBytes(FAMILY);
+      final byte[] tableBytes = Bytes.toBytes(TABLE_NAME);
+
+      HTableDescriptor desc = new HTableDescriptor(tableBytes);
+      desc.addFamily(new HColumnDescriptor(familyBytes));
+      new HBaseAdmin(conf).createTable(desc);
+
+      Job job = ImportTsv.createSubmittableJob(conf, args);
+      job.waitForCompletion(false);
+      assertTrue(job.isSuccessful());
+      if (log.isInfoEnabled()) {
+        log.info("ImportTsv successful. Running HBase Giraph job.");
+      }
+
+      //now operate over HBase using Vertex I/O formats
+      conf.set(TableInputFormat.INPUT_TABLE, TABLE_NAME);
+      conf.set(TableOutputFormat.OUTPUT_TABLE, TABLE_NAME);
+
+      GiraphJob giraphJob =
+          new GiraphJob(conf, BspCase.getCallingMethodName());
+      GiraphConfiguration giraphConf = giraphJob.getConfiguration();
+      giraphConf.setZooKeeperConfiguration(
+          cluster.getMaster().getZooKeeper().getQuorum());
+      setupConfiguration(giraphJob);
+      giraphConf.setVertexClass(EdgeNotification.class);
+      giraphConf.setVertexInputFormatClass(TableEdgeInputFormat.class);
+      giraphConf.setVertexOutputFormatClass(TableEdgeOutputFormat.class);
+
+      assertTrue(giraphJob.run(true));
+      if (log.isInfoEnabled()) {
+        log.info("Giraph job successful. Checking output qualifier.");
+      }
+
+      //Do a get on row 0002, it should have a parent of 0001
+      //if the outputFormat worked.
+      HTable table = new HTable(conf, TABLE_NAME);
+      try {
+        Result result = table.get(new Get(Bytes.toBytes("0002")));
+        byte[] parentBytes =
+            result.getValue(familyBytes, Bytes.toBytes(OUTPUT_FIELD));
+        assertNotNull(parentBytes);
+        assertTrue(parentBytes.length > 0);
+        Assert.assertEquals("0001", Bytes.toString(parentBytes));
+      } finally {
+        table.close();
+      }
+    } finally {
+      // Counterpart of startMiniCluster(); shutting down only the
+      // MiniHBaseCluster handle would not go through the testing utility.
+      testUtil.shutdownMiniCluster();
+    }
+  }
+
+  /**
+   * Writes the test graph to INPUT_FILE, overwriting any existing file.
+   *
+   * @param fs File system to create the input file on
+   * @throws IOException if the file cannot be created or written
+   */
+  private void writeInputFile(FileSystem fs) throws IOException {
+    FSDataOutputStream out = fs.create(new Path(INPUT_FILE), true);
+    try {
+      for (String line : INPUT_LINES) {
+        out.write(Bytes.toBytes(line));
+      }
+    } finally {
+      out.close();
+    }
+  }
+
+  /**
+   * Test vertex whose compute method sends its own id along every edge in
+   * superstep 0 and stores any message it receives as its value, so that a
+   * child ends up holding the id of its parent.
+   * The test set only has a 1-1 parent-to-child ratio for this unit test.
+   */
+  public static class EdgeNotification
+      extends EdgeListVertex<Text, Text, Text, Text> {
+    @Override
+    public void compute(Iterable<Text> messages) throws IOException {
+      for (Text message : messages) {
+        getValue().set(message);
+      }
+      if (getSuperstep() == 0) {
+        sendMessageToAllEdges(getId());
+      }
+      voteToHalt();
+    }
+  }
+}
http://git-wip-us.apache.org/repos/asf/giraph/blob/57ea5561/giraph-hbase/src/test/java/org/apache/giraph/io/hbase/edgemarker/TableEdgeInputFormat.java
----------------------------------------------------------------------
diff --git a/giraph-hbase/src/test/java/org/apache/giraph/io/hbase/edgemarker/TableEdgeInputFormat.java b/giraph-hbase/src/test/java/org/apache/giraph/io/hbase/edgemarker/TableEdgeInputFormat.java
new file mode 100644
index 0000000..e4e08d6
--- /dev/null
+++ b/giraph-hbase/src/test/java/org/apache/giraph/io/hbase/edgemarker/TableEdgeInputFormat.java
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.giraph.io.hbase.edgemarker;
+
+import org.apache.giraph.graph.Edge;
+import org.apache.giraph.graph.Vertex;
+import org.apache.giraph.graph.VertexReader;
+import org.apache.giraph.io.hbase.HBaseVertexInputFormat;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.log4j.Logger;
+
+import com.google.common.collect.Lists;
+
+import java.io.IOException;
+import java.util.List;
+
+/**
+ * Test subclass for HBaseVertexInputFormat. Reads a simple
+ * children qualifier to create an edge.
+ */
+public class TableEdgeInputFormat extends
+    HBaseVertexInputFormat<Text, Text, Text, Text> {
+
+  /** Logger */
+  private static final Logger log =
+      Logger.getLogger(TableEdgeInputFormat.class);
+  /** Empty edge value shared by every edge created by this format */
+  private static final Text uselessEdgeValue = new Text();
+
+  /**
+   * Creates the reader that turns HBase rows of the split into vertices.
+   *
+   * @param split Input split to read
+   * @param context Context from the task
+   * @return Vertex reader over the rows of the split
+   * @throws IOException
+   */
+  public VertexReader<Text, Text, Text, Text>
+  createVertexReader(InputSplit split,
+      TaskAttemptContext context) throws IOException {
+    return new TableEdgeVertexReader(split, context);
+  }
+
+  /**
+   * Uses the RecordReader to return HBase rows as vertices.
+   */
+  public static class TableEdgeVertexReader
+      extends HBaseVertexReader<Text, Text, Text, Text> {
+
+    /** Column family that holds the children qualifier */
+    private final byte[] CF = Bytes.toBytes("cf");
+    /** Qualifier that holds the id of the child vertex */
+    private final byte[] CHILDREN = Bytes.toBytes("children");
+
+    /**
+     * Constructor, see HBaseVertexReader.
+     *
+     * @param split Input split to read
+     * @param context Context from the task
+     * @throws IOException
+     */
+    public TableEdgeVertexReader(InputSplit split,
+        TaskAttemptContext context) throws IOException {
+      super(split, context);
+    }
+
+    /**
+     * Advances the underlying record reader to the next row.
+     *
+     * @return true if another row (vertex) is available
+     * @throws IOException
+     * @throws InterruptedException
+     */
+    public boolean nextVertex() throws IOException,
+        InterruptedException {
+      return getRecordReader().nextKeyValue();
+    }
+
+    /**
+     * For each row, create a vertex with the row ID as a text,
+     * and its 'children' qualifier as a single edge. A row that has no
+     * 'children' cell yields a vertex without edges instead of failing.
+     *
+     * @return Vertex built from the current row
+     * @throws IOException
+     * @throws InterruptedException
+     */
+    public Vertex<Text, Text, Text, Text>
+    getCurrentVertex()
+      throws IOException, InterruptedException {
+      Result row = getRecordReader().getCurrentValue();
+      Vertex<Text, Text, Text, Text> vertex =
+          getConfiguration().createVertex();
+      Text vertexId = new Text(Bytes.toString(row.getRow()));
+      List<Edge<Text, Text>> edges = Lists.newLinkedList();
+      // Bytes.toString() hands back null when the cell is absent; building
+      // a Text from that null would throw, so only add an edge when the
+      // row really names a child.
+      String child = Bytes.toString(row.getValue(CF, CHILDREN));
+      if (child != null) {
+        edges.add(new Edge<Text, Text>(new Text(child), uselessEdgeValue));
+      } else if (log.isDebugEnabled()) {
+        log.debug("getCurrentVertex: row " + vertexId +
+            " has no 'children' cell, creating vertex without edges");
+      }
+      vertex.initialize(vertexId, new Text(), edges);
+
+      return vertex;
+    }
+  }
+}
http://git-wip-us.apache.org/repos/asf/giraph/blob/57ea5561/giraph-hbase/src/test/java/org/apache/giraph/io/hbase/edgemarker/TableEdgeOutputFormat.java
----------------------------------------------------------------------
diff --git a/giraph-hbase/src/test/java/org/apache/giraph/io/hbase/edgemarker/TableEdgeOutputFormat.java b/giraph-hbase/src/test/java/org/apache/giraph/io/hbase/edgemarker/TableEdgeOutputFormat.java
new file mode 100644
index 0000000..169fd88
--- /dev/null
+++ b/giraph-hbase/src/test/java/org/apache/giraph/io/hbase/edgemarker/TableEdgeOutputFormat.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.giraph.io.hbase.edgemarker;
+
+import org.apache.giraph.io.hbase.HBaseVertexOutputFormat;
+import org.apache.giraph.graph.Vertex;
+import org.apache.giraph.graph.VertexWriter;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+
+import java.io.IOException;
+/**
+ * Test subclass for HBaseVertexOutputFormat
+ */
+public class TableEdgeOutputFormat
+    extends HBaseVertexOutputFormat<Text, Text, Text> {
+
+  /**
+   * Creates the writer that stores vertex values back into the table.
+   *
+   * @param context Context from the task
+   * @return Vertex writer backed by the HBase record writer
+   * @throws IOException
+   * @throws InterruptedException
+   */
+  public VertexWriter<Text, Text, Text>
+  createVertexWriter(TaskAttemptContext context)
+    throws IOException, InterruptedException {
+    return new TableEdgeVertexWriter(context);
+  }
+
+  /**
+   * For each vertex, write back to the configured table using
+   * the vertex id as the row key bytes.
+   */
+  public static class TableEdgeVertexWriter
+      extends HBaseVertexWriter<Text, Text, Text> {
+
+    /** Column family the parent cell is written to */
+    private final byte[] CF = Bytes.toBytes("cf");
+    /** Qualifier of the parent cell */
+    private final byte[] PARENT = Bytes.toBytes("parent");
+
+    /**
+     * Constructor, see HBaseVertexWriter.
+     *
+     * @param context Context from the task
+     * @throws IOException
+     * @throws InterruptedException
+     */
+    public TableEdgeVertexWriter(TaskAttemptContext context)
+      throws IOException, InterruptedException {
+      super(context);
+    }
+
+    /**
+     * Record the vertex value as the value for a new qualifier 'parent'.
+     * Vertices with an empty value are not written.
+     *
+     * @param vertex Vertex to write
+     * @throws IOException
+     * @throws InterruptedException
+     */
+    public void writeVertex(
+        Vertex<Text, Text, Text, ?> vertex)
+      throws IOException, InterruptedException {
+      String parent = vertex.getValue().toString();
+      if (parent.length() > 0) {
+        // Text.getBytes() exposes the backing array, which is only valid
+        // up to getLength() and may carry stale trailing bytes. Build
+        // exactly-sized arrays from the decoded strings instead so the row
+        // key and the cell value never contain garbage.
+        byte[] rowBytes = Bytes.toBytes(vertex.getId().toString());
+        Put put = new Put(rowBytes);
+        put.add(CF, PARENT, Bytes.toBytes(parent));
+        RecordWriter<ImmutableBytesWritable, Writable> writer =
+            getRecordWriter();
+        writer.write(new ImmutableBytesWritable(rowBytes), put);
+      }
+    }
+  }
+}
http://git-wip-us.apache.org/repos/asf/giraph/blob/57ea5561/giraph-hcatalog/pom.xml
----------------------------------------------------------------------
diff --git a/giraph-hcatalog/pom.xml b/giraph-hcatalog/pom.xml
new file mode 100644
index 0000000..48a5133
--- /dev/null
+++ b/giraph-hcatalog/pom.xml
@@ -0,0 +1,233 @@
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <groupId>org.apache.giraph</groupId>
+ <artifactId>giraph-parent</artifactId>
+ <version>0.2-SNAPSHOT</version>
+ </parent>
+ <artifactId>giraph-hcatalog</artifactId>
+ <packaging>jar</packaging>
+
+ <name>Apache Giraph HCatalog I/O</name>
+
+ <properties>
+ <top.dir>${project.basedir}/..</top.dir>
+ </properties>
+
+ <build>
+ <plugins>
+ <!-- Checkstyle: runs the check goal in the verify phase against the
+ shared top-level checkstyle.xml and license header; violations fail
+ the build. Test sources are not checked. -->
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-checkstyle-plugin</artifactId>
+ <version>2.9</version>
+ <configuration>
+ <configLocation>${top.dir}/checkstyle.xml</configLocation>
+ <headerLocation>${top.dir}/license-header.txt</headerLocation>
+ <enableRulesSummary>false</enableRulesSummary>
+ <failOnError>true</failOnError>
+ <includeTestSourceDirectory>false</includeTestSourceDirectory>
+ </configuration>
+ <executions>
+ <execution>
+ <phase>verify</phase>
+ <goals>
+ <goal>check</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ <!-- Surefire: exposes the path of the giraph-core
+ jar-with-dependencies to tests as system property prop.jarLocation.
+ The path depends on ${forHadoop}, which is not defined in this pom. -->
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-surefire-plugin</artifactId>
+ <version>2.6</version>
+ <configuration>
+ <systemProperties>
+ <property>
+ <name>prop.jarLocation</name>
+ <value>${top.dir}/giraph-core/target/giraph-${project.version}-${forHadoop}-jar-with-dependencies.jar</value>
+ </property>
+ </systemProperties>
+ </configuration>
+ </plugin>
+ <!-- FindBugs: runs the check goal in the verify phase using the
+ shared top-level exclude filter. -->
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>findbugs-maven-plugin</artifactId>
+ <version>2.5.1</version>
+ <configuration>
+ <xmlOutput>true</xmlOutput>
+ <findbugsXmlOutput>false</findbugsXmlOutput>
+ <excludeFilterFile>${top.dir}/findbugs-exclude.xml</excludeFilterFile>
+ </configuration>
+ <executions>
+ <execution>
+ <phase>verify</phase>
+ <goals>
+ <goal>check</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+
+ <repositories>
+ <!-- This is the main maven repository. Normally we wouldn't need to put
+ it here when it's the only one being used, but since we need to add
+ special repositories to get hcatalog we need to mention this one
+ specifically otherwise it won't be included.
+ The URL uses HTTPS: Maven Central no longer serves plain HTTP. -->
+ <repository>
+ <id>central</id>
+ <name>Maven Repository</name>
+ <url>https://repo1.maven.org/maven2</url>
+ <releases>
+ <enabled>true</enabled>
+ </releases>
+ </repository>
+ <!-- This is necessary for hcatalog. Snapshots are enabled here. -->
+ <repository>
+ <id>apache</id>
+ <name>Apache Repository</name>
+ <url>https://repository.apache.org/content/repositories/snapshots</url>
+ <snapshots>
+ <enabled>true</enabled>
+ </snapshots>
+ </repository>
+ <!-- This is necessary for hive-metastore dependencies for hcatalog.
+ NOTE(review): this repository is still fetched over plain HTTP with
+ checksum failures only warned about; confirm whether an HTTPS
+ endpoint is available and switch if so. -->
+ <repository>
+ <id>datanucleus</id>
+ <name>datanucleus maven repository</name>
+ <url>http://www.datanucleus.org/downloads/maven2</url>
+ <layout>default</layout>
+ <releases>
+ <enabled>true</enabled>
+ <checksumPolicy>warn</checksumPolicy>
+ </releases>
+ </repository>
+ </repositories>
+
+ <!-- Hadoop flavour profiles. Each one only contributes the hadoop-test
+ artifact at ${hadoop.version}; hadoop.version itself is not defined
+ in this pom. -->
+ <profiles>
+ <!-- Active by default when no other profile is selected. -->
+ <profile>
+ <id>hadoop_0.20.203</id>
+ <activation>
+ <activeByDefault>true</activeByDefault>
+ </activation>
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-test</artifactId>
+ <version>${hadoop.version}</version>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+ </profile>
+
+ <!-- Same hadoop-test test dependency as the default profile. -->
+ <profile>
+ <id>hadoop_1.0</id>
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-test</artifactId>
+ <version>${hadoop.version}</version>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+ </profile>
+
+ <!-- Same hadoop-test test dependency as the default profile. -->
+ <profile>
+ <id>hadoop_non_secure</id>
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-test</artifactId>
+ <version>${hadoop.version}</version>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+ </profile>
+
+ <!-- Unlike the other profiles, hadoop-test is taken from a local jar
+ under ${lib.dir} via system scope rather than from a repository. -->
+ <profile>
+ <id>hadoop_facebook</id>
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-test</artifactId>
+ <version>${hadoop.version}</version>
+ <scope>system</scope>
+ <systemPath>${lib.dir}/facebook-hadoop-0.20-test.jar</systemPath>
+ </dependency>
+ </dependencies>
+ </profile>
+ </profiles>
+
+ <!-- Most entries below carry no version (and the first group no
+ scope), so those must be supplied from outside this pom. -->
+ <dependencies>
+ <!-- compile dependencies. sorted lexicographically. -->
+ <dependency>
+ <groupId>commons-cli</groupId>
+ <artifactId>commons-cli</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>commons-io</groupId>
+ <artifactId>commons-io</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.giraph</groupId>
+ <artifactId>giraph-core</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hcatalog</groupId>
+ <artifactId>hcatalog-core</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hive</groupId>
+ <artifactId>hive-common</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hive</groupId>
+ <artifactId>hive-exec</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hive</groupId>
+ <artifactId>hive-metastore</artifactId>
+ </dependency>
+ <!-- NOTE(review): this test-jar has no <scope>test</scope>, is out of
+ lexicographic order, pins its version explicitly, and uses artifactId
+ "giraph" rather than "giraph-core" as above; confirm all of that is
+ intended. -->
+ <dependency>
+ <groupId>org.apache.giraph</groupId>
+ <artifactId>giraph</artifactId>
+ <version>0.2-SNAPSHOT</version>
+ <type>test-jar</type>
+ </dependency>
+
+ <!-- test dependencies. sorted lexicographically. -->
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <!-- NOTE(review): HBase test-jar in the HCatalog module; confirm the
+ hcatalog tests actually need it. -->
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+</project>
http://git-wip-us.apache.org/repos/asf/giraph/blob/57ea5561/giraph-hcatalog/src/main/assembly/compile.xml
----------------------------------------------------------------------
diff --git a/giraph-hcatalog/src/main/assembly/compile.xml b/giraph-hcatalog/src/main/assembly/compile.xml
new file mode 100644
index 0000000..0f7678c
--- /dev/null
+++ b/giraph-hcatalog/src/main/assembly/compile.xml
@@ -0,0 +1,39 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
+ <!-- Builds a single flat jar containing this module's classes together
+ with the unpacked contents of its runtime dependencies. -->
+ <id>jar-with-dependencies</id>
+ <formats>
+ <format>jar</format>
+ </formats>
+ <includeBaseDirectory>false</includeBaseDirectory>
+
+ <dependencySets>
+ <dependencySet>
+ <useProjectArtifact>true</useProjectArtifact>
+ <outputDirectory>/</outputDirectory>
+ <!-- Dependencies must be unpacked: unpackOptions below only takes
+ effect when unpack is true, and nested jars at the archive root
+ would not be visible on the classpath. -->
+ <unpack>true</unpack>
+ <unpackOptions>
+ <excludes>
+ <!-- Skip per-dependency license files so they do not collide
+ when everything is merged into one archive. -->
+ <exclude>META-INF/LICENSE</exclude>
+ </excludes>
+ </unpackOptions>
+ <scope>runtime</scope>
+ </dependencySet>
+ </dependencySets>
+</assembly>
\ No newline at end of file