You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@giraph.apache.org by di...@apache.org on 2020/05/08 22:24:50 UTC
[giraph] branch trunk updated: GIRAPH-1236
This is an automated email from the ASF dual-hosted git repository.
dionysios pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/giraph.git
The following commit(s) were added to refs/heads/trunk by this push:
new 271fb85 GIRAPH-1236
271fb85 is described below
commit 271fb855b1a7d8bad926179c734223dbd1f48f69
Author: Dionysios Logothetis <dl...@gmail.com>
AuthorDate: Fri May 8 15:24:25 2020 -0700
GIRAPH-1236
closes #122
---
giraph-dist/pom.xml | 4 -
giraph-hbase/pom.xml | 201 --------------------
giraph-hbase/src/main/assembly/compile.xml | 39 ----
.../giraph/io/hbase/HBaseVertexInputFormat.java | 172 -----------------
.../giraph/io/hbase/HBaseVertexOutputFormat.java | 165 ----------------
.../org/apache/giraph/io/hbase/package-info.java | 21 --
.../io/hbase/TestHBaseRootMarkerVertextFormat.java | 211 ---------------------
.../io/hbase/edgemarker/TableEdgeInputFormat.java | 100 ----------
.../io/hbase/edgemarker/TableEdgeOutputFormat.java | 75 --------
pom.xml | 140 +++-----------
10 files changed, 23 insertions(+), 1105 deletions(-)
diff --git a/giraph-dist/pom.xml b/giraph-dist/pom.xml
index d6e9f69..10b7a1b 100644
--- a/giraph-dist/pom.xml
+++ b/giraph-dist/pom.xml
@@ -50,10 +50,6 @@
<profile>
<id>hadoop_2</id>
<dependencies>
- <dependency>
- <groupId>org.apache.giraph</groupId>
- <artifactId>giraph-hbase</artifactId>
- </dependency>
</dependencies>
</profile>
</profiles>
diff --git a/giraph-hbase/pom.xml b/giraph-hbase/pom.xml
deleted file mode 100644
index c4424ac..0000000
--- a/giraph-hbase/pom.xml
+++ /dev/null
@@ -1,201 +0,0 @@
-<!--
-Licensed to the Apache Software Foundation (ASF) under one
-or more contributor license agreements. See the NOTICE file
-distributed with this work for additional information
-regarding copyright ownership. The ASF licenses this file
-to you under the Apache License, Version 2.0 (the
-"License"); you may not use this file except in compliance
-with the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing,
-software distributed under the License is distributed on an
-"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-KIND, either express or implied. See the License for the
-specific language governing permissions and limitations
-under the License.
--->
-
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
- <modelVersion>4.0.0</modelVersion>
-
- <parent>
- <groupId>org.apache.giraph</groupId>
- <artifactId>giraph-parent</artifactId>
- <version>1.3.0-SNAPSHOT</version>
- </parent>
- <artifactId>giraph-hbase</artifactId>
- <packaging>jar</packaging>
-
- <name>Apache Giraph HBase I/O</name>
- <url>http://giraph.apache.org/giraph-hbase/</url>
- <description>Giraph HBase input/output classes</description>
-
- <properties>
- <top.dir>${project.basedir}/..</top.dir>
- <!-- TODO: Skip until duplicate classes are fixed in the future -->
- <project.enforcer.skip>true</project.enforcer.skip>
- <!-- TODO: Fix HBase duplicate classes in the future -->
- <giraph.maven.duplicate.finder.skip>true</giraph.maven.duplicate.finder.skip>
- </properties>
-
- <build>
- <plugins>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-assembly-plugin</artifactId>
- </plugin>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-checkstyle-plugin</artifactId>
- </plugin>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-jar-plugin</artifactId>
- </plugin>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-javadoc-plugin</artifactId>
- </plugin>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-site-plugin</artifactId>
- <configuration>
- <siteDirectory>${project.basedir}/src/site</siteDirectory>
- </configuration>
- </plugin>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-surefire-plugin</artifactId>
- <version>2.6</version>
- <configuration>
- <skip>${surefire.skip}</skip>
- <systemProperties>
- <property>
- <name>prop.jarLocation</name>
- <value>${top.dir}/giraph-core/target/giraph-${project.version}-${forHadoop}-jar-with-dependencies.jar</value>
- </property>
- </systemProperties>
- </configuration>
- </plugin>
- <plugin>
- <groupId>org.codehaus.mojo</groupId>
- <artifactId>findbugs-maven-plugin</artifactId>
- </plugin>
- </plugins>
- </build>
-
- <profiles>
- <profile>
- <id>hadoop_0.20.203</id>
- <dependencies>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-test</artifactId>
- <version>${hadoop.version}</version>
- <scope>test</scope>
- </dependency>
- </dependencies>
- </profile>
-
- <profile>
- <id>hadoop_1</id>
- <activation>
- <activeByDefault>true</activeByDefault>
- </activation>
- <dependencies>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-test</artifactId>
- <version>${hadoop.version}</version>
- <scope>test</scope>
- </dependency>
- </dependencies>
- </profile>
-
- <profile>
- <id>hadoop_2</id>
- <properties>
- <surefire.skip>true</surefire.skip>
- </properties>
- <dependencies>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-minicluster</artifactId>
- <version>${hadoop.version}</version>
- <scope>test</scope>
- </dependency>
- </dependencies>
- </profile>
-
- <profile>
- <id>hadoop_non_secure</id>
- <dependencies>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-test</artifactId>
- <version>${hadoop.version}</version>
- <scope>test</scope>
- </dependency>
- </dependencies>
- </profile>
-
- <profile>
- <id>hadoop_facebook</id>
- <dependencies>
- <dependency>
- <groupId>com.facebook.hadoop</groupId>
- <artifactId>hadoop-test</artifactId>
- <scope>test</scope>
- </dependency>
- </dependencies>
- </profile>
- </profiles>
-
- <dependencies>
- <!-- compile dependencies. sorted lexicographically. -->
- <dependency>
- <groupId>com.google.guava</groupId>
- <artifactId>guava</artifactId>
- </dependency>
- <dependency>
- <groupId>log4j</groupId>
- <artifactId>log4j</artifactId>
- </dependency>
- <dependency>
- <groupId>org.apache.giraph</groupId>
- <artifactId>giraph-core</artifactId>
- </dependency>
- <dependency>
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase</artifactId>
- </dependency>
-
- <!-- runtime dependency -->
- <dependency>
- <groupId>org.slf4j</groupId>
- <artifactId>slf4j-api</artifactId>
- <scope>runtime</scope>
- </dependency>
-
- <!-- test dependencies. sorted lexicographically. -->
- <dependency>
- <groupId>junit</groupId>
- <artifactId>junit</artifactId>
- <scope>test</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.giraph</groupId>
- <artifactId>giraph-core</artifactId>
- <type>test-jar</type>
- </dependency>
- <dependency>
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase</artifactId>
- <type>test-jar</type>
- <scope>test</scope>
- </dependency>
- </dependencies>
-</project>
diff --git a/giraph-hbase/src/main/assembly/compile.xml b/giraph-hbase/src/main/assembly/compile.xml
deleted file mode 100644
index 6acf679..0000000
--- a/giraph-hbase/src/main/assembly/compile.xml
+++ /dev/null
@@ -1,39 +0,0 @@
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
- xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
- <id>jar-with-dependencies</id>
- <formats>
- <format>jar</format>
- </formats>
- <includeBaseDirectory>false</includeBaseDirectory>
-
- <dependencySets>
- <dependencySet>
- <useProjectArtifact>true</useProjectArtifact>
- <outputDirectory>/</outputDirectory>
- <unpackOptions>
- <excludes>
- <exclude>META-INF/LICENSE</exclude>
- </excludes>
- </unpackOptions>
- <unpack>true</unpack>
- <scope>runtime</scope>
- </dependencySet>
- </dependencySets>
-</assembly>
\ No newline at end of file
diff --git a/giraph-hbase/src/main/java/org/apache/giraph/io/hbase/HBaseVertexInputFormat.java b/giraph-hbase/src/main/java/org/apache/giraph/io/hbase/HBaseVertexInputFormat.java
deleted file mode 100644
index 9c12ce3..0000000
--- a/giraph-hbase/src/main/java/org/apache/giraph/io/hbase/HBaseVertexInputFormat.java
+++ /dev/null
@@ -1,172 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.giraph.io.hbase;
-
-import java.io.IOException;
-import java.util.List;
-import org.apache.giraph.io.VertexInputFormat;
-import org.apache.giraph.io.VertexReader;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.io.WritableComparable;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.log4j.Logger;
-
-/**
- *
- * Base class that wraps an HBase TableInputFormat and underlying Scan object
- * to help instantiate vertices from an HBase table. All
- * the static TableInputFormat properties necessary to configure
- * an HBase job are available.
- *
- * For example, setting conf.set(TableInputFormat.INPUT_TABLE, "in_table");
- * from the job setup routine will properly delegate to the
- * TableInputFormat instance. The Configurable interface prevents specific
- * wrapper methods from having to be called.
- *
- * Works with {@link HBaseVertexOutputFormat}
- *
- * @param <I> Vertex index value
- * @param <V> Vertex value
- * @param <E> Edge value
- */
-@SuppressWarnings("rawtypes")
-public abstract class HBaseVertexInputFormat<
- I extends WritableComparable,
- V extends Writable,
- E extends Writable>
- extends VertexInputFormat<I, V, E> {
-
-
- /**
- * delegate HBase table input format
- */
- protected static final TableInputFormat BASE_FORMAT =
- new TableInputFormat();
- /**
- * logger
- */
- private static final Logger LOG =
- Logger.getLogger(HBaseVertexInputFormat.class);
-
- /**
- * Takes an instance of RecordReader that supports
- * HBase row-key, result records. Subclasses can focus on
- * vertex instantiation details without worrying about connection
- * semantics. Subclasses are expected to implement nextVertex() and
- * getCurrentVertex()
- *
- *
- *
- * @param <I> Vertex index value
- * @param <V> Vertex value
- * @param <E> Edge value
- */
- public abstract static class HBaseVertexReader<
- I extends WritableComparable,
- V extends Writable,
- E extends Writable>
- extends VertexReader<I, V, E> {
-
- /** Reader instance */
- private final RecordReader<ImmutableBytesWritable, Result> reader;
- /** Context passed to initialize */
- private TaskAttemptContext context;
-
- /**
- * Sets the base TableInputFormat and creates a record reader.
- *
- * @param split InputSplit
- * @param context Context
- * @throws IOException
- */
- public HBaseVertexReader(InputSplit split, TaskAttemptContext context)
- throws IOException {
- BASE_FORMAT.setConf(context.getConfiguration());
- this.reader = BASE_FORMAT.createRecordReader(split, context);
- }
-
- /**
- * initialize
- *
- * @param inputSplit Input split to be used for reading vertices.
- * @param context Context from the task.
- * @throws IOException
- * @throws InterruptedException
- */
- public void initialize(InputSplit inputSplit,
- TaskAttemptContext context)
- throws IOException, InterruptedException {
- reader.initialize(inputSplit, context);
- this.context = context;
- }
-
- /**
- * close
- * @throws IOException
- */
- public void close() throws IOException {
- reader.close();
- }
-
- /**
- * getProgress
- *
- * @return progress
- * @throws IOException
- * @throws InterruptedException
- */
- public float getProgress() throws
- IOException, InterruptedException {
- return reader.getProgress();
- }
-
- /**
- * getRecordReader
- *
- * @return Record reader to be used for reading.
- */
- protected RecordReader<ImmutableBytesWritable,
- Result> getRecordReader() {
- return reader;
- }
-
- /**
- * getContext
- *
- * @return Context passed to initialize.
- */
- protected TaskAttemptContext getContext() {
- return context;
- }
-
- }
-
- @Override
- public List<InputSplit> getSplits(
- JobContext context, int minSplitCountHint)
- throws IOException, InterruptedException {
- BASE_FORMAT.setConf(getConf());
- return BASE_FORMAT.getSplits(context);
- }
-}
diff --git a/giraph-hbase/src/main/java/org/apache/giraph/io/hbase/HBaseVertexOutputFormat.java b/giraph-hbase/src/main/java/org/apache/giraph/io/hbase/HBaseVertexOutputFormat.java
deleted file mode 100644
index f875c19..0000000
--- a/giraph-hbase/src/main/java/org/apache/giraph/io/hbase/HBaseVertexOutputFormat.java
+++ /dev/null
@@ -1,165 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.giraph.io.hbase;
-
-import java.io.IOException;
-import org.apache.giraph.io.VertexOutputFormat;
-import org.apache.giraph.io.VertexWriter;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.io.WritableComparable;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.OutputCommitter;
-import org.apache.hadoop.mapreduce.RecordWriter;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-
-/**
- *
- * Base class for writing Vertex mutations back to specific
- * rows in an HBase table. This class wraps an instance of TableOutputFormat
- * for easy configuration with the existing properties.
- *
- * Setting conf.set(TableOutputFormat.OUTPUT_TABLE, "out_table");
- * will properly delegate to the TableOutputFormat instance contained
- * in this class. The Configurable interface prevents specific
- * wrapper methods from having to be called.
- *
- * Works with {@link HBaseVertexInputFormat}
- *
- * @param <I> Vertex index value
- * @param <V> Vertex value
- * @param <E> Edge value
- */
-@SuppressWarnings("rawtypes")
-public abstract class HBaseVertexOutputFormat<
- I extends WritableComparable,
- V extends Writable,
- E extends Writable>
- extends VertexOutputFormat
- <I, V, E> {
-
- /**
- * delegate output format that writes to HBase
- */
- protected static final TableOutputFormat<ImmutableBytesWritable>
- BASE_FORMAT = new TableOutputFormat<ImmutableBytesWritable>();
-
- /**
- * Constructor
- *
- * Simple class which takes an instance of RecordWriter
- * over Writable objects. Subclasses are
- * expected to implement writeVertex()
- *
- * @param <I> Vertex index value
- * @param <V> Vertex value
- * @param <E> Edge value
- */
- public abstract static class HBaseVertexWriter<
- I extends WritableComparable,
- V extends Writable,
- E extends Writable>
- extends VertexWriter<I, V, E> {
-
- /** Context */
- private TaskAttemptContext context;
- /** Record writer instance */
- private RecordWriter<ImmutableBytesWritable, Writable> recordWriter;
-
- /**
- * Sets up base table output format and creates a record writer.
- * @param context task attempt context
- */
- public HBaseVertexWriter(TaskAttemptContext context)
- throws IOException, InterruptedException {
- BASE_FORMAT.setConf(context.getConfiguration());
- this.recordWriter = BASE_FORMAT.getRecordWriter(context);
- }
-
- /**
- * initialize
- *
- * @param context Context used to write the vertices.
- * @throws IOException
- */
- public void initialize(TaskAttemptContext context)
- throws IOException {
- this.context = context;
- }
-
- /**
- * close
- *
- * @param context the context of the task
- * @throws IOException
- * @throws InterruptedException
- */
- public void close(TaskAttemptContext context)
- throws IOException, InterruptedException {
- recordWriter.close(context);
- }
-
- /**
- * Get the table record writer;
- *
- * @return Record writer to be used for writing.
- */
- public RecordWriter<ImmutableBytesWritable,
- Writable> getRecordWriter() {
- return recordWriter;
- }
-
- /**
- * getContext
- *
- * @return Context passed to initialize.
- */
- public TaskAttemptContext getContext() {
- return context;
- }
- }
-
- /**
- * checkOutputSpecs
- *
- * @param context information about the job
- * @throws IOException
- * @throws InterruptedException
- */
- public void checkOutputSpecs(JobContext context)
- throws IOException, InterruptedException {
- BASE_FORMAT.checkOutputSpecs(context);
- }
-
- /**
- * getOutputCommitter
- *
- * @param context the task context
- * @return OutputCommitter ouputCommitter
- * @throws IOException
- * @throws InterruptedException
- */
- public OutputCommitter getOutputCommitter(
- TaskAttemptContext context)
- throws IOException, InterruptedException {
- BASE_FORMAT.setConf(getConf());
- return BASE_FORMAT.getOutputCommitter(context);
- }
-}
diff --git a/giraph-hbase/src/main/java/org/apache/giraph/io/hbase/package-info.java b/giraph-hbase/src/main/java/org/apache/giraph/io/hbase/package-info.java
deleted file mode 100644
index 9179cee..0000000
--- a/giraph-hbase/src/main/java/org/apache/giraph/io/hbase/package-info.java
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/**
- * HBase Input/Output for Giraph.
- */
-package org.apache.giraph.io.hbase;
diff --git a/giraph-hbase/src/test/java/org/apache/giraph/io/hbase/TestHBaseRootMarkerVertextFormat.java b/giraph-hbase/src/test/java/org/apache/giraph/io/hbase/TestHBaseRootMarkerVertextFormat.java
deleted file mode 100644
index a39b51c..0000000
--- a/giraph-hbase/src/test/java/org/apache/giraph/io/hbase/TestHBaseRootMarkerVertextFormat.java
+++ /dev/null
@@ -1,211 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.giraph.io.hbase;
-
-
-import org.apache.giraph.BspCase;
-import org.apache.giraph.conf.GiraphConfiguration;
-import org.apache.giraph.graph.BasicComputation;
-import org.apache.giraph.graph.Vertex;
-import org.apache.giraph.io.hbase.edgemarker.TableEdgeInputFormat;
-import org.apache.giraph.io.hbase.edgemarker.TableEdgeOutputFormat;
-import org.apache.giraph.job.GiraphJob;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.HColumnDescriptor;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.HTableDescriptor;
-import org.apache.hadoop.hbase.MiniHBaseCluster;
-import org.apache.hadoop.hbase.client.Get;
-import org.apache.hadoop.hbase.client.HBaseAdmin;
-import org.apache.hadoop.hbase.client.HTable;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.mapreduce.ImportTsv;
-import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
-import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.util.GenericOptionsParser;
-import org.apache.log4j.Logger;
-import org.junit.Test;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.UUID;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
-/**
- * Test case for HBase reading/writing vertices from an HBase instance.
- */
-public class TestHBaseRootMarkerVertextFormat extends BspCase {
- private final Logger log = Logger.getLogger(TestHBaseRootMarkerVertextFormat.class);
-
- private final String TABLE_NAME = "simple_graph";
- private final String FAMILY = "cf";
- private final String QUALIFER = "children";
- private final String OUTPUT_FIELD = "parent";
-
- private final HBaseTestingUtility testUtil = new HBaseTestingUtility();
-
- public TestHBaseRootMarkerVertextFormat() {
- super(TestHBaseRootMarkerVertextFormat.class.getName());
- }
-
- @Test
- public void testHBaseInputOutput() throws Exception {
- if (System.getProperty("prop.mapred.job.tracker") != null) {
- if(log.isInfoEnabled())
- log.info("testHBaseInputOutput: Ignore this test if not local mode.");
- return;
- }
-
- File jarTest = new File(System.getProperty("prop.jarLocation"));
- if(!jarTest.exists()) {
- fail("Could not find Giraph jar at " +
- "location specified by 'prop.jarLocation'. " +
- "Make sure you built the main Giraph artifact?.");
- }
-
- FileSystem fs = null;
- Path hbaseRootdir = null;
- try {
- MiniHBaseCluster cluster = testUtil.startMiniCluster(1);
- cluster.waitForActiveAndReadyMaster();
- testUtil.startMiniMapReduceCluster();
-
- // Let's set up the hbase root directory.
- Configuration conf = testUtil.getConfiguration();
- try {
- fs = testUtil.getTestFileSystem();
- String randomStr = UUID.randomUUID().toString();
- String tmpdir = System.getProperty("java.io.tmpdir") + "/" +
- randomStr + "/";
- hbaseRootdir = fs.makeQualified(new Path(tmpdir));
-
- conf.set(HConstants.HBASE_DIR, hbaseRootdir.toString());
- fs.mkdirs(hbaseRootdir);
- } catch(IOException ioe) {
- fail("Could not create hbase root directory.");
- }
-
- //First let's load some data using ImportTsv into our mock table.
- String INPUT_FILE = hbaseRootdir.toString() + "/graph.csv";
- String[] args = new String[] {
- "-Dimporttsv.columns=HBASE_ROW_KEY,cf:"+QUALIFER,
- "-Dimporttsv.separator=" + "\u002c",
- TABLE_NAME,
- INPUT_FILE
- };
-
- GenericOptionsParser opts =
- new GenericOptionsParser(testUtil.getConfiguration(), args);
- args = opts.getRemainingArgs();
-
- fs = FileSystem.get(conf);
- fs.setConf(conf);
- Path inputPath = fs.makeQualified(new Path(hbaseRootdir, "graph.csv"));
- FSDataOutputStream op = fs.create(inputPath, true);
- String line1 = "0001,0002\n";
- String line2 = "0002,0004\n";
- String line3 = "0003,0005\n";
- String line4 = "0004,-1\n";
- String line5 = "0005,-1\n";
- op.write(line1.getBytes());
- op.write(line2.getBytes());
- op.write(line3.getBytes());
- op.write(line4.getBytes());
- op.write(line5.getBytes());
- op.close();
-
- final byte[] FAM = Bytes.toBytes(FAMILY);
- final byte[] TAB = Bytes.toBytes(TABLE_NAME);
-
- HTableDescriptor desc = new HTableDescriptor(TAB);
- desc.addFamily(new HColumnDescriptor(FAM));
- HBaseAdmin hbaseAdmin=new HBaseAdmin(conf);
- if (hbaseAdmin.isTableAvailable(TABLE_NAME)) {
- hbaseAdmin.disableTable(TABLE_NAME);
- hbaseAdmin.deleteTable(TABLE_NAME);
- }
- hbaseAdmin.createTable(desc);
-
- // Do the import
- Job job = ImportTsv.createSubmittableJob(conf, args);
- job.waitForCompletion(false);
- assertTrue(job.isSuccessful());
- if(log.isInfoEnabled())
- log.info("ImportTsv successful. Running HBase Giraph job.");
-
- // Now operate over HBase using Vertex I/O formats
- conf.set(TableInputFormat.INPUT_TABLE, TABLE_NAME);
- conf.set(TableOutputFormat.OUTPUT_TABLE, TABLE_NAME);
-
- GiraphJob giraphJob = new GiraphJob(conf, BspCase.getCallingMethodName());
- GiraphConfiguration giraphConf = giraphJob.getConfiguration();
- setupConfiguration(giraphJob);
- giraphConf.setComputationClass(EdgeNotification.class);
- giraphConf.setVertexInputFormatClass(TableEdgeInputFormat.class);
- giraphConf.setVertexOutputFormatClass(TableEdgeOutputFormat.class);
-
- assertTrue(giraphJob.run(true));
- if(log.isInfoEnabled())
- log.info("Giraph job successful. Checking output qualifier.");
-
- // Do a get on row 0002, it should have a parent of 0001
- // if the outputFormat worked.
- HTable table = new HTable(conf, TABLE_NAME);
- Result result = table.get(new Get("0002".getBytes()));
- byte[] parentBytes = result.getValue(FAMILY.getBytes(),
- OUTPUT_FIELD.getBytes());
- assertNotNull(parentBytes);
- assertTrue(parentBytes.length > 0);
- assertEquals("0001", Bytes.toString(parentBytes));
- } finally {
- testUtil.shutdownMiniMapReduceCluster();
- testUtil.shutdownMiniCluster();
- }
- }
-
- /**
- * Test compute method that sends each edge a notification of its parents.
- * The test set only has a 1-1 parent-to-child ratio for this unit test.
- */
- public static class EdgeNotification
- extends BasicComputation<Text, Text, Text, Text> {
- @Override
- public void compute(Vertex<Text, Text, Text> vertex,
- Iterable<Text> messages) throws IOException {
- for (Text message : messages) {
- vertex.getValue().set(message);
- }
- if(getSuperstep() == 0) {
- sendMessageToAllEdges(vertex, vertex.getId());
- }
- vertex.voteToHalt();
- }
- }
-}
diff --git a/giraph-hbase/src/test/java/org/apache/giraph/io/hbase/edgemarker/TableEdgeInputFormat.java b/giraph-hbase/src/test/java/org/apache/giraph/io/hbase/edgemarker/TableEdgeInputFormat.java
deleted file mode 100644
index 8589de8..0000000
--- a/giraph-hbase/src/test/java/org/apache/giraph/io/hbase/edgemarker/TableEdgeInputFormat.java
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.giraph.io.hbase.edgemarker;
-
-import org.apache.giraph.edge.Edge;
-import org.apache.giraph.edge.EdgeFactory;
-import org.apache.giraph.graph.Vertex;
-import org.apache.giraph.io.VertexReader;
-import org.apache.giraph.io.hbase.HBaseVertexInputFormat;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.log4j.Logger;
-
-import com.google.common.collect.Lists;
-
-import java.io.IOException;
-import java.util.List;
-
-/**
- * Test subclass for HBaseVertexInputFormat. Reads a simple
- * children qualifier to create an edge.
- */
-public class TableEdgeInputFormat extends
- HBaseVertexInputFormat<Text, Text, Text> {
-
- private static final Logger LOG =
- Logger.getLogger(TableEdgeInputFormat.class);
- private static final Text uselessEdgeValue = new Text();
-
- @Override public void checkInputSpecs(Configuration conf) { }
-
- public VertexReader<Text, Text, Text>
- createVertexReader(InputSplit split,
- TaskAttemptContext context) throws IOException {
-
- return new TableEdgeVertexReader(split, context);
-
- }
-
- /**
- * Uses the RecordReader to return Hbase rows
- */
- public static class TableEdgeVertexReader
- extends HBaseVertexReader<Text, Text, Text> {
-
- private final byte[] CF = Bytes.toBytes("cf");
- private final byte[] CHILDREN = Bytes.toBytes("children");
-
- public TableEdgeVertexReader(InputSplit split, TaskAttemptContext context) throws IOException {
- super(split, context);
- }
-
- @Override
- public boolean nextVertex() throws IOException,
- InterruptedException {
- return getRecordReader().nextKeyValue();
- }
-
- /**
- * For each row, create a vertex with the row ID as a text,
- * and it's 'children' qualifier as a single edge.
- */
- @Override
- public Vertex<Text, Text, Text>
- getCurrentVertex()
- throws IOException, InterruptedException {
- Result row = getRecordReader().getCurrentValue();
- Vertex<Text, Text, Text> vertex =
- getConf().createVertex();
- Text vertexId = new Text(Bytes.toString(row.getRow()));
- List<Edge<Text, Text>> edges = Lists.newLinkedList();
- String edge = Bytes.toString(row.getValue(CF, CHILDREN));
- Text vertexValue = new Text();
- Text edgeId = new Text(edge);
- edges.add(EdgeFactory.create(edgeId, uselessEdgeValue));
- vertex.initialize(vertexId, vertexValue, edges);
-
- return vertex;
- }
- }
-}
diff --git a/giraph-hbase/src/test/java/org/apache/giraph/io/hbase/edgemarker/TableEdgeOutputFormat.java b/giraph-hbase/src/test/java/org/apache/giraph/io/hbase/edgemarker/TableEdgeOutputFormat.java
deleted file mode 100644
index aa95f96..0000000
--- a/giraph-hbase/src/test/java/org/apache/giraph/io/hbase/edgemarker/TableEdgeOutputFormat.java
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.giraph.io.hbase.edgemarker;
-
-import org.apache.giraph.io.hbase.HBaseVertexOutputFormat;
-import org.apache.giraph.graph.Vertex;
-import org.apache.giraph.io.VertexWriter;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapreduce.RecordWriter;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-
-import java.io.IOException;
-/*
- Test subclass for HBaseVertexOutputFormat
- */
-public class TableEdgeOutputFormat
- extends HBaseVertexOutputFormat<Text, Text, Text> {
-
-
- public VertexWriter<Text, Text, Text>
- createVertexWriter(TaskAttemptContext context)
- throws IOException, InterruptedException {
- return new TableEdgeVertexWriter(context);
- }
-
- /*
- For each vertex, write back to the configured table using
- the vertex id as the row key bytes.
- */
- public static class TableEdgeVertexWriter
- extends HBaseVertexWriter<Text, Text, Text> {
-
- private final byte[] CF = Bytes.toBytes("cf");
- private final byte[] PARENT = Bytes.toBytes("parent");
-
- public TableEdgeVertexWriter(TaskAttemptContext context)
- throws IOException, InterruptedException {
- super(context);
- }
- /*
- Record the vertex value as a the value for a new qualifier 'parent'.
- */
- public void writeVertex(
- Vertex<Text, Text, Text> vertex)
- throws IOException, InterruptedException {
- RecordWriter<ImmutableBytesWritable, Writable> writer = getRecordWriter();
- byte[] rowBytes = vertex.getId().getBytes();
- Put put = new Put(rowBytes);
- Text value = vertex.getValue();
- if (value.toString().length() > 0) {
- put.add(CF, PARENT, value.getBytes());
- writer.write(new ImmutableBytesWritable(rowBytes), put);
- }
- }
- }
-}
diff --git a/pom.xml b/pom.xml
index f12a4f0..b959bc8 100644
--- a/pom.xml
+++ b/pom.xml
@@ -332,7 +332,11 @@ under the License.
<dep.fastutil.version>6.5.4</dep.fastutil.version>
<dep.google.findbugs.version>2.0.2</dep.google.findbugs.version>
<dep.guava.version>21.0</dep.guava.version>
+<<<<<<< HEAD
<dep.hbase.version>0.94.16</dep.hbase.version>
+=======
+ <dep.hcatalog.version>0.5.0-incubating</dep.hcatalog.version>
+>>>>>>> 0155f6eb23f5c1765a00aa6181bb80daffbb51e6
<dep.hive.version>0.11.0</dep.hive.version>
<dep.hiveio.version>0.26</dep.hiveio.version>
<dep.jaxb-impl.version>2.2.4-1</dep.jaxb-impl.version>
@@ -1134,8 +1138,12 @@ under the License.
<id>hadoop_2</id>
<modules>
<module>giraph-accumulo</module>
+<<<<<<< HEAD
<module>giraph-hbase</module>
<<<<<<< HEAD
+=======
+ <module>giraph-hcatalog</module>
+>>>>>>> 0155f6eb23f5c1765a00aa6181bb80daffbb51e6
<module>giraph-gora</module>
=======
<module>giraph-hcatalog</module>
@@ -1578,8 +1586,23 @@ under the License.
</dependency>
<dependency>
<groupId>org.apache.giraph</groupId>
+<<<<<<< HEAD
<artifactId>giraph-hbase</artifactId>
<version>${project.version}</version>
+=======
+ <artifactId>giraph-hcatalog</artifactId>
+ <version>${project.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>commons-lang</groupId>
+ <artifactId>commons-lang</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>commons-logging</groupId>
+ <artifactId>commons-logging-api</artifactId>
+ </exclusion>
+ </exclusions>
+>>>>>>> 0155f6eb23f5c1765a00aa6181bb80daffbb51e6
</dependency>
<dependency>
<groupId>org.apache.giraph</groupId>
@@ -1907,74 +1930,6 @@ under the License.
<version>${hadoop.version}</version>
<scope>provided</scope>
</dependency>
- <dependency>
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase</artifactId>
- <version>${dep.hbase.version}</version>
- <scope>provided</scope>
- <exclusions>
- <exclusion>
- <groupId>com.google.protobuf</groupId>
- <artifactId>protobuf-java</artifactId>
- </exclusion>
- <exclusion>
- <groupId>com.sun.jersey</groupId>
- <artifactId>jersey-server</artifactId>
- </exclusion>
- <exclusion>
- <groupId>commons-httpclient</groupId>
- <artifactId>commons-httpclient</artifactId>
- </exclusion>
- <exclusion>
- <groupId>commons-lang</groupId>
- <artifactId>commons-lang</artifactId>
- </exclusion>
- <exclusion>
- <groupId>commons-logging</groupId>
- <artifactId>commons-logging</artifactId>
- </exclusion>
- <exclusion>
- <groupId>log4j</groupId>
- <artifactId>log4j</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.apache.avro</groupId>
- <artifactId>avro</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-core</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.apache.thrift</groupId>
- <artifactId>thrift</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.mortbay.jetty</groupId>
- <artifactId>servlet-api-2.5</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.mortbay.jetty</groupId>
- <artifactId>jetty</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.mortbay.jetty</groupId>
- <artifactId>jsp-api-2.1</artifactId>
- </exclusion>
- <exclusion>
- <groupId>tomcat</groupId>
- <artifactId>jasper-compiler</artifactId>
- </exclusion>
- <exclusion>
- <groupId>tomcat</groupId>
- <artifactId>jasper-runtime</artifactId>
- </exclusion>
- <exclusion>
- <groupId>javax.ws.rs</groupId>
- <artifactId>jsr311-api</artifactId>
- </exclusion>
- </exclusions>
- </dependency>
<!-- runtime dependencies. sorted lexicographically. -->
<dependency>
@@ -2065,55 +2020,6 @@ under the License.
<version>${project.version}</version>
</dependency>
<dependency>
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase</artifactId>
- <type>test-jar</type>
- <version>${dep.hbase.version}</version>
- <scope>test</scope>
- <exclusions>
- <exclusion>
- <groupId>com.sun.jersey</groupId>
- <artifactId>jersey-server</artifactId>
- </exclusion>
- <exclusion>
- <groupId>commons-httpclient</groupId>
- <artifactId>commons-httpclient</artifactId>
- </exclusion>
- <exclusion>
- <groupId>commons-logging</groupId>
- <artifactId>commons-logging</artifactId>
- </exclusion>
- <exclusion>
- <groupId>log4j</groupId>
- <artifactId>log4j</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>avro</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-core</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.apache.thrift</groupId>
- <artifactId>thrift</artifactId>
- </exclusion>
- <exclusion>
- <groupId>tomcat</groupId>
- <artifactId>jasper-compiler</artifactId>
- </exclusion>
- <exclusion>
- <groupId>tomcat</groupId>
- <artifactId>jasper-runtime</artifactId>
- </exclusion>
- <exclusion>
- <groupId>javax.ws.rs</groupId>
- <artifactId>jsr311-api</artifactId>
- </exclusion>
- </exclusions>
- </dependency>
- <dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId>
<version>${dep.mockito.version}</version>