Posted to commits@hive.apache.org by th...@apache.org on 2013/09/16 01:02:10 UTC
svn commit: r1523513 [2/2] - in /hive/branches/branch-0.12/hcatalog:
build-support/ant/ core/src/main/java/org/apache/hcatalog/mapreduce/
core/src/main/java/org/apache/hive/hcatalog/cli/SemanticAnalysis/
core/src/main/java/org/apache/hive/hcatalog/comm...
Added: hive/branches/branch-0.12/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestHCatHBaseInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestHCatHBaseInputFormat.java?rev=1523513&view=auto
==============================================================================
--- hive/branches/branch-0.12/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestHCatHBaseInputFormat.java (added)
+++ hive/branches/branch-0.12/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestHCatHBaseInputFormat.java Sun Sep 15 23:02:09 2013
@@ -0,0 +1,609 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hcatalog.hbase;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.net.URI;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hive.cli.CliSessionState;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
+import org.apache.hadoop.hive.metastore.MetaStoreUtils;
+import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
+import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.RunningJob;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
+import org.apache.hive.hcatalog.cli.HCatDriver;
+import org.apache.hive.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer;
+import org.apache.hive.hcatalog.common.HCatConstants;
+import org.apache.hive.hcatalog.common.HCatException;
+import org.apache.hive.hcatalog.common.HCatUtil;
+import org.apache.hive.hcatalog.data.HCatRecord;
+import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
+import org.apache.hive.hcatalog.data.schema.HCatSchema;
+import org.apache.hcatalog.hbase.snapshot.RevisionManager;
+import org.apache.hcatalog.hbase.snapshot.RevisionManagerConfiguration;
+import org.apache.hcatalog.hbase.snapshot.Transaction;
+import org.apache.hive.hcatalog.mapreduce.HCatInputFormat;
+import org.apache.hive.hcatalog.mapreduce.InputJobInfo;
+import org.apache.hive.hcatalog.mapreduce.PartInfo;
+import org.junit.Test;
+
+public class TestHCatHBaseInputFormat extends SkeletonHBaseTest {
+
+ private static HiveConf hcatConf;
+ private static HCatDriver hcatDriver;
+ private final byte[] FAMILY = Bytes.toBytes("testFamily");
+ private final byte[] QUALIFIER1 = Bytes.toBytes("testQualifier1");
+ private final byte[] QUALIFIER2 = Bytes.toBytes("testQualifier2");
+
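+ // Sets up an HCatDriver against the test warehouse and copies the
+ // mini-cluster's hbase.* settings into the Hive configuration.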
+ public TestHCatHBaseInputFormat() throws Exception {
+ hcatConf = getHiveConf();
+ hcatConf.set(ConfVars.SEMANTIC_ANALYZER_HOOK.varname,
+ HCatSemanticAnalyzer.class.getName());
+ URI fsuri = getFileSystem().getUri();
+ Path whPath = new Path(fsuri.getScheme(), fsuri.getAuthority(),
+ getTestDir());
+ hcatConf.set(HiveConf.ConfVars.HADOOPFS.varname, fsuri.toString());
+ hcatConf.set(ConfVars.METASTOREWAREHOUSE.varname, whPath.toString());
+
+ //Add hbase properties
+
+ for (Map.Entry<String, String> el : getHbaseConf()) {
+ if (el.getKey().startsWith("hbase.")) {
+ hcatConf.set(el.getKey(), el.getValue());
+ }
+ }
+ HBaseConfiguration.merge(hcatConf,
+ RevisionManagerConfiguration.create());
+
+
+ SessionState.start(new CliSessionState(hcatConf));
+ hcatDriver = new HCatDriver();
+
+ }
+
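+ // Writes 'num' timestamped versions of two cells to the single row "testRow",
+ // committing one revision manager write transaction per version.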
+ private List<Put> generatePuts(int num, String tableName) throws IOException {
+
+ List<String> columnFamilies = Arrays.asList("testFamily");
+ RevisionManager rm = null;
+ List<Put> myPuts;
+ try {
+ rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(hcatConf);
+ rm.open();
+ myPuts = new ArrayList<Put>();
+ for (int i = 1; i <= num; i++) {
+ Put put = new Put(Bytes.toBytes("testRow"));
+ put.add(FAMILY, QUALIFIER1, i, Bytes.toBytes("textValue-" + i));
+ put.add(FAMILY, QUALIFIER2, i, Bytes.toBytes("textValue-" + i));
+ myPuts.add(put);
+ Transaction tsx = rm.beginWriteTransaction(tableName,
+ columnFamilies);
+ rm.commitWriteTransaction(tsx);
+ }
+ } finally {
+ if (rm != null)
+ rm.close();
+ }
+
+ return myPuts;
+ }
+
+ private void populateHBaseTable(String tName, int revisions) throws IOException {
+ List<Put> myPuts = generatePuts(revisions, tName);
+ HTable table = new HTable(getHbaseConf(), Bytes.toBytes(tName));
+ table.put(myPuts);
+ }
+
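+ // Writes one cell to testQualifier1 at the revision number of a new write
+ // transaction. commit == TRUE commits the transaction, FALSE aborts it, and
+ // null leaves it running. Returns the revision that was written.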
+ private long populateHBaseTableQualifier1(String tName, int value, Boolean commit)
+ throws IOException {
+ List<String> columnFamilies = Arrays.asList("testFamily");
+ RevisionManager rm = null;
+ List<Put> myPuts = new ArrayList<Put>();
+ long revision;
+ try {
+ rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(hcatConf);
+ rm.open();
+ Transaction tsx = rm.beginWriteTransaction(tName, columnFamilies);
+
+ Put put = new Put(Bytes.toBytes("testRow"));
+ revision = tsx.getRevisionNumber();
+ put.add(FAMILY, QUALIFIER1, revision,
+ Bytes.toBytes("textValue-" + value));
+ myPuts.add(put);
+
+ // If commit is null it is left as a running transaction
+ if (commit != null) {
+ if (commit) {
+ rm.commitWriteTransaction(tsx);
+ } else {
+ rm.abortWriteTransaction(tsx);
+ }
+ }
+ } finally {
+ if (rm != null)
+ rm.close();
+ }
+ HTable table = new HTable(getHbaseConf(), Bytes.toBytes(tName));
+ table.put(myPuts);
+ return revision;
+ }
+
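+ // Creates an HCat-managed HBase table in its own database, writes five
+ // revisions, and verifies that a map-only job reading through HCatInputFormat
+ // sees exactly one record containing the latest revision.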
+ @Test
+ public void TestHBaseTableReadMR() throws Exception {
+ String tableName = newTableName("MyTable");
+ String databaseName = newTableName("MyDatabase");
+ //Table name will be lower case unless specified by hbase.table.name property
+ String hbaseTableName = (databaseName + "." + tableName).toLowerCase();
+ String db_dir = getTestDir() + "/hbasedb";
+
+ String dbquery = "CREATE DATABASE IF NOT EXISTS " + databaseName + " LOCATION '"
+ + db_dir + "'";
+ String tableQuery = "CREATE TABLE " + databaseName + "." + tableName
+ + "(key string, testqualifier1 string, testqualifier2 string) STORED BY " +
+ "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'"
+ + "TBLPROPERTIES ('hbase.columns.mapping'=':key,testFamily:testQualifier1,testFamily:testQualifier2')";
+
+ CommandProcessorResponse responseOne = hcatDriver.run(dbquery);
+ assertEquals(0, responseOne.getResponseCode());
+ CommandProcessorResponse responseTwo = hcatDriver.run(tableQuery);
+ assertEquals(0, responseTwo.getResponseCode());
+
+ HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf());
+ boolean doesTableExist = hAdmin.tableExists(hbaseTableName);
+ assertTrue(doesTableExist);
+
+ populateHBaseTable(hbaseTableName, 5);
+ Configuration conf = new Configuration(hcatConf);
+ conf.set(HCatConstants.HCAT_KEY_HIVE_CONF,
+ HCatUtil.serialize(getHiveConf().getAllProperties()));
+
+ // output settings
+ Path outputDir = new Path(getTestDir(), "mapred/testHbaseTableMRRead");
+ FileSystem fs = getFileSystem();
+ if (fs.exists(outputDir)) {
+ fs.delete(outputDir, true);
+ }
+ // create job
+ Job job = new Job(conf, "hbase-mr-read-test");
+ job.setJarByClass(this.getClass());
+ job.setMapperClass(MapReadHTable.class);
+ MapReadHTable.resetCounters();
+
+ job.setInputFormatClass(HCatInputFormat.class);
+ HCatInputFormat.setInput(job.getConfiguration(), databaseName, tableName);
+ job.setOutputFormatClass(TextOutputFormat.class);
+ TextOutputFormat.setOutputPath(job, outputDir);
+ job.setMapOutputKeyClass(BytesWritable.class);
+ job.setMapOutputValueClass(Text.class);
+ job.setOutputKeyClass(BytesWritable.class);
+ job.setOutputValueClass(Text.class);
+ job.setNumReduceTasks(0);
+ assertTrue(job.waitForCompletion(true));
+ // Note: These asserts only work with LocalJobRunner, since the mappers run in the same JVM.
+ // If using MiniMRCluster, the tests will have to be modified.
+ assertFalse(MapReadHTable.error);
+ assertEquals(1, MapReadHTable.count);
+
+ String dropTableQuery = "DROP TABLE " + hbaseTableName;
+ CommandProcessorResponse responseThree = hcatDriver.run(dropTableQuery);
+ assertEquals(0, responseThree.getResponseCode());
+
+ boolean isHbaseTableThere = hAdmin.tableExists(hbaseTableName);
+ assertFalse(isHbaseTableThere);
+
+ String dropDB = "DROP DATABASE " + databaseName;
+ CommandProcessorResponse responseFour = hcatDriver.run(dropDB);
+ assertEquals(0, responseFour.getResponseCode());
+ }
+
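+ // Same read path as above, but with an explicit hbase.table.name and a
+ // projection schema selecting only the key and testqualifier1 columns.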
+ @Test
+ public void TestHBaseTableProjectionReadMR() throws Exception {
+
+ String tableName = newTableName("MyTable");
+ //Table name as specified by hbase.table.name property
+ String hbaseTableName = "MyDB_" + tableName;
+ String tableQuery = "CREATE TABLE " + tableName
+ + "(key string, testqualifier1 string, testqualifier2 string) STORED BY "
+ + "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'"
+ + "TBLPROPERTIES ('hbase.columns.mapping'="
+ + "':key,testFamily:testQualifier1,testFamily:testQualifier2',"
+ + "'hbase.table.name'='" + hbaseTableName + "')";
+
+ CommandProcessorResponse responseTwo = hcatDriver.run(tableQuery);
+ assertEquals(0, responseTwo.getResponseCode());
+
+ HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf());
+ boolean doesTableExist = hAdmin.tableExists(hbaseTableName);
+ assertTrue(doesTableExist);
+
+ populateHBaseTable(hbaseTableName, 5);
+
+ Configuration conf = new Configuration(hcatConf);
+ conf.set(HCatConstants.HCAT_KEY_HIVE_CONF,
+ HCatUtil.serialize(getHiveConf().getAllProperties()));
+
+ // output settings
+ Path outputDir = new Path(getTestDir(), "mapred/testHBaseTableProjectionReadMR");
+ FileSystem fs = getFileSystem();
+ if (fs.exists(outputDir)) {
+ fs.delete(outputDir, true);
+ }
+ // create job
+ Job job = new Job(conf, "hbase-column-projection");
+ job.setJarByClass(this.getClass());
+ job.setMapperClass(MapReadProjHTable.class);
+ job.setInputFormatClass(HCatInputFormat.class);
+ HCatInputFormat.setOutputSchema(job, getProjectionSchema());
+ HCatInputFormat.setInput(job, MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName);
+ job.setOutputFormatClass(TextOutputFormat.class);
+ TextOutputFormat.setOutputPath(job, outputDir);
+ job.setMapOutputKeyClass(BytesWritable.class);
+ job.setMapOutputValueClass(Text.class);
+ job.setOutputKeyClass(BytesWritable.class);
+ job.setOutputValueClass(Text.class);
+ job.setNumReduceTasks(0);
+ assertTrue(job.waitForCompletion(true));
+ assertFalse(MapReadProjHTable.error);
+ assertEquals(1, MapReadProjHTable.count);
+
+ String dropTableQuery = "DROP TABLE " + tableName;
+ CommandProcessorResponse responseThree = hcatDriver.run(dropTableQuery);
+ assertEquals(0, responseThree.getResponseCode());
+
+ boolean isHbaseTableThere = hAdmin.tableExists(hbaseTableName);
+ assertFalse(isHbaseTableThere);
+ }
+
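+ // Drives HBaseInputFormat directly through the old mapred API and checks that
+ // the projection is pushed down into the HBase scan (SCAN_COLUMNS).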
+ @Test
+ public void TestHBaseInputFormatProjectionReadMR() throws Exception {
+
+ String tableName = newTableName("mytable");
+ String tableQuery = "CREATE TABLE " + tableName
+ + "(key string, testqualifier1 string, testqualifier2 string) STORED BY " +
+ "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'"
+ + "TBLPROPERTIES ('hbase.columns.mapping'=':key," +
+ "testFamily:testQualifier1,testFamily:testQualifier2')";
+
+ CommandProcessorResponse responseTwo = hcatDriver.run(tableQuery);
+ assertEquals(0, responseTwo.getResponseCode());
+
+ HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf());
+ boolean doesTableExist = hAdmin.tableExists(tableName);
+ assertTrue(doesTableExist);
+
+ populateHBaseTable(tableName, 5);
+
+ Configuration conf = new Configuration(hcatConf);
+ conf.set(HCatConstants.HCAT_KEY_HIVE_CONF,
+ HCatUtil.serialize(getHiveConf().getAllProperties()));
+
+ // output settings
+ Path outputDir = new Path(getTestDir(), "mapred/testHBaseTableProjectionReadMR");
+ FileSystem fs = getFileSystem();
+ if (fs.exists(outputDir)) {
+ fs.delete(outputDir, true);
+ }
+ // create job
+ JobConf job = new JobConf(conf);
+ job.setJobName("hbase-scan-column");
+ job.setJarByClass(this.getClass());
+ job.setMapperClass(MapReadProjectionHTable.class);
+ job.setInputFormat(HBaseInputFormat.class);
+
+ //Configure projection schema
+ job.set(HCatConstants.HCAT_KEY_OUTPUT_SCHEMA, HCatUtil.serialize(getProjectionSchema()));
+ Job newJob = new Job(job);
+ HCatInputFormat.setInput(newJob, MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName);
+ String inputJobString = newJob.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO);
+ InputJobInfo info = (InputJobInfo) HCatUtil.deserialize(inputJobString);
+ job.set(HCatConstants.HCAT_KEY_JOB_INFO, inputJobString);
+ for (PartInfo partinfo : info.getPartitions()) {
+ for (Entry<String, String> entry : partinfo.getJobProperties().entrySet())
+ job.set(entry.getKey(), entry.getValue());
+ }
+ assertEquals("testFamily:testQualifier1", job.get(TableInputFormat.SCAN_COLUMNS));
+
+ job.setOutputFormat(org.apache.hadoop.mapred.TextOutputFormat.class);
+ org.apache.hadoop.mapred.TextOutputFormat.setOutputPath(job, outputDir);
+ job.setMapOutputKeyClass(BytesWritable.class);
+ job.setMapOutputValueClass(Text.class);
+ job.setOutputKeyClass(BytesWritable.class);
+ job.setOutputValueClass(Text.class);
+ job.setNumReduceTasks(0);
+
+ RunningJob runJob = JobClient.runJob(job);
+ runJob.waitForCompletion();
+ assertTrue(runJob.isSuccessful());
+ assertFalse(MapReadProjHTable.error);
+ assertEquals(1, MapReadProjHTable.count);
+
+ String dropTableQuery = "DROP TABLE " + tableName;
+ CommandProcessorResponse responseThree = hcatDriver.run(dropTableQuery);
+ assertEquals(0, responseThree.getResponseCode());
+
+ boolean isHbaseTableThere = hAdmin.tableExists(tableName);
+ assertFalse(isHbaseTableThere);
+ }
+
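+ // Writes five committed revisions plus two aborted writes to testQualifier1
+ // and verifies that the aborted revisions are not visible to the reader.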
+ @Test
+ public void TestHBaseTableIgnoreAbortedTransactions() throws Exception {
+ String tableName = newTableName("mytable");
+ String tableQuery = "CREATE TABLE " + tableName
+ + "(key string, testqualifier1 string, testqualifier2 string) STORED BY " +
+ "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'"
+ + "TBLPROPERTIES ('hbase.columns.mapping'=':key," +
+ "testFamily:testQualifier1,testFamily:testQualifier2')";
+
+ CommandProcessorResponse responseTwo = hcatDriver.run(tableQuery);
+ assertEquals(0, responseTwo.getResponseCode());
+
+ HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf());
+ boolean doesTableExist = hAdmin.tableExists(tableName);
+ assertTrue(doesTableExist);
+
+ populateHBaseTable(tableName, 5);
+ populateHBaseTableQualifier1(tableName, 6, false);
+ populateHBaseTableQualifier1(tableName, 7, false);
+
+ Configuration conf = new Configuration(hcatConf);
+ conf.set(HCatConstants.HCAT_KEY_HIVE_CONF,
+ HCatUtil.serialize(getHiveConf().getAllProperties()));
+
+ Path outputDir = new Path(getTestDir(), "mapred/testHBaseTableIgnoreAbortedTransactions");
+ FileSystem fs = getFileSystem();
+ if (fs.exists(outputDir)) {
+ fs.delete(outputDir, true);
+ }
+ Job job = new Job(conf, "hbase-aborted-transaction");
+ job.setJarByClass(this.getClass());
+ job.setMapperClass(MapReadHTable.class);
+ MapReadHTable.resetCounters();
+ job.setInputFormatClass(HCatInputFormat.class);
+ HCatInputFormat.setInput(job, MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName);
+ job.setOutputFormatClass(TextOutputFormat.class);
+ TextOutputFormat.setOutputPath(job, outputDir);
+ job.setMapOutputKeyClass(BytesWritable.class);
+ job.setMapOutputValueClass(Text.class);
+ job.setOutputKeyClass(BytesWritable.class);
+ job.setOutputValueClass(Text.class);
+ job.setNumReduceTasks(0);
+ assertTrue(job.waitForCompletion(true));
+ // Verify that the records do not contain the aborted transaction
+ // revisions 6 and 7 for testFamily:testQualifier1, and that
+ // revision 5 is fetched for both testQualifier1 and testQualifier2.
+ assertFalse(MapReadHTable.error);
+ assertEquals(1, MapReadHTable.count);
+
+ String dropTableQuery = "DROP TABLE " + tableName;
+ CommandProcessorResponse responseThree = hcatDriver.run(dropTableQuery);
+ assertEquals(0, responseThree.getResponseCode());
+
+ boolean isHbaseTableThere = hAdmin.tableExists(tableName);
+ assertFalse(isHbaseTableThere);
+ }
+
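+ // Mixes committed, running, and aborted transactions and verifies that the
+ // reader only sees revisions below the oldest still-running transaction:
+ // revision 3 for testQualifier1 and revision 2 for testQualifier2.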
+ @Test
+ public void TestHBaseTableIgnoreAbortedAndRunningTransactions() throws Exception {
+ String tableName = newTableName("mytable");
+ String tableQuery = "CREATE TABLE " + tableName
+ + "(key string, testqualifier1 string, testqualifier2 string) STORED BY " +
+ "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'"
+ + "TBLPROPERTIES ('hbase.columns.mapping'=':key," +
+ "testFamily:testQualifier1,testFamily:testQualifier2')";
+
+ CommandProcessorResponse responseTwo = hcatDriver.run(tableQuery);
+ assertEquals(0, responseTwo.getResponseCode());
+
+ HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf());
+ boolean doesTableExist = hAdmin.tableExists(tableName);
+ assertTrue(doesTableExist);
+
+ populateHBaseTable(tableName, 2);
+ populateHBaseTableQualifier1(tableName, 3, Boolean.TRUE); //Committed transaction
+ populateHBaseTableQualifier1(tableName, 4, null); //Running transaction
+ populateHBaseTableQualifier1(tableName, 5, Boolean.FALSE); //Aborted transaction
+ populateHBaseTableQualifier1(tableName, 6, Boolean.TRUE); //Committed transaction
+ populateHBaseTableQualifier1(tableName, 7, null); //Running Transaction
+ populateHBaseTableQualifier1(tableName, 8, Boolean.FALSE); //Aborted Transaction
+
+ Configuration conf = new Configuration(hcatConf);
+ conf.set(HCatConstants.HCAT_KEY_HIVE_CONF,
+ HCatUtil.serialize(getHiveConf().getAllProperties()));
+
+ Path outputDir = new Path(getTestDir(), "mapred/testHBaseTableIgnoreAbortedTransactions");
+ FileSystem fs = getFileSystem();
+ if (fs.exists(outputDir)) {
+ fs.delete(outputDir, true);
+ }
+ Job job = new Job(conf, "hbase-running-aborted-transaction");
+ job.setJarByClass(this.getClass());
+ job.setMapperClass(MapReadHTableRunningAbort.class);
+ job.setInputFormatClass(HCatInputFormat.class);
+ HCatInputFormat.setInput(job, MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName);
+ job.setOutputFormatClass(TextOutputFormat.class);
+ TextOutputFormat.setOutputPath(job, outputDir);
+ job.setMapOutputKeyClass(BytesWritable.class);
+ job.setMapOutputValueClass(Text.class);
+ job.setOutputKeyClass(BytesWritable.class);
+ job.setOutputValueClass(Text.class);
+ job.setNumReduceTasks(0);
+ assertTrue(job.waitForCompletion(true));
+ // Verify that the records do not contain the running and aborted transaction
+ // revisions: revision 3 is fetched for testQualifier1 and revision 2 for
+ // testQualifier2 (see MapReadHTableRunningAbort).
+ assertFalse(MapReadHTableRunningAbort.error);
+ assertEquals(1, MapReadHTableRunningAbort.count);
+
+ String dropTableQuery = "DROP TABLE " + tableName;
+ CommandProcessorResponse responseThree = hcatDriver.run(dropTableQuery);
+ assertEquals(0, responseThree.getResponseCode());
+
+ boolean isHbaseTableThere = hAdmin.tableExists(tableName);
+ assertFalse(isHbaseTableThere);
+ }
+
+
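+ // Mapper that checks each record against the expected revision-5 values for
+ // the full three-column schema, recording failures in static fields that the
+ // test inspects after the job completes.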
+ static class MapReadHTable
+ extends
+ Mapper<ImmutableBytesWritable, HCatRecord, WritableComparable<?>, Text> {
+
+ static boolean error = false;
+ static int count = 0;
+
+ @Override
+ public void map(ImmutableBytesWritable key, HCatRecord value,
+ Context context) throws IOException, InterruptedException {
+ System.out.println("HCat record value" + value.toString());
+ boolean correctValues = (value.size() == 3)
+ && (value.get(0).toString()).equalsIgnoreCase("testRow")
+ && (value.get(1).toString()).equalsIgnoreCase("textValue-5")
+ && (value.get(2).toString()).equalsIgnoreCase("textValue-5");
+
+ if (correctValues == false) {
+ error = true;
+ }
+ count++;
+ }
+
+ public static void resetCounters() {
+ error = false;
+ count = 0;
+ }
+ }
+
+ static class MapReadProjHTable
+ extends
+ Mapper<ImmutableBytesWritable, HCatRecord, WritableComparable<?>, Text> {
+
+ static boolean error = false;
+ static int count = 0;
+
+ @Override
+ public void map(ImmutableBytesWritable key, HCatRecord value,
+ Context context) throws IOException, InterruptedException {
+ System.out.println("HCat record value" + value.toString());
+ boolean correctValues = (value.size() == 2)
+ && (value.get(0).toString()).equalsIgnoreCase("testRow")
+ && (value.get(1).toString()).equalsIgnoreCase("textValue-5");
+
+ if (correctValues == false) {
+ error = true;
+ }
+ count++;
+ }
+ }
+
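+ // Old-API mapper that reads raw HBase Results and expects the pushed-down
+ // projection to leave exactly one KeyValue: testFamily:testQualifier1.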
+ static class MapReadProjectionHTable
+ implements org.apache.hadoop.mapred.Mapper<ImmutableBytesWritable, Result, WritableComparable<?>, Text> {
+
+ static boolean error = false;
+ static int count = 0;
+
+ @Override
+ public void configure(JobConf job) {
+ }
+
+ @Override
+ public void close() throws IOException {
+ }
+
+ @Override
+ public void map(ImmutableBytesWritable key, Result result,
+ OutputCollector<WritableComparable<?>, Text> output, Reporter reporter)
+ throws IOException {
+ System.out.println("Result " + result.toString());
+ List<KeyValue> list = result.list();
+ boolean correctValues = (list.size() == 1)
+ && (Bytes.toString(list.get(0).getRow())).equalsIgnoreCase("testRow")
+ && (Bytes.toString(list.get(0).getValue())).equalsIgnoreCase("textValue-5")
+ && (Bytes.toString(list.get(0).getFamily())).equalsIgnoreCase("testFamily")
+ && (Bytes.toString(list.get(0).getQualifier())).equalsIgnoreCase("testQualifier1");
+
+ if (!correctValues) {
+ error = true;
+ }
+ count++;
+ }
+ }
+
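+ // Mapper for the running/aborted-transaction test: expects revision 3 for
+ // testQualifier1 and revision 2 for testQualifier2.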
+ static class MapReadHTableRunningAbort
+ extends
+ Mapper<ImmutableBytesWritable, HCatRecord, WritableComparable<?>, Text> {
+
+ static boolean error = false;
+ static int count = 0;
+
+ @Override
+ public void map(ImmutableBytesWritable key, HCatRecord value,
+ Context context) throws IOException, InterruptedException {
+ System.out.println("HCat record value" + value.toString());
+ boolean correctValues = (value.size() == 3)
+ && (value.get(0).toString()).equalsIgnoreCase("testRow")
+ && (value.get(1).toString()).equalsIgnoreCase("textValue-3")
+ && (value.get(2).toString()).equalsIgnoreCase("textValue-2");
+
+ if (correctValues == false) {
+ error = true;
+ }
+ count++;
+ }
+ }
+
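+ // Two-column projection schema (key, testqualifier1) used by the projection
+ // tests above.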
+ private HCatSchema getProjectionSchema() throws HCatException {
+
+ HCatSchema schema = new HCatSchema(new ArrayList<HCatFieldSchema>());
+ schema.append(new HCatFieldSchema("key", HCatFieldSchema.Type.STRING,
+ ""));
+ schema.append(new HCatFieldSchema("testqualifier1",
+ HCatFieldSchema.Type.STRING, ""));
+ return schema;
+ }
+
+
+}
Added: hive/branches/branch-0.12/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestHiveHBaseStorageHandler.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestHiveHBaseStorageHandler.java?rev=1523513&view=auto
==============================================================================
--- hive/branches/branch-0.12/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestHiveHBaseStorageHandler.java (added)
+++ hive/branches/branch-0.12/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestHiveHBaseStorageHandler.java Sun Sep 15 23:02:09 2013
@@ -0,0 +1,220 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hcatalog.hbase;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.net.URI;
+import java.util.Map;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hive.cli.CliSessionState;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
+import org.apache.hadoop.hive.metastore.Warehouse;
+import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
+import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.hive.hcatalog.cli.HCatDriver;
+import org.apache.hive.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer;
+import org.junit.Test;
+
+public class TestHiveHBaseStorageHandler extends SkeletonHBaseTest {
+
+ private static HiveConf hcatConf;
+ private static HCatDriver hcatDriver;
+ private static Warehouse wh;
+
+ public void Initialize() throws Exception {
+
+ hcatConf = getHiveConf();
+ hcatConf.set(ConfVars.SEMANTIC_ANALYZER_HOOK.varname,
+ HCatSemanticAnalyzer.class.getName());
+ URI fsuri = getFileSystem().getUri();
+ Path whPath = new Path(fsuri.getScheme(), fsuri.getAuthority(),
+ getTestDir());
+ hcatConf.set(HiveConf.ConfVars.HADOOPFS.varname, fsuri.toString());
+ hcatConf.set(ConfVars.METASTOREWAREHOUSE.varname, whPath.toString());
+
+ //Add hbase properties
+ for (Map.Entry<String, String> el : getHbaseConf()) {
+ if (el.getKey().startsWith("hbase.")) {
+ hcatConf.set(el.getKey(), el.getValue());
+ }
+ }
+
+ SessionState.start(new CliSessionState(hcatConf));
+ hcatDriver = new HCatDriver();
+
+ }
+
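+ // Round-trips a create/drop of a Hive table backed by HBaseStorageHandler and
+ // verifies that the underlying HBase table appears and disappears with it.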
+ @Test
+ public void testTableCreateDrop() throws Exception {
+ Initialize();
+
+ hcatDriver.run("drop table test_table");
+ CommandProcessorResponse response = hcatDriver
+ .run("create table test_table(key int, value string) STORED BY " +
+ "'org.apache.hadoop.hive.hbase.HBaseStorageHandler'"
+ + " WITH SERDEPROPERTIES ('hbase.columns.mapping'=':key,cf1:val')");
+
+ assertEquals(0, response.getResponseCode());
+
+ HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf());
+ boolean doesTableExist = hAdmin.tableExists("test_table");
+
+ assertTrue(doesTableExist);
+
+ hcatDriver.run("drop table test_table");
+ doesTableExist = hAdmin.tableExists("test_table");
+ assertFalse(doesTableExist);
+
+ }
+ @Test
+ public void testHBaseTableCreateDrop() throws Exception {
+ Initialize();
+
+ hcatDriver.run("drop table test_table");
+ CommandProcessorResponse response = hcatDriver
+ .run("create table test_table(key int, value string) STORED BY " +
+ "'org.apache.hadoop.hive.hbase.HBaseStorageHandler'"
+ + " WITH SERDEPROPERTIES ('hbase.columns.mapping'=':key,cf1:val')");
+
+ assertEquals(0, response.getResponseCode());
+
+ HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf());
+ boolean doesTableExist = hAdmin.tableExists("test_table");
+
+ assertTrue(doesTableExist);
+
+ hcatDriver.run("drop table test_table");
+ doesTableExist = hAdmin.tableExists("test_table");
+ assertFalse(doesTableExist);
+
+ }
+
+ @Test
+ public void testTableCreateDropDifferentCase() throws Exception {
+ Initialize();
+
+ hcatDriver.run("drop table test_Table");
+ CommandProcessorResponse response = hcatDriver
+ .run("create table test_Table(key int, value string) STORED BY " +
+ "'org.apache.hadoop.hive.hbase.HBaseStorageHandler'"
+ + " WITH SERDEPROPERTIES ('hbase.columns.mapping'=':key,cf1:val')");
+
+ assertEquals(0, response.getResponseCode());
+
+ //HBase table gets created with the specific case
+ HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf());
+ boolean doesTableExist = hAdmin.tableExists("test_table");
+
+ assertTrue(doesTableExist);
+
+ hcatDriver.run("drop table test_table");
+ doesTableExist = hAdmin.tableExists("test_table");
+ assertFalse(doesTableExist);
+ }
+
+ @Test
+ public void testTableCreateDropCaseSensitive() throws Exception {
+ Initialize();
+
+ hcatDriver.run("drop table test_Table");
+ CommandProcessorResponse response = hcatDriver
+ .run("create table test_Table(key int, value string) STORED BY " +
+ "'org.apache.hadoop.hive.hbase.HBaseStorageHandler'"
+ + " WITH SERDEPROPERTIES ('hbase.columns.mapping'=':key,cf1:val')" +
+ " TBLPROPERTIES ('hbase.table.name'='CaseSensitiveTable')");
+
+ assertEquals(0, response.getResponseCode());
+
+ HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf());
+ boolean doesTableExist = hAdmin.tableExists("CaseSensitiveTable");
+
+ assertTrue(doesTableExist);
+
+
+ hcatDriver.run("drop table test_table");
+ doesTableExist = hAdmin.tableExists("CaseSensitiveTable");
+ assertFalse(doesTableExist);
+
+ }
+
+ @Test
+ public void testTableDropNonExistent() throws Exception {
+ Initialize();
+
+ hcatDriver.run("drop table mytable");
+ CommandProcessorResponse response = hcatDriver
+ .run("create table mytable(key int, value string) STORED BY " +
+ "'org.apache.hadoop.hive.hbase.HBaseStorageHandler'"
+ + " WITH SERDEPROPERTIES ('hbase.columns.mapping'=':key,cf1:val')");
+
+ assertEquals(0, response.getResponseCode());
+
+ HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf());
+ boolean doesTableExist = hAdmin.tableExists("mytable");
+ assertTrue(doesTableExist);
+
+ //Now delete the table from hbase
+ if (hAdmin.isTableEnabled("mytable")) {
+ hAdmin.disableTable("mytable");
+ }
+ hAdmin.deleteTable("mytable");
+ doesTableExist = hAdmin.tableExists("mytable");
+ assertFalse(doesTableExist);
+
+ CommandProcessorResponse responseTwo = hcatDriver.run("drop table mytable");
+ assertTrue(responseTwo.getResponseCode() != 0);
+
+ }
+
+ @Test
+ public void testTableCreateExternal() throws Exception {
+
+ String tableName = "testTable";
+ HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf());
+
+ HTableDescriptor tableDesc = new HTableDescriptor(tableName);
+ tableDesc.addFamily(new HColumnDescriptor(Bytes.toBytes("key")));
+ tableDesc.addFamily(new HColumnDescriptor(Bytes.toBytes("familyone")));
+ tableDesc.addFamily(new HColumnDescriptor(Bytes.toBytes("familytwo")));
+
+ hAdmin.createTable(tableDesc);
+ boolean doesTableExist = hAdmin.tableExists(tableName);
+ assertTrue(doesTableExist);
+
+ hcatDriver.run("drop table mytabletwo");
+ CommandProcessorResponse response = hcatDriver
+ .run("create external table mytabletwo(key int, valueone string, valuetwo string) STORED BY " +
+ "'org.apache.hadoop.hive.hbase.HBaseStorageHandler'"
+ + " WITH SERDEPROPERTIES ('hbase.columns.mapping'=':key,familyone:val,familytwo:val') " +
+ " TBLPROPERTIES ('hbase.table.name'='testTable')");
+
+ assertEquals(0, response.getResponseCode());
+
+ }
+
+
+}
Added: hive/branches/branch-0.12/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestHiveHBaseTableOutputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestHiveHBaseTableOutputFormat.java?rev=1523513&view=auto
==============================================================================
--- hive/branches/branch-0.12/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestHiveHBaseTableOutputFormat.java (added)
+++ hive/branches/branch-0.12/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestHiveHBaseTableOutputFormat.java Sun Sep 15 23:02:09 2013
@@ -0,0 +1,341 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hcatalog.hbase;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.mapred.TableOutputFormat;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hive.cli.CliSessionState;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.hbase.HBaseSerDe;
+import org.apache.hadoop.hive.hbase.HiveHBaseTableOutputFormat;
+import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
+import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.RunningJob;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
+import org.apache.hive.hcatalog.cli.HCatDriver;
+import org.apache.hive.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer;
+import org.apache.hive.hcatalog.common.ErrorType;
+import org.apache.hive.hcatalog.common.HCatConstants;
+import org.apache.hive.hcatalog.common.HCatException;
+import org.apache.hive.hcatalog.common.HCatUtil;
+import org.apache.hive.hcatalog.data.DefaultHCatRecord;
+import org.apache.hive.hcatalog.data.HCatRecord;
+import org.apache.hive.hcatalog.data.schema.HCatSchema;
+import org.apache.hive.hcatalog.mapreduce.HCatOutputFormat;
+import org.apache.hive.hcatalog.mapreduce.OutputJobInfo;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.util.Map;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Test HiveHBaseTableOutputFormat and HBaseStorageHandler using a MiniCluster
+ */
+public class TestHiveHBaseTableOutputFormat extends SkeletonHBaseTest {
+
+ private final HiveConf allConf;
+ private final HCatDriver hcatDriver;
+
+ public TestHiveHBaseTableOutputFormat() {
+ allConf = getHiveConf();
+ allConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname,
+ HCatSemanticAnalyzer.class.getName());
+ allConf.set(HiveConf.ConfVars.HADOOPFS.varname, getFileSystem().getUri().toString());
+ allConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, new Path(getTestDir(),"warehouse").toString());
+
+ //Add hbase properties
+ for (Map.Entry<String, String> el : getHbaseConf())
+ if (el.getKey().startsWith("hbase.")) {
+ allConf.set(el.getKey(), el.getValue());
+ }
+ SessionState.start(new CliSessionState(allConf));
+ hcatDriver = new HCatDriver();
+ }
+
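+ // Writes Puts straight to HBase through HiveHBaseTableOutputFormat using the
+ // old mapred API, then scans the table to verify the written cells.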
+ @Test
+ public void directOutputFormatTest() throws IOException, ClassNotFoundException, InterruptedException {
+ String testName = "directOutputFormatTest";
+ Path methodTestDir = new Path(getTestDir(),testName);
+
+ String tableName = newTableName(testName).toLowerCase();
+ String familyName = "my_family";
+ byte[] familyNameBytes = Bytes.toBytes(familyName);
+
+ //include hbase config in conf file
+ Configuration conf = new Configuration(allConf);
+ conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(allConf.getAllProperties()));
+
+ //create table
+ createTable(tableName,new String[]{familyName});
+
+ String data[] = {
+ "1,english:ONE,spanish:UNO",
+ "2,english:TWO,spanish:DOS",
+ "3,english:THREE,spanish:TRES"};
+
+ // input/output settings
+ Path inputPath = new Path(methodTestDir,"mr_input");
+ getFileSystem().mkdirs(inputPath);
+ FSDataOutputStream os = getFileSystem().create(new Path(inputPath,"inputFile.txt"));
+ for(String line: data) {
+ os.write(Bytes.toBytes(line + "\n"));
+ }
+ os.close();
+
+ //create job
+ JobConf job = new JobConf(conf);
+ job.setJobName(testName);
+ job.setWorkingDirectory(new Path(methodTestDir,"mr_work"));
+ job.setJarByClass(this.getClass());
+ job.setMapperClass(MapWrite.class);
+
+ job.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class);
+ org.apache.hadoop.mapred.TextInputFormat.setInputPaths(job, inputPath);
+ // Why do we need to set all three of these table name properties?
+ job.setOutputFormat(HiveHBaseTableOutputFormat.class);
+ job.set(HBaseSerDe.HBASE_TABLE_NAME,tableName);
+ job.set(TableOutputFormat.OUTPUT_TABLE, tableName);
+ job.set(HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX+".hbase.mapreduce.outputTableName", tableName);
+
+ try {
+ OutputJobInfo outputJobInfo = OutputJobInfo.create("default", tableName, null);
+ job.set(HCatConstants.HCAT_KEY_OUTPUT_INFO,
+ HCatUtil.serialize(outputJobInfo));
+ } catch (Exception ex) {
+ throw new IOException("Serialization error " + ex.getMessage(), ex);
+ }
+
+ job.setMapOutputKeyClass(BytesWritable.class);
+ job.setMapOutputValueClass(HCatRecord.class);
+ job.setOutputKeyClass(BytesWritable.class);
+ job.setOutputValueClass(HCatRecord.class);
+ job.setNumReduceTasks(0);
+ RunningJob runJob = JobClient.runJob(job);
+ runJob.waitForCompletion();
+ assertTrue(runJob.isSuccessful());
+
+ //verify
+ HTable table = new HTable(conf, tableName);
+ Scan scan = new Scan();
+ scan.addFamily(familyNameBytes);
+ ResultScanner scanner = table.getScanner(scan);
+ int index=0;
+ for(Result result: scanner) {
+ String vals[] = data[index].split(",");
+ for(int i=1;i<vals.length;i++) {
+ String pair[] = vals[i].split(":");
+ assertTrue(result.containsColumn(familyNameBytes,Bytes.toBytes(pair[0])));
+ assertEquals(pair[1],Bytes.toString(result.getValue(familyNameBytes,Bytes.toBytes(pair[0]))));
+ }
+ index++;
+ }
+ assertEquals(data.length,index);
+ }
+
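+ // Writes HCatRecords through HCatOutputFormat into an HBase-backed table,
+ // using one input file per record so the job can run with multiple mappers,
+ // then scans the HBase table to verify the contents.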
+ @Test
+ public void directHCatOutputFormatTest() throws Exception {
+ String testName = "TestHiveHBaseTableOutputFormat";
+ Path methodTestDir = new Path(getTestDir(),testName);
+
+ String databaseName = testName;
+ String dbDir = new Path(methodTestDir,"DB_"+testName).toString();
+ String tableName = newTableName(testName);
+ String familyName = "my_family";
+ byte[] familyNameBytes = Bytes.toBytes(familyName);
+ //Table name will be lower case unless specified by hbase.table.name property
+ String hbaseTableName = (databaseName + "." + tableName).toLowerCase();
+
+ //include hbase config in conf file
+ Configuration conf = new Configuration(allConf);
+ conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(allConf.getAllProperties()));
+
+
+ String dbquery = "CREATE DATABASE IF NOT EXISTS " + databaseName + " LOCATION '" + dbDir + "'";
+ String tableQuery = "CREATE TABLE " + databaseName + "." + tableName +
+ "(key int, english string, spanish string) STORED BY " +
+ "'org.apache.hadoop.hive.hbase.HBaseStorageHandler'" +
+ " WITH SERDEPROPERTIES (" +
+ "'hbase.columns.mapping'=':key,"+familyName+":english,"+familyName+":spanish')" ;
+
+ assertEquals(0, hcatDriver.run(dbquery).getResponseCode());
+ assertEquals(0, hcatDriver.run(tableQuery).getResponseCode());
+
+ String data[] = {
+ "1,english:ONE,spanish:UNO",
+ "2,english:TWO,spanish:DOS",
+ "3,english:THREE,spanish:TRES"};
+
+ // input/output settings
+ Path inputPath = new Path(methodTestDir,"mr_input");
+ getFileSystem().mkdirs(inputPath);
+ //create multiple files so we can test with multiple mappers
+ for(int i=0;i<data.length;i++) {
+ FSDataOutputStream os = getFileSystem().create(new Path(inputPath,"inputFile"+i+".txt"));
+ os.write(Bytes.toBytes(data[i] + "\n"));
+ os.close();
+ }
+
+ //create job
+ Path workingDir = new Path(methodTestDir, "mr_work");
+ OutputJobInfo outputJobInfo = OutputJobInfo.create(databaseName,
+ tableName, null);
+
+ Job job = configureJob(testName, conf, workingDir, MapHCatWrite.class,
+ outputJobInfo, inputPath);
+
+ assertTrue(job.waitForCompletion(true));
+
+
+ //verify
+ HTable table = new HTable(conf, hbaseTableName);
+ Scan scan = new Scan();
+ scan.addFamily(familyNameBytes);
+ ResultScanner scanner = table.getScanner(scan);
+ int index=0;
+ for(Result result: scanner) {
+ String vals[] = data[index].split(",");
+ for(int i=1;i<vals.length;i++) {
+ String pair[] = vals[i].split(":");
+ assertTrue(result.containsColumn(familyNameBytes,Bytes.toBytes(pair[0])));
+ assertEquals(pair[1],Bytes.toString(result.getValue(familyNameBytes,Bytes.toBytes(pair[0]))));
+ //assertEquals(1l,result.getColumn(familyNameBytes,Bytes.toBytes(pair[0])).get(0).getTimestamp());
+ }
+ index++;
+ }
+ assertEquals(data.length,index);
+ }
+
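+ // Builds a map-only job wired to HCatOutputFormat: looks up the table schema
+ // from the metastore, stores it in the OutputJobInfo, and sets the various
+ // table-name properties the output path expects.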
+ private Job configureJob(String jobName, Configuration conf,
+ Path workingDir, Class<? extends Mapper> mapperClass,
+ OutputJobInfo outputJobInfo, Path inputPath) throws IOException {
+
+ try {
+ //now setting the schema
+ HiveConf hiveConf = HCatUtil.getHiveConf(conf);
+ HiveMetaStoreClient client = HCatUtil.getHiveClient(hiveConf);
+ Table table = client.getTable(outputJobInfo.getDatabaseName(), outputJobInfo.getTableName());
+ StorageDescriptor tblSD = table.getSd();
+ if (tblSD == null) {
+ throw new HCatException(
+ "Cannot construct partition info from an empty storage descriptor.");
+ }
+ HCatSchema tableSchema = new HCatSchema(HCatUtil.getHCatFieldSchemaList(tblSD.getCols()));
+ outputJobInfo.setOutputSchema(tableSchema);
+ }
+ catch(Exception e) {
+ if( e instanceof HCatException ) {
+ throw (HCatException) e;
+ } else {
+ throw new HCatException(ErrorType.ERROR_SET_OUTPUT, e);
+ }
+ }
+ conf.set(HBaseSerDe.HBASE_TABLE_NAME,outputJobInfo.getDatabaseName()+ "." + outputJobInfo.getTableName());
+ conf.set(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_NAME,outputJobInfo.getDatabaseName()+ "." + outputJobInfo.getTableName());
+ conf.set(TableOutputFormat.OUTPUT_TABLE, outputJobInfo.getDatabaseName() + "."+ outputJobInfo.getTableName());
+ conf.set(HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX+".hbase.mapreduce.outputTableName", outputJobInfo.getDatabaseName() + "." + outputJobInfo.getTableName());
+ conf.set(HCatConstants.HCAT_KEY_OUTPUT_INFO,HCatUtil.serialize(outputJobInfo));
+
+ Job job = new Job(conf, jobName);
+ job.setWorkingDirectory(workingDir);
+ job.setJarByClass(this.getClass());
+ job.setMapperClass(mapperClass);
+
+ job.setInputFormatClass(TextInputFormat.class);
+ TextInputFormat.setInputPaths(job, inputPath);
+ //job.setOutputFormatClass(HiveHBaseTableOutputFormat.class);
+ job.setOutputFormatClass(HCatOutputFormat.class);
+ HCatOutputFormat.setOutput(job, outputJobInfo);
+ job.setMapOutputKeyClass(BytesWritable.class);
+ job.setMapOutputValueClass(HCatRecord.class);
+ job.setOutputKeyClass(BytesWritable.class);
+ job.setOutputValueClass(HCatRecord.class);
+
+ job.setNumReduceTasks(0);
+ return job;
+ }
+
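+ // Parses lines of the form "key,english:X,spanish:Y" into HCatRecords using
+ // the output schema carried in the job configuration.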
+ public static class MapHCatWrite extends Mapper<LongWritable, Text, BytesWritable, HCatRecord> {
+
+ @Override
+ public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
+ OutputJobInfo jobInfo = (OutputJobInfo)HCatUtil.deserialize(context.getConfiguration().get(HCatConstants.HCAT_KEY_OUTPUT_INFO));
+ HCatRecord record = new DefaultHCatRecord(3);
+ HCatSchema schema = jobInfo.getOutputSchema();
+ String vals[] = value.toString().split(",");
+ record.setInteger("key",schema,Integer.parseInt(vals[0]));
+ for(int i=1;i<vals.length;i++) {
+ String pair[] = vals[i].split(":");
+ record.set(pair[0],schema,pair[1]);
+ }
+ context.write(null,record);
+ }
+ }
+
+
+
+ public static class MapWrite implements org.apache.hadoop.mapred.Mapper<LongWritable, Text, BytesWritable, Put> {
+
+ @Override
+ public void configure(JobConf job) {
+ }
+
+ @Override
+ public void close() throws IOException {
+ }
+
+ @Override
+ public void map(LongWritable key, Text value,
+ OutputCollector<BytesWritable, Put> output, Reporter reporter)
+ throws IOException {
+ String vals[] = value.toString().split(",");
+ Put put = new Put(Bytes.toBytes(vals[0]));
+ for(int i=1;i<vals.length;i++) {
+ String pair[] = vals[i].split(":");
+ put.add(Bytes.toBytes("my_family"),
+ Bytes.toBytes(pair[0]),
+ Bytes.toBytes(pair[1]));
+ }
+ output.collect(null, put);
+ }
+ }
+
+}
Added: hive/branches/branch-0.12/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestPigHBaseStorageHandler.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestPigHBaseStorageHandler.java?rev=1523513&view=auto
==============================================================================
--- hive/branches/branch-0.12/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestPigHBaseStorageHandler.java (added)
+++ hive/branches/branch-0.12/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestPigHBaseStorageHandler.java Sun Sep 15 23:02:09 2013
@@ -0,0 +1,360 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hcatalog.hbase;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.net.URI;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hive.cli.CliSessionState;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
+import org.apache.hadoop.hive.ql.Driver;
+import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
+import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.pig.ExecType;
+import org.apache.pig.PigServer;
+import org.apache.pig.data.DataType;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.impl.logicalLayer.schema.Schema;
+import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema;
+import org.junit.Test;
+
+public class TestPigHBaseStorageHandler extends SkeletonHBaseTest {
+
+ private static HiveConf hcatConf;
+ private static Driver driver;
+ private static String mypath;
+
+ private final byte[] FAMILY = Bytes.toBytes("testFamily");
+ private final byte[] QUALIFIER1 = Bytes.toBytes("testQualifier1");
+ private final byte[] QUALIFIER2 = Bytes.toBytes("testQualifier2");
+
+ public void Initialize() throws Exception {
+
+ hcatConf = new HiveConf(this.getClass());
+ //hcatConf.set(ConfVars.SEMANTIC_ANALYZER_HOOK.varname,
+ // HCatSemanticAnalyzer.class.getName());
+ URI fsuri = getFileSystem().getUri();
+ Path whPath = new Path(fsuri.getScheme(), fsuri.getAuthority(),
+ getTestDir());
+ hcatConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, "");
+ hcatConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, "");
+ hcatConf.set(ConfVars.METASTOREWAREHOUSE.varname, whPath.toString());
+
+ //Add hbase properties
+ for (Map.Entry<String, String> el : getHbaseConf()) {
+ if (el.getKey().startsWith("hbase.")) {
+ hcatConf.set(el.getKey(), el.getValue());
+ }
+ }
+
+ driver = new Driver(hcatConf);
+ SessionState.start(new CliSessionState(hcatConf));
+
+ }
+
+ private void populateHBaseTable(String tName) throws IOException {
+ List<Put> myPuts = generatePuts(tName);
+ HTable table = new HTable(getHbaseConf(), Bytes.toBytes(tName));
+ table.put(myPuts);
+ }
+
+ private List<Put> generatePuts(String tableName) throws IOException {
+
+ List<String> columnFamilies = Arrays.asList("testFamily");
+ List<Put> myPuts;
+ myPuts = new ArrayList<Put>();
+ for (int i = 1; i <=10; i++) {
+ Put put = new Put(Bytes.toBytes(i));
+ put.add(FAMILY, QUALIFIER1, 1, Bytes.toBytes("textA-" + i));
+ put.add(FAMILY, QUALIFIER2, 1, Bytes.toBytes("textB-" + i));
+ myPuts.add(put);
+ }
+ return myPuts;
+ }
+
+ public static void createTestDataFile(String filename) throws IOException {
+ FileWriter writer = null;
+ int LOOP_SIZE = 10;
+ float f = -100.1f;
+ try {
+ File file = new File(filename);
+ file.deleteOnExit();
+ writer = new FileWriter(file);
+
+ for (int i =1; i <= LOOP_SIZE; i++) {
+ writer.write(i+ "\t" +(f+i)+ "\t" + "textB-" + i + "\n");
+ }
+ } finally {
+ if (writer != null) {
+ writer.close();
+ }
+ }
+
+ }
+
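+ // Verifies that HCatLoader exposes the Hive column types of the HBase-backed
+ // table as the corresponding Pig types (float, chararray, int).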
+ @Test
+ public void testPigHBaseSchema() throws Exception {
+ Initialize();
+
+ String tableName = newTableName("MyTable");
+ String databaseName = newTableName("MyDatabase");
+ //Table name will be lower case unless specified by hbase.table.name property
+ String hbaseTableName = "testTable";
+ String db_dir = getTestDir() + "/hbasedb";
+
+ String dbQuery = "CREATE DATABASE IF NOT EXISTS " + databaseName + " LOCATION '"
+ + db_dir + "'";
+
+ String deleteQuery = "DROP TABLE "+databaseName+"."+tableName;
+
+ String tableQuery = "CREATE TABLE " + databaseName + "." + tableName
+ + "(key float, testqualifier1 string, testqualifier2 int) STORED BY " +
+ "'org.apache.hadoop.hive.hbase.HBaseStorageHandler'"
+ + " WITH SERDEPROPERTIES ('hbase.columns.mapping'=':key,testFamily:testQualifier1,testFamily:testQualifier2')"
+ + " TBLPROPERTIES ('hbase.table.name'='"+hbaseTableName+"')";
+
+ CommandProcessorResponse responseOne = driver.run(deleteQuery);
+ assertEquals(0, responseOne.getResponseCode());
+
+
+ CommandProcessorResponse responseTwo = driver.run(dbQuery);
+ assertEquals(0, responseTwo.getResponseCode());
+
+
+ CommandProcessorResponse responseThree = driver.run(tableQuery);
+
+ HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf());
+ boolean doesTableExist = hAdmin.tableExists(hbaseTableName);
+ assertTrue(doesTableExist);
+
+ PigServer server = new PigServer(ExecType.LOCAL,hcatConf.getAllProperties());
+ server.registerQuery("A = load '"+databaseName+"."+tableName+"' using org.apache.hive.hcatalog.pig.HCatLoader();");
+
+ Schema dumpedASchema = server.dumpSchema("A");
+
+ List<FieldSchema> fields = dumpedASchema.getFields();
+ assertEquals(3, fields.size());
+
+ assertEquals(DataType.FLOAT,fields.get(0).type);
+ assertEquals("key",fields.get(0).alias.toLowerCase());
+
+ assertEquals( DataType.CHARARRAY,fields.get(1).type);
+ assertEquals("testQualifier1".toLowerCase(), fields.get(1).alias.toLowerCase());
+
+ assertEquals( DataType.INTEGER,fields.get(2).type);
+ assertEquals("testQualifier2".toLowerCase(), fields.get(2).alias.toLowerCase());
+
+ }
+
+
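+ // Loads the HBase-backed table through HCatLoader and checks that a Pig
+ // filter (key < 5) plus a two-column projection returns the expected rows.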
+ @Test
+ public void testPigFilterProjection() throws Exception {
+ Initialize();
+
+ String tableName = newTableName("MyTable");
+ String databaseName = newTableName("MyDatabase");
+ //Table name will be lower case unless specified by hbase.table.name property
+ String hbaseTableName = (databaseName + "." + tableName).toLowerCase();
+ String db_dir = getTestDir() + "/hbasedb";
+
+ String dbQuery = "CREATE DATABASE IF NOT EXISTS " + databaseName + " LOCATION '"
+ + db_dir + "'";
+
+ String deleteQuery = "DROP TABLE "+databaseName+"."+tableName;
+
+ String tableQuery = "CREATE TABLE " + databaseName + "." + tableName
+ + "(key int, testqualifier1 string, testqualifier2 string) STORED BY " +
+ "'org.apache.hadoop.hive.hbase.HBaseStorageHandler'" +
+ " WITH SERDEPROPERTIES ('hbase.columns.mapping'=':key,testFamily:testQualifier1,testFamily:testQualifier2')" +
+ " TBLPROPERTIES ('hbase.table.default.storage.type'='binary')";
+
+ CommandProcessorResponse responseOne = driver.run(deleteQuery);
+ assertEquals(0, responseOne.getResponseCode());
+
+
+ CommandProcessorResponse responseTwo = driver.run(dbQuery);
+ assertEquals(0, responseTwo.getResponseCode());
+
+
+ CommandProcessorResponse responseThree = driver.run(tableQuery);
+
+ HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf());
+ boolean doesTableExist = hAdmin.tableExists(hbaseTableName);
+ assertTrue(doesTableExist);
+
+ populateHBaseTable(hbaseTableName);
+
+ Configuration conf = new Configuration(getHbaseConf());
+ HTable table = new HTable(conf, hbaseTableName);
+ Scan scan = new Scan();
+ scan.addFamily(Bytes.toBytes("testFamily"));
+ ResultScanner scanner = table.getScanner(scan);
+ int index=1;
+
+ PigServer server = new PigServer(ExecType.LOCAL,hcatConf.getAllProperties());
+ server.registerQuery("A = load '"+databaseName+"."+tableName+"' using org.apache.hive.hcatalog.pig.HCatLoader();");
+ server.registerQuery("B = filter A by key < 5;");
+ server.registerQuery("C = foreach B generate key,testqualifier2;");
+ Iterator<Tuple> itr = server.openIterator("C");
+ // Verify the filter and projection: 4 rows are returned, each with 2 columns, and the contents match
+ while(itr.hasNext()){
+ Tuple t = itr.next();
+ assertTrue(t.size() == 2);
+ assertTrue(t.get(0).getClass() == Integer.class);
+ assertEquals(index,t.get(0));
+ assertTrue(t.get(1).getClass() == String.class);
+ assertEquals("textB-"+index,t.get(1));
+ index++;
+ }
+ assertEquals(4, index-1);
+ }
+
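+ // End-to-end test: stores rows into the HBase-backed table with HCatStorer,
+ // re-reads them with HCatLoader, scans HBase directly, and finally queries
+ // through Hive, verifying the same five rows each time.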
+ @Test
+ public void testPigPopulation() throws Exception {
+ Initialize();
+
+ String tableName = newTableName("MyTable");
+ String databaseName = newTableName("MyDatabase");
+ //Table name will be lower case unless specified by hbase.table.name property
+ String hbaseTableName = (databaseName + "." + tableName).toLowerCase();
+ String db_dir = getTestDir() + "/hbasedb";
+ String POPTXT_FILE_NAME = db_dir + "/testfile.txt";
+ float f = -100.1f;
+
+ String dbQuery = "CREATE DATABASE IF NOT EXISTS " + databaseName + " LOCATION '"
+ + db_dir + "'";
+
+ String deleteQuery = "DROP TABLE "+databaseName+"."+tableName;
+
+ String tableQuery = "CREATE TABLE " + databaseName + "." + tableName
+ + "(key int, testqualifier1 float, testqualifier2 string) STORED BY " +
+ "'org.apache.hadoop.hive.hbase.HBaseStorageHandler'"
+ + " WITH SERDEPROPERTIES ('hbase.columns.mapping'=':key,testFamily:testQualifier1,testFamily:testQualifier2')"
+ + " TBLPROPERTIES ('hbase.table.default.storage.type'='binary')";
+
+
+ String selectQuery = "SELECT * from "+databaseName.toLowerCase()+"."+tableName.toLowerCase();
+
+
+ CommandProcessorResponse responseOne = driver.run(deleteQuery);
+ assertEquals(0, responseOne.getResponseCode());
+
+
+ CommandProcessorResponse responseTwo = driver.run(dbQuery);
+ assertEquals(0, responseTwo.getResponseCode());
+
+
+ CommandProcessorResponse responseThree = driver.run(tableQuery);
+
+ HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf());
+ boolean doesTableExist = hAdmin.tableExists(hbaseTableName);
+ assertTrue(doesTableExist);
+
+
+ createTestDataFile(POPTXT_FILE_NAME);
+
+ PigServer server = new PigServer(ExecType.LOCAL,hcatConf.getAllProperties());
+ server.registerQuery("A = load '"+POPTXT_FILE_NAME+"' using PigStorage() as (key:int, testqualifier1:float, testqualifier2:chararray);");
+ server.registerQuery("B = filter A by (key > 2) AND (key < 8) ;");
+ server.registerQuery("store B into '"+databaseName.toLowerCase()+"."+tableName.toLowerCase()+"' using org.apache.hive.hcatalog.pig.HCatStorer();");
+ server.registerQuery("C = load '"+databaseName.toLowerCase()+"."+tableName.toLowerCase()+"' using org.apache.hive.hcatalog.pig.HCatLoader();");
+ // Schema should be same
+ Schema dumpedBSchema = server.dumpSchema("C");
+
+ List<FieldSchema> fields = dumpedBSchema.getFields();
+ assertEquals(3, fields.size());
+
+ assertEquals(DataType.INTEGER,fields.get(0).type);
+ assertEquals("key",fields.get(0).alias.toLowerCase());
+
+ assertEquals( DataType.FLOAT,fields.get(1).type);
+ assertEquals("testQualifier1".toLowerCase(), fields.get(1).alias.toLowerCase());
+
+ assertEquals( DataType.CHARARRAY,fields.get(2).type);
+ assertEquals("testQualifier2".toLowerCase(), fields.get(2).alias.toLowerCase());
+
+ //Query the hbase table and check the key is valid and only 5 are present
+ Configuration conf = new Configuration(getHbaseConf());
+ HTable table = new HTable(conf, hbaseTableName);
+ Scan scan = new Scan();
+ scan.addFamily(Bytes.toBytes("testFamily"));
+ byte[] familyNameBytes = Bytes.toBytes("testFamily");
+ ResultScanner scanner = table.getScanner(scan);
+ int index=3;
+ int count=0;
+ for(Result result: scanner) {
+ //key is correct
+ assertEquals(index,Bytes.toInt(result.getRow()));
+ //first column exists
+ assertTrue(result.containsColumn(familyNameBytes,Bytes.toBytes("testQualifier1")));
+ //value is correct
+ assertEquals((index+f),Bytes.toFloat(result.getValue(familyNameBytes,Bytes.toBytes("testQualifier1"))),0);
+
+ //second column exists
+ assertTrue(result.containsColumn(familyNameBytes,Bytes.toBytes("testQualifier2")));
+ //value is correct
+ assertEquals(("textB-"+index).toString(),Bytes.toString(result.getValue(familyNameBytes,Bytes.toBytes("testQualifier2"))));
+ index++;
+ count++;
+ }
+ // 5 rows should be returned
+ assertEquals(5, count);
+
+ //Check if hive returns results correctly
+ driver.run(selectQuery);
+ ArrayList<String> result = new ArrayList<String>();
+ driver.getResults(result);
+ //Query using the hive command line
+ assertEquals(5, result.size());
+ Iterator<String> itr = result.iterator();
+ for(int i = 3; i <= 7; i++) {
+ String tokens[] = itr.next().split("\\s+");
+ assertEquals(i,Integer.parseInt(tokens[0]));
+ assertEquals(i+f,Float.parseFloat(tokens[1]),0);
+ assertEquals(("textB-"+i).toString(),tokens[2]);
+ }
+
+ //delete the table from the database
+ CommandProcessorResponse responseFour = driver.run(deleteQuery);
+ assertEquals(0, responseFour.getResponseCode());
+
+ }
+
+}