Posted to hcatalog-commits@incubator.apache.org by ha...@apache.org on 2011/12/06 20:05:39 UTC
svn commit: r1211077 [6/7] - in /incubator/hcatalog/trunk: ./ conf/
src/test/e2e/hcatalog/ src/test/e2e/hcatalog/conf/
src/test/e2e/hcatalog/deployers/ src/test/e2e/hcatalog/drivers/
src/test/e2e/hcatalog/tests/ src/test/e2e/hcatalog/tools/generate/ sr...
Added: incubator/hcatalog/trunk/src/test/e2e/hcatalog/tests/hive.conf
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/tests/hive.conf?rev=1211077&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/tests/hive.conf (added)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/tests/hive.conf Tue Dec 6 20:05:37 2011
@@ -0,0 +1,117 @@
+#!/home/y/bin/perl
+
+ #
+ # Do
+ # egrep '^#|name.*=>' hive.conf | egrep -v '^#!|egrep' | less
+ # to get an outline of this test conf file
+ #
+
+ # Has a couple of Hive set directives:
+ # set hive.exec.dynamic.partition.mode=nonstrict;
+ # set hive.exec.dynamic.partition=true;
+
+
+$cfg = {
+ 'driver' => 'Hive',
+ 'groups' => [
+ {
+ 'name' => 'Hive_Checkin',
+ 'tests' => [ {
+ 'num' => 1,
+ 'sql' => q\select * from studenttab10k;\,
+ 'floatpostprocess' => 1,
+ 'delimiter' => ' ',
+ },
+ {
+ 'num' => 2,
+ 'sql' => q\drop table if exists checkin_2;
+ create table checkin_2 as select * from studenttab10k;\,
+ 'floatpostprocess' => 1,
+ 'delimiter' => ' ',
+ },
+ {
+ 'num' => 3,
+ 'sql' => q\SELECT studenttab10k.* FROM studenttab10k JOIN votertab10k ON (studenttab10k.name = votertab10k.name);\,
+ 'floatpostprocess' => 1,
+ 'delimiter' => ' ',
+ },
+ {
+ 'num' => 4,
+ 'sql' => q"
+ drop table if exists multi_insert_1_1;
+ drop table if exists multi_insert_1_2;
+ drop table if exists multi_insert_1_3;
+
+ create table multi_insert_1_1 (
+ name string,
+ ds string)
+ row format delimited
+ fields terminated by '\\t'
+ stored as textfile;
+
+ create table multi_insert_1_2 (
+ name string,
+ ds string)
+ row format delimited
+ fields terminated by '\\t'
+ stored as textfile;
+
+ create table multi_insert_1_3 (
+ name string,
+ ds string)
+ row format delimited
+ fields terminated by '\\t'
+ stored as textfile;
+
+ from studentparttab30k
+ insert overwrite table multi_insert_1_1
+ select name, ds
+ where ds = '20110924'
+
+ insert overwrite table multi_insert_1_2
+ select name, ds
+ where ds = '20110925'
+
+ insert overwrite table multi_insert_1_3
+ select name, ds
+ where ds = '20110926';
+ ",
+ 'result_table' => ['multi_insert_1_1',
+ 'multi_insert_1_2',
+ 'multi_insert_1_3'],
+ 'verify_sql' =>["select name, ds
+ from studentparttab30k
+ where ds = '20110924';",
+ "select name, ds
+ from studentparttab30k
+ where ds = '20110925';",
+ "select name, ds
+ from studentparttab30k
+ where ds = '20110926';"]
+ } ]
+ }, # end g
+ {
+ 'name' => 'Hive_Read',
+ 'tests' => [ {
+ 'num' => 1,
+ 'sql' => q\select * from all100krc;\,
+ 'floatpostprocess' => 1,
+ 'delimiter' => ' ',
+ } ]
+ }, # end g
+ {
+ 'name' => 'Hive_Write',
+ 'tests' => [ {
+ 'num' => 1,
+ 'sql' => q\
+drop table if exists hive_write_1;
+create table hive_write_1 (name string, age int, gpa double) stored as rcfile;
+insert into TABLE hive_write_1 select * from all100krc;\,
+ 'result_table' => 'hive_write_1',
+ 'verify_sql' =>"select name, age, gpa from all100krc;",
+ 'floatpostprocess' => 1,
+ 'delimiter' => ' ',
+ } ]
+ }
+ ]
+}
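
Each test above is a Perl hash whose keys drive the e2e harness. As a reading aid, here is a minimal sketch of the shape of one entry; the key names come from the file itself, but the comments describe assumed harness behavior rather than anything this commit spells out:

    # Hypothetical test entry, illustrative only.
    {
        'num'              => 99,                  # test number within its group
        'sql'              => q\select * from studenttab10k;\,  # what the Hive driver runs
        'result_table'     => 'some_table',        # optional: table(s) whose contents get checked
        'verify_sql'       => "select * from some_source;",     # optional: produces expected rows
        'floatpostprocess' => 1,                   # presumably normalizes floats before comparing
        'delimiter'        => "\t",                # field delimiter of the compared output
    },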
Added: incubator/hcatalog/trunk/src/test/e2e/hcatalog/tests/pig.conf
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/tests/pig.conf?rev=1211077&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/tests/pig.conf (added)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/tests/pig.conf Tue Dec 6 20:05:37 2011
@@ -0,0 +1,173 @@
+#!/home/y/bin/perl
+
+ #
+ # Do
+ # egrep '^#|name.*=>' pig.conf | egrep -v '^#!|egrep' | less
+ # to get an outline of this test conf file
+ #
+
+ # Has a couple of Hive set directives:
+ # set hive.exec.dynamic.partition.mode=nonstrict;
+ # set hive.exec.dynamic.partition=true;
+
+
+$cfg = {
+ 'driver' => 'Pig',
+ 'groups' => [
+# This first group should be moved to deployer ?
+ {
+ 'name' => 'Pig_Checkin',
+ 'tests' => [
+
+ {
+ 'num' => 1
+ ,'hcat_prep'=>q\drop table if exists pig_checkin_1;
+create table pig_checkin_1 (name string, age int, gpa double) STORED AS TEXTFILE;\
+ ,'pig' => q\a = load 'studenttab10k' using org.apache.hcatalog.pig.HCatLoader();
+store a into 'pig_checkin_1' using org.apache.hcatalog.pig.HCatStorer();\
+ ,'result_table' => 'pig_checkin_1'
+ ,'sql' => q\select * from studenttab10k;\
+ ,'floatpostprocess' => 1
+ ,'delimiter' => ' '
+ },
+ {
+ 'num' => 2
+ ,'pig' => q\a = load 'studenttab10k' using org.apache.hcatalog.pig.HCatLoader();
+b = load 'votertab10k' using org.apache.hcatalog.pig.HCatLoader();
+c = join a by name, b by name;
+store c into ':OUTPATH:';\
+ ,'sql' => [ 'select s.name, s.age, gpa, v.name, v.age, registration, contributions from studenttab10k s join votertab10k v on (s.name = v.name);']
+ ,'floatpostprocess' => 1
+ ,'delimiter' => ' '
+ },
+ {
+ 'num' => 3
+ ,'pig' => q\a = load 'studenttab10k' using org.apache.hcatalog.pig.HCatLoader();
+b = load ':INPATH:/votertab10k' as (name:chararray, age:int, registration:chararray, contributions:float);
+c = join a by name, b by name;
+store c into ':OUTPATH:';\
+ ,'sql' => q\select s.name, s.age, gpa, v.name, v.age, registration, contributions from studenttab10k s join votertab10k v on (s.name = v.name);\
+ ,'floatpostprocess' => 1
+ ,'delimiter' => ' '
+ },
+ {
+ 'num' => 4
+ ,'hcat_prep'=>q\drop table if exists pig_checkin_4_1;
+drop table if exists pig_checkin_4_2;
+create table pig_checkin_4_1 (name string, age int, gpa double) STORED AS TEXTFILE;
+create table pig_checkin_4_2 (name string, age int, gpa double) STORED AS TEXTFILE;\
+ ,'pig' => q\a = load 'studenttab10k' using org.apache.hcatalog.pig.HCatLoader();
+split a into b if age <=40, c if age > 40;
+store b into 'pig_checkin_4_1' using org.apache.hcatalog.pig.HCatStorer();
+store c into 'pig_checkin_4_2' using org.apache.hcatalog.pig.HCatStorer();\
+ ,'result_table' => ['pig_checkin_4_1','pig_checkin_4_2']
+ ,'sql' => [ 'select * from studenttab10k where age<=40;', 'select * from studenttab10k where age>40;']
+ ,'floatpostprocess' => 1
+ ,'delimiter' => ' '
+ },
+ {
+ 'num' => 5
+ ,'hcat_prep'=>q\drop table if exists pig_checkin_5;
+create table pig_checkin_5 (name string, age int, gpa double) STORED AS TEXTFILE;\
+ ,'pig' => q\a = load 'studenttab10k' using org.apache.hcatalog.pig.HCatLoader();
+split a into b if age <=40, c if age > 40;
+store b into 'pig_checkin_5' using org.apache.hcatalog.pig.HCatStorer();
+store c into ':OUTPATH:';\
+ ,'result_table' => ['pig_checkin_5','?']
+ ,'sql' => [ 'select * from studenttab10k where age<=40;', 'select * from studenttab10k where age>40;']
+ ,'floatpostprocess' => 1
+ ,'delimiter' => ' '
+ },
+
+ ],
+ }, # end g
+ {
+ 'name' => 'Pig_Read',
+ 'tests' => [
+
+ {
+ 'num' => 1
+ ,'pig' => q\a = load 'all100k' using org.apache.hcatalog.pig.HCatLoader();
+store a into ':OUTPATH:';\
+ ,'sql' => q\select * from all100k;\
+ ,'floatpostprocess' => 1
+ ,'delimiter' => ' '
+ },
+ {
+ 'num' => 2
+ ,'pig' => q\a = load 'all100kjson' using org.apache.hcatalog.pig.HCatLoader();
+b = foreach a generate s, i, d;
+store b into ':OUTPATH:';\
+ ,'sql' => q\select s, i, d from all100kjson;\
+ ,'floatpostprocess' => 1
+ ,'delimiter' => ' '
+ },
+ {
+ 'num' => 3
+ ,'pig' => q\a = load 'all100krc' using org.apache.hcatalog.pig.HCatLoader();
+store a into ':OUTPATH:';\
+ ,'sql' => q\select * from all100krc;\
+ ,'floatpostprocess' => 1
+ ,'delimiter' => ' '
+ }
+ ],
+ }, # end g
+ {
+ 'name' => 'Pig_Write',
+ 'tests' => [
+ {
+ 'num' => 1
+ ,'hcat_prep'=>q\drop table if exists pig_write_1;
+create table pig_write_1(t tinyint,si smallint,i int,b bigint,bool boolean,f float,d double,s string) stored as rcfile;\
+ ,'pig' => q\a = load ':INPATH:/all100k' using PigStorage(':') as (t:int,si:int,i:int,b:int,bo:boolean,f:float,d:double,s:chararray);
+store a into 'pig_write_1' using org.apache.hcatalog.pig.HCatStorer();\
+ ,'result_table' => 'pig_write_1'
+ ,'sql' => q\select * from all100k;\
+ ,'floatpostprocess' => 1
+ ,'delimiter' => ' '
+ },
+ {
+ 'num' => 2
+ ,'hcat_prep'=>q\drop table if exists pig_write_2;
+create table pig_write_2(
+ s string,
+ i int,
+ d double,
+ m map<string, string>,
+ bb array<struct<a: int, b: string>>)
+ STORED AS INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat'
+ INPUTDRIVER 'org.apache.hcatalog.pig.drivers.LoadFuncBasedInputDriver' OUTPUTDRIVER 'org.apache.hcatalog.pig.drivers.StoreFuncBasedOutputDriver'
+ TBLPROPERTIES ('hcat.pig.loader'='org.apache.pig.builtin.JsonLoader', 'hcat.pig.storer'='org.apache.pig.builtin.JsonStorage', 'hcat.pig.loader.args'=
+'s:chararray, i:int, d:double, m:map[chararray], bb:{t:(a:int, b:chararray)}', 'hcat.pig.args.delimiter'=' ');
+\
+ ,'pig' => q\a = load 'all100kjson' using org.apache.hcatalog.pig.HCatLoader();
+b = foreach a generate s, i, d;
+store b into ':OUTPATH:';\
+ ,'sql' => q\select IFNULL(s, ""), IFNULL(i, ""), IFNULL(d, "") from all100kjson;\
+ ,'floatpostprocess' => 1
+ ,'delimiter' => ' '
+ },
+ {
+ 'num' => 3
+ ,'hcat_prep'=>q\drop table if exists pig_write_3;
+create table pig_write_3(
+ name string,
+ age int,
+ gpa double)
+stored as rcfile
+TBLPROPERTIES (
+ 'hcat.isd'='org.apache.hcatalog.rcfile.RCFileInputDriver',
+ 'hcat.osd'='org.apache.hcatalog.rcfile.RCFileOutputDriver'
+);
+\
+ ,'pig' => q\a = load 'all100krc' using org.apache.hcatalog.pig.HCatLoader();
+store a into ':OUTPATH:';\
+ ,'sql' => q\select * from all100krc;\
+ ,'floatpostprocess' => 1
+ ,'delimiter' => ' '
+ }
+ ],
+ }, # end g
+
+ ]
+}
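
Two conventions in the Pig tests above deserve a note: 'hcat_prep' carries HCatalog DDL that is presumably run before the Pig script, and the scripts use :INPATH: and :OUTPATH: placeholders for harness-managed HDFS paths. The substitution code is not part of this commit; a minimal sketch of what the driver presumably does, using made-up config key names:

    # Assumed placeholder expansion; 'inpathbase' and 'outpathbase' are hypothetical keys.
    my $script  = $test->{'pig'};
    my $inpath  = $harnessCfg->{'inpathbase'};                  # HDFS dir holding generated input
    my $outpath = $harnessCfg->{'outpathbase'} . "/$testName";  # per-test HDFS output dir
    $script =~ s/:INPATH:/$inpath/g;
    $script =~ s/:OUTPATH:/$outpath/g;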
Modified: incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/generate/generate_data.pl
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/generate/generate_data.pl?rev=1211077&r1=1211076&r2=1211077&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/generate/generate_data.pl (original)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/generate/generate_data.pl Tue Dec 6 20:05:37 2011
@@ -1,26 +1,28 @@
#!/usr/bin/env perl
-############################################################################
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
+############################################################################
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
# A utility to generate test data for pig test harness tests.
#
#
use strict;
use charnames ();
+use Cwd;
+use IPC::Run qw(run);
our @firstName = ("alice", "bob", "calvin", "david", "ethan", "fred",
"gabriella", "holly", "irene", "jessica", "katie", "luke", "mike", "nick",
@@ -137,16 +139,15 @@ sub randomGreekLetter()
sub randomNameAgeGpaMap()
{
my $size = int(rand(3));
- my $map = "[";
my @mapValues = ( "name#" . randomName(), "age#" . randomAge(), "gpa#" . randomGpa() );
$size = ($size == 0 ? 1 : $size);
+ my $map;
for(my $i = 0; $i <= $size; $i++) {
$map .= $mapValues[$i];
if($i != $size) {
$map .= ",";
}
}
- $map .= "]";
return $map;
}
@@ -169,47 +170,103 @@ sub getMapFields($) {
sub randomNameAgeGpaTuple()
{
my $gpa = sprintf("%0.2f", randomGpa());
- return "(" . randomName() . "," . randomAge() . "," . $gpa . ")" ;
+ return randomName() . "," . randomAge() . "," . $gpa ;
}
-sub randomNameAgeGpaBag()
+sub randomList()
{
- my $size = int(rand(int(3)));
- my $bag = "{";
- $size = ($size == 0 ? 1 : $size);
+ my $size = int(rand(int(3))) + 1;
+ my $bag;
for(my $i = 0; $i <= $size; $i++) {
- $bag .= randomNameAgeGpaTuple();
- if($i != $size) {
- $bag .= ",";
- }
+ $bag .= randomAge();
+ $bag .= "," if ($i != $size);
}
- $bag .= "}";
return $bag;
}
-our @textDoc = (
- "The cosmological proof, which we are now about to ex-",
- "amine, retains the connection of absolute necessity with the",
- "highest reality, but instead of reasoning, like the former proof,",
- "from the highest reality to necessity of existence, it reasons",
- "from the previously given unconditioned necessity of some",
- "being to the unlimited reality of that being. It thus enters upon",
- "a course of reasoning which, whether rational or only pseudo-",
- "rational, is at any rate natural, and the most convincing not",
- "only for common sense but even for speculative understand-",
- "ing. It also sketches the first outline of all the proofs in natural",
- "theology, an outline which has always been and always will",
- "be followed, however much embellished and disguised by",
- "superfluous additions. This proof, termed by Leibniz the proof",
- "a contingentia mundi, we shall now proceed to expound and",
- "examine.");
+sub randomEscape()
+{
+ my $r = rand(1);
+ if ($r < 0.16) {
+ return '\"';
+ } elsif ($r < 0.32) {
+ return '\\\\';
+ } elsif ($r < 0.48) {
+ return '\/';
+ } elsif ($r < 0.64) {
+ return '\n';
+ } elsif ($r < 0.80) {
+ return '\t';
+ } else {
+ return randomUnicodeHex();
+ }
+}
+
+
+sub randomJsonString()
+{
+ my $r = rand(1);
+ #if ($r < 0.05) {
+ # return "null";
+ #} elsif ($r < 0.10) {
+ # return randomName() . randomEscape() . randomName();
+ #} else {
+ return randomName();
+ #}
+}
+
+sub randomNullBoolean()
+{
+ my $r = rand(1);
+ if ($r < 0.05) {
+ return 'null';
+ } elsif ($r < 0.525) {
+ return 'true';
+ } else {
+ return 'false';
+ }
+}
+
+sub randomJsonMap()
+{
+ if (rand(1) < 0.05) {
+ return 'null';
+ }
+
+ my $str = "{";
+ my $num = rand(5) + 1;
+ for (my $i = 0; $i < $num; $i++) {
+ $str .= "," unless $i == 0;
+ $str .= '"' . randomCity() . '" : "' . randomName() . '"';
+ }
+ $str .= "}";
+ return $str;
+}
+
+sub randomJsonBag()
+{
+ if (rand(1) < 0.05) {
+ return 'null';
+ }
+
+ my $str = "[";
+ my $num = rand(5) + 1;
+ for (my $i = 0; $i < $num; $i++) {
+ $str .= "," unless $i == 0;
+ $str .= '{"a":' . int(rand(2**32) - 2**31) . ',"b":"' .
+ randomJsonString() . '"}';
+ }
+ $str .= "]";
+}
sub usage()
{
- warn "Usage: $0 filetype numrows tablename targetdir [nosql]\n";
- warn "\tValid filetypes [studenttab, studentcolon, \n";
- warn "\t\tstudentnulltab, studentcomplextab, studentctrla, voternulltab\n";
- warn "\t\tvotertab, reg1459894, textdoc, unicode, manual]\n";
+ warn "Usage: $0 filetype numrows tablename hdfstargetdir [format]\n";
+ warn "\tValid filetypes [studenttab, studentparttab, \n";
+ warn "\t\tstudentnull, allscalars, studentcomplextab, \n";
+ warn "\t\tvoternulltab votertab, unicode]\n";
+ warn "hdfstargetdir is the directory in hdfs that data will be copied to for loading into tables\n";
+ warn "format is one of rc, csv, or json. csv is the default";
}
our @greekUnicode = ("\N{U+03b1}", "\N{U+03b2}", "\N{U+03b3}", "\N{U+03b4}",
@@ -226,26 +283,93 @@ sub randomUnicodeNonAscii()
return $name;
}
+sub randomUnicodeHex()
+{
+ return sprintf "\\u%04x", 0x3b1 + int(rand(25));
+}
+
my $testvar = "\N{U+03b1}\N{U+03b3}\N{U+03b1}\N{U+03c0}\N{U+03b7}";
-sub getBulkCopyCmd(){
- my $sourceDir= shift;
- my $tableName = shift;
- my $delimeter = shift;
- $delimeter = '\t' if ( !$delimeter );
+sub getBulkCopyCmd($$;$)
+{
+ my ($tableName, $delimeter, $filename) = @_;
+
+ $filename = $tableName if (!defined($filename));
+
+ return "load data local infile '" . cwd . "/$filename'
+ into table $tableName
+ columns terminated by '$delimeter';"
+}
+
+sub generateSecondHalfCreateTable($$$;$$$)
+{
+ my ($hivefp, $format, $location, $fieldDelim, $structDelim, $mapDelim) = @_;
+
+ if ($format eq "csv") {
+ print $hivefp "
+row format delimited
+fields terminated by '$fieldDelim'
+stored as textfile
+location '$location';\n";
+ } elsif ($format eq "rc") {
+ print $hivefp "
+stored as rcfile
+location '$location'
+TBLPROPERTIES (
+ 'hcat.isd'='org.apache.hcatalog.rcfile.RCFileInputDriver',
+ 'hcat.osd'='org.apache.hcatalog.rcfile.RCFileOutputDriver'
+);\n";
+ } elsif ($format eq "json") {
+ print $hivefp " STORED AS
+INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat'
+INPUTDRIVER 'org.apache.hcatalog.pig.drivers.LoadFuncBasedInputDriver' OUTPUTDRIVER 'org.apache.hcatalog.pig.drivers.StoreFuncBasedOutputDriver';
+location '$location'
+TBLPROPERTIES ('hcat.pig.loader'='org.apache.pig.builtin.JsonLoader', 'hcat.pig.storer'='org.apache.pig.builtin.JsonStorage', 'hcat.pig.loader.args'=
+'s:chararray, i:int, d:double, m:map[chararray], bb:{t:(a:int, b:chararray)}', 'hcat.pig.args.delimiter'='\t')
+;\n";
+ } else {
+ die "Unknown format $format\n";
+ }
+}
+
+our $hadoopCoreJar = undef;
-# . "\nCOPY $tableName FROM \'$sourceDir/$tableName' using DELIMITERS \'". '\t' . "\' WITH NULL AS '\n';";
+sub findHadoopJars()
+{
+ if (not defined $ENV{'HADOOP_HOME'}) {
+ die "Please set \$HADOOP_HOME\n";
+ }
- my $cmd= "\nbegin transaction;"
- . "\nCOPY $tableName FROM \'$sourceDir/$tableName' using DELIMITERS \'$delimeter\';"
- . "\ncommit;"
- . "\n";
+ my $coreJar = `ls $ENV{'HADOOP_HOME'}/hadoop-core-*.jar`;
+ my $loggingJar = `ls $ENV{'HADOOP_HOME'}/lib/commons-logging-*.jar | grep -v api`;
+ my $cfgJar = `ls $ENV{'HADOOP_HOME'}/lib/commons-configuration-*.jar`;
+ my $langJar = `ls $ENV{'HADOOP_HOME'}/lib/commons-lang-*.jar`;
+ my $cliJar = `ls $ENV{'HADOOP_HOME'}/lib/commons-cli-*.jar`;
+ chomp $coreJar;
+ chomp $loggingJar;
+ chomp $cfgJar;
+ chomp $langJar;
+ chomp $cliJar;
+ return ($coreJar, $loggingJar, $cfgJar, $langJar, $cliJar);
+}
+
+sub findHiveJars()
+{
+ if (not defined $ENV{'HIVE_HOME'}) {
+ die "Please set \$HIVE_HOME\n";
+ }
- return $cmd;
+ my $execJar = `ls ../../../../../hive/external/build/ql/hive-exec-*.jar`;
+ my $cliJar = `ls ../../../../../hive/external/build/cli/hive-cli-*.jar`;
+ chomp $execJar;
+ chomp $cliJar;
+ return ($execJar, $cliJar);
}
-# main($)
+
+
+# main
{
# explicitly call srand so we get the same data every time
# we generate it. However, we set it individually for each table type.
@@ -255,50 +379,121 @@ sub getBulkCopyCmd(){
my $filetype = shift;
my $numRows = shift;
my $tableName = shift;
- my $targetDir= shift;
- my $nosql = shift;
+ my $hdfsTargetDir= shift;
+ my $format = shift;
- die usage() if (!defined($filetype) || !defined($numRows));
+ die usage() if (!defined($filetype) || !defined($numRows) || !defined($tableName) || !defined($hdfsTargetDir));
if ($numRows <= 0) { usage(); }
- if ( $targetDir ) {
- open(HDFS, "> $targetDir/$tableName") or die("Cannot open file $tableName, $!\n");
- open(PSQL, "> $targetDir/$tableName.sql") or die("Cannot open file $tableName.sql, $!\n") unless defined $nosql;
- } else {
- open(HDFS, "> $tableName") or die("Cannot open file $tableName, $!\n");
- open(PSQL, "> $tableName.sql") or die("Cannot open file $tableName.sql, $!\n") unless defined $nosql;
+ $format = "csv" if not defined $format;
+
+ if ($format eq "csv") {
+ open(HDFS, "> $tableName") or die("Cannot open file $tableName, $!\n");
}
+ open(MYSQL, "> $tableName.mysql.sql") or
+ die("Cannot open file $tableName.mysql.sql, $!\n");
+ open(my $hivefp, "> $tableName.hcat.sql") or
+ die("Cannot open file $tableName.hive.sql, $!\n");
- if ($filetype eq "manual") {
- } elsif ($filetype eq "studenttab") {
+ if ($filetype eq "studenttab") {
srand(3.14159 + $numRows);
- print PSQL "create table $tableName (name varchar(100), age integer, gpa float(3));\n" unless defined $nosql;
- print PSQL &getBulkCopyCmd( $targetDir, $tableName ) unless defined $nosql;
- for (my $i = 0; $i < $numRows; $i++) {
- my $name = randomName();
- my $age = randomAge();
- my $gpa = randomGpa();
- printf HDFS "%s\t%d\t%.2f\n", $name, $age, $gpa;
+ print MYSQL "drop table if exists $tableName;\n";
+ print MYSQL "create table $tableName (name varchar(100), age integer, gpa float(3));\n";
+ print $hivefp "drop table if exists $tableName;\ncreate external table $tableName(
+ name string,
+ age int,
+ gpa double)";
+
+ generateSecondHalfCreateTable($hivefp, $format,
+ "$hdfsTargetDir/$tableName", '\\t');
+ if ($format eq "csv") {
+ print MYSQL &getBulkCopyCmd($tableName, "\t", "$tableName");
+ for (my $i = 0; $i < $numRows; $i++) {
+ my $name = randomName();
+ my $age = randomAge();
+ my $gpa = randomGpa();
+ printf HDFS "%s\t%d\t%.2f\n", $name, $age, $gpa;
+ }
+ } elsif ($format eq "rc") {
+ print MYSQL &getBulkCopyCmd($tableName, "\t", "$tableName.plain");
+ my ($hadoopCoreJar, $commonsLoggingJar, $commonsConfigJar,
+ $commonsLangJar, $commonsCliJar) = findHadoopJars();
+ my ($hiveExecJar, $hiveCliJar) = findHiveJars();
+ my @cmd = ('java', '-cp',
+ "../tools/generate/java/hive-gen.jar:$hadoopCoreJar:" .
+ "$commonsLoggingJar:$commonsConfigJar:$commonsLangJar:" .
+ "$hiveExecJar",
+ 'org.apache.hadoop.hive.tools.generate.RCFileGenerator',
+ 'student', $numRows, "$tableName", "$tableName.plain");
+ run(\@cmd) or die "Unable to run command [" . join(" ", @cmd)
+ . "]\n";
+ #@cmd = ('java', '-cp',
+ # "$hiveCliJar:$hiveExecJar:$hadoopCoreJar:" .
+ # "$commonsLoggingJar:$commonsCliJar:$commonsConfigJar",
+ # "org.apache.hadoop.hive.cli.RCFileCat", "$tableName");
+ #run(\@cmd, '>', $tableName) or
+ # die "Unable to run command [" . join(" ", @cmd) . "]\n";
+ } else {
+ die "Unknown format $format\n";
}
+ } elsif ($filetype eq "studentparttab") {
+ srand(3.14159 + $numRows);
+ print MYSQL "drop table if exists $tableName;\n";
+ print MYSQL "create table $tableName (name varchar(100), age integer, gpa float(3), ds char(8));\n";
+ print MYSQL &getBulkCopyCmd($tableName, "\t", "$tableName.mysql");
+ print $hivefp "drop table if exists $tableName;\ncreate external table $tableName(
+ name string,
+ age int,
+ gpa double)
+ partitioned by (ds string)
+ row format delimited
+ fields terminated by '\\t'
+ stored as textfile
+ location '$hdfsTargetDir/$tableName';
+ alter table $tableName add IF NOT EXISTS partition (ds='20110924') location '$hdfsTargetDir/$tableName/$tableName.20110924';
+ alter table $tableName add IF NOT EXISTS partition (ds='20110925') location '$hdfsTargetDir/$tableName/$tableName.20110925';
+ alter table $tableName add IF NOT EXISTS partition (ds='20110926') location '$hdfsTargetDir/$tableName/$tableName.20110926';
+ ";
+ open(MYSQLDATA, "> $tableName.mysql") or die("Cannot open file $tableName.mysql, $!\n");
+ for (my $ds = 20110924; $ds < 20110927; $ds++) {
+ close(HDFS);
+ open(HDFS, "> $tableName.$ds") or die("Cannot open file $tableName.$ds, $!\n");
+ for (my $i = 0; $i < $numRows; $i++) {
+ my $name = randomName();
+ my $age = randomAge();
+ my $gpa = randomGpa();
+ printf HDFS "%s\t%d\t%.2f\n", $name, $age, $gpa;
+ printf MYSQLDATA "%s\t%d\t%.3f\t%d\n", $name, $age, $gpa, $ds;
+ }
+ }
+ close(MYSQLDATA);
- } elsif ($filetype eq "studentnulltab") {
+ } elsif ($filetype eq "studentnull") {
srand(3.14159 + $numRows);
- print PSQL "create table $tableName (name varchar(100), age integer, gpa float(3));\n";
- print PSQL "begin transaction;\n";
+ print MYSQL "drop table if exists $tableName;\n";
+ print MYSQL "create table $tableName (name varchar(100), age integer, gpa float(3));\n";
+ print $hivefp "drop table if exists $tableName;\ncreate external table $tableName(
+ name string,
+ age int,
+ gpa double)
+ row format delimited
+ fields terminated by '\\001'
+ stored as textfile
+ location '$hdfsTargetDir/$tableName';\n";
for (my $i = 0; $i < $numRows; $i++) {
# generate nulls in a random fashion
my $name = rand(1) < 0.05 ? '' : randomName();
my $age = rand(1) < 0.05 ? '' : randomAge();
my $gpa = rand(1) < 0.05 ? '' : randomGpa();
- printf PSQL "insert into $tableName (name, age, gpa) values(";
- print PSQL ($name eq ''? "null, " : "'$name', "), ($age eq ''? "null, " : "$age, ");
+ printf MYSQL "insert into $tableName (name, age, gpa) values(";
+ print MYSQL ($name eq ''? "null, " : "'$name', "), ($age eq ''? "null, " : "$age, ");
if($gpa eq '') {
- print PSQL "null);\n"
+ print MYSQL "null);\n"
} else {
- printf PSQL "%.2f);\n", $gpa;
+ printf MYSQL "%.2f);\n", $gpa;
}
- print HDFS "$name\t$age\t";
+ print HDFS "$name$age";
if($gpa eq '') {
print HDFS "\n"
} else {
@@ -306,69 +501,65 @@ sub getBulkCopyCmd(){
}
}
- print PSQL "commit;\n" unless defined $nosql;
+ print MYSQL "commit;\n";
- } elsif ($filetype eq "studentcolon") {
+ } elsif ($filetype eq "allscalars") {
srand(2.718281828459 + $numRows);
- print PSQL "create table $tableName (name varchar(100), age integer, gpa float(3));\n" unless defined $nosql;
- print PSQL &getBulkCopyCmd( $targetDir, $tableName, ':' ) unless defined $nosql;
+ print MYSQL "drop table if exists $tableName;\n";
+ print MYSQL "create table $tableName (t tinyint, si smallint, i int, b
+ bigint, f double, d double, s varchar(25));\n";
+ print MYSQL &getBulkCopyCmd($tableName, ':');
+ print $hivefp "drop table if exists $tableName;\ncreate external table $tableName(
+ t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ s string)
+ row format delimited
+ fields terminated by ':'
+ stored as textfile
+ location '$hdfsTargetDir/$tableName';\n
+ alter table $tableName set TBLPROPERTIES
+ ('hcat.pig.loader.args'=':', 'hcat.pig.storer.args'=':');\n";
for (my $i = 0; $i < $numRows; $i++) {
- my $name = randomName();
- my $age = randomAge();
- my $gpa = randomGpa();
- printf HDFS "%s:%d:%.2f\n", $name, $age, $gpa;
-=begin
- } elsif ($filetype eq "studentusrdef") {
- srand(6.62606896 + $numRows);
- for (my $i = 0; $i < $numRows; $i++) {
- # TODO need to add SQL info.
- printf("%s,%d,%.2f,", randomName(), randomAge(), randomGpa());
- printf("<%s,%s,%s,%d>,", randomStreet(), randomCity(), randomState(),
- randomZip());
- printf("[%s:<%s,%s>],", randomClass(), randomClass(), randomName());
- printf("{");
- my $elementsInBag = int(rand(100));
- for (my $j = 0; $j < $elementsInBag; $j++) {
- if ($j != 0) { printf(","); }
- printf("<%s,%s,%s>", randomClass(), randomName(), randomGrade());
- }
- printf("}\n");
- }
-=cut
+ printf HDFS "%d:%d:%d:%ld:%.2f:%.2f:%s\n",
+ (int(rand(2**8) - 2**7)),
+ (int(rand(2**16) - 2**15)),
+ (int(rand(2**32) - 2**31)),
+ (int(rand(2**64) - 2**61)),
+ rand(100000.0) - 50000.0,
+ rand(10000000.0) - 5000000.0,
+ randomName();
}
- print PSQL "commit;\n" unless defined $nosql;
-
- } elsif ($filetype eq "studentctrla") {
- srand(6.14159 + $numRows);
- print PSQL "create table $tableName (name varchar(100), age integer, gpa float(3));\n";
- print PSQL "begin transaction;\n";
- for (my $i = 0; $i < $numRows; $i++) {
- my $name = randomName();
- my $age = randomAge();
- my $gpa = randomGpa();
- printf PSQL "insert into $tableName (name, age, gpa) values('%s', %d, %.2f);\n",
- $name, $age, $gpa;
- printf HDFS "%s%d%.2f\n", $name, $age, $gpa;
- }
- print PSQL "commit;\n" unless defined $nosql;
-
-
} elsif ($filetype eq "studentcomplextab") {
srand(3.14159 + $numRows);
- print PSQL "create table $tableName (nameagegpamap varchar(500), nameagegpatuple varchar(500), nameagegpabag varchar(500), nameagegpamap_name varchar(500), nameagegpamap_age integer, nameagegpamap_gpa float(3));\n";
- print PSQL "begin transaction;\n";
+ print MYSQL "drop table if exists $tableName;\n";
+ print MYSQL "create table $tableName (nameagegpamap varchar(500), nameagegpatuple varchar(500), nameagegpabag varchar(500), nameagegpamap_name varchar(500), nameagegpamap_age integer, nameagegpamap_gpa float(3));\n";
+ print MYSQL "begin transaction;\n";
+ print $hivefp "drop table if exists $tableName;\ncreate external table $tableName(
+ nameagegpamap map<string, string>,
+ nameagegpatuple struct<name: string, age: int, gpa: float>,
+ nameagegpabag array<int>)
+ row format delimited
+ fields terminated by '\\t'
+ collection items terminated by ','
+ map keys terminated by '#'
+ stored as textfile
+ location '$hdfsTargetDir/$tableName';\n";
for (my $i = 0; $i < $numRows; $i++) {
# generate nulls in a random fashion
my $map = rand(1) < 0.05 ? '' : randomNameAgeGpaMap();
my $tuple = rand(1) < 0.05 ? '' : randomNameAgeGpaTuple();
- my $bag = rand(1) < 0.05 ? '' : randomNameAgeGpaBag();
- printf PSQL "insert into $tableName (nameagegpamap, nameagegpatuple, nameagegpabag, nameagegpamap_name, nameagegpamap_age, nameagegpamap_gpa) values(";
+ my $bag = rand(1) < 0.05 ? '' : randomList();
+ printf MYSQL "insert into $tableName (nameagegpamap, nameagegpatuple, nameagegpabag, nameagegpamap_name, nameagegpamap_age, nameagegpamap_gpa) values(";
my $mapHash;
if($map ne '') {
$mapHash = getMapFields($map);
}
- print PSQL ($map eq ''? "null, " : "'$map', "),
+ print MYSQL ($map eq ''? "null, " : "'$map', "),
($tuple eq ''? "null, " : "'$tuple', "),
($bag eq '' ? "null, " : "'$bag', "),
($map eq '' ? "null, " : (exists($mapHash->{'name'}) ? "'".$mapHash->{'name'}."', " : "null, ")),
@@ -376,13 +567,23 @@ sub getBulkCopyCmd(){
($map eq '' ? "null);\n" : (exists($mapHash->{'gpa'}) ? "'".$mapHash->{'gpa'}."');\n" : "null);\n"));
print HDFS "$map\t$tuple\t$bag\n";
}
- print PSQL "commit;\n" unless defined $nosql;
+ print MYSQL "commit;\n";
} elsif ($filetype eq "votertab") {
srand(299792458 + $numRows);
- print PSQL "create table $tableName (name varchar(100), age integer, registration varchar(20), contributions float);\n" unless defined $nosql;
- print PSQL &getBulkCopyCmd( $targetDir, $tableName ) unless defined $nosql;
- for (my $i = 0; $i < $numRows; $i++) {
+ print MYSQL "drop table if exists $tableName;\n";
+ print MYSQL "create table $tableName (name varchar(100), age integer, registration varchar(20), contributions float);\n";
+ print MYSQL &getBulkCopyCmd($tableName, "\t");
+ print $hivefp "drop table if exists $tableName;\ncreate external table $tableName(
+ name string,
+ age int,
+ registration string,
+ contributions float)
+ row format delimited
+ fields terminated by '\\t'
+ stored as textfile
+ location '$hdfsTargetDir/$tableName';\n";
+ for (my $i = 0; $i < $numRows; $i++) {
my $name = randomName();
my $age = randomAge();
my $registration = randomRegistration();
@@ -393,22 +594,32 @@ sub getBulkCopyCmd(){
} elsif ($filetype eq "voternulltab") {
srand(299792458 + $numRows);
- print PSQL "create table $tableName (name varchar(100), age integer, registration varchar(20), contributions float);\n" unless defined $nosql;
- print PSQL "begin transaction;\n" unless defined $nosql;
+ print MYSQL "drop table if exists $tableName;\n";
+ print MYSQL "create table $tableName (name varchar(100), age integer, registration varchar(20), contributions float);\n";
+ print MYSQL "begin transaction;\n";
+ print $hivefp "drop table if exists $tableName;\ncreate external table $tableName(
+ name string,
+ age int,
+ registration string,
+ contributions float)
+ row format delimited
+ fields terminated by '\\t'
+ stored as textfile
+ location '$hdfsTargetDir/$tableName';\n";
for (my $i = 0; $i < $numRows; $i++) {
# generate nulls in a random fashion
my $name = rand(1) < 0.05 ? '' : randomName();
my $age = rand(1) < 0.05 ? '' : randomAge();
my $registration = rand(1) < 0.05 ? '' : randomRegistration();
my $contribution = rand(1) < 0.05 ? '' : randomContribution();
- printf PSQL "insert into $tableName (name, age, registration, contributions) values(";
- print PSQL ($name eq ''? "null, " : "'$name', "),
+ printf MYSQL "insert into $tableName (name, age, registration, contributions) values(";
+ print MYSQL ($name eq ''? "null, " : "'$name', "),
($age eq ''? "null, " : "$age, "),
($registration eq ''? "null, " : "'$registration', ");
if($contribution eq '') {
- print PSQL "null);\n"
+ print MYSQL "null);\n"
} else {
- printf PSQL "%.2f);\n", $contribution;
+ printf MYSQL "%.2f);\n", $contribution;
}
print HDFS "$name\t$age\t$registration\t";
if($contribution eq '') {
@@ -417,42 +628,64 @@ sub getBulkCopyCmd(){
printf HDFS "%.2f\n", $contribution;
}
}
- print PSQL "commit;\n" unless defined $nosql;
-
- } elsif ($filetype eq "reg1459894") {
- srand(6.67428 + $numRows);
- print PSQL "create table $tableName (first varchar(10), second varchar(10));\n" unless defined $nosql;
- print PSQL &getBulkCopyCmd( $targetDir, $tableName ) unless defined $nosql;
- for (my $i = 0; $i < $numRows; $i++) {
- my $letter = randomNumLetter();
- my $gkLetter = randomGreekLetter();
- printf HDFS "%s\t%s\n", $letter, $gkLetter;
- }
-
- } elsif ($filetype eq "textdoc") {
- # This one ignores the number of lines. It isn't random either.
- print PSQL "create table $tableName (name varchar(255));\n" unless defined $nosql;
- print PSQL "begin transaction;\n" unless defined $nosql;
- for (my $i = 0; $i < @textDoc; $i++) {
- my $sqlWords = $textDoc[$i];
- $sqlWords =~ s/([\w-]+)/$1,/g;
- print PSQL "insert into $tableName (name) values('($sqlWords)');\n" unless defined $nosql;
- print HDFS "$textDoc[$i]\n";
- }
- print PSQL "commit;\n" unless defined $nosql;
-
+ print MYSQL "commit;\n";
} elsif ($filetype eq "unicode") {
srand(1.41421 + $numRows);
- print PSQL "create table $tableName (name varchar(255));\n" unless defined $nosql;
- print PSQL "begin transaction;\n" unless defined $nosql;
+ print MYSQL "drop table if exists $tableName;\n";
+ print MYSQL "create table $tableName (name varchar(255));\n";
+ print MYSQL "begin transaction;\n";
+ print $hivefp "drop table if exists $tableName;\ncreate external table $tableName(
+ name string)
+ row format delimited
+ fields terminated by '\\t'
+ stored as textfile
+ location '$hdfsTargetDir/$tableName';\n";
for (my $i = 0; $i < $numRows; $i++) {
my $name = randomUnicodeNonAscii();
- printf PSQL "insert into $tableName (name) values('%s');\n",
- $name unless defined $nosql;
+ printf MYSQL "insert into $tableName (name) values('%s');\n", $name;
printf HDFS "%s\n", $name;
}
- print PSQL "commit;\n" unless defined $nosql;
+ print MYSQL "commit;\n";
+ } elsif ($filetype eq "json") {
+ srand(6.0221415 + $numRows);
+ print MYSQL "drop table if exists $tableName;";
+ print MYSQL "create table $tableName(
+ s varchar(100),
+ i int,
+ d double);";
+ print MYSQL &getBulkCopyCmd($tableName, "\t", "$tableName.plain");
+ print $hivefp "drop table if exists $tableName;\ncreate external table $tableName(
+ s string,
+ i int,
+ d double,
+ m map<string, string>,
+ bb array<struct<a: int, b: string>>)
+ STORED AS INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat'
+ INPUTDRIVER 'org.apache.hcatalog.pig.drivers.LoadFuncBasedInputDriver' OUTPUTDRIVER 'org.apache.hcatalog.pig.drivers.StoreFuncBasedOutputDriver'
+ location '$hdfsTargetDir/$tableName'
+ TBLPROPERTIES ('hcat.pig.loader'='org.apache.pig.builtin.JsonLoader', 'hcat.pig.storer'='org.apache.pig.builtin.JsonStorage', 'hcat.pig.loader.args'=
+'s:chararray, i:int, d:double, m:map[chararray], bb:{t:(a:int, b:chararray)}', 'hcat.pig.args.delimiter'='\t');\n";
+ open(PLAIN, ">$tableName.plain") or
+ die("Cannot open file $tableName.hive.sql, $!\n");
+ for (my $i = 0; $i < $numRows; $i++) {
+ my $s = randomJsonString();
+ my $i = int(rand(2**32) - 2**31);
+ my $d = rand(2**10) - 2**9;
+# my $i = rand(1) < 0.05 ? 'null' : (int(rand(2**32) - 2**31)),
+# my $d = rand(1) < 0.05 ? 'null' : (rand(2**10) - 2**9),
+ my $m = randomJsonMap();
+ my $bb = randomJsonBag();
+
+# printf MYSQL "insert into $tableName (name) values('%s');\n", $name;
+ print HDFS qq@{"s":"$s", "i":$i, "d":$d, "m":$m, "bb":$bb}\n@;
+ if ($s eq 'null') {
+ $s="";
+ }
+ print PLAIN "$s\t$i\t$d\n";
+ }
+ close PLAIN;
+ print MYSQL "commit;\n";
} else {
warn "Unknown filetype $filetype\n";
Added: incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/generate/java/build.xml
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/generate/java/build.xml?rev=1211077&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/generate/java/build.xml (added)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/generate/java/build.xml Tue Dec 6 20:05:37 2011
@@ -0,0 +1,71 @@
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.-->
+
+<project name="Hive-Data-Generator" default="generator-jar">
+
+ <property name="generator.jarfile" value="hive-gen.jar" />
+ <property name="generator.build.dir" value="${basedir}/build" />
+ <property name="generator.src.dir" value="${basedir}/org/" />
+
+
+ <path id="generator-classpath">
+ <fileset file="${hive.serde.jarfile}" />
+ <fileset file="${hive.ql.jarfile}" />
+ <fileset file="${hadoop.core.jarfile}" />
+ </path>
+
+ <target name="init">
+ <mkdir dir="${generator.build.dir}" />
+ </target>
+
+ <target name="clean">
+ <delete dir="${generator.build.dir}" />
+ <delete file="${generator.jarfile}" />
+ </target>
+
+ <target name="generator-compile"
+ depends="init, serde.jar.check, ql.jar.check, hadoop.jar.check">
+ <echo>*** Compiling UDFs ***</echo>
+ <javac srcdir="${generator.src.dir}" destdir="${generator.build.dir}" debug="on">
+ <classpath refid="generator-classpath" />
+ </javac>
+ </target>
+
+ <target name="generator-jar" depends="generator-compile">
+ <echo>*** Creating UDF jar ***</echo>
+ <jar duplicate="preserve" jarfile="${generator.jarfile}">
+ <fileset dir="build"/>
+ </jar>
+ </target>
+
+ <target name="serde.jar.check" unless="hive.serde.jarfile">
+ <fail message="'hive.serde.jarfile' is not defined.
+ Please pass -Dhive.serde.jarfile=<Hive serde jar to use> to Ant on the command-line." />
+ </target>
+
+ <target name="ql.jar.check" unless="hive.ql.jarfile">
+ <fail message="'hive.ql.jarfile' is not defined.
+ Please pass -Dhive.ql.jarfile=<Hive ql jar to use> to Ant on the command-line." />
+ </target>
+
+ <target name="hadoop.jar.check" unless="hadoop.core.jarfile">
+ <fail message="'hadoop.core.jarfile' is not defined.
+ Please pass -Dhadoop.core.jarfile=<Hadoop core jar to use> to Ant on the command-line." />
+ </target>
+
+
+
+
+</project>
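
The three *.jar.check targets make the jar-location properties mandatory, so the build has to be driven with all of them on the command line. A sketch of invoking it (the jar paths are placeholders):

    use IPC::Run qw(run);
    # Runs the default generator-jar target, producing hive-gen.jar.
    my @ant = ('ant',
               '-Dhive.serde.jarfile=/path/to/hive-serde.jar',
               '-Dhive.ql.jarfile=/path/to/hive-exec.jar',
               '-Dhadoop.core.jarfile=/path/to/hadoop-core.jar');
    run(\@ant) or die "hive-gen.jar build failed\n";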
Added: incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/generate/java/org/apache/hadoop/hive/tools/generate/RCFileGenerator.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/generate/java/org/apache/hadoop/hive/tools/generate/RCFileGenerator.java?rev=1211077&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/generate/java/org/apache/hadoop/hive/tools/generate/RCFileGenerator.java (added)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/generate/java/org/apache/hadoop/hive/tools/generate/RCFileGenerator.java Tue Dec 6 20:05:37 2011
@@ -0,0 +1,217 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.tools.generate;
+
+import java.util.Properties;
+import java.util.Random;
+import java.io.DataOutputStream;
+import java.io.FileOutputStream;
+import java.io.FileWriter;
+import java.io.PrintWriter;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocalFileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.compress.DefaultCodec;
+
+import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
+import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable;
+import org.apache.hadoop.hive.ql.io.RCFile;
+import org.apache.hadoop.hive.ql.io.RCFileOutputFormat;
+
+/**
+ * Generate RCFile test data
+ *
+ */
+public class RCFileGenerator {
+
+ private static Configuration conf = new Configuration();
+ private static Path basedir;
+ private static FileSystem fs;
+ private static Properties tbl;
+ private static Random rand;
+
+ private static Path getFile(String filename) throws Exception {
+ return new Path(basedir, filename);
+ }
+
+ private static String[] firstName = {"alice", "bob", "calvin", "david",
+ "ethan", "fred", "gabriella", "holly", "irene", "jessica", "katie",
+ "luke", "mike", "nick", "oscar", "priscilla", "quinn", "rachel",
+ "sarah", "tom", "ulysses", "victor", "wendy", "xavier", "yuri",
+ "zach"};
+
+ private static String[] lastName = {"allen", "brown", "carson",
+ "davidson", "ellison", "falkner", "garcia", "hernandez", "ichabod",
+ "johnson", "king", "laertes", "miller", "nixon", "ovid", "polk",
+ "quirinius", "robinson", "steinbeck", "thompson", "underhill",
+ "van buren", "white", "xylophone", "young", "zipper"};
+
+ private static String randomName() {
+ StringBuffer buf =
+ new StringBuffer(firstName[rand.nextInt(firstName.length)]);
+ buf.append(' ');
+ buf.append(lastName[rand.nextInt(lastName.length)]);
+ return buf.toString();
+ }
+
+ private static int randomAge() {
+ return rand.nextInt(60) + 18;
+ }
+
+ private static double randomGpa() {
+ return 4 * rand.nextFloat();
+ }
+
+ private static String[] registration = {"democrat", "green",
+ "independent", "libertarian", "republican", "socialist"};
+
+ private static String randomRegistration() {
+ return registration[rand.nextInt(registration.length)];
+ }
+
+ private static double randomContribution() {
+ return rand.nextFloat() * 1000;
+ }
+
+ private static byte[] randomMap() throws Exception {
+ int len = rand.nextInt(5) + 1;
+
+ StringBuffer buf = new StringBuffer();
+ for (int i = 0; i < len; i++) {
+ if (i != 0) buf.append('\u0002');
+ buf.append(firstName[rand.nextInt(26)]);
+ buf.append('\u0003');
+ buf.append(lastName[rand.nextInt(26)]);
+ }
+ return buf.toString().getBytes("UTF-8");
+ }
+
+ private static byte[] randomArray() throws Exception {
+ int len = rand.nextInt(5) + 1;
+
+ StringBuffer buf = new StringBuffer();
+ for (int i = 0; i < len; i++) {
+ if (i != 0) buf.append('\u0002');
+ buf.append(Integer.valueOf(randomAge()).toString());
+ buf.append('\u0003');
+ buf.append(randomName());
+ }
+ return buf.toString().getBytes("UTF-8");
+ }
+
+ private static void usage() {
+ System.err.println("Usage: rcfilegen format number_of_rows " +
+ "output_file plain_output_file");
+ System.err.println(" format one of: student voter alltypes");
+ System.exit(1);
+ }
+
+ public static void main(String[] args) throws Exception {
+ if (args.length != 4) usage();
+
+ String format = args[0];
+ int numRows = Integer.valueOf(args[1]);
+ if (numRows < 1) usage();
+ String output = args[2];
+ String plainOutput = args[3];
+
+ fs = FileSystem.getLocal(conf);
+ basedir = new Path(".");
+
+ genData(format, numRows, output, plainOutput);
+ }
+
+ private static void genData(String format,
+ int numRows,
+ String output, String plainOutput) throws Exception {
+ int numFields = 0;
+ if (format.equals("student")) {
+ rand = new Random(numRows);
+ numFields = 3;
+ } else if (format.equals("voter")) {
+ rand = new Random(1000000000 + numRows);
+ numFields = 4;
+ } else if (format.equals("alltypes")) {
+ rand = new Random(2000000000L + numRows);
+ numFields = 10;
+ }
+
+ RCFileOutputFormat.setColumnNumber(conf, numFields);
+ RCFile.Writer writer = new RCFile.Writer(fs, conf, getFile(output),
+ null, new DefaultCodec());
+
+ PrintWriter pw = new PrintWriter(new FileWriter(plainOutput));
+
+ for (int j = 0; j < numRows; j++) {
+ BytesRefArrayWritable row = new BytesRefArrayWritable(numFields);
+
+ byte[][] fields = null;
+
+ if (format.equals("student")) {
+ byte[][] f = {
+ randomName().getBytes("UTF-8"),
+ Integer.valueOf(randomAge()).toString().getBytes("UTF-8"),
+ Double.valueOf(randomGpa()).toString().getBytes("UTF-8")
+ };
+ fields = f;
+ } else if (format.equals("voter")) {
+ byte[][] f = {
+ randomName().getBytes("UTF-8"),
+ Integer.valueOf(randomAge()).toString().getBytes("UTF-8"),
+ randomRegistration().getBytes("UTF-8"),
+ Double.valueOf(randomContribution()).toString().getBytes("UTF-8")
+ };
+ fields = f;
+ } else if (format.equals("alltypes")) {
+ byte[][] f = {
+ Integer.valueOf(rand.nextInt(Byte.MAX_VALUE)).toString().getBytes("UTF-8"),
+ Integer.valueOf(rand.nextInt(Short.MAX_VALUE)).toString().getBytes("UTF-8"),
+ Integer.valueOf(rand.nextInt()).toString().getBytes("UTF-8"),
+ Long.valueOf(rand.nextLong()).toString().getBytes("UTF-8"),
+ Float.valueOf(rand.nextFloat() * 1000).toString().getBytes("UTF-8"),
+ Double.valueOf(rand.nextDouble() * 1000000).toString().getBytes("UTF-8"),
+ randomName().getBytes("UTF-8"),
+ randomMap(),
+ randomArray()
+ };
+ fields = f;
+ }
+
+
+ for (int i = 0; i < fields.length; i++) {
+ BytesRefWritable field = new BytesRefWritable(fields[i], 0,
+ fields[i].length);
+ row.set(i, field);
+ pw.print(new String(fields[i]));
+ if (i!=fields.length-1)
+ pw.print("\t");
+ else
+ pw.println();
+ }
+
+ writer.append(row);
+ }
+
+ writer.close();
+ pw.close();
+ }
+}
+
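generate_data.pl above shows the exact classpath this generator runs with. For spot-checking its output, the commented-out block in generate_data.pl points at Hive's RCFileCat; a sketch of that check, reusing the jar variables returned by findHadoopJars() and findHiveJars():

    use IPC::Run qw(run);
    # Dumps the binary RCFile as text so it can be diffed against the .plain copy.
    my @cat = ('java', '-cp',
               "$hiveCliJar:$hiveExecJar:$hadoopCoreJar:" .
               "$commonsLoggingJar:$commonsCliJar:$commonsConfigJar",
               'org.apache.hadoop.hive.cli.RCFileCat', 'studenttab10k');
    run(\@cat, '>', 'studenttab10k.txt') or
        die "Unable to run command [" . join(" ", @cat) . "]\n";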
Added: incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/install/install.sh
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/install/install.sh?rev=1211077&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/install/install.sh (added)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/install/install.sh Tue Dec 6 20:05:37 2011
@@ -0,0 +1,212 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This script assumes that it is being run from the top level directory of the
+# HCatalog distribution tarball
+
+host="unknown"
+dir="unknown"
+hadoop_home="unknown"
+tarball="unknown"
+dbroot="unknown"
+portnum="9933"
+passwd="hive"
+warehouseDir="/user/hive/warehouse"
+sasl="false"
+keytabpath="unknown"
+kerberosprincipal="unknown"
+forrest="unknown"
+
+function usage() {
+ echo "Usage: $0 -D dbroot -d directory -f forrest -h hadoop_home "
+ echo " -m host -t tarball"
+ echo " [-p portnum] [-P password] [-w warehouse_directory]"
+ echo " [-s true|false -k keytabpath -K kerberos_principal]"
+ echo
+ echo " dbroot is the root directory for the mysql drivers"
+ echo " directory is the directory where it will be installed"
+ echo " hadoop_home is the directory of your Hadoop installation."
+ echo " host is the machine to install the HCatalog server on"
+ echo " tarball is the result of running ant src-release in hcat"
+ echo " portnum is the port for the thrift server to use, " \
+ "default $portnum"
+ echo " password is the password for the metastore db, default $passwd"
+ echo " warehouse_directory is the HDFS directory to use for " \
+ "internal hive tables, default $warehouseDir"
+ echo " -s true will enable security, -s false turn it off, " \
+ "default $sasl"
+ echo " keytabpath is path to Kerberos keytab file, required with " \
+ "-s true"
+ echo " kerberos_principal service principal for thrift server, " \
+ "required with -s true"
+ echo " All paths must be absolute"
+}
+
+while [ "${1}x" != "x" ] ; do
+ if [ $1 == "-D" ] ; then
+ shift
+ dbroot=$1
+ shift
+ elif [ $1 == "-d" ] ; then
+ shift
+ dir=$1
+ shift
+ elif [ $1 == "-f" ] ; then
+ shift
+ forrest=$1
+ shift
+ elif [ $1 == "-h" ] ; then
+ shift
+ hadoop_home=$1
+ shift
+ elif [ $1 == "-K" ] ; then
+ shift
+ kerberosprincipal=$1
+ kerberosprincipal=${kerberosprincipal/@/\\@}
+ shift
+ elif [ $1 == "-k" ] ; then
+ shift
+ keytabpath=$1
+ shift
+ elif [ $1 == "-m" ] ; then
+ shift
+ host=$1
+ shift
+ elif [ $1 == "-p" ] ; then
+ shift
+ portnum=$1
+ shift
+ elif [ $1 == "-P" ] ; then
+ shift
+ passwd=$1
+ shift
+ elif [ $1 == "-s" ] ; then
+ shift
+ sasl=$1
+ shift
+ elif [ $1 == "-t" ] ; then
+ shift
+ tarball=$1
+ shift
+ elif [ $1 == "-w" ] ; then
+ shift
+ warehouseDir=$1
+ shift
+ else
+ echo "Unknown option $1"
+ shift
+ fi
+
+done
+
+for var in $forrest $dbroot $host $dir $hadoop_home $tarball ; do
+ if [ $var == "unknown" ] ; then
+ usage
+ exit 1
+ fi
+done
+
+# Make sure root and dbroot are absolute paths
+
+for var in $forrest $dbroot $dir $hadoop_home ; do
+ if [ ${var:0:1} != "/" ] ; then
+ usage
+ exit 1
+ fi
+done
+
+# Take the src distribution and build an installable tarball
+# Copy the tarball over
+rm -rf /tmp/${USER}_hcat_scratch
+mkdir /tmp/${USER}_hcat_scratch
+cd /tmp/${USER}_hcat_scratch
+cp $tarball .
+tar zxf *
+dirname=`ls -1 | grep -v gz`
+cd $dirname
+ant -Dforrest.home=$forrest tar
+tarfiledir=`pwd`
+tarfilebase=`ls build/hcatalog-*.tar.gz`
+tarfile="$tarfiledir/$tarfilebase"
+
+tfile=/tmp/${USER}_hcat_test_tarball.tgz
+scp $tarfile $host:$tfile
+
+# Write a quick perl script to modify the hive-site.xml file
+pfile=/tmp/${USER}_hcat_test_hive_site_modify.pl
+cat > $pfile <<!
+#!/usr/bin/env perl
+
+while (<>) {
+ s!DBHOSTNAME!$host!;
+ s!SVRHOST!$host!;
+ s!PASSWORD!$passwd!;
+ s!WAREHOUSE_DIR!$warehouseDir!;
+ s!SASL_ENABLED!$sasl!;
+ s!KEYTAB_PATH!$keytabpath!;
+ s!KERBEROS_PRINCIPAL!$kerberosprincipal!;
+ s!PORT!$portnum!;
+ print;
+}
+!
+
+
+# Run the install script
+file=/tmp/${USER}_hcat_test_install.sh
+cat > $file <<!
+#!/usr/bin/env bash
+rm -rf /tmp/${USER}_hcat_scratch
+mkdir /tmp/${USER}_hcat_scratch
+cd /tmp/${USER}_hcat_scratch
+cp $tfile .
+tar zxf ${USER}_hcat_test_tarball.tgz
+cd hcatalog-*
+share/hcatalog/scripts/hcat_server_install.sh -r $dir -d $dbroot \
+ -h $hadoop_home -p $portnum
+
+chmod +x $pfile
+cp $dir/etc/hcatalog/hive-site.xml /tmp/${USER}_hcat_test_hive_site.tmp
+$pfile < /tmp/${USER}_hcat_test_hive_site.tmp > $dir/etc/hcatalog/hive-site.xml
+!
+
+scp $file $host:$file
+scp $pfile $host:$pfile
+ssh $host chmod +x $file
+ssh $host $file
+if [ $? != "0" ] ; then
+ echo "Failed to install hcat"
+ exit 1
+fi
+
+# Stop the current server
+file=/tmp/${USER}_hcat_test_install_stop_server.sh
+cat > $file <<!
+#!/usr/bin/env bash
+export HADOOP_HOME=$hadoop_home
+$dir/share/hcatalog/scripts/hcat_server_stop.sh
+!
+scp $file $host:$file
+ssh $host chmod +x $file
+ssh $host $file
+
+# Start the server
+ssh $host $dir/share/hcatalog/scripts/hcat_server_start.sh
+if [ $? != "0" ] ; then
+ echo "Failed to start hcat"
+ exit 1
+fi
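
Reading the required flags off usage(), a complete installer run looks like the following; every host and path here is a placeholder:

    use IPC::Run qw(run);
    # Hypothetical install of the HCatalog server onto host 'hcat-server'.
    my @install = ('./install.sh',
                   '-D', '/usr/share/java',            # dbroot holding the mysql drivers
                   '-d', '/grid/0/hcatalog',           # install directory on the server
                   '-f', '/opt/apache-forrest',        # forrest home, used to build the tarball
                   '-h', '/opt/hadoop',                # hadoop home
                   '-m', 'hcat-server',                # machine to install the server on
                   '-t', '/tmp/hcatalog-src.tar.gz');  # result of 'ant src-release'
    run(\@install) or die "install.sh failed\n";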
Added: incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/GroupByAge.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/GroupByAge.java?rev=1211077&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/GroupByAge.java (added)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/GroupByAge.java Tue Dec 6 20:05:37 2011
@@ -0,0 +1,134 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hcatalog.utils;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.util.GenericOptionsParser;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.hcatalog.common.HCatConstants;
+import org.apache.hcatalog.data.DefaultHCatRecord;
+import org.apache.hcatalog.data.HCatRecord;
+import org.apache.hcatalog.data.schema.HCatSchema;
+import org.apache.hcatalog.mapreduce.HCatInputFormat;
+import org.apache.hcatalog.mapreduce.HCatOutputFormat;
+import org.apache.hcatalog.mapreduce.InputJobInfo;
+import org.apache.hcatalog.mapreduce.OutputJobInfo;
+
+/**
+ * This is a map reduce test for testing hcat which goes against a student
+ * table. It performs a group by on the age column (column 1 of each record)
+ * and counts the rows in each group. This is to simulate a typical operation
+ * in a map reduce program, to test that hcat hands the right data to the map
+ * reduce program.
+ *
+ * Usage: hadoop jar <test jar> org.apache.hcatalog.utils.GroupByAge
+ * <serveruri> <input table> <output table> <-libjars hive-hcat jar>
+ * The hcat jar location should be specified as file://<full path to jar>
+ */
+public class GroupByAge extends Configured implements Tool {
+
+ public static class Map extends
+ Mapper<WritableComparable, HCatRecord, IntWritable, IntWritable> {
+
+ int age;
+
+ @Override
+ protected void map(
+ WritableComparable key,
+ HCatRecord value,
+ org.apache.hadoop.mapreduce.Mapper<WritableComparable, HCatRecord, IntWritable, IntWritable>.Context context)
+ throws IOException, InterruptedException {
+            // column 1 of the input table holds the age; emit (age, 1) so
+            // the reducer can count records per age
+            age = (Integer) value.get(1);
+            context.write(new IntWritable(age), new IntWritable(1));
+ }
+ }
+
+ public static class Reduce extends Reducer<IntWritable, IntWritable,
+ WritableComparable, HCatRecord> {
+
+
+ @Override
+ protected void reduce(IntWritable key, java.lang.Iterable<IntWritable>
+ values, org.apache.hadoop.mapreduce.Reducer<IntWritable,IntWritable,WritableComparable,HCatRecord>.Context context)
+            throws IOException, InterruptedException {
+            // count the records in this age group
+            int sum = 0;
+            for (IntWritable ignored : values) {
+                sum++;
+            }
+ HCatRecord record = new DefaultHCatRecord(2);
+ record.set(0, key.get());
+ record.set(1, sum);
+
+ context.write(null, record);
+ }
+ }
+
+ public int run(String[] args) throws Exception {
+ Configuration conf = getConf();
+ args = new GenericOptionsParser(conf, args).getRemainingArgs();
+
+ String serverUri = args[0];
+ String inputTableName = args[1];
+ String outputTableName = args[2];
+ String dbName = null;
+
+ String principalID = System
+ .getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL);
+ if (principalID != null)
+ conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID);
+ Job job = new Job(conf, "GroupByAge");
+ HCatInputFormat.setInput(job, InputJobInfo.create(dbName,
+ inputTableName, null, serverUri, principalID));
+
+ job.setInputFormatClass(HCatInputFormat.class);
+ job.setJarByClass(GroupByAge.class);
+ job.setMapperClass(Map.class);
+ job.setReducerClass(Reduce.class);
+ job.setMapOutputKeyClass(IntWritable.class);
+ job.setMapOutputValueClass(IntWritable.class);
+ job.setOutputKeyClass(WritableComparable.class);
+ job.setOutputValueClass(DefaultHCatRecord.class);
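+        // initialize HCatOutputFormat: bind the output table, then fetch its
+        // schema and set it so record fields map to the table's columns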
+ HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName,
+ outputTableName, null, serverUri, principalID));
+ HCatSchema s = HCatOutputFormat.getTableSchema(job);
+        System.err.println("INFO: output schema explicitly set for writing: "
+                + s);
+ HCatOutputFormat.setSchema(job, s);
+ job.setOutputFormatClass(HCatOutputFormat.class);
+ return (job.waitForCompletion(true) ? 0 : 1);
+ }
+
+ public static void main(String[] args) throws Exception {
+ int exitCode = ToolRunner.run(new GroupByAge(), args);
+ System.exit(exitCode);
+ }
+}
Added: incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/ReadJson.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/ReadJson.java?rev=1211077&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/ReadJson.java (added)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/ReadJson.java Tue Dec 6 20:05:37 2011
@@ -0,0 +1,115 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hcatalog.utils;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
+import org.apache.hadoop.util.GenericOptionsParser;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.hcatalog.common.HCatConstants;
+import org.apache.hcatalog.data.DefaultHCatRecord;
+import org.apache.hcatalog.data.HCatRecord;
+import org.apache.hcatalog.mapreduce.HCatInputFormat;
+import org.apache.hcatalog.mapreduce.InputJobInfo;
+
+/**
+ * This is a map reduce test for testing hcat reads of a table stored in JSON
+ * format. For each record it reads three columns (a string, an int, and a
+ * double, any of which may be null), copies them into a new HCatRecord, and
+ * writes the records as text to the given output directory. This tests that
+ * hcat hands the right data, including nulls, to the map reduce program.
+ *
+ * Usage: hadoop jar <test jar> org.apache.hcatalog.utils.ReadJson
+ * -libjars <hive-hcat jar> <serveruri> <table> <output dir>
+ * The hcat jar location should be specified as file://<full path to jar>
+ */
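+// A hypothetical invocation (jar name, metastore URI, and names are
+// illustrative only; the e2e harness supplies the real values):
+//
+//   hadoop jar testudf.jar org.apache.hcatalog.utils.ReadJson \
+//       -libjars file:///path/to/hcatalog.jar \
+//       thrift://localhost:9083 studenttab10k_json /user/hcat/readjson_out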
+public class ReadJson extends Configured implements Tool {
+
+ public static class Map
+ extends Mapper<WritableComparable, HCatRecord, IntWritable, HCatRecord>{
+
+ String s;
+ Integer i;
+ Double d;
+
+ @Override
+ protected void map(WritableComparable key, HCatRecord value,
+ org.apache.hadoop.mapreduce.Mapper<WritableComparable,HCatRecord,
+ IntWritable,HCatRecord>.Context context)
+        throws IOException, InterruptedException {
+            // JSON fields may be absent; value.get() then returns null, and
+            // casting null is a no-op, so plain casts preserve the nulls
+            s = (String) value.get(0);
+            i = (Integer) value.get(1);
+            d = (Double) value.get(2);
+
+ HCatRecord record = new DefaultHCatRecord(3);
+ record.set(0, s);
+ record.set(1, i);
+ record.set(2, d);
+
+ context.write(null, record);
+
+ }
+ }
+
+ public int run(String[] args) throws Exception {
+ Configuration conf = getConf();
+ args = new GenericOptionsParser(conf, args).getRemainingArgs();
+
+ String serverUri = args[0];
+ String tableName = args[1];
+ String outputDir = args[2];
+ String dbName = null;
+
+ String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL);
+ if(principalID != null)
+ conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID);
+ Job job = new Job(conf, "ReadJson");
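+        // the third argument to InputJobInfo.create is the partition filter;
+        // null means the whole table is read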
+ HCatInputFormat.setInput(job, InputJobInfo.create(
+ dbName, tableName, null, serverUri, principalID));
+        // the output side is plain text via TextOutputFormat, so no
+        // HCatOutputFormat setup is needed
+
+ job.setInputFormatClass(HCatInputFormat.class);
+ job.setOutputFormatClass(TextOutputFormat.class);
+ job.setJarByClass(ReadJson.class);
+ job.setMapperClass(Map.class);
+ job.setOutputKeyClass(IntWritable.class);
+ job.setOutputValueClass(HCatRecord.class);
+ job.setNumReduceTasks(0);
+ FileOutputFormat.setOutputPath(job, new Path(outputDir));
+ return (job.waitForCompletion(true) ? 0 : 1);
+ }
+
+ public static void main(String[] args) throws Exception {
+ int exitCode = ToolRunner.run(new ReadJson(), args);
+ System.exit(exitCode);
+ }
+}
Added: incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/ReadRC.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/ReadRC.java?rev=1211077&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/ReadRC.java (added)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/ReadRC.java Tue Dec 6 20:05:37 2011
@@ -0,0 +1,114 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hcatalog.utils;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
+import org.apache.hadoop.util.GenericOptionsParser;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.hcatalog.common.HCatConstants;
+import org.apache.hcatalog.data.DefaultHCatRecord;
+import org.apache.hcatalog.data.HCatRecord;
+import org.apache.hcatalog.mapreduce.HCatInputFormat;
+import org.apache.hcatalog.mapreduce.InputJobInfo;
+
+/**
+ * This is a map reduce test for testing hcat reads of a table stored as
+ * RCFile. It reads each record's name (string), age (int), and gpa (double)
+ * columns, copies them into a new HCatRecord, and writes the records as text
+ * to the given output directory. This tests that hcat hands the right data
+ * to the map reduce program.
+ *
+ * Usage: hadoop jar <test jar> org.apache.hcatalog.utils.ReadRC
+ * -libjars <hive-hcat jar> <serveruri> <table> <output dir>
+ * The hcat jar location should be specified as file://<full path to jar>
+ */
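+// A hypothetical invocation (jar name, metastore URI, and output path are
+// illustrative only; the e2e harness supplies the real values):
+//
+//   hadoop jar testudf.jar org.apache.hcatalog.utils.ReadRC \
+//       -libjars file:///path/to/hcatalog.jar \
+//       thrift://localhost:9083 all100krc /user/hcat/readrc_out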
+public class ReadRC extends Configured implements Tool {
+
+ public static class Map
+ extends Mapper<WritableComparable, HCatRecord, IntWritable, HCatRecord>{
+
+ String name;
+ int age;
+ double gpa;
+
+ @Override
+ protected void map(WritableComparable key, HCatRecord value,
+ org.apache.hadoop.mapreduce.Mapper<WritableComparable,HCatRecord,
+ IntWritable,HCatRecord>.Context context)
+        throws IOException, InterruptedException {
+            // columns of the RC-stored table: name (string), age (int),
+            // gpa (double)
+            name = (String) value.get(0);
+            age = (Integer) value.get(1);
+            gpa = (Double) value.get(2);
+
+ HCatRecord record = new DefaultHCatRecord(3);
+ record.set(0, name);
+ record.set(1, age);
+ record.set(2, gpa);
+
+ context.write(null, record);
+
+ }
+ }
+
+ public int run(String[] args) throws Exception {
+ Configuration conf = getConf();
+ args = new GenericOptionsParser(conf, args).getRemainingArgs();
+
+ String serverUri = args[0];
+ String tableName = args[1];
+ String outputDir = args[2];
+ String dbName = null;
+
+ String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL);
+ if(principalID != null)
+ conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID);
+ Job job = new Job(conf, "ReadRC");
+ HCatInputFormat.setInput(job, InputJobInfo.create(
+ dbName, tableName, null, serverUri, principalID));
+        // the output side is plain text via TextOutputFormat, so no
+        // HCatOutputFormat setup is needed
+
+ job.setInputFormatClass(HCatInputFormat.class);
+ job.setOutputFormatClass(TextOutputFormat.class);
+ job.setJarByClass(ReadRC.class);
+ job.setMapperClass(Map.class);
+ job.setOutputKeyClass(IntWritable.class);
+ job.setOutputValueClass(HCatRecord.class);
+ job.setNumReduceTasks(0);
+ FileOutputFormat.setOutputPath(job, new Path(outputDir));
+ return (job.waitForCompletion(true) ? 0 : 1);
+ }
+
+ public static void main(String[] args) throws Exception {
+ int exitCode = ToolRunner.run(new ReadRC(), args);
+ System.exit(exitCode);
+ }
+}
Added: incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/ReadText.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/ReadText.java?rev=1211077&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/ReadText.java (added)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/ReadText.java Tue Dec 6 20:05:37 2011
@@ -0,0 +1,125 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hcatalog.utils;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
+import org.apache.hadoop.util.GenericOptionsParser;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.hcatalog.common.HCatConstants;
+import org.apache.hcatalog.data.DefaultHCatRecord;
+import org.apache.hcatalog.data.HCatRecord;
+import org.apache.hcatalog.mapreduce.HCatInputFormat;
+import org.apache.hcatalog.mapreduce.InputJobInfo;
+
+/**
+ * This is a map reduce test for testing hcat reads of a text-stored table
+ * whose columns span the primitive types (the mapper reads them as Integer,
+ * Integer, Integer, Long, Float, Double, and String). Each record is copied
+ * into a new HCatRecord and written as text to the given output directory.
+ * This tests that hcat hands the right data to the map reduce program.
+ *
+ * Usage: hadoop jar <test jar> org.apache.hcatalog.utils.ReadText
+ * -libjars <hive-hcat jar> <serveruri> <table> <output dir>
+ * The hcat jar location should be specified as file://<full path to jar>
+ */
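+// A hypothetical invocation (jar name, metastore URI, and names are
+// illustrative only; the e2e harness supplies the real values):
+//
+//   hadoop jar testudf.jar org.apache.hcatalog.utils.ReadText \
+//       -libjars file:///path/to/hcatalog.jar \
+//       thrift://localhost:9083 all100k /user/hcat/readtext_out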
+public class ReadText extends Configured implements Tool {
+
+ public static class Map
+ extends Mapper<WritableComparable, HCatRecord, IntWritable, HCatRecord>{
+
+ int t;
+ int si;
+ int i;
+ long b;
+ float f;
+ double d;
+ String s;
+
+ @Override
+ protected void map(WritableComparable key, HCatRecord value,
+ org.apache.hadoop.mapreduce.Mapper<WritableComparable,HCatRecord,
+ IntWritable,HCatRecord>.Context context)
+        throws IOException, InterruptedException {
+            // the field names suggest tinyint, smallint, int, bigint, float,
+            // double, and string columns; the first two arrive as Integers
+            t = (Integer) value.get(0);
+            si = (Integer) value.get(1);
+            i = (Integer) value.get(2);
+            b = (Long) value.get(3);
+            f = (Float) value.get(4);
+            d = (Double) value.get(5);
+            s = (String) value.get(6);
+
+ HCatRecord record = new DefaultHCatRecord(7);
+ record.set(0, t);
+ record.set(1, si);
+ record.set(2, i);
+ record.set(3, b);
+ record.set(4, f);
+ record.set(5, d);
+ record.set(6, s);
+
+ context.write(null, record);
+
+ }
+ }
+
+ public int run(String[] args) throws Exception {
+ Configuration conf = getConf();
+ args = new GenericOptionsParser(conf, args).getRemainingArgs();
+
+ String serverUri = args[0];
+ String tableName = args[1];
+ String outputDir = args[2];
+ String dbName = null;
+
+ String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL);
+ if(principalID != null)
+ conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID);
+ Job job = new Job(conf, "ReadText");
+ HCatInputFormat.setInput(job, InputJobInfo.create(
+ dbName, tableName, null, serverUri, principalID));
+        // the output side is plain text via TextOutputFormat, so no
+        // HCatOutputFormat setup is needed
+
+ job.setInputFormatClass(HCatInputFormat.class);
+ job.setOutputFormatClass(TextOutputFormat.class);
+ job.setJarByClass(ReadText.class);
+ job.setMapperClass(Map.class);
+ job.setOutputKeyClass(IntWritable.class);
+ job.setOutputValueClass(HCatRecord.class);
+ job.setNumReduceTasks(0);
+ FileOutputFormat.setOutputPath(job, new Path(outputDir));
+ return (job.waitForCompletion(true) ? 0 : 1);
+ }
+
+ public static void main(String[] args) throws Exception {
+ int exitCode = ToolRunner.run(new ReadText(), args);
+ System.exit(exitCode);
+ }
+}