Posted to hcatalog-commits@incubator.apache.org by ha...@apache.org on 2011/12/06 20:05:39 UTC
svn commit: r1211077 [6/7] - in /incubator/hcatalog/trunk: ./ conf/
src/test/e2e/hcatalog/ src/test/e2e/hcatalog/conf/
src/test/e2e/hcatalog/deployers/ src/test/e2e/hcatalog/drivers/
src/test/e2e/hcatalog/tests/ src/test/e2e/hcatalog/tools/generate/ sr...
Added: incubator/hcatalog/trunk/src/test/e2e/hcatalog/tests/hive.conf
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/tests/hive.conf?rev=1211077&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/tests/hive.conf (added)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/tests/hive.conf Tue Dec 6 20:05:37 2011
@@ -0,0 +1,117 @@
+#!/home/y/bin/perl
+
+ #
+ # Do
+ # egrep '^#|name.*=>' hive.conf | egrep -v '^#!|egrep' | less
+ # to get an outline of this test conf file
+ #
+
+ # Has a couple of Hive set directives:
+ # set hive.exec.dynamic.partition.mode=nonstrict;
+ # set hive.exec.dynamic.partition=true;
+
+
+$cfg = {
+ 'driver' => 'Hive',
+ 'groups' => [
+ {
+ 'name' => 'Hive_Checkin',
+ 'tests' => [ {
+ 'num' => 1,
+ 'sql' => q\select * from studenttab10k;\,
+ 'floatpostprocess' => 1,
+ 'delimiter' => ' ',
+ },
+ {
+ 'num' => 2,
+ 'sql' => q\drop table if exists checkin_2;
+ create table checkin_2 as select * from studenttab10k;\,
+ 'floatpostprocess' => 1,
+ 'delimiter' => ' ',
+ },
+ {
+ 'num' => 3,
+ 'sql' => q\SELECT studenttab10k.* FROM studenttab10k JOIN votertab10k ON (studenttab10k.name = votertab10k.name);\,
+ 'floatpostprocess' => 1,
+ 'delimiter' => ' ',
+ },
+ {
+ 'num' => 4,
+ 'sql' => q"
+ drop table if exists multi_insert_1_1;
+ drop table if exists multi_insert_1_2;
+ drop table if exists multi_insert_1_3;
+
+ create table multi_insert_1_1 (
+ name string,
+ ds string)
+ row format delimited
+ fields terminated by '\\t'
+ stored as textfile;
+
+ create table multi_insert_1_2 (
+ name string,
+ ds string)
+ row format delimited
+ fields terminated by '\\t'
+ stored as textfile;
+
+ create table multi_insert_1_3 (
+ name string,
+ ds string)
+ row format delimited
+ fields terminated by '\\t'
+ stored as textfile;
+
+ from studentparttab30k
+ insert overwrite table multi_insert_1_1
+ select name, ds
+ where ds = '20110924'
+
+ insert overwrite table multi_insert_1_2
+ select name, ds
+ where ds = '20110925'
+
+ insert overwrite table multi_insert_1_3
+ select name, ds
+ where ds = '20110926';
+ ",
+ 'result_table' => ['multi_insert_1_1',
+ 'multi_insert_1_2',
+ 'multi_insert_1_3'],
+ 'verify_sql' =>["select name, ds
+ from studentparttab30k
+ where ds = '20110924';",
+ "select name, ds
+ from studentparttab30k
+ where ds = '20110925';",
+ "select name, ds
+ from studentparttab30k
+ where ds = '20110926';"]
+ } ]
+ }, # end g
+ {
+ 'name' => 'Hive_Read',
+ 'tests' => [ {
+ 'num' => 1,
+ 'sql' => q\select * from all100krc;\,
+ 'floatpostprocess' => 1,
+ 'delimiter' => ' ',
+ } ]
+ }, # end g
+ {
+ 'name' => 'Hive_Write',
+ 'tests' => [ {
+ 'num' => 1,
+ 'sql' => q\
+drop table if exists hive_write_1;
+create table hive_write_1 (name string, age int, gpa double) stored as rcfile;
+insert into TABLE hive_write_1 select * from all100krc;\,
+ 'result_table' => 'hive_write_1',
+ 'verify_sql' =>"select name, age, gpa from all100krc;",
+ 'floatpostprocess' => 1,
+ 'delimiter' => ' ',
+ } ]
+ }
+ ]
+}
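
Each test above is a Perl hash whose keys drive the e2e harness. As a reading aid, here is a minimal sketch of the shape of one entry; the key names come from the file itself, but the comments describe assumed harness behavior rather than anything this commit spells out:

    # Hypothetical test entry, illustrative only.
    {
        'num'              => 99,                  # test number within its group
        'sql'              => q\select * from studenttab10k;\,  # what the Hive driver runs
        'result_table'     => 'some_table',        # optional: table(s) whose contents get checked
        'verify_sql'       => "select * from some_source;",     # optional: produces expected rows
        'floatpostprocess' => 1,                   # presumably normalizes floats before comparing
        'delimiter'        => "\t",                # field delimiter of the compared output
    },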
Added: incubator/hcatalog/trunk/src/test/e2e/hcatalog/tests/pig.conf
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/tests/pig.conf?rev=1211077&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/tests/pig.conf (added)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/tests/pig.conf Tue Dec 6 20:05:37 2011
@@ -0,0 +1,173 @@
+#!/home/y/bin/perl
+
+ #
+ # Do
+ # egrep '^#|name.*=>' pig.conf | egrep -v '^#!|egrep' | less
+ # to get an outline of this test conf file
+ #
+
+ # Has a couple of Hive set directives:
+ # set hive.exec.dynamic.partition.mode=nonstrict;
+ # set hive.exec.dynamic.partition=true;
+
+
+$cfg = {
+ 'driver' => 'Pig',
+ 'groups' => [
+# This first group should be moved to deployer ?
+ {
+ 'name' => 'Pig_Checkin',
+ 'tests' => [
+
+ {
+ 'num' => 1
+ ,'hcat_prep'=>q\drop table if exists pig_checkin_1;
+create table pig_checkin_1 (name string, age int, gpa double) STORED AS TEXTFILE;\
+ ,'pig' => q\a = load 'studenttab10k' using org.apache.hcatalog.pig.HCatLoader();
+store a into 'pig_checkin_1' using org.apache.hcatalog.pig.HCatStorer();\
+ ,'result_table' => 'pig_checkin_1'
+ ,'sql' => q\select * from studenttab10k;\
+ ,'floatpostprocess' => 1
+ ,'delimiter' => ' '
+ },
+ {
+ 'num' => 2
+ ,'pig' => q\a = load 'studenttab10k' using org.apache.hcatalog.pig.HCatLoader();
+b = load 'votertab10k' using org.apache.hcatalog.pig.HCatLoader();
+c = join a by name, b by name;
+store c into ':OUTPATH:';\
+ ,'sql' => [ 'select s.name, s.age, gpa, v.name, v.age, registration, contributions from studenttab10k s join votertab10k v on (s.name = v.name);']
+ ,'floatpostprocess' => 1
+ ,'delimiter' => ' '
+ },
+ {
+ 'num' => 3
+ ,'pig' => q\a = load 'studenttab10k' using org.apache.hcatalog.pig.HCatLoader();
+b = load ':INPATH:/votertab10k' as (name:chararray, age:int, registration:chararray, contributions:float);
+c = join a by name, b by name;
+store c into ':OUTPATH:';\
+ ,'sql' => q\select s.name, s.age, gpa, v.name, v.age, registration, contributions from studenttab10k s join votertab10k v on (s.name = v.name);\
+ ,'floatpostprocess' => 1
+ ,'delimiter' => ' '
+ },
+ {
+ 'num' => 4
+ ,'hcat_prep'=>q\drop table if exists pig_checkin_4_1;
+drop table if exists pig_checkin_4_2;
+create table pig_checkin_4_1 (name string, age int, gpa double) STORED AS TEXTFILE;
+create table pig_checkin_4_2 (name string, age int, gpa double) STORED AS TEXTFILE;\
+ ,'pig' => q\a = load 'studenttab10k' using org.apache.hcatalog.pig.HCatLoader();
+split a into b if age <=40, c if age > 40;
+store b into 'pig_checkin_4_1' using org.apache.hcatalog.pig.HCatStorer();
+store c into 'pig_checkin_4_2' using org.apache.hcatalog.pig.HCatStorer();\
+ ,'result_table' => ['pig_checkin_4_1','pig_checkin_4_2']
+ ,'sql' => [ 'select * from studenttab10k where age<=40;', 'select * from studenttab10k where age>40;']
+ ,'floatpostprocess' => 1
+ ,'delimiter' => ' '
+ },
+ {
+ 'num' => 5
+ ,'hcat_prep'=>q\drop table if exists pig_checkin_5;
+create table pig_checkin_5 (name string, age int, gpa double) STORED AS TEXTFILE;\
+ ,'pig' => q\a = load 'studenttab10k' using org.apache.hcatalog.pig.HCatLoader();
+split a into b if age <=40, c if age > 40;
+store b into 'pig_checkin_5' using org.apache.hcatalog.pig.HCatStorer();
+store c into ':OUTPATH:';\
+ ,'result_table' => ['pig_checkin_5','?']
+ ,'sql' => [ 'select * from studenttab10k where age<=40;', 'select * from studenttab10k where age>40;']
+ ,'floatpostprocess' => 1
+ ,'delimiter' => ' '
+ },
+
+ ],
+ }, # end g
+ {
+ 'name' => 'Pig_Read',
+ 'tests' => [
+
+ {
+ 'num' => 1
+ ,'pig' => q\a = load 'all100k' using org.apache.hcatalog.pig.HCatLoader();
+store a into ':OUTPATH:';\
+ ,'sql' => q\select * from all100k;\
+ ,'floatpostprocess' => 1
+ ,'delimiter' => ' '
+ },
+ {
+ 'num' => 2
+ ,'pig' => q\a = load 'all100kjson' using org.apache.hcatalog.pig.HCatLoader();
+b = foreach a generate s, i, d;
+store b into ':OUTPATH:';\
+ ,'sql' => q\select s, i, d from all100kjson;\
+ ,'floatpostprocess' => 1
+ ,'delimiter' => ' '
+ },
+ {
+ 'num' => 3
+ ,'pig' => q\a = load 'all100krc' using org.apache.hcatalog.pig.HCatLoader();
+store a into ':OUTPATH:';\
+ ,'sql' => q\select * from all100krc;\
+ ,'floatpostprocess' => 1
+ ,'delimiter' => ' '
+ }
+ ],
+ }, # end g
+ {
+ 'name' => 'Pig_Write',
+ 'tests' => [
+ {
+ 'num' => 1
+ ,'hcat_prep'=>q\drop table if exists pig_write_1;
+create table pig_write_1(t tinyint,si smallint,i int,b bigint,bool boolean,f float,d double,s string) stored as rcfile;\
+ ,'pig' => q\a = load ':INPATH:/all100k' using PigStorage(':') as (t:int,si:int,i:int,b:int,bo:boolean,f:float,d:double,s:chararray);
+store a into 'pig_write_1' using org.apache.hcatalog.pig.HCatStorer();\
+ ,'result_table' => 'pig_write_1'
+ ,'sql' => q\select * from all100k;\
+ ,'floatpostprocess' => 1
+ ,'delimiter' => ' '
+ },
+ {
+ 'num' => 2
+ ,'hcat_prep'=>q\drop table if exists pig_write_2;
+create table pig_write_2(
+ s string,
+ i int,
+ d double,
+ m map<string, string>,
+ bb array<struct<a: int, b: string>>)
+ STORED AS INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat'
+ INPUTDRIVER 'org.apache.hcatalog.pig.drivers.LoadFuncBasedInputDriver' OUTPUTDRIVER 'org.apache.hcatalog.pig.drivers.StoreFuncBasedOutputDriver'
+ TBLPROPERTIES ('hcat.pig.loader'='org.apache.pig.builtin.JsonLoader', 'hcat.pig.storer'='org.apache.pig.builtin.JsonStorage', 'hcat.pig.loader.args'=
+'s:chararray, i:int, d:double, m:map[chararray], bb:{t:(a:int, b:chararray)}', 'hcat.pig.args.delimiter'=' ');
+\
+ ,'pig' => q\a = load 'all100kjson' using org.apache.hcatalog.pig.HCatLoader();
+b = foreach a generate s, i, d;
+store b into ':OUTPATH:';\
+ ,'sql' => q\select IFNULL(s, ""), IFNULL(i, ""), IFNULL(d, "") from all100kjson;\
+ ,'floatpostprocess' => 1
+ ,'delimiter' => ' '
+ },
+ {
+ 'num' => 3
+ ,'hcat_prep'=>q\drop table if exists pig_write_3;
+create table pig_write_3(
+ name string,
+ age int,
+ gpa double)
+stored as rcfile
+TBLPROPERTIES (
+ 'hcat.isd'='org.apache.hcatalog.rcfile.RCFileInputDriver',
+ 'hcat.osd'='org.apache.hcatalog.rcfile.RCFileOutputDriver'
+);
+\
+ ,'pig' => q\a = load 'all100krc' using org.apache.hcatalog.pig.HCatLoader();
+store a into ':OUTPATH:';\
+ ,'sql' => q\select * from all100krc;\
+ ,'floatpostprocess' => 1
+ ,'delimiter' => ' '
+ }
+ ],
+ }, # end g
+
+ ]
+}
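
Two conventions in the Pig tests above deserve a note: 'hcat_prep' carries HCatalog DDL that is presumably run before the Pig script, and the scripts use :INPATH: and :OUTPATH: placeholders for harness-managed HDFS paths. The substitution code is not part of this commit; a minimal sketch of what the driver presumably does, using made-up config key names:

    # Assumed placeholder expansion; 'inpathbase' and 'outpathbase' are hypothetical keys.
    my $script  = $test->{'pig'};
    my $inpath  = $harnessCfg->{'inpathbase'};                  # HDFS dir holding generated input
    my $outpath = $harnessCfg->{'outpathbase'} . "/$testName";  # per-test HDFS output dir
    $script =~ s/:INPATH:/$inpath/g;
    $script =~ s/:OUTPATH:/$outpath/g;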
Modified: incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/generate/generate_data.pl
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/generate/generate_data.pl?rev=1211077&r1=1211076&r2=1211077&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/generate/generate_data.pl (original)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/generate/generate_data.pl Tue Dec 6 20:05:37 2011
@@ -1,26 +1,28 @@
#!/usr/bin/env perl
-############################################################################
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
+############################################################################
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
# A utility to generate test data for pig test harness tests.
#
#
use strict;
use charnames ();
+use Cwd;
+use IPC::Run qw(run);
our @firstName = ("alice", "bob", "calvin", "david", "ethan", "fred",
"gabriella", "holly", "irene", "jessica", "katie", "luke", "mike", "nick",
@@ -137,16 +139,15 @@ sub randomGreekLetter()
sub randomNameAgeGpaMap()
{
my $size = int(rand(3));
- my $map = "[";
my @mapValues = ( "name#" . randomName(), "age#" . randomAge(), "gpa#" . randomGpa() );
$size = ($size == 0 ? 1 : $size);
+ my $map;
for(my $i = 0; $i <= $size; $i++) {
$map .= $mapValues[$i];
if($i != $size) {
$map .= ",";
}
}
- $map .= "]";
return $map;
}
@@ -169,47 +170,103 @@ sub getMapFields($) {
sub randomNameAgeGpaTuple()
{
my $gpa = sprintf("%0.2f", randomGpa());
- return "(" . randomName() . "," . randomAge() . "," . $gpa . ")" ;
+ return randomName() . "," . randomAge() . "," . $gpa ;
}
-sub randomNameAgeGpaBag()
+sub randomList()
{
- my $size = int(rand(int(3)));
- my $bag = "{";
- $size = ($size == 0 ? 1 : $size);
+ my $size = int(rand(int(3))) + 1;
+ my $bag;
for(my $i = 0; $i <= $size; $i++) {
- $bag .= randomNameAgeGpaTuple();
- if($i != $size) {
- $bag .= ",";
- }
+ $bag .= randomAge();
+ $bag .= "," if ($i != $size);
}
- $bag .= "}";
return $bag;
}
-our @textDoc = (
- "The cosmological proof, which we are now about to ex-",
- "amine, retains the connection of absolute necessity with the",
- "highest reality, but instead of reasoning, like the former proof,",
- "from the highest reality to necessity of existence, it reasons",
- "from the previously given unconditioned necessity of some",
- "being to the unlimited reality of that being. It thus enters upon",
- "a course of reasoning which, whether rational or only pseudo-",
- "rational, is at any rate natural, and the most convincing not",
- "only for common sense but even for speculative understand-",
- "ing. It also sketches the first outline of all the proofs in natural",
- "theology, an outline which has always been and always will",
- "be followed, however much embellished and disguised by",
- "superfluous additions. This proof, termed by Leibniz the proof",
- "a contingentia mundi, we shall now proceed to expound and",
- "examine.");
+sub randomEscape()
+{
+ my $r = rand(1);
+ if ($r < 0.16) {
+ return '\"';
+ } elsif ($r < 0.32) {
+ return '\\\\';
+ } elsif ($r < 0.48) {
+ return '\/';
+ } elsif ($r < 0.64) {
+ return '\n';
+ } elsif ($r < 0.80) {
+ return '\t';
+ } else {
+ return randomUnicodeHex();
+ }
+}
+
+
+sub randomJsonString()
+{
+ my $r = rand(1);
+ #if ($r < 0.05) {
+ # return "null";
+ #} elsif ($r < 0.10) {
+ # return randomName() . randomEscape() . randomName();
+ #} else {
+ return randomName();
+ #}
+}
+
+sub randomNullBoolean()
+{
+ my $r = rand(1);
+ if ($r < 0.05) {
+ return 'null';
+ } elsif ($r < 0.525) {
+ return 'true';
+ } else {
+ return 'false';
+ }
+}
+
+sub randomJsonMap()
+{
+ if (rand(1) < 0.05) {
+ return 'null';
+ }
+
+ my $str = "{";
+ my $num = rand(5) + 1;
+ for (my $i = 0; $i < $num; $i++) {
+ $str .= "," unless $i == 0;
+ $str .= '"' . randomCity() . '" : "' . randomName() . '"';
+ }
+ $str .= "}";
+ return $str;
+}
+
+sub randomJsonBag()
+{
+ if (rand(1) < 0.05) {
+ return 'null';
+ }
+
+ my $str = "[";
+ my $num = rand(5) + 1;
+ for (my $i = 0; $i < $num; $i++) {
+ $str .= "," unless $i == 0;
+ $str .= '{"a":' . int(rand(2**32) - 2**31) . ',"b":"' .
+ randomJsonString() . '"}';
+ }
+ $str .= "]";
+}
sub usage()
{
- warn "Usage: $0 filetype numrows tablename targetdir [nosql]\n";
- warn "\tValid filetypes [studenttab, studentcolon, \n";
- warn "\t\tstudentnulltab, studentcomplextab, studentctrla, voternulltab\n";
- warn "\t\tvotertab, reg1459894, textdoc, unicode, manual]\n";
+ warn "Usage: $0 filetype numrows tablename hdfstargetdir [format]\n";
+ warn "\tValid filetypes [studenttab, studentparttab, \n";
+ warn "\t\tstudentnull, allscalars, studentcomplextab, \n";
+ warn "\t\tvoternulltab votertab, unicode]\n";
+ warn "hdfstargetdir is the directory in hdfs that data will be copied to for loading into tables\n";
+ warn "format is one of rc, csv, or json. csv is the default";
}
our @greekUnicode = ("\N{U+03b1}", "\N{U+03b2}", "\N{U+03b3}", "\N{U+03b4}",
@@ -226,26 +283,93 @@ sub randomUnicodeNonAscii()
return $name;
}
+sub randomUnicodeHex()
+{
+ return sprintf "\\u%04x", 0x3b1 + int(rand(25));
+}
+
my $testvar = "\N{U+03b1}\N{U+03b3}\N{U+03b1}\N{U+03c0}\N{U+03b7}";
-sub getBulkCopyCmd(){
- my $sourceDir= shift;
- my $tableName = shift;
- my $delimeter = shift;
- $delimeter = '\t' if ( !$delimeter );
+sub getBulkCopyCmd($$;$)
+{
+ my ($tableName, $delimeter, $filename) = @_;
+
+ $filename = $tableName if (!defined($filename));
+
+ return "load data local infile '" . cwd . "/$filename'
+ into table $tableName
+ columns terminated by '$delimeter';"
+}
+
+sub generateSecondHalfCreateTable($$$;$$$)
+{
+ my ($hivefp, $format, $location, $fieldDelim, $structDelim, $mapDelim) = @_;
+
+ if ($format eq "csv") {
+ print $hivefp "
+row format delimited
+fields terminated by '$fieldDelim'
+stored as textfile
+location '$location';\n";
+ } elsif ($format eq "rc") {
+ print $hivefp "
+stored as rcfile
+location '$location'
+TBLPROPERTIES (
+ 'hcat.isd'='org.apache.hcatalog.rcfile.RCFileInputDriver',
+ 'hcat.osd'='org.apache.hcatalog.rcfile.RCFileOutputDriver'
+);\n";
+ } elsif ($format eq "json") {
+ print $hivefp " STORED AS
+INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat'
+INPUTDRIVER 'org.apache.hcatalog.pig.drivers.LoadFuncBasedInputDriver' OUTPUTDRIVER 'org.apache.hcatalog.pig.drivers.StoreFuncBasedOutputDriver';
+location '$location'
+TBLPROPERTIES ('hcat.pig.loader'='org.apache.pig.builtin.JsonLoader', 'hcat.pig.storer'='org.apache.pig.builtin.JsonStorage', 'hcat.pig.loader.args'=
+'s:chararray, i:int, d:double, m:map[chararray], bb:{t:(a:int, b:chararray)}', 'hcat.pig.args.delimiter'='\t')
+;\n";
+ } else {
+ die "Unknown format $format\n";
+ }
+}
+
+our $hadoopCoreJar = undef;
-# . "\nCOPY $tableName FROM \'$sourceDir/$tableName' using DELIMITERS \'". '\t' . "\' WITH NULL AS '\n';";
+sub findHadoopJars()
+{
+ if (not defined $ENV{'HADOOP_HOME'}) {
+ die "Please set \$HADOOP_HOME\n";
+ }
- my $cmd= "\nbegin transaction;"
- . "\nCOPY $tableName FROM \'$sourceDir/$tableName' using DELIMITERS \'$delimeter\';"
- . "\ncommit;"
- . "\n";
+ my $coreJar = `ls $ENV{'HADOOP_HOME'}/hadoop-core-*.jar`;
+ my $loggingJar = `ls $ENV{'HADOOP_HOME'}/lib/commons-logging-*.jar | grep -v api`;
+ my $cfgJar = `ls $ENV{'HADOOP_HOME'}/lib/commons-configuration-*.jar`;
+ my $langJar = `ls $ENV{'HADOOP_HOME'}/lib/commons-lang-*.jar`;
+ my $cliJar = `ls $ENV{'HADOOP_HOME'}/lib/commons-cli-*.jar`;
+ chomp $coreJar;
+ chomp $loggingJar;
+ chomp $cfgJar;
+ chomp $langJar;
+ chomp $cliJar;
+ return ($coreJar, $loggingJar, $cfgJar, $langJar, $cliJar);
+}
+
+sub findHiveJars()
+{
+ if (not defined $ENV{'HIVE_HOME'}) {
+ die "Please set \$HIVE_HOME\n";
+ }
- return $cmd;
+ my $execJar = `ls ../../../../../hive/external/build/ql/hive-exec-*.jar`;
+ my $cliJar = `ls ../../../../../hive/external/build/cli/hive-cli-*.jar`;
+ chomp $execJar;
+ chomp $cliJar;
+ return ($execJar, $cliJar);
}
-# main($)
+
+
+# main
{
# explicitly call srand so we get the same data every time
# we generate it. However, we set it individually for each table type.
@@ -255,50 +379,121 @@ sub getBulkCopyCmd(){
my $filetype = shift;
my $numRows = shift;
my $tableName = shift;
- my $targetDir= shift;
- my $nosql = shift;
+ my $hdfsTargetDir= shift;
+ my $format = shift;
- die usage() if (!defined($filetype) || !defined($numRows));
+ die usage() if (!defined($filetype) || !defined($numRows) || !defined($tableName) || !defined($hdfsTargetDir));
if ($numRows <= 0) { usage(); }
- if ( $targetDir ) {
- open(HDFS, "> $targetDir/$tableName") or die("Cannot open file $tableName, $!\n");
- open(PSQL, "> $targetDir/$tableName.sql") or die("Cannot open file $tableName.sql, $!\n") unless defined $nosql;
- } else {
- open(HDFS, "> $tableName") or die("Cannot open file $tableName, $!\n");
- open(PSQL, "> $tableName.sql") or die("Cannot open file $tableName.sql, $!\n") unless defined $nosql;
+ $format = "csv" if not defined $format;
+
+ if ($format eq "csv") {
+ open(HDFS, "> $tableName") or die("Cannot open file $tableName, $!\n");
}
+ open(MYSQL, "> $tableName.mysql.sql") or
+ die("Cannot open file $tableName.mysql.sql, $!\n");
+ open(my $hivefp, "> $tableName.hcat.sql") or
+ die("Cannot open file $tableName.hive.sql, $!\n");
- if ($filetype eq "manual") {
- } elsif ($filetype eq "studenttab") {
+ if ($filetype eq "studenttab") {
srand(3.14159 + $numRows);
- print PSQL "create table $tableName (name varchar(100), age integer, gpa float(3));\n" unless defined $nosql;
- print PSQL &getBulkCopyCmd( $targetDir, $tableName ) unless defined $nosql;
- for (my $i = 0; $i < $numRows; $i++) {
- my $name = randomName();
- my $age = randomAge();
- my $gpa = randomGpa();
- printf HDFS "%s\t%d\t%.2f\n", $name, $age, $gpa;
+ print MYSQL "drop table if exists $tableName;\n";
+ print MYSQL "create table $tableName (name varchar(100), age integer, gpa float(3));\n";
+ print $hivefp "drop table if exists $tableName;\ncreate external table $tableName(
+ name string,
+ age int,
+ gpa double)";
+
+ generateSecondHalfCreateTable($hivefp, $format,
+ "$hdfsTargetDir/$tableName", '\\t');
+ if ($format eq "csv") {
+ print MYSQL &getBulkCopyCmd($tableName, "\t", "$tableName");
+ for (my $i = 0; $i < $numRows; $i++) {
+ my $name = randomName();
+ my $age = randomAge();
+ my $gpa = randomGpa();
+ printf HDFS "%s\t%d\t%.2f\n", $name, $age, $gpa;
+ }
+ } elsif ($format eq "rc") {
+ print MYSQL &getBulkCopyCmd($tableName, "\t", "$tableName.plain");
+ my ($hadoopCoreJar, $commonsLoggingJar, $commonsConfigJar,
+ $commonsLangJar, $commonsCliJar) = findHadoopJars();
+ my ($hiveExecJar, $hiveCliJar) = findHiveJars();
+ my @cmd = ('java', '-cp',
+ "../tools/generate/java/hive-gen.jar:$hadoopCoreJar:" .
+ "$commonsLoggingJar:$commonsConfigJar:$commonsLangJar:" .
+ "$hiveExecJar",
+ 'org.apache.hadoop.hive.tools.generate.RCFileGenerator',
+ 'student', $numRows, "$tableName", "$tableName.plain");
+ run(\@cmd) or die "Unable to run command [" . join(" ", @cmd)
+ . "]\n";
+ #@cmd = ('java', '-cp',
+ # "$hiveCliJar:$hiveExecJar:$hadoopCoreJar:" .
+ # "$commonsLoggingJar:$commonsCliJar:$commonsConfigJar",
+ # "org.apache.hadoop.hive.cli.RCFileCat", "$tableName");
+ #run(\@cmd, '>', $tableName) or
+ # die "Unable to run command [" . join(" ", @cmd) . "]\n";
+ } else {
+ die "Unknown format $format\n";
}
+ } elsif ($filetype eq "studentparttab") {
+ srand(3.14159 + $numRows);
+ print MYSQL "drop table if exists $tableName;\n";
+ print MYSQL "create table $tableName (name varchar(100), age integer, gpa float(3), ds char(8));\n";
+ print MYSQL &getBulkCopyCmd($tableName, "\t", "$tableName.mysql");
+ print $hivefp "drop table if exists $tableName;\ncreate external table $tableName(
+ name string,
+ age int,
+ gpa double)
+ partitioned by (ds string)
+ row format delimited
+ fields terminated by '\\t'
+ stored as textfile
+ location '$hdfsTargetDir/$tableName';
+ alter table $tableName add IF NOT EXISTS partition (ds='20110924') location '$hdfsTargetDir/$tableName/$tableName.20110924';
+ alter table $tableName add IF NOT EXISTS partition (ds='20110925') location '$hdfsTargetDir/$tableName/$tableName.20110925';
+ alter table $tableName add IF NOT EXISTS partition (ds='20110926') location '$hdfsTargetDir/$tableName/$tableName.20110926';
+ ";
+ open(MYSQLDATA, "> $tableName.mysql") or die("Cannot open file $tableName.mysql, $!\n");
+ for (my $ds = 20110924; $ds < 20110927; $ds++) {
+ close(HDFS);
+ open(HDFS, "> $tableName.$ds") or die("Cannot open file $tableName.$ds, $!\n");
+ for (my $i = 0; $i < $numRows; $i++) {
+ my $name = randomName();
+ my $age = randomAge();
+ my $gpa = randomGpa();
+ printf HDFS "%s\t%d\t%.2f\n", $name, $age, $gpa;
+ printf MYSQLDATA "%s\t%d\t%.3f\t%d\n", $name, $age, $gpa, $ds;
+ }
+ }
+ close(MYSQLDATA);
- } elsif ($filetype eq "studentnulltab") {
+ } elsif ($filetype eq "studentnull") {
srand(3.14159 + $numRows);
- print PSQL "create table $tableName (name varchar(100), age integer, gpa float(3));\n";
- print PSQL "begin transaction;\n";
+ print MYSQL "drop table if exists $tableName;\n";
+ print MYSQL "create table $tableName (name varchar(100), age integer, gpa float(3));\n";
+ print $hivefp "drop table if exists $tableName;\ncreate external table $tableName(
+ name string,
+ age int,
+ gpa double)
+ row format delimited
+ fields terminated by '\\001'
+ stored as textfile
+ location '$hdfsTargetDir/$tableName';\n";
for (my $i = 0; $i < $numRows; $i++) {
# generate nulls in a random fashion
my $name = rand(1) < 0.05 ? '' : randomName();
my $age = rand(1) < 0.05 ? '' : randomAge();
my $gpa = rand(1) < 0.05 ? '' : randomGpa();
- printf PSQL "insert into $tableName (name, age, gpa) values(";
- print PSQL ($name eq ''? "null, " : "'$name', "), ($age eq ''? "null, " : "$age, ");
+ printf MYSQL "insert into $tableName (name, age, gpa) values(";
+ print MYSQL ($name eq ''? "null, " : "'$name', "), ($age eq ''? "null, " : "$age, ");
if($gpa eq '') {
- print PSQL "null);\n"
+ print MYSQL "null);\n"
} else {
- printf PSQL "%.2f);\n", $gpa;
+ printf MYSQL "%.2f);\n", $gpa;
}
- print HDFS "$name\t$age\t";
+ print HDFS "$name$age";
if($gpa eq '') {
print HDFS "\n"
} else {
@@ -306,69 +501,65 @@ sub getBulkCopyCmd(){
}
}
- print PSQL "commit;\n" unless defined $nosql;
+ print MYSQL "commit;\n";
- } elsif ($filetype eq "studentcolon") {
+ } elsif ($filetype eq "allscalars") {
srand(2.718281828459 + $numRows);
- print PSQL "create table $tableName (name varchar(100), age integer, gpa float(3));\n" unless defined $nosql;
- print PSQL &getBulkCopyCmd( $targetDir, $tableName, ':' ) unless defined $nosql;
+ print MYSQL "drop table if exists $tableName;\n";
+ print MYSQL "create table $tableName (t tinyint, si smallint, i int, b
+ bigint, f double, d double, s varchar(25));\n";
+ print MYSQL &getBulkCopyCmd($tableName, ':');
+ print $hivefp "drop table if exists $tableName;\ncreate external table $tableName(
+ t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ s string)
+ row format delimited
+ fields terminated by ':'
+ stored as textfile
+ location '$hdfsTargetDir/$tableName';\n
+ alter table $tableName set TBLPROPERTIES
+ ('hcat.pig.loader.args'=':', 'hcat.pig.storer.args'=':');\n";
for (my $i = 0; $i < $numRows; $i++) {
- my $name = randomName();
- my $age = randomAge();
- my $gpa = randomGpa();
- printf HDFS "%s:%d:%.2f\n", $name, $age, $gpa;
-=begin
- } elsif ($filetype eq "studentusrdef") {
- srand(6.62606896 + $numRows);
- for (my $i = 0; $i < $numRows; $i++) {
- # TODO need to add SQL info.
- printf("%s,%d,%.2f,", randomName(), randomAge(), randomGpa());
- printf("<%s,%s,%s,%d>,", randomStreet(), randomCity(), randomState(),
- randomZip());
- printf("[%s:<%s,%s>],", randomClass(), randomClass(), randomName());
- printf("{");
- my $elementsInBag = int(rand(100));
- for (my $j = 0; $j < $elementsInBag; $j++) {
- if ($j != 0) { printf(","); }
- printf("<%s,%s,%s>", randomClass(), randomName(), randomGrade());
- }
- printf("}\n");
- }
-=cut
+ printf HDFS "%d:%d:%d:%ld:%.2f:%.2f:%s\n",
+ (int(rand(2**8) - 2**7)),
+ (int(rand(2**16) - 2**15)),
+ (int(rand(2**32) - 2**31)),
+ (int(rand(2**64) - 2**61)),
+ rand(100000.0) - 50000.0,
+ rand(10000000.0) - 5000000.0,
+ randomName();
}
- print PSQL "commit;\n" unless defined $nosql;
-
- } elsif ($filetype eq "studentctrla") {
- srand(6.14159 + $numRows);
- print PSQL "create table $tableName (name varchar(100), age integer, gpa float(3));\n";
- print PSQL "begin transaction;\n";
- for (my $i = 0; $i < $numRows; $i++) {
- my $name = randomName();
- my $age = randomAge();
- my $gpa = randomGpa();
- printf PSQL "insert into $tableName (name, age, gpa) values('%s', %d, %.2f);\n",
- $name, $age, $gpa;
- printf HDFS "%s%d%.2f\n", $name, $age, $gpa;
- }
- print PSQL "commit;\n" unless defined $nosql;
-
-
} elsif ($filetype eq "studentcomplextab") {
srand(3.14159 + $numRows);
- print PSQL "create table $tableName (nameagegpamap varchar(500), nameagegpatuple varchar(500), nameagegpabag varchar(500), nameagegpamap_name varchar(500), nameagegpamap_age integer, nameagegpamap_gpa float(3));\n";
- print PSQL "begin transaction;\n";
+ print MYSQL "drop table if exists $tableName;\n";
+ print MYSQL "create table $tableName (nameagegpamap varchar(500), nameagegpatuple varchar(500), nameagegpabag varchar(500), nameagegpamap_name varchar(500), nameagegpamap_age integer, nameagegpamap_gpa float(3));\n";
+ print MYSQL "begin transaction;\n";
+ print $hivefp "drop table if exists $tableName;\ncreate external table $tableName(
+ nameagegpamap map<string, string>,
+ nameagegpatuple struct<name: string, age: int, gpa: float>,
+ nameagegpabag array<int>)
+ row format delimited
+ fields terminated by '\\t'
+ collection items terminated by ','
+ map keys terminated by '#'
+ stored as textfile
+ location '$hdfsTargetDir/$tableName';\n";
for (my $i = 0; $i < $numRows; $i++) {
# generate nulls in a random fashion
my $map = rand(1) < 0.05 ? '' : randomNameAgeGpaMap();
my $tuple = rand(1) < 0.05 ? '' : randomNameAgeGpaTuple();
- my $bag = rand(1) < 0.05 ? '' : randomNameAgeGpaBag();
- printf PSQL "insert into $tableName (nameagegpamap, nameagegpatuple, nameagegpabag, nameagegpamap_name, nameagegpamap_age, nameagegpamap_gpa) values(";
+ my $bag = rand(1) < 0.05 ? '' : randomList();
+ printf MYSQL "insert into $tableName (nameagegpamap, nameagegpatuple, nameagegpabag, nameagegpamap_name, nameagegpamap_age, nameagegpamap_gpa) values(";
my $mapHash;
if($map ne '') {
$mapHash = getMapFields($map);
}
- print PSQL ($map eq ''? "null, " : "'$map', "),
+ print MYSQL ($map eq ''? "null, " : "'$map', "),
($tuple eq ''? "null, " : "'$tuple', "),
($bag eq '' ? "null, " : "'$bag', "),
($map eq '' ? "null, " : (exists($mapHash->{'name'}) ? "'".$mapHash->{'name'}."', " : "null, ")),
@@ -376,13 +567,23 @@ sub getBulkCopyCmd(){
($map eq '' ? "null);\n" : (exists($mapHash->{'gpa'}) ? "'".$mapHash->{'gpa'}."');\n" : "null);\n"));
print HDFS "$map\t$tuple\t$bag\n";
}
- print PSQL "commit;\n" unless defined $nosql;
+ print MYSQL "commit;\n";
} elsif ($filetype eq "votertab") {
srand(299792458 + $numRows);
- print PSQL "create table $tableName (name varchar(100), age integer, registration varchar(20), contributions float);\n" unless defined $nosql;
- print PSQL &getBulkCopyCmd( $targetDir, $tableName ) unless defined $nosql;
- for (my $i = 0; $i < $numRows; $i++) {
+ print MYSQL "drop table if exists $tableName;\n";
+ print MYSQL "create table $tableName (name varchar(100), age integer, registration varchar(20), contributions float);\n";
+ print MYSQL &getBulkCopyCmd($tableName, "\t");
+ print $hivefp "drop table if exists $tableName;\ncreate external table $tableName(
+ name string,
+ age int,
+ registration string,
+ contributions float)
+ row format delimited
+ fields terminated by '\\t'
+ stored as textfile
+ location '$hdfsTargetDir/$tableName';\n";
+ for (my $i = 0; $i < $numRows; $i++) {
my $name = randomName();
my $age = randomAge();
my $registration = randomRegistration();
@@ -393,22 +594,32 @@ sub getBulkCopyCmd(){
} elsif ($filetype eq "voternulltab") {
srand(299792458 + $numRows);
- print PSQL "create table $tableName (name varchar(100), age integer, registration varchar(20), contributions float);\n" unless defined $nosql;
- print PSQL "begin transaction;\n" unless defined $nosql;
+ print MYSQL "drop table if exists $tableName;\n";
+ print MYSQL "create table $tableName (name varchar(100), age integer, registration varchar(20), contributions float);\n";
+ print MYSQL "begin transaction;\n";
+ print $hivefp "drop table if exists $tableName;\ncreate external table $tableName(
+ name string,
+ age int,
+ registration string,
+ contributions float)
+ row format delimited
+ fields terminated by '\\t'
+ stored as textfile
+ location '$hdfsTargetDir/$tableName';\n";
for (my $i = 0; $i < $numRows; $i++) {
# generate nulls in a random fashion
my $name = rand(1) < 0.05 ? '' : randomName();
my $age = rand(1) < 0.05 ? '' : randomAge();
my $registration = rand(1) < 0.05 ? '' : randomRegistration();
my $contribution = rand(1) < 0.05 ? '' : randomContribution();
- printf PSQL "insert into $tableName (name, age, registration, contributions) values(";
- print PSQL ($name eq ''? "null, " : "'$name', "),
+ printf MYSQL "insert into $tableName (name, age, registration, contributions) values(";
+ print MYSQL ($name eq ''? "null, " : "'$name', "),
($age eq ''? "null, " : "$age, "),
($registration eq ''? "null, " : "'$registration', ");
if($contribution eq '') {
- print PSQL "null);\n"
+ print MYSQL "null);\n"
} else {
- printf PSQL "%.2f);\n", $contribution;
+ printf MYSQL "%.2f);\n", $contribution;
}
print HDFS "$name\t$age\t$registration\t";
if($contribution eq '') {
@@ -417,42 +628,64 @@ sub getBulkCopyCmd(){
printf HDFS "%.2f\n", $contribution;
}
}
- print PSQL "commit;\n" unless defined $nosql;
-
- } elsif ($filetype eq "reg1459894") {
- srand(6.67428 + $numRows);
- print PSQL "create table $tableName (first varchar(10), second varchar(10));\n" unless defined $nosql;
- print PSQL &getBulkCopyCmd( $targetDir, $tableName ) unless defined $nosql;
- for (my $i = 0; $i < $numRows; $i++) {
- my $letter = randomNumLetter();
- my $gkLetter = randomGreekLetter();
- printf HDFS "%s\t%s\n", $letter, $gkLetter;
- }
-
- } elsif ($filetype eq "textdoc") {
- # This one ignores the number of lines. It isn't random either.
- print PSQL "create table $tableName (name varchar(255));\n" unless defined $nosql;
- print PSQL "begin transaction;\n" unless defined $nosql;
- for (my $i = 0; $i < @textDoc; $i++) {
- my $sqlWords = $textDoc[$i];
- $sqlWords =~ s/([\w-]+)/$1,/g;
- print PSQL "insert into $tableName (name) values('($sqlWords)');\n" unless defined $nosql;
- print HDFS "$textDoc[$i]\n";
- }
- print PSQL "commit;\n" unless defined $nosql;
-
+ print MYSQL "commit;\n";
} elsif ($filetype eq "unicode") {
srand(1.41421 + $numRows);
- print PSQL "create table $tableName (name varchar(255));\n" unless defined $nosql;
- print PSQL "begin transaction;\n" unless defined $nosql;
+ print MYSQL "drop table if exists $tableName;\n";
+ print MYSQL "create table $tableName (name varchar(255));\n";
+ print MYSQL "begin transaction;\n";
+ print $hivefp "drop table if exists $tableName;\ncreate external table $tableName(
+ name string)
+ row format delimited
+ fields terminated by '\\t'
+ stored as textfile
+ location '$hdfsTargetDir/$tableName';\n";
for (my $i = 0; $i < $numRows; $i++) {
my $name = randomUnicodeNonAscii();
- printf PSQL "insert into $tableName (name) values('%s');\n",
- $name unless defined $nosql;
+ printf MYSQL "insert into $tableName (name) values('%s');\n", $name;
printf HDFS "%s\n", $name;
}
- print PSQL "commit;\n" unless defined $nosql;
+ print MYSQL "commit;\n";
+ } elsif ($filetype eq "json") {
+ srand(6.0221415 + $numRows);
+ print MYSQL "drop table if exists $tableName;";
+ print MYSQL "create table $tableName(
+ s varchar(100),
+ i int,
+ d double);";
+ print MYSQL &getBulkCopyCmd($tableName, "\t", "$tableName.plain");
+ print $hivefp "drop table if exists $tableName;\ncreate external table $tableName(
+ s string,
+ i int,
+ d double,
+ m map<string, string>,
+ bb array<struct<a: int, b: string>>)
+ STORED AS INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat'
+ INPUTDRIVER 'org.apache.hcatalog.pig.drivers.LoadFuncBasedInputDriver' OUTPUTDRIVER 'org.apache.hcatalog.pig.drivers.StoreFuncBasedOutputDriver'
+ location '$hdfsTargetDir/$tableName'
+ TBLPROPERTIES ('hcat.pig.loader'='org.apache.pig.builtin.JsonLoader', 'hcat.pig.storer'='org.apache.pig.builtin.JsonStorage', 'hcat.pig.loader.args'=
+'s:chararray, i:int, d:double, m:map[chararray], bb:{t:(a:int, b:chararray)}', 'hcat.pig.args.delimiter'='\t');\n";
+ open(PLAIN, ">$tableName.plain") or
+ die("Cannot open file $tableName.hive.sql, $!\n");
+ for (my $i = 0; $i < $numRows; $i++) {
+ my $s = randomJsonString();
+ my $i = int(rand(2**32) - 2**31);
+ my $d = rand(2**10) - 2**9;
+# my $i = rand(1) < 0.05 ? 'null' : (int(rand(2**32) - 2**31)),
+# my $d = rand(1) < 0.05 ? 'null' : (rand(2**10) - 2**9),
+ my $m = randomJsonMap();
+ my $bb = randomJsonBag();
+
+# printf MYSQL "insert into $tableName (name) values('%s');\n", $name;
+ print HDFS qq@{"s":"$s", "i":$i, "d":$d, "m":$m, "bb":$bb}\n@;
+ if ($s eq 'null') {
+ $s="";
+ }
+ print PLAIN "$s\t$i\t$d\n";
+ }
+ close PLAIN;
+ print MYSQL "commit;\n";
} else {
warn "Unknown filetype $filetype\n";
Added: incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/generate/java/build.xml
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/generate/java/build.xml?rev=1211077&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/generate/java/build.xml (added)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/generate/java/build.xml Tue Dec 6 20:05:37 2011
@@ -0,0 +1,71 @@
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.-->
+
+<project name="Hive-Data-Generator" default="generator-jar">
+
+ <property name="generator.jarfile" value="hive-gen.jar" />
+ <property name="generator.build.dir" value="${basedir}/build" />
+ <property name="generator.src.dir" value="${basedir}/org/" />
+
+
+ <path id="generator-classpath">
+ <fileset file="${hive.serde.jarfile}" />
+ <fileset file="${hive.ql.jarfile}" />
+ <fileset file="${hadoop.core.jarfile}" />
+ </path>
+
+ <target name="init">
+ <mkdir dir="${generator.build.dir}" />
+ </target>
+
+ <target name="clean">
+ <delete dir="${generator.build.dir}" />
+ <delete file="${generator.jarfile}" />
+ </target>
+
+ <target name="generator-compile"
+ depends="init, serde.jar.check, ql.jar.check, hadoop.jar.check">
+ <echo>*** Compiling UDFs ***</echo>
+ <javac srcdir="${generator.src.dir}" destdir="${generator.build.dir}" debug="on">
+ <classpath refid="generator-classpath" />
+ </javac>
+ </target>
+
+ <target name="generator-jar" depends="generator-compile">
+ <echo>*** Creating UDF jar ***</echo>
+ <jar duplicate="preserve" jarfile="${generator.jarfile}">
+ <fileset dir="build"/>
+ </jar>
+ </target>
+
+ <target name="serde.jar.check" unless="hive.serde.jarfile">
+ <fail message="'hive.serde.jarfile' is not defined.
+ Please pass -Dhive.serde.jarfile=<Hive serde jar to use> to Ant on the command-line." />
+ </target>
+
+ <target name="ql.jar.check" unless="hive.ql.jarfile">
+ <fail message="'hive.ql.jarfile' is not defined.
+ Please pass -Dhive.ql.jarfile=<Hive ql jar to use> to Ant on the command-line." />
+ </target>
+
+ <target name="hadoop.jar.check" unless="hadoop.core.jarfile">
+ <fail message="'hadoop.core.jarfile' is not defined.
+ Please pass -Dhadoop.core.jarfile=<Hadoop core jar to use> to Ant on the command-line." />
+ </target>
+
+
+
+
+</project>
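
The three *.jar.check targets make the jar-location properties mandatory, so the build has to be driven with all of them on the command line. A sketch of invoking it (the jar paths are placeholders):

    use IPC::Run qw(run);
    # Runs the default generator-jar target, producing hive-gen.jar.
    my @ant = ('ant',
               '-Dhive.serde.jarfile=/path/to/hive-serde.jar',
               '-Dhive.ql.jarfile=/path/to/hive-exec.jar',
               '-Dhadoop.core.jarfile=/path/to/hadoop-core.jar');
    run(\@ant) or die "hive-gen.jar build failed\n";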
Added: incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/generate/java/org/apache/hadoop/hive/tools/generate/RCFileGenerator.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/generate/java/org/apache/hadoop/hive/tools/generate/RCFileGenerator.java?rev=1211077&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/generate/java/org/apache/hadoop/hive/tools/generate/RCFileGenerator.java (added)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/generate/java/org/apache/hadoop/hive/tools/generate/RCFileGenerator.java Tue Dec 6 20:05:37 2011
@@ -0,0 +1,217 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.tools.generate;
+
+import java.util.Properties;
+import java.util.Random;
+import java.io.DataOutputStream;
+import java.io.FileOutputStream;
+import java.io.FileWriter;
+import java.io.PrintWriter;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocalFileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.compress.DefaultCodec;
+
+import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
+import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable;
+import org.apache.hadoop.hive.ql.io.RCFile;
+import org.apache.hadoop.hive.ql.io.RCFileOutputFormat;
+
+/**
+ * Generate RCFile test data
+ *
+ */
+public class RCFileGenerator {
+
+ private static Configuration conf = new Configuration();
+ private static Path basedir;
+ private static FileSystem fs;
+ private static Properties tbl;
+ private static Random rand;
+
+ private static Path getFile(String filename) throws Exception {
+ return new Path(basedir, filename);
+ }
+
+ private static String[] firstName = {"alice", "bob", "calvin", "david",
+ "ethan", "fred", "gabriella", "holly", "irene", "jessica", "katie",
+ "luke", "mike", "nick", "oscar", "priscilla", "quinn", "rachel",
+ "sarah", "tom", "ulysses", "victor", "wendy", "xavier", "yuri",
+ "zach"};
+
+ private static String[] lastName = {"allen", "brown", "carson",
+ "davidson", "ellison", "falkner", "garcia", "hernandez", "ichabod",
+ "johnson", "king", "laertes", "miller", "nixon", "ovid", "polk",
+ "quirinius", "robinson", "steinbeck", "thompson", "underhill",
+ "van buren", "white", "xylophone", "young", "zipper"};
+
+ private static String randomName() {
+ StringBuffer buf =
+ new StringBuffer(firstName[rand.nextInt(firstName.length)]);
+ buf.append(' ');
+ buf.append(lastName[rand.nextInt(lastName.length)]);
+ return buf.toString();
+ }
+
+ private static int randomAge() {
+ return rand.nextInt(60) + 18;
+ }
+
+ private static double randomGpa() {
+ return 4 * rand.nextFloat();
+ }
+
+ private static String[] registration = {"democrat", "green",
+ "independent", "libertarian", "republican", "socialist"};
+
+ private static String randomRegistration() {
+ return registration[rand.nextInt(registration.length)];
+ }
+
+ private static double randomContribution() {
+ return rand.nextFloat() * 1000;
+ }
+
+ private static byte[] randomMap() throws Exception {
+ int len = rand.nextInt(5) + 1;
+
+ StringBuffer buf = new StringBuffer();
+ for (int i = 0; i < len; i++) {
+ if (i != 0) buf.append('\u0002');
+ buf.append(firstName[rand.nextInt(26)]);
+ buf.append('\u0003');
+ buf.append(lastName[rand.nextInt(26)]);
+ }
+ return buf.toString().getBytes("UTF-8");
+ }
+
+ private static byte[] randomArray() throws Exception {
+ int len = rand.nextInt(5) + 1;
+
+ StringBuffer buf = new StringBuffer();
+ for (int i = 0; i < len; i++) {
+ if (i != 0) buf.append('\u0002');
+ buf.append(Integer.valueOf(randomAge()).toString());
+ buf.append('\u0003');
+ buf.append(randomName());
+ }
+ return buf.toString().getBytes("UTF-8");
+ }
+
+ private static void usage() {
+ System.err.println("Usage: rcfilegen format number_of_rows " +
+ "output_file plain_output_file");
+ System.err.println(" format one of: student voter alltypes");
+ System.exit(1);
+ }
+
+ public static void main(String[] args) throws Exception {
+ if (args.length != 4) usage();
+
+ String format = args[0];
+ int numRows = Integer.valueOf(args[1]);
+ if (numRows < 1) usage();
+ String output = args[2];
+ String plainOutput = args[3];
+
+ fs = FileSystem.getLocal(conf);
+ basedir = new Path(".");
+
+ genData(format, numRows, output, plainOutput);
+ }
+
+ private static void genData(String format,
+ int numRows,
+ String output, String plainOutput) throws Exception {
+ int numFields = 0;
+ if (format.equals("student")) {
+ rand = new Random(numRows);
+ numFields = 3;
+ } else if (format.equals("voter")) {
+ rand = new Random(1000000000 + numRows);
+ numFields = 4;
+ } else if (format.equals("alltypes")) {
+ rand = new Random(2000000000L + numRows);
+ numFields = 10;
+ }
+
+ RCFileOutputFormat.setColumnNumber(conf, numFields);
+ RCFile.Writer writer = new RCFile.Writer(fs, conf, getFile(output),
+ null, new DefaultCodec());
+
+ PrintWriter pw = new PrintWriter(new FileWriter(plainOutput));
+
+ for (int j = 0; j < numRows; j++) {
+ BytesRefArrayWritable row = new BytesRefArrayWritable(numFields);
+
+ byte[][] fields = null;
+
+ if (format.equals("student")) {
+ byte[][] f = {
+ randomName().getBytes("UTF-8"),
+ Integer.valueOf(randomAge()).toString().getBytes("UTF-8"),
+ Double.valueOf(randomGpa()).toString().getBytes("UTF-8")
+ };
+ fields = f;
+ } else if (format.equals("voter")) {
+ byte[][] f = {
+ randomName().getBytes("UTF-8"),
+ Integer.valueOf(randomAge()).toString().getBytes("UTF-8"),
+ randomRegistration().getBytes("UTF-8"),
+ Double.valueOf(randomContribution()).toString().getBytes("UTF-8")
+ };
+ fields = f;
+ } else if (format.equals("alltypes")) {
+ byte[][] f = {
+ Integer.valueOf(rand.nextInt(Byte.MAX_VALUE)).toString().getBytes("UTF-8"),
+ Integer.valueOf(rand.nextInt(Short.MAX_VALUE)).toString().getBytes("UTF-8"),
+ Integer.valueOf(rand.nextInt()).toString().getBytes("UTF-8"),
+ Long.valueOf(rand.nextLong()).toString().getBytes("UTF-8"),
+ Float.valueOf(rand.nextFloat() * 1000).toString().getBytes("UTF-8"),
+ Double.valueOf(rand.nextDouble() * 1000000).toString().getBytes("UTF-8"),
+ randomName().getBytes("UTF-8"),
+ randomMap(),
+ randomArray()
+ };
+ fields = f;
+ }
+
+
+ for (int i = 0; i < fields.length; i++) {
+ BytesRefWritable field = new BytesRefWritable(fields[i], 0,
+ fields[i].length);
+ row.set(i, field);
+ pw.print(new String(fields[i]));
+ if (i!=fields.length-1)
+ pw.print("\t");
+ else
+ pw.println();
+ }
+
+ writer.append(row);
+ }
+
+ writer.close();
+ pw.close();
+ }
+}
+
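generate_data.pl above shows the exact classpath this generator runs with. For spot-checking its output, the commented-out block in generate_data.pl points at Hive's RCFileCat; a sketch of that check, reusing the jar variables returned by findHadoopJars() and findHiveJars():

    use IPC::Run qw(run);
    # Dumps the binary RCFile as text so it can be diffed against the .plain copy.
    my @cat = ('java', '-cp',
               "$hiveCliJar:$hiveExecJar:$hadoopCoreJar:" .
               "$commonsLoggingJar:$commonsCliJar:$commonsConfigJar",
               'org.apache.hadoop.hive.cli.RCFileCat', 'studenttab10k');
    run(\@cat, '>', 'studenttab10k.txt') or
        die "Unable to run command [" . join(" ", @cat) . "]\n";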
Added: incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/install/install.sh
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/install/install.sh?rev=1211077&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/install/install.sh (added)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/install/install.sh Tue Dec 6 20:05:37 2011
@@ -0,0 +1,212 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This script assumes that it is being run from the top level directory of the
+# HCatalog distribution tarball
+
+host="unknown"
+dir="unknown"
+hadoop_home="unknown"
+tarball="unknown"
+dbroot="unknown"
+portnum="9933"
+passwd="hive"
+warehouseDir="/user/hive/warehouse"
+sasl="false"
+keytabpath="unknown"
+kerberosprincipal="unknown"
+forrest="unknown"
+
+function usage() {
+ echo "Usage: $0 -D dbroot -d directory -f forrest -h hadoop_home "
+ echo " -m host -t tarball"
+ echo " [-p portnum] [-P password] [-w warehouse_directory]"
+ echo " [-s true|false -k keytabpath -K kerberos_principal]"
+ echo
+ echo " dbroot is the root directory for the mysql drivers"
+ echo " directory is the directory where it will be installed"
+ echo " hadoop_home is the directory of your Hadoop installation."
+ echo " host is the machine to install the HCatalog server on"
+ echo " tarball is the result of running ant src-release in hcat"
+ echo " portnum is the port for the thrift server to use, " \
+ "default $portnum"
+ echo " password is the password for the metastore db, default $passwd"
+ echo " warehouse_directory is the HDFS directory to use for " \
+ "internal hive tables, default $warehouseDir"
+ echo " -s true will enable security, -s false turn it off, " \
+ "default $sasl"
+ echo " keytabpath is path to Kerberos keytab file, required with " \
+ "-s true"
+ echo " kerberos_principal service principal for thrift server, " \
+ "required with -s true"
+ echo " All paths must be absolute"
+}
+
+while [ "${1}x" != "x" ] ; do
+ if [ $1 == "-D" ] ; then
+ shift
+ dbroot=$1
+ shift
+ elif [ $1 == "-d" ] ; then
+ shift
+ dir=$1
+ shift
+ elif [ $1 == "-f" ] ; then
+ shift
+ forrest=$1
+ shift
+ elif [ $1 == "-h" ] ; then
+ shift
+ hadoop_home=$1
+ shift
+ elif [ $1 == "-K" ] ; then
+ shift
+ kerberosprincipal=$1
+ kerberosprincipal=${kerberosprincipal/@/\\@}
+ shift
+ elif [ $1 == "-k" ] ; then
+ shift
+ keytabpath=$1
+ shift
+ elif [ $1 == "-m" ] ; then
+ shift
+ host=$1
+ shift
+ elif [ $1 == "-p" ] ; then
+ shift
+ portnum=$1
+ shift
+ elif [ $1 == "-P" ] ; then
+ shift
+ passwd=$1
+ shift
+ elif [ $1 == "-s" ] ; then
+ shift
+ sasl=$1
+ shift
+ elif [ $1 == "-t" ] ; then
+ shift
+ tarball=$1
+ shift
+ elif [ $1 == "-w" ] ; then
+ shift
+ warehouseDir=$1
+ shift
+ else
+ echo "Unknown option $1"
+ shift
+ fi
+
+done
+
+for var in $forrest $dbroot $host $dir $hadoop_home $tarball ; do
+ if [ $var == "unknown" ] ; then
+ usage
+ exit 1
+ fi
+done
+
+# Make sure root and dbroot are absolute paths
+
+for var in $forrest $dbroot $dir $hadoop_home ; do
+ if [ ${var:0:1} != "/" ] ; then
+ usage
+ exit 1
+ fi
+done
+
+# Take the src distribution and build an installable tarball
+# Copy the tarball over
+rm -rf /tmp/${USER}_hcat_scratch
+mkdir /tmp/${USER}_hcat_scratch
+cd /tmp/${USER}_hcat_scratch
+cp $tarball .
+tar zxf *
+dirname=`ls -1 | grep -v gz`
+cd $dirname
+ant -Dforrest.home=$forrest tar
+tarfiledir=`pwd`
+tarfilebase=`ls build/hcatalog-*.tar.gz`
+tarfile="$tarfiledir/$tarfilebase"
+
+tfile=/tmp/${USER}_hcat_test_tarball.tgz
+scp $tarfile $host:$tfile
+
+# Write a quick perl script to modify the hive-site.xml file
+pfile=/tmp/${USER}_hcat_test_hive_site_modify.pl
+cat > $pfile <<!
+#!/usr/bin/env perl
+
+while (<>) {
+ s!DBHOSTNAME!$host!;
+ s!SVRHOST!$host!;
+ s!PASSWORD!$passwd!;
+ s!WAREHOUSE_DIR!$warehouseDir!;
+ s!SASL_ENABLED!$sasl!;
+ s!KEYTAB_PATH!$keytabpath!;
+ s!KERBEROS_PRINCIPAL!$kerberosprincipal!;
+ s!PORT!$portnum!;
+ print;
+}
+!
+
+
+# Run the install script
+file=/tmp/${USER}_hcat_test_install.sh
+cat > $file <<!
+#!/usr/bin/env bash
+rm -rf /tmp/${USER}_hcat_scratch
+mkdir /tmp/${USER}_hcat_scratch
+cd /tmp/${USER}_hcat_scratch
+cp $tfile .
+tar zxf ${USER}_hcat_test_tarball.tgz
+cd hcatalog-*
+share/hcatalog/scripts/hcat_server_install.sh -r $dir -d $dbroot \
+ -h $hadoop_home -p $portnum
+
+chmod +x $pfile
+cp $dir/etc/hcatalog/hive-site.xml /tmp/${USER}_hcat_test_hive_site.tmp
+$pfile < /tmp/${USER}_hcat_test_hive_site.tmp > $dir/etc/hcatalog/hive-site.xml
+!
+
+scp $file $host:$file
+scp $pfile $host:$pfile
+ssh $host chmod +x $file
+ssh $host $file
+if [ $? != "0" ] ; then
+ echo "Failed to install hcat"
+ exit 1
+fi
+
+# Stop the current server
+file=/tmp/${USER}_hcat_test_install_stop_server.sh
+cat > $file <<!
+#!/usr/bin/env bash
+export HADOOP_HOME=$hadoop_home
+$dir/share/hcatalog/scripts/hcat_server_stop.sh
+!
+scp $file $host:$file
+ssh $host chmod +x $file
+ssh $host $file
+
+# Start the server
+ssh $host $dir/share/hcatalog/scripts/hcat_server_start.sh
+if [ $? != "0" ] ; then
+ echo "Failed to start hcat"
+ exit 1
+fi
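
Reading the required flags off usage(), a complete installer run looks like the following; every host and path here is a placeholder:

    use IPC::Run qw(run);
    # Hypothetical install of the HCatalog server onto host 'hcat-server'.
    my @install = ('./install.sh',
                   '-D', '/usr/share/java',            # dbroot holding the mysql drivers
                   '-d', '/grid/0/hcatalog',           # install directory on the server
                   '-f', '/opt/apache-forrest',        # forrest home, used to build the tarball
                   '-h', '/opt/hadoop',                # hadoop home
                   '-m', 'hcat-server',                # machine to install the server on
                   '-t', '/tmp/hcatalog-src.tar.gz');  # result of 'ant src-release'
    run(\@install) or die "install.sh failed\n";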
Added: incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/GroupByAge.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/GroupByAge.java?rev=1211077&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/GroupByAge.java (added)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/GroupByAge.java Tue Dec 6 20:05:37 2011
@@ -0,0 +1,134 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hcatalog.utils;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.util.GenericOptionsParser;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.hcatalog.common.HCatConstants;
+import org.apache.hcatalog.data.DefaultHCatRecord;
+import org.apache.hcatalog.data.HCatRecord;
+import org.apache.hcatalog.data.schema.HCatSchema;
+import org.apache.hcatalog.mapreduce.HCatInputFormat;
+import org.apache.hcatalog.mapreduce.HCatOutputFormat;
+import org.apache.hcatalog.mapreduce.InputJobInfo;
+import org.apache.hcatalog.mapreduce.OutputJobInfo;
+
+/**
+ * This is a map reduce test for testing hcat which goes against a student
+ * table. It performs a group by on the age column (column 1 of each record)
+ * and counts the rows in each group. This is to simulate a typical operation
+ * in a map reduce program, to test that hcat hands the right data to the map
+ * reduce program.
+ *
+ * Usage: hadoop jar <test jar> org.apache.hcatalog.utils.GroupByAge
+ * <serveruri> <input table> <output table> <-libjars hive-hcat jar>
+ * The hcat jar location should be specified as file://<full path to jar>
+ */
+public class GroupByAge extends Configured implements Tool {
+
+ public static class Map extends
+ Mapper<WritableComparable, HCatRecord, IntWritable, IntWritable> {
+
+ int age;
+
+ @Override
+ protected void map(
+ WritableComparable key,
+ HCatRecord value,
+ org.apache.hadoop.mapreduce.Mapper<WritableComparable, HCatRecord, IntWritable, IntWritable>.Context context)
+ throws IOException, InterruptedException {
+            // column 1 of the input table holds the age; emit (age, 1) so
+            // the reducer can count records per age
+            age = (Integer) value.get(1);
+            context.write(new IntWritable(age), new IntWritable(1));
+ }
+ }
+
+ public static class Reduce extends Reducer<IntWritable, IntWritable,
+ WritableComparable, HCatRecord> {
+
+
+ @Override
+ protected void reduce(IntWritable key, java.lang.Iterable<IntWritable>
+ values, org.apache.hadoop.mapreduce.Reducer<IntWritable,IntWritable,WritableComparable,HCatRecord>.Context context)
+            throws IOException, InterruptedException {
+            // count the records in this age group
+            int sum = 0;
+            for (IntWritable ignored : values) {
+                sum++;
+            }
+ HCatRecord record = new DefaultHCatRecord(2);
+ record.set(0, key.get());
+ record.set(1, sum);
+
+ context.write(null, record);
+ }
+ }
+
+ public int run(String[] args) throws Exception {
+ Configuration conf = getConf();
+ args = new GenericOptionsParser(conf, args).getRemainingArgs();
+
+ String serverUri = args[0];
+ String inputTableName = args[1];
+ String outputTableName = args[2];
+ String dbName = null;
+
+ String principalID = System
+ .getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL);
+ if (principalID != null)
+ conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID);
+ Job job = new Job(conf, "GroupByAge");
+ HCatInputFormat.setInput(job, InputJobInfo.create(dbName,
+ inputTableName, null, serverUri, principalID));
+
+ job.setInputFormatClass(HCatInputFormat.class);
+ job.setJarByClass(GroupByAge.class);
+ job.setMapperClass(Map.class);
+ job.setReducerClass(Reduce.class);
+ job.setMapOutputKeyClass(IntWritable.class);
+ job.setMapOutputValueClass(IntWritable.class);
+ job.setOutputKeyClass(WritableComparable.class);
+ job.setOutputValueClass(DefaultHCatRecord.class);
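+        // initialize HCatOutputFormat: bind the output table, then fetch its
+        // schema and set it so record fields map to the table's columns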
+ HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName,
+ outputTableName, null, serverUri, principalID));
+ HCatSchema s = HCatOutputFormat.getTableSchema(job);
+        System.err.println("INFO: output schema explicitly set for writing: "
+                + s);
+ HCatOutputFormat.setSchema(job, s);
+ job.setOutputFormatClass(HCatOutputFormat.class);
+ return (job.waitForCompletion(true) ? 0 : 1);
+ }
+
+ public static void main(String[] args) throws Exception {
+ int exitCode = ToolRunner.run(new GroupByAge(), args);
+ System.exit(exitCode);
+ }
+}
Added: incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/ReadJson.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/ReadJson.java?rev=1211077&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/ReadJson.java (added)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/ReadJson.java Tue Dec 6 20:05:37 2011
@@ -0,0 +1,115 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hcatalog.utils;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
+import org.apache.hadoop.util.GenericOptionsParser;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.hcatalog.common.HCatConstants;
+import org.apache.hcatalog.data.DefaultHCatRecord;
+import org.apache.hcatalog.data.HCatRecord;
+import org.apache.hcatalog.mapreduce.HCatInputFormat;
+import org.apache.hcatalog.mapreduce.InputJobInfo;
+
+/**
+ * This is a map reduce test for testing hcat reads of a table stored in JSON
+ * format. For each record it reads three columns (a string, an int, and a
+ * double, any of which may be null), copies them into a new HCatRecord, and
+ * writes the records as text to the given output directory. This tests that
+ * hcat hands the right data, including nulls, to the map reduce program.
+ *
+ * Usage: hadoop jar <test jar> org.apache.hcatalog.utils.ReadJson
+ * -libjars <hive-hcat jar> <serveruri> <table> <output dir>
+ * The hcat jar location should be specified as file://<full path to jar>
+ */
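+// A hypothetical invocation (jar name, metastore URI, and names are
+// illustrative only; the e2e harness supplies the real values):
+//
+//   hadoop jar testudf.jar org.apache.hcatalog.utils.ReadJson \
+//       -libjars file:///path/to/hcatalog.jar \
+//       thrift://localhost:9083 studenttab10k_json /user/hcat/readjson_out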
+public class ReadJson extends Configured implements Tool {
+
+ public static class Map
+ extends Mapper<WritableComparable, HCatRecord, IntWritable, HCatRecord>{
+
+ String s;
+ Integer i;
+ Double d;
+
+ @Override
+ protected void map(WritableComparable key, HCatRecord value,
+ org.apache.hadoop.mapreduce.Mapper<WritableComparable,HCatRecord,
+ IntWritable,HCatRecord>.Context context)
+        throws IOException, InterruptedException {
+            // JSON fields may be absent; value.get() then returns null, and
+            // casting null is a no-op, so plain casts preserve the nulls
+            s = (String) value.get(0);
+            i = (Integer) value.get(1);
+            d = (Double) value.get(2);
+
+ HCatRecord record = new DefaultHCatRecord(3);
+ record.set(0, s);
+ record.set(1, i);
+ record.set(2, d);
+
+ context.write(null, record);
+
+ }
+ }
+
+ public int run(String[] args) throws Exception {
+ Configuration conf = getConf();
+ args = new GenericOptionsParser(conf, args).getRemainingArgs();
+
+ String serverUri = args[0];
+ String tableName = args[1];
+ String outputDir = args[2];
+ String dbName = null;
+
+ String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL);
+ if(principalID != null)
+ conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID);
+ Job job = new Job(conf, "ReadJson");
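+        // the third argument to InputJobInfo.create is the partition filter;
+        // null means the whole table is read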
+ HCatInputFormat.setInput(job, InputJobInfo.create(
+ dbName, tableName, null, serverUri, principalID));
+        // the output side is plain text via TextOutputFormat, so no
+        // HCatOutputFormat setup is needed
+
+ job.setInputFormatClass(HCatInputFormat.class);
+ job.setOutputFormatClass(TextOutputFormat.class);
+ job.setJarByClass(ReadJson.class);
+ job.setMapperClass(Map.class);
+ job.setOutputKeyClass(IntWritable.class);
+ job.setOutputValueClass(HCatRecord.class);
+ job.setNumReduceTasks(0);
+ FileOutputFormat.setOutputPath(job, new Path(outputDir));
+ return (job.waitForCompletion(true) ? 0 : 1);
+ }
+
+ public static void main(String[] args) throws Exception {
+ int exitCode = ToolRunner.run(new ReadJson(), args);
+ System.exit(exitCode);
+ }
+}
Added: incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/ReadRC.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/ReadRC.java?rev=1211077&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/ReadRC.java (added)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/ReadRC.java Tue Dec 6 20:05:37 2011
@@ -0,0 +1,114 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hcatalog.utils;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
+import org.apache.hadoop.util.GenericOptionsParser;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.hcatalog.common.HCatConstants;
+import org.apache.hcatalog.data.DefaultHCatRecord;
+import org.apache.hcatalog.data.HCatRecord;
+import org.apache.hcatalog.mapreduce.HCatInputFormat;
+import org.apache.hcatalog.mapreduce.InputJobInfo;
+
+/**
+ * This is a map reduce test for testing hcat reads of a table stored as
+ * RCFile. It reads each record's name (string), age (int), and gpa (double)
+ * columns, copies them into a new HCatRecord, and writes the records as text
+ * to the given output directory. This tests that hcat hands the right data
+ * to the map reduce program.
+ *
+ * Usage: hadoop jar <test jar> org.apache.hcatalog.utils.ReadRC
+ * -libjars <hive-hcat jar> <serveruri> <table> <output dir>
+ * The hcat jar location should be specified as file://<full path to jar>
+ */
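+// A hypothetical invocation (jar name, metastore URI, and output path are
+// illustrative only; the e2e harness supplies the real values):
+//
+//   hadoop jar testudf.jar org.apache.hcatalog.utils.ReadRC \
+//       -libjars file:///path/to/hcatalog.jar \
+//       thrift://localhost:9083 all100krc /user/hcat/readrc_out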
+public class ReadRC extends Configured implements Tool {
+
+ public static class Map
+ extends Mapper<WritableComparable, HCatRecord, IntWritable, HCatRecord>{
+
+ String name;
+ int age;
+ double gpa;
+
+ @Override
+ protected void map(WritableComparable key, HCatRecord value,
+ org.apache.hadoop.mapreduce.Mapper<WritableComparable,HCatRecord,
+ IntWritable,HCatRecord>.Context context)
+        throws IOException, InterruptedException {
+            // columns of the RC-stored table: name (string), age (int),
+            // gpa (double)
+            name = (String) value.get(0);
+            age = (Integer) value.get(1);
+            gpa = (Double) value.get(2);
+
+ HCatRecord record = new DefaultHCatRecord(3);
+ record.set(0, name);
+ record.set(1, age);
+ record.set(2, gpa);
+
+ context.write(null, record);
+
+ }
+ }
+
+ public int run(String[] args) throws Exception {
+ Configuration conf = getConf();
+ args = new GenericOptionsParser(conf, args).getRemainingArgs();
+
+ String serverUri = args[0];
+ String tableName = args[1];
+ String outputDir = args[2];
+ String dbName = null;
+
+ String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL);
+ if(principalID != null)
+ conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID);
+ Job job = new Job(conf, "ReadRC");
+ HCatInputFormat.setInput(job, InputJobInfo.create(
+ dbName, tableName, null, serverUri, principalID));
+        // the output side is plain text via TextOutputFormat, so no
+        // HCatOutputFormat setup is needed
+
+ job.setInputFormatClass(HCatInputFormat.class);
+ job.setOutputFormatClass(TextOutputFormat.class);
+ job.setJarByClass(ReadRC.class);
+ job.setMapperClass(Map.class);
+ job.setOutputKeyClass(IntWritable.class);
+ job.setOutputValueClass(HCatRecord.class);
+ job.setNumReduceTasks(0);
+ FileOutputFormat.setOutputPath(job, new Path(outputDir));
+ return (job.waitForCompletion(true) ? 0 : 1);
+ }
+
+ public static void main(String[] args) throws Exception {
+ int exitCode = ToolRunner.run(new ReadRC(), args);
+ System.exit(exitCode);
+ }
+}
Added: incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/ReadText.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/ReadText.java?rev=1211077&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/ReadText.java (added)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/ReadText.java Tue Dec 6 20:05:37 2011
@@ -0,0 +1,125 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hcatalog.utils;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
+import org.apache.hadoop.util.GenericOptionsParser;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.hcatalog.common.HCatConstants;
+import org.apache.hcatalog.data.DefaultHCatRecord;
+import org.apache.hcatalog.data.HCatRecord;
+import org.apache.hcatalog.mapreduce.HCatInputFormat;
+import org.apache.hcatalog.mapreduce.InputJobInfo;
+
+/**
+ * This is a map reduce test for testing hcat reads of a text-stored table
+ * whose columns span the primitive types (the mapper reads them as Integer,
+ * Integer, Integer, Long, Float, Double, and String). Each record is copied
+ * into a new HCatRecord and written as text to the given output directory.
+ * This tests that hcat hands the right data to the map reduce program.
+ *
+ * Usage: hadoop jar <test jar> org.apache.hcatalog.utils.ReadText
+ * -libjars <hive-hcat jar> <serveruri> <table> <output dir>
+ * The hcat jar location should be specified as file://<full path to jar>
+ */
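+// A hypothetical invocation (jar name, metastore URI, and names are
+// illustrative only; the e2e harness supplies the real values):
+//
+//   hadoop jar testudf.jar org.apache.hcatalog.utils.ReadText \
+//       -libjars file:///path/to/hcatalog.jar \
+//       thrift://localhost:9083 all100k /user/hcat/readtext_out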
+public class ReadText extends Configured implements Tool {
+
+ public static class Map
+ extends Mapper<WritableComparable, HCatRecord, IntWritable, HCatRecord>{
+
+ int t;
+ int si;
+ int i;
+ long b;
+ float f;
+ double d;
+ String s;
+
+ @Override
+ protected void map(WritableComparable key, HCatRecord value,
+ org.apache.hadoop.mapreduce.Mapper<WritableComparable,HCatRecord,
+ IntWritable,HCatRecord>.Context context)
+        throws IOException, InterruptedException {
+            // the field names suggest tinyint, smallint, int, bigint, float,
+            // double, and string columns; the first two arrive as Integers
+            t = (Integer) value.get(0);
+            si = (Integer) value.get(1);
+            i = (Integer) value.get(2);
+            b = (Long) value.get(3);
+            f = (Float) value.get(4);
+            d = (Double) value.get(5);
+            s = (String) value.get(6);
+
+ HCatRecord record = new DefaultHCatRecord(7);
+ record.set(0, t);
+ record.set(1, si);
+ record.set(2, i);
+ record.set(3, b);
+ record.set(4, f);
+ record.set(5, d);
+ record.set(6, s);
+
+ context.write(null, record);
+
+ }
+ }
+
+ public int run(String[] args) throws Exception {
+ Configuration conf = getConf();
+ args = new GenericOptionsParser(conf, args).getRemainingArgs();
+
+ String serverUri = args[0];
+ String tableName = args[1];
+ String outputDir = args[2];
+ String dbName = null;
+
+ String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL);
+ if(principalID != null)
+ conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID);
+ Job job = new Job(conf, "ReadText");
+ HCatInputFormat.setInput(job, InputJobInfo.create(
+ dbName, tableName, null, serverUri, principalID));
+        // the output side is plain text via TextOutputFormat, so no
+        // HCatOutputFormat setup is needed
+
+ job.setInputFormatClass(HCatInputFormat.class);
+ job.setOutputFormatClass(TextOutputFormat.class);
+ job.setJarByClass(ReadText.class);
+ job.setMapperClass(Map.class);
+ job.setOutputKeyClass(IntWritable.class);
+ job.setOutputValueClass(HCatRecord.class);
+ job.setNumReduceTasks(0);
+ FileOutputFormat.setOutputPath(job, new Path(outputDir));
+ return (job.waitForCompletion(true) ? 0 : 1);
+ }
+
+ public static void main(String[] args) throws Exception {
+ int exitCode = ToolRunner.run(new ReadText(), args);
+ System.exit(exitCode);
+ }
+}