Posted to hcatalog-commits@incubator.apache.org by ha...@apache.org on 2011/08/30 06:18:18 UTC
svn commit: r1163097 [6/7] - in /incubator/hcatalog/trunk: ./ src/test/e2e/
src/test/e2e/hcatalog/ src/test/e2e/hcatalog/conf/
src/test/e2e/hcatalog/data/ src/test/e2e/hcatalog/deployers/
src/test/e2e/hcatalog/drivers/ src/test/e2e/hcatalog/paramfiles/...
Added: incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/generate/generate_data.pl
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/generate/generate_data.pl?rev=1163097&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/generate/generate_data.pl (added)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/generate/generate_data.pl Tue Aug 30 06:18:16 2011
@@ -0,0 +1,463 @@
+#!/usr/bin/env perl
+############################################################################
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# A utility to generate test data for pig test harness tests.
+#
+#
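+#
+# A hypothetical invocation (all names below are illustrative):
+#
+#   ./generate_data.pl studenttab 100 studenttab_100 /tmp/hcat_e2e
+#
+# would write 100 rows of tab-separated name/age/gpa data to
+# /tmp/hcat_e2e/studenttab_100 and a matching SQL load script to
+# /tmp/hcat_e2e/studenttab_100.sql (pass "nosql" as a fifth argument to
+# skip the SQL script).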
+
+use strict;
+use charnames ();
+
+our @firstName = ("alice", "bob", "calvin", "david", "ethan", "fred",
+ "gabriella", "holly", "irene", "jessica", "katie", "luke", "mike", "nick",
+ "oscar", "priscilla", "quinn", "rachel", "sarah", "tom", "ulysses", "victor",
+ "wendy", "xavier", "yuri", "zach");
+
+our @lastName = ("allen", "brown", "carson", "davidson", "ellison", "falkner",
+ "garcia", "hernandez", "ichabod", "johnson", "king", "laertes", "miller",
+ "nixon", "ovid", "polk", "quirinius", "robinson", "steinbeck", "thompson",
+ "underhill", "van buren", "white", "xylophone", "young", "zipper");
+
+sub randomName()
+{
+ return sprintf("%s %s", $firstName[int(rand(26))],
+ $lastName[int(rand(26))]);
+}
+
+our @city = ("albuquerque", "bombay", "calcutta", "danville", "eugene",
+ "frankfurt", "grenoble", "harrisburg", "indianapolis",
+ "jerusalem", "kellogg", "lisbon", "marseilles",
+ "nice", "oklohoma city", "paris", "queensville", "roswell",
+ "san francisco", "twin falls", "umatilla", "vancouver", "wheaton",
+ "xacky", "youngs town", "zippy");
+
+sub randomCity()
+{
+ return $city[int(rand(26))];
+}
+
+our @state = ( "AL", "AK", "AS", "AZ", "AR", "CA", "CO", "CT", "DE", "DC",
+ "FL", "GA", "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD",
+ "MA", "MI", "MN", "MS", "MT", "NE", "NV", "NH", "NJ", "NM", "NY", "NC",
+ "ND", "OH", "OK", "OR", "RI", "SC", "SD", "TN", "TX", "UT", "VT", "VA",
+ "WA", "WV", "WI", "WY");
+
+sub randomState()
+{
+ return $state[int(rand(50))];
+}
+
+our @classname = ("american history", "biology", "chemistry", "debate",
+ "education", "forestry", "geology", "history", "industrial engineering",
+ "joggying", "kindergarten", "linguistics", "mathematics", "nap time",
+ "opthamology", "philosophy", "quiet hour", "religion", "study skills",
+ "topology", "undecided", "values clariffication", "wind surfing",
+ "xylophone band", "yard duty", "zync studies");
+
+sub randomClass()
+{
+ return $classname[int(rand(26))];
+}
+
+our @grade = ("A", "A-", "B+", "B", "B-", "C+", "C", "C-", "D+", "D", "D-",
+ "F");
+
+sub randomGrade()
+{
+ return $grade[int(rand(int(@grade)))];
+}
+
+our @registration = ("democrat", "green", "independent", "libertarian",
+ "republican", "socialist");
+
+sub randomRegistration()
+{
+ return $registration[int(rand(int(@registration)))];
+}
+
+sub randomAge()
+{
+ return (int(rand(60)) + 18);
+}
+
+sub randomGpa()
+{
+ return rand(4.0);
+}
+
+our @street = ("A", "B", "C", "D", "E", "F", "G", "H", "I",
+ "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S",
+ "T", "U", "V", "W", "X", "Y", "Z");
+
+sub randomStreet()
+{
+ return sprintf("%d %s st", int(rand(1000)), $street[int(rand(26))]);
+}
+
+sub randomZip()
+{
+ return int(rand(100000));
+}
+
+sub randomContribution()
+{
+ return sprintf("%.2f", rand(1000));
+}
+
+our @numLetter = ("1", "09", "09a");
+
+sub randomNumLetter()
+{
+ return $numLetter[int(rand(int(@numLetter)))];
+}
+
+our @greekLetter = ( "alpha", "beta", "gamma", "delta", "epsilon", "zeta",
+ "eta", "theta", "iota", "kappa", "lambda", "mu", "nu", "xi", "omicron",
+ "pi", "rho", "sigma", "tau", "upsilon", "chi", "phi", "psi", "omega" );
+
+sub randomGreekLetter()
+{
+ return $greekLetter[int(rand(int(@greekLetter)))];
+}
+
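+# Emits a Pig-style map literal with two or three entries drawn in the fixed
+# order name, age, gpa; e.g. (illustrative) [name#alice allen,age#42].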
+sub randomNameAgeGpaMap()
+{
+ my $size = int(rand(3));
+ my $map = "[";
+ my @mapValues = ( "name#" . randomName(), "age#" . randomAge(), "gpa#" . randomGpa() );
+ $size = ($size == 0 ? 1 : $size);
+ for(my $i = 0; $i <= $size; $i++) {
+ $map .= $mapValues[$i];
+ if($i != $size) {
+ $map .= ",";
+ }
+ }
+ $map .= "]";
+ return $map;
+}
+
+sub getMapFields($) {
+ my $mapString = shift;
+ # remove the enclosing square brackets
+ $mapString =~ s/[\[\]]//g;
+ # get individual map fields
+ my @fields = split(/,/, $mapString);
+ # get only the values
+ my $hash;
+ for my $field (@fields) {
+ if($field =~ /(\S+)#(.*)/) {
+ $hash->{$1} = $2;
+ }
+ }
+ return $hash;
+}
+
+sub randomNameAgeGpaTuple()
+{
+ my $gpa = sprintf("%0.2f", randomGpa());
+ return "(" . randomName() . "," . randomAge() . "," . $gpa . ")" ;
+}
+
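+# Emits a Pig-style bag literal of two or three (name,age,gpa) tuples,
+# e.g. (illustrative) {(alice allen,42,2.72),(bob brown,23,3.14)}.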
+sub randomNameAgeGpaBag()
+{
+ my $size = int(rand(int(3)));
+ my $bag = "{";
+ $size = ($size == 0 ? 1 : $size);
+ for(my $i = 0; $i <= $size; $i++) {
+ $bag .= randomNameAgeGpaTuple();
+ if($i != $size) {
+ $bag .= ",";
+ }
+ }
+ $bag .= "}";
+ return $bag;
+}
+
+our @textDoc = (
+ "The cosmological proof, which we are now about to ex-",
+ "amine, retains the connection of absolute necessity with the",
+ "highest reality, but instead of reasoning, like the former proof,",
+ "from the highest reality to necessity of existence, it reasons",
+ "from the previously given unconditioned necessity of some",
+ "being to the unlimited reality of that being. It thus enters upon",
+ "a course of reasoning which, whether rational or only pseudo-",
+ "rational, is at any rate natural, and the most convincing not",
+ "only for common sense but even for speculative understand-",
+ "ing. It also sketches the first outline of all the proofs in natural",
+ "theology, an outline which has always been and always will",
+ "be followed, however much embellished and disguised by",
+ "superfluous additions. This proof, termed by Leibniz the proof",
+ "a contingentia mundi, we shall now proceed to expound and",
+ "examine.");
+
+sub usage()
+{
+ warn "Usage: $0 filetype numrows tablename targetdir [nosql]\n";
+ warn "\tValid filetypes [studenttab, studentcolon, \n";
+ warn "\t\tstudentnulltab, studentcomplextab, studentctrla, voternulltab\n";
+ warn "\t\tvotertab, reg1459894, textdoc, unicode, manual]\n";
+}
+
+our @greekUnicode = ("\N{U+03b1}", "\N{U+03b2}", "\N{U+03b3}", "\N{U+03b4}",
+ "\N{U+03b5}", "\N{U+03b6}", "\N{U+03b7}", "\N{U+03b8}", "\N{U+03b9}",
+ "\N{U+03ba}", "\N{U+03bb}", "\N{U+03bc}", "\N{U+03bd}", "\N{U+03be}",
+ "\N{U+03bf}", "\N{U+03c0}", "\N{U+03c1}", "\N{U+03c2}", "\N{U+03c3}",
+ "\N{U+03c4}", "\N{U+03c5}", "\N{U+03c6}", "\N{U+03c7}", "\N{U+03c8}",
+ "\N{U+03c9}");
+
+sub randomUnicodeNonAscii()
+{
+ my $name = $firstName[int(rand(int(@firstName)))] .
+ $greekUnicode[int(rand(int(@greekUnicode)))];
+ return $name;
+}
+
+my $testvar = "\N{U+03b1}\N{U+03b3}\N{U+03b1}\N{U+03c0}\N{U+03b7}";
+
+sub getBulkCopyCmd {
+    my $sourceDir = shift;
+    my $tableName = shift;
+    my $delimiter = shift;
+    $delimiter = '\t' if ( !$delimiter );
+
+    my $cmd = "\nbegin transaction;"
+       . "\nCOPY $tableName FROM \'$sourceDir/$tableName\' using DELIMITERS \'$delimiter\';"
+       . "\ncommit;"
+       . "\n";
+
+    return $cmd;
+}
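+
+# For example (illustrative), getBulkCopyCmd("/tmp/data", "studenttab")
+# returns a psql script along the lines of:
+#
+#   begin transaction;
+#   COPY studenttab FROM '/tmp/data/studenttab' using DELIMITERS '\t';
+#   commit;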
+
+
+# main($)
+{
+ # explicitly call srand so we get the same data every time
+ # we generate it. However, we set it individually for each table type.
+ # Otherwise we'd be generating the same data sets regardless of size,
+ # and this would really skew our joins.
+
+ my $filetype = shift;
+ my $numRows = shift;
+ my $tableName = shift;
+ my $targetDir= shift;
+ my $nosql = shift;
+
+ die usage() if (!defined($filetype) || !defined($numRows));
+
+    if ($numRows <= 0) { usage(); exit(1); }
+
+ if ( $targetDir ) {
+ open(HDFS, "> $targetDir/$tableName") or die("Cannot open file $tableName, $!\n");
+ open(PSQL, "> $targetDir/$tableName.sql") or die("Cannot open file $tableName.sql, $!\n") unless defined $nosql;
+ } else {
+ open(HDFS, "> $tableName") or die("Cannot open file $tableName, $!\n");
+ open(PSQL, "> $tableName.sql") or die("Cannot open file $tableName.sql, $!\n") unless defined $nosql;
+ }
+
+    if ($filetype eq "manual") {
+        # nothing to generate; data for the "manual" filetype is created by hand
+ } elsif ($filetype eq "studenttab") {
+ srand(3.14159 + $numRows);
+ print PSQL "create table $tableName (name varchar(100), age integer, gpa float(3));\n" unless defined $nosql;
+ print PSQL &getBulkCopyCmd( $targetDir, $tableName ) unless defined $nosql;
+ for (my $i = 0; $i < $numRows; $i++) {
+ my $name = randomName();
+ my $age = randomAge();
+ my $gpa = randomGpa();
+ printf HDFS "%s\t%d\t%.2f\n", $name, $age, $gpa;
+ }
+
+ } elsif ($filetype eq "studentnulltab") {
+ srand(3.14159 + $numRows);
+        print PSQL "create table $tableName (name varchar(100), age integer, gpa float(3));\n" unless defined $nosql;
+        print PSQL "begin transaction;\n" unless defined $nosql;
+        for (my $i = 0; $i < $numRows; $i++) {
+            # generate nulls in a random fashion
+            my $name = rand(1) < 0.05 ? '' : randomName();
+            my $age = rand(1) < 0.05 ? '' : randomAge();
+            my $gpa = rand(1) < 0.05 ? '' : randomGpa();
+            if (!defined $nosql) {
+                printf PSQL "insert into $tableName (name, age, gpa) values(";
+                print PSQL ($name eq ''? "null, " : "'$name', "), ($age eq ''? "null, " : "$age, ");
+                if($gpa eq '') {
+                    print PSQL "null);\n";
+                } else {
+                    printf PSQL "%.2f);\n", $gpa;
+                }
+            }
+ print HDFS "$name\t$age\t";
+ if($gpa eq '') {
+ print HDFS "\n"
+ } else {
+ printf HDFS "%.2f\n", $gpa;
+ }
+
+ }
+ print PSQL "commit;\n" unless defined $nosql;
+
+ } elsif ($filetype eq "studentcolon") {
+ srand(2.718281828459 + $numRows);
+ print PSQL "create table $tableName (name varchar(100), age integer, gpa float(3));\n" unless defined $nosql;
+ print PSQL &getBulkCopyCmd( $targetDir, $tableName, ':' ) unless defined $nosql;
+ for (my $i = 0; $i < $numRows; $i++) {
+ my $name = randomName();
+ my $age = randomAge();
+ my $gpa = randomGpa();
+ printf HDFS "%s:%d:%.2f\n", $name, $age, $gpa;
+=begin
+ } elsif ($filetype eq "studentusrdef") {
+ srand(6.62606896 + $numRows);
+ for (my $i = 0; $i < $numRows; $i++) {
+ # TODO need to add SQL info.
+ printf("%s,%d,%.2f,", randomName(), randomAge(), randomGpa());
+ printf("<%s,%s,%s,%d>,", randomStreet(), randomCity(), randomState(),
+ randomZip());
+ printf("[%s:<%s,%s>],", randomClass(), randomClass(), randomName());
+ printf("{");
+ my $elementsInBag = int(rand(100));
+ for (my $j = 0; $j < $elementsInBag; $j++) {
+ if ($j != 0) { printf(","); }
+ printf("<%s,%s,%s>", randomClass(), randomName(), randomGrade());
+ }
+ printf("}\n");
+ }
+=cut
+ }
+ print PSQL "commit;\n" unless defined $nosql;
+
+ } elsif ($filetype eq "studentctrla") {
+ srand(6.14159 + $numRows);
+        print PSQL "create table $tableName (name varchar(100), age integer, gpa float(3));\n" unless defined $nosql;
+        print PSQL "begin transaction;\n" unless defined $nosql;
+        for (my $i = 0; $i < $numRows; $i++) {
+            my $name = randomName();
+            my $age = randomAge();
+            my $gpa = randomGpa();
+            printf PSQL "insert into $tableName (name, age, gpa) values('%s', %d, %.2f);\n",
+                $name, $age, $gpa unless defined $nosql;
+            # fields in the data file are delimited by ctrl-A (\001)
+            printf HDFS "%s\001%d\001%.2f\n", $name, $age, $gpa;
+ }
+ print PSQL "commit;\n" unless defined $nosql;
+
+
+ } elsif ($filetype eq "studentcomplextab") {
+ srand(3.14159 + $numRows);
+        print PSQL "create table $tableName (nameagegpamap varchar(500), nameagegpatuple varchar(500), nameagegpabag varchar(500), nameagegpamap_name varchar(500), nameagegpamap_age integer, nameagegpamap_gpa float(3));\n" unless defined $nosql;
+        print PSQL "begin transaction;\n" unless defined $nosql;
+        for (my $i = 0; $i < $numRows; $i++) {
+            # generate nulls in a random fashion
+            my $map = rand(1) < 0.05 ? '' : randomNameAgeGpaMap();
+            my $tuple = rand(1) < 0.05 ? '' : randomNameAgeGpaTuple();
+            my $bag = rand(1) < 0.05 ? '' : randomNameAgeGpaBag();
+            my $mapHash;
+            if($map ne '') {
+                $mapHash = getMapFields($map);
+            }
+
+            if (!defined $nosql) {
+                printf PSQL "insert into $tableName (nameagegpamap, nameagegpatuple, nameagegpabag, nameagegpamap_name, nameagegpamap_age, nameagegpamap_gpa) values(";
+                print PSQL ($map eq ''? "null, " : "'$map', "),
+                    ($tuple eq ''? "null, " : "'$tuple', "),
+                    ($bag eq '' ? "null, " : "'$bag', "),
+                    ($map eq '' ? "null, " : (exists($mapHash->{'name'}) ? "'".$mapHash->{'name'}."', " : "null, ")),
+                    ($map eq '' ? "null, " : (exists($mapHash->{'age'}) ? "'".$mapHash->{'age'}."', " : "null, ")),
+                    ($map eq '' ? "null);\n" : (exists($mapHash->{'gpa'}) ? "'".$mapHash->{'gpa'}."');\n" : "null);\n"));
+            }
+            print HDFS "$map\t$tuple\t$bag\n";
+ }
+ print PSQL "commit;\n" unless defined $nosql;
+
+ } elsif ($filetype eq "votertab") {
+ srand(299792458 + $numRows);
+ print PSQL "create table $tableName (name varchar(100), age integer, registration varchar(20), contributions float);\n" unless defined $nosql;
+ print PSQL &getBulkCopyCmd( $targetDir, $tableName ) unless defined $nosql;
+ for (my $i = 0; $i < $numRows; $i++) {
+ my $name = randomName();
+ my $age = randomAge();
+ my $registration = randomRegistration();
+ my $contribution = randomContribution();
+ printf HDFS "%s\t%d\t%s\t%.2f\n", $name, $age,
+ $registration, $contribution;
+ }
+
+ } elsif ($filetype eq "voternulltab") {
+ srand(299792458 + $numRows);
+ print PSQL "create table $tableName (name varchar(100), age integer, registration varchar(20), contributions float);\n" unless defined $nosql;
+ print PSQL "begin transaction;\n" unless defined $nosql;
+ for (my $i = 0; $i < $numRows; $i++) {
+ # generate nulls in a random fashion
+ my $name = rand(1) < 0.05 ? '' : randomName();
+ my $age = rand(1) < 0.05 ? '' : randomAge();
+ my $registration = rand(1) < 0.05 ? '' : randomRegistration();
+ my $contribution = rand(1) < 0.05 ? '' : randomContribution();
+            if (!defined $nosql) {
+                printf PSQL "insert into $tableName (name, age, registration, contributions) values(";
+                print PSQL ($name eq ''? "null, " : "'$name', "),
+                    ($age eq ''? "null, " : "$age, "),
+                    ($registration eq ''? "null, " : "'$registration', ");
+                if($contribution eq '') {
+                    print PSQL "null);\n";
+                } else {
+                    printf PSQL "%.2f);\n", $contribution;
+                }
+            }
+ print HDFS "$name\t$age\t$registration\t";
+ if($contribution eq '') {
+ print HDFS "\n"
+ } else {
+ printf HDFS "%.2f\n", $contribution;
+ }
+ }
+ print PSQL "commit;\n" unless defined $nosql;
+
+ } elsif ($filetype eq "reg1459894") {
+ srand(6.67428 + $numRows);
+ print PSQL "create table $tableName (first varchar(10), second varchar(10));\n" unless defined $nosql;
+ print PSQL &getBulkCopyCmd( $targetDir, $tableName ) unless defined $nosql;
+ for (my $i = 0; $i < $numRows; $i++) {
+ my $letter = randomNumLetter();
+ my $gkLetter = randomGreekLetter();
+ printf HDFS "%s\t%s\n", $letter, $gkLetter;
+ }
+
+ } elsif ($filetype eq "textdoc") {
+ # This one ignores the number of lines. It isn't random either.
+ print PSQL "create table $tableName (name varchar(255));\n" unless defined $nosql;
+ print PSQL "begin transaction;\n" unless defined $nosql;
+ for (my $i = 0; $i < @textDoc; $i++) {
+ my $sqlWords = $textDoc[$i];
+ $sqlWords =~ s/([\w-]+)/$1,/g;
+ print PSQL "insert into $tableName (name) values('($sqlWords)');\n" unless defined $nosql;
+ print HDFS "$textDoc[$i]\n";
+ }
+ print PSQL "commit;\n" unless defined $nosql;
+
+
+ } elsif ($filetype eq "unicode") {
+        srand(1.41421 + $numRows);
+        # the generated names contain non-ASCII characters, so write UTF-8
+        binmode(HDFS, ":utf8");
+        binmode(PSQL, ":utf8") unless defined $nosql;
+ print PSQL "create table $tableName (name varchar(255));\n" unless defined $nosql;
+ print PSQL "begin transaction;\n" unless defined $nosql;
+ for (my $i = 0; $i < $numRows; $i++) {
+ my $name = randomUnicodeNonAscii();
+ printf PSQL "insert into $tableName (name) values('%s');\n",
+ $name unless defined $nosql;
+ printf HDFS "%s\n", $name;
+ }
+ print PSQL "commit;\n" unless defined $nosql;
+
+ } else {
+ warn "Unknown filetype $filetype\n";
+ usage();
+ }
+}
+
+
Added: incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/test/floatpostprocessor.pl
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/test/floatpostprocessor.pl?rev=1163097&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/test/floatpostprocessor.pl (added)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/test/floatpostprocessor.pl Tue Aug 30 06:18:16 2011
@@ -0,0 +1,111 @@
+#!/usr/bin/env perl
+
+############################################################################
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# A simple tool to make sure all floats in the output are written the same way.
+# It is assumed that the data in question is being read from stdin.
+#
+#
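+# Hypothetical usage, normalizing a comma-delimited result file (the
+# delimiter is the only argument):
+#
+#   cat query_results.txt | ./floatpostprocessor.pl "," > normalized.txt
+#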
+
+use strict;
+
+our @floats;
+our $delim;
+
+sub parseLine($)
+{
+ my $line = shift;
+ chomp $line;
+ return split(/$delim/, $line);
+}
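+# Note: the delimiter is interpolated into the split pattern as a regex, so
+# an argument of "\t" splits on tabs, while regex metacharacters such as "|"
+# would need to be escaped by the caller.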
+
+sub postprocess($)
+{
+ my @fields = parseLine(shift);
+
+ for (my $i = 0; $i < @fields; $i++) {
+ if ($i != 0) { print($delim); }
+ if ($floats[$i]) {
+ printf("%.3f", $fields[$i]);
+ } else {
+ print($fields[$i]);
+ }
+ }
+ print "\n";
+}
+
+sub is_float {
+ my $n = shift;
+ if(!defined $n || $n eq ""){
+ return 0;
+ }
+ if($n =~ /^[+-]?\d+\.\d+([eE][-+]?[0-9]+)?$/){
+ return 1;
+ }
+
+ my $abs = abs($n);
+ if ($abs - int($abs) > 0) {
+ return 1;
+ }
+ return 0;
+}
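+# For example: is_float("3.14") and is_float("2.5e3") return 1, while
+# is_float("42") and is_float("") return 0.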
+
+
+# main
+{
+ $delim = shift;
+ if (!defined($delim)) {
+ die "Usage: $0 delimiter\n";
+ }
+
+ my @sampled;
+ my $line;
+ # read away any empty lines into the sample
+ do {
+ $line = <STDIN>;
+ push(@sampled, $line);
+ } while($line && $line =~ /^\s*$/);
+ # Sample the next thousand lines to figure out which columns have floats.
+ for (my $i = 0; $i < 1000 && ($line = <STDIN>); $i++) {
+ push(@sampled, $line);
+ }
+ foreach my $line (@sampled) {
+ my @fields = parseLine($line);
+ for (my $j = 0; $j < @fields; $j++) {
+            if(is_float($fields[$j])){
+                $floats[$j] = 1;
+            }
+        }
+ }
+
+ # Now, play each of the sampled lines through the postprocessor
+ foreach my $line (@sampled) {
+ postprocess($line);
+ }
+
+ while (<STDIN>) {
+ postprocess($_);
+ }
+
+}
+
+
+
+
Added: incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/build.xml
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/build.xml?rev=1163097&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/build.xml (added)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/build.xml Tue Aug 30 06:18:16 2011
@@ -0,0 +1,51 @@
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.-->
+
+<project name="HCatalog-test-utils" default="udf-jar">
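+  <!-- Assumed usage: run "ant" from this directory; the default target,
+       udf-jar, compiles the UDFs and packages them into testudf.jar. The
+       classpath filesets below expect an already-built HCatalog and Hive
+       source tree at the relative paths shown. -->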
+
+ <property name="udf.jarfile" value="testudf.jar" />
+ <property name="build.dir" value="${basedir}/build" />
+ <property name="src.dir" value="${basedir}/org/" />
+
+ <path id="udf-classpath">
+ <fileset file="../../../../../../build/hcatalog/*.jar" />
+ <fileset file="../../../../../../build/ivy/lib/hcatalog/*.jar" />
+ <fileset file="../../../../../../hive/external/build/dist/lib/*.jar" />
+ <fileset file="../../../../../../hive/external/build/hadoopcore/hadoop-0.20.1/*.jar" />
+ </path>
+
+ <target name="init">
+ <mkdir dir="${build.dir}" />
+ </target>
+
+ <target name="clean">
+ <delete dir="${build.dir}" />
+ <delete file="${udf.jarfile}" />
+ </target>
+
+ <target name="udf-compile" depends="init">
+ <echo>*** Compiling UDFs ***</echo>
+ <javac srcdir="${src.dir}" destdir="${build.dir}" debug="on">
+ <classpath refid="udf-classpath" />
+ </javac>
+ </target>
+
+ <target name="udf-jar" depends="udf-compile">
+ <echo>*** Creating UDF jar ***</echo>
+ <jar duplicate="preserve" jarfile="${udf.jarfile}">
+      <fileset dir="${build.dir}"/>
+ </jar>
+ </target>
+</project>
Added: incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/HCatTestDriver.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/HCatTestDriver.java?rev=1163097&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/HCatTestDriver.java (added)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/HCatTestDriver.java Tue Aug 30 06:18:16 2011
@@ -0,0 +1,61 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hcatalog.utils;
+
+import org.apache.hcatalog.utils.TypeDataCheck;
+import org.apache.hadoop.util.ProgramDriver;
+
+/**
+ * A driver that maps each test program in this package to a name and a
+ * human-readable description, and dispatches to it by name.
+ */
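+/*
+ * A minimal sketch of how the driver is invoked (program names as registered
+ * in main() below; the jar name comes from the build.xml in this change):
+ *
+ *   hadoop jar testudf.jar typedatacheck <program-specific args>
+ *
+ * ProgramDriver matches the first argument against the registered names and
+ * forwards the remaining arguments to that program's main().
+ */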
+public class HCatTestDriver {
+
+ public static void main(String argv[]){
+ int exitCode = -1;
+ ProgramDriver pgd = new ProgramDriver();
+ try {
+ pgd.addClass("typedatacheck", TypeDataCheck.class,
+ "A map/reduce program that checks the type of each field and" +
+ " outputs the entire table (to test hcat).");
+ pgd.addClass("sumnumbers", SumNumbers.class,
+ "A map/reduce program that performs a group by on the first column and a " +
+ "SUM operation on the other columns of the \"numbers\" table.");
+      pgd.addClass("storenumbers", StoreNumbers.class, "A map/reduce program that " +
+          "reads from the \"numbers\" table and adds 10 to each field and writes " +
+          "to the datestamp=20100101 partition of the \"numbers_part_empty_initially\" " +
+          "table OR to the \"numbers_nopart_empty_initially\" table based on a " +
+          "cmdline arg");
+      pgd.addClass("storecomplex", StoreComplex.class, "A map/reduce program that " +
+          "reads from the \"complex\" table and stores as-is into the " +
+          "\"complex_nopart_empty_initially\" table.");
+ pgd.addClass("storedemo", StoreDemo.class, "demo prog.");
+ pgd.driver(argv);
+
+ // Success
+ exitCode = 0;
+ }
+ catch(Throwable e){
+ e.printStackTrace();
+ }
+
+ System.exit(exitCode);
+ }
+}
+
Added: incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/HCatTypeCheck.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/HCatTypeCheck.java?rev=1163097&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/HCatTypeCheck.java (added)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/HCatTypeCheck.java Tue Aug 30 06:18:16 2011
@@ -0,0 +1,152 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hcatalog.utils;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.apache.pig.EvalFunc;
+import org.apache.pig.data.DataBag;
+import org.apache.pig.data.DataType;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.impl.logicalLayer.schema.Schema;
+import org.apache.pig.impl.util.Utils;
+
+/**
+ * This UDF can be used to check that a tuple presented by HCatLoader has the
+ * right types for the fields
+ *
+ * Usage is:
+ *
+ * register testudf.jar;
+ * a = load 'numbers' using HCatLoader(...);
+ * b = foreach a generate HCatTypeCheck('intnum1000:int,id:int,intnum5:int,intnum100:int,intnum:int,longnum:long,floatnum:float,doublenum:double', *);
+ * store b into 'output';
+ *
+ * The schema string (the first argument to the UDF) is of the form one would provide in a
+ * pig load statement.
+ *
+ * The output should only contain the value '1' in all rows. (This UDF returns
+ * the integer value 1 if all fields have the right type, else throws IOException)
+ *
+ */
+public class HCatTypeCheck extends EvalFunc<Integer> {
+
+ static HashMap<Byte, Class<?>> typeMap = new HashMap<Byte, Class<?>>();
+
+ @Override
+ public Integer exec(Tuple input) throws IOException {
+ String schemaStr = (String) input.get(0);
+ Schema s = null;
+ try {
+ s = getSchemaFromString(schemaStr);
+ } catch (Exception e) {
+ throw new IOException(e);
+ }
+ for(int i = 0; i < s.size(); i++) {
+ check(s.getField(i).type, input.get(i+1)); // input.get(i+1) since input.get(0) is the schema;
+ }
+ return 1;
+ }
+
+ static {
+ typeMap.put(DataType.INTEGER, Integer.class);
+ typeMap.put(DataType.LONG, Long.class);
+ typeMap.put(DataType.FLOAT, Float.class);
+ typeMap.put(DataType.DOUBLE, Double.class);
+ typeMap.put(DataType.CHARARRAY, String.class);
+ typeMap.put(DataType.TUPLE, Tuple.class);
+ typeMap.put(DataType.MAP, Map.class);
+ typeMap.put(DataType.BAG, DataBag.class);
+ }
+
+
+
+ private void die(String expectedType, Object o) throws IOException {
+ throw new IOException("Expected " + expectedType + ", got " +
+ o.getClass().getName());
+ }
+
+
+ private String check(Byte type, Object o) throws IOException {
+ if(o == null) {
+ return "";
+ }
+ if(check(typeMap.get(type), o)) {
+ if(type.equals(DataType.MAP)) {
+ Map<String, String> m = (Map<String, String>) o;
+ check(m);
+ } else if(type.equals(DataType.BAG)) {
+ DataBag bg = (DataBag) o;
+ for (Tuple tuple : bg) {
+ Map<String, String> m = (Map<String, String>) tuple.get(0);
+ check(m);
+ }
+ } else if(type.equals(DataType.TUPLE)) {
+ Tuple t = (Tuple) o;
+ if(!check(Integer.class, t.get(0)) ||
+ !check(String.class, t.get(1)) ||
+ !check(Double.class, t.get(2))) {
+ die("t:tuple(num:int,str:string,dbl:double)", t);
+ }
+ }
+ } else {
+ die(typeMap.get(type).getName(), o);
+ }
+ return o.toString();
+ }
+
+ /**
+ * @param m
+ * @throws IOException
+ */
+ private void check(Map<String, String> m) throws IOException {
+ for(Entry<String, String> e: m.entrySet()) {
+ // just access key and value to ensure they are correct
+ if(!check(String.class, e.getKey())) {
+ die("String", e.getKey());
+ }
+ if(!check(String.class, e.getValue())) {
+ die("String", e.getValue());
+ }
+ }
+
+ }
+
+ private boolean check(Class<?> expected, Object actual) {
+ if(actual == null) {
+ return true;
+ }
+ return expected.isAssignableFrom(actual.getClass());
+ }
+
+ Schema getSchemaFromString(String schemaString) throws Exception {
+ /** ByteArrayInputStream stream = new ByteArrayInputStream(schemaString.getBytes()) ;
+ QueryParser queryParser = new QueryParser(stream) ;
+ Schema schema = queryParser.TupleSchema() ;
+ Schema.setSchemaDefaultType(schema, org.apache.pig.data.DataType.BYTEARRAY);
+ return schema;
+ */
+ return Utils.getSchemaFromString(schemaString);
+ }
+
+}
Added: incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/HCatTypeCheckHive.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/HCatTypeCheckHive.java?rev=1163097&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/HCatTypeCheckHive.java (added)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/HCatTypeCheckHive.java Tue Aug 30 06:18:16 2011
@@ -0,0 +1,140 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hcatalog.utils;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions;
+
+/**
+ * A hive udf to check types of the fields read from hcat. A sample hive query which can use this is:
+ *
+ * create temporary function typecheck as 'org.apache.hcatalog.utils.HCatTypeCheckHive';
+ * select typecheck('map<string,string>+struct<num:int,str:string,dbl:double>+array<map<string,string>>+int',
+ * mymap, mytuple, bagofmap, rownum) from complex;
+ *
+ *
+ * The first argument to the UDF is a string representing the schema of the columns in the table.
+ * The columns in the tables are the remaining args to it.
+ * The schema specification consists of the types as given by "describe <table>"
+ * with each column's type separated from the next column's type by a '+'
+ *
+ * The UDF will throw an exception (and cause the query to fail) if it does not
+ * encounter the correct types.
+ *
+ * The output is a string representation of the data, the type, and the hive category.
+ * It is not advisable to use this against a large dataset since the output would also
+ * be large.
+ *
+ */
+public final class HCatTypeCheckHive extends GenericUDF {
+
+    ObjectInspector[] argOIs;
+
+    @Override
+    public Object evaluate(DeferredObject[] args) throws HiveException {
+        List<Object> row = new ArrayList<Object>();
+        String typesStr = (String) getJavaObject(args[0].get(), argOIs[0], new ArrayList<Category>());
+        String[] types = typesStr.split("\\+");
+        for(int i = 0; i < types.length; i++) {
+            types[i] = types[i].toLowerCase();
+        }
+        for(int i = 1; i < args.length; i++) {
+            ObjectInspector oi = argOIs[i];
+            List<ObjectInspector.Category> categories = new ArrayList<ObjectInspector.Category>();
+            Object o = getJavaObject(args[i].get(), oi, categories);
+            try {
+                if(o != null) {
+                    Util.check(types[i-1], o);
+                }
+            } catch (IOException e) {
+                throw new HiveException(e);
+            }
+            row.add(o == null ? "null" : o);
+            row.add(":" + (o == null ? "null" : o.getClass()) + ":" + categories);
+        }
+        return row.toString();
+    }
+
+    private Object getJavaObject(Object o, ObjectInspector oi, List<Category> categories) {
+        if(categories != null) {
+            categories.add(oi.getCategory());
+        }
+        if(oi.getCategory() == ObjectInspector.Category.LIST) {
+            List<?> l = ((ListObjectInspector)oi).getList(o);
+            List<Object> result = new ArrayList<Object>();
+            ObjectInspector elemOI = ((ListObjectInspector)oi).getListElementObjectInspector();
+            for(Object lo : l) {
+                result.add(getJavaObject(lo, elemOI, categories));
+            }
+            return result;
+        } else if (oi.getCategory() == ObjectInspector.Category.MAP) {
+            Map<?,?> m = ((MapObjectInspector)oi).getMap(o);
+            Map<String, String> result = new HashMap<String, String>();
+            ObjectInspector koi = ((MapObjectInspector)oi).getMapKeyObjectInspector();
+            ObjectInspector voi = ((MapObjectInspector)oi).getMapValueObjectInspector();
+            for(Entry<?,?> e: m.entrySet()) {
+                result.put((String)getJavaObject(e.getKey(), koi, null),
+                        (String)getJavaObject(e.getValue(), voi, null));
+            }
+            return result;
+        } else if (oi.getCategory() == ObjectInspector.Category.STRUCT) {
+            List<Object> s = ((StructObjectInspector)oi).getStructFieldsDataAsList(o);
+            List<? extends StructField> sf = ((StructObjectInspector)oi).getAllStructFieldRefs();
+            List<Object> result = new ArrayList<Object>();
+            for(int i = 0; i < s.size(); i++) {
+                result.add(getJavaObject(s.get(i), sf.get(i).getFieldObjectInspector(), categories));
+            }
+            return result;
+        } else if(oi.getCategory() == ObjectInspector.Category.PRIMITIVE) {
+            return ((PrimitiveObjectInspector)oi).getPrimitiveJavaObject(o);
+        }
+        throw new RuntimeException("Unexpected error!");
+    }
+
+    @Override
+    public String getDisplayString(String[] arg0) {
+        return null;
+    }
+
+    @Override
+    public ObjectInspector initialize(ObjectInspector[] argOIs)
+            throws UDFArgumentException {
+        this.argOIs = argOIs;
+        return ObjectInspectorFactory.getReflectionObjectInspector(String.class,
+                ObjectInspectorOptions.JAVA);
+    }
+
+}
Added: incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/PartitionStorageDriverAnnotator.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/PartitionStorageDriverAnnotator.java?rev=1163097&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/PartitionStorageDriverAnnotator.java (added)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/PartitionStorageDriverAnnotator.java Tue Aug 30 06:18:16 2011
@@ -0,0 +1,114 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hcatalog.utils;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
+import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
+import org.apache.hadoop.hive.metastore.api.Partition;
+import org.apache.hcatalog.rcfile.RCFileInputDriver;
+import org.apache.hcatalog.rcfile.RCFileOutputDriver;
+import org.apache.thrift.TException;
+
+/**
+ * A utility program to annotate partitions of a pre-created table
+ * with input storage driver and output storage driver information
+ */
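+/*
+ * A hypothetical invocation (URI, table and database names are illustrative):
+ *
+ *   java -cp testudf.jar:<hcatjar> org.apache.hcatalog.utils.PartitionStorageDriverAnnotator \
+ *       -u thrift://localhost:9083 -t web_logs -d default
+ *
+ * With no -i/-o arguments, every partition of web_logs is annotated with the
+ * RCFile input and output drivers.
+ */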
+public class PartitionStorageDriverAnnotator {
+
+ /**
+ * @param args
+ * @throws MetaException
+ * @throws TException
+ * @throws NoSuchObjectException
+ * @throws InvalidOperationException
+ */
+ public static void main(String[] args) throws MetaException, NoSuchObjectException,
+ TException, InvalidOperationException {
+ String thrifturi = null;
+ String database = "default";
+ String table = null;
+ String isd = null;
+ String osd = null;
+ Map<String, String> m = new HashMap<String, String>();
+ for(int i = 0; i < args.length; i++) {
+ if(args[i].equals("-u")) {
+ thrifturi = args[i+1];
+ } else if(args[i].equals("-t")) {
+ table = args[i+1];
+ } else if (args[i].equals("-i")) {
+ isd = args[i+1];
+ } else if (args[i].equals("-o")) {
+ osd = args[i+1];
+ } else if (args[i].equals("-p")) {
+ String[] kvps = args[i+1].split(";");
+ for(String kvp: kvps) {
+ String[] kv = kvp.split("=");
+ if(kv.length != 2) {
+ System.err.println("ERROR: key value property pairs must be specified as key1=val1;key2=val2;..;keyn=valn");
+ System.exit(1);
+ }
+ m.put(kv[0], kv[1]);
+ }
+ } else if(args[i].equals("-d")) {
+ database = args[i+1];
+ } else {
+ System.err.println("ERROR: Unknown option: " + args[i]);
+ usage();
+ }
+ i++; // to skip the value for an option
+ }
+ if(table == null || thrifturi == null) {
+ System.err.println("ERROR: thrift uri and table name are mandatory");
+ usage();
+ }
+ HiveConf hiveConf = new HiveConf(PartitionStorageDriverAnnotator.class);
+ hiveConf.set("hive.metastore.local", "false");
+ hiveConf.set("hive.metastore.uris", thrifturi);
+
+ HiveMetaStoreClient hmsc = new HiveMetaStoreClient(hiveConf,null);
+ List<Partition> parts = hmsc.listPartitions(database, table, Short.MAX_VALUE);
+
+ m.put("hcat.isd", isd != null ? isd : RCFileInputDriver.class.getName());
+ m.put("hcat.osd", osd != null ? osd : RCFileOutputDriver.class.getName());
+
+ for(Partition p: parts) {
+ p.setParameters(m);
+ hmsc.alter_partition(database, table, p);
+ }
+ }
+
+ /**
+ *
+ */
+ private static void usage() {
+        System.err.println("Usage: java -cp testudf.jar:<hcatjar> org.apache.hcatalog.utils.PartitionStorageDriverAnnotator -u <thrift uri> -t <partitioned tablename>" +
+ " [-i input driver classname (Default rcfiledriver)] [-o output driver classname (default rcfiledriver)] " +
+ " [-p key1=val1;key2=val2;..;keyn=valn (list of key=value property pairs to associate with each partition)]" +
+ " [-d database (if this not supplied the default database is used)]");
+ System.exit(1);
+ }
+
+}
Added: incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/StoreComplex.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/StoreComplex.java?rev=1163097&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/StoreComplex.java (added)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/StoreComplex.java Tue Aug 30 06:18:16 2011
@@ -0,0 +1,135 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hcatalog.utils;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Random;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.util.GenericOptionsParser;
+import org.apache.hcatalog.common.HCatConstants;
+import org.apache.hcatalog.data.DefaultHCatRecord;
+import org.apache.hcatalog.data.HCatRecord;
+import org.apache.hcatalog.data.schema.HCatSchema;
+import org.apache.hcatalog.mapreduce.HCatInputFormat;
+import org.apache.hcatalog.mapreduce.HCatOutputFormat;
+import org.apache.hcatalog.mapreduce.InputJobInfo;
+import org.apache.hcatalog.mapreduce.OutputJobInfo;
+
+/**
+ * This is a map reduce test for testing hcat which goes against the "complex"
+ * table and writes to the "complex_nopart_empty_initially" table. It reads data from complex, which
+ * is an unpartitioned table, and stores the data as-is into the complex_nopart_empty_initially table
+ * (which is also unpartitioned).
+ *
+ * Usage: hadoop jar testudf.jar storecomplex <serveruri> <-libjars hive-hcat jar>
+ The hcat jar location should be specified as file://<full path to jar>
+ */
+public class StoreComplex {
+
+ private static final String COMPLEX_TABLE_NAME = "complex";
+ private static final String COMPLEX_NOPART_EMPTY_INITIALLY_TABLE_NAME = "complex_nopart_empty_initially";
+
+
+ public static class ComplexMapper
+ extends Mapper<WritableComparable, HCatRecord, WritableComparable, HCatRecord>{
+
+ @Override
+ protected void map(WritableComparable key, HCatRecord value,
+ org.apache.hadoop.mapreduce.Mapper<WritableComparable,HCatRecord,
+ WritableComparable,HCatRecord>.Context context)
+ throws IOException ,InterruptedException {
+ // just write out the value as-is
+ context.write(new IntWritable(0), value);
+
+ }
+ }
+
+
+ public static void main(String[] args) throws Exception {
+ Configuration conf = new Configuration();
+ args = new GenericOptionsParser(conf, args).getRemainingArgs();
+ String[] otherArgs = new String[1];
+ int j = 0;
+ for(int i = 0; i < args.length; i++) {
+ if(args[i].equals("-libjars")) {
+ // generic options parser doesn't seem to work!
+ conf.set("tmpjars", args[i+1]);
+                i = i+1; // consume the option's value; the loop increment skips past it
+ } else {
+ otherArgs[j++] = args[i];
+ }
+ }
+        if (j != 1) {
+ usage();
+ }
+ String serverUri = otherArgs[0];
+ String tableName = COMPLEX_TABLE_NAME;
+ String dbName = "default";
+ Map<String, String> outputPartitionKvps = new HashMap<String, String>();
+        String outputTableName = COMPLEX_NOPART_EMPTY_INITIALLY_TABLE_NAME;
+ // test with null or empty randomly
+ if(new Random().nextInt(2) == 0) {
+ System.err.println("INFO: output partition keys set to null for writing");
+ outputPartitionKvps = null;
+ }
+ String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL);
+ if(principalID != null)
+ conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID);
+ Job job = new Job(conf, "storecomplex");
+ // initialize HCatInputFormat
+
+ HCatInputFormat.setInput(job, InputJobInfo.create(
+ dbName, tableName, null, serverUri, principalID));
+ // initialize HCatOutputFormat
+ HCatOutputFormat.setOutput(job, OutputJobInfo.create(
+ dbName, outputTableName, outputPartitionKvps, serverUri, principalID));
+
+
+ HCatSchema s = HCatInputFormat.getTableSchema(job);
+ HCatOutputFormat.setSchema(job, s);
+ job.setInputFormatClass(HCatInputFormat.class);
+ job.setOutputFormatClass(HCatOutputFormat.class);
+ job.setJarByClass(StoreComplex.class);
+ job.setMapperClass(ComplexMapper.class);
+ job.setOutputKeyClass(IntWritable.class);
+ job.setOutputValueClass(DefaultHCatRecord.class);
+ System.exit(job.waitForCompletion(true) ? 0 : 1);
+ }
+
+
+ /**
+ *
+ */
+ private static void usage() {
+ System.err.println("Usage: hadoop jar testudf.jar storecomplex <serveruri> <-libjars hive-hcat jar>\n" +
+ "The hcat jar location should be specified as file://<full path to jar>\n");
+ System.exit(2);
+
+ }
+
+
+}
Added: incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/StoreDemo.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/StoreDemo.java?rev=1163097&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/StoreDemo.java (added)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/StoreDemo.java Tue Aug 30 06:18:16 2011
@@ -0,0 +1,152 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hcatalog.utils;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Random;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.util.GenericOptionsParser;
+import org.apache.hcatalog.common.HCatConstants;
+import org.apache.hcatalog.data.DefaultHCatRecord;
+import org.apache.hcatalog.data.HCatRecord;
+import org.apache.hcatalog.data.schema.HCatSchema;
+import org.apache.hcatalog.mapreduce.HCatInputFormat;
+import org.apache.hcatalog.mapreduce.HCatOutputFormat;
+import org.apache.hcatalog.mapreduce.InputJobInfo;
+import org.apache.hcatalog.mapreduce.OutputJobInfo;
+
+/**
+ * This is a map reduce test for testing hcat which goes against the "demo"
+ * table and writes to the "demo_partitioned" table. It reads data from demo, which
+ * is an unpartitioned table, adds 20 to the int and double columns, and stores
+ * the result into the datestamp='20100102' partition of the demo_partitioned
+ * table.
+ *
+ * Usage: hadoop jar testudf.jar storedemo <serveruri> <-libjars hive-hcat jar>
+        The hcat jar location should be specified as file://<full path to jar>
+ */
+public class StoreDemo {
+
+ private static final String NUMBERS_PARTITIONED_TABLE_NAME = "demo_partitioned";
+ private static final String NUMBERS_TABLE_NAME = "demo";
+
+ public static class SumMapper
+ extends Mapper<WritableComparable, HCatRecord, WritableComparable, HCatRecord>{
+
+
+ Integer intnum;
+
+ Double doublenum;
+ @Override
+ protected void map(WritableComparable key, HCatRecord value,
+ org.apache.hadoop.mapreduce.Mapper<WritableComparable,HCatRecord,
+ WritableComparable,HCatRecord>.Context context)
+ throws IOException ,InterruptedException {
+ intnum = ((Integer)value.get(0));
+ value.set(0, intnum + 20);
+ doublenum = ((Double) value.get(1));
+ value.set(1, (Double) (doublenum + 20));
+ context.write(new IntWritable(0), value);
+
+ }
+ }
+
+
+ public static void main(String[] args) throws Exception {
+ Configuration conf = new Configuration();
+ args = new GenericOptionsParser(conf, args).getRemainingArgs();
+ String[] otherArgs = new String[1];
+ int j = 0;
+ for(int i = 0; i < args.length; i++) {
+ if(args[i].equals("-libjars")) {
+ // generic options parser doesn't seem to work!
+ conf.set("tmpjars", args[i+1]);
+                i = i+1; // consume the option's value; the loop increment skips past it
+ } else {
+ otherArgs[j++] = args[i];
+ }
+ }
+        if (j != 1) {
+ usage();
+ }
+ String serverUri = otherArgs[0];
+
+ String tableName = NUMBERS_TABLE_NAME;
+ String dbName = "default";
+ Map<String, String> outputPartitionKvps = new HashMap<String, String>();
+ String outputTableName = NUMBERS_PARTITIONED_TABLE_NAME;
+ outputPartitionKvps.put("datestamp", "20100102");
+
+ String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL);
+ if(principalID != null)
+ conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID);
+ Job job = new Job(conf, "storedemo");
+ // initialize HCatInputFormat
+ HCatInputFormat.setInput(job, InputJobInfo.create(
+ dbName, tableName, null, serverUri, principalID));
+ // initialize HCatOutputFormat
+ HCatOutputFormat.setOutput(job, OutputJobInfo.create(
+ dbName, outputTableName, outputPartitionKvps, serverUri, principalID));
+        // set the output schema explicitly to the input table's schema
+ HCatSchema s = HCatInputFormat.getTableSchema(job);
+ System.err.println("INFO: output schema explicitly set for writing:" + s);
+ HCatOutputFormat.setSchema(job, s);
+
+ job.setInputFormatClass(HCatInputFormat.class);
+ job.setOutputFormatClass(HCatOutputFormat.class);
+ job.setJarByClass(StoreDemo.class);
+ job.setMapperClass(SumMapper.class);
+ job.setOutputKeyClass(IntWritable.class);
+ job.setNumReduceTasks(0);
+ job.setOutputValueClass(DefaultHCatRecord.class);
+ System.exit(job.waitForCompletion(true) ? 0 : 1);
+ }
+
+
+ /**
+ *
+ */
+ private static void usage() {
+        System.err.println("Usage: hadoop jar testudf.jar storedemo <serveruri> <-libjars hive-hcat jar>\n" +
+                "\tData is written to the datestamp = '20100102' partition of " +
+                "the demo_partitioned table.\n" +
+                "The hcat jar location should be specified as file://<full path to jar>\n");
+ System.exit(2);
+
+ }
+
+
+}
Added: incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/StoreNumbers.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/StoreNumbers.java?rev=1163097&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/StoreNumbers.java (added)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/StoreNumbers.java Tue Aug 30 06:18:16 2011
@@ -0,0 +1,232 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hcatalog.utils;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.util.GenericOptionsParser;
+import org.apache.hcatalog.common.HCatConstants;
+import org.apache.hcatalog.data.DefaultHCatRecord;
+import org.apache.hcatalog.data.HCatRecord;
+import org.apache.hcatalog.data.schema.HCatFieldSchema;
+import org.apache.hcatalog.data.schema.HCatSchema;
+import org.apache.hcatalog.mapreduce.HCatInputFormat;
+import org.apache.hcatalog.mapreduce.HCatOutputFormat;
+import org.apache.hcatalog.mapreduce.InputJobInfo;
+import org.apache.hcatalog.mapreduce.OutputJobInfo;
+
+/**
+ * This is a map reduce test for testing hcat which goes against the "numbers"
+ * table and writes data to another table. It reads data from numbers which
+ * is an unpartitioned table and adds 10 to each field. It stores the result into
+ * the datestamp='20100101' partition of the numbers_part_empty_initially table if the second
+ * command line arg is "part". If the second cmdline arg is "nopart" then the
+ * result is stored into the 'numbers_nopart_empty_initially' (unpartitioned) table.
+ * If the second cmdline arg is "nopart_pig", then the result is stored into the
+ * 'numbers_nopart_pig_empty_initially' (unpartitioned) table with the tinyint
+ * and smallint columns in "numbers" being stored as "int" (since pig cannot handle
+ * tinyint and smallint)
+ *
+ * Usage: hadoop jar storenumbers <serveruri> <part|nopart|nopart_pig> <-libjars hive-hcat jar>
+ If the second argument is "part" data is written to datestamp = '2010101' partition of the numbers_part_empty_initially table.
+ If the second argument is "nopart", data is written to the unpartitioned numbers_nopart_empty_initially table.
+ If the second argument is "nopart_pig", data is written to the unpartitioned numbers_nopart_pig_empty_initially table.
+ The hcat jar location should be specified as file://<full path to jar>
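+
+ An example invocation (hypothetical metastore URI and jar path, for
+ illustration only):
+ hadoop jar storenumbers thrift://localhost:9083 part \
+ -libjars file:///tmp/hcatalog.jar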
+ */
+public class StoreNumbers {
+
+ private static final String NUMBERS_PARTITIONED_TABLE_NAME = "numbers_part_empty_initially";
+ private static final String NUMBERS_TABLE_NAME = "numbers";
+ private static final String NUMBERS_NON_PARTITIONED_TABLE_NAME = "numbers_nopart_empty_initially";
+ private static final String NUMBERS_NON_PARTITIONED_PIG_TABLE_NAME = "numbers_nopart_pig_empty_initially";
+ private static final String IS_PIG_NON_PART_TABLE = "is.pig.non.part.table";
+
+ public static class SumMapper
+ extends Mapper<WritableComparable, HCatRecord, WritableComparable, HCatRecord>{
+
+ // one field per column of the "numbers" table, typed as hcat hands
+ // them to the map (id as a Short, intnum5 as a Byte, and so on)
+ Integer intnum1000;
+ Short id;
+ Byte intnum5;
+ Integer intnum100;
+ Integer intnum;
+ Long longnum;
+ Float floatnum;
+ Double doublenum;
+ @Override
+ protected void map(WritableComparable key, HCatRecord value,
+ org.apache.hadoop.mapreduce.Mapper<WritableComparable,HCatRecord,
+ WritableComparable,HCatRecord>.Context context)
+ throws IOException, InterruptedException {
+ boolean isnoPartPig = context.getConfiguration().getBoolean(IS_PIG_NON_PART_TABLE, false);
+ intnum1000 = ((Integer)value.get(0));
+ id = ((Short) value.get(1));
+ intnum5 = (((Byte)value.get(2)));
+ intnum100 = (((Integer) value.get(3)));
+ intnum = ((Integer) value.get(4));
+ longnum = ((Long) value.get(5));
+ floatnum = ((Float) value.get(6));
+ doublenum = ((Double) value.get(7));
+ HCatRecord output = new DefaultHCatRecord(8);
+ output.set(0, intnum1000 + 10);
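+ // for the pig table variant, store the smallint and tinyint columns
+ // as int, since pig cannot handle those types (see class javadoc)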
+ if(isnoPartPig) {
+ output.set(1, (int)(id + 10));
+ output.set(2, (int)(intnum5 + 10));
+ } else {
+ output.set(1, (short)(id + 10));
+ output.set(2, (byte)(intnum5 + 10));
+ }
+
+ output.set(3, intnum100 + 10);
+ output.set(4, intnum + 10);
+ output.set(5, (long) (longnum + 10));
+ output.set(6, (float) (floatnum + 10));
+ output.set(7, (double) (doublenum + 10));
+ context.write(new IntWritable(0), output);
+
+ }
+ }
+
+
+ public static void main(String[] args) throws Exception {
+ Configuration conf = new Configuration();
+ args = new GenericOptionsParser(conf, args).getRemainingArgs();
+ String[] otherArgs = new String[2];
+ int j = 0;
+ for(int i = 0; i < args.length; i++) {
+ if(args[i].equals("-libjars")) {
+ // generic options parser doesn't seem to work!
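+ // setting "tmpjars" by hand is what -libjars would normally do:
+ // the listed jars are shipped to the cluster with the job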
+ conf.set("tmpjars", args[i+1]);
+ i = i + 1; // skip the jar list; the loop increment then moves past it
+ } else {
+ otherArgs[j++] = args[i];
+ }
+ }
+ if (j != 2) {
+ usage();
+ }
+ String serverUri = otherArgs[0];
+ if(otherArgs[1] == null ||
+ (!otherArgs[1].equalsIgnoreCase("part") && !otherArgs[1].equalsIgnoreCase("nopart")
+ && !otherArgs[1].equalsIgnoreCase("nopart_pig"))) {
+ usage();
+ }
+ boolean writeToPartitionedTable = (otherArgs[1].equalsIgnoreCase("part"));
+ boolean writeToNonPartPigTable = (otherArgs[1].equalsIgnoreCase("nopart_pig"));
+ String tableName = NUMBERS_TABLE_NAME;
+ String dbName = "default";
+ Map<String, String> outputPartitionKvps = new HashMap<String, String>();
+ String outputTableName = null;
+ conf.set(IS_PIG_NON_PART_TABLE, "false");
+ if(writeToPartitionedTable) {
+ outputTableName = NUMBERS_PARTITIONED_TABLE_NAME;
+ outputPartitionKvps.put("datestamp", "20100101");
+ } else {
+ if(writeToNonPartPigTable) {
+ conf.set(IS_PIG_NON_PART_TABLE, "true");
+ outputTableName = NUMBERS_NON_PARTITIONED_PIG_TABLE_NAME;
+ } else {
+ outputTableName = NUMBERS_NON_PARTITIONED_TABLE_NAME;
+ }
+ // test with null or empty randomly
+ if(new Random().nextInt(2) == 0) {
+ outputPartitionKvps = null;
+ }
+ }
+
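+ // forward the metastore principal, needed when talking to a
+ // kerberized (secure) metastore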
+ String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL);
+ if(principalID != null)
+ conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID);
+ Job job = new Job(conf, "storenumbers");
+
+ // initialize HCatInputFormat
+ HCatInputFormat.setInput(job, InputJobInfo.create(
+ dbName, tableName, null, serverUri, principalID));
+ // initialize HCatOutputFormat
+ HCatOutputFormat.setOutput(job, OutputJobInfo.create(
+ dbName, outputTableName, outputPartitionKvps, serverUri, principalID));
+ // fetch the input table schema (rewritten below for the pig variant)
+ HCatSchema s = HCatInputFormat.getTableSchema(job);
+ if(writeToNonPartPigTable) {
+ List<HCatFieldSchema> newHfsList = new ArrayList<HCatFieldSchema>();
+ // change smallint and tinyint to int
+ for(HCatFieldSchema hfs: s.getFields()){
+ if(hfs.getTypeString().equals("smallint")) {
+ newHfsList.add(new HCatFieldSchema(hfs.getName(),
+ HCatFieldSchema.Type.INT, hfs.getComment()));
+ } else if(hfs.getTypeString().equals("tinyint")) {
+ newHfsList.add(new HCatFieldSchema(hfs.getName(),
+ HCatFieldSchema.Type.INT, hfs.getComment()));
+ } else {
+ newHfsList.add(hfs);
+ }
+ }
+ s = new HCatSchema(newHfsList);
+ }
+ HCatOutputFormat.setSchema(job, s);
+
+
+ job.setInputFormatClass(HCatInputFormat.class);
+ job.setOutputFormatClass(HCatOutputFormat.class);
+ job.setJarByClass(StoreNumbers.class);
+ job.setMapperClass(SumMapper.class);
+ job.setOutputKeyClass(IntWritable.class);
+ job.setNumReduceTasks(0);
+ job.setOutputValueClass(DefaultHCatRecord.class);
+ System.exit(job.waitForCompletion(true) ? 0 : 1);
+ }
+
+
+ /**
+ * Prints usage information and exits with status 2.
+ */
+ private static void usage() {
+ System.err.println("Usage: hadoop jar storenumbers <serveruri> <part|nopart|nopart_pig> <-libjars hive-hcat jar>\n" +
+ "\tIf the second argument is \"part\" data is written to datestamp = '2010101' partition of " +
+ "the numbers_part_empty_initially table.\n\tIf the second argument is \"nopart\", data is written to " +
+ "the unpartitioned numbers_nopart_empty_initially table.\n\tIf the second argument is \"nopart_pig\", " +
+ "data is written to the unpartitioned numbers_nopart_pig_empty_initially table.\nt" +
+ "The hcat jar location should be specified as file://<full path to jar>\n");
+ System.exit(2);
+
+ }
+
+
+}
Added: incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/SumNumbers.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/SumNumbers.java?rev=1163097&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/SumNumbers.java (added)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/SumNumbers.java Tue Aug 30 06:18:16 2011
@@ -0,0 +1,257 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hcatalog.utils;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.DoubleWritable;
+import org.apache.hadoop.io.FloatWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
+import org.apache.hadoop.util.GenericOptionsParser;
+import org.apache.hcatalog.common.HCatConstants;
+import org.apache.hcatalog.data.HCatRecord;
+import org.apache.hcatalog.mapreduce.HCatInputFormat;
+import org.apache.hcatalog.mapreduce.InputJobInfo;
+
+/**
+ * This is a map reduce test for testing hcat which goes against the "numbers"
+ * table. It performs a group by on the first column and a SUM operation on the
+ * other columns. This is to simulate a typical operation in a map reduce program
+ * to test that hcat hands the right data to the map reduce program
+ *
+ * Usage: hadoop jar sumnumbers <serveruri> <output dir> <-libjars hive-hcat jar>
+ The output is tab-delimited.
+ The hcat jar location should be specified as file://<full path to jar>
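+
+ An example invocation (hypothetical metastore URI and paths, for
+ illustration only):
+ hadoop jar sumnumbers thrift://localhost:9083 /tmp/sumnumbers.out \
+ -libjars file:///tmp/hcatalog.jar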
+ */
+public class SumNumbers {
+
+ private static final String NUMBERS_TABLE_NAME = "numbers";
+ private static final String TAB = "\t";
+
+ public static class SumMapper
+ extends Mapper<WritableComparable, HCatRecord, IntWritable, SumNumbers.ArrayWritable>{
+
+ IntWritable intnum1000;
+ // though id is given as a Short by hcat, the map will emit it as an
+ // IntWritable so we can just sum in the reduce
+ IntWritable id;
+
+ // though intnum5 is handed as a Byte by hcat, the map() will emit it as
+ // an IntWritable so we can just sum in the reduce
+ IntWritable intnum5;
+ IntWritable intnum100;
+ IntWritable intnum;
+ LongWritable longnum;
+ FloatWritable floatnum;
+ DoubleWritable doublenum;
+ @Override
+ protected void map(WritableComparable key, HCatRecord value,
+ org.apache.hadoop.mapreduce.Mapper<WritableComparable,HCatRecord,
+ IntWritable,SumNumbers.ArrayWritable>.Context context)
+ throws IOException, InterruptedException {
+ intnum1000 = new IntWritable((Integer)value.get(0));
+ id = new IntWritable((Short) value.get(1));
+ intnum5 = new IntWritable(((Byte)value.get(2)));
+ intnum100 = new IntWritable(((Integer) value.get(3)));
+ intnum = new IntWritable((Integer) value.get(4));
+ longnum = new LongWritable((Long) value.get(5));
+ floatnum = new FloatWritable((Float) value.get(6));
+ doublenum = new DoubleWritable((Double) value.get(7));
+ SumNumbers.ArrayWritable outputValue = new SumNumbers.ArrayWritable(id,
+ intnum5, intnum100, intnum, longnum, floatnum, doublenum);
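+ // the first column (intnum1000) is the group-by key; the reducer
+ // sums the remaining columns for each key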
+ context.write(intnum1000, outputValue);
+
+ }
+ }
+
+ public static class SumReducer extends Reducer<IntWritable, SumNumbers.ArrayWritable,
+ LongWritable, Text> {
+
+
+ LongWritable dummyLong = null;
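+ // TextOutputFormat emits only the value when the key is null; the
+ // group key is already embedded at the front of the output string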
+ @Override
+ protected void reduce(IntWritable key, java.lang.Iterable<ArrayWritable>
+ values, org.apache.hadoop.mapreduce.Reducer<IntWritable,ArrayWritable,LongWritable,Text>.Context context)
+ throws IOException, InterruptedException {
+ String output = key.toString() + TAB;
+ Long sumid = 0L;
+ Long sumintnum5 = 0L;
+ Long sumintnum100 = 0L;
+ Long sumintnum = 0L;
+ Long sumlongnum = 0L;
+ Float sumfloatnum = 0.0f;
+ Double sumdoublenum = 0.0;
+ for (ArrayWritable value : values) {
+ sumid += value.id.get();
+ sumintnum5 += value.intnum5.get();
+ sumintnum100 += value.intnum100.get();
+ sumintnum += value.intnum.get();
+ sumlongnum += value.longnum.get();
+ sumfloatnum += value.floatnum.get();
+ sumdoublenum += value.doublenum.get();
+ }
+ output += sumid + TAB;
+ output += sumintnum5 + TAB;
+ output += sumintnum100 + TAB;
+ output += sumintnum + TAB;
+ output += sumlongnum + TAB;
+ output += sumfloatnum + TAB;
+ output += sumdoublenum + TAB;
+ context.write(dummyLong, new Text(output));
+ }
+ }
+
+ public static void main(String[] args) throws Exception {
+ Configuration conf = new Configuration();
+ args = new GenericOptionsParser(conf, args).getRemainingArgs();
+ String[] otherArgs = new String[2];
+ int j = 0;
+ for(int i = 0; i < args.length; i++) {
+ if(args[i].equals("-libjars")) {
+ // generic options parser doesn't seem to work!
+ conf.set("tmpjars", args[i+1]);
+ i = i + 1; // skip the jar list; the loop increment then moves past it
+ } else {
+ otherArgs[j++] = args[i];
+ }
+ }
+ if (j != 2) {
+ System.err.println("Usage: hadoop jar sumnumbers <serveruri> <output dir> <-libjars hive-hcat jar>\n" +
+ "The output is tab-delimited.\n" +
+ "The hcat jar location should be specified as file://<full path to jar>\n");
+ System.exit(2);
+ }
+ String serverUri = otherArgs[0];
+ String tableName = NUMBERS_TABLE_NAME;
+ String outputDir = otherArgs[1];
+ String dbName = "default";
+
+ String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL);
+ if(principalID != null)
+ conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID);
+ Job job = new Job(conf, "sumnumbers");
+ HCatInputFormat.setInput(job, InputJobInfo.create(
+ dbName, tableName, null, serverUri, principalID));
+ // initialize HCatOutputFormat
+
+ job.setInputFormatClass(HCatInputFormat.class);
+ job.setOutputFormatClass(TextOutputFormat.class);
+ job.setJarByClass(SumNumbers.class);
+ job.setMapperClass(SumMapper.class);
+ job.setReducerClass(SumReducer.class);
+ job.setMapOutputKeyClass(IntWritable.class);
+ job.setMapOutputValueClass(ArrayWritable.class);
+ job.setOutputKeyClass(LongWritable.class);
+ job.setOutputValueClass(Text.class);
+ FileOutputFormat.setOutputPath(job, new Path(outputDir));
+ System.exit(job.waitForCompletion(true) ? 0 : 1);
+ }
+
+ public static class ArrayWritable implements Writable {
+
+ // though id is given as a Short by hcat, the map will emit it as an
+ // IntWritable so we can just sum in the reduce
+ IntWritable id;
+
+ // though intnum5 is handed as a Byte by hcat, the map() will emit it as
+ // an IntWritable so we can just sum in the reduce
+ IntWritable intnum5;
+
+ IntWritable intnum100;
+ IntWritable intnum;
+ LongWritable longnum;
+ FloatWritable floatnum;
+ DoubleWritable doublenum;
+
+ /**
+ * No-arg constructor required by the Writable contract: Hadoop
+ * instantiates the class reflectively and then calls readFields().
+ */
+ public ArrayWritable() {
+ id = new IntWritable();
+ intnum5 = new IntWritable();
+ intnum100 = new IntWritable();
+ intnum = new IntWritable();
+ longnum = new LongWritable();
+ floatnum = new FloatWritable();
+ doublenum = new DoubleWritable();
+ }
+
+
+
+ /**
+ * Constructs an ArrayWritable from pre-populated column values.
+ */
+ public ArrayWritable(IntWritable id, IntWritable intnum5,
+ IntWritable intnum100, IntWritable intnum, LongWritable longnum,
+ FloatWritable floatnum, DoubleWritable doublenum) {
+ this.id = id;
+ this.intnum5 = intnum5;
+ this.intnum100 = intnum100;
+ this.intnum = intnum;
+ this.longnum = longnum;
+ this.floatnum = floatnum;
+ this.doublenum = doublenum;
+ }
+
+
+
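+ // Writable contract: readFields must consume the fields in exactly
+ // the order that write() emits them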
+ @Override
+ public void readFields(DataInput in) throws IOException {
+ id.readFields(in);
+ intnum5.readFields(in);
+ intnum100.readFields(in);
+ intnum.readFields(in);
+ longnum.readFields(in);
+ floatnum.readFields(in);
+ doublenum.readFields(in);
+ }
+
+ @Override
+ public void write(DataOutput out) throws IOException {
+ id.write(out);
+ intnum5.write(out);
+ intnum100.write(out);
+ intnum.write(out);
+ longnum.write(out);
+ floatnum.write(out);
+ doublenum.write(out);
+
+ }
+
+ }
+}
Added: incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/TypeDataCheck.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/TypeDataCheck.java?rev=1163097&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/TypeDataCheck.java (added)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/TypeDataCheck.java Tue Aug 30 06:18:16 2011
@@ -0,0 +1,182 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hcatalog.utils;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
+import org.apache.hadoop.util.GenericOptionsParser;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.hcatalog.common.HCatConstants;
+import org.apache.hcatalog.data.HCatRecord;
+import org.apache.hcatalog.data.schema.HCatSchema;
+import org.apache.hcatalog.mapreduce.HCatInputFormat;
+import org.apache.hcatalog.mapreduce.InputJobInfo;
+
+/**
+ * This is a map reduce test for testing hcat that checks that the columns
+ * handed by hcat have the right type and right values. It achieves the first
+ * objective by checking the type of the Objects representing the columns against
+ * the schema provided as a cmdline arg. It achieves the second objective by
+ * writing the data as Text to be compared against golden results.
+ *
+ * The schema specification consists of the types as given by "describe <table>"
+ * with each column's type separated from the next column's type by a '+'
+ *
+ * Can be used against "numbers" and "complex" tables.
+ *
+ * Usage: hadoop jar testudf.jar typedatacheck <serveruri> <tablename>
+ * <hive types of cols + delimited> <output dir> <tab|ctrla> <-libjars hive-hcat jar>
+ The <tab|ctrla> argument controls the output delimiter.
+ The hcat jar location should be specified as file://<full path to jar>
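+
+ An example invocation against the "numbers" table (hypothetical URI and
+ paths, for illustration only):
+ hadoop jar testudf.jar typedatacheck thrift://localhost:9083 numbers \
+ int+smallint+tinyint+int+int+bigint+float+double /tmp/typecheck.out tab \
+ -libjars file:///tmp/hcatalog.jar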
+ */
+public class TypeDataCheck implements Tool{
+
+ static String SCHEMA_KEY = "schema";
+ static String DELIM = "delim";
+ private static Configuration conf = new Configuration();
+
+ public static class TypeDataCheckMapper
+ extends Mapper<WritableComparable, HCatRecord, Long, Text>{
+
+ Long dummykey = null;
+ String[] types;
+ String delim = "\u0001";
+ @Override
+ protected void setup(org.apache.hadoop.mapreduce.Mapper<WritableComparable,HCatRecord,Long,Text>.Context context)
+ throws IOException, InterruptedException {
+ String typesStr = context.getConfiguration().get(SCHEMA_KEY);
+ delim = context.getConfiguration().get(DELIM);
+ if(delim.equals("tab")) {
+ delim = "\t";
+ } else if (delim.equals("ctrla")) {
+ delim = "\u0001";
+ }
+ types = typesStr.split("\\+");
+ for(int i = 0; i < types.length; i++) {
+ types[i] = types[i].toLowerCase();
+ }
+
+
+ }
+
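+ // Util.check is expected to verify the column object's runtime class
+ // against the declared hive type and return its string form (see the
+ // class javadoc for the overall checking strategy)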
+ String check(HCatRecord r) throws IOException {
+ String s = "";
+ for(int i = 0; i < r.size(); i++) {
+ s += Util.check(types[i], r.get(i));
+ if(i != r.size() - 1) {
+ s += delim;
+ }
+ }
+ return s;
+ }
+
+ @Override
+ protected void map(WritableComparable key, HCatRecord value,
+ org.apache.hadoop.mapreduce.Mapper<WritableComparable,HCatRecord,Long,Text>.Context context)
+ throws IOException, InterruptedException {
+ context.write(dummykey, new Text(check(value)));
+ }
+ }
+
+ public static void main(String[] args) throws Exception {
+ TypeDataCheck self = new TypeDataCheck();
+ System.exit(ToolRunner.run(conf, self, args));
+ }
+
+ public int run(String[] args) {
+ try {
+ args = new GenericOptionsParser(conf, args).getRemainingArgs();
+ String[] otherArgs = new String[5];
+ int j = 0;
+ for(int i = 0; i < args.length; i++) {
+ if(args[i].equals("-libjars")) {
+ conf.set("tmpjars",args[i+1]);
+ i = i+1; // skip it , the for loop will skip its value
+ } else {
+ otherArgs[j++] = args[i];
+ }
+ }
+ if (j != 5) {
+ System.err.println("Other args:" + Arrays.asList(otherArgs));
+ System.err.println("Usage: hadoop jar testudf.jar typedatacheck " +
+ "<serveruri> <tablename> <hive types of cols + delimited> " +
+ "<output dir> <tab|ctrla> <-libjars hive-hcat jar>\n" +
+ "The <tab|ctrla> argument controls the output delimiter.\n" +
+ "The hcat jar location should be specified as file://<full path to jar>\n");
+ System.exit(2);
+ }
+ String serverUri = otherArgs[0];
+ String tableName = otherArgs[1];
+ String schemaStr = otherArgs[2];
+ String outputDir = otherArgs[3];
+ String outputdelim = otherArgs[4];
+ if(!outputdelim.equals("tab") && !outputdelim.equals("ctrla")) {
+ System.err.println("ERROR: Specify 'tab' or 'ctrla' for output delimiter");
+ System.exit(2);
+ }
+ String dbName = "default";
+
+ String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL);
+ if(principalID != null){
+ conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID);
+ }
+ Job job = new Job(conf, "typedatacheck");
+ // initialize HCatInputFormat
+ HCatInputFormat.setInput(job, InputJobInfo.create(
+ dbName, tableName, null, serverUri, principalID));
+ HCatSchema s = HCatInputFormat.getTableSchema(job);
+ job.getConfiguration().set(SCHEMA_KEY, schemaStr);
+ job.getConfiguration().set(DELIM, outputdelim);
+ job.setInputFormatClass(HCatInputFormat.class);
+ job.setOutputFormatClass(TextOutputFormat.class);
+ job.setJarByClass(TypeDataCheck.class);
+ job.setMapperClass(TypeDataCheckMapper.class);
+ job.setNumReduceTasks(0);
+ job.setOutputKeyClass(Long.class);
+ job.setOutputValueClass(Text.class);
+ FileOutputFormat.setOutputPath(job, new Path(outputDir));
+ return job.waitForCompletion(true) ? 0 : 1;
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ @Override
+ public Configuration getConf() {
+ return conf;
+ }
+
+ @Override
+ public void setConf(Configuration conf) {
+ TypeDataCheck.conf = conf;
+ }
+
+}