Posted to hcatalog-commits@incubator.apache.org by ha...@apache.org on 2011/08/30 06:18:18 UTC
svn commit: r1163097 [6/7] - in /incubator/hcatalog/trunk: ./ src/test/e2e/
src/test/e2e/hcatalog/ src/test/e2e/hcatalog/conf/
src/test/e2e/hcatalog/data/ src/test/e2e/hcatalog/deployers/
src/test/e2e/hcatalog/drivers/ src/test/e2e/hcatalog/paramfiles/...
Added: incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/generate/generate_data.pl
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/generate/generate_data.pl?rev=1163097&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/generate/generate_data.pl (added)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/generate/generate_data.pl Tue Aug 30 06:18:16 2011
@@ -0,0 +1,463 @@
+#!/usr/bin/env perl
+############################################################################
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# A utility to generate test data for pig test harness tests.
+#
+#
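+#
+# A hypothetical invocation (all names below are illustrative):
+#
+#   ./generate_data.pl studenttab 100 studenttab_100 /tmp/hcat_e2e
+#
+# would write 100 rows of tab-separated name/age/gpa data to
+# /tmp/hcat_e2e/studenttab_100 and a matching SQL load script to
+# /tmp/hcat_e2e/studenttab_100.sql (pass "nosql" as a fifth argument to
+# skip the SQL script).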
+
+use strict;
+use charnames ();
+
+our @firstName = ("alice", "bob", "calvin", "david", "ethan", "fred",
+ "gabriella", "holly", "irene", "jessica", "katie", "luke", "mike", "nick",
+ "oscar", "priscilla", "quinn", "rachel", "sarah", "tom", "ulysses", "victor",
+ "wendy", "xavier", "yuri", "zach");
+
+our @lastName = ("allen", "brown", "carson", "davidson", "ellison", "falkner",
+ "garcia", "hernandez", "ichabod", "johnson", "king", "laertes", "miller",
+ "nixon", "ovid", "polk", "quirinius", "robinson", "steinbeck", "thompson",
+ "underhill", "van buren", "white", "xylophone", "young", "zipper");
+
+sub randomName()
+{
+ return sprintf("%s %s", $firstName[int(rand(26))],
+ $lastName[int(rand(26))]);
+}
+
+our @city = ("albuquerque", "bombay", "calcutta", "danville", "eugene",
+ "frankfurt", "grenoble", "harrisburg", "indianapolis",
+ "jerusalem", "kellogg", "lisbon", "marseilles",
+ "nice", "oklohoma city", "paris", "queensville", "roswell",
+ "san francisco", "twin falls", "umatilla", "vancouver", "wheaton",
+ "xacky", "youngs town", "zippy");
+
+sub randomCity()
+{
+ return $city[int(rand(26))];
+}
+
+our @state = ( "AL", "AK", "AS", "AZ", "AR", "CA", "CO", "CT", "DE", "DC",
+ "FL", "GA", "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD",
+ "MA", "MI", "MN", "MS", "MT", "NE", "NV", "NH", "NJ", "NM", "NY", "NC",
+ "ND", "OH", "OK", "OR", "RI", "SC", "SD", "TN", "TX", "UT", "VT", "VA",
+ "WA", "WV", "WI", "WY");
+
+sub randomState()
+{
+ return $state[int(rand(50))];
+}
+
+our @classname = ("american history", "biology", "chemistry", "debate",
+ "education", "forestry", "geology", "history", "industrial engineering",
+ "joggying", "kindergarten", "linguistics", "mathematics", "nap time",
+ "opthamology", "philosophy", "quiet hour", "religion", "study skills",
+ "topology", "undecided", "values clariffication", "wind surfing",
+ "xylophone band", "yard duty", "zync studies");
+
+sub randomClass()
+{
+ return $classname[int(rand(26))];
+}
+
+our @grade = ("A", "A-", "B+", "B", "B-", "C+", "C", "C-", "D+", "D", "D-",
+ "F");
+
+sub randomGrade()
+{
+ return $grade[int(rand(int(@grade)))];
+}
+
+our @registration = ("democrat", "green", "independent", "libertarian",
+ "republican", "socialist");
+
+sub randomRegistration()
+{
+ return $registration[int(rand(int(@registration)))];
+}
+
+sub randomAge()
+{
+ return (int(rand(60)) + 18);
+}
+
+sub randomGpa()
+{
+ return rand(4.0);
+}
+
+our @street = ("A", "B", "C", "D", "E", "F", "G", "H", "I",
+ "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S",
+ "T", "U", "V", "W", "X", "Y", "Z");
+
+sub randomStreet()
+{
+ return sprintf("%d %s st", int(rand(1000)), $street[int(rand(26))]);
+}
+
+sub randomZip()
+{
+ return int(rand(100000));
+}
+
+sub randomContribution()
+{
+ return sprintf("%.2f", rand(1000));
+}
+
+our @numLetter = ("1", "09", "09a");
+
+sub randomNumLetter()
+{
+ return $numLetter[int(rand(int(@numLetter)))];
+}
+
+our @greekLetter = ( "alpha", "beta", "gamma", "delta", "epsilon", "zeta",
+ "eta", "theta", "iota", "kappa", "lambda", "mu", "nu", "xi", "omicron",
+ "pi", "rho", "sigma", "tau", "upsilon", "chi", "phi", "psi", "omega" );
+
+sub randomGreekLetter()
+{
+ return $greekLetter[int(rand(int(@greekLetter)))];
+}
+
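+# Emits a Pig-style map literal with two or three entries drawn in the fixed
+# order name, age, gpa; e.g. (illustrative) [name#alice allen,age#42].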
+sub randomNameAgeGpaMap()
+{
+ my $size = int(rand(3));
+ my $map = "[";
+ my @mapValues = ( "name#" . randomName(), "age#" . randomAge(), "gpa#" . randomGpa() );
+ $size = ($size == 0 ? 1 : $size);
+ for(my $i = 0; $i <= $size; $i++) {
+ $map .= $mapValues[$i];
+ if($i != $size) {
+ $map .= ",";
+ }
+ }
+ $map .= "]";
+ return $map;
+}
+
+sub getMapFields($) {
+ my $mapString = shift;
+ # remove the enclosing square brackets
+ $mapString =~ s/[\[\]]//g;
+ # get individual map fields
+ my @fields = split(/,/, $mapString);
+ # get only the values
+ my $hash;
+ for my $field (@fields) {
+ if($field =~ /(\S+)#(.*)/) {
+ $hash->{$1} = $2;
+ }
+ }
+ return $hash;
+}
+
+sub randomNameAgeGpaTuple()
+{
+ my $gpa = sprintf("%0.2f", randomGpa());
+ return "(" . randomName() . "," . randomAge() . "," . $gpa . ")" ;
+}
+
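+# Emits a Pig-style bag literal of two or three (name,age,gpa) tuples,
+# e.g. (illustrative) {(alice allen,42,2.72),(bob brown,23,3.14)}.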
+sub randomNameAgeGpaBag()
+{
+ my $size = int(rand(int(3)));
+ my $bag = "{";
+ $size = ($size == 0 ? 1 : $size);
+ for(my $i = 0; $i <= $size; $i++) {
+ $bag .= randomNameAgeGpaTuple();
+ if($i != $size) {
+ $bag .= ",";
+ }
+ }
+ $bag .= "}";
+ return $bag;
+}
+
+our @textDoc = (
+ "The cosmological proof, which we are now about to ex-",
+ "amine, retains the connection of absolute necessity with the",
+ "highest reality, but instead of reasoning, like the former proof,",
+ "from the highest reality to necessity of existence, it reasons",
+ "from the previously given unconditioned necessity of some",
+ "being to the unlimited reality of that being. It thus enters upon",
+ "a course of reasoning which, whether rational or only pseudo-",
+ "rational, is at any rate natural, and the most convincing not",
+ "only for common sense but even for speculative understand-",
+ "ing. It also sketches the first outline of all the proofs in natural",
+ "theology, an outline which has always been and always will",
+ "be followed, however much embellished and disguised by",
+ "superfluous additions. This proof, termed by Leibniz the proof",
+ "a contingentia mundi, we shall now proceed to expound and",
+ "examine.");
+
+sub usage()
+{
+ warn "Usage: $0 filetype numrows tablename targetdir [nosql]\n";
+ warn "\tValid filetypes [studenttab, studentcolon, \n";
+ warn "\t\tstudentnulltab, studentcomplextab, studentctrla, voternulltab\n";
+ warn "\t\tvotertab, reg1459894, textdoc, unicode, manual]\n";
+}
+
+our @greekUnicode = ("\N{U+03b1}", "\N{U+03b2}", "\N{U+03b3}", "\N{U+03b4}",
+ "\N{U+03b5}", "\N{U+03b6}", "\N{U+03b7}", "\N{U+03b8}", "\N{U+03b9}",
+ "\N{U+03ba}", "\N{U+03bb}", "\N{U+03bc}", "\N{U+03bd}", "\N{U+03be}",
+ "\N{U+03bf}", "\N{U+03c0}", "\N{U+03c1}", "\N{U+03c2}", "\N{U+03c3}",
+ "\N{U+03c4}", "\N{U+03c5}", "\N{U+03c6}", "\N{U+03c7}", "\N{U+03c8}",
+ "\N{U+03c9}");
+
+sub randomUnicodeNonAscii()
+{
+ my $name = $firstName[int(rand(int(@firstName)))] .
+ $greekUnicode[int(rand(int(@greekUnicode)))];
+ return $name;
+}
+
+my $testvar = "\N{U+03b1}\N{U+03b3}\N{U+03b1}\N{U+03c0}\N{U+03b7}";
+
+sub getBulkCopyCmd {
+    my $sourceDir = shift;
+    my $tableName = shift;
+    my $delimiter = shift;
+    $delimiter = '\t' if ( !$delimiter );
+
+    my $cmd = "\nbegin transaction;"
+       . "\nCOPY $tableName FROM \'$sourceDir/$tableName\' using DELIMITERS \'$delimiter\';"
+       . "\ncommit;"
+       . "\n";
+
+    return $cmd;
+}
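+
+# For example (illustrative), getBulkCopyCmd("/tmp/data", "studenttab")
+# returns a psql script along the lines of:
+#
+#   begin transaction;
+#   COPY studenttab FROM '/tmp/data/studenttab' using DELIMITERS '\t';
+#   commit;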
+
+
+# main($)
+{
+ # explicitly call srand so we get the same data every time
+ # we generate it. However, we set it individually for each table type.
+ # Otherwise we'd be generating the same data sets regardless of size,
+ # and this would really skew our joins.
+
+ my $filetype = shift;
+ my $numRows = shift;
+ my $tableName = shift;
+ my $targetDir= shift;
+ my $nosql = shift;
+
+ die usage() if (!defined($filetype) || !defined($numRows));
+
+    if ($numRows <= 0) { usage(); exit(1); }
+
+ if ( $targetDir ) {
+ open(HDFS, "> $targetDir/$tableName") or die("Cannot open file $tableName, $!\n");
+ open(PSQL, "> $targetDir/$tableName.sql") or die("Cannot open file $tableName.sql, $!\n") unless defined $nosql;
+ } else {
+ open(HDFS, "> $tableName") or die("Cannot open file $tableName, $!\n");
+ open(PSQL, "> $tableName.sql") or die("Cannot open file $tableName.sql, $!\n") unless defined $nosql;
+ }
+
+    if ($filetype eq "manual") {
+        # nothing to generate; data for the "manual" filetype is created by hand
+ } elsif ($filetype eq "studenttab") {
+ srand(3.14159 + $numRows);
+ print PSQL "create table $tableName (name varchar(100), age integer, gpa float(3));\n" unless defined $nosql;
+ print PSQL &getBulkCopyCmd( $targetDir, $tableName ) unless defined $nosql;
+ for (my $i = 0; $i < $numRows; $i++) {
+ my $name = randomName();
+ my $age = randomAge();
+ my $gpa = randomGpa();
+ printf HDFS "%s\t%d\t%.2f\n", $name, $age, $gpa;
+ }
+
+ } elsif ($filetype eq "studentnulltab") {
+ srand(3.14159 + $numRows);
+        print PSQL "create table $tableName (name varchar(100), age integer, gpa float(3));\n" unless defined $nosql;
+        print PSQL "begin transaction;\n" unless defined $nosql;
+        for (my $i = 0; $i < $numRows; $i++) {
+            # generate nulls in a random fashion
+            my $name = rand(1) < 0.05 ? '' : randomName();
+            my $age = rand(1) < 0.05 ? '' : randomAge();
+            my $gpa = rand(1) < 0.05 ? '' : randomGpa();
+            if (!defined $nosql) {
+                printf PSQL "insert into $tableName (name, age, gpa) values(";
+                print PSQL ($name eq ''? "null, " : "'$name', "), ($age eq ''? "null, " : "$age, ");
+                if($gpa eq '') {
+                    print PSQL "null);\n";
+                } else {
+                    printf PSQL "%.2f);\n", $gpa;
+                }
+            }
+ print HDFS "$name\t$age\t";
+ if($gpa eq '') {
+ print HDFS "\n"
+ } else {
+ printf HDFS "%.2f\n", $gpa;
+ }
+
+ }
+ print PSQL "commit;\n" unless defined $nosql;
+
+ } elsif ($filetype eq "studentcolon") {
+ srand(2.718281828459 + $numRows);
+ print PSQL "create table $tableName (name varchar(100), age integer, gpa float(3));\n" unless defined $nosql;
+ print PSQL &getBulkCopyCmd( $targetDir, $tableName, ':' ) unless defined $nosql;
+ for (my $i = 0; $i < $numRows; $i++) {
+ my $name = randomName();
+ my $age = randomAge();
+ my $gpa = randomGpa();
+ printf HDFS "%s:%d:%.2f\n", $name, $age, $gpa;
+=begin
+ } elsif ($filetype eq "studentusrdef") {
+ srand(6.62606896 + $numRows);
+ for (my $i = 0; $i < $numRows; $i++) {
+ # TODO need to add SQL info.
+ printf("%s,%d,%.2f,", randomName(), randomAge(), randomGpa());
+ printf("<%s,%s,%s,%d>,", randomStreet(), randomCity(), randomState(),
+ randomZip());
+ printf("[%s:<%s,%s>],", randomClass(), randomClass(), randomName());
+ printf("{");
+ my $elementsInBag = int(rand(100));
+ for (my $j = 0; $j < $elementsInBag; $j++) {
+ if ($j != 0) { printf(","); }
+ printf("<%s,%s,%s>", randomClass(), randomName(), randomGrade());
+ }
+ printf("}\n");
+ }
+=cut
+ }
+ print PSQL "commit;\n" unless defined $nosql;
+
+ } elsif ($filetype eq "studentctrla") {
+ srand(6.14159 + $numRows);
+        print PSQL "create table $tableName (name varchar(100), age integer, gpa float(3));\n" unless defined $nosql;
+        print PSQL "begin transaction;\n" unless defined $nosql;
+        for (my $i = 0; $i < $numRows; $i++) {
+            my $name = randomName();
+            my $age = randomAge();
+            my $gpa = randomGpa();
+            printf PSQL "insert into $tableName (name, age, gpa) values('%s', %d, %.2f);\n",
+                $name, $age, $gpa unless defined $nosql;
+            # fields in the data file are delimited by ctrl-A (\001)
+            printf HDFS "%s\001%d\001%.2f\n", $name, $age, $gpa;
+ }
+ print PSQL "commit;\n" unless defined $nosql;
+
+
+ } elsif ($filetype eq "studentcomplextab") {
+ srand(3.14159 + $numRows);
+        print PSQL "create table $tableName (nameagegpamap varchar(500), nameagegpatuple varchar(500), nameagegpabag varchar(500), nameagegpamap_name varchar(500), nameagegpamap_age integer, nameagegpamap_gpa float(3));\n" unless defined $nosql;
+        print PSQL "begin transaction;\n" unless defined $nosql;
+        for (my $i = 0; $i < $numRows; $i++) {
+            # generate nulls in a random fashion
+            my $map = rand(1) < 0.05 ? '' : randomNameAgeGpaMap();
+            my $tuple = rand(1) < 0.05 ? '' : randomNameAgeGpaTuple();
+            my $bag = rand(1) < 0.05 ? '' : randomNameAgeGpaBag();
+            my $mapHash;
+            if($map ne '') {
+                $mapHash = getMapFields($map);
+            }
+
+            if (!defined $nosql) {
+                printf PSQL "insert into $tableName (nameagegpamap, nameagegpatuple, nameagegpabag, nameagegpamap_name, nameagegpamap_age, nameagegpamap_gpa) values(";
+                print PSQL ($map eq ''? "null, " : "'$map', "),
+                    ($tuple eq ''? "null, " : "'$tuple', "),
+                    ($bag eq '' ? "null, " : "'$bag', "),
+                    ($map eq '' ? "null, " : (exists($mapHash->{'name'}) ? "'".$mapHash->{'name'}."', " : "null, ")),
+                    ($map eq '' ? "null, " : (exists($mapHash->{'age'}) ? "'".$mapHash->{'age'}."', " : "null, ")),
+                    ($map eq '' ? "null);\n" : (exists($mapHash->{'gpa'}) ? "'".$mapHash->{'gpa'}."');\n" : "null);\n"));
+            }
+            print HDFS "$map\t$tuple\t$bag\n";
+ }
+ print PSQL "commit;\n" unless defined $nosql;
+
+ } elsif ($filetype eq "votertab") {
+ srand(299792458 + $numRows);
+ print PSQL "create table $tableName (name varchar(100), age integer, registration varchar(20), contributions float);\n" unless defined $nosql;
+ print PSQL &getBulkCopyCmd( $targetDir, $tableName ) unless defined $nosql;
+ for (my $i = 0; $i < $numRows; $i++) {
+ my $name = randomName();
+ my $age = randomAge();
+ my $registration = randomRegistration();
+ my $contribution = randomContribution();
+ printf HDFS "%s\t%d\t%s\t%.2f\n", $name, $age,
+ $registration, $contribution;
+ }
+
+ } elsif ($filetype eq "voternulltab") {
+ srand(299792458 + $numRows);
+ print PSQL "create table $tableName (name varchar(100), age integer, registration varchar(20), contributions float);\n" unless defined $nosql;
+ print PSQL "begin transaction;\n" unless defined $nosql;
+ for (my $i = 0; $i < $numRows; $i++) {
+ # generate nulls in a random fashion
+ my $name = rand(1) < 0.05 ? '' : randomName();
+ my $age = rand(1) < 0.05 ? '' : randomAge();
+ my $registration = rand(1) < 0.05 ? '' : randomRegistration();
+ my $contribution = rand(1) < 0.05 ? '' : randomContribution();
+            if (!defined $nosql) {
+                printf PSQL "insert into $tableName (name, age, registration, contributions) values(";
+                print PSQL ($name eq ''? "null, " : "'$name', "),
+                    ($age eq ''? "null, " : "$age, "),
+                    ($registration eq ''? "null, " : "'$registration', ");
+                if($contribution eq '') {
+                    print PSQL "null);\n";
+                } else {
+                    printf PSQL "%.2f);\n", $contribution;
+                }
+            }
+ print HDFS "$name\t$age\t$registration\t";
+ if($contribution eq '') {
+ print HDFS "\n"
+ } else {
+ printf HDFS "%.2f\n", $contribution;
+ }
+ }
+ print PSQL "commit;\n" unless defined $nosql;
+
+ } elsif ($filetype eq "reg1459894") {
+ srand(6.67428 + $numRows);
+ print PSQL "create table $tableName (first varchar(10), second varchar(10));\n" unless defined $nosql;
+ print PSQL &getBulkCopyCmd( $targetDir, $tableName ) unless defined $nosql;
+ for (my $i = 0; $i < $numRows; $i++) {
+ my $letter = randomNumLetter();
+ my $gkLetter = randomGreekLetter();
+ printf HDFS "%s\t%s\n", $letter, $gkLetter;
+ }
+
+ } elsif ($filetype eq "textdoc") {
+ # This one ignores the number of lines. It isn't random either.
+ print PSQL "create table $tableName (name varchar(255));\n" unless defined $nosql;
+ print PSQL "begin transaction;\n" unless defined $nosql;
+ for (my $i = 0; $i < @textDoc; $i++) {
+ my $sqlWords = $textDoc[$i];
+ $sqlWords =~ s/([\w-]+)/$1,/g;
+ print PSQL "insert into $tableName (name) values('($sqlWords)');\n" unless defined $nosql;
+ print HDFS "$textDoc[$i]\n";
+ }
+ print PSQL "commit;\n" unless defined $nosql;
+
+
+ } elsif ($filetype eq "unicode") {
+        srand(1.41421 + $numRows);
+        # the generated names contain non-ASCII characters, so write UTF-8
+        binmode(HDFS, ":utf8");
+        binmode(PSQL, ":utf8") unless defined $nosql;
+ print PSQL "create table $tableName (name varchar(255));\n" unless defined $nosql;
+ print PSQL "begin transaction;\n" unless defined $nosql;
+ for (my $i = 0; $i < $numRows; $i++) {
+ my $name = randomUnicodeNonAscii();
+ printf PSQL "insert into $tableName (name) values('%s');\n",
+ $name unless defined $nosql;
+ printf HDFS "%s\n", $name;
+ }
+ print PSQL "commit;\n" unless defined $nosql;
+
+ } else {
+ warn "Unknown filetype $filetype\n";
+ usage();
+ }
+}
+
+
Added: incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/test/floatpostprocessor.pl
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/test/floatpostprocessor.pl?rev=1163097&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/test/floatpostprocessor.pl (added)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/test/floatpostprocessor.pl Tue Aug 30 06:18:16 2011
@@ -0,0 +1,111 @@
+#!/usr/bin/env perl
+
+############################################################################
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# A simple tool to make sure all floats in the output are written the same way.
+# It is assumed that the data in question is being read from stdin.
+#
+#
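+# Hypothetical usage, normalizing a comma-delimited result file (the
+# delimiter is the only argument):
+#
+#   cat query_results.txt | ./floatpostprocessor.pl "," > normalized.txt
+#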
+
+use strict;
+
+our @floats;
+our $delim;
+
+sub parseLine($)
+{
+ my $line = shift;
+ chomp $line;
+ return split(/$delim/, $line);
+}
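+# Note: the delimiter is interpolated into the split pattern as a regex, so
+# an argument of "\t" splits on tabs, while regex metacharacters such as "|"
+# would need to be escaped by the caller.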
+
+sub postprocess($)
+{
+ my @fields = parseLine(shift);
+
+ for (my $i = 0; $i < @fields; $i++) {
+ if ($i != 0) { print($delim); }
+ if ($floats[$i]) {
+ printf("%.3f", $fields[$i]);
+ } else {
+ print($fields[$i]);
+ }
+ }
+ print "\n";
+}
+
+sub is_float {
+ my $n = shift;
+ if(!defined $n || $n eq ""){
+ return 0;
+ }
+ if($n =~ /^[+-]?\d+\.\d+([eE][-+]?[0-9]+)?$/){
+ return 1;
+ }
+
+ my $abs = abs($n);
+ if ($abs - int($abs) > 0) {
+ return 1;
+ }
+ return 0;
+}
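+# For example: is_float("3.14") and is_float("2.5e3") return 1, while
+# is_float("42") and is_float("") return 0.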
+
+
+# main
+{
+ $delim = shift;
+ if (!defined($delim)) {
+ die "Usage: $0 delimiter\n";
+ }
+
+ my @sampled;
+ my $line;
+ # read away any empty lines into the sample
+ do {
+ $line = <STDIN>;
+ push(@sampled, $line);
+ } while($line && $line =~ /^\s*$/);
+ # Sample the next thousand lines to figure out which columns have floats.
+ for (my $i = 0; $i < 1000 && ($line = <STDIN>); $i++) {
+ push(@sampled, $line);
+ }
+ foreach my $line (@sampled) {
+ my @fields = parseLine($line);
+ for (my $j = 0; $j < @fields; $j++) {
+            if(is_float($fields[$j])){
+                $floats[$j] = 1;
+            }
+        }
+ }
+
+ # Now, play each of the sampled lines through the postprocessor
+ foreach my $line (@sampled) {
+ postprocess($line);
+ }
+
+ while (<STDIN>) {
+ postprocess($_);
+ }
+
+}
+
+
+
+
Added: incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/build.xml
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/build.xml?rev=1163097&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/build.xml (added)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/build.xml Tue Aug 30 06:18:16 2011
@@ -0,0 +1,51 @@
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.-->
+
+<project name="HCatalog-test-utils" default="udf-jar">
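+  <!-- Assumed usage: run "ant" from this directory; the default target,
+       udf-jar, compiles the UDFs and packages them into testudf.jar. The
+       classpath filesets below expect an already-built HCatalog and Hive
+       source tree at the relative paths shown. -->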
+
+ <property name="udf.jarfile" value="testudf.jar" />
+ <property name="build.dir" value="${basedir}/build" />
+ <property name="src.dir" value="${basedir}/org/" />
+
+ <path id="udf-classpath">
+ <fileset file="../../../../../../build/hcatalog/*.jar" />
+ <fileset file="../../../../../../build/ivy/lib/hcatalog/*.jar" />
+ <fileset file="../../../../../../hive/external/build/dist/lib/*.jar" />
+ <fileset file="../../../../../../hive/external/build/hadoopcore/hadoop-0.20.1/*.jar" />
+ </path>
+
+ <target name="init">
+ <mkdir dir="${build.dir}" />
+ </target>
+
+ <target name="clean">
+ <delete dir="${build.dir}" />
+ <delete file="${udf.jarfile}" />
+ </target>
+
+ <target name="udf-compile" depends="init">
+ <echo>*** Compiling UDFs ***</echo>
+ <javac srcdir="${src.dir}" destdir="${build.dir}" debug="on">
+ <classpath refid="udf-classpath" />
+ </javac>
+ </target>
+
+ <target name="udf-jar" depends="udf-compile">
+ <echo>*** Creating UDF jar ***</echo>
+ <jar duplicate="preserve" jarfile="${udf.jarfile}">
+      <fileset dir="${build.dir}"/>
+ </jar>
+ </target>
+</project>
Added: incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/HCatTestDriver.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/HCatTestDriver.java?rev=1163097&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/HCatTestDriver.java (added)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/HCatTestDriver.java Tue Aug 30 06:18:16 2011
@@ -0,0 +1,61 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hcatalog.utils;
+
+import org.apache.hcatalog.utils.TypeDataCheck;
+import org.apache.hadoop.util.ProgramDriver;
+
+/**
+ * A driver that maps each test program in this package to a name and a
+ * human-readable description, and dispatches to it by name.
+ */
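+/*
+ * A minimal sketch of how the driver is invoked (program names as registered
+ * in main() below; the jar name comes from the build.xml in this change):
+ *
+ *   hadoop jar testudf.jar typedatacheck <program-specific args>
+ *
+ * ProgramDriver matches the first argument against the registered names and
+ * forwards the remaining arguments to that program's main().
+ */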
+public class HCatTestDriver {
+
+ public static void main(String argv[]){
+ int exitCode = -1;
+ ProgramDriver pgd = new ProgramDriver();
+ try {
+ pgd.addClass("typedatacheck", TypeDataCheck.class,
+ "A map/reduce program that checks the type of each field and" +
+ " outputs the entire table (to test hcat).");
+ pgd.addClass("sumnumbers", SumNumbers.class,
+ "A map/reduce program that performs a group by on the first column and a " +
+ "SUM operation on the other columns of the \"numbers\" table.");
+      pgd.addClass("storenumbers", StoreNumbers.class, "A map/reduce program that " +
+          "reads from the \"numbers\" table and adds 10 to each field and writes " +
+          "to the datestamp=20100101 partition of the \"numbers_part_empty_initially\" " +
+          "table OR to the \"numbers_nopart_empty_initially\" table based on a " +
+          "cmdline arg");
+      pgd.addClass("storecomplex", StoreComplex.class, "A map/reduce program that " +
+          "reads from the \"complex\" table and stores as-is into the " +
+          "\"complex_nopart_empty_initially\" table.");
+ pgd.addClass("storedemo", StoreDemo.class, "demo prog.");
+ pgd.driver(argv);
+
+ // Success
+ exitCode = 0;
+ }
+ catch(Throwable e){
+ e.printStackTrace();
+ }
+
+ System.exit(exitCode);
+ }
+}
+
Added: incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/HCatTypeCheck.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/HCatTypeCheck.java?rev=1163097&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/HCatTypeCheck.java (added)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/HCatTypeCheck.java Tue Aug 30 06:18:16 2011
@@ -0,0 +1,152 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hcatalog.utils;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.apache.pig.EvalFunc;
+import org.apache.pig.data.DataBag;
+import org.apache.pig.data.DataType;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.impl.logicalLayer.schema.Schema;
+import org.apache.pig.impl.util.Utils;
+
+/**
+ * This UDF can be used to check that a tuple presented by HCatLoader has the
+ * right types for the fields
+ *
+ * Usage is:
+ *
+ * register testudf.jar;
+ * a = load 'numbers' using HCatLoader(...);
+ * b = foreach a generate HCatTypeCheck('intnum1000:int,id:int,intnum5:int,intnum100:int,intnum:int,longnum:long,floatnum:float,doublenum:double', *);
+ * store b into 'output';
+ *
+ * The schema string (the first argument to the UDF) is of the form one would provide in a
+ * pig load statement.
+ *
+ * The output should only contain the value '1' in all rows. (This UDF returns
+ * the integer value 1 if all fields have the right type, else throws IOException)
+ *
+ */
+public class HCatTypeCheck extends EvalFunc<Integer> {
+
+ static HashMap<Byte, Class<?>> typeMap = new HashMap<Byte, Class<?>>();
+
+ @Override
+ public Integer exec(Tuple input) throws IOException {
+ String schemaStr = (String) input.get(0);
+ Schema s = null;
+ try {
+ s = getSchemaFromString(schemaStr);
+ } catch (Exception e) {
+ throw new IOException(e);
+ }
+ for(int i = 0; i < s.size(); i++) {
+ check(s.getField(i).type, input.get(i+1)); // input.get(i+1) since input.get(0) is the schema;
+ }
+ return 1;
+ }
+
+ static {
+ typeMap.put(DataType.INTEGER, Integer.class);
+ typeMap.put(DataType.LONG, Long.class);
+ typeMap.put(DataType.FLOAT, Float.class);
+ typeMap.put(DataType.DOUBLE, Double.class);
+ typeMap.put(DataType.CHARARRAY, String.class);
+ typeMap.put(DataType.TUPLE, Tuple.class);
+ typeMap.put(DataType.MAP, Map.class);
+ typeMap.put(DataType.BAG, DataBag.class);
+ }
+
+
+
+ private void die(String expectedType, Object o) throws IOException {
+ throw new IOException("Expected " + expectedType + ", got " +
+ o.getClass().getName());
+ }
+
+
+ private String check(Byte type, Object o) throws IOException {
+ if(o == null) {
+ return "";
+ }
+ if(check(typeMap.get(type), o)) {
+ if(type.equals(DataType.MAP)) {
+ Map<String, String> m = (Map<String, String>) o;
+ check(m);
+ } else if(type.equals(DataType.BAG)) {
+ DataBag bg = (DataBag) o;
+ for (Tuple tuple : bg) {
+ Map<String, String> m = (Map<String, String>) tuple.get(0);
+ check(m);
+ }
+ } else if(type.equals(DataType.TUPLE)) {
+ Tuple t = (Tuple) o;
+ if(!check(Integer.class, t.get(0)) ||
+ !check(String.class, t.get(1)) ||
+ !check(Double.class, t.get(2))) {
+ die("t:tuple(num:int,str:string,dbl:double)", t);
+ }
+ }
+ } else {
+ die(typeMap.get(type).getName(), o);
+ }
+ return o.toString();
+ }
+
+ /**
+ * @param m
+ * @throws IOException
+ */
+ private void check(Map<String, String> m) throws IOException {
+ for(Entry<String, String> e: m.entrySet()) {
+ // just access key and value to ensure they are correct
+ if(!check(String.class, e.getKey())) {
+ die("String", e.getKey());
+ }
+ if(!check(String.class, e.getValue())) {
+ die("String", e.getValue());
+ }
+ }
+
+ }
+
+ private boolean check(Class<?> expected, Object actual) {
+ if(actual == null) {
+ return true;
+ }
+ return expected.isAssignableFrom(actual.getClass());
+ }
+
+ Schema getSchemaFromString(String schemaString) throws Exception {
+ /** ByteArrayInputStream stream = new ByteArrayInputStream(schemaString.getBytes()) ;
+ QueryParser queryParser = new QueryParser(stream) ;
+ Schema schema = queryParser.TupleSchema() ;
+ Schema.setSchemaDefaultType(schema, org.apache.pig.data.DataType.BYTEARRAY);
+ return schema;
+ */
+ return Utils.getSchemaFromString(schemaString);
+ }
+
+}
Added: incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/HCatTypeCheckHive.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/HCatTypeCheckHive.java?rev=1163097&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/HCatTypeCheckHive.java (added)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/HCatTypeCheckHive.java Tue Aug 30 06:18:16 2011
@@ -0,0 +1,140 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hcatalog.utils;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions;
+
+/**
+ * A hive udf to check types of the fields read from hcat. A sample hive query which can use this is:
+ *
+ * create temporary function typecheck as 'org.apache.hcatalog.utils.HCatTypeCheckHive';
+ * select typecheck('map<string,string>+struct<num:int,str:string,dbl:double>+array<map<string,string>>+int',
+ * mymap, mytuple, bagofmap, rownum) from complex;
+ *
+ *
+ * The first argument to the UDF is a string representing the schema of the columns in the table.
+ * The columns in the tables are the remaining args to it.
+ * The schema specification consists of the types as given by "describe <table>"
+ * with each column's type separated from the next column's type by a '+'
+ *
+ * The UDF will throw an exception (and cause the query to fail) if it does not
+ * encounter the correct types.
+ *
+ * The output is a string representation of the data, the type, and the hive category.
+ * It is not advisable to use this against a large dataset since the output would also
+ * be large.
+ *
+ */
+public final class HCatTypeCheckHive extends GenericUDF {
+
+    ObjectInspector[] argOIs;
+
+    @Override
+    public Object evaluate(DeferredObject[] args) throws HiveException {
+        List<Object> row = new ArrayList<Object>();
+        String typesStr = (String) getJavaObject(args[0].get(), argOIs[0], new ArrayList<Category>());
+        String[] types = typesStr.split("\\+");
+        for(int i = 0; i < types.length; i++) {
+            types[i] = types[i].toLowerCase();
+        }
+        for(int i = 1; i < args.length; i++) {
+            ObjectInspector oi = argOIs[i];
+            List<ObjectInspector.Category> categories = new ArrayList<ObjectInspector.Category>();
+            Object o = getJavaObject(args[i].get(), oi, categories);
+            try {
+                if(o != null) {
+                    Util.check(types[i-1], o);
+                }
+            } catch (IOException e) {
+                throw new HiveException(e);
+            }
+            row.add(o == null ? "null" : o);
+            row.add(":" + (o == null ? "null" : o.getClass()) + ":" + categories);
+        }
+        return row.toString();
+    }
+
+    private Object getJavaObject(Object o, ObjectInspector oi, List<Category> categories) {
+        if(categories != null) {
+            categories.add(oi.getCategory());
+        }
+        if(oi.getCategory() == ObjectInspector.Category.LIST) {
+            List<?> l = ((ListObjectInspector)oi).getList(o);
+            List<Object> result = new ArrayList<Object>();
+            ObjectInspector elemOI = ((ListObjectInspector)oi).getListElementObjectInspector();
+            for(Object lo : l) {
+                result.add(getJavaObject(lo, elemOI, categories));
+            }
+            return result;
+        } else if (oi.getCategory() == ObjectInspector.Category.MAP) {
+            Map<?,?> m = ((MapObjectInspector)oi).getMap(o);
+            Map<String, String> result = new HashMap<String, String>();
+            ObjectInspector koi = ((MapObjectInspector)oi).getMapKeyObjectInspector();
+            ObjectInspector voi = ((MapObjectInspector)oi).getMapValueObjectInspector();
+            for(Entry<?,?> e: m.entrySet()) {
+                result.put((String)getJavaObject(e.getKey(), koi, null),
+                        (String)getJavaObject(e.getValue(), voi, null));
+            }
+            return result;
+        } else if (oi.getCategory() == ObjectInspector.Category.STRUCT) {
+            List<Object> s = ((StructObjectInspector)oi).getStructFieldsDataAsList(o);
+            List<? extends StructField> sf = ((StructObjectInspector)oi).getAllStructFieldRefs();
+            List<Object> result = new ArrayList<Object>();
+            for(int i = 0; i < s.size(); i++) {
+                result.add(getJavaObject(s.get(i), sf.get(i).getFieldObjectInspector(), categories));
+            }
+            return result;
+        } else if(oi.getCategory() == ObjectInspector.Category.PRIMITIVE) {
+            return ((PrimitiveObjectInspector)oi).getPrimitiveJavaObject(o);
+        }
+        throw new RuntimeException("Unexpected error!");
+    }
+
+    @Override
+    public String getDisplayString(String[] arg0) {
+        return null;
+    }
+
+    @Override
+    public ObjectInspector initialize(ObjectInspector[] argOIs)
+            throws UDFArgumentException {
+        this.argOIs = argOIs;
+        return ObjectInspectorFactory.getReflectionObjectInspector(String.class,
+                ObjectInspectorOptions.JAVA);
+    }
+
+}
Added: incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/PartitionStorageDriverAnnotator.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/PartitionStorageDriverAnnotator.java?rev=1163097&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/PartitionStorageDriverAnnotator.java (added)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/PartitionStorageDriverAnnotator.java Tue Aug 30 06:18:16 2011
@@ -0,0 +1,114 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hcatalog.utils;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
+import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
+import org.apache.hadoop.hive.metastore.api.Partition;
+import org.apache.hcatalog.rcfile.RCFileInputDriver;
+import org.apache.hcatalog.rcfile.RCFileOutputDriver;
+import org.apache.thrift.TException;
+
+/**
+ * A utility program to annotate partitions of a pre-created table
+ * with input storage driver and output storage driver information
+ */
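+/*
+ * A hypothetical invocation (URI, table and database names are illustrative):
+ *
+ *   java -cp testudf.jar:<hcatjar> org.apache.hcatalog.utils.PartitionStorageDriverAnnotator \
+ *       -u thrift://localhost:9083 -t web_logs -d default
+ *
+ * With no -i/-o arguments, every partition of web_logs is annotated with the
+ * RCFile input and output drivers.
+ */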
+public class PartitionStorageDriverAnnotator {
+
+ /**
+ * @param args
+ * @throws MetaException
+ * @throws TException
+ * @throws NoSuchObjectException
+ * @throws InvalidOperationException
+ */
+ public static void main(String[] args) throws MetaException, NoSuchObjectException,
+ TException, InvalidOperationException {
+ String thrifturi = null;
+ String database = "default";
+ String table = null;
+ String isd = null;
+ String osd = null;
+ Map<String, String> m = new HashMap<String, String>();
+ for(int i = 0; i < args.length; i++) {
+ if(args[i].equals("-u")) {
+ thrifturi = args[i+1];
+ } else if(args[i].equals("-t")) {
+ table = args[i+1];
+ } else if (args[i].equals("-i")) {
+ isd = args[i+1];
+ } else if (args[i].equals("-o")) {
+ osd = args[i+1];
+ } else if (args[i].equals("-p")) {
+ String[] kvps = args[i+1].split(";");
+ for(String kvp: kvps) {
+ String[] kv = kvp.split("=");
+ if(kv.length != 2) {
+ System.err.println("ERROR: key value property pairs must be specified as key1=val1;key2=val2;..;keyn=valn");
+ System.exit(1);
+ }
+ m.put(kv[0], kv[1]);
+ }
+ } else if(args[i].equals("-d")) {
+ database = args[i+1];
+ } else {
+ System.err.println("ERROR: Unknown option: " + args[i]);
+ usage();
+ }
+ i++; // to skip the value for an option
+ }
+ if(table == null || thrifturi == null) {
+ System.err.println("ERROR: thrift uri and table name are mandatory");
+ usage();
+ }
+ HiveConf hiveConf = new HiveConf(PartitionStorageDriverAnnotator.class);
+ hiveConf.set("hive.metastore.local", "false");
+ hiveConf.set("hive.metastore.uris", thrifturi);
+
+ HiveMetaStoreClient hmsc = new HiveMetaStoreClient(hiveConf,null);
+ List<Partition> parts = hmsc.listPartitions(database, table, Short.MAX_VALUE);
+
+ m.put("hcat.isd", isd != null ? isd : RCFileInputDriver.class.getName());
+ m.put("hcat.osd", osd != null ? osd : RCFileOutputDriver.class.getName());
+
+ for(Partition p: parts) {
+ p.setParameters(m);
+ hmsc.alter_partition(database, table, p);
+ }
+ }
+
+ /**
+ *
+ */
+ private static void usage() {
+        System.err.println("Usage: java -cp testudf.jar:<hcatjar> org.apache.hcatalog.utils.PartitionStorageDriverAnnotator -u <thrift uri> -t <partitioned tablename>" +
+ " [-i input driver classname (Default rcfiledriver)] [-o output driver classname (default rcfiledriver)] " +
+ " [-p key1=val1;key2=val2;..;keyn=valn (list of key=value property pairs to associate with each partition)]" +
+ " [-d database (if this not supplied the default database is used)]");
+ System.exit(1);
+ }
+
+}
Added: incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/StoreComplex.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/StoreComplex.java?rev=1163097&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/StoreComplex.java (added)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/StoreComplex.java Tue Aug 30 06:18:16 2011
@@ -0,0 +1,135 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hcatalog.utils;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Random;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.util.GenericOptionsParser;
+import org.apache.hcatalog.common.HCatConstants;
+import org.apache.hcatalog.data.DefaultHCatRecord;
+import org.apache.hcatalog.data.HCatRecord;
+import org.apache.hcatalog.data.schema.HCatSchema;
+import org.apache.hcatalog.mapreduce.HCatInputFormat;
+import org.apache.hcatalog.mapreduce.HCatOutputFormat;
+import org.apache.hcatalog.mapreduce.InputJobInfo;
+import org.apache.hcatalog.mapreduce.OutputJobInfo;
+
+/**
+ * This is a map reduce test for testing hcat which goes against the "complex"
+ * table and writes to the "complex_nopart_empty_initially" table. It reads data from complex, which
+ * is an unpartitioned table, and stores the data as-is into the complex_nopart_empty_initially table
+ * (which is also unpartitioned).
+ *
+ * Usage: hadoop jar testudf.jar storecomplex <serveruri> <-libjars hive-hcat jar>
+ The hcat jar location should be specified as file://<full path to jar>
+ */
+public class StoreComplex {
+
+ private static final String COMPLEX_TABLE_NAME = "complex";
+ private static final String COMPLEX_NOPART_EMPTY_INITIALLY_TABLE_NAME = "complex_nopart_empty_initially";
+
+
+ public static class ComplexMapper
+ extends Mapper<WritableComparable, HCatRecord, WritableComparable, HCatRecord>{
+
+ @Override
+ protected void map(WritableComparable key, HCatRecord value,
+ org.apache.hadoop.mapreduce.Mapper<WritableComparable,HCatRecord,
+ WritableComparable,HCatRecord>.Context context)
+ throws IOException ,InterruptedException {
+ // just write out the value as-is
+ context.write(new IntWritable(0), value);
+
+ }
+ }
+
+
+ public static void main(String[] args) throws Exception {
+ Configuration conf = new Configuration();
+ args = new GenericOptionsParser(conf, args).getRemainingArgs();
+ String[] otherArgs = new String[1];
+ int j = 0;
+ for(int i = 0; i < args.length; i++) {
+ if(args[i].equals("-libjars")) {
+ // generic options parser doesn't seem to work!
+ conf.set("tmpjars", args[i+1]);
+                i = i+1; // consume the option's value; the loop increment skips past it
+ } else {
+ otherArgs[j++] = args[i];
+ }
+ }
+        if (j != 1) {
+ usage();
+ }
+ String serverUri = otherArgs[0];
+ String tableName = COMPLEX_TABLE_NAME;
+ String dbName = "default";
+ Map<String, String> outputPartitionKvps = new HashMap<String, String>();
+        String outputTableName = COMPLEX_NOPART_EMPTY_INITIALLY_TABLE_NAME;
+ // test with null or empty randomly
+ if(new Random().nextInt(2) == 0) {
+ System.err.println("INFO: output partition keys set to null for writing");
+ outputPartitionKvps = null;
+ }
+ String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL);
+ if(principalID != null)
+ conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID);
+ Job job = new Job(conf, "storecomplex");
+ // initialize HCatInputFormat
+
+ HCatInputFormat.setInput(job, InputJobInfo.create(
+ dbName, tableName, null, serverUri, principalID));
+ // initialize HCatOutputFormat
+ HCatOutputFormat.setOutput(job, OutputJobInfo.create(
+ dbName, outputTableName, outputPartitionKvps, serverUri, principalID));
+
+
+ HCatSchema s = HCatInputFormat.getTableSchema(job);
+ HCatOutputFormat.setSchema(job, s);
+ job.setInputFormatClass(HCatInputFormat.class);
+ job.setOutputFormatClass(HCatOutputFormat.class);
+ job.setJarByClass(StoreComplex.class);
+ job.setMapperClass(ComplexMapper.class);
+ job.setOutputKeyClass(IntWritable.class);
+ job.setOutputValueClass(DefaultHCatRecord.class);
+ System.exit(job.waitForCompletion(true) ? 0 : 1);
+ }
+
+
+ /**
+ *
+ */
+ private static void usage() {
+ System.err.println("Usage: hadoop jar testudf.jar storecomplex <serveruri> <-libjars hive-hcat jar>\n" +
+ "The hcat jar location should be specified as file://<full path to jar>\n");
+ System.exit(2);
+
+ }
+
+
+}
Added: incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/StoreDemo.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/StoreDemo.java?rev=1163097&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/StoreDemo.java (added)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/StoreDemo.java Tue Aug 30 06:18:16 2011
@@ -0,0 +1,152 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hcatalog.utils;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Random;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.util.GenericOptionsParser;
+import org.apache.hcatalog.common.HCatConstants;
+import org.apache.hcatalog.data.DefaultHCatRecord;
+import org.apache.hcatalog.data.HCatRecord;
+import org.apache.hcatalog.data.schema.HCatSchema;
+import org.apache.hcatalog.mapreduce.HCatInputFormat;
+import org.apache.hcatalog.mapreduce.HCatOutputFormat;
+import org.apache.hcatalog.mapreduce.InputJobInfo;
+import org.apache.hcatalog.mapreduce.OutputJobInfo;
+
+/**
+ * This is a map reduce test for testing hcat which goes against the "demo"
+ * table and writes to the "demo_partitioned" table. It reads data from demo, which
+ * is an unpartitioned table, adds 20 to the int and double columns, and stores
+ * the result into the datestamp='20100102' partition of the demo_partitioned
+ * table.
+ *
+ * Usage: hadoop jar testudf.jar storedemo <serveruri> <-libjars hive-hcat jar>
+        The hcat jar location should be specified as file://<full path to jar>
+ */
+public class StoreDemo {
+
+ private static final String NUMBERS_PARTITIONED_TABLE_NAME = "demo_partitioned";
+ private static final String NUMBERS_TABLE_NAME = "demo";
+
+ public static class SumMapper
+ extends Mapper<WritableComparable, HCatRecord, WritableComparable, HCatRecord>{
+
+
+ Integer intnum;
+
+ Double doublenum;
+ @Override
+ protected void map(WritableComparable key, HCatRecord value,
+ org.apache.hadoop.mapreduce.Mapper<WritableComparable,HCatRecord,
+ WritableComparable,HCatRecord>.Context context)
+ throws IOException ,InterruptedException {
+ intnum = ((Integer)value.get(0));
+ value.set(0, intnum + 20);
+ doublenum = ((Double) value.get(1));
+ value.set(1, (Double) (doublenum + 20));
+ context.write(new IntWritable(0), value);
+
+ }
+ }
+
+
+ public static void main(String[] args) throws Exception {
+ Configuration conf = new Configuration();
+ args = new GenericOptionsParser(conf, args).getRemainingArgs();
+ String[] otherArgs = new String[1];
+ int j = 0;
+ for(int i = 0; i < args.length; i++) {
+ if(args[i].equals("-libjars")) {
+ // generic options parser doesn't seem to work!
+ conf.set("tmpjars", args[i+1]);
+                i = i+1; // consume the option's value; the loop increment skips past it
+ } else {
+ otherArgs[j++] = args[i];
+ }
+ }
+        if (j != 1) {
+ usage();
+ }
+ String serverUri = otherArgs[0];
+
+ String tableName = NUMBERS_TABLE_NAME;
+ String dbName = "default";
+ Map<String, String> outputPartitionKvps = new HashMap<String, String>();
+ String outputTableName = NUMBERS_PARTITIONED_TABLE_NAME;
+ outputPartitionKvps.put("datestamp", "20100102");
+
+ String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL);
+ if(principalID != null)
+ conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID);
+ Job job = new Job(conf, "storedemo");
+ // initialize HCatInputFormat
+ HCatInputFormat.setInput(job, InputJobInfo.create(
+ dbName, tableName, null, serverUri, principalID));
+ // initialize HCatOutputFormat
+ HCatOutputFormat.setOutput(job, OutputJobInfo.create(
+ dbName, outputTableName, outputPartitionKvps, serverUri, principalID));
+        // set the output schema explicitly to the input table's schema
+ HCatSchema s = HCatInputFormat.getTableSchema(job);
+ System.err.println("INFO: output schema explicitly set for writing:" + s);
+ HCatOutputFormat.setSchema(job, s);
+
+ job.setInputFormatClass(HCatInputFormat.class);
+ job.setOutputFormatClass(HCatOutputFormat.class);
+ job.setJarByClass(StoreDemo.class);
+ job.setMapperClass(SumMapper.class);
+ job.setOutputKeyClass(IntWritable.class);
+ job.setNumReduceTasks(0);
+ job.setOutputValueClass(DefaultHCatRecord.class);
+ System.exit(job.waitForCompletion(true) ? 0 : 1);
+ }
+
+
+ /**
+ *
+ */
+ private static void usage() {
+        System.err.println("Usage: hadoop jar testudf.jar storedemo <serveruri> <-libjars hive-hcat jar>\n" +
+                "\tData is written to the datestamp = '20100102' partition of " +
+                "the demo_partitioned table.\n" +
+                "The hcat jar location should be specified as file://<full path to jar>\n");
+ System.exit(2);
+
+ }
+
+
+}
Added: incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/StoreNumbers.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/StoreNumbers.java?rev=1163097&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/StoreNumbers.java (added)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/StoreNumbers.java Tue Aug 30 06:18:16 2011
@@ -0,0 +1,232 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hcatalog.utils;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.util.GenericOptionsParser;
+import org.apache.hcatalog.common.HCatConstants;
+import org.apache.hcatalog.data.DefaultHCatRecord;
+import org.apache.hcatalog.data.HCatRecord;
+import org.apache.hcatalog.data.schema.HCatFieldSchema;
+import org.apache.hcatalog.data.schema.HCatSchema;
+import org.apache.hcatalog.mapreduce.HCatInputFormat;
+import org.apache.hcatalog.mapreduce.HCatOutputFormat;
+import org.apache.hcatalog.mapreduce.InputJobInfo;
+import org.apache.hcatalog.mapreduce.OutputJobInfo;
+
+/**
+ * This is a map reduce test for testing hcat which goes against the "numbers"
+ * table and writes data to another table. It reads data from numbers which
+ * is an unpartitioned table and adds 10 to each field. It stores the result into
+ * the datestamp='20100101' partition of the numbers_part_empty_initially table if the second
+ * command line arg is "part". If the second cmdline arg is "nopart" then the
+ * result is stored into the 'numbers_nopart_empty_initially' (unpartitioned) table.
+ * If the second cmdline arg is "nopart_pig", then the result is stored into the
+ * 'numbers_nopart_pig_empty_initially' (unpartitioned) table with the tinyint
+ * and smallint columns in "numbers" being stored as "int" (since pig cannot handle
+ * tinyint and smallint)
+ *
+ * Usage: hadoop jar storenumbers <serveruri> <part|nopart|nopart_pig> <-libjars hive-hcat jar>
+ If the second argument is "part" data is written to datestamp = '2010101' partition of the numbers_part_empty_initially table.
+ If the second argument is "nopart", data is written to the unpartitioned numbers_nopart_empty_initially table.
+ If the second argument is "nopart_pig", data is written to the unpartitioned numbers_nopart_pig_empty_initially table.
+ The hcat jar location should be specified as file://<full path to jar>
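+
+ An example invocation (hypothetical metastore URI and jar path, for
+ illustration only):
+ hadoop jar storenumbers thrift://localhost:9083 part \
+ -libjars file:///tmp/hcatalog.jar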
+ */
+public class StoreNumbers {
+
+ private static final String NUMBERS_PARTITIONED_TABLE_NAME = "numbers_part_empty_initially";
+ private static final String NUMBERS_TABLE_NAME = "numbers";
+ private static final String NUMBERS_NON_PARTITIONED_TABLE_NAME = "numbers_nopart_empty_initially";
+ private static final String NUMBERS_NON_PARTITIONED_PIG_TABLE_NAME = "numbers_nopart_pig_empty_initially";
+ private static final String IS_PIG_NON_PART_TABLE = "is.pig.non.part.table";
+
+ public static class SumMapper
+ extends Mapper<WritableComparable, HCatRecord, WritableComparable, HCatRecord>{
+
+ // one field per column of the "numbers" table, typed as hcat hands
+ // them to the map (id as a Short, intnum5 as a Byte, and so on)
+ Integer intnum1000;
+ Short id;
+ Byte intnum5;
+ Integer intnum100;
+ Integer intnum;
+ Long longnum;
+ Float floatnum;
+ Double doublenum;
+ @Override
+ protected void map(WritableComparable key, HCatRecord value,
+ org.apache.hadoop.mapreduce.Mapper<WritableComparable,HCatRecord,
+ WritableComparable,HCatRecord>.Context context)
+ throws IOException, InterruptedException {
+ boolean isnoPartPig = context.getConfiguration().getBoolean(IS_PIG_NON_PART_TABLE, false);
+ intnum1000 = ((Integer)value.get(0));
+ id = ((Short) value.get(1));
+ intnum5 = (((Byte)value.get(2)));
+ intnum100 = (((Integer) value.get(3)));
+ intnum = ((Integer) value.get(4));
+ longnum = ((Long) value.get(5));
+ floatnum = ((Float) value.get(6));
+ doublenum = ((Double) value.get(7));
+ HCatRecord output = new DefaultHCatRecord(8);
+ output.set(0, intnum1000 + 10);
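+ // for the pig table variant, store the smallint and tinyint columns
+ // as int, since pig cannot handle those types (see class javadoc)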
+ if(isnoPartPig) {
+ output.set(1, (int)(id + 10));
+ output.set(2, (int)(intnum5 + 10));
+ } else {
+ output.set(1, (short)(id + 10));
+ output.set(2, (byte)(intnum5 + 10));
+ }
+
+ output.set(3, intnum100 + 10);
+ output.set(4, intnum + 10);
+ output.set(5, (long) (longnum + 10));
+ output.set(6, (float) (floatnum + 10));
+ output.set(7, (double) (doublenum + 10));
+ context.write(new IntWritable(0), output);
+
+ }
+ }
+
+
+ public static void main(String[] args) throws Exception {
+ Configuration conf = new Configuration();
+ args = new GenericOptionsParser(conf, args).getRemainingArgs();
+ String[] otherArgs = new String[2];
+ int j = 0;
+ for(int i = 0; i < args.length; i++) {
+ if(args[i].equals("-libjars")) {
+ // generic options parser doesn't seem to work!
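+ // setting "tmpjars" by hand is what -libjars would normally do:
+ // the listed jars are shipped to the cluster with the job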
+ conf.set("tmpjars", args[i+1]);
+ i = i + 1; // skip the jar list; the loop increment then moves past it
+ } else {
+ otherArgs[j++] = args[i];
+ }
+ }
+ if (j != 2) {
+ usage();
+ }
+ String serverUri = otherArgs[0];
+ if(otherArgs[1] == null ||
+ (!otherArgs[1].equalsIgnoreCase("part") && !otherArgs[1].equalsIgnoreCase("nopart")
+ && !otherArgs[1].equalsIgnoreCase("nopart_pig"))) {
+ usage();
+ }
+ boolean writeToPartitionedTable = (otherArgs[1].equalsIgnoreCase("part"));
+ boolean writeToNonPartPigTable = (otherArgs[1].equalsIgnoreCase("nopart_pig"));
+ String tableName = NUMBERS_TABLE_NAME;
+ String dbName = "default";
+ Map<String, String> outputPartitionKvps = new HashMap<String, String>();
+ String outputTableName = null;
+ conf.set(IS_PIG_NON_PART_TABLE, "false");
+ if(writeToPartitionedTable) {
+ outputTableName = NUMBERS_PARTITIONED_TABLE_NAME;
+ outputPartitionKvps.put("datestamp", "20100101");
+ } else {
+ if(writeToNonPartPigTable) {
+ conf.set(IS_PIG_NON_PART_TABLE, "true");
+ outputTableName = NUMBERS_NON_PARTITIONED_PIG_TABLE_NAME;
+ } else {
+ outputTableName = NUMBERS_NON_PARTITIONED_TABLE_NAME;
+ }
+ // test with null or empty randomly
+ if(new Random().nextInt(2) == 0) {
+ outputPartitionKvps = null;
+ }
+ }
+
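+ // forward the metastore principal, needed when talking to a
+ // kerberized (secure) metastore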
+ String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL);
+ if(principalID != null)
+ conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID);
+ Job job = new Job(conf, "storenumbers");
+
+ // initialize HCatInputFormat
+ HCatInputFormat.setInput(job, InputJobInfo.create(
+ dbName, tableName, null, serverUri, principalID));
+ // initialize HCatOutputFormat
+ HCatOutputFormat.setOutput(job, OutputJobInfo.create(
+ dbName, outputTableName, outputPartitionKvps, serverUri, principalID));
+ // fetch the input table schema (rewritten below for the pig variant)
+ HCatSchema s = HCatInputFormat.getTableSchema(job);
+ if(writeToNonPartPigTable) {
+ List<HCatFieldSchema> newHfsList = new ArrayList<HCatFieldSchema>();
+ // change smallint and tinyint to int
+ for(HCatFieldSchema hfs: s.getFields()){
+ if(hfs.getTypeString().equals("smallint")) {
+ newHfsList.add(new HCatFieldSchema(hfs.getName(),
+ HCatFieldSchema.Type.INT, hfs.getComment()));
+ } else if(hfs.getTypeString().equals("tinyint")) {
+ newHfsList.add(new HCatFieldSchema(hfs.getName(),
+ HCatFieldSchema.Type.INT, hfs.getComment()));
+ } else {
+ newHfsList.add(hfs);
+ }
+ }
+ s = new HCatSchema(newHfsList);
+ }
+ HCatOutputFormat.setSchema(job, s);
+
+
+ job.setInputFormatClass(HCatInputFormat.class);
+ job.setOutputFormatClass(HCatOutputFormat.class);
+ job.setJarByClass(StoreNumbers.class);
+ job.setMapperClass(SumMapper.class);
+ job.setOutputKeyClass(IntWritable.class);
+ job.setNumReduceTasks(0);
+ job.setOutputValueClass(DefaultHCatRecord.class);
+ System.exit(job.waitForCompletion(true) ? 0 : 1);
+ }
+
+
+ /**
+ * Prints usage information and exits with status 2.
+ */
+ private static void usage() {
+ System.err.println("Usage: hadoop jar storenumbers <serveruri> <part|nopart|nopart_pig> <-libjars hive-hcat jar>\n" +
+ "\tIf the second argument is \"part\" data is written to datestamp = '2010101' partition of " +
+ "the numbers_part_empty_initially table.\n\tIf the second argument is \"nopart\", data is written to " +
+ "the unpartitioned numbers_nopart_empty_initially table.\n\tIf the second argument is \"nopart_pig\", " +
+ "data is written to the unpartitioned numbers_nopart_pig_empty_initially table.\nt" +
+ "The hcat jar location should be specified as file://<full path to jar>\n");
+ System.exit(2);
+
+ }
+
+
+}
Added: incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/SumNumbers.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/SumNumbers.java?rev=1163097&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/SumNumbers.java (added)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/SumNumbers.java Tue Aug 30 06:18:16 2011
@@ -0,0 +1,257 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hcatalog.utils;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.DoubleWritable;
+import org.apache.hadoop.io.FloatWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
+import org.apache.hadoop.util.GenericOptionsParser;
+import org.apache.hcatalog.common.HCatConstants;
+import org.apache.hcatalog.data.HCatRecord;
+import org.apache.hcatalog.mapreduce.HCatInputFormat;
+import org.apache.hcatalog.mapreduce.InputJobInfo;
+
+/**
+ * This is a map reduce test for testing hcat which goes against the "numbers"
+ * table. It performs a group by on the first column and a SUM operation on the
+ * other columns. This is to simulate a typical operation in a map reduce program
+ * to test that hcat hands the right data to the map reduce program
+ *
+ * Usage: hadoop jar sumnumbers <serveruri> <output dir> <-libjars hive-hcat jar>
+ The output is tab-delimited.
+ The hcat jar location should be specified as file://<full path to jar>
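+
+ An example invocation (hypothetical metastore URI and paths, for
+ illustration only):
+ hadoop jar sumnumbers thrift://localhost:9083 /tmp/sumnumbers.out \
+ -libjars file:///tmp/hcatalog.jar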
+ */
+public class SumNumbers {
+
+ private static final String NUMBERS_TABLE_NAME = "numbers";
+ private static final String TAB = "\t";
+
+ public static class SumMapper
+ extends Mapper<WritableComparable, HCatRecord, IntWritable, SumNumbers.ArrayWritable>{
+
+ IntWritable intnum1000;
+ // though id is given as a Short by hcat, the map will emit it as an
+ // IntWritable so we can just sum in the reduce
+ IntWritable id;
+
+ // though intnum5 is handed as a Byte by hcat, the map() will emit it as
+ // an IntWritable so we can just sum in the reduce
+ IntWritable intnum5;
+ IntWritable intnum100;
+ IntWritable intnum;
+ LongWritable longnum;
+ FloatWritable floatnum;
+ DoubleWritable doublenum;
+ @Override
+ protected void map(WritableComparable key, HCatRecord value,
+ org.apache.hadoop.mapreduce.Mapper<WritableComparable,HCatRecord,
+ IntWritable,SumNumbers.ArrayWritable>.Context context)
+ throws IOException, InterruptedException {
+ intnum1000 = new IntWritable((Integer)value.get(0));
+ id = new IntWritable((Short) value.get(1));
+ intnum5 = new IntWritable(((Byte)value.get(2)));
+ intnum100 = new IntWritable(((Integer) value.get(3)));
+ intnum = new IntWritable((Integer) value.get(4));
+ longnum = new LongWritable((Long) value.get(5));
+ floatnum = new FloatWritable((Float) value.get(6));
+ doublenum = new DoubleWritable((Double) value.get(7));
+ SumNumbers.ArrayWritable outputValue = new SumNumbers.ArrayWritable(id,
+ intnum5, intnum100, intnum, longnum, floatnum, doublenum);
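+ // the first column (intnum1000) is the group-by key; the reducer
+ // sums the remaining columns for each key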
+ context.write(intnum1000, outputValue);
+
+ }
+ }
+
+ public static class SumReducer extends Reducer<IntWritable, SumNumbers.ArrayWritable,
+ LongWritable, Text> {
+
+
+ LongWritable dummyLong = null;
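+ // TextOutputFormat emits only the value when the key is null; the
+ // group key is already embedded at the front of the output string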
+ @Override
+ protected void reduce(IntWritable key, java.lang.Iterable<ArrayWritable>
+ values, org.apache.hadoop.mapreduce.Reducer<IntWritable,ArrayWritable,LongWritable,Text>.Context context)
+ throws IOException, InterruptedException {
+ String output = key.toString() + TAB;
+ Long sumid = 0L;
+ Long sumintnum5 = 0L;
+ Long sumintnum100 = 0L;
+ Long sumintnum = 0L;
+ Long sumlongnum = 0L;
+ Float sumfloatnum = 0.0f;
+ Double sumdoublenum = 0.0;
+ for (ArrayWritable value : values) {
+ sumid += value.id.get();
+ sumintnum5 += value.intnum5.get();
+ sumintnum100 += value.intnum100.get();
+ sumintnum += value.intnum.get();
+ sumlongnum += value.longnum.get();
+ sumfloatnum += value.floatnum.get();
+ sumdoublenum += value.doublenum.get();
+ }
+ output += sumid + TAB;
+ output += sumintnum5 + TAB;
+ output += sumintnum100 + TAB;
+ output += sumintnum + TAB;
+ output += sumlongnum + TAB;
+ output += sumfloatnum + TAB;
+ output += sumdoublenum + TAB;
+ context.write(dummyLong, new Text(output));
+ }
+ }
+
+ public static void main(String[] args) throws Exception {
+ Configuration conf = new Configuration();
+ args = new GenericOptionsParser(conf, args).getRemainingArgs();
+ String[] otherArgs = new String[2];
+ int j = 0;
+ for(int i = 0; i < args.length; i++) {
+ if(args[i].equals("-libjars")) {
+ // generic options parser doesn't seem to work!
+ conf.set("tmpjars", args[i+1]);
+ i = i + 1; // skip the jar list; the loop increment then moves past it
+ } else {
+ otherArgs[j++] = args[i];
+ }
+ }
+ if (j != 2) {
+ System.err.println("Usage: hadoop jar sumnumbers <serveruri> <output dir> <-libjars hive-hcat jar>\n" +
+ "The output is tab-delimited.\n" +
+ "The hcat jar location should be specified as file://<full path to jar>\n");
+ System.exit(2);
+ }
+ String serverUri = otherArgs[0];
+ String tableName = NUMBERS_TABLE_NAME;
+ String outputDir = otherArgs[1];
+ String dbName = "default";
+
+ String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL);
+ if(principalID != null)
+ conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID);
+ Job job = new Job(conf, "sumnumbers");
+ HCatInputFormat.setInput(job, InputJobInfo.create(
+ dbName, tableName, null, serverUri, principalID));
+ // initialize HCatOutputFormat
+
+ job.setInputFormatClass(HCatInputFormat.class);
+ job.setOutputFormatClass(TextOutputFormat.class);
+ job.setJarByClass(SumNumbers.class);
+ job.setMapperClass(SumMapper.class);
+ job.setReducerClass(SumReducer.class);
+ job.setMapOutputKeyClass(IntWritable.class);
+ job.setMapOutputValueClass(ArrayWritable.class);
+ job.setOutputKeyClass(LongWritable.class);
+ job.setOutputValueClass(Text.class);
+ FileOutputFormat.setOutputPath(job, new Path(outputDir));
+ System.exit(job.waitForCompletion(true) ? 0 : 1);
+ }
+
+ public static class ArrayWritable implements Writable {
+
+ // though id is given as a Short by hcat, the map will emit it as an
+ // IntWritable so we can just sum in the reduce
+ IntWritable id;
+
+ // though intnum5 is handed as a Byte by hcat, the map() will emit it as
+ // an IntWritable so we can just sum in the reduce
+ IntWritable intnum5;
+
+ IntWritable intnum100;
+ IntWritable intnum;
+ LongWritable longnum;
+ FloatWritable floatnum;
+ DoubleWritable doublenum;
+
+ /**
+ * No-arg constructor required by the Writable contract: Hadoop
+ * instantiates the class reflectively and then calls readFields().
+ */
+ public ArrayWritable() {
+ id = new IntWritable();
+ intnum5 = new IntWritable();
+ intnum100 = new IntWritable();
+ intnum = new IntWritable();
+ longnum = new LongWritable();
+ floatnum = new FloatWritable();
+ doublenum = new DoubleWritable();
+ }
+
+
+
+ /**
+ * Constructs an ArrayWritable from pre-populated column values.
+ */
+ public ArrayWritable(IntWritable id, IntWritable intnum5,
+ IntWritable intnum100, IntWritable intnum, LongWritable longnum,
+ FloatWritable floatnum, DoubleWritable doublenum) {
+ this.id = id;
+ this.intnum5 = intnum5;
+ this.intnum100 = intnum100;
+ this.intnum = intnum;
+ this.longnum = longnum;
+ this.floatnum = floatnum;
+ this.doublenum = doublenum;
+ }
+
+
+
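+ // Writable contract: readFields must consume the fields in exactly
+ // the order that write() emits them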
+ @Override
+ public void readFields(DataInput in) throws IOException {
+ id.readFields(in);
+ intnum5.readFields(in);
+ intnum100.readFields(in);
+ intnum.readFields(in);
+ longnum.readFields(in);
+ floatnum.readFields(in);
+ doublenum.readFields(in);
+ }
+
+ @Override
+ public void write(DataOutput out) throws IOException {
+ id.write(out);
+ intnum5.write(out);
+ intnum100.write(out);
+ intnum.write(out);
+ longnum.write(out);
+ floatnum.write(out);
+ doublenum.write(out);
+
+ }
+
+ }
+}
Added: incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/TypeDataCheck.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/TypeDataCheck.java?rev=1163097&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/TypeDataCheck.java (added)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/TypeDataCheck.java Tue Aug 30 06:18:16 2011
@@ -0,0 +1,182 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hcatalog.utils;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
+import org.apache.hadoop.util.GenericOptionsParser;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.hcatalog.common.HCatConstants;
+import org.apache.hcatalog.data.HCatRecord;
+import org.apache.hcatalog.data.schema.HCatSchema;
+import org.apache.hcatalog.mapreduce.HCatInputFormat;
+import org.apache.hcatalog.mapreduce.InputJobInfo;
+
+/**
+ * This is a map reduce test for testing hcat that checks that the columns
+ * handed by hcat have the right type and right values. It achieves the first
+ * objective by checking the type of the Objects representing the columns against
+ * the schema provided as a cmdline arg. It achieves the second objective by
+ * writing the data as Text to be compared against golden results.
+ *
+ * The schema specification consists of the types as given by "describe <table>"
+ * with each column's type separated from the next column's type by a '+'
+ *
+ * Can be used against "numbers" and "complex" tables.
+ *
+ * Usage: hadoop jar testudf.jar typedatacheck <serveruri> <tablename>
+ * <hive types of cols + delimited> <output dir> <tab|ctrla> <-libjars hive-hcat jar>
+ The <tab|ctrla> argument controls the output delimiter.
+ The hcat jar location should be specified as file://<full path to jar>
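+
+ An example invocation against the "numbers" table (hypothetical URI and
+ paths, for illustration only):
+ hadoop jar testudf.jar typedatacheck thrift://localhost:9083 numbers \
+ int+smallint+tinyint+int+int+bigint+float+double /tmp/typecheck.out tab \
+ -libjars file:///tmp/hcatalog.jar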
+ */
+public class TypeDataCheck implements Tool{
+
+ static String SCHEMA_KEY = "schema";
+ static String DELIM = "delim";
+ private static Configuration conf = new Configuration();
+
+ public static class TypeDataCheckMapper
+ extends Mapper<WritableComparable, HCatRecord, Long, Text>{
+
+ Long dummykey = null;
+ String[] types;
+ String delim = "\u0001";
+ @Override
+ protected void setup(org.apache.hadoop.mapreduce.Mapper<WritableComparable,HCatRecord,Long,Text>.Context context)
+ throws IOException, InterruptedException {
+ String typesStr = context.getConfiguration().get(SCHEMA_KEY);
+ delim = context.getConfiguration().get(DELIM);
+ if(delim.equals("tab")) {
+ delim = "\t";
+ } else if (delim.equals("ctrla")) {
+ delim = "\u0001";
+ }
+ types = typesStr.split("\\+");
+ for(int i = 0; i < types.length; i++) {
+ types[i] = types[i].toLowerCase();
+ }
+
+
+ }
+
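+ // Util.check is expected to verify the column object's runtime class
+ // against the declared hive type and return its string form (see the
+ // class javadoc for the overall checking strategy)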
+ String check(HCatRecord r) throws IOException {
+ String s = "";
+ for(int i = 0; i < r.size(); i++) {
+ s += Util.check(types[i], r.get(i));
+ if(i != r.size() - 1) {
+ s += delim;
+ }
+ }
+ return s;
+ }
+
+ @Override
+ protected void map(WritableComparable key, HCatRecord value,
+ org.apache.hadoop.mapreduce.Mapper<WritableComparable,HCatRecord,Long,Text>.Context context)
+ throws IOException, InterruptedException {
+ context.write(dummykey, new Text(check(value)));
+ }
+ }
+
+ public static void main(String[] args) throws Exception {
+ TypeDataCheck self = new TypeDataCheck();
+ System.exit(ToolRunner.run(conf, self, args));
+ }
+
+ public int run(String[] args) {
+ try {
+ args = new GenericOptionsParser(conf, args).getRemainingArgs();
+ String[] otherArgs = new String[5];
+ int j = 0;
+ for(int i = 0; i < args.length; i++) {
+ if(args[i].equals("-libjars")) {
+ conf.set("tmpjars",args[i+1]);
+ i = i+1; // skip it , the for loop will skip its value
+ } else {
+ otherArgs[j++] = args[i];
+ }
+ }
+ if (j != 5) {
+ System.err.println("Other args:" + Arrays.asList(otherArgs));
+ System.err.println("Usage: hadoop jar testudf.jar typedatacheck " +
+ "<serveruri> <tablename> <hive types of cols + delimited> " +
+ "<output dir> <tab|ctrla> <-libjars hive-hcat jar>\n" +
+ "The <tab|ctrla> argument controls the output delimiter.\n" +
+ "The hcat jar location should be specified as file://<full path to jar>\n");
+ System.exit(2);
+ }
+ String serverUri = otherArgs[0];
+ String tableName = otherArgs[1];
+ String schemaStr = otherArgs[2];
+ String outputDir = otherArgs[3];
+ String outputdelim = otherArgs[4];
+ if(!outputdelim.equals("tab") && !outputdelim.equals("ctrla")) {
+ System.err.println("ERROR: Specify 'tab' or 'ctrla' for output delimiter");
+ System.exit(2);
+ }
+ String dbName = "default";
+
+ String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL);
+ if(principalID != null){
+ conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID);
+ }
+ Job job = new Job(conf, "typedatacheck");
+ // initialize HCatInputFormat
+ HCatInputFormat.setInput(job, InputJobInfo.create(
+ dbName, tableName, null, serverUri, principalID));
+ HCatSchema s = HCatInputFormat.getTableSchema(job);
+ job.getConfiguration().set(SCHEMA_KEY, schemaStr);
+ job.getConfiguration().set(DELIM, outputdelim);
+ job.setInputFormatClass(HCatInputFormat.class);
+ job.setOutputFormatClass(TextOutputFormat.class);
+ job.setJarByClass(TypeDataCheck.class);
+ job.setMapperClass(TypeDataCheckMapper.class);
+ job.setNumReduceTasks(0);
+ job.setOutputKeyClass(Long.class);
+ job.setOutputValueClass(Text.class);
+ FileOutputFormat.setOutputPath(job, new Path(outputDir));
+ return job.waitForCompletion(true) ? 0 : 1;
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ @Override
+ public Configuration getConf() {
+ return conf;
+ }
+
+ @Override
+ public void setConf(Configuration conf) {
+ TypeDataCheck.conf = conf;
+ }
+
+}