You are viewing a plain text version of this content. The canonical link for it is here.
Posted to hcatalog-commits@incubator.apache.org by ga...@apache.org on 2012/04/03 22:17:49 UTC

svn commit: r1309177 - in /incubator/hcatalog/trunk: CHANGES.txt src/test/e2e/hcatalog/deployers/HCatExistingClusterDeployer.pm src/test/e2e/hcatalog/tests/pig.conf src/test/e2e/hcatalog/tools/generate/generate_data.pl

Author: gates
Date: Tue Apr  3 22:17:48 2012
New Revision: 1309177

URL: http://svn.apache.org/viewvc?rev=1309177&view=rev
Log:
HCATALOG-352 Need tests for complex types

Modified:
    incubator/hcatalog/trunk/CHANGES.txt
    incubator/hcatalog/trunk/src/test/e2e/hcatalog/deployers/HCatExistingClusterDeployer.pm
    incubator/hcatalog/trunk/src/test/e2e/hcatalog/tests/pig.conf
    incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/generate/generate_data.pl

Modified: incubator/hcatalog/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/CHANGES.txt?rev=1309177&r1=1309176&r2=1309177&view=diff
==============================================================================
--- incubator/hcatalog/trunk/CHANGES.txt (original)
+++ incubator/hcatalog/trunk/CHANGES.txt Tue Apr  3 22:17:48 2012
@@ -77,6 +77,8 @@ Release 0.4.0 - Unreleased
   HCAT-2 Support nested schema conversion between Hive and Pig (julienledem via hashutosh)
 
   IMPROVEMENTS
+  HCAT-352 Need tests for complex types (gates)
+
   HCAT-268 Remove remnants of storage drivers. (rohini via gates)
 
   HCAT-306 Need more end-to-end tests (gates)

Modified: incubator/hcatalog/trunk/src/test/e2e/hcatalog/deployers/HCatExistingClusterDeployer.pm
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/deployers/HCatExistingClusterDeployer.pm?rev=1309177&r1=1309176&r2=1309177&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/deployers/HCatExistingClusterDeployer.pm (original)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/deployers/HCatExistingClusterDeployer.pm Tue Apr  3 22:17:48 2012
@@ -162,6 +162,11 @@ sub generateData
             'rows' => 100000,
             'hdfs' => "all100krc",
             'format' => "rc",
+        }, {
+            'name' => "studentcomplextab10k",
+            'filetype' => "studentcomplextab",
+            'rows' => 10000,
+            'hdfs' => "studentcomplextab10k",
         }
     );
 

Modified: incubator/hcatalog/trunk/src/test/e2e/hcatalog/tests/pig.conf
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/tests/pig.conf?rev=1309177&r1=1309176&r2=1309177&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/tests/pig.conf (original)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/tests/pig.conf Tue Apr  3 22:17:48 2012
@@ -419,6 +419,101 @@ store a into ':OUTPATH:';\,
                                 ,'delimiter' => '	'
                                 }
                         ],
-                }, # end g
+                },{
+                        'name' => 'Pig_Complex',
+                        'tests' => [
+                                {
+                                 # test reading tuples from the complex table
+                                 'num' => 1 
+                                ,'pig' => q\
+a = load 'studentcomplextab10k' using org.apache.hcatalog.pig.HCatLoader();
+b = foreach a generate flatten(s); 
+store b into ':OUTPATH:';\,
+				                ,'sql'   => q\select IFNULL(name, ""), IFNULL(age, ""), IFNULL(gpa, "") from studentcomplextab10k;\
+                                ,'floatpostprocess' => 1
+                                ,'delimiter' => '	'
+                                },{
+                                 # test reading maps from the complex table
+                                 'num' => 2 
+                                ,'pig' => q\
+a = load 'studentcomplextab10k' using org.apache.hcatalog.pig.HCatLoader();
+b = foreach a generate s.name as n1, m#'name' as n2;
+c = filter b by n1 != '' and n2 is not null;
+store c into ':OUTPATH:';\,
+				                ,'sql'   => q\select t.name, m.mvalue
+                                              from studentcomplextab10k t, studentcomplextab10k_map m
+                                              where t.id = m.tid and t.name is not null and m.mkey = 'name';\
+                                ,'floatpostprocess' => 1
+                                ,'delimiter' => '	'
+                                },{
+                                 # test reading arrays from the complex table
+                                 'num' => 3 
+                                ,'pig' => q\
+a = load 'studentcomplextab10k' using org.apache.hcatalog.pig.HCatLoader();
+b = foreach a generate s.name as n1, flatten(a);
+c = filter b by n1 != '' ;
+store c into ':OUTPATH:';\,
+				                ,'sql'   => q\select t.name, m.lvalue
+                                              from studentcomplextab10k t, studentcomplextab10k_list m
+                                              where t.id = m.tid and t.name is not null;\
+                                ,'floatpostprocess' => 1
+                                ,'delimiter' => '	'
+                                },{
+                                 # test writing tuples to a complex table.  This also tests reading with default separators.
+                                 'num' => 4 
+                                 ,'notmq' => 1
+                                ,'hcat_prep'=>q\drop table if exists pig_complex_4;
+create table pig_complex_4 (s struct<name: string, age: int, gpa: double>) STORED AS TEXTFILE;\
+                                ,'pig' => q\
+a = load 'studenttab10k' using org.apache.hcatalog.pig.HCatLoader();
+b = foreach a generate TOTUPLE(name, age, gpa) as s;
+store b into 'pig_complex_4' using org.apache.hcatalog.pig.HCatStorer();
+exec;
+c = load 'pig_complex_4' using org.apache.hcatalog.pig.HCatLoader();
+d = foreach c generate flatten(s);
+store d into ':OUTPATH:';\
+				                ,'sql'   => q\select name, age, gpa from studenttab10k;\
+                                ,'floatpostprocess' => 1
+                                ,'delimiter' => '	'
+                                },{
+                                 # test writing maps to a complex table.  This also tests reading with default separators.
+                                 'num' => 5 
+                                 ,'notmq' => 1
+                                ,'hcat_prep'=>q\drop table if exists pig_complex_5;
+create table pig_complex_5 (m map<string, string>) STORED AS TEXTFILE;\
+                                ,'pig' => q\
+a = load 'studenttab10k' using org.apache.hcatalog.pig.HCatLoader();
+b = foreach a generate TOMAP('name', name, 'age', (chararray)age, 'gpa', (chararray)gpa) as m;
+store b into 'pig_complex_5' using org.apache.hcatalog.pig.HCatStorer();
+exec;
+c = load 'pig_complex_5' using org.apache.hcatalog.pig.HCatLoader();
+d = foreach c generate m#'name', m#'age', m#'gpa';
+store d into ':OUTPATH:';\
+				                ,'sql'   => q\select name, age, gpa from studenttab10k;\
+                                ,'floatpostprocess' => 1
+                                ,'delimiter' => '	'
+                                }, {
+                                 # test writing bags to a complex table.  This also tests reading with default separators.
+                                 'num' => 6 
+                                 ,'notmq' => 1
+                                ,'hcat_prep'=>q\drop table if exists pig_complex_6;
+create table pig_complex_6 (a array<string>) STORED AS TEXTFILE;\
+                                ,'pig' => q\
+a = load 'studenttab10k' using org.apache.hcatalog.pig.HCatLoader();
+b = foreach a generate name;
+c = distinct b;
+d = group c all;
+e = foreach d generate $1 as a;
+store e into 'pig_complex_6' using org.apache.hcatalog.pig.HCatStorer();
+exec;
+f = load 'pig_complex_6' using org.apache.hcatalog.pig.HCatLoader();
+g = foreach f generate flatten(a);
+store g into ':OUTPATH:';\
+				                ,'sql'   => q\select distinct name from studenttab10k;\
+                                ,'floatpostprocess' => 1
+                                ,'delimiter' => '	'
+                                }
+                        ],
+                } # end g
          ]
 }

Modified: incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/generate/generate_data.pl
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/generate/generate_data.pl?rev=1309177&r1=1309176&r2=1309177&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/generate/generate_data.pl (original)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/generate/generate_data.pl Tue Apr  3 22:17:48 2012
@@ -535,13 +535,20 @@ sub findHiveJars()
         }
     } elsif ($filetype eq "studentcomplextab") {
         srand(3.14159 + $numRows);
+        my $mapTable = $tableName . "_map";
+        my $listTable = $tableName . "_list";
         print MYSQL "drop table if exists $tableName;\n";
-        print MYSQL "create table $tableName (nameagegpamap varchar(500), nameagegpatuple varchar(500), nameagegpabag varchar(500), nameagegpamap_name varchar(500), nameagegpamap_age integer, nameagegpamap_gpa float(3));\n";
-        print MYSQL "begin transaction;\n";
-        print $hivefp "drop table if exists $tableName;\ncreate external table $tableName(
-            nameagegpamap map<string, string>,
-            struct <name: string, age: int, gpa: float>,
-            array <int>)
+        print MYSQL "drop table if exists $mapTable;\n";
+        print MYSQL "drop table if exists $listTable;\n";
+        print MYSQL "create table $tableName (id integer, name varchar(100), age integer, gpa float(3));\n";
+        print MYSQL "create table $mapTable (tid integer, mkey varchar(100), mvalue varchar(100));\n";
+        print MYSQL "create table $listTable (tid integer, lvalue integer);\n";
+        print MYSQL "begin;\n";
+        print $hivefp "drop table if exists $tableName;
+        create external table $tableName(
+            m map<string, string>,
+            s struct <name: string, age: int, gpa: float>,
+            a array <int>)
         row format delimited
         fields terminated by '\\t'
         collection items terminated by ','
@@ -552,20 +559,30 @@ sub findHiveJars()
             # generate nulls in a random fashion
             my $map = rand(1) < 0.05 ? '' : randomNameAgeGpaMap();
             my $tuple = rand(1) < 0.05 ? '' : randomNameAgeGpaTuple();
-            my $bag = rand(1) < 0.05 ? '' : randomList();
-            printf MYSQL "insert into $tableName (nameagegpamap, nameagegpatuple, nameagegpabag, nameagegpamap_name, nameagegpamap_age, nameagegpamap_gpa) values(";
-            my $mapHash;
-            if($map ne '') {
-                $mapHash = getMapFields($map);
+            my $list = rand(1) < 0.05 ? '' : randomList();
+            print MYSQL "insert into $tableName (id, name, age, gpa) values(";
+            print MYSQL "$i, ";
+            if ($tuple eq '') {
+                print MYSQL "null, null, null";
+            } else {
+                my @t = split(',', $tuple);
+                print MYSQL "'$t[0]', $t[1], $t[2]";
+            }
+            print MYSQL ");\n";
+            if ($map ne '') {
+                my $mapHash = getMapFields($map);
+                foreach my $k (keys(%$mapHash)) {
+                    print MYSQL "insert into $mapTable (tid, mkey, mvalue) values($i, '$k', '$mapHash->{$k}');\n";
+                }
             }
 
-            print MYSQL ($map eq ''? "null, " : "'$map', "), 
-                        ($tuple eq ''? "null, " : "'$tuple', "),
-                        ($bag eq '' ? "null, " : "'$bag', "),
-                        ($map eq '' ? "null, " : (exists($mapHash->{'name'}) ? "'".$mapHash->{'name'}."', " : "null, ")),
-                        ($map eq '' ? "null, " : (exists($mapHash->{'age'}) ? "'".$mapHash->{'age'}."', " : "null, ")),
-                        ($map eq '' ? "null);\n" : (exists($mapHash->{'gpa'}) ? "'".$mapHash->{'gpa'}."');\n" : "null);\n"));
-            print HDFS "$map\t$tuple\t$bag\n";
+            if ($list ne '') {
+                my @ls = split(',', $list);
+                foreach my $e (@ls) {
+                    print MYSQL "insert into $listTable (tid, lvalue) values($i, $e);\n";
+                }
+            }
+            print HDFS "$map\t$tuple\t$list\n";
         }
         print MYSQL "commit;\n";
 
@@ -634,7 +651,7 @@ for (my $i = 0; $i < $numRows; $i++) {
         srand(1.41421 + $numRows);
         print MYSQL "drop table if exists $tableName;\n";
         print MYSQL "create table $tableName (name varchar(255));\n";
-        print MYSQL "begin transaction;\n";
+        print MYSQL "begin;\n";
         print $hivefp "drop table if exists $tableName;\ncreate external table $tableName(
             name string)
         row format delimited