You are viewing a plain text version of this content. The canonical link for it is here.
Posted to hcatalog-commits@incubator.apache.org by ha...@apache.org on 2011/12/06 20:05:39 UTC
svn commit: r1211077 [5/7] - in /incubator/hcatalog/trunk: ./ conf/
src/test/e2e/hcatalog/ src/test/e2e/hcatalog/conf/
src/test/e2e/hcatalog/deployers/ src/test/e2e/hcatalog/drivers/
src/test/e2e/hcatalog/tests/ src/test/e2e/hcatalog/tools/generate/ sr...
Modified: incubator/hcatalog/trunk/src/test/e2e/hcatalog/tests/hcat.conf
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/tests/hcat.conf?rev=1211077&r1=1211076&r2=1211077&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/tests/hcat.conf (original)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/tests/hcat.conf Tue Dec 6 20:05:37 2011
@@ -13,2993 +13,126 @@
$cfg = {
'driver' => 'HCat',
-
-# 'run_as' => 'hadoopqa',
- 'use-pig.pl' => 1,
'groups' => [
# This first group should be moved to deployer ?
{
- 'name' => 'hcat_setup_Hive_createTable',
+ 'name' => 'HCat_CreateTable',
'tests' => [
{
'num' => 1
- ,'hcat_cmdline_args' => ['-g', 'users', '-p', 'rwxrwx---'],
- ,'hcat' => "
-CREATE external TABLE IF NOT EXISTS numbers (
-INTNUM1000 int,
-ID smallint,
-INTNUM5 tinyint,
-INTNUM100 int,
-INTNUM int,
-LONGNUM bigint,
-FLOATNUM float,
-DOUBLENUM double
-)
-stored as RCFile
-location ':INPATH:/numbers';
-"
+ ,'hcat' => q\
+drop table if exists hcat_createtable_1;
+create table hcat_createtable_1(name string,
+age int,
+gpa double)
+stored as textfile;
+describe hcat_createtable_1;\
,'rc' => 0
+ ,'expected_out_regex' => 'name string(\s)*age int(\s)*gpa double'
+ ,'expected_err_regex' => 'OK(.*)OK(.*)OK'
},
{
- 'num' => 6
- ,'hcat_cmdline_args' => ['-g', 'users', '-p', 'rwxrwx---'],
- ,'hcat' => "
-CREATE external TABLE IF NOT EXISTS complex (
- mymap map<string, string>,
- mytuple struct<num:int,str:string,dbl:double>,
- bagofmap array<map<string,string>>,
- rownum int
-)
-row format DELIMITED FIELDS TERMINATED BY '\001'
- COLLECTION ITEMS TERMINATED BY '\002'
- MAP KEYS TERMINATED BY '\003'
-stored as rcfile
-location ':INPATH:/complex';
-"
- ,'rc' => 0
- },
-
- {
- 'num' => 7
- ,'hcat_cmdline_args' => ['-g', 'users', '-p', 'rwxrwx---'],
- ,'hcat' => "
-CREATE external TABLE IF NOT EXISTS boolean_table (
-myflag boolean,
-rownum int
-)
-row format DELIMITED FIELDS TERMINATED BY '\001'
- COLLECTION ITEMS TERMINATED BY '\002'
- MAP KEYS TERMINATED BY '\003'
-stored as rcfile
-location ':INPATH:/boolean';
-"
+ 'num' => 2
+ ,'hcat' => q\
+drop table if exists hcat_createtable_2;
+create table hcat_createtable_2(name string,
+age int,
+gpa double) partitioned by (b string) stored as TEXTFILE;
+describe extended hcat_createtable_2;
+\
,'rc' => 0
+ ,'expected_out_regex' => 'name string(\s)*age int(\s)*gpa double'
},
-
{
- 'num' => 8
- # same as 'numbers' from above, just with 'int' for small/tiny ints.
- ,'hcat_cmdline_args' => ['-g', 'users', '-p', 'rwxrwx---'],
- ,'hcat' => "
-CREATE external TABLE IF NOT EXISTS numbers_pig (
-INTNUM1000 int,
-ID int, -- smallint,
-INTNUM5 int, -- tinyint,
-INTNUM100 int,
-INTNUM int,
-LONGNUM bigint,
-FLOATNUM float,
-DOUBLENUM double
-)
-stored as RCFile
-location ':INPATH:/numbers_pig';
-"
+ 'num' => 3
+ ,'hcat' => q\
+drop table if exists hcat_createtable_3;
+create table if not exists hcat_createtable_3(name string, age int, gpa double) stored as textfile;
+create table if not exists hcat_createtable_3(name string, age int, gpa double) stored as textfile;
+describe hcat_createtable_3;
+\
,'rc' => 0
+ ,'expected_out_regex' => 'name string(\s)*age int(\s)*gpa double'
+ ,'expected_err_regex' => 'OK(.*)OK(.*)OK(.*)OK'
},
],
}, # end g
-################################################################################
-# HIVE STORED DATA
-################################################################################
-
-#-------------------------------------------------------------------------------
-# Create partitioned test table using Hive
-#-------------------------------------------------------------------------------
- {
- 'name' => 'hcat_hive2hive_partitioned',
- 'tests' => [
-
- {
- 'num' => 1,
- 'hive' => "
-CREATE TABLE tmp_hive_partitioned_:RUNID: (
- id smallint,
- intnum int,
- floatnum float
-)
-partitioned by (
- idmod5 tinyint
-)
-stored as rcfile
-location ':TMP:/hcat_tmp_tables/tmp_hive_partitioned_:RUNID:'
-TBLPROPERTIES (
- 'hcat.isd'='org.apache.hcatalog.rcfile.RCFileInputDriver',
- 'hcat.osd'='org.apache.hcatalog.rcfile.RCFileOutputDriver'
-);
-",
- 'rc' => 0,
- },
-
- {
- 'num' => 2,
- 'depends_on' => 'hcat_hive2hive_partitioned_1',
- 'hadoop' => "fs -ls :TMP:/hcat_tmp_tables",
- 'rc' => 0,
- 'expected_out_regex' => ":TMP:/hcat_tmp_tables/tmp_hive_partitioned_:RUNID:",
- },
-
- {
- 'num' => 3,
- 'depends_on' => 'hcat_hive2hive_partitioned_1',
- 'hive' => "show tables;",
- 'rc' => 0,
- 'expected_out_regex' => "tmp_hive_partitioned_:RUNID:",
- },
-
- {
- 'num' => 4,
- # select from empty table
- 'depends_on' => 'hcat_hive2hive_partitioned_1',
- 'hive' => "select * from tmp_hive_partitioned_:RUNID:;",
- 'rc' => 0,
- 'expected_err_regex' => "OK",
- 'expected_out' => "",
- },
-
- {
- 'num' => 5,
- #
- 'depends_on' => 'hcat_hive2hive_partitioned_1',
- 'hive' => "
-set hive.exec.dynamic.partition.mode=nonstrict;
-set hive.exec.dynamic.partition=true;
-
-insert overwrite table tmp_hive_partitioned_:RUNID:
-partition (idmod5=1)
-select id, intnum, floatnum
-from numbers
-where id % 5 = 1;
-
-insert overwrite table tmp_hive_partitioned_:RUNID:
-partition (idmod5=2)
-select id, intnum, floatnum
-from numbers
-where id % 5 = 2;
-",
- 'rc' => 0,
- 'expected_err_regex' => "OK",
- 'expected_out' => "",
- },
-
- {
- 'num' => 6,
- #
- 'depends_on' => 'hcat_hive2hive_partitioned_5',
- 'sortBenchmark' => 1,
- 'sortResults' => 1,
- 'floatpostprocess' => 1,
- 'delimiter' => ' ',
- 'hive' => "
-select idmod5, id, intnum, floatnum
-from tmp_hive_partitioned_:RUNID:
-;
-",
- 'sql' => "
-select id % 5, id, intnum, floatnum
-from numbers
-where (id % 5 = 1)
- or (id % 5 = 2)
-;
-",
- },
-
- ] # end tests
- }, # end group
-
-#-------------------------------------------------------------------------------
-# HIVE STORED -> HIVE SECTION
-#-------------------------------------------------------------------------------
-
- {
- 'name' => 'hcat_hive2hive_type_check',
- 'tests' => [
- # for how the 'typecheck' udf works.
- # If the the typecheck does not pass, the udf will abort.
- # Look at the jobtracker for info about the actual value,
- # e.g. something like: "Expected java.lang.String, got java.lang.Integer"
- # The verification:
- # 'expected_out_regex' => "class java",
- # is needed because otherwise the tests might "PASS" because the output was empty...
-
- {
- 'num' => 1,
- 'hive' => q\
-add jar :FUNCPATH:/testudf.jar;
-create temporary function typecheck as 'org.apache.hcatalog.utils.HCatTypeCheckHive';
-select typecheck('int+smallint+tinyint+int+int+bigint+float+double',
-intnum1000,id,intnum5,intnum100,intnum,longnum,floatnum,doublenum) from numbers;
-\,
- 'rc' => 0,
- 'expected_out_regex' => "class java",
- 'expected_err_regex' => "OK",
- 'not_expected_err_regex' => "FAILED",
- },
-
- {
- 'num' => 2,
- 'hive' => q\
-add jar :FUNCPATH:/testudf.jar;
-create temporary function typecheck as 'org.apache.hcatalog.utils.HCatTypeCheckHive';
-select typecheck('map<string,string>+struct<num:int,str:string,dbl:double>+array<map<string,string>>+int',
-mymap, mytuple, bagofmap, rownum) from complex;
-\,
- 'rc' => 0,
- 'expected_out_regex' => "class java",
- 'expected_err_regex' => "OK",
- 'not_expected_err_regex' => "FAILED",
- },
-
- {
- 'num' => 3,
- 'hive' => q\
-add jar :FUNCPATH:/testudf.jar;
-create temporary function typecheck as 'org.apache.hcatalog.utils.HCatTypeCheckHive';
-select typecheck('boolean+int',
-myflag,rownum) from boolean_table;
-\,
- 'rc' => 0,
- 'expected_out_regex' => "class java",
- 'expected_err_regex' => "OK",
- 'not_expected_err_regex' => "FAILED",
- },
-
- {
- 'num' => 5,
- 'floatpostprocess' => 1,
- 'delimiter' => ' ',
- 'hive' => q\
-select
- id, -- expect smallint
- intnum5 + id, -- expect smallint
- intnum5 + id + intnum, -- expect int
- intnum5 + id + intnum + longnum, -- expect bigint
- intnum5 * id, -- expect smallint
- intnum5 * id * intnum, -- expect int
- intnum5 * id * intnum * longnum, -- expect bigint
- intnum5 + 500, -- expect int
- intnum5 + 1.5, -- expect float
- cast(intnum5 + 1.5 as double), -- expect double
- cast(intnum5 + 1.0 as int), -- expect int
- floatnum + doublenum, -- expect double
- floatnum * doublenum -- expect double
-from numbers
-order by id
-limit 500
-;
-\,
- 'sql' => "
-select
- id,
- intnum5 + id,
- intnum5 + id + intnum,
- intnum5 + id + intnum + longnum,
- intnum5 * id,
- intnum5 * id * intnum,
- intnum5 * id * intnum * longnum,
- intnum5 + 500,
- intnum5 + 1.5,
- cast(intnum5 + 1.5 as double precision),
- cast(intnum5 + 1.0 as integer),
- floatnum + doublenum,
- floatnum * doublenum
-from numbers
-order by id
-limit 500
-;
-",
- },
-
- {
- 'num' => 6,
- 'ignore' => '!!! Hive truncates where Postgres rounds',
- 'hive' => q\
-select
- id,
- intnum5,
- cast(intnum5 + 1.9 as int)
-from numbers
-order by id
-limit 5
-;
-\,
- 'sql' => "
-select
- id,
- intnum5,
- cast(intnum5 + 1.9 as integer)
-from numbers
-order by id
-limit 5
-;
-",
- },
-
- ] # end tests
- }, # end group
-
- {
- 'name' => 'hcat_hive2hive_diagnostic_operators',
- 'tests' => [
-
- {
- 'num' => 1,
- 'hive' => "show tables;",
- 'expected_err_regex' => "OK",
- },
- {
- 'num' => 5,
- 'hive' => "describe complex;",
- 'sql' => "\\d", # dummy statement
- },
-
- {
- 'num' => 6,
- 'hive' => "describe complex.mytuple;",
- 'expected_out_regex' => 'num\s+int\s+from\s+deserializer\s*\nstr\s+string\s+from\s+deserializer\s*\ndbl\s+double\s+from\s+deserializer',
- },
-
- {
- 'num' => 7,
- 'hive' => "describe complex.bagofmap;",
- 'expected_out_regex' => 'bagofmap\s+array<map<string,string>>\s+from deserializer',
- },
-
- {
- 'num' => 8,
- 'hive' => "describe boolean_table;",
- 'sql' => "\\d", # dummy statement
- },
-
- {
- 'num' => 9,
- 'hive' => "describe boolean_table.myflag;",
- 'expected_out_regex' => 'myflag\s+boolean\s+from deserializer',
- },
-
- ] # end tests
- }, # end group
-
- {
- 'name' => 'hcat_hive2hive_select',
- 'sortBenchmark' => 1,
- 'sortResults' => 1,
- 'tests' => [
-
- {
- 'num' => 1,
- 'floatpostprocess' => 1,
- 'delimiter' => ' ',
- # actually no nulls in table, so had not needed the casting...,
- 'hive' => q\
-select
- if (intnum1000 IS NULL, "", cast(intnum1000 as STRING)),
- if (id IS NULL, "", cast(id as STRING)),
- if (intnum5 IS NULL, "", cast(intnum5 as STRING)),
- if (intnum100 IS NULL, "", cast(intnum100 as STRING)),
- if (intnum IS NULL, "", cast(intnum as STRING)),
- if (longnum IS NULL, "", cast(longnum as STRING)),
- if (floatnum IS NULL, "", cast(floatnum as STRING)),
- if (doublenum IS NULL, "", cast(doublenum as STRING))
-from numbers;
-\,
- 'sql' => q\
-select
- intnum1000,
- id,
- intnum5,
- intnum100,
- intnum,
- longnum,
- floatnum,
- doublenum
-from numbers;
-\,
- },
-
- {
- 'num' => 2,
- 'hive' => q\
-select
- mymap,
- mytuple,
- bagofmap,
- rownum
-from complex;
-\,
- 'sql' => "\\d", # dummy statement
- },
-
- {
- 'num' => 3,
- 'hive' => q\
-select
- myflag, rownum
-from boolean_table;
-\,
- 'sql' => "\\d", # dummy statement
- },
-
-
-
- {
- 'num' => 8,
- 'sortBenchmark' => 1,
- 'sortResults' => 1,
- 'hive' => q\
-select
- distinct
- intnum1000,
- intnum5,
- intnum100,
- intnum,
- longnum
-from numbers;
-\,
- 'sql' => q\
-select
- distinct
- intnum1000,
- intnum5,
- intnum100,
- intnum,
- longnum
-from numbers;
-\,
- },
-
- ] # end tests
- }, # end group
-
- {
- 'name' => 'hcat_hive2hive_lateral',
- 'sortBenchmark' => 1,
- 'sortResults' => 1,
- # Had work-around for Hive problem
- # set hive.optimize.cp=false;
- 'tests' => [
-
- # NOTE:
- # The queries below written w/o WHERE clauses until this jira is resolved:
- # https://issues.apache.org/jira/browse/HIVE-1056
- # http://wiki.apache.org/hadoop/Hive/LanguageManual/LateralView
-
- {
- 'num' => 1,
- 'hive' => q\
-select rownum, item
-from complex LATERAL VIEW explode(bagofmap) explodedTable AS item;
-\,
- 'sql' => "\\d", # dummy statement
- },
-
-
- {
- 'num' => 3,
- 'hive' => q\
-select
- rownum,
- item,
- "item['a1']:", if (item['a1'] IS NULL, "", cast(item['a1'] as STRING)),
- "item['a2']:", if (item['a2'] IS NULL, "", cast(item['a2'] as STRING)),
- "item['k1']:", if (item['k1'] IS NULL, "", cast(item['k1'] as STRING)),
- "item['k6']:", if (item['k6'] IS NULL, "", cast(item['k6'] as STRING))
-from complex LATERAL VIEW explode(bagofmap) explodedTable AS item
-;
-\,
- 'sql' => "\\d", # dummy statement
- },
-
-
-
- ] # end tests
- }, # end group
-
- {
- 'name' => 'hcat_hive2hive_join',
- 'sortBenchmark' => 1,
- 'sortResults' => 1,
- 'tests' => [
-
- {
- 'num' => 1,
- 'floatpostprocess' => 1,
- 'delimiter' => ' ',
- 'hive' => q\
-select
- a.intnum1000,
- b.intnum1000,
- a.id,
- b.id,
- a.intnum5,
- b.intnum5,
- a.intnum100,
- b.intnum100,
- a.intnum,
- b.intnum,
- a.longnum,
- b.longnum,
- a.floatnum,
- b.floatnum,
- a.doublenum,
- b.doublenum
-from
- numbers a
- join numbers b
- on (a.intnum5 = b.intnum5)
-where
- a.id < 30
- and b.id < 40
-order by a.intnum5
-;
-\,
- 'sql' => "
-select
- a.intnum1000,
- b.intnum1000,
- a.id,
- b.id,
- a.intnum5,
- b.intnum5,
- a.intnum100,
- b.intnum100,
- a.intnum,
- b.intnum,
- a.longnum,
- b.longnum,
- a.floatnum,
- b.floatnum,
- a.doublenum,
- b.doublenum
-from
- numbers as a
- join numbers as b
- on (a.intnum5 = b.intnum5)
-where
- a.id < 30
- and b.id < 40
-order by a.intnum5
-;
-",
- },
-
- {
- 'num' => 2,
- # join by map entry
- # full outer join
- 'hive' => q\
-select
- a.rownum,
- b.rownum,
- a.mymap['mymapk1'],
- b.mymap['mymapk1'],
- a.mymap['mymapk3'],
- b.mymap['mymapk3'],
- a.mymap,
- b.mymap
-from
- complex a
- full outer join complex b
- on (a.mymap['mymapk1']
- = b.mymap['mymapk3'])
-;
-\,
- 'sql' => "\\d", # dummy statement
- },
-
- {
- 'num' => 3,
- # join by tuple item
- # inner join
- 'hive' => q\
-select
- a.rownum,
- b.rownum,
- a.mytuple.dbl,
- b.mytuple.dbl,
- a.mytuple,
- b.mytuple
-from
- complex a
-join complex b
-on (a.mytuple.dbl = b.mytuple.dbl + 1)
-;
-\,
- 'sql' => "\\d", # dummy statement
- },
-
-
- ] # end tests
- }, # end group
-# end group
-#-------------------------------------------------------------------------------
-# HIVE STORED -> PIG SECTION
-#-------------------------------------------------------------------------------
-
- {
- 'name' => 'hcat_hive2pig_load_describe',
- #
-
- 'tests' => [
-
- {
- 'num' => 1,
- 'pig' => "
-a = load 'default.numbers_pig' using org.apache.hcatalog.pig.HCatLoader();
-describe a;
-",
- 'rc' => 0,
- 'expected_out' => 'a: {intnum1000: int,id: int,intnum5: int,intnum100: int,intnum: int,longnum: long,floatnum: float,doublenum: double}
-',
- 'not_expected_err_regex' => "ERROR",
- },
-
- {
- 'num' => 2,
- 'pig' => "
-a = load 'default.complex' using org.apache.hcatalog.pig.HCatLoader();
-describe a;
-",
- 'rc' => 0,
- 'expected_out' => 'a: {mymap: map[],mytuple: (num: int,str: chararray,dbl: double),bagofmap: {(innerfield: map[])},rownum: int}
-',
- 'not_expected_err_regex' => "ERROR",
- },
-
- {
- 'num' => 3,
- 'ignore' => 'Pig does not understand boolean. Made a negative test for the error message.',
- 'pig' => "
-a = load 'default.boolean_table' using org.apache.hcatalog.pig.HCatLoader();
-describe a;
-",
- 'rc' => 0,
- 'expected_out' => "",
- 'not_expected_err_regex' => "ERROR",
- },
-
- ] # end tests
- }, # end group
- {
- 'name' => 'hcat_hive2pig_type_check',
- #
- # * This UDF can be used to check that a tuple presented by org.apache.hcatalog.pig.HCatLoader has the
- # * right types for the fields
- # * (...)
- # * The output should only contain the value '1' in all rows. (This UDF returns
- # * the integer value 1 if all fields have the right type, else throws IOException)
-
- 'tests' => [
-
- {
- 'num' => 1,
- 'pig' => "
-register :FUNCPATH:/testudf.jar;
-a = load 'default.numbers_pig' using org.apache.hcatalog.pig.HCatLoader();
-b = foreach a generate org.apache.hcatalog.utils.HCatTypeCheck('intnum1000:int,id:int,intnum5:int,intnum100:int,intnum:int,longnum:long,floatnum:float,doublenum:double', *);
-store b into ':OUTPATH:';
-",
- 'rc' => 0,
- 'expected_err_regex' => "Success",
- # 'expected_out_regex' => "1",
- # 'not_expected_out_regex' => "[^1\\t]",
- },
-
- {
- 'num' => 2,
- 'pig' => "
-register :FUNCPATH:/testudf.jar;
-a = load 'default.complex' using org.apache.hcatalog.pig.HCatLoader();
-b = foreach a generate org.apache.hcatalog.utils.HCatTypeCheck('mymap: map[],mytuple: (num: int,str: chararray,dbl: double),bagofmap: {(innerfield: map[])},rownum: int', *);
-store b into ':OUTPATH:';
-",
- 'rc' => 0,
- 'expected_err_regex' => "Success",
- # 'expected_out_regex' => "1",
- # 'not_expected_out_regex' => "[^1\\t]",
- },
-
- {
- 'num' => 3,
- 'pig' => "
-register :FUNCPATH:/testudf.jar;
-a = load 'default.boolean_table' using org.apache.hcatalog.pig.HCatLoader();
-b = foreach a generate org.apache.hcatalog.utils.HCatTypeCheck('boolean+int', *);
-store b into ':OUTPATH:';
-",
- 'rc' => 6,
- 'expected_err_regex' => "ERROR",
- # 'expected_out_regex' => "1",
- 'not_expected_out_regex' => "Success",
- },
-
-
- ] # end tests
- }, # end group
- {
- 'name' => 'hcat_hive2pig_empty',
- 'tests' => [
-
- {
- 'num' => 1,
- 'hive' => "
-CREATE TABLE tmp_hive_empty_:RUNID: (
- INTNUM1000 int,
- ID int,
- INTNUM5 int,
- INTNUM100 int,
- INTNUM int,
- LONGNUM bigint,
- FLOATNUM float,
- DOUBLENUM double
-)
-stored as rcfile
-location ':TMP:/hcat_tmp_tables/tmp_hive_empty_:RUNID:'
-TBLPROPERTIES (
- 'hcat.isd'='org.apache.hcatalog.rcfile.RCFileInputDriver',
- 'hcat.osd'='org.apache.hcatalog.rcfile.RCFileOutputDriver'
-);
-",
- 'rc' => 0,
- },
-
- {
- 'num' => 2,
- 'depends_on' => 'hcat_hive2pig_empty_1',
- 'pig' => "
-a = load 'default.tmp_hive_empty_:RUNID:' using org.apache.hcatalog.pig.HCatLoader();
-store a into ':OUTPATH:';
-",
- 'rc' => 0,
- 'expected_err_regex' => "Success",
- 'not_expected_err_regex' => "ERROR",
- 'expected_out' => "",
- },
-
- {
- 'num' => 3,
- 'depends_on' => 'hcat_hive2pig_empty_1',
- 'pig' => "
-a = load 'default.tmp_hive_empty_:RUNID:' using org.apache.hcatalog.pig.HCatLoader();
-store a into 'default.tmp_hive_empty_:RUNID:'
- using org.apache.hcatalog.pig.HCatStorer
- ('',
- 'intnum1000: int,id: int,intnum5: int,intnum100: int,intnum: int,longnum: long,floatnum: float,doublenum: double');
-",
- 'rc' => 0,
- 'expected_err_regex' => "Success",
- 'expected_out' => "",
- },
-
- ] # end tests
- }, # end group
- {
-
- 'name' => 'hcat_hive2pig_arithmetic_operators',
- # covering any items from the "Arithmetic Operators and More" section not covered elswhere
- 'delimiter' => ' ',
- 'tests' => [
-
- {
- 'num' => 5,
- 'sortResults' => 1,
- 'sortBenchmark' => 1,
- # COUNT star
- 'pig' => q?
-a = load 'default.numbers_pig' USING org.apache.hcatalog.pig.HCatLoader();
-b = group a all;
-c = foreach b generate COUNT(a.$0);
-store c into ':OUTPATH:';
-?,
- 'rc' => 0,
- 'expected_err_regex' => "Success!",
- 'expected_stdout' => "5000",
- },
- {
- 'num' => 6,
- # Group
- 'sortBenchmark' => 1,
- 'sortResults' => 1,
- # 'a: {intnum1000: int,id: int,intnum5: int,intnum100: int,intnum: int,longnum: long,floatnum: float,doublenum: double}'
- 'pig' => q?
-a = load 'default.numbers_pig' USING org.apache.hcatalog.pig.HCatLoader();
-b = group a by intnum5;
-c = foreach b generate group as i5, COUNT(a) as count_rows, SUM(a.id) as sum_id;
-store c into ':OUTPATH:';
-?,
- 'sql' => '
-select intnum5, COUNT(id), SUM(id)
-from numbers
-group by intnum5
-;',
- },
- {
- 'num' => 7,
- # Order by
- # 'a: {intnum1000: int,id: int,intnum5: int,intnum100: int,intnum: int,longnum: long,floatnum: float,doublenum: double}'
- ##########################################
- #
- # a = load '/user/hadoopqa/pig/tests/data/txt/numbers.txt' using PigStorage(':')
- # as (intnum1000: int,id: int,intnum5: int,intnum100: int,intnum: int,longnum: long,floatnum: float,doublenum: double);
- # b = group a by intnum5;
- # c = foreach b generate group as i5, COUNT(a) as count_rows, SUM(a.id) as sum_id;
- # d = order c by i5;
- # dump d;
- #
- ##########################################
- #
- 'floatpostprocess' => 1,
- # WE SHOULD REALLY NOT BE SORTING HERE, BUT WE CAN'T GET STABLE SORT OUT
- # OF PIG AND POSTGRES IN THE SAME MANNER
- 'sortBenchmark' => 1,
- 'sortResults' => 1,
- 'pig' => q?
-a = load 'default.numbers_pig' USING org.apache.hcatalog.pig.HCatLoader();
-b = order a by intnum5;
-store b into ':OUTPATH:';
-?,
- # 'rc' => 0,
- 'sql' => 'select * from numbers order by intnum5 ;',
- },
-
-
- {
- 'num' => 10,
- # 9.3.5.1 FILTER such that an expression of the form: - Part I
- # FILTER alias by exp1
- # FILTER alias by exp2
- # FILTER alias by exp3
- # gives the same result as
- #
- # FILTER alias by exp1, exp2, expr3
- 'sortBenchmark' => 1,
- 'sortResults' => 1,
- 'floatpostprocess' => 1,
- 'delimiter' => ' ',
- 'pig' => q?
-a = load 'default.numbers_pig' USING org.apache.hcatalog.pig.HCatLoader();
-b = filter a by intnum1000 > 3000;
-c = filter b by id > 2500;
-d = filter c by intnum5 > 3;
-e = filter d by intnum > 5050;
-f = filter e by longnum > 5050;
-g = filter f by floatnum > 683;
-h = filter g by doublenum > 683;
-store h into ':OUTPATH:';
-?,
- 'sql' => "
-select * from numbers
-where
- intnum1000 > 3000
-and id > 2500
-and intnum5 > 3
-and intnum > 5050
-and longnum > 5050
-and floatnum > 683
-and doublenum > 683
-;
-",
- },
-
- {
- 'num' => 11,
- # 9.3.5.1 FILTER such that an expression of the form: - Part II
- # FILTER alias by exp1
- # FILTER alias by exp2
- # FILTER alias by exp3
- # gives the same result as
- #
- # FILTER alias by exp1, exp2, expr3
- 'sortBenchmark' => 1,
- 'sortResults' => 1,
- 'floatpostprocess' => 1,
- 'delimiter' => ' ',
- 'pig' => q?
-a = load 'default.numbers_pig' USING org.apache.hcatalog.pig.HCatLoader();
-b = filter a by
- intnum1000 > 3000
- and id > 2500
- and intnum5 > 3
- and intnum > 5050
- and longnum > 5050
- and floatnum > 683
- and doublenum > 683;
-;
-store b into ':OUTPATH:';
-?,
- 'sql' => "
-select * from numbers
-where
- intnum1000 > 3000
-and id > 2500
-and intnum5 > 3
-and intnum > 5050
-and longnum > 5050
-and floatnum > 683
-and doublenum > 683
-;
-",
- },
- ] # end tests
- }, # end group
-
-#-------------------------------------------------------------------------------
-# HIVE STORED -> HADOOP SECTION
-#-------------------------------------------------------------------------------
-
- {
- 'name' => 'hcat_hive2hadoop_read',
- # From:
- 'sortResults' => 1,
- 'sortBenchmark' => 1,
- 'hadoop_classpath' => ':HCAT_JAR:',
- 'tests' => [
-
- {
- 'num' => 1,
- # sum numbers
- # Using doctored benchmark file due to rounding error, e.g.:
- # actual line : 5000 4500500 2949 416084 5050809 5050809 1083307.100 1083308.561
- # expected line: 5000 4500500 2949 416084 5050809 5050809 1083310.000 1083308.561
- 'floatpostprocess' => 0,
- 'delimiter' => ' ',
- 'hadoop' => q\
-jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.SumNumbers thrift://:THRIFTSERVER: :OUTPATH: -libjars file://:HCAT_JAR:
-\,
- 'sql' => "
-select
- intnum1000,
- sum (id),
- sum (intnum5),
- sum (intnum100),
- sum (intnum),
- sum (longnum),
- sum (floatnum),
- sum (doublenum)
-from numbers
-group by intnum1000
-order by intnum1000;
-",
- },
-
- ] # end tests
- }, # end group
-
- {
- 'name' => 'hcat_hive2hadoop_type_check',
- # From:
- 'sortResults' => 1,
- 'sortBenchmark' => 1,
- 'hadoop_classpath' => ':HCAT_JAR:',
- 'tests' => [
-
- {
- 'num' => 1,
- # typedatacheck complex
- 'hadoop' => q\
-jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.TypeDataCheck thrift://:THRIFTSERVER: complex 'map<string,string>+struct<num:int,str:string,dbl:double>+array<map<string,string>>+int' :OUTPATH: tab -libjars file://:HCAT_JAR:
-\,
- 'sql' => "\\d;", # dummy
- },
-
- {
- 'num' => 2,
- # typedatacheck numbers
- 'floatpostprocess' => 0,
- 'delimiter' => ' ',
- 'hadoop' => q\
-jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.TypeDataCheck thrift://:THRIFTSERVER: numbers int+smallint+tinyint+int+int+bigint+float+double :OUTPATH: tab -libjars file://:HCAT_JAR:
-\,
- 'sql' => "select * from numbers;",
- },
-
- {
- 'num' => 3,
- # typedatacheck boolean_table
- 'hadoop' => q\
-jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.TypeDataCheck thrift://:THRIFTSERVER: boolean_table boolean+int :OUTPATH: tab -libjars file://:HCAT_JAR:
-\,
- 'sql' => "select * from boolean_table;",
- },
-
- ] # end tests
- }, # end group
-
- {
- 'name' => 'hcat_hive2hadoop_write_numbers_nopart',
- 'hadoop_classpath' => ':HCAT_JAR:',
- 'tests' => [
-
- {
- 'num' => 1,
- 'hive' => "
-drop table numbers_nopart_empty_initially;
-",
- # does not give error even if the table does not exist
- 'expected_out' => "",
- },
-
- {
- 'num' => 2,
- 'depends_on' => 'hcat_hive2hadoop_write_numbers_nopart_1',
- 'hive' => "
-CREATE TABLE numbers_nopart_empty_initially (
- INTNUM1000 int,
- ID smallint,
- INTNUM5 tinyint,
- INTNUM100 int,
- INTNUM int,
- LONGNUM bigint,
- FLOATNUM float,
- DOUBLENUM double
-)
-stored as RCFile
-location ':TMP:/hcat_tmp_tables/numbers_nopart_empty_initially'
-TBLPROPERTIES (
- 'hcat.isd'='org.apache.hcatalog.rcfile.RCFileInputDriver',
- 'hcat.osd'='org.apache.hcatalog.rcfile.RCFileOutputDriver'
-);
-",
- 'rc' => 0
- },
-
- {
- 'num' => 3,
- # storenumbers
- # 'nopart' -> data is written to the numbers_nopart_empty_initially table.
- 'depends_on' => 'hcat_hive2hadoop_write_numbers_nopart_2',
- 'hadoop' => q\
-jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.StoreNumbers thrift://:THRIFTSERVER: nopart -libjars file://:HCAT_JAR:
-\,
- 'rc' => 0,
- 'expected_out' => "",
- 'expected_err_regex' => "HDFS_BYTES_WRITTEN=\\d+",
- 'not_expected_err_regex' => "FAILED",
- },
-
- {
- 'num' => 4,
- 'depends_on' => 'hcat_hive2hadoop_write_numbers_nopart_3',
- 'sortResults' => 1,
- 'sortBenchmark' => 1,
- 'floatpostprocess' => 0,
- 'delimiter' => ' ',
- 'hive' => "
-select
- intnum1000,
- id,
- intnum5,
- intnum100,
- intnum,
- longnum,
- floatnum,
- doublenum
-from numbers_nopart_empty_initially;
-",
- 'sql' => "
-select
- 10 + intnum1000,
- 10 + id,
- 10 + intnum5,
- 10 + intnum100,
- 10 + intnum,
- 10 + longnum,
- 10 + floatnum,
- 10 + doublenum
-from numbers;
-",
- },
-
- # pig cannot handle tinyint and smallint, hence create a version of the table with 'int' instead
- {
- 'num' => 5,
- 'hive' => "
-drop table numbers_nopart_pig_empty_initially;
-",
- # does not give error even if the table does not exist
- 'expected_out' => "",
- },
-
- {
- 'num' => 6,
- 'depends_on' => 'hcat_hive2hadoop_write_numbers_nopart_5',
- 'hive' => "
-CREATE TABLE numbers_nopart_pig_empty_initially (
- INTNUM1000 int,
- ID int,
- INTNUM5 int,
- INTNUM100 int,
- INTNUM int,
- LONGNUM bigint,
- FLOATNUM float,
- DOUBLENUM double
-)
-stored as RCFile
-location ':TMP:/hcat_tmp_tables/numbers_nopart_pig_empty_initially'
-TBLPROPERTIES (
- 'hcat.isd'='org.apache.hcatalog.rcfile.RCFileInputDriver',
- 'hcat.osd'='org.apache.hcatalog.rcfile.RCFileOutputDriver'
-);
-",
- 'rc' => 0
- },
-
- {
- 'num' => 7,
- # storenumbers
- # 'nopart' -> data is written to the numbers_nopart_pig_empty_initially table.
- 'depends_on' => 'hcat_hive2hadoop_write_numbers_nopart_6',
- 'hadoop' => q\
-jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.StoreNumbers thrift://:THRIFTSERVER: nopart_pig -libjars file://:HCAT_JAR:
-\,
- 'rc' => 0,
- 'expected_out' => "",
- 'expected_err_regex' => "HDFS_BYTES_WRITTEN=\\d+",
- 'not_expected_err_regex' => "FAILED",
- },
-
- {
- 'num' => 8,
- 'depends_on' => 'hcat_hive2hadoop_write_numbers_nopart_7',
- 'sortResults' => 1,
- 'sortBenchmark' => 1,
- 'floatpostprocess' => 0,
- 'delimiter' => ' ',
- 'hive' => "
-select
- intnum1000,
- id,
- intnum5,
- intnum100,
- intnum,
- longnum,
- floatnum,
- doublenum
-from numbers_nopart_pig_empty_initially;
-",
- 'sql' => "
-select
- 10 + intnum1000,
- 10 + id,
- 10 + intnum5,
- 10 + intnum100,
- 10 + intnum,
- 10 + longnum,
- 10 + floatnum,
- 10 + doublenum
-from numbers;
-",
- },
-
- ] # end tests
- }, # end group
-
- {
- 'name' => 'hcat_hive2hadoop_write_numbers_part',
- 'sortResults' => 1,
- 'sortBenchmark' => 1,
- 'hadoop_classpath' => ':HCAT_JAR:',
- 'tests' => [
-
- {
- 'num' => 1,
- 'hive' => "
-drop table numbers_part_empty_initially;
-",
- # does not give error even if the table does not exist
- 'expected_out' => "",
- },
-
- {
- 'num' => 2,
- 'depends_on' => 'hcat_hive2hadoop_write_numbers_part_1',
- 'hive' => "
-CREATE TABLE numbers_part_empty_initially (
- INTNUM1000 int,
- ID smallint,
- INTNUM5 tinyint,
- INTNUM100 int,
- INTNUM int,
- LONGNUM bigint,
- FLOATNUM float,
- DOUBLENUM double
-)
-partitioned by (
- datestamp string
-)
-stored as RCFile
-location ':TMP:/hcat_tmp_tables/numbers_part_empty_initially'
-TBLPROPERTIES (
- 'hcat.isd'='org.apache.hcatalog.rcfile.RCFileInputDriver',
- 'hcat.osd'='org.apache.hcatalog.rcfile.RCFileOutputDriver'
-);
-",
- 'rc' => 0
- },
-
- {
- 'num' => 3,
- # storenumbers
- # 'part' -> data is written to datestamp = '20100101' partition of the numbers_part_empty_initially table.
- 'depends_on' => 'hcat_hive2hadoop_write_numbers_part_2',
- 'hadoop' => q\
-jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.StoreNumbers thrift://:THRIFTSERVER: part -libjars file://:HCAT_JAR:
-\,
- 'rc' => 0,
- 'expected_out' => "",
- 'expected_err_regex' => "HDFS_BYTES_WRITTEN=\\d+",
- 'not_expected_err_regex' => "FAILED",
- },
-
- {
- 'num' => 4,
- 'depends_on' => 'hcat_hive2hadoop_write_numbers_part_3',
- 'floatpostprocess' => 0,
- 'delimiter' => ' ',
- 'sortResults' => 1,
- 'sortBenchmark' => 1,
- 'hive' => "
-select
- intnum1000,
- id,
- intnum5,
- intnum100,
- intnum,
- longnum,
- floatnum,
- doublenum,
- datestamp
-from numbers_part_empty_initially;
-",
- 'sql' => "
-select
- 10 + intnum1000,
- 10 + id,
- 10 + intnum5,
- 10 + intnum100,
- 10 + intnum,
- 10 + longnum,
- 10 + floatnum,
- 10 + doublenum,
- '20100101'
-from numbers;
-",
- },
-
- ] # end tests
- }, # end group
-
- {
- 'name' => 'hcat_hive2hadoop_write_complex_nopart',
- 'sortResults' => 1,
- 'sortBenchmark' => 1,
- 'hadoop_classpath' => ':HCAT_JAR:',
- 'tests' => [
-
- {
- 'num' => 1,
- 'hive' => "
-drop table complex_nopart_empty_initially;
-",
- # does not give error even if the table does not exist
- 'expected_out' => "",
- },
-
- {
- 'num' => 2,
- 'depends_on' => 'hcat_hive2hadoop_write_complex_nopart_1',
- 'hive' => "
-CREATE TABLE complex_nopart_empty_initially (
- mymap map<string,string>,
- mytuple struct<num:int,str:string,dbl:double>,
- bagofmap array<Map<string,string>>,
- rownum int
-)
-stored as rcfile
-TBLPROPERTIES (
- 'hcat.isd'='org.apache.hcatalog.rcfile.RCFileInputDriver',
- 'hcat.osd'='org.apache.hcatalog.rcfile.RCFileOutputDriver'
-);
-",
- 'rc' => 0
- },
-
- {
- 'num' => 3,
- # storecomplex
- 'depends_on' => 'hcat_hive2hadoop_write_complex_nopart_2',
- 'hadoop' => q\
-jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.StoreComplex thrift://:THRIFTSERVER: -libjars file://:HCAT_JAR:
-\,
- 'rc' => 0,
- 'expected_out' => "",
- 'expected_err_regex' => "HDFS_BYTES_WRITTEN=\\d+",
- 'not_expected_err_regex' => "FAILED",
- },
-
- {
- 'num' => 4,
- 'floatpostprocess' => 0,
- 'delimiter' => ' ',
- 'sortResults' => 1,
- 'sortBenchmark' => 1,
- 'depends_on' => 'hcat_hive2hadoop_write_complex_nopart_3',
- 'hive' => "
-select
- mymap,
- mytuple,
- bagofmap,
- rownum
-from complex_nopart_empty_initially;
-",
- 'sql' => "\\d;", # dummy
- },
-
- ] # end tests
- }, # end group
-
-
-
-
-################################################################################
-# PIG STORED DATA
-################################################################################
-#
-# The bootstrap creates tables and loads data using Hive.
-# Here tables with identical schemas are created and data stored to them using Pig.
-# The tables are then verified and used by each of pig, hive, and hadoop.
-#
-# NOTE: The hcat_pig2pig_setup_tables group must run before the rest of the
-# "pig stored data" groups.
-#
-################################################################################
-
-#-------------------------------------------------------------------------------
-# Prepare test tables using PIG + other 'store' tests
-#-------------------------------------------------------------------------------
-
- # Currently loading from txt "using PigStorage(':') ..."
-
- {
- 'name' => 'hcat_pig2pig_setup_tables',
- # From:
- 'tests' => [
-
- {
- 'num' => 1,
- # numbers stored by pig
- 'hive' => "
-CREATE TABLE tmp_pig2pig_stored_numbers_:RUNID: (
- INTNUM1000 int,
- ID int, -- smallint,
- INTNUM5 int, -- tinyint,
- INTNUM100 int,
- INTNUM int,
- LONGNUM bigint,
- FLOATNUM float,
- DOUBLENUM double
-)
-stored as rcfile
-location ':TMP:/hcat_tmp_tables/tmp_pig2pig_stored_numbers_:RUNID:'
-TBLPROPERTIES (
- 'hcat.isd'='org.apache.hcatalog.rcfile.RCFileInputDriver',
- 'hcat.osd'='org.apache.hcatalog.rcfile.RCFileOutputDriver'
-);
-",
- 'rc' => 0,
- },
-
- {
- 'num' => 2, # GOOD
- 'depends_on' => 'hcat_pig2pig_setup_tables_1',
- # currently not loading from Hive
- 'pig' => q\
-A = load ':INPATH:/txt/numbers.txt' using PigStorage(':') as (intnum1000: int,id: int,intnum5: int,intnum100: int,intnum: int,longnum: long,floatnum: float,doublenum: double);
-store A into 'default.tmp_pig2pig_stored_numbers_:RUNID:'
- using org.apache.hcatalog.pig.HCatStorer
- ('',
- 'intnum1000: int,id: int,intnum5: int,intnum100: int,intnum: int,longnum: long,floatnum: float,doublenum: double');
-\,
- 'rc' => 0,
- },
-
- {
- 'num' => 3,
- # complex stored by pig
- # 'depends_on' => 'hcat_pig2pig_setup_tables_2', # not really, but #4 does
- 'hive' => "
-CREATE TABLE tmp_pig2pig_stored_complex_:RUNID: (
- mymap map<string, string>,
- mytuple struct<num:int,str:string,dbl:double>,
- bagofmap array<map<string,string>>,
- rownum int
-)
-stored as rcfile
-location ':TMP:/hcat_tmp_tables/tmp_pig2pig_stored_complex_:RUNID:'
-TBLPROPERTIES (
- 'hcat.isd'='org.apache.hcatalog.rcfile.RCFileInputDriver',
- 'hcat.osd'='org.apache.hcatalog.rcfile.RCFileOutputDriver'
-);
-",
- 'rc' => 0,
- },
-
- {
- 'num' => 4,
- 'depends_on' => 'hcat_pig2pig_setup_tables_3',
- # currently not loading from Hive
- 'pig' => q\
-A = load 'default.complex' using org.apache.hcatalog.pig.HCatLoader();
-store A into 'default.tmp_pig2pig_stored_complex_:RUNID:'
- using org.apache.hcatalog.pig.HCatStorer
- ('',
- 'mymap: map[],mytuple: (num: int,str: chararray,dbl: double),bagofmap: {(innerfield: map[])},rownum: int');
-\,
- 'rc' => 0,
- },
-
- {
- 'num' => 5,
- # boolean_table stored by pig
- 'hive' => "
-CREATE TABLE tmp_pig2pig_stored_boolean_table_:RUNID: (
- myflag boolean,
- rownum int
-)
-row format DELIMITED FIELDS TERMINATED BY '\001'
- COLLECTION ITEMS TERMINATED BY '\002'
- MAP KEYS TERMINATED BY '\003'
-stored as rcfile
-location ':TMP:/hcat_tmp_tables/tmp_pig2pig_stored_boolean_table_:RUNID:'
-TBLPROPERTIES (
- 'hcat.isd'='org.apache.hcatalog.rcfile.RCFileInputDriver',
- 'hcat.osd'='org.apache.hcatalog.rcfile.RCFileOutputDriver'
-);
-",
- 'rc' => 0,
- },
-
- {
- 'num' => 6,
- 'depends_on' => 'hcat_pig2pig_setup_tables_5',
-        'ignore' => 'Pig does not understand boolean',
- 'pig' => q\
-A = load 'default.boolean_table' using org.apache.hcatalog.pig.HCatLoader();
-store A into 'default.tmp_pig2pig_stored_boolean_table_:RUNID:'
- using org.apache.hcatalog.pig.HCatStorer
- ('',
- 'boolean:string,rownum:integer');
-\,
- 'rc' => 0,
- },
-
- ] # end tests
-
- }, # end group
-
- {
- 'name' => 'hcat_pig2pig_more_store',
- 'tests' => [
-
- {
- 'num' => 1,
- 'hive' => "
-CREATE TABLE tmp_pig2pig_store_table_1_:RUNID: (
- INTNUM1000 int,
- ID int, -- smallint,
- INTNUM5 int, -- tinyint,
- INTNUM100 int,
- INTNUM int,
- LONGNUM bigint,
- FLOATNUM float,
- DOUBLENUM double
-)
-stored as rcfile
-location ':TMP:/hcat_tmp_tables/tmp_pig2pig_store_table_1_:RUNID:'
-TBLPROPERTIES (
- 'hcat.isd'='org.apache.hcatalog.rcfile.RCFileInputDriver',
- 'hcat.osd'='org.apache.hcatalog.rcfile.RCFileOutputDriver'
-);
-",
- 'rc' => 0,
- },
-
- {
- 'num' => 2,
- 'depends_on' => 'hcat_pig2pig_more_store_1',
- 'hive' => "
-CREATE TABLE tmp_pig2pig_store_table_2_:RUNID: (
- INTNUM1000 int,
- ID int, -- smallint,
- INTNUM5 int, -- tinyint,
- INTNUM100 int,
- INTNUM int,
- LONGNUM bigint,
- FLOATNUM float,
- DOUBLENUM double
-)
-stored as rcfile
-location ':TMP:/hcat_tmp_tables/tmp_pig2pig_store_table_2_:RUNID:'
-TBLPROPERTIES (
- 'hcat.isd'='org.apache.hcatalog.rcfile.RCFileInputDriver',
- 'hcat.osd'='org.apache.hcatalog.rcfile.RCFileOutputDriver'
-);
-",
- 'rc' => 0,
- },
-
- {
- 'num' => 3,
- 'depends_on' => 'hcat_pig2pig_more_store_2',
- 'pig' => q\
-A = load ':INPATH:/txt/numbers.txt' using PigStorage(':') as (intnum1000,id,intnum5,intnum100,intnum,longnum,floatnum,doublenum);
--- A = load 'default.numbers_pig' using org.apache.hcatalog.pig.HCatLoader();
-B = foreach A generate (int)intnum1000, (int)id, (int)intnum5, (int)intnum100, (int)intnum, (long)longnum, (float)floatnum, (double)doublenum;
-C = filter B by id < 2000;
-D = filter B by id >= 2000;
--- store to 1st table
-store C into 'default.tmp_pig2pig_store_table_1_:RUNID:'
- using org.apache.hcatalog.pig.HCatStorer
- ('',
- 'intnum1000: int,id: int,intnum5: int,intnum100: int,intnum: int,longnum: long,floatnum: float,doublenum: double');
--- store to 2nd table
-store D into 'default.tmp_pig2pig_store_table_2_:RUNID:'
- using org.apache.hcatalog.pig.HCatStorer
- ('',
- 'intnum1000: int,id: int,intnum5: int,intnum100: int,intnum: int,longnum: long,floatnum: float,doublenum: double');
-\,
- 'rc' => 0,
- 'expected_err_regex' => "Success",
- },
-
- {
- 'num' => 4,
- # to verify data in two tables written above
- 'depends_on' => 'hcat_pig2pig_more_store_3',
- 'sortBenchmark' => 1,
- 'sortResults' => 1,
- 'hive' => "
-select id, intnum from tmp_pig2pig_store_table_1_:RUNID:
-",
- 'sql' => "
-select id, intnum
-from numbers
-where id < 2000
-;
-",
- },
-
- {
- 'num' => 5,
- # to verify data in two tables written above
- 'depends_on' => 'hcat_pig2pig_more_store_3',
- 'sortBenchmark' => 1,
- 'sortResults' => 1,
- 'hive' => "
-select id, intnum from tmp_pig2pig_store_table_2_:RUNID:
-",
- 'sql' => "
-select id, intnum
-from numbers
-where id >= 2000
-;
-",
- },
-
-# ADD HCatStorer tests for:
-# * not specifying schema
-# - schema from HCatLoad ===> DONE
-# - schema from load with PigStorage and 'as' ===> DONE
-# - schema from 'generate' with types ===> DONE
-# * not specifying anything
-# - as above plus non-partitioned
-# * adding columns
-# ERROR tests:
-# * missing columns
- {
- 'num' => 6,
- # setup step
- 'hive' => "
-CREATE TABLE tmp_pig2pig_store_numbers_partitioned_:RUNID: (
- INTNUM1000 int,
- ID int, --smallint,
- INTNUM5 int, --tinyint,
- INTNUM100 int,
- INTNUM int,
- LONGNUM bigint,
- FLOATNUM float,
- DOUBLENUM double
-)
-partitioned by (
- STR1 string,
- STR2 string
-)
-stored as rcfile
-location ':TMP:/hcat_tmp_tables/pig2pig_store_numbers_partitioned_:RUNID:'
-TBLPROPERTIES (
- 'hcat.isd'='org.apache.hcatalog.rcfile.RCFileInputDriver',
- 'hcat.osd'='org.apache.hcatalog.rcfile.RCFileOutputDriver'
-);
-",
- 'rc' => 0,
- },
-
- {
- 'num' => 9,
- 'depends_on' => 'hcat_pig2pig_more_store_6',
- # Store into a new partition with org.apache.hcatalog.pig.HCatStorer
- # Store without specifying schema, schema from PigStorage 'as'
- 'pig' => q\
-a = load ':INPATH:/txt/numbers.txt' using PigStorage(':')
- as (intnum1000: int,id: int,intnum5: int,intnum100: int,intnum: int,longnum: long,floatnum: float,doublenum: double);
-store a into 'default.tmp_pig2pig_store_numbers_partitioned_:RUNID:'
- using org.apache.hcatalog.pig.HCatStorer
- ('STR1=str1part1,STR2=str2part1');
-\,
- 'rc' => 0,
- },
-
- {
- 'num' => 10,
- 'depends_on' => 'hcat_pig2pig_more_store_9',
- # verify the above
- 'sortBenchmark' => 1,
- 'sortResults' => 1,
- 'floatpostprocess' => 0,
- 'delimiter' => ' ',
- 'pig' => q\
-a = load 'default.tmp_pig2pig_store_numbers_partitioned_:RUNID:' using org.apache.hcatalog.pig.HCatLoader();
-b = filter a BY (
- str1 == 'str1part1' and
- str2 == 'str2part1'
- );
-store b into ':OUTPATH:';
-\,
- 'sql' => q\
-select *, 'str1part1', 'str2part1' from numbers;
-\,
- },
-
- {
- 'num' => 11,
- 'depends_on' => 'hcat_pig2pig_more_store_6',
- # Store into a new partition with org.apache.hcatalog.pig.HCatStorer
- # Store without specifying schema, schema from PigStorage 'as'
- 'pig' => q\
-a = load ':INPATH:/txt/numbers.txt' using PigStorage(':')
- as (intnum1000: int,id: int,intnum5: int,intnum100: int,intnum: int,longnum: long,floatnum: float,doublenum: double);
-store a into 'default.tmp_pig2pig_store_numbers_partitioned_:RUNID:'
- using org.apache.hcatalog.pig.HCatStorer
- ('STR1=str1part2,STR2=str2part2');
-\,
- 'rc' => 0,
- },
-
- {
- 'num' => 12,
- 'depends_on' => 'hcat_pig2pig_more_store_11',
- # verify the above
- 'sortBenchmark' => 1,
- 'sortResults' => 1,
- 'floatpostprocess' => 0,
- 'delimiter' => ' ',
- 'pig' => q\
-a = load 'default.tmp_pig2pig_store_numbers_partitioned_:RUNID:' using org.apache.hcatalog.pig.HCatLoader();
-b = filter a BY (
- str1 == 'str1part1' and
- str2 == 'str2part1'
- );
-store b into ':OUTPATH:';
-\,
- 'sql' => q\
-select *, 'str1part1', 'str2part1' from numbers;
-\,
- },
-
- {
- 'num' => 13,
- 'depends_on' => 'hcat_pig2pig_more_store_6',
- # Store into a new partition with org.apache.hcatalog.pig.HCatStorer
- # Store without specifying schema, schema from 'generate' with types
- 'pig' => q\
-a = load ':INPATH:/txt/numbers.txt' using PigStorage(':') as (intnum1000, id, intnum5, intnum100, intnum, longnum, floatnum, doublenum);
-b = foreach a generate (int)intnum1000, (int)id, (int)intnum5, (int)intnum100, (int)intnum, (long)longnum, (float)floatnum, (double)doublenum;
-store b into 'default.tmp_pig2pig_store_numbers_partitioned_:RUNID:'
- using org.apache.hcatalog.pig.HCatStorer
- ('STR1=str1part3,STR2=str2part3');
-\,
- 'rc' => 0,
- },
-
- {
- 'num' => 14,
- 'depends_on' => 'hcat_pig2pig_more_store_13',
- # verify the above
- 'sortBenchmark' => 1,
- 'sortResults' => 1,
- 'floatpostprocess' => 0,
- 'delimiter' => ' ',
- 'pig' => q\
-a = load 'default.tmp_pig2pig_store_numbers_partitioned_:RUNID:' using org.apache.hcatalog.pig.HCatLoader();
-b = filter a BY (
- str1 == 'str1part2' and
- str2 == 'str2part2'
- );
-store b into ':OUTPATH:';
-\,
- 'sql' => q\
-select *, 'str1part2', 'str2part2' from numbers;
-\,
- },
-
-
- ] # end tests
-
- }, # end group
-
-#-------------------------------------------------------------------------------
-# PIG STORED -> HIVE SECTION
-# Not a likely use case, commented out for time being
-#-------------------------------------------------------------------------------
-
- #NaUC# {
- #NaUC# 'name' => 'hcat_pig2hive_type_check',
- #NaUC# 'tests' => [
- #NaUC#
- #NaUC# {
- #NaUC# 'num' => 1,
- #NaUC# 'hive' => q\
- #NaUC# add jar :FUNCPATH:/testudf.jar;
- #NaUC# create temporary function typecheck as 'org.apache.hcatalog.utils.HCatTypeCheckHive';
- #NaUC# select typecheck('int+smallint+tinyint+int+int+bigint+float+double',
- #NaUC# intnum1000,id,intnum5,intnum100,intnum,longnum,floatnum,doublenum) from tmp_pig2pig_stored_numbers_:RUNID:;
- #NaUC# \,
- #NaUC# 'rc' => 0,
- #NaUC# 'expected_out_regex' => "class java",
- #NaUC# 'expected_err_regex' => "OK",
- #NaUC# 'not_expected_err_regex' => "FAILED",
- #NaUC# },
- #NaUC#
- #NaUC# {
- #NaUC# 'num' => 2,
- #NaUC# 'hive' => q\
- #NaUC# add jar :FUNCPATH:/testudf.jar;
- #NaUC# create temporary function typecheck as 'org.apache.hcatalog.utils.HCatTypeCheckHive';
- #NaUC# select typecheck('map<string,string>+struct<num:int,str:string,dbl:double>+array<map<string,string>>+int',
- #NaUC# mymap, mytuple, bagofmap, rownum) from tmp_pig2pig_stored_complex_:RUNID:;
- #NaUC# \,
- #NaUC# 'rc' => 0,
- #NaUC# 'expected_out_regex' => "class java",
- #NaUC# 'expected_err_regex' => "OK",
- #NaUC# 'not_expected_err_regex' => "FAILED",
- #NaUC# },
- #NaUC#
- #NaUC# {
- #NaUC# 'num' => 3,
- #NaUC# 'hive' => q\
- #NaUC# add jar :FUNCPATH:/testudf.jar;
- #NaUC# create temporary function typecheck as 'org.apache.hcatalog.utils.HCatTypeCheckHive';
- #NaUC# select typecheck('boolean+int',
- #NaUC# myflag,rownum) from tmp_pig2pig_stored_boolean_table_:RUNID:;
- #NaUC# \,
- #NaUC# 'rc' => 0,
- #NaUC# 'expected_out_regex' => "class java",
- #NaUC# 'expected_err_regex' => "OK",
- #NaUC# 'not_expected_err_regex' => "FAILED",
- #NaUC# },
- #NaUC#
- #NaUC# {
- #NaUC# 'num' => 4,
- #NaUC# 'hive' => q\
- #NaUC# add jar :FUNCPATH:/testudf.jar;
- #NaUC# create temporary function typecheck as 'org.apache.hcatalog.utils.HCatTypeCheckHive';
- #NaUC# select typecheck('string+string+string+string+int+string+string+string+string+string+string+string+string+string+map<string,string>+map<string,string>+array<map<string,string>>+string+string+string+string',
- #NaUC# bcookie,
- #NaUC# src_spaceid,
- #NaUC# srcpvid,
- #NaUC# ts,
- #NaUC# browser,
- #NaUC# bckt,
- #NaUC# type,
- #NaUC# ip,
- #NaUC# yuid,
- #NaUC# referrer,
- #NaUC# pg_spaceid,
- #NaUC# dstid,
- #NaUC# dstpvid,
- #NaUC# dst_spaceid,
- #NaUC# page_params,
- #NaUC# clickinfo,
- #NaUC# viewinfo,
- #NaUC# datestamp,
- #NaUC# srcid,
- #NaUC# action,
- #NaUC# testid
- #NaUC# )
- #NaUC# where
- #NaUC# datestamp = '20091102'
- #NaUC# and srcid = '19174'
- #NaUC# and browser = 3
- #NaUC# and src_spaceid = '2114728002'
- #NaUC# limit 15
- #NaUC# ;
- #NaUC# \,
- #NaUC# 'rc' => 0,
- #NaUC# 'expected_out_regex' => "class java",
- #NaUC# 'expected_err_regex' => "OK",
- #NaUC# 'not_expected_err_regex' => "FAILED",
- #NaUC# },
- #NaUC#
- #NaUC# {
- #NaUC# 'num' => 5,
- #NaUC# 'floatpostprocess' => 1,
- #NaUC# 'delimiter' => ' ',
- #NaUC# 'hive' => q\
- #NaUC# select
- #NaUC# id, -- expect smallint
- #NaUC# intnum5 + id, -- expect smallint
- #NaUC# intnum5 + id + intnum, -- expect int
- #NaUC# intnum5 + id + intnum + longnum, -- expect bigint
- #NaUC# intnum5 * id, -- expect smallint
- #NaUC# intnum5 * id * intnum, -- expect int
- #NaUC# intnum5 * id * intnum * longnum, -- expect bigint
- #NaUC# intnum5 + 500, -- expect int
- #NaUC# intnum5 + 1.5, -- expect float
- #NaUC# cast(intnum5 + 1.5 as double), -- expect double
- #NaUC# cast(intnum5 + 1.0 as int), -- expect int
- #NaUC# floatnum + doublenum, -- expect double
- #NaUC# floatnum * doublenum -- expect double
- #NaUC# from tmp_pig2pig_stored_numbers_:RUNID:
- #NaUC# order by id
- #NaUC# limit 500
- #NaUC# ;
- #NaUC# \,
- #NaUC# 'sql' => "
- #NaUC# select
- #NaUC# id,
- #NaUC# intnum5 + id,
- #NaUC# intnum5 + id + intnum,
- #NaUC# intnum5 + id + intnum + longnum,
- #NaUC# intnum5 * id,
- #NaUC# intnum5 * id * intnum,
- #NaUC# intnum5 * id * intnum * longnum,
- #NaUC# intnum5 + 500,
- #NaUC# intnum5 + 1.5,
- #NaUC# cast(intnum5 + 1.5 as double precision),
- #NaUC# cast(intnum5 + 1.0 as integer),
- #NaUC# floatnum + doublenum,
- #NaUC# floatnum * doublenum
- #NaUC# from numbers
- #NaUC# order by id
- #NaUC# limit 500
- #NaUC# ;
- #NaUC# ",
- #NaUC# },
- #NaUC#
- #NaUC# {
- #NaUC# 'num' => 6,
- #NaUC# 'ignore' => '!!! Hive truncates where Postgres rounds',
- #NaUC# 'hive' => q\
- #NaUC# select
- #NaUC# intnum5,
- #NaUC# cast(intnum5 + 1.9 as int),
- #NaUC# from tmp_pig2pig_stored_numbers_:RUNID:
- #NaUC# order by id
- #NaUC# limit 5
- #NaUC# ;
- #NaUC# \,
- #NaUC# 'sql' => "
- #NaUC# select
- #NaUC# intnum5,
- #NaUC# cast(intnum5 + 1.9 as integer),
- #NaUC# from numbers
- #NaUC# order by id
- #NaUC# limit 5
- #NaUC# ;
- #NaUC# ",
- #NaUC# },
- #NaUC#
- #NaUC# ] # end tests
- #NaUC# }, # end group
- #NaUC#
- #NaUC# {
- #NaUC# 'name' => 'hcat_pig2hive_diagnostic_operators',
- #NaUC# 'tests' => [
- #NaUC#
- #NaUC# #NaUC#
- #NaUC# {
- #NaUC# 'num' => 5,
- #NaUC# 'hive' => "describe tmp_pig2pig_stored_complex_:RUNID:;",
- #NaUC# 'sql' => "\\d", # dummy statement
- #NaUC# },
- #NaUC#
- #NaUC# {
- #NaUC# 'num' => 6,
- #NaUC# 'ignore' => 'Open question about expected output',
- #NaUC# 'hive' => "describe tmp_pig2pig_stored_complex_:RUNID:.mytuple;",
- #NaUC# 'rc' => 0,
- #NaUC# 'expected_out_regex' => "mytuple\tstruct<num:int,str:string,dbl:double>\tfrom deserializer",
- #NaUC# },
- #NaUC#
- #NaUC# {
- #NaUC# 'num' => 7,
- #NaUC# 'hive' => "describe tmp_pig2pig_stored_complex_:RUNID:.bagofmap;",
- #NaUC# 'rc' => 0,
- #NaUC# 'expected_out_regex' => "bagofmap\tarray<map<string,string>>\tfrom deserializer",
- #NaUC# },
- #NaUC#
- #NaUC# {
- #NaUC# 'num' => 8,
- #NaUC# 'hive' => "describe tmp_pig2pig_stored_boolean_table_:RUNID:;",
- #NaUC# 'sql' => "\\d", # dummy statement
- #NaUC# },
- #NaUC#
- #NaUC# {
- #NaUC# 'num' => 9,
- #NaUC# 'hive' => "describe tmp_pig2pig_stored_boolean_table_:RUNID:.myflag;",
- #NaUC# 'rc' => 0,
- #NaUC# 'expected_out_regex' => "myflag\tboolean\tfrom deserializer",
- #NaUC# },
- #NaUC#
- #NaUC# ] # end tests
- #NaUC# }, # end group
- #NaUC#
- #NaUC# {
- #NaUC# 'name' => 'hcat_pig2hive_select',
- #NaUC# 'sortBenchmark' => 1,
- #NaUC# 'sortResults' => 1,
- #NaUC# 'tests' => [
- #NaUC#
- #NaUC# {
- #NaUC# 'num' => 1,
- #NaUC# 'floatpostprocess' => 1,
- #NaUC# 'delimiter' => ' ',
- #NaUC# # actually no nulls in table, so had not needed the casting...,
- #NaUC# 'hive' => q\
- #NaUC# select
- #NaUC# if (intnum1000 IS NULL, "", cast(intnum1000 as STRING)),
- #NaUC# if (id IS NULL, "", cast(id as STRING)),
- #NaUC# if (intnum5 IS NULL, "", cast(intnum5 as STRING)),
- #NaUC# if (intnum100 IS NULL, "", cast(intnum100 as STRING)),
- #NaUC# if (intnum IS NULL, "", cast(intnum as STRING)),
- #NaUC# if (longnum IS NULL, "", cast(longnum as STRING)),
- #NaUC# if (floatnum IS NULL, "", cast(floatnum as STRING)),
- #NaUC# if (doublenum IS NULL, "", cast(doublenum as STRING))
- #NaUC# from tmp_pig2pig_stored_numbers_:RUNID:;
- #NaUC# \,
- #NaUC# 'sql' => q\
- #NaUC# select
- #NaUC# intnum1000,
- #NaUC# id,
- #NaUC# intnum5,
- #NaUC# intnum100,
- #NaUC# intnum,
- #NaUC# longnum,
- #NaUC# floatnum,
- #NaUC# doublenum
- #NaUC# from numbers;
- #NaUC# \,
- #NaUC# },
- #NaUC#
- #NaUC# {
- #NaUC# 'num' => 2,
- #NaUC# 'hive' => q\
- #NaUC# select
- #NaUC# mymap,
- #NaUC# mytuple,
- #NaUC# bagofmap,
- #NaUC# rownum
- #NaUC# from tmp_pig2pig_stored_complex_:RUNID:;
- #NaUC# \,
- #NaUC# 'sql' => "\\d", # dummy statement
- #NaUC# },
- #NaUC#
- #NaUC# {
- #NaUC# 'num' => 3,
- #NaUC# 'hive' => q\
- #NaUC# select
- #NaUC# myflag, rownum
- #NaUC# from tmp_pig2pig_stored_boolean_table_:RUNID:;
- #NaUC# \,
- #NaUC# 'sql' => "\\d", # dummy statement
- #NaUC# },
- #NaUC#
- #NaUC# #NaUC# #NaUC#
- #NaUC# #NaUC#
- #NaUC# #NaUC# {
- #NaUC# 'num' => 8,
- #NaUC# 'sortBenchmark' => 1,
- #NaUC# 'sortResults' => 1,
- #NaUC# 'hive' => q\
- #NaUC# select
- #NaUC# distinct
- #NaUC# intnum1000,
- #NaUC# intnum5,
- #NaUC# intnum100,
- #NaUC# intnum,
- #NaUC# longnum
- #NaUC# from tmp_pig2pig_stored_numbers_:RUNID:;
- #NaUC# \,
- #NaUC# 'sql' => q\
- #NaUC# select
- #NaUC# distinct
- #NaUC# intnum1000,
- #NaUC# intnum5,
- #NaUC# intnum100,
- #NaUC# intnum,
- #NaUC# longnum
- #NaUC# from numbers;
- #NaUC# \,
- #NaUC# },
- #NaUC#
- #NaUC# ] # end tests
- #NaUC# }, # end group
- #NaUC#
- #NaUC# {
- #NaUC# 'name' => 'hcat_pig2hive_lateral',
- #NaUC# 'sortBenchmark' => 1,
- #NaUC# 'sortResults' => 1,
- #NaUC# 'tests' => [
- #NaUC#
- #NaUC# # NOTE:
- #NaUC# # The queries below written w/o WHERE clauses until this jira is resolved:
- #NaUC# # https://issues.apache.org/jira/browse/HIVE-1056
- #NaUC# # http://wiki.apache.org/hadoop/Hive/LanguageManual/LateralView
- #NaUC#
- #NaUC# {
- #NaUC# 'num' => 1,
- #NaUC# 'hive' => q\
- #NaUC# select rownum, item
- #NaUC# from tmp_pig2pig_stored_complex_:RUNID:
- #NaUC# LATERAL VIEW explode(bagofmap) explodedTable AS item;
- #NaUC# \,
- #NaUC# 'sql' => "\\d", # dummy statement
- #NaUC# },
- #NaUC#
- #NaUC# #NaUC# {
- #NaUC# 'num' => 3,
- #NaUC# 'hive' => q\
- #NaUC# select
- #NaUC# rownum,
- #NaUC# item,
- #NaUC# "item['a1']:", if (item['a1'] IS NULL, "", cast(item['a1'] as STRING)),
- #NaUC# "item['a2']:", if (item['a2'] IS NULL, "", cast(item['a2'] as STRING)),
- #NaUC# "item['k1']:", if (item['k1'] IS NULL, "", cast(item['k1'] as STRING)),
- #NaUC# "item['k6']:", if (item['k6'] IS NULL, "", cast(item['k6'] as STRING))
- #NaUC# from complex LATERAL VIEW explode(bagofmap) explodedTable AS item
- #NaUC# ;
- #NaUC# \,
- #NaUC# 'sql' => "\\d", # dummy statement
- #NaUC# },
- #NaUC#
- #NaUC# #NaUC#
- #NaUC# ] # end tests
- #NaUC# }, # end group
- #NaUC#
- #NaUC# {
- #NaUC# 'name' => 'hcat_pig2hive_join',
- #NaUC# 'sortBenchmark' => 1,
- #NaUC# 'sortResults' => 1,
- #NaUC# 'tests' => [
- #NaUC#
- #NaUC# {
- #NaUC# 'num' => 1,
- #NaUC# 'floatpostprocess' => 1,
- #NaUC# 'delimiter' => ' ',
- #NaUC# 'hive' => q\
- #NaUC# select
- #NaUC# a.intnum1000,
- #NaUC# b.intnum1000,
- #NaUC# a.id,
- #NaUC# b.id,
- #NaUC# a.intnum5,
- #NaUC# b.intnum5,
- #NaUC# a.intnum100,
- #NaUC# b.intnum100,
- #NaUC# a.intnum,
- #NaUC# b.intnum,
- #NaUC# a.longnum,
- #NaUC# b.longnum,
- #NaUC# a.floatnum,
- #NaUC# b.floatnum,
- #NaUC# a.doublenum,
- #NaUC# b.doublenum
- #NaUC# from
- #NaUC# tmp_pig2pig_stored_numbers_:RUNID: a
- #NaUC# join numbers b
- #NaUC# on (a.intnum5 = b.intnum5)
- #NaUC# where
- #NaUC# a.id < 30
- #NaUC# and b.id < 40
- #NaUC# order by a.intnum5
- #NaUC# ;
- #NaUC# \,
- #NaUC# 'sql' => "
- #NaUC# select
- #NaUC# a.intnum1000,
- #NaUC# b.intnum1000,
- #NaUC# a.id,
- #NaUC# b.id,
- #NaUC# a.intnum5,
- #NaUC# b.intnum5,
- #NaUC# a.intnum100,
- #NaUC# b.intnum100,
- #NaUC# a.intnum,
- #NaUC# b.intnum,
- #NaUC# a.longnum,
- #NaUC# b.longnum,
- #NaUC# a.floatnum,
- #NaUC# b.floatnum,
- #NaUC# a.doublenum,
- #NaUC# b.doublenum
- #NaUC# from
- #NaUC# numbers as a
- #NaUC# join numbers as b
- #NaUC# on (a.intnum5 = b.intnum5)
- #NaUC# where
- #NaUC# a.id < 30
- #NaUC# and b.id < 40
- #NaUC# order by a.intnum5
- #NaUC# ;
- #NaUC# ",
- #NaUC# },
- #NaUC#
- #NaUC# {
- #NaUC# 'num' => 2,
- #NaUC# # join by map entry
- #NaUC# # full outer join
- #NaUC# 'hive' => q\
- #NaUC# select
- #NaUC# a.rownum,
- #NaUC# b.rownum,
- #NaUC# a.mymap['mymapk1'],
- #NaUC# b.mymap['mymapk1'],
- #NaUC# a.mymap['mymapk3'],
- #NaUC# b.mymap['mymapk3'],
- #NaUC# a.mymap,
- #NaUC# b.mymap
- #NaUC# from
- #NaUC# tmp_pig2pig_stored_complex_:RUNID: a
- #NaUC# full outer join complex b
- #NaUC# on (a.mymap['mymapk1']
- #NaUC# = b.mymap['mymapk3'])
- #NaUC# ;
- #NaUC# \,
- #NaUC# 'sql' => "\\d", # dummy statement
- #NaUC# },
- #NaUC#
- #NaUC# {
- #NaUC# 'num' => 3,
- #NaUC# # join by tuple item
- #NaUC# # inner join
- #NaUC# 'hive' => q\
- #NaUC# select
- #NaUC# a.rownum,
- #NaUC# b.rownum,
- #NaUC# a.mytuple.dbl,
- #NaUC# b.mytuple.dbl,
- #NaUC# a.mytuple,
- #NaUC# b.mytuple
- #NaUC# from
- #NaUC# tmp_pig2pig_stored_complex_:RUNID: a
- #NaUC# join complex b
- #NaUC# on (a.mytuple.dbl = b.mytuple.dbl + 1)
- #NaUC# ;
- #NaUC# \,
- #NaUC# 'sql' => "\\d", # dummy statement
- #NaUC# },
- #NaUC#
- #NaUC# ] # end tests
- #NaUC# }, # end group
- #NaUC#
- #NaUC#
-#-------------------------------------------------------------------------------
-# PIG STORED -> PIG SECTION
-#-------------------------------------------------------------------------------
-
- {
- 'name' => 'hcat_pig2pig_type_check',
- # Using udf from:
- #
- # * This UDF can be used to check that a tuple presented by org.apache.hcatalog.pig.HCatLoader has the
- # * right types for the fields
- # * (...)
- # * The output should only contain the value '1' in all rows. (This UDF returns
- # * the integer value 1 if all fields have the right type, else throws IOException)
-
- 'tests' => [
-
- {
- 'num' => 1,
- 'pig' => "
-register :FUNCPATH:/testudf.jar;
-a = load 'default.tmp_pig2pig_stored_numbers_:RUNID:' using org.apache.hcatalog.pig.HCatLoader();
-b = foreach a generate org.apache.hcatalog.utils.HCatTypeCheck('intnum1000:int,id:int,intnum5:int,intnum100:int,intnum:int,longnum:long,floatnum:float,doublenum:double', *);
-store b into ':OUTPATH:';
-",
- 'rc' => 0,
- 'expected_err_regex' => "Success",
- # 'expected_out_regex' => "1",
- # 'not_expected_out_regex' => "[^1\\t]",
- },
-
- {
- 'num' => 2,
- 'pig' => "
-register :FUNCPATH:/testudf.jar;
-a = load 'default.tmp_pig2pig_stored_complex_:RUNID:' using org.apache.hcatalog.pig.HCatLoader();
-b = foreach a generate org.apache.hcatalog.utils.HCatTypeCheck('mymap: map[],mytuple: (num: int,str: chararray,dbl: double),bagofmap: {(innerfield: map[])},rownum: int', *);
-store b into ':OUTPATH:';
-",
- 'rc' => 0,
- 'expected_err_regex' => "Success",
- # 'expected_out_regex' => "1",
- # 'not_expected_out_regex' => "[^1\\t]",
- },
-
- {
- 'num' => 3,
- 'ignore' => 'pig does not understand boolean',
- 'pig' => "
-register :FUNCPATH:/testudf.jar;
-a = load 'default.tmp_pig2pig_stored_boolean_table_:RUNID:' using org.apache.hcatalog.pig.HCatLoader();
-b = foreach a generate org.apache.hcatalog.utils.HCatTypeCheck('boolean+int', *);
-store b into ':OUTPATH:';
-",
- 'rc' => 0,
- 'expected_err_regex' => "Success",
- # 'expected_out_regex' => "1",
- # 'not_expected_out_regex' => "[^1\\t]",
- },
-
- ] # end tests
- }, # end group
-
-#-------------------------------------------------------------------------------
-# PIG STORED -> HADOOP SECTION
-#-------------------------------------------------------------------------------
-
- {
- 'name' => 'hcat_pig2hadoop_read',
- 'sortResults' => 1,
- 'sortBenchmark' => 1,
- 'hadoop_classpath' => ':HCAT_JAR:',
- 'tests' => [
-
- # This tests is covered under hcat_hive2hadoop_read_1
- # {
- # 'num' => 1,
- # # sum numbers
- # 'ignore' => '"sumnumbers" is hard coded to run on "numbers", which is created by Hive, not pig.',
- # 'floatpostprocess' => 0,
- # 'delimiter' => ' ',
- # 'hadoop' => q\
- #jar :FUNCPATH:/testudf.jar sumnumbers thrift://:THRIFTSERVER: :OUTPATH: -libjars file://:HCAT_JAR:
- #\,
- # 'sql' => "
- #select
- # intnum1000,
- # sum (id),
- # sum (intnum5),
- # sum (intnum100),
- # sum (intnum),
- # sum (longnum),
- # sum (floatnum),
- # sum (doublenum)
- #from numbers
- #group by intnum1000
- #order by intnum1000;
- #",
- # },
-
- ] # end tests
- }, # end group
-
- {
- 'name' => 'hcat_pig2hadoop_type_check',
- 'sortResults' => 1,
- 'sortBenchmark' => 1,
- 'hadoop_classpath' => ':HCAT_JAR:',
- 'tests' => [
-
- {
- 'num' => 1,
- # typedatacheck complex
-# aborts !!!
- 'hadoop' => q\
-jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.TypeDataCheck thrift://:THRIFTSERVER: tmp_pig2pig_stored_complex_:RUNID: 'map<string,string>+struct<num:int,str:string,dbl:double>+array<map<string,string>>+int' :OUTPATH: tab -libjars file://:HCAT_JAR:
-\,
- 'sql' => "select * from complex;",
- },
-
- {
- 'num' => 2,
- # typedatacheck numbers
- 'floatpostprocess' => 0,
- 'delimiter' => ' ',
- 'hadoop' => q\
-jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.TypeDataCheck thrift://:THRIFTSERVER: tmp_pig2pig_stored_numbers_:RUNID: int+int+int+int+int+bigint+float+double :OUTPATH: tab -libjars file://:HCAT_JAR:
-\,
- 'sql' => "select * from numbers;",
- },
-
- {
- 'num' => 3,
- # typedatacheck boolean_table
- 'ignore' => 'pig does not understand boolean',
- 'hadoop' => q\
-jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.TypeDataCheck thrift://:THRIFTSERVER: tmp_pig2pig_stored_boolean_table_:RUNID: boolean+int :OUTPATH: tab -libjars file://:HCAT_JAR:
-\,
- 'sql' => "select * from boolean_table;",
- },
-
- ] # end tests
- }, # end group
-
-
-################################################################################
-# HADOOP STORED DATA
-################################################################################
-#
-# The bootstrap creates tables and loads data using Hive.
-# Here tables with identical schemas are created and data stored to them using Hadoop.
-# The tables are then verified and used by each of pig, hive, and hadoop.
-#
-# NOTE: The hcat_hive2hadoop_setup_tables group must run before the rest of the
-# "hadoop stored data" groups.
-#
-################################################################################
-
-#-------------------------------------------------------------------------------
-# Prepare test tables using Hadoop
-#-------------------------------------------------------------------------------
-
-# This replicates the drop, create and writing parts from the hcat_hive2hadoop_... tests.
-# This should ideally be moved to the bootstrap conf, but would require a change in table name.
-
{
- 'name' => 'hcat_hive2hadoop_setup_tables',
- 'sortResults' => 1,
- 'sortBenchmark' => 1,
- 'hadoop_classpath' => ':HCAT_JAR:',
- 'tests' => [
-
- {
- 'num' => 1,
- 'hive' => "
-drop table numbers_nopart_empty_initially;
-",
- # does not give error even if the table does not exist
- 'expected_out' => "",
- },
-
- {
- 'num' => 2,
- 'depends_on' => 'hcat_hive2hadoop_setup_tables_1',
- 'hive' => "
-CREATE TABLE numbers_nopart_empty_initially (
- INTNUM1000 int,
- ID smallint,
- INTNUM5 tinyint,
- INTNUM100 int,
- INTNUM int,
- LONGNUM bigint,
- FLOATNUM float,
- DOUBLENUM double
-)
-stored as RCFile
-location ':TMP:/hcat_tmp_tables/numbers_nopart_empty_initially'
-TBLPROPERTIES (
- 'hcat.isd'='org.apache.hcatalog.rcfile.RCFileInputDriver',
- 'hcat.osd'='org.apache.hcatalog.rcfile.RCFileOutputDriver'
-);
-",
- 'rc' => 0
- },
-
- {
- 'num' => 3,
- # storenumbers
- # 'nopart' -> data is written to the numbers_nopart_empty_initially table.
- 'depends_on' => 'hcat_hive2hadoop_setup_tables_2',
- 'floatpostprocess' => 0,
- 'delimiter' => ' ',
- 'hadoop' => q\
-jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.StoreNumbers thrift://:THRIFTSERVER: nopart -libjars file://:HCAT_JAR:
-\,
- 'rc' => 0,
- 'expected_out' => "",
- 'expected_err_regex' => "HDFS_BYTES_WRITTEN=\\d+",
- 'not_expected_err_regex' => "FAILED",
- },
+ 'name' => 'HCat_DropTable',
+ 'tests' => [
- {
- 'num' => 4,
- 'hive' => "
-drop table numbers_part_empty_initially;
-",
- # does not give error even if the table does not exist
- 'expected_out' => "",
- },
-
- {
- 'num' => 5,
- 'depends_on' => 'hcat_hive2hadoop_setup_tables_4',
- 'hive' => "
-CREATE TABLE numbers_part_empty_initially (
- INTNUM1000 int,
- ID smallint,
- INTNUM5 tinyint,
- INTNUM100 int,
- INTNUM int,
- LONGNUM bigint,
- FLOATNUM float,
- DOUBLENUM double
-)
-partitioned by (
- datestamp string
-)
-stored as RCFile
-location ':TMP:/hcat_tmp_tables/numbers_part_empty_initially'
-TBLPROPERTIES (
- 'hcat.isd'='org.apache.hcatalog.rcfile.RCFileInputDriver',
- 'hcat.osd'='org.apache.hcatalog.rcfile.RCFileOutputDriver'
-);
-",
- 'rc' => 0
- },
-
- {
- 'num' => 6,
- # storenumbers
- # 'part' -> data is written to datestamp = '20100101' partition of the numbers_part_empty_initially table.
- 'depends_on' => 'hcat_hive2hadoop_setup_tables_5',
- 'floatpostprocess' => 0,
- 'delimiter' => ' ',
- 'hadoop' => q\
-jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.StoreNumbers thrift://:THRIFTSERVER: part -libjars file://:HCAT_JAR:
-\,
- 'rc' => 0,
- 'expected_out' => "",
- 'expected_err_regex' => "HDFS_BYTES_WRITTEN=\\d+",
- 'not_expected_err_regex' => "FAILED",
- },
-
- {
- 'num' => 7,
- 'hive' => "
-drop table complex_nopart_empty_initially;
-",
- # does not give error even if the table does not exist
- 'expected_out' => "",
- },
-
- {
- 'num' => 8,
- 'depends_on' => 'hcat_hive2hadoop_setup_tables_7',
- 'hive' => "
-CREATE TABLE complex_nopart_empty_initially (
- mymap map<string,string>,
- mytuple struct<num:int,str:string,dbl:double>,
- bagofmap array<Map<string,string>>,
- rownum int
-)
-stored as rcfile
-TBLPROPERTIES (
- 'hcat.isd'='org.apache.hcatalog.rcfile.RCFileInputDriver',
- 'hcat.osd'='org.apache.hcatalog.rcfile.RCFileOutputDriver'
-);
-",
- 'rc' => 0
- },
-
- {
- 'num' => 9,
- # storecomplex
- 'depends_on' => 'hcat_hive2hadoop_setup_tables_8',
- 'floatpostprocess' => 0,
- 'delimiter' => ' ',
- 'hadoop' => q\
-jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.StoreComplex thrift://:THRIFTSERVER: -libjars file://:HCAT_JAR:
-\,
- 'rc' => 0,
- 'expected_out' => "",
- 'expected_err_regex' => "HDFS_BYTES_WRITTEN=",
- 'not_expected_err_regex' => "FAILED",
- },
-
- # !!! Add tests reading the just stored data
-
- ] # end tests
- }, # end group
-
-#-------------------------------------------------------------------------------
-# HADOOP STORED -> HIVE SECTION
-#-------------------------------------------------------------------------------
-
- {
- 'name' => 'hcat_hadoop2hive_type_check',
- 'tests' => [
- # for how the 'typecheck' udf works.
- # If the the typecheck does not pass, the udf will abort.
- # Look at the jobtracker for info about the actual value,
- # e.g. something like: "Expected java.lang.String, got java.lang.Integer"
- # The verification:
- # 'expected_out_regex' => "class java",
- # is needed because otherwise the tests might "PASS" because the output was empty...
-
- {
- 'num' => 1,
- 'hive' => q\
-add jar :FUNCPATH:/testudf.jar;
-create temporary function typecheck as 'org.apache.hcatalog.utils.HCatTypeCheckHive';
-select typecheck('int+smallint+tinyint+int+int+bigint+float+double',
-intnum1000,id,intnum5,intnum100,intnum,longnum,floatnum,doublenum) from numbers_nopart_empty_initially;
-\,
- 'rc' => 0,
- 'expected_out_regex' => "class java",
- 'expected_err_regex' => "OK",
- 'not_expected_err_regex' => "FAILED",
- },
-
- {
- 'num' => 2,
- 'depends_on' => 'hcat_hive2hadoop_setup_tables_6',
- 'hive' => q\
-add jar :FUNCPATH:/testudf.jar;
-create temporary function typecheck as 'org.apache.hcatalog.utils.HCatTypeCheckHive';
-select typecheck('int+smallint+tinyint+int+int+bigint+float+double+string',
-intnum1000,id,intnum5,intnum100,intnum,longnum,floatnum,doublenum,datestamp) from numbers_part_empty_initially;
-\,
- 'rc' => 0,
- 'expected_out_regex' => "class java",
- 'expected_err_regex' => "OK",
- 'not_expected_err_regex' => "FAILED",
- },
-
- {
- 'num' => 3,
- 'hive' => q\
-add jar :FUNCPATH:/testudf.jar;
-create temporary function typecheck as 'org.apache.hcatalog.utils.HCatTypeCheckHive';
-select typecheck('map<string,string>+struct<num:int,str:string,dbl:double>+array<map<string,string>>+int',
-mymap, mytuple, bagofmap, rownum) from complex_nopart_empty_initially;
-\,
- 'rc' => 0,
- 'expected_out_regex' => "class java",
- 'expected_err_regex' => "OK",
- 'not_expected_err_regex' => "FAILED",
- },
-
- ] # end tests
- }, # end group
-
-#-------------------------------------------------------------------------------
-# HADOOP STORED -> PIG SECTION
-#-------------------------------------------------------------------------------
-
- {
- 'name' => 'hcat_hadoop2pig_type_check',
- # * This UDF can be used to check that a tuple presented by org.apache.hcatalog.pig.HCatLoader has the
- # * right types for the fields
- # * (...)
- # * The output should only contain the value '1' in all rows. (This UDF returns
- # * the integer value 1 if all fields have the right type, else throws IOException)
-
- 'tests' => [
-
- {
- 'num' => 1,
- 'pig' => "
-register :FUNCPATH:/testudf.jar;
-a = load 'default.numbers_nopart_pig_empty_initially' using org.apache.hcatalog.pig.HCatLoader();
-b = foreach a generate org.apache.hcatalog.utils.HCatTypeCheck('intnum1000:int,id:int,intnum5:int,intnum100:int,intnum:int,longnum:long,floatnum:float,doublenum:double', *);
-store b into ':OUTPATH:';
-",
- 'rc' => 0,
- 'expected_err_regex' => "Success",
- # 'expected_out_regex' => "1",
- # 'not_expected_out_regex' => "[^1\\t]",
- },
-
- # default.numbers_part_empty_initially has smallint/tinyint which cannot be handled by pig
- # {
- # 'num' => 2,
- # 'pig' => "
- #register :FUNCPATH:/testudf.jar;
- #a = load 'default.numbers_part_empty_initially' using org.apache.hcatalog.pig.HCatLoader();
- #b = foreach a generate org.apache.hcatalog.utils.HCatTypeCheck('intnum1000:int,id:int,intnum5:int,intnum100:int,intnum:int,longnum:long,floatnum:float,doublenum:double+string', *);
- #store b into ':OUTPATH:';
- #",
- # 'rc' => 0,
- # 'expected_err_regex' => "Success",
- # # 'expected_out_regex' => "1",
- # # 'not_expected_out_regex' => "[^1\\t]",
- # },
-
- {
- 'num' => 3,
- 'pig' => "
-register :FUNCPATH:/testudf.jar;
-a = load 'default.complex_nopart_empty_initially' using org.apache.hcatalog.pig.HCatLoader();
-b = foreach a generate org.apache.hcatalog.utils.HCatTypeCheck('m:map[],t:tuple(num:int,str:chararray,dbl:double),bg:bag{t:tuple(m:map[])},i:int', *);
-store b into ':OUTPATH:';
-",
- 'rc' => 0,
- 'expected_err_regex' => "Success",
- # 'expected_out_regex' => "1",
- # 'not_expected_out_regex' => "[^1\\t]",
- },
-
- ] # end tests
- }, # end group
-
-#-------------------------------------------------------------------------------
-# HADOOP STORED -> HADOOP SECTION
-#-------------------------------------------------------------------------------
-
- {
- 'name' => 'hcat_hadoop2hadoop_type_check',
- # * This UDF can be used to check that a tuple presented by org.apache.hcatalog.pig.HCatLoader has the
- # * right types for the fields
- # * (...)
- # * The output should only contain the value '1' in all rows. (This UDF returns
- # * the integer value 1 if all fields have the right type, else throws IOException)
-
- 'tests' => [
-
- {
- 'num' => 1,
- # typedatacheck complex_nopart_empty_initially
- 'hadoop' => q\
-jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.TypeDataCheck thrift://:THRIFTSERVER: complex_nopart_empty_initially 'map<string,string>+struct<num:int,str:string,dbl:double>+array<map<string,string>>+int' :OUTPATH: tab -libjars file://:HCAT_JAR:
-\,
- 'sql' => "select * from complex;",
- },
-
- {
- 'num' => 2,
- # typedatacheck numbers_nopart_empty_initially
- 'depends_on' => 'hcat_hive2hadoop_write_numbers_nopart_3',
- 'floatpostprocess' => 0,
- 'delimiter' => ' ',
- 'hadoop' => q\
-jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.TypeDataCheck thrift://:THRIFTSERVER: numbers_nopart_empty_initially int+smallint+tinyint+int+int+bigint+float+double :OUTPATH: tab -libjars file://:HCAT_JAR:
-\,
- 'sql' => "select intnum1000+10, id+10, intnum5+10, intnum100 + 10, intnum+10, longnum+10, floatnum+10, doublenum+10 from numbers;",
- },
-
- {
- 'num' => 3,
- # typedatacheck numbers_part_empty_initially
- 'floatpostprocess' => 0,
- 'delimiter' => ' ',
- 'hadoop' => q\
-jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.TypeDataCheck thrift://:THRIFTSERVER: numbers_part_empty_initially int+smallint+tinyint+int+int+bigint+float+double+string :OUTPATH: tab -libjars file://:HCAT_JAR:
-\,
- 'sql' => "select intnum1000+10, id+10, intnum5+10, intnum100 + 10, intnum+10, longnum+10, floatnum+10, doublenum+10 , 20100101 from numbers;",
- },
-
- ] # end tests
- }, # end group
-
-################################################################################
-# NEGATIVE
-################################################################################
-
-#-------------------------------------------------------------------------------
-# Negative: hive
-#-------------------------------------------------------------------------------
-
- {
- 'name' => 'hcat_negative_hive',
[... 362 lines stripped ...]