You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by da...@apache.org on 2012/03/18 09:38:47 UTC
svn commit: r1302096 - in /pig/trunk: ./ test/e2e/pig/deployers/
test/e2e/pig/tests/ test/e2e/pig/tools/generate/ test/e2e/pig/udfs/python/
Author: daijy
Date: Sun Mar 18 08:38:47 2012
New Revision: 1302096
URL: http://svn.apache.org/viewvc?rev=1302096&view=rev
Log:
PIG-2589: Additional e2e test for 0.10 new features
Modified:
pig/trunk/CHANGES.txt
pig/trunk/test/e2e/pig/deployers/ExistingClusterDeployer.pm
pig/trunk/test/e2e/pig/deployers/LocalDeployer.pm
pig/trunk/test/e2e/pig/tests/cmdline.conf
pig/trunk/test/e2e/pig/tests/nightly.conf
pig/trunk/test/e2e/pig/tools/generate/generate_data.pl
pig/trunk/test/e2e/pig/udfs/python/scriptingudf.py
Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1302096&r1=1302095&r2=1302096&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Sun Mar 18 08:38:47 2012
@@ -149,6 +149,8 @@ INCOMPATIBLE CHANGES
IMPROVEMENTS
+PIG-2589: Additional e2e test for 0.10 new features (daijy)
+
PIG-2182: Add more append support to DataByteArray (gsingers via daijy)
PIG-438: Handle realiasing of existing Alias (A=B;) (daijy)
Modified: pig/trunk/test/e2e/pig/deployers/ExistingClusterDeployer.pm
URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/deployers/ExistingClusterDeployer.pm?rev=1302096&r1=1302095&r2=1302096&view=diff
==============================================================================
--- pig/trunk/test/e2e/pig/deployers/ExistingClusterDeployer.pm (original)
+++ pig/trunk/test/e2e/pig/deployers/ExistingClusterDeployer.pm Sun Mar 18 08:38:47 2012
@@ -218,7 +218,12 @@ sub generateData
'filetype' => "voternulltab",
'rows' => 10000,
'hdfs' => "singlefile/voternulltab10k",
- },
+ }, , {
+ 'name' => "allscalar10k",
+ 'filetype' => "allscalar",
+ 'rows' => 10000,
+ 'hdfs' => "singlefile/allscalar10k",
+ }
);
# Create the HDFS directories
Modified: pig/trunk/test/e2e/pig/deployers/LocalDeployer.pm
URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/deployers/LocalDeployer.pm?rev=1302096&r1=1302095&r2=1302096&view=diff
==============================================================================
--- pig/trunk/test/e2e/pig/deployers/LocalDeployer.pm (original)
+++ pig/trunk/test/e2e/pig/deployers/LocalDeployer.pm Sun Mar 18 08:38:47 2012
@@ -194,7 +194,12 @@ sub generateData
'filetype' => "voternulltab",
'rows' => 10000,
'outfile' => "singlefile/voternulltab10k",
- },
+ }, {
+ 'name' => "allscalar10k",
+ 'filetype' => "allscalar",
+ 'rows' => 10000,
+ 'outfile' => "singlefile/allscalar10k",
+ }
);
# Create the target directories
Modified: pig/trunk/test/e2e/pig/tests/cmdline.conf
URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/tests/cmdline.conf?rev=1302096&r1=1302095&r2=1302096&view=diff
==============================================================================
--- pig/trunk/test/e2e/pig/tests/cmdline.conf (original)
+++ pig/trunk/test/e2e/pig/tests/cmdline.conf Sun Mar 18 08:38:47 2012
@@ -129,6 +129,12 @@ describe A;\,
# B : { m :map }
'expected_out_regex' => "B: {m: map\\[\\]}\nA: {m: map\\[\\],x: bytearray,y: bytearray}",
},
+ {
+ 'num' => 14,
+ 'pig' => q\A = load ':INPATH:/singlefile/allscalar10k' using PigStorage() as (name:chararray, age:int, gpa:double, instate:boolean);
+describe A;\,
+ 'expected_out_regex' => "A: {name: chararray,age: int,gpa: double,instate: boolean}",
+ }
#JIRA[PIG-379]
{
Modified: pig/trunk/test/e2e/pig/tests/nightly.conf
URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/tests/nightly.conf?rev=1302096&r1=1302095&r2=1302096&view=diff
==============================================================================
--- pig/trunk/test/e2e/pig/tests/nightly.conf (original)
+++ pig/trunk/test/e2e/pig/tests/nightly.conf Sun Mar 18 08:38:47 2012
@@ -242,7 +242,100 @@ store b into ':OUTPATH:' using PigStorag
b = filter a by (int)age > 40 and (int)age <50 and (double)gpa > 2.0 and (double)gpa < 3.0 and (chararray)name > 'bob' and (chararray)name < 'fred';
store b into ':OUTPATH:' using PigStorage;\,
},
- ],
+ {
+ 'num' => 18,
+ 'ignore' => 1, # PIG-2593
+ 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
+b = filter a by instate;
+store b into ':OUTPATH:' using PigStorage;\,
+ 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
+b = filter a by instate == 'true';
+store b into ':OUTPATH:' using PigStorage;\,
+ },
+ {
+ 'num' => 19,
+ 'ignore' => 1, # PIG-2593
+ 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
+b = filter a by not instate;
+store b into ':OUTPATH:' using PigStorage;\,
+ 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
+b = filter a by instate == 'false';
+store b into ':OUTPATH:' using PigStorage;\,
+ },
+ {
+ 'num' => 20,
+ 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
+b = filter a by instate is null;
+store b into ':OUTPATH:' using PigStorage;\,
+ },
+ {
+ 'num' => 21,
+ 'ignore' => 1, # TODO Need to file a JIRA-2
+ 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
+b = filter a by instate == true;
+store b into ':OUTPATH:' using PigStorage;\,
+ 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
+b = filter a by instate == 'true';
+store b into ':OUTPATH:' using PigStorage;\,
+ },
+ {
+ 'num' => 22,
+ 'ignore' => 1, # TODO Need to file a JIRA-2
+ 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
+b = filter a by instate == false;
+store b into ':OUTPATH:' using PigStorage;\,
+ 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
+b = filter a by instate == 'false';
+store b into ':OUTPATH:' using PigStorage;\,
+ },
+ {
+ 'num' => 23,
+ 'ignore' => 1, # TODO Need to file a JIRA-1
+ 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
+b = filter a by instate;
+store b into ':OUTPATH:' using PigStorage;\,
+ 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
+b = filter a by instate == 'true';
+store b into ':OUTPATH:' using PigStorage;\,
+ },
+ {
+ 'num' => 24,
+ 'ignore' => 1, # TODO Need to file a JIRA-1
+ 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
+b = filter a by not instate;
+store b into ':OUTPATH:' using PigStorage;\,
+ 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
+b = filter a by instate == 'false';
+store b into ':OUTPATH:' using PigStorage;\,
+ },
+ {
+ 'num' => 25,
+ 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
+b = filter a by instate is null;
+store b into ':OUTPATH:' using PigStorage;\,
+ 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
+b = filter a by instate is null;
+store b into ':OUTPATH:' using PigStorage;\,
+ },
+ {
+ 'num' => 26,
+ 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
+b = filter a by instate == true;
+store b into ':OUTPATH:' using PigStorage;\,
+ 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
+b = filter a by instate == 'true';
+store b into ':OUTPATH:' using PigStorage;\,
+ },
+ {
+ 'num' => 27,
+ 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
+b = filter a by instate == false;
+store b into ':OUTPATH:' using PigStorage;\,
+ 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
+b = filter a by instate == 'false';
+store b into ':OUTPATH:' using PigStorage;\,
+ },
+ ],
},
{
'name' => 'FilterEq',
@@ -519,6 +612,17 @@ store e into ':OUTPATH:';\,
'floatpostprocess' => 1,
'delimiter' => ' ',
},
+ {
+ 'num' => 14,
+ 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
+b = group a by (name);
+e = foreach b generate COUNT(a.name);
+store e into ':OUTPATH:';\,
+ 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
+b = group a by (name);
+e = foreach b generate COUNT(a.name);
+store e into ':OUTPATH:';\,
+ }
],
},
{
@@ -625,7 +729,18 @@ a = load ':INPATH:/singlefile/studenttab
b = filter a by name lt 'b';
c = foreach b generate org.apache.pig.test.udf.evalfunc.CreateMap((chararray)name, age);
store c into ':OUTPATH:' using org.apache.pig.test.udf.storefunc.StringStore();\,
- }
+ },
+ {
+ 'num' => 5,
+ 'pig' => q\register :FUNCPATH:/testudf.jar;
+a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
+b = foreach a generate org.apache.pig.test.udf.evalfunc.TestBoolean(instate);
+store b into ':OUTPATH:';\,
+ 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
+b = foreach a generate (instate is null ? '' : (instate == 'true' ? 'false' : 'true'));
+store b into ':OUTPATH:';\,
+ }
+
]
},
# TODO DIFF
@@ -843,6 +958,21 @@ store c into ':OUTPATH:';\,
e = join c by name, d by name;
store e into ':OUTPATH:';\,
},
+ {
+ 'num' => 13,
+ 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
+b = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
+c = filter a by age < 20;
+d = filter b by age < 20;
+e = join c by instate, d by instate parallel 5;
+store e into ':OUTPATH:';\,
+ 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
+b = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
+c = filter a by age < 20;
+d = filter b by age < 20;
+e = join c by instate, d by instate parallel 5;
+store e into ':OUTPATH:';\,
+ }
]
},
{
@@ -959,7 +1089,16 @@ store c into ':OUTPATH:';\,
a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = foreach a generate name, org.apache.pig.test.udf.evalfunc.Fred() as fred;
store b into ':OUTPATH:';\,
- }
+ },
+ {
+ 'num' => 13,
+ 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
+b = foreach a generate *;
+store b into ':OUTPATH:';\,
+ 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:chararray);
+b = foreach a generate *;
+store b into ':OUTPATH:';\,
+ }
]
},
{
@@ -1108,6 +1247,18 @@ b = order a by $1 parallel 100;
store b into ':OUTPATH:';\,
'sortArgs' => ['-t', ' ', '+1', '-2'],
},
+ {
+ 'num' => 19,
+ 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
+b = foreach a generate instate;
+c = order b by instate;
+store c into ':OUTPATH:';\,
+ 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' using PigStorage() as (name:chararray, age:int, gpa:double, instate:chararray);
+b = foreach a generate instate;
+c = order b by instate;
+store c into ':OUTPATH:';\,
+ 'sortArgs' => ['-t', ' ', '+0', '-1'],
+ },
]
},
{
@@ -1771,6 +1922,15 @@ store c into ':OUTPATH:';\,
b = foreach a generate -(age + 1 + 0.2f + 253645L), -(gpa+1);
store b into ':OUTPATH:';\,
},
+ {
+ 'num' => 39,
+ 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' using PigStorage() as (name:chararray, age:int, gpa:double, instate:boolean);
+b = foreach a generate instate, true, false;
+store b into ':OUTPATH:';\,
+ 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' using PigStorage() as (name:chararray, age:int, gpa:double, instate:chararray);
+b = foreach a generate instate, 'true', 'false';
+store b into ':OUTPATH:';\,
+ },
]
},
{
@@ -1865,9 +2025,33 @@ store c into ':OUTPATH:';\,
'sortArgs' => ['-t', ' ', '-k1,2'],
},
+ {
+ 'num' => 10,
+ 'pig' =>q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
+b = group a all;
+c = foreach b generate COUNT(a) as count;
+d = limit a c.count/10;
+store d into ':OUTPATH:';\,
+ 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
+b = limit a 1000;
+store b into ':OUTPATH:';\,
+ },
+ {
+ 'num' => 11,
+ 'pig' =>q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
+b = group a all;
+c = foreach b generate COUNT(a) as count;
+d = load ':INPATH:/singlefile/votertab10k';
+e = group d all;
+f = foreach e generate COUNT(d) as count;
+d = limit a c.count/10+f.count/10;
+store d into ':OUTPATH:';\,
-
+ 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
+b = limit a 2000;
+store b into ':OUTPATH:';\,
+ }
]
},
{
@@ -1942,6 +2126,17 @@ b1 = order a1 by name;
c = cogroup b2 by name, b1 by name;
d = foreach c generate flatten(group), COUNT($1), COUNT($2);
store d into ':OUTPATH:';\,
+ },
+ {
+ 'num' => 10,
+ 'pig' =>q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
+split a into a1 if age > 50, a2 otherwise;
+store a1 into ':OUTPATH:.1';
+store a2 into ':OUTPATH:.2';\,
+ 'verify_pig_script' =>q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
+split a into a1 if age > 50, a2 if age<=50;
+store a1 into ':OUTPATH:.1';
+store a2 into ':OUTPATH:.2';\,
}
]
},
@@ -1999,6 +2194,21 @@ store S into ':OUTPATH:';\,
},
+ {
+ 'num' => 2,
+ 'pig' => q\
+A = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name:chararray, age:int, gpa: double);
+B = group A all;
+C = foreach B generate COUNT(A) as count;
+D = group A all;
+E = foreach D generate (double)COUNT(A) as count;
+S = sample A E.count/C.count;
+store S into ':OUTPATH:';\,
+'verify_pig_script' => q\
+A = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name:chararray, age:int, gpa: double);
+S = sample A 1;
+store S into ':OUTPATH:';\,
+ },
],
},
{
@@ -2248,7 +2458,17 @@ store e into ':OUTPATH:';\,
'notmq' => 1,
},
-
+ {
+ 'num' => 7,
+ 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' using PigStorage() as (name, age, gpa, instate);
+b = foreach a generate (boolean)instate;
+c = filter b by instate == true;
+store c into ':OUTPATH:';\,
+ 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' using PigStorage() as (name:chararray, age:int, gpa:double, instate:chararray);
+b = foreach a generate instate;
+c = filter b by instate == 'true';
+store c into ':OUTPATH:';\,
+ }
],
},
{
@@ -2901,6 +3121,24 @@ store e into ':OUTPATH:';\,
};
store c into ':OUTPATH:';\,
},
+ {
+ # secondary sort boolean
+ 'num' => 10,
+ 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' using PigStorage() as (name:chararray, age:int, gpa:double, instate:boolean);
+ b = group a by age;
+ c = foreach b {
+ d = order a by instate;
+ generate group, flatten(d);
+ };
+ store c into ':OUTPATH:';\,
+ 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' using PigStorage() as (name:chararray, age:int, gpa:double, instate:chararray);
+ b = group a by age;
+ c = foreach b {
+ d = order a by instate;
+ generate group, flatten(d);
+ };
+ store c into ':OUTPATH:';\,
+ }
]
},
{
@@ -3165,6 +3403,34 @@ a = load ':INPATH:/singlefile/studenttab
b = group a by name;
c = foreach b generate group, COUNT(a);
store c into ':OUTPATH:';\,
+ },
+ {
+ # test that a boolean field can be passed as an argument to a jython UDF
+ 'num' => 10,
+ 'ignore23' => 'MAPREDUCE-3700',
+ 'pig' => q\
+register ':SCRIPTHOMEPATH:/python/scriptingudf.py' using jython as myfuncs;
+a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
+b = foreach a generate name, myfuncs.adjustgpa(gpa, instate);
+store b into ':OUTPATH:';\,
+ 'verify_pig_script' => q\
+a = load ':INPATH:/singlefile/allscalar10k' using PigStorage() as (name:chararray, age:int, gpa:double, instate:chararray);
+b = foreach a generate name, (instate=='true'?gpa:gpa+1);
+store b into ':OUTPATH:';\,
+ },
+ {
+ # test that a jython UDF declared with a boolean output schema can be called
+ 'num' => 11,
+ 'ignore' => 1, # PIG-2596
+ 'pig' => q\
+register ':SCRIPTHOMEPATH:/python/scriptingudf.py' using jython as myfuncs;
+a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
+b = foreach a generate name, myfuncs.isretired(age);
+store b into ':OUTPATH:';\,
+ 'verify_pig_script' => q\
+a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name:chararray, age:int, gpa:double);
+b = foreach a generate name, (age>=60?1:0);
+store b into ':OUTPATH:';\,
}
]
},
@@ -3771,7 +4037,21 @@ C = foreach A generate name, (chararray)
D = foreach B generate Nil(line), Nil(line), Nil(line), line;
E = union C, D;
store E into ':OUTPATH:';\,
- }
+ },
+ {
+ # Test union onschema where a and b have an identical schema that includes a boolean field
+ 'num' => 7,
+ 'pig' => q\
+a = load ':INPATH:/singlefile/allscalar10k' using PigStorage() as (name:chararray, age:int, gpa:double, instate:boolean);
+b = load ':INPATH:/singlefile/allscalar10k' using PigStorage() as (name:chararray, age:int, gpa:double, instate:boolean);
+C = union onschema a, b;
+store C into ':OUTPATH:';\,
+ 'verify_pig_script' => q\
+a = load ':INPATH:/singlefile/allscalar10k' using PigStorage() as (name:chararray, age:int, gpa:double, instate:chararray);
+b = load ':INPATH:/singlefile/allscalar10k' using PigStorage() as (name:chararray, age:int, gpa:double, instate:chararray);
+C = union a, b;
+store C into ':OUTPATH:';\,
+ }
]
},
@@ -4177,7 +4457,19 @@ store E into ':OUTPATH:';\,
B = LOAD ':INPATH:/singlefile/votertab10k' AS (name:chararray, age:int, registration:chararray, contributions:double);
C = JOIN A by name, B by name;
store C into ':OUTPATH:';?,
- }
+ }, {
+ 'num' => 3,
+ 'ignore' => 1, # PIG-2594
+ 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' using PigStorage() as (name:chararray, age:int, gpa:double, instate:boolean);
+ store a into ':OUTPATH:.intermediate' using JsonStorage();
+ exec
+ B = LOAD ':OUTPATH:.intermediate' using JsonLoader();
+ store B into ':OUTPATH:';\,
+ 'notmq' => 1,
+ 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' using PigStorage() as (name:chararray, age:int, gpa:double, instate:chararray);
+store a into ':OUTPATH:';\,
+ }
+
],
},{
'name' => 'STRSPLIT',
@@ -4236,7 +4528,93 @@ store E into ':OUTPATH:';\,
}
]
},
-
+ {
+ 'name' => 'NestedForEach',
+ 'tests' => [
+ {
+ 'num' => 1,
+ 'pig' => q\
+ A = LOAD ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
+ B = group A by name;
+ C = foreach B {
+ C1 = foreach A generate UPPER(name), age+1 as age, gpa;
+ generate C1;
+ }
+ D = foreach C generate flatten(C1);
+ store D into ':OUTPATH:';\,
+ 'verify_pig_script' => q\
+ A = LOAD ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
+ B = foreach A generate UPPER(name), age+1, gpa;
+ store B into ':OUTPATH:';\,
+ },
+ {
+ 'num' => 2,
+ 'pig' => q\
+ A = LOAD ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);;
+ B = group A by name;
+ C = foreach B {
+ C1 = A.age;
+ C2 = filter C1 by age>=30;
+ C3 = foreach C2 generate age+1 as age;
+ C4 = order C3 by age desc;
+ generate C4;
+ }
+ D = foreach C generate flatten(C4);
+ store D into ':OUTPATH:';\,
+ 'verify_pig_script' => q\
+ A = LOAD ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
+ B = filter A by age>=30;
+ C = foreach B generate age+1 as age;
+ D = order C by age desc;
+ store D into ':OUTPATH:';\,
+ }
+ ]
+ },
+ {
+ 'name' => 'NestedCross',
+ 'tests' => [
+ {
+ 'num' => 1,
+ 'pig' => q\
+ A = LOAD ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
+ B = LOAD ':INPATH:/singlefile/votertab10k' as (name:chararray, age:int, registration, contributions:double);
+ C = cogroup A by name, B by name;
+ D = foreach C {
+ C1 = cross A, B;
+ generate flatten(C1);
+ }
+ store D into ':OUTPATH:';\,
+ 'verify_pig_script' => q\
+ A = LOAD ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
+ B = LOAD ':INPATH:/singlefile/votertab10k' as (name:chararray, age:int, registration, contributions:double);
+ C = JOIN A by name, B by name;
+ store C into ':OUTPATH:';\,
+ },
+ {
+ 'num' => 2,
+ 'pig' => q\
+ A = LOAD ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
+ B = LOAD ':INPATH:/singlefile/votertab10k' as (name:chararray, age:int, registration, contributions:double);
+ C = cogroup A by name, B by name;
+ D = foreach C {
+ C1 = filter A by gpa > 4;
+ C2 = filter B by contributions > 500;
+ C3 = cross C1, C2;
+ C4 = foreach C3 generate CONCAT(CONCAT(gpa, '_'), contributions);
+ generate flatten(C4);
+ }
+ store D into ':OUTPATH:';\,
+ 'verify_pig_script' => q\
+ A = LOAD ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
+ B = LOAD ':INPATH:/singlefile/votertab10k' as (name:chararray, age:int, registration, contributions:double);
+ C = filter A by gpa > 4;
+ D = filter B by contributions > 500;
+ E = JOIN C by name, D by name;
+ F = foreach E generate CONCAT(CONCAT(gpa, '_'), contributions);
+ store F into ':OUTPATH:';\,
+ }
+ ]
+ }
],
},
;
Modified: pig/trunk/test/e2e/pig/tools/generate/generate_data.pl
URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/tools/generate/generate_data.pl?rev=1302096&r1=1302095&r2=1302096&view=diff
==============================================================================
--- pig/trunk/test/e2e/pig/tools/generate/generate_data.pl (original)
+++ pig/trunk/test/e2e/pig/tools/generate/generate_data.pl Sun Mar 18 08:38:47 2012
@@ -454,6 +454,15 @@ sub getBulkCopyCmd(){
}
print PSQL "commit;\n" unless defined $nosql;
+ } elsif ($filetype eq "allscalar") {
+ srand(1228.2011 + $numRows);
+ for (my $i = 0; $i < $numRows; $i++) {
+ my $name = rand(1) < 0.05 ? '' : randomName();
+ my $age = rand(1) < 0.05 ? '' : randomAge();
+ my $gpa = rand(1) < 0.05 ? '' : randomGpa();
+ my $instate = rand(1) < 0.05 ? '' : (rand(1) < 0.5 ? 'true' : 'false');
+ printf HDFS "%s\t%d\t%.2f\t%s\n", $name, $age, $gpa, $instate;
+ }
} else {
warn "Unknown filetype $filetype\n";
usage();
Modified: pig/trunk/test/e2e/pig/udfs/python/scriptingudf.py
URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/udfs/python/scriptingudf.py?rev=1302096&r1=1302095&r2=1302096&view=diff
==============================================================================
--- pig/trunk/test/e2e/pig/udfs/python/scriptingudf.py (original)
+++ pig/trunk/test/e2e/pig/udfs/python/scriptingudf.py Sun Mar 18 08:38:47 2012
@@ -69,3 +69,21 @@ def count(bag):
for r in bag:
cnt += 1
return cnt
+
+@outputSchema("gpa:double")
+def adjustgpa(gpa, instate):
+ if instate == None:
+ return None
+ elif instate:
+ return gpa
+ else:
+ return gpa+1
+
+@outputSchema("retired:boolean")
+def isretired(age):
+ if age == None:
+ return None
+ elif age>=60:
+ return True
+ else:
+ return False