You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by da...@apache.org on 2016/11/02 21:52:16 UTC
svn commit: r1767791 - in /pig/trunk: CHANGES.txt test/e2e/pig/deployers/ExistingClusterDeployer.pm test/e2e/pig/tests/nightly.conf test/e2e/pig/tools/generate/generate_data.pl
Author: daijy
Date: Wed Nov 2 21:52:15 2016
New Revision: 1767791
URL: http://svn.apache.org/viewvc?rev=1767791&view=rev
Log:
PIG-5036: Remove biggish from e2e input dataset
Modified:
pig/trunk/CHANGES.txt
pig/trunk/test/e2e/pig/deployers/ExistingClusterDeployer.pm
pig/trunk/test/e2e/pig/tests/nightly.conf
pig/trunk/test/e2e/pig/tools/generate/generate_data.pl
Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1767791&r1=1767790&r2=1767791&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Wed Nov 2 21:52:15 2016
@@ -24,6 +24,8 @@ INCOMPATIBLE CHANGES
IMPROVEMENTS
+PIG-5036: Remove biggish from e2e input dataset (daijy)
+
PIG-5053: Can't change HDFS user home in e2e tests using Ant (nkollar via daijy)
PIG-5037: Add api getDisplayString to PigStats (zjffdu)
Modified: pig/trunk/test/e2e/pig/deployers/ExistingClusterDeployer.pm
URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/deployers/ExistingClusterDeployer.pm?rev=1767791&r1=1767790&r2=1767791&view=diff
==============================================================================
--- pig/trunk/test/e2e/pig/deployers/ExistingClusterDeployer.pm (original)
+++ pig/trunk/test/e2e/pig/deployers/ExistingClusterDeployer.pm Wed Nov 2 21:52:15 2016
@@ -231,11 +231,6 @@ sub generateData
'rows' => 5000,
'hdfs' => "types/numbers.txt",
}, {
- 'name' => "biggish",
- 'filetype' => "biggish",
- 'rows' => 1000000,
- 'hdfs' => "singlefile/biggish",
- }, {
'name' => "prerank",
'filetype' => "ranking",
'rows' => 30,
Modified: pig/trunk/test/e2e/pig/tests/nightly.conf
URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/tests/nightly.conf?rev=1767791&r1=1767790&r2=1767791&view=diff
==============================================================================
--- pig/trunk/test/e2e/pig/tests/nightly.conf (original)
+++ pig/trunk/test/e2e/pig/tests/nightly.conf Wed Nov 2 21:52:15 2016
@@ -5657,13 +5657,15 @@ store a into ':OUTPATH:';\,
'execonly' => 'mapred,tez',
'pig' => q\
SET default_parallel 7;
- A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+ SET mapreduce.input.fileinputformat.split.maxsize '300';
+ SET pig.splitCombination false;
+ A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
B = rank A;
C = foreach B generate rank_A,a,b,c;
store C into ':OUTPATH:';
\,
'verify_pig_script' => q\
- A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+ A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
C = foreach A generate rownumber,a,b,c;
store C into ':OUTPATH:';
\,
@@ -5672,13 +5674,15 @@ store a into ':OUTPATH:';\,
'execonly' => 'mapred,tez',
'pig' => q\
SET default_parallel 9;
- A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+ SET mapreduce.input.fileinputformat.split.maxsize '300';
+ SET pig.splitCombination false;
+ A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
B = rank A by b DESC,a ASC;
C = foreach B generate rank_A,b,a;
store C into ':OUTPATH:';
\,
'verify_pig_script' => q\
- A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+ A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
C = foreach A generate rankbdaa,b,a;
store C into ':OUTPATH:';
\,
@@ -5687,13 +5691,15 @@ store a into ':OUTPATH:';\,
'execonly' => 'mapred,tez',
'pig' => q\
SET default_parallel 7;
- A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+ SET mapreduce.input.fileinputformat.split.maxsize '300';
+ SET pig.splitCombination false;
+ A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
B = rank A by c ASC,b DESC;
C = foreach B generate rank_A,c,b;
store C into ':OUTPATH:';
\,
'verify_pig_script' => q\
- A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+ A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
C = foreach A generate rankcabd,c,b;
store C into ':OUTPATH:';
\,
@@ -5701,26 +5707,29 @@ store a into ':OUTPATH:';\,
'num' => 4,
'execonly' => 'mapred,tez',
'pig' => q\
- SET default_parallel 25;
- A = LOAD ':INPATH:/singlefile/biggish' using PigStorage(',') as (rownumber:long,idx:long,tail:bytearray);
+ SET default_parallel 5;
+ SET mapreduce.input.fileinputformat.split.maxsize '300';
+ SET pig.splitCombination false;
+ A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
B = rank A;
C = order B by rank_A;
- D = foreach C generate rank_A,rownumber;
+ D = foreach C generate rank_A,a,b,c;
store D into ':OUTPATH:';
\,
'verify_pig_script' => q\
- A = LOAD ':INPATH:/singlefile/biggish' using PigStorage(',') as (rownumber:long,idx:long,tail:bytearray);
- D = foreach A generate idx,rownumber;
+ A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
+ D = foreach A generate rownumber,a,b,c;
store D into ':OUTPATH:';
\,
}, {
'num' => 5,
'execonly' => 'mapred,tez',
'pig' => q\
- SET default_parallel 11;
+ SET default_parallel 5;
+ SET mapreduce.input.fileinputformat.split.maxsize '300';
SET pig.splitCombination false;
- A = LOAD ':INPATH:/singlefile/biggish' using PigStorage(',') as (rownumber:long,idx:long,tail:bytearray);
- B = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+ A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
+ B = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
C = join A by rownumber, B by rownumber;
D = order C by B::rankcabd,B::rankbdca,B::rankaaba;
E = rank D;
@@ -5730,7 +5739,7 @@ store a into ':OUTPATH:';\,
store H into ':OUTPATH:';
\,
'verify_pig_script' => q\
- A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,idx:long,tail:bytearray);
+ A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,idx:long);
B = foreach A generate rownumber,1;
C = order B by rownumber;
store C into ':OUTPATH:';
@@ -5739,14 +5748,16 @@ store a into ':OUTPATH:';\,
'num' => 6,
'execonly' => 'mapred,tez',
'pig' => q\
- A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+ SET mapreduce.input.fileinputformat.split.maxsize '300';
+ SET pig.splitCombination false;
+ A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
split A into M if rownumber > 15, N if rownumber < 25;
C = rank N;
D = foreach C generate $0, a, b, c;
store D into ':OUTPATH:';
\,
'verify_pig_script' => q\
- A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+ A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
B = filter A by rownumber < 25;
D = foreach B generate rownumber, a, b, c;
store D into ':OUTPATH:';
@@ -5761,14 +5772,16 @@ store a into ':OUTPATH:';\,
'num' => 1,
'execonly' => 'mapred,tez',
'pig' => q\
+ SET mapreduce.input.fileinputformat.split.maxsize '300';
+ SET pig.splitCombination false;
SET default_parallel 9;
- A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+ A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
B = rank A by a ASC,b ASC DENSE;
C = foreach B generate rank_A,a,b;
store C into ':OUTPATH:';
\,
'verify_pig_script' => q\
- A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+ A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
C = foreach A generate rankaaba,a,b;
store C into ':OUTPATH:';
\,
@@ -5776,14 +5789,16 @@ store a into ':OUTPATH:';\,
'num' => 2,
'execonly' => 'mapred,tez',
'pig' => q\
+ SET mapreduce.input.fileinputformat.split.maxsize '300';
+ SET pig.splitCombination false;
SET default_parallel 9;
- A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+ A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
B = rank A by a ASC,c DESC DENSE;
C = foreach B generate rank_A,a,c;
store C into ':OUTPATH:';
\,
'verify_pig_script' => q\
- A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+ A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
C = foreach A generate rankaacd,a,c;
store C into ':OUTPATH:';
\,
@@ -5791,14 +5806,16 @@ store a into ':OUTPATH:';\,
'num' => 3,
'execonly' => 'mapred,tez',
'pig' => q\
+ SET mapreduce.input.fileinputformat.split.maxsize '300';
+ SET pig.splitCombination false;
SET default_parallel 7;
- A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+ A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
B = rank A by b DESC,c ASC DENSE;
C = foreach B generate rank_A,b,c;
store C into ':OUTPATH:';
\,
'verify_pig_script' => q\
- A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+ A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
C = foreach A generate rankbdca,b,c;
store C into ':OUTPATH:';
\,
@@ -5806,9 +5823,11 @@ store a into ':OUTPATH:';\,
'num' => 4,
'execonly' => 'mapred,tez',
'pig' => q\
+ SET mapreduce.input.fileinputformat.split.maxsize '300';
+ SET pig.splitCombination false;
SET default_parallel 7;
- A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
- B = foreach A generate a,b,c,tail;
+ A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
+ B = foreach A generate a,b,c;
C = rank B by a ASC,b ASC DENSE;
D = rank C by a ASC,c DESC DENSE;
E = rank D by b DESC,c ASC DENSE;
@@ -5816,7 +5835,7 @@ store a into ':OUTPATH:';\,
store F into ':OUTPATH:';
\,
'verify_pig_script' => q\
- A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+ A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
B = foreach A generate rankbdca,rankaacd,rankaaba,a,b,c;
store B into ':OUTPATH:';
\,
@@ -5825,8 +5844,9 @@ store a into ':OUTPATH:';\,
'execonly' => 'mapred,tez',
'pig' => q\
SET default_parallel 9;
+ SET mapreduce.input.fileinputformat.split.maxsize '300';
SET pig.splitCombination false;
- A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+ A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
B = foreach A generate a,b,c;
C = rank B by a ASC,b ASC DENSE;
D = rank B by a ASC,c DESC DENSE;
@@ -5836,7 +5856,7 @@ store a into ':OUTPATH:';\,
store H into ':OUTPATH:';
\,
'verify_pig_script' => q\
- A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+ A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
C = foreach A generate rankaaba,a,b,c;
E = order C by a ASC,b ASC;
D = foreach A generate rankaacd,a,b,c;
Modified: pig/trunk/test/e2e/pig/tools/generate/generate_data.pl
URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/tools/generate/generate_data.pl?rev=1767791&r1=1767790&r2=1767791&view=diff
==============================================================================
--- pig/trunk/test/e2e/pig/tools/generate/generate_data.pl (original)
+++ pig/trunk/test/e2e/pig/tools/generate/generate_data.pl Wed Nov 2 21:52:15 2016
@@ -41,7 +41,6 @@ our @lastName = ("allen", "brown", "cars
# rankaacd: RANK BY a ASC , c DESC
# rankaaba: RANK BY a ASC , b ASC
# a,b,c: values
-# tail: long value in order to create multiple mappers
############################################################################
our @rankedTuples = (
"1,21,5,7,1,1,0,8,8","2,26,2,3,2,5,1,9,10","3,30,24,21,2,3,1,3,10","4,6,10,8,3,4,1,7,2",
@@ -501,22 +500,10 @@ sub getBulkCopyCmd(){
my $randf = rand(10);
printf HDFS "%d:%d:%d:%d:%d:%dL:%.2ff:%.2f\n", $tid, $i, $rand5, $rand100, $rand1000, $rand1000, $randf, $randf;
}
- } elsif ($filetype eq "ranking") {
+ } elsif ($filetype eq "ranking") {
for (my $i = 0; $i < $numRows; $i++) {
my $tuple = $rankedTuples[int($i)];
- printf HDFS "$tuple,";
- for my $j ( 0 .. 1000000) {
- printf HDFS "%d",$j;
- }
- printf HDFS "\n";
- }
- } elsif ($filetype eq "biggish") {
- for (my $i = 1; $i < $numRows; $i++) {
- printf HDFS "$i,$i,";
- for my $j ( 0 .. 1000) {
- printf HDFS "%d",$j;
- }
- printf HDFS "\n";
+ printf HDFS "$tuple\n";
}
} elsif ($filetype eq "utf8Student") {
srand(3.14159 + $numRows);