You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by da...@apache.org on 2016/11/02 21:52:16 UTC

svn commit: r1767791 - in /pig/trunk: CHANGES.txt test/e2e/pig/deployers/ExistingClusterDeployer.pm test/e2e/pig/tests/nightly.conf test/e2e/pig/tools/generate/generate_data.pl

Author: daijy
Date: Wed Nov  2 21:52:15 2016
New Revision: 1767791

URL: http://svn.apache.org/viewvc?rev=1767791&view=rev
Log:
PIG-5036: Remove biggish from e2e input dataset

Modified:
    pig/trunk/CHANGES.txt
    pig/trunk/test/e2e/pig/deployers/ExistingClusterDeployer.pm
    pig/trunk/test/e2e/pig/tests/nightly.conf
    pig/trunk/test/e2e/pig/tools/generate/generate_data.pl

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1767791&r1=1767790&r2=1767791&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Wed Nov  2 21:52:15 2016
@@ -24,6 +24,8 @@ INCOMPATIBLE CHANGES
  
 IMPROVEMENTS
 
+PIG-5036: Remove biggish from e2e input dataset (daijy)
+
 PIG-5053: Can't change HDFS user home in e2e tests using Ant (nkollar via daijy)
 
 PIG-5037: Add api getDisplayString to PigStats (zjffdu)

Modified: pig/trunk/test/e2e/pig/deployers/ExistingClusterDeployer.pm
URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/deployers/ExistingClusterDeployer.pm?rev=1767791&r1=1767790&r2=1767791&view=diff
==============================================================================
--- pig/trunk/test/e2e/pig/deployers/ExistingClusterDeployer.pm (original)
+++ pig/trunk/test/e2e/pig/deployers/ExistingClusterDeployer.pm Wed Nov  2 21:52:15 2016
@@ -231,11 +231,6 @@ sub generateData
             'rows' => 5000,
             'hdfs' => "types/numbers.txt",
         }, {
-            'name' => "biggish",
-            'filetype' => "biggish",
-            'rows' => 1000000,
-            'hdfs' => "singlefile/biggish",
-        }, {
             'name' => "prerank",
             'filetype' => "ranking",
             'rows' => 30,

Modified: pig/trunk/test/e2e/pig/tests/nightly.conf
URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/tests/nightly.conf?rev=1767791&r1=1767790&r2=1767791&view=diff
==============================================================================
--- pig/trunk/test/e2e/pig/tests/nightly.conf (original)
+++ pig/trunk/test/e2e/pig/tests/nightly.conf Wed Nov  2 21:52:15 2016
@@ -5657,13 +5657,15 @@ store a into ':OUTPATH:';\,
                         'execonly' => 'mapred,tez',
                         'pig' => q\
                                     SET default_parallel 7;
-                                    A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+                                    SET mapreduce.input.fileinputformat.split.maxsize '300';
+                                    SET pig.splitCombination false;
+                                    A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
                                     B = rank A;
                                     C = foreach B generate rank_A,a,b,c;
                                     store C into ':OUTPATH:';
                                 \,
                         'verify_pig_script' => q\
-                                    A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+                                    A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
                                     C = foreach A generate rownumber,a,b,c;
                                     store C into ':OUTPATH:';
                                 \,
@@ -5672,13 +5674,15 @@ store a into ':OUTPATH:';\,
                         'execonly' => 'mapred,tez',
                         'pig' => q\
                                     SET default_parallel 9;
-                                    A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+                                    SET mapreduce.input.fileinputformat.split.maxsize '300';
+                                    SET pig.splitCombination false;
+                                    A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
                                     B = rank A by b DESC,a ASC;
                                     C = foreach B generate rank_A,b,a;
                                     store C into ':OUTPATH:';
                                 \,
                         'verify_pig_script' => q\
-                                    A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+                                    A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
                                     C = foreach A generate rankbdaa,b,a;
                                     store C into ':OUTPATH:';
                                 \,
@@ -5687,13 +5691,15 @@ store a into ':OUTPATH:';\,
                         'execonly' => 'mapred,tez',
                         'pig' => q\
                                     SET default_parallel 7;
-                                    A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+                                    SET mapreduce.input.fileinputformat.split.maxsize '300';
+                                    SET pig.splitCombination false;
+                                    A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
                                     B = rank A by c ASC,b DESC;
                                     C = foreach B generate rank_A,c,b;
                                     store C into ':OUTPATH:';
                                 \,
                         'verify_pig_script' => q\
-                                    A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+                                    A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
                                     C = foreach A generate rankcabd,c,b;
                                     store C into ':OUTPATH:';
                                 \,
@@ -5701,26 +5707,29 @@ store a into ':OUTPATH:';\,
                         'num' => 4,
                         'execonly' => 'mapred,tez',
                         'pig' => q\
-                                    SET default_parallel 25;
-                                    A = LOAD ':INPATH:/singlefile/biggish' using PigStorage(',') as (rownumber:long,idx:long,tail:bytearray);
+                                    SET default_parallel 5;
+                                    SET mapreduce.input.fileinputformat.split.maxsize '300';
+                                    SET pig.splitCombination false;
+                                    A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
                                     B = rank A;
                                     C = order B by rank_A;
-                                    D = foreach C generate rank_A,rownumber;
+                                    D = foreach C generate rank_A,a,b,c;
                                     store D into ':OUTPATH:';
                                 \,
                         'verify_pig_script' => q\
-                                    A = LOAD ':INPATH:/singlefile/biggish' using PigStorage(',') as (rownumber:long,idx:long,tail:bytearray);
-                                    D = foreach A generate idx,rownumber;
+                                    A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
+                                    D = foreach A generate rownumber,a,b,c;
                                     store D into ':OUTPATH:';
                                 \,
                     }, {
                         'num' => 5,
                         'execonly' => 'mapred,tez',
                         'pig' => q\
-                                    SET default_parallel 11;
+                                    SET default_parallel 5;
+                                    SET mapreduce.input.fileinputformat.split.maxsize '300';
                                     SET pig.splitCombination false;
-                                    A = LOAD ':INPATH:/singlefile/biggish' using PigStorage(',') as (rownumber:long,idx:long,tail:bytearray);
-                                    B = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+                                    A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
+                                    B = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
                                     C = join A by rownumber, B by rownumber;
                                     D = order C by B::rankcabd,B::rankbdca,B::rankaaba;
                                     E = rank D;
@@ -5730,7 +5739,7 @@ store a into ':OUTPATH:';\,
                                     store H into ':OUTPATH:';
                                 \,
                         'verify_pig_script' => q\
-                                    A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,idx:long,tail:bytearray);
+                                    A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,idx:long);
                                     B = foreach A generate rownumber,1;
                                     C = order B by rownumber;
                                     store C into ':OUTPATH:';
@@ -5739,14 +5748,16 @@ store a into ':OUTPATH:';\,
                         'num' => 6,
                         'execonly' => 'mapred,tez',
                         'pig' => q\
-                                    A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+                                    SET mapreduce.input.fileinputformat.split.maxsize '300';
+                                    SET pig.splitCombination false;
+                                    A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
                                     split A into M if rownumber > 15, N if rownumber < 25;
                                     C = rank N;
                                     D = foreach C generate $0, a, b, c;
                                     store D into ':OUTPATH:';
                                 \,
                         'verify_pig_script' => q\
-                                    A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+                                    A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
                                     B = filter A by rownumber < 25;
                                     D = foreach B generate rownumber, a, b, c;
                                     store D into ':OUTPATH:';
@@ -5761,14 +5772,16 @@ store a into ':OUTPATH:';\,
 						'num' => 1,
 						'execonly' => 'mapred,tez',
 						'pig' => q\
+                                                                        SET mapreduce.input.fileinputformat.split.maxsize '300';
+                                                                        SET pig.splitCombination false;
 									SET default_parallel 9;
-									A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+									A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
 									B = rank A by a ASC,b ASC DENSE;
 									C = foreach B generate rank_A,a,b;
 									store C into ':OUTPATH:';
 								\,
 						'verify_pig_script' => q\
-									A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+									A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
 									C = foreach A generate rankaaba,a,b;
 									store C into ':OUTPATH:';
 								\,
@@ -5776,14 +5789,16 @@ store a into ':OUTPATH:';\,
 						'num' => 2,
 						'execonly' => 'mapred,tez',
 						'pig' => q\
+                                                                        SET mapreduce.input.fileinputformat.split.maxsize '300';
+                                                                        SET pig.splitCombination false;
 									SET default_parallel 9;
-									A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+									A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
 									B = rank A by a ASC,c DESC DENSE;
 									C = foreach B generate rank_A,a,c;
 									store C into ':OUTPATH:';
 								\,
 						'verify_pig_script' => q\
-									A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+									A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
 									C = foreach A generate rankaacd,a,c;
 									store C into ':OUTPATH:';
 								\,
@@ -5791,14 +5806,16 @@ store a into ':OUTPATH:';\,
 						'num' => 3,
 						'execonly' => 'mapred,tez',
 						'pig' => q\
+                                                                        SET mapreduce.input.fileinputformat.split.maxsize '300';
+                                                                        SET pig.splitCombination false;
 									SET default_parallel 7;
-									A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+									A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
 									B = rank A by b DESC,c ASC DENSE;
 									C = foreach B generate rank_A,b,c;
 									store C into ':OUTPATH:';
 								\,
 						'verify_pig_script' => q\
-									A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+									A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
 									C = foreach A generate rankbdca,b,c;
 									store C into ':OUTPATH:';
 								\,
@@ -5806,9 +5823,11 @@ store a into ':OUTPATH:';\,
 						'num' => 4,
 						'execonly' => 'mapred,tez',
 						'pig' => q\
+                                                                        SET mapreduce.input.fileinputformat.split.maxsize '300';
+                                                                        SET pig.splitCombination false;
 									SET default_parallel 7;
-									A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
-									B = foreach A generate a,b,c,tail;
+									A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
+									B = foreach A generate a,b,c;
 									C = rank B by a ASC,b ASC DENSE;
 									D = rank C by a ASC,c DESC DENSE;
 									E = rank D by b DESC,c ASC DENSE;
@@ -5816,7 +5835,7 @@ store a into ':OUTPATH:';\,
 									store F into ':OUTPATH:';
 								\,
 						'verify_pig_script' => q\
-									A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+									A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
 									B = foreach A generate rankbdca,rankaacd,rankaaba,a,b,c;
 									store B into ':OUTPATH:';
 								\,
@@ -5825,8 +5844,9 @@ store a into ':OUTPATH:';\,
 						'execonly' => 'mapred,tez',
 						'pig' => q\
 									SET default_parallel 9;
+                                                                        SET mapreduce.input.fileinputformat.split.maxsize '300';
 									SET pig.splitCombination false;
-									A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+									A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
 									B = foreach A generate a,b,c;
 									C = rank B by a ASC,b ASC DENSE;
 									D = rank B by a ASC,c DESC DENSE;
@@ -5836,7 +5856,7 @@ store a into ':OUTPATH:';\,
 									store H into ':OUTPATH:';
 								\,
 						'verify_pig_script' => q\
-									A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+									A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
 									C = foreach A generate rankaaba,a,b,c;
 									E = order C by a ASC,b ASC;
 									D = foreach A generate rankaacd,a,b,c;

Modified: pig/trunk/test/e2e/pig/tools/generate/generate_data.pl
URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/tools/generate/generate_data.pl?rev=1767791&r1=1767790&r2=1767791&view=diff
==============================================================================
--- pig/trunk/test/e2e/pig/tools/generate/generate_data.pl (original)
+++ pig/trunk/test/e2e/pig/tools/generate/generate_data.pl Wed Nov  2 21:52:15 2016
@@ -41,7 +41,6 @@ our @lastName = ("allen", "brown", "cars
 #	rankaacd: RANK BY a ASC , c DESC
 #	rankaaba: RANK BY a ASC , b ASC
 #	a,b,c:    values
-#	tail:     long value in order to create multiple mappers
 ############################################################################
 our @rankedTuples = (
 	"1,21,5,7,1,1,0,8,8","2,26,2,3,2,5,1,9,10","3,30,24,21,2,3,1,3,10","4,6,10,8,3,4,1,7,2",
@@ -501,22 +500,10 @@ sub getBulkCopyCmd(){
             my $randf = rand(10);
             printf HDFS "%d:%d:%d:%d:%d:%dL:%.2ff:%.2f\n", $tid, $i, $rand5, $rand100, $rand1000, $rand1000, $randf, $randf;
         }
-    }  elsif ($filetype eq "ranking") {
+    } elsif ($filetype eq "ranking") {
         for (my $i = 0; $i < $numRows; $i++) {
             my $tuple = $rankedTuples[int($i)];
-            printf HDFS "$tuple,";
-            for my $j ( 0 .. 1000000) {
-				printf HDFS "%d",$j;
-			}
-			printf HDFS "\n";
-        }
-    } elsif ($filetype eq "biggish") {
-        for (my $i = 1; $i < $numRows; $i++) {
-            printf HDFS "$i,$i,";
-            for my $j ( 0 .. 1000) {
-				printf HDFS "%d",$j;
-            }
-            printf HDFS "\n";
+            printf HDFS "$tuple\n";
         }
     } elsif ($filetype eq "utf8Student") {
         srand(3.14159 + $numRows);