You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ga...@apache.org on 2011/05/09 22:28:30 UTC
svn commit: r1101209 - in /pig/trunk: ./ test/e2e/harness/ test/e2e/pig/
test/e2e/pig/conf/ test/e2e/pig/drivers/ test/e2e/pig/tests/
Author: gates
Date: Mon May 9 20:28:29 2011
New Revision: 1101209
URL: http://svn.apache.org/viewvc?rev=1101209&view=rev
Log:
PIG-1949: e2e test harness should use bin/pig rather than calling java directly
Added:
pig/trunk/test/e2e/pig/tests/streaming_local.conf
Modified:
pig/trunk/CHANGES.txt
pig/trunk/test/e2e/harness/TestDriver.pm
pig/trunk/test/e2e/pig/Makefile
pig/trunk/test/e2e/pig/conf/default.conf
pig/trunk/test/e2e/pig/drivers/TestDriverPig.pm
pig/trunk/test/e2e/pig/tests/cmdline.conf
pig/trunk/test/e2e/pig/tests/nightly.conf
pig/trunk/test/e2e/pig/tests/streaming.conf
Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1101209&r1=1101208&r2=1101209&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Mon May 9 20:28:29 2011
@@ -24,6 +24,9 @@ INCOMPATIBLE CHANGES
IMPROVEMENTS
+PIG-1949: e2e test harness should use bin/pig rather than calling java
+directly (gates)
+
PIG-2026: e2e tests in eclipse classpath (azaroth via hashutosh)
PIG-2024: Incorrect jar paths in .classpath template for eclipse (azaroth via hashutosh)
Modified: pig/trunk/test/e2e/harness/TestDriver.pm
URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/harness/TestDriver.pm?rev=1101209&r1=1101208&r2=1101209&view=diff
==============================================================================
--- pig/trunk/test/e2e/harness/TestDriver.pm (original)
+++ pig/trunk/test/e2e/harness/TestDriver.pm Mon May 9 20:28:29 2011
@@ -61,8 +61,8 @@ sub printResults
my $msg = "$prefix, PASSED: $pass FAILED: $fail SKIPPED: $skipped ABORTED: $abort "
. "FAILED DEPENDENCY: $depend";
- print $log "\n$msg\n";
-# print "$msg\n";
+ print $log "$msg\n";
+ print "$msg\r";
}
@@ -200,6 +200,7 @@ sub generateBenchmark
# testResult - reference to hash returned by runTest.
# benchmarkResult - reference to hash returned by generateBenchmark.
# log - reference to a stream pointer for the logs
+# testHash - reference to hash with meta tags and commands
#
# Returns:
# @returns reference true if results are the same, false otherwise. "the
@@ -300,7 +301,7 @@ sub run
$self->globalSetup(\%globalHash, $log);
my $report=0;
- my $properties= new Properties( 1, $globalHash{'propertiesFile'} );
+ my $properties= new Properties(0, $globalHash{'propertiesFile'});
my %groupExecuted;
foreach my $group (@{$cfg->{'groups'}}) {
@@ -429,20 +430,19 @@ sub run
$endTime = time;
$benchmarkResult = $self->generateBenchmark(\%testHash, $log);
my $result =
- $self->compare($testResult, $benchmarkResult, $log);
- print $log "INFO: $subName() at ".__LINE__.":Test $testName\n";
- #print $log "Test $testName\n";
+ $self->compare($testResult, $benchmarkResult, $log, \%testHash);
+ $msg = "INFO: $subName() at ".__LINE__.":Test $testName";
if ($result) {
- $msg=" SUCCEEDED";
+ $msg .= " SUCCEEDED";
$testStatuses->{$testName} = $passedStr;
} else {
- $msg= " FAILED";
+ $msg .= " FAILED";
$testStatuses->{$testName} = $failedStr;
}
- $msg= "$msg at " . time . "\n";
+ $msg= "$msg at " . time . "\n";
#print $msg;
print $log $msg;
$duration = $endTime - $beginTime;
Modified: pig/trunk/test/e2e/pig/Makefile
URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/Makefile?rev=1101209&r1=1101208&r2=1101209&view=diff
==============================================================================
--- pig/trunk/test/e2e/pig/Makefile (original)
+++ pig/trunk/test/e2e/pig/Makefile Mon May 9 20:28:29 2011
@@ -105,18 +105,18 @@ $(ARCHIVE): $(TESTS) $(DRIVERS) $(CONF_F
# Note, you must set PH_PIG_JARFILE environment variable
build_udfs:
- (if [ "$${PH_PIG}x" == "x" ] ; then echo \
+ @(if [ "$${PH_PIG}x" == "x" ] ; then echo \
"You must set the environment variable PH_PIG" \
"to the directory your pig.jar is in " \
"before building the UDFs"; 1; fi)
(cd $(JAVA_UDF_DIR); ant -Dpig.jarfile=$${PH_PIG}/pig.jar)
test: $(ARCHIVE) build_udfs
- (if [ "$${PH_CLUSTER}x" == "x" ] ; then echo \
+ @(if [ "$${PH_CLUSTER}x" == "x" ] ; then echo \
"You must set the environment variable PH_CLUSTER" \
"to the directory that contains your hadoop-site.xml" \
"before running the tests"; 1; fi)
- (if [ "$${PH_JYTHON_JAR}x" == "x" ] ; then echo \
+ @(if [ "$${PH_JYTHON_JAR}x" == "x" ] ; then echo \
"You must set the environment variable PH_JYTHON_JAR" \
"to the path of your jython jar" \
"before running the tests"; 1; fi)
Modified: pig/trunk/test/e2e/pig/conf/default.conf
URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/conf/default.conf?rev=1101209&r1=1101208&r2=1101209&view=diff
==============================================================================
--- pig/trunk/test/e2e/pig/conf/default.conf (original)
+++ pig/trunk/test/e2e/pig/conf/default.conf Mon May 9 20:28:29 2011
@@ -46,7 +46,7 @@ $cfg = {
, 'testconfigpath' => "$ENV{PH_CLUSTER}/conf/"
, 'funcjarPath' => "$ENV{PH_ROOT}/lib/java"
, 'paramPath' => "$ENV{PH_ROOT}/paramfiles"
- , 'testjar' => "$ENV{PH_PIG}/pig.jar"
+ , 'pigpath' => "$ENV{PH_PIG}"
#HADOOP
, 'hadoopHome' => "$ENV{PH_ROOT}/lib"
@@ -66,5 +66,6 @@ $cfg = {
,'logDir' => "$ENV{PH_OUT}/log"
,'propertiesFile' => "./conf/testpropertiesfile.conf"
+ ,'pigharness.console.level' => 'ERROR'
};
Modified: pig/trunk/test/e2e/pig/drivers/TestDriverPig.pm
URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/drivers/TestDriverPig.pm?rev=1101209&r1=1101208&r2=1101209&view=diff
==============================================================================
--- pig/trunk/test/e2e/pig/drivers/TestDriverPig.pm (original)
+++ pig/trunk/test/e2e/pig/drivers/TestDriverPig.pm Mon May 9 20:28:29 2011
@@ -159,13 +159,6 @@ sub globalSetup
$globalHash->{'outpath'} = $globalHash->{'outpathbase'} . "/" . $globalHash->{'runid'} . "/";
$globalHash->{'localpath'} = $globalHash->{'localpathbase'} . "/" . $globalHash->{'runid'} . "/";
- # extract the current zebra.jar file path from the classpath
- # and enter it in the hash for use in the substitution of :ZEBRAJAR:
- my $zebrajar = $globalHash->{'cp'};
- $zebrajar =~ s/zebra.jar.*/zebra.jar/;
- $zebrajar =~ s/.*://;
- $globalHash->{'zebrajar'} = $zebrajar;
-
# add libexec location to the path
if (defined($ENV{'PATH'})) {
$ENV{'PATH'} = $globalHash->{'scriptPath'} . ":" . $ENV{'PATH'};
@@ -174,7 +167,7 @@ sub globalSetup
$ENV{'PATH'} = $globalHash->{'scriptPath'};
}
- my @cmd = (Util::getBasePigCmd($globalHash), '-e', 'mkdir', $globalHash->{'outpath'});
+ my @cmd = ($self->getPigCmd($globalHash, $log), '-e', 'mkdir', $globalHash->{'outpath'});
if($self->{'exectype'} eq "local")
{
@@ -201,16 +194,16 @@ sub globalSetup
sub runTest
{
- my ($self, $testCmd, $log, $copyResults) = @_;
+ my ($self, $testCmd, $log) = @_;
my $subName = (caller(0))[3];
# Handle the various methods of running used in
# the original TestDrivers
if ( $testCmd->{'pig'} && $self->hasCommandLineVerifications( $testCmd, $log) ) {
- return $self->runPigCmdLine( $testCmd, $log, $copyResults );
+ return $self->runPigCmdLine( $testCmd, $log, 1);
} elsif( $testCmd->{'pig'} ){
- return $self->runPig( $testCmd, $log, $copyResults );
+ return $self->runPig( $testCmd, $log, 1);
#} elsif( $testCmd->{'pigsql'} ){
# return $self->runPigSql( $testCmd, $log, $copyResults );
} elsif( $testCmd->{'script'} ){
@@ -249,7 +242,7 @@ sub runPigCmdLine
close(FH);
# Build the command
- my @baseCmd = Util::getBasePigCmd($testCmd);
+ my @baseCmd = $self->getPigCmd($testCmd, $log);
my @cmd = @baseCmd;
# Add option -l giving location for secondary logs
@@ -341,6 +334,41 @@ sub runScript
}
+##############################################################################
+# Build the base command used to invoke pig through bin/pig.
+#
+# Side effects: sets $ENV{'PIG_CLASSPATH'} on every call, and sets
+# $ENV{'PIG_OPTS'} when java parameters apply.  Both are set in the current
+# process environment so that the command later run through IPC::Run
+# inherits them.
+#
+# Parameters:
+# self - the driver object (not used here)
+# testCmd - reference to hash with meta tags and commands.  Keys read:
+#           jythonjar, classpath, testconfigpath, exectype, pigpath,
+#           additionaljars, java_params
+# log - reference to a stream pointer for the logs (not used here)
+#
+# Returns:
+# list holding the pig executable followed by its base arguments
+#
+sub getPigCmd($$$)
+{
+    my ($self, $testCmd, $log) = @_;
+
+    # An undefined exectype is treated the same as any non-local exectype.
+    my $isLocal = defined($testCmd->{'exectype'}) && $testCmd->{'exectype'} eq "local";
+
+    # Collect the PIG_CLASSPATH pieces.  Building the value with join/grep
+    # (instead of "my $pcp .= ... if ...") keeps the variable from carrying
+    # state between calls and avoids empty path elements when a piece is
+    # not configured.
+    my @cpParts = ($testCmd->{'jythonjar'}, $testCmd->{'classpath'});
+    push(@cpParts, $testCmd->{'testconfigpath'}) unless ($isLocal);
+
+    # Set it in our current environment.  It will get inherited by the IPC::Run
+    # command.
+    $ENV{'PIG_CLASSPATH'} = join(":", grep { defined($_) } @cpParts);
+
+    my @pigCmd = ("$testCmd->{'pigpath'}/bin/pig");
+
+    if (defined($testCmd->{'additionaljars'})) {
+        push(@pigCmd, '-Dpig.additional.jars=' . $testCmd->{'additionaljars'});
+    }
+
+    # Work on a copy so the caller's java_params array does not grow by one
+    # "-Xmx1024m" on every call.
+    my $haveJavaParams = defined($testCmd->{'java_params'});
+    my @javaParams = $haveJavaParams ? @{$testCmd->{'java_params'}} : ();
+
+    if ($isLocal) {
+        push(@javaParams, "-Xmx1024m");
+        push(@pigCmd, ("-x", "local"));
+    }
+
+    # NOTE(review): when no java parameters apply, PIG_OPTS is left as is, so
+    # a value set for an earlier test stays in effect - confirm this is wanted.
+    if ($haveJavaParams || $isLocal) {
+        $ENV{'PIG_OPTS'} = join(" ", @javaParams);
+    }
+
+    return @pigCmd;
+}
+
+
sub runPig
{
@@ -361,7 +389,7 @@ sub runPig
# Build the command
- my @baseCmd = Util::getBasePigCmd($testCmd);
+ my @baseCmd = $self->getPigCmd($testCmd, $log);
my @cmd = @baseCmd;
# Add option -l giving location for secondary logs
@@ -382,6 +410,7 @@ sub runPig
# Run the command
+ print $log "Setting PIG_CLASSPATH to $ENV{'PIG_CLASSPATH'}\n";
print $log "$0::$className::$subName INFO: Going to run pig command: @cmd\n";
IPC::Run::run(\@cmd, \undef, $log, $log) or
@@ -392,6 +421,7 @@ sub runPig
# Get results from the command locally
my $localoutfile;
my $localdir = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".out";
+ $self->parseSqlQueries($testCmd);
my @SQLQuery = @{$testCmd->{'queries'}}; # here only used to determine if single-guery of multi-query
# mapreduce
@@ -515,6 +545,7 @@ sub generateBenchmark
my %result;
+ $self->parseSqlQueries($testCmd);
my @SQLQuery = @{$testCmd->{'queries'}};
if ($#SQLQuery == 0) {
@@ -663,7 +694,7 @@ sub compareScript
print $log "$0::$subName INFO Checking test stdout' " .
"as exact match against expected <$testCmd->{'expected_out'}>\n";
if ($testResult->{'stdout'} ne $testCmd->{'expected_out'}) {
- print $log "$0::$subName INFO Check failed: exact match of <$testCmd->{'expected_out'}> expected in stdout: $testResult->{'output'}\n";
+ print $log "$0::$subName INFO Check failed: exact match of <$testCmd->{'expected_out'}> expected in stdout: $testResult->{'stdout'}\n";
$result = 0;
}
}
@@ -672,7 +703,7 @@ sub compareScript
print $log "$0::$subName INFO Checking test stdout " .
"as NOT exact match against expected <$testCmd->{'expected_out'}>\n";
if ($testResult->{'stdout'} eq $testCmd->{'not_expected_out'}) {
- print $log "$0::$subName INFO Check failed: NON-match of <$testCmd->{'expected_out'}> expected to stdout: $testResult->{'output'}\n";
+ print $log "$0::$subName INFO Check failed: NON-match of <$testCmd->{'expected_out'}> expected to stdout: $testResult->{'stdout'}\n";
$result = 0;
}
}
@@ -681,7 +712,7 @@ sub compareScript
print $log "$0::$subName INFO Checking test stdout " .
"for regular expression <$testCmd->{'expected_out_regex'}>\n";
if ($testResult->{'stdout'} !~ $testCmd->{'expected_out_regex'}) {
- print $log "$0::$subName INFO Check failed: regex match of <$testCmd->{'expected_out_regex'}> expected in stdout: $testResult->{'output'}\n";
+ print $log "$0::$subName INFO Check failed: regex match of <$testCmd->{'expected_out_regex'}> expected in stdout: $testResult->{'stdout'}\n";
$result = 0;
}
}
@@ -690,7 +721,7 @@ sub compareScript
print $log "$0::$subName INFO Checking test stdout " .
"for NON-match of regular expression <$testCmd->{'not_expected_out_regex'}>\n";
if ($testResult->{'stdout'} =~ $testCmd->{'not_expected_out_regex'}) {
- print $log "$0::$subName INFO Check failed: regex NON-match of <$testCmd->{'not_expected_out_regex'}> expected in stdout: $testResult->{'output'}\n";
+ print $log "$0::$subName INFO Check failed: regex NON-match of <$testCmd->{'not_expected_out_regex'}> expected in stdout: $testResult->{'stdout'}\n";
$result = 0;
}
}
@@ -752,6 +783,7 @@ sub comparePig
my $subName = (caller(0))[3];
my $result;
+ $self->parseSqlQueries($testCmd);
my @SQLQuery = @{$testCmd->{'queries'}};
if ($#SQLQuery == 0) {
@@ -776,6 +808,8 @@ sub compareSingleOutput
{
my ($self, $testResult, $testOutput, $benchmarkOutput, $log) = @_;
+print $log "testResult: $testResult testOutput: $testOutput benchmarkOutput: $benchmarkOutput\n";
+
# cksum the the two files to see if they are the same
my ($testChksm, $benchmarkChksm);
IPC::Run::run((['cat', $testOutput], '|', ['cksum']), \$testChksm,
@@ -815,6 +849,26 @@ sub compareSingleOutput
return $result;
}
+##############################################################################
+# Parse the SQL queries from a string into an array
+#
+# Splits $testCmd->{'sql'} on ';' and stores a reference to the resulting
+# list in $testCmd->{'queries'}.  A trailing entry that is only white space
+# is dropped; after that, a trailing entry that contains '--' but no
+# 'select' is dropped as well, unless it is the only entry left.
+#
+# Parameters:
+# self - the driver object (not used here)
+# testCmd - reference to the test hash; 'sql' is read, 'queries' is written
+#
+# Returns:
+# the array reference stored in $testCmd->{'queries'}
+#
+sub parseSqlQueries($$)
+{
+    my ($self, $testCmd) = @_;
+
+    # A test without an 'sql' entry gives an empty query list instead of
+    # warnings about an undefined value.
+    my $sql = defined($testCmd->{'sql'}) ? $testCmd->{'sql'} : '';
+    my @SQLQuery = split /;/, $sql;
+
+    # Throw out the last one if it is just space
+    if (@SQLQuery && $SQLQuery[-1] =~ /^\s*$/) { pop(@SQLQuery); }
+
+    # If the last one is a comment (and not the only entry), drop it too
+    if (@SQLQuery > 1 && $SQLQuery[-1] !~ /select/i && $SQLQuery[-1] =~ /--/) {
+        pop(@SQLQuery);
+    }
+
+    $testCmd->{'queries'} = \@SQLQuery;
+    return $testCmd->{'queries'};
+}
+
###############################################################################
# This method has been copied over from TestDriver to make changes to
# support skipping tests which do not match current execution mode
@@ -833,6 +887,7 @@ sub compareSingleOutput
# @returns nothing
# failed.
#
+=begin
sub run
{
my ($self, $testsToRun, $testsToMatch, $cfg, $log, $dbh, $testStatuses,
@@ -1108,6 +1163,7 @@ sub run
# Do the global cleanup
$self->globalCleanup(\%globalHash, $log);
}
+=cut
##############################################################################
# Sub: printGroupResultsXml
Modified: pig/trunk/test/e2e/pig/tests/cmdline.conf
URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/tests/cmdline.conf?rev=1101209&r1=1101208&r2=1101209&view=diff
==============================================================================
--- pig/trunk/test/e2e/pig/tests/cmdline.conf (original)
+++ pig/trunk/test/e2e/pig/tests/cmdline.conf Mon May 9 20:28:29 2011
@@ -84,7 +84,7 @@ describe A;\,
'pig' => q\
A = load ':INPATH:/singlefile/studenttabcomplex10k' using PigStorage() as (m:map[],x,y);
describe A;\,
- 'expected_out_regex' =>"A: {m: map\\[ \\],x: bytearray,y: bytearray}",
+ 'expected_out_regex' =>"A: {m: map\\[\\],x: bytearray,y: bytearray}",
},
#JIRA[PIG-373]
@@ -99,7 +99,7 @@ describe B;\,
#Expect
# A : { m:map, x:bytearray, y:bytearray }
# B : { m :map }
- 'expected_out_regex' => "A: {m: map\\[ \\],x: bytearray,y: bytearray}\nB: {m: map\\[ \\]}",
+ 'expected_out_regex' => "A: {m: map\\[\\],x: bytearray,y: bytearray}\nB: {m: map\\[\\]}",
},
#JIRA[PIG-373]
#JIRA[PIG-405]
@@ -113,7 +113,7 @@ describe B;\,
#Expect
# A : { m:map, x:bytearray, y:bytearray }
# B : { m :map }
- 'expected_out_regex' => "A: {m: map\\[ \\],x: bytearray,y: bytearray}\nB: {m: map\\[ \\]}",
+ 'expected_out_regex' => "A: {m: map\\[\\],x: bytearray,y: bytearray}\nB: {m: map\\[\\]}",
},
#JIRA[PIG-373]
#JIRA[PIG-405]
@@ -127,7 +127,7 @@ describe A;\,
#Expect
# A : { m:map, x:bytearray, y:bytearray }
# B : { m :map }
- 'expected_out_regex' => "B: {m: map\\[ \\]}\nA: {m: map\\[ \\],x: bytearray,y: bytearray}",
+ 'expected_out_regex' => "B: {m: map\\[\\]}\nA: {m: map\\[\\],x: bytearray,y: bytearray}",
},
#JIRA[PIG-379]
@@ -148,7 +148,7 @@ describe D;\,
# C: {name: chararray,age: integer}
# D: {group: chararray C: {name: chararray,age: integer}}
'expected_out_regex' =>
-"A: {name: chararray,age: int,gpa: float}\nB: {name: chararray,age: int}\nC: {name: chararray,age: int}\nD: {group: chararray,C: {name: chararray,age: int}}\n",
+"A: {name: chararray,age: int,gpa: float}\nB: {name: chararray,age: int}\nC: {name: chararray,age: int}\nD: {group: chararray,C: {(name: chararray,age: int)}}",
},
{
@@ -168,7 +168,7 @@ describe D;\,
# C: {name: chararray,age: integer}
# D: {group: chararray C: {name: chararray,age: integer}}
'expected_out_regex' =>
-"A: {name: chararray,age: int,gpa: float}\nB: {name: chararray,age: int}\nC: {name: chararray,age: int}\nD: {group: chararray,C: {name: chararray,age: int}}\n",
+"A: {name: chararray,age: int,gpa: float}\nB: {name: chararray,age: int}\nC: {name: chararray,age: int}\nD: {group: chararray,C: {(name: chararray,age: int)}}\n",
},
{
@@ -188,7 +188,7 @@ describe A;\,
# C: {name: chararray,age: integer}
# D: {group: chararray C: {name: chararray,age: integer}}
'expected_out_regex' =>
-"D: {group: chararray,C: {name: chararray,age: int}}\nC: {name: chararray,age: int}\nB: {name: chararray,age: int}\nA: {name: chararray,age: int,gpa: float}",
+"D: {group: chararray,C: {(name: chararray,age: int)}}\nC: {name: chararray,age: int}\nB: {name: chararray,age: int}\nA: {name: chararray,age: int,gpa: float}",
},
{
Modified: pig/trunk/test/e2e/pig/tests/nightly.conf
URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/tests/nightly.conf?rev=1101209&r1=1101208&r2=1101209&view=diff
==============================================================================
--- pig/trunk/test/e2e/pig/tests/nightly.conf (original)
+++ pig/trunk/test/e2e/pig/tests/nightly.conf Mon May 9 20:28:29 2011
@@ -927,7 +927,7 @@ from studenttab10k as a join studenttab1
'num' => 6,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
-c = join a by name, b by name using "repl";
+c = join a by name, b by name using 'repl';
store c into ':OUTPATH:';\,
'sql' => "select a.name, a.age, a.gpa, b.name, b.age, b.gpa
from studenttab10k as a join studenttab10k as b using(name);",
@@ -938,7 +938,7 @@ from studenttab10k as a join studenttab1
'num' => 7,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
b = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
-c = join a by name, b by name using "repl";
+c = join a by name, b by name using 'repl';
store c into ':OUTPATH:';\,
'sql' => "select a.name, a.age, a.gpa, b.name, b.age, b.gpa
from studenttab10k as a join studenttab10k as b using(name);",
@@ -949,7 +949,7 @@ from studenttab10k as a join studenttab1
'num' => 8,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
b = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa);
-c = join a by gpa, b by gpa using "repl";
+c = join a by gpa, b by gpa using 'repl';
store c into ':OUTPATH:';\,
'sql' => "select a.name, a.age, a.gpa, b.name, b.age, b.gpa
from studenttab10k as a join studenttab10k as b using(gpa);",
@@ -2720,7 +2720,7 @@ store c into ':OUTPATH:';\,
exec;
e = load ':OUTPATH:.intermediate1';
f = load ':OUTPATH:.intermediate2';
- g = join e by $0, f by $0 using "merge";
+ g = join e by $0, f by $0 using 'merge';
store g into ':OUTPATH:';\,
'sql' => "select a.name, a.age, a.gpa, b.name, b.age, b.registration, b.contributions from studenttab10k as a join votertab10k as b using(name)",
},
@@ -2737,7 +2737,7 @@ store c into ':OUTPATH:';\,
e = load ':OUTPATH:.intermediate1';
h = filter e by $1 > 30;
f = load ':OUTPATH:.intermediate2';
- g = join h by $0, f by $0 using "merge";
+ g = join h by $0, f by $0 using 'merge';
store g into ':OUTPATH:';\,
'sql' => "select a.name, a.age, a.gpa, b.name, b.age, b.registration, b.contributions from studenttab10k as a join votertab10k as b using(name) where a.age > 30",
},
@@ -2754,7 +2754,7 @@ store c into ':OUTPATH:';\,
e = load ':OUTPATH:.intermediate1';
f = load ':OUTPATH:.intermediate2';
i = filter f by $2 != 'democrat';
- g = join e by $0, i by $0 using "merge";
+ g = join e by $0, i by $0 using 'merge';
store g into ':OUTPATH:';\,
'sql' => "select a.name, a.age, a.gpa, b.name, b.age, b.registration, b.contributions from studenttab10k as a join votertab10k as b using(name) where b.registration != 'democrat'",
},
@@ -2770,7 +2770,7 @@ store c into ':OUTPATH:';\,
exec;
e = load ':OUTPATH:.intermediate1' as (name:chararray, age:int, gpa:float);
f = load ':OUTPATH:.intermediate2' as (name:chararray, age:int, reg:chararray, contrib:float);
- g = join e by $0, f by $0 using "merge";
+ g = join e by $0, f by $0 using 'merge';
store g into ':OUTPATH:';\,
'sql' => "select a.name, a.age, a.gpa, b.name, b.age, b.registration, b.contributions from studenttab10k as a join votertab10k as b using(name)",
},
@@ -2786,7 +2786,7 @@ store c into ':OUTPATH:';\,
exec;
e = load ':OUTPATH:.intermediate1';
f = load ':OUTPATH:.intermediate2';
- g = join e by ($0,$1), f by ($0,$1) using "merge";
+ g = join e by ($0,$1), f by ($0,$1) using 'merge';
store g into ':OUTPATH:';\,
'sql' => "select a.name, a.age, a.gpa, b.name, b.age, b.registration, b.contributions from studenttab10k as a join votertab10k as b using(name,age)",
},
@@ -2802,7 +2802,7 @@ store c into ':OUTPATH:';\,
exec;
e = load ':OUTPATH:.intermediate1';
f = load ':OUTPATH:.intermediate2';
- g = join e by ($1+10), f by ($1+10) using "merge";
+ g = join e by ($1+10), f by ($1+10) using 'merge';
store g into ':OUTPATH:';\,
'sql' => "select a.name, a.age, a.gpa, b.name, b.age, b.registration, b.contributions from studenttab10k as a join votertab10k as b using(age)",
},
@@ -2818,7 +2818,7 @@ store c into ':OUTPATH:';\,
exec;
e = load ':OUTPATH:.intermediate1';
f = load ':OUTPATH:.intermediate2';
- g = join e by $0, f by $0 using "merge";
+ g = join e by $0, f by $0 using 'merge';
store g into ':OUTPATH:';\,
'sql' => "select a.name, a.age, a.gpa, b.name, b.age, b.registration, b.contributions from studentnulltab10k as a join voternulltab10k as b using(name)",
},
@@ -2836,7 +2836,7 @@ store c into ':OUTPATH:';\,
exec;
e = load ':OUTPATH:.intermediate1' as (name:chararray, age:int, reg:chararray, contrib:float);
f = load ':OUTPATH:.intermediate2'as (name:chararray, age:int, gpa:float);
- g = join e by $0, f by $0 using "merge";
+ g = join e by $0, f by $0 using 'merge';
i = filter g by $2 == 'democrat' and $1 > 76;
store i into ':OUTPATH:';\,
'sql' => "select b.name, b.age, b.registration, b.contributions, a.name, a.age, a.gpa from studenttab20m as a join votertab10k as b using(name) where a.gpa < 1.5 and b.registration = 'democrat' and b.age > 76",
@@ -2853,7 +2853,7 @@ store c into ':OUTPATH:';\,
exec;
e = load ':OUTPATH:.intermediate1' as (name:chararray, age:int, gpa:float);
f = load ':OUTPATH:.intermediate2' as (name:chararray, age:int, reg:chararray, contrib:float);
- g = join e by age, f by age using "merge";
+ g = join e by age, f by age using 'merge';
store g into ':OUTPATH:';\,
'sql' => "select a.name, a.age, a.gpa, b.name, b.age, b.registration, b.contributions from studenttab10k as a join votertab10k as b using(age)",
},
@@ -2870,7 +2870,7 @@ store c into ':OUTPATH:';\,
'java_params' => ['-Dpig.skewedjoin.reduce.maxtuple=100'],
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
-e = join a by name, b by name using "skewed" parallel 8;
+e = join a by name, b by name using 'skewed' parallel 8;
store e into ':OUTPATH:';\,
'sql' => "select a.name, a.age, a.gpa, b.name, b.age, b.registration, b.contributions
from studenttab10k as a join votertab10k as b using(name);",
@@ -2883,7 +2883,7 @@ from studenttab10k as a join votertab10k
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age,
gpa);
b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
-e = join a by name, b by name using "skewed";
+e = join a by name, b by name using 'skewed';
store e into ':OUTPATH:';\,
'sql' => "select a.name, a.age, a.gpa, b.name, b.age, b.registration, b.contributions
from studenttab10k as a join votertab10k as b using(name) ;",
@@ -2898,7 +2898,7 @@ gpa);
b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
c = filter a by age < 20;
d = filter b by age < 20;
-e = join c by $0, d by $0 using "skewed" parallel 8;
+e = join c by $0, d by $0 using 'skewed' parallel 8;
store e into ':OUTPATH:';\,
'sql' => "select a.name, a.age, a.gpa, b.name, b.age, b.registration, b.contributions
from studenttab10k as a join votertab10k as b using(name)
@@ -2913,7 +2913,7 @@ where a.age < 20 and b.age < 20;",
b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
c = filter a by age < 20;
d = filter b by age < 20;
-e = join c by (name, age), d by (name, age) using "skewed" parallel 8;
+e = join c by (name, age), d by (name, age) using 'skewed' parallel 8;
store e into ':OUTPATH:';\,
'sql' => "select a.name, a.age, a.gpa, b.name, b.age, b.registration, b.contributions
from studenttab10k as a join votertab10k as b using(name, age)
@@ -2928,7 +2928,7 @@ where a.age < 20 and b.age < 20;",
b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
c = filter a by age < 20;
d = filter b by age < 20;
-e = join c by age+10, d by age + 20 using "skewed" parallel 10;
+e = join c by age+10, d by age + 20 using 'skewed' parallel 10;
store e into ':OUTPATH:';\,
'sql' => "select studenttab10k.*, votertab10k.*
from studenttab10k , votertab10k where studenttab10k.age<20 AND votertab10k.age<20 AND studenttab10k.age = votertab10k.age+10;",
@@ -2940,7 +2940,7 @@ from studenttab10k , votertab10k where s
'java_params' => ['-Dpig.skewedjoin.reduce.maxtuple=100'],
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k';
b = filter a by $1 > 25;
-c = join a by $0, b by $0 using "skewed" parallel 7;
+c = join a by $0, b by $0 using 'skewed' parallel 7;
store c into ':OUTPATH:';\,
'sql' => "select a.name, a.age, a.gpa, b.name, b.age, b.gpa
from studenttab10k as a join studenttab10k as b using(name)
@@ -2955,7 +2955,7 @@ where b.age > 25;",
b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
c = filter a by age < 20;
d = filter b by age < 20;
-e = join c by SIZE(name), d by SIZE(name) using "skewed" parallel 7;
+e = join c by SIZE(name), d by SIZE(name) using 'skewed' parallel 7;
store e into ':OUTPATH:';\,
'sql' => "select studenttab10k.*, votertab10k.*
from studenttab10k, votertab10k where studenttab10k.age<20 AND votertab10k.age<20 AND LENGTH(studenttab10k.name) = LENGTH(votertab10k.name);",
@@ -2966,7 +2966,7 @@ from studenttab10k, votertab10k where st
'java_params' => ['-Dpig.skewedjoin.reduce.maxtuple=100'],
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
-e = join a by name left outer, b by name using "skewed" parallel 8;
+e = join a by name left outer, b by name using 'skewed' parallel 8;
store e into ':OUTPATH:';\,
'sql' => "select a.name, a.age, a.gpa, b.name, b.age, b.registration, b.contributions
from studenttab10k as a left outer join votertab10k as b using(name);",
@@ -2977,7 +2977,7 @@ from studenttab10k as a left outer join
'java_params' => ['-Dpig.skewedjoin.reduce.maxtuple=100'],
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
-e = join a by name right outer, b by name using "skewed" parallel 8;
+e = join a by name right outer, b by name using 'skewed' parallel 8;
store e into ':OUTPATH:';\,
'sql' => "select a.name, a.age, a.gpa, b.name, b.age, b.registration, b.contributions
from studenttab10k as a right outer join votertab10k as b using(name);",
@@ -2988,7 +2988,7 @@ from studenttab10k as a right outer join
'java_params' => ['-Dpig.skewedjoin.reduce.maxtuple=100'],
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
-e = join a by name full outer, b by name using "skewed" parallel 8;
+e = join a by name full outer, b by name using 'skewed' parallel 8;
store e into ':OUTPATH:';\,
'sql' => "select a.name, a.age, a.gpa, b.name, b.age, b.registration, b.contributions
from studenttab10k as a full outer join votertab10k as b using(name);",
@@ -3011,7 +3011,7 @@ from studenttab10k as a full outer join
exec;
register :FUNCPATH:/testudf.jar;
c = load ':OUTPATH:.intermediate' using org.apache.pig.test.udf.storefunc.SimpleCollectableLoader();
- d = group c by $0 using "collected";
+ d = group c by $0 using 'collected';
e = foreach d generate group, COUNT(c);
store e into ':OUTPATH:';\,
'sql' => "select name, count(*) from studenttab10k group by name",
@@ -3026,7 +3026,7 @@ from studenttab10k as a full outer join
register :FUNCPATH:/testudf.jar;
c = load ':OUTPATH:.intermediate' using org.apache.pig.test.udf.storefunc.SimpleCollectableLoader();
d = filter c by $1 > 30;
- e = group d by $0 using "collected";
+ e = group d by $0 using 'collected';
f = foreach e generate group, COUNT(d);
store f into ':OUTPATH:';\,
'sql' => "select name, count(*) from studenttab10k where age > 30 group by name",
@@ -3040,7 +3040,7 @@ from studenttab10k as a full outer join
exec;
register :FUNCPATH:/testudf.jar;
c = load ':OUTPATH:.intermediate' using org.apache.pig.test.udf.storefunc.SimpleCollectableLoader() as (name:chararray, age:int, gpa:float);
- d = group c by $0 using "collected";
+ d = group c by $0 using 'collected';
e = foreach d generate group, MAX(c.age);
store e into ':OUTPATH:';\,
'sql' => "select name, max(age) from studenttab10k group by name",
@@ -3054,7 +3054,7 @@ from studenttab10k as a full outer join
exec;
register :FUNCPATH:/testudf.jar;
c = load ':OUTPATH:.intermediate' using org.apache.pig.test.udf.storefunc.SimpleCollectableLoader() as (name:chararray, age:int, gpa:float);
- d = group c by (name, age) using "collected";
+ d = group c by (name, age) using 'collected';
e = foreach d generate group.name, group.age, MIN(c.gpa);
store e into ':OUTPATH:';\,
'sql' => "select name, age, min(gpa) from studenttab10k group by name, age",
@@ -3068,7 +3068,7 @@ from studenttab10k as a full outer join
exec;
register :FUNCPATH:/testudf.jar;
c = load ':OUTPATH:.intermediate' using org.apache.pig.test.udf.storefunc.SimpleCollectableLoader() as (name:chararray, age:int, gpa:float);
- d = group c by $0 using "collected";
+ d = group c by $0 using 'collected';
e = foreach d generate group, SUM(c.$1);
store e into ':OUTPATH:';\,
'sql' => "select name, sum(age) from studentnulltab10k group by name",
@@ -3082,7 +3082,7 @@ from studenttab10k as a full outer join
exec;
register :FUNCPATH:/testudf.jar;
c = load ':OUTPATH:.intermediate' using org.apache.pig.test.udf.storefunc.SimpleCollectableLoader() as (name:chararray, age:int, gpa:float);
- d = group c by age using "collected";
+ d = group c by age using 'collected';
e = foreach d generate group, AVG(c.gpa), COUNT(c.name);
store e into ':OUTPATH:';\,
'sql' => "select age, avg(gpa), count(name) from studenttab10k group by age",
Modified: pig/trunk/test/e2e/pig/tests/streaming.conf
URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/tests/streaming.conf?rev=1101209&r1=1101208&r2=1101209&view=diff
==============================================================================
--- pig/trunk/test/e2e/pig/tests/streaming.conf (original)
+++ pig/trunk/test/e2e/pig/tests/streaming.conf Mon May 9 20:28:29 2011
@@ -595,262 +595,6 @@ store C into ':OUTPATH:';#,
]
},
- {
- # This group is for local mode testing
- 'name' => 'StreamingLocal',
- 'sortBenchmark' => 1,
- 'sortResults' => 1,
- 'floatpostprocess' => 1,
- 'delimiter' => ' ',
- 'tests' => [
- {
- #Section 1.1: perl script, no parameters
- 'num' => 1,
- 'exectype' => 'local',
- 'pig' => q#
-A = load ':INPATH:/singlefile/studenttab10k';
-B = foreach A generate $0, $1, $2;
-C = stream B through `perl :SCRIPTHOMEPATH:/PigStreaming.pl`;
-store C into ':OUTPATH:';#,
- 'sql' => "select name, age, gpa from studenttab10k;",
- },
- {
- #Section 1.3: define clause; perl script, with parameters
- 'num' => 2,
- 'exectype' => 'local',
- 'pig' => q#
-define CMD `perl :SCRIPTHOMEPATH:/PigStreaming.pl - -`;
-A = load ':INPATH:/singlefile/studenttab10k';
-B = foreach A generate $0, $1, $2;
-C = stream B through CMD;
-store C into ':OUTPATH:';#,
- 'sql' => "select name, age, gpa from studenttab10k;",
- },
- {
- # Section 1.4: grouped data
- 'num' => 3,
- 'exectype' => 'local',
- 'pig' => q#
-define CMD `perl :SCRIPTHOMEPATH:/GroupBy.pl '\t' 0` ship(':SCRIPTHOMEPATH:/GroupBy.pl');
-A = load ':INPATH:/singlefile/studenttab10k';
-B = group A by $0;
-C = foreach B generate flatten(A);
-D = stream C through CMD;
-store D into ':OUTPATH:';#,
- 'sql' => "select name, count(*) from studenttab10k group by name;",
- },
- {
- # Section 1.4: grouped and ordered data
- 'num' => 4,
- 'exectype' => 'local',
- 'pig' => q#
-define CMD `perl :SCRIPTHOMEPATH:/GroupBy.pl '\t' 0 1`;
-A = load ':INPATH:/singlefile/studenttab10k';
-B = group A by $0;
-C = foreach B {
- D = order A by $1;
- generate flatten(D);
-};
-E = stream C through CMD;
-store E into ':OUTPATH:';#,
- 'sql' => "select name, age, count(*) from studenttab10k group by name, age;",
- },
- {
- # Section 1.5: multiple streaming operators - adjacent - before local rearrange
- 'num' => 5,
- 'exectype' => 'local',
- 'pig' => q#
-register :FUNCPATH:/testudf.jar;
-define CMD `perl :SCRIPTHOMEPATH:/PigStreamingDepend.pl` input(stdin using org.apache.pig.test.udf.streaming.StreamingDump);
-A = load ':INPATH:/singlefile/studenttab10k';
-B = stream A through `perl :SCRIPTHOMEPATH:/PigStreaming.pl`;
-C = stream B through CMD as (name, age, gpa);
-D = foreach C generate name, age;
-store D into ':OUTPATH:';#,
- 'sql' => "select name, age from studenttab10k;",
- },
- {
- # Section 1.5: multiple streaming operators - not adjacent - before local rearrange
- 'num' => 6,
- 'exectype' => 'local',
- 'pig' => q#
-register :FUNCPATH:/testudf.jar;
-A = load ':INPATH:/singlefile/studenttab10k';
-define CMD `perl :SCRIPTHOMEPATH:/PigStreamingDepend.pl` input(stdin using org.apache.pig.test.udf.streaming.StreamingDump);
-B = stream A through CMD as (name, age, gpa);
-C = filter B by age < '20';
-D = foreach C generate name;
-define CMD `perl :SCRIPTHOMEPATH:/PigStreaming.pl - - :SCRIPTHOMEPATH:/nameMap`;
-E = stream D through CMD;
-store E into ':OUTPATH:';#,
- 'sql' => "select UPPER(name) from studenttab10k where age < '20';",
- },
- {
- # Section 1.5: multiple streaming operators - adjacent - after local rearrange
- 'num' => 7,
- 'exectype' => 'local',
- 'pig' => q#
-register :FUNCPATH:/testudf.jar;
-define CMD1 `perl :SCRIPTHOMEPATH:/GroupBy.pl '\t' 0 1`;
-define CMD2 `perl :SCRIPTHOMEPATH:/PigStreamingDepend.pl` input(stdin using org.apache.pig.test.udf.streaming.StreamingDump);
-A = load ':INPATH:/singlefile/studenttab10k';
-B = group A by $0;
-C = foreach B {
- D = order A by $1;
- generate flatten(D);
-};
-E = stream C through CMD1;
-F = stream E through CMD2;
-store F into ':OUTPATH:';#,
- 'sql' => "select name, age, count(*) from studenttab10k group by name, age;",
- },
- {
- # Section 1.5: multiple streaming operators - one before and one after local rearrange
- # same alias name
- 'num' => 8,
- 'exectype' => 'local',
- 'pig' => q#
-register :FUNCPATH:/testudf.jar;
-define CMD1 `perl :SCRIPTHOMEPATH:/GroupBy.pl '\t' 0`;
-define CMD2 `perl :SCRIPTHOMEPATH:/PigStreamingDepend.pl` input(stdin using org.apache.pig.test.udf.streaming.StreamingDump);
-A = load ':INPATH:/singlefile/studenttab10k';
-B = stream A through CMD2;
-C = group B by $0;
-D = foreach C generate flatten(B);
-B = stream D through CMD1;
-store B into ':OUTPATH:';#,
- 'sql' => "select name, count(*) from studenttab10k group by name;",
- },
- {
- # Section 3.1: use of custom deserializer
- 'num' => 9,
- 'exectype' => 'local',
- 'pig' => q#
-define CMD `perl :SCRIPTHOMEPATH:/PigStreaming.pl` output(stdout using PigStreaming());
-A = load ':INPATH:/singlefile/studenttab10k';
-B = stream A through CMD;
-store B into ':OUTPATH:';#,
- 'sql' => "select name, age, gpa from studenttab10k;",
- },
- {
- # Section 3.1: use of custom serializer and deserializer
- 'num' => 10,
- 'exectype' => 'local',
- 'pig' => q#
-register :FUNCPATH:/testudf.jar;
-define CMD `perl :SCRIPTHOMEPATH:/PigStreaming.pl` input(stdin using org.apache.pig.test.udf.streaming.StreamingDump) output(stdout using org.apache.pig.test.udf.streaming.DumpStreamer);
-A = load ':INPATH:/singlefile/studenttab10k';
-B = stream A through CMD as (name, age, gpa);
-C = foreach B generate name, age;
-store C into ':OUTPATH:';#,
- 'sql' => "select name, age from studenttab10k;",
- },
- {
- # Section 3.3: streaming application reads from file rather than stdin
- 'num' => 11,
- 'exectype' => 'local',
- 'pig' => q#
-define CMD `perl :SCRIPTHOMEPATH:/PigStreaming.pl foo -` input('foo');
-A = load ':INPATH:/singlefile/studenttab10k';
-B = stream A through CMD;
-store B into ':OUTPATH:';#,
- 'sql' => "select name, age, gpa from studenttab10k;",
- },
- {
- # Section 3.4: streaming application writes single output to a file
- 'num' => 12,
- 'exectype' => 'local',
- 'pig' => q#
-define CMD `perl :SCRIPTHOMEPATH:/PigStreaming.pl - foo :SCRIPTHOMEPATH:/nameMap` output('foo' using PigStreaming);
-A = load ':INPATH:/singlefile/studenttab10k';
-B = foreach A generate $0;
-C = stream B through CMD;
-store C into ':OUTPATH:';#,
- 'sql' => "select upper(name) from studenttab10k;",
- },
- {
- # Section 3.4: streaming application writes multiple outputs to file
- 'num' => 13,
- 'exectype' => 'local',
- 'pig' => q#
-register :FUNCPATH:/testudf.jar;
-define CMD `perl :SCRIPTHOMEPATH:/PigStreamingDepend.pl - sio_5_1 sio_5_2` input(stdin using org.apache.pig.test.udf.streaming.StreamingDump) output('sio_5_1', 'sio_5_2');
-A = load ':INPATH:/singlefile/studenttab10k';
-B = stream A through CMD;
-store B into ':OUTPATH:';#,
- 'sql' => "select name, age, gpa from studenttab10k;",
- },
- {
- # Section 3.4: streaming application writes multiple outputs: 1 to file and 1 to stdout
- 'num' => 14,
- 'exectype' => 'local',
- 'pig' => q#
-register :FUNCPATH:/testudf.jar;
-define CMD `perl :SCRIPTHOMEPATH:/PigStreamingDepend.pl - - sio_5_2` input(stdin using org.apache.pig.test.udf.streaming.StreamingDump) output(stdout, 'sio_5_2');
-A = load ':INPATH:/singlefile/studenttab10k';
-B = stream A through CMD;
-store B into ':OUTPATH:';#,
- 'sql' => "select name, age, gpa from studenttab10k;",
- },
- {
- # Section 4.3: integration with parameter substitition
- 'num' => 15,
- 'exectype' => 'local',
- 'pig_params' => ['-p', qq(script_name='PigStreaming.pl')],
- 'pig' => q#
-define CMD `perl :SCRIPTHOMEPATH:/$script_name - - :SCRIPTHOMEPATH:/nameMap`;
-A = load ':INPATH:/singlefile/studenttab10k';
-B = foreach A generate $0;
-C = stream B through CMD as (name);
-D = group C by name;
-E = foreach D generate group, COUNT(C);
-store E into ':OUTPATH:';#,
- 'sql' => "select upper(name) as nm, count(*) from studenttab10k group by nm;",
- },
- {
- # Section 5.1: load/store optimization
- 'num' => 16,
- 'exectype' => 'local',
- 'pig' => q#
-define CMD `perl :SCRIPTHOMEPATH:/PigStreaming.pl`;
-A = load ':INPATH:/singlefile/studenttab10k';
-C = stream A through CMD;
-store C into ':OUTPATH:';#,
- 'sql' => "select name, age, gpa from studenttab10k;",
- },
- {
- # PIG-272: problem with optimization and intermediate store
- 'num' => 17,
- 'exectype' => 'local',
- 'pig' => q#
-define CMD1 `perl -ne 'print $_;print STDERR "stderr $_";'`;
-define CMD2 `:SCRIPTHOMEPATH:/Split.pl 3` input(stdin using PigStreaming(','));
-A = load ':INPATH:/singlefile/studenttab10k';
-B = stream A through CMD1;
-C = stream B through CMD1;
-D = stream C through CMD2;
-store D into ':OUTPATH:';#,
- 'sql' => "select name, age, gpa from studenttab10k;",
- },
- {
- # PIG-272: problem with optimization and intermediate store
- 'num' => 18,
- 'exectype' => 'local',
- 'pig' => q#
-define CMD1 `perl -ne 'print $_;'`;
-define CMD2 `:SCRIPTHOMEPATH:/Split.pl 3` input(stdin using PigStreaming(','));
-A = load ':INPATH:/singlefile/studenttab10k';
-B = stream A through CMD1;
-store B into ':OUTPATH:.intermediate';
-C = stream B through CMD1;
-D = stream C through CMD2;
-E = JOIN B by $0, D by $0;
-store E into ':OUTPATH:';#,
-
- 'sql' => "select A.name, A.age, A.gpa, B.name, B.age, B.gpa from studenttab10k as A join studenttab10k as B using(name);",
- },
- ]
- },
]
}
;
Added: pig/trunk/test/e2e/pig/tests/streaming_local.conf
URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/tests/streaming_local.conf?rev=1101209&view=auto
==============================================================================
--- pig/trunk/test/e2e/pig/tests/streaming_local.conf (added)
+++ pig/trunk/test/e2e/pig/tests/streaming_local.conf Mon May 9 20:28:29 2011
@@ -0,0 +1,288 @@
+#!/usr/bin/env perl
+
+############################################################################
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+###############################################################################
+# Tests for pig streaming in local mode.
+#
+# This configuration file follows streaming functional spec: http://wiki.apache.org/pig/PigStreamingFunctionalSpec
+
+$cfg = {
+ 'driver' => 'Pig',
+ 'nummachines' => 5,
+
+ 'groups' => [
+ {
+ # This group is for local mode testing
+ 'name' => 'StreamingLocal',
+ 'sortBenchmark' => 1,
+ 'sortResults' => 1,
+ 'floatpostprocess' => 1,
+ 'delimiter' => ' ',
+ 'tests' => [
+ {
+ #Section 1.1: perl script, no parameters
+ 'num' => 1,
+ 'exectype' => 'local',
+ 'pig' => q#
+A = load ':INPATH:/singlefile/studenttab10k';
+B = foreach A generate $0, $1, $2;
+C = stream B through `perl :SCRIPTHOMEPATH:/PigStreaming.pl`;
+store C into ':OUTPATH:';#,
+ 'sql' => "select name, age, gpa from studenttab10k;",
+ },
+ {
+ #Section 1.3: define clause; perl script, with parameters
+ 'num' => 2,
+ 'exectype' => 'local',
+ 'pig' => q#
+define CMD `perl :SCRIPTHOMEPATH:/PigStreaming.pl - -`;
+A = load ':INPATH:/singlefile/studenttab10k';
+B = foreach A generate $0, $1, $2;
+C = stream B through CMD;
+store C into ':OUTPATH:';#,
+ 'sql' => "select name, age, gpa from studenttab10k;",
+ },
+ {
+ # Section 1.4: grouped data
+ 'num' => 3,
+ 'exectype' => 'local',
+ 'pig' => q#
+define CMD `perl :SCRIPTHOMEPATH:/GroupBy.pl '\t' 0` ship(':SCRIPTHOMEPATH:/GroupBy.pl');
+A = load ':INPATH:/singlefile/studenttab10k';
+B = group A by $0;
+C = foreach B generate flatten(A);
+D = stream C through CMD;
+store D into ':OUTPATH:';#,
+ 'sql' => "select name, count(*) from studenttab10k group by name;",
+ },
+ {
+ # Section 1.4: grouped and ordered data
+ 'num' => 4,
+ 'exectype' => 'local',
+ 'pig' => q#
+define CMD `perl :SCRIPTHOMEPATH:/GroupBy.pl '\t' 0 1`;
+A = load ':INPATH:/singlefile/studenttab10k';
+B = group A by $0;
+C = foreach B {
+ D = order A by $1;
+ generate flatten(D);
+};
+E = stream C through CMD;
+store E into ':OUTPATH:';#,
+ 'sql' => "select name, age, count(*) from studenttab10k group by name, age;",
+ },
+ {
+ # Section 1.5: multiple streaming operators - adjacent - before local rearrange
+ 'num' => 5,
+ 'exectype' => 'local',
+ 'pig' => q#
+register :FUNCPATH:/testudf.jar;
+define CMD `perl :SCRIPTHOMEPATH:/PigStreamingDepend.pl` input(stdin using org.apache.pig.test.udf.streaming.StreamingDump);
+A = load ':INPATH:/singlefile/studenttab10k';
+B = stream A through `perl :SCRIPTHOMEPATH:/PigStreaming.pl`;
+C = stream B through CMD as (name, age, gpa);
+D = foreach C generate name, age;
+store D into ':OUTPATH:';#,
+ 'sql' => "select name, age from studenttab10k;",
+ },
+ {
+ # Section 1.5: multiple streaming operators - not adjacent - before local rearrange
+ 'num' => 6,
+ 'exectype' => 'local',
+ 'pig' => q#
+register :FUNCPATH:/testudf.jar;
+A = load ':INPATH:/singlefile/studenttab10k';
+define CMD `perl :SCRIPTHOMEPATH:/PigStreamingDepend.pl` input(stdin using org.apache.pig.test.udf.streaming.StreamingDump);
+B = stream A through CMD as (name, age, gpa);
+C = filter B by age < '20';
+D = foreach C generate name;
+define CMD `perl :SCRIPTHOMEPATH:/PigStreaming.pl - - :SCRIPTHOMEPATH:/nameMap`;
+E = stream D through CMD;
+store E into ':OUTPATH:';#,
+ 'sql' => "select UPPER(name) from studenttab10k where age < '20';",
+ },
+ {
+ # Section 1.5: multiple streaming operators - adjacent - after local rearrange
+ 'num' => 7,
+ 'exectype' => 'local',
+ 'pig' => q#
+register :FUNCPATH:/testudf.jar;
+define CMD1 `perl :SCRIPTHOMEPATH:/GroupBy.pl '\t' 0 1`;
+define CMD2 `perl :SCRIPTHOMEPATH:/PigStreamingDepend.pl` input(stdin using org.apache.pig.test.udf.streaming.StreamingDump);
+A = load ':INPATH:/singlefile/studenttab10k';
+B = group A by $0;
+C = foreach B {
+ D = order A by $1;
+ generate flatten(D);
+};
+E = stream C through CMD1;
+F = stream E through CMD2;
+store F into ':OUTPATH:';#,
+ 'sql' => "select name, age, count(*) from studenttab10k group by name, age;",
+ },
+ {
+ # Section 1.5: multiple streaming operators - one before and one after local rearrange
+ # same alias name
+ 'num' => 8,
+ 'exectype' => 'local',
+ 'pig' => q#
+register :FUNCPATH:/testudf.jar;
+define CMD1 `perl :SCRIPTHOMEPATH:/GroupBy.pl '\t' 0`;
+define CMD2 `perl :SCRIPTHOMEPATH:/PigStreamingDepend.pl` input(stdin using org.apache.pig.test.udf.streaming.StreamingDump);
+A = load ':INPATH:/singlefile/studenttab10k';
+B = stream A through CMD2;
+C = group B by $0;
+D = foreach C generate flatten(B);
+B = stream D through CMD1;
+store B into ':OUTPATH:';#,
+ 'sql' => "select name, count(*) from studenttab10k group by name;",
+ },
+ {
+ # Section 3.1: use of custom deserializer
+ 'num' => 9,
+ 'exectype' => 'local',
+ 'pig' => q#
+define CMD `perl :SCRIPTHOMEPATH:/PigStreaming.pl` output(stdout using PigStreaming());
+A = load ':INPATH:/singlefile/studenttab10k';
+B = stream A through CMD;
+store B into ':OUTPATH:';#,
+ 'sql' => "select name, age, gpa from studenttab10k;",
+ },
+ {
+ # Section 3.1: use of custom serializer and deserializer
+ 'num' => 10,
+ 'exectype' => 'local',
+ 'pig' => q#
+register :FUNCPATH:/testudf.jar;
+define CMD `perl :SCRIPTHOMEPATH:/PigStreaming.pl` input(stdin using org.apache.pig.test.udf.streaming.StreamingDump) output(stdout using org.apache.pig.test.udf.streaming.DumpStreamer);
+A = load ':INPATH:/singlefile/studenttab10k';
+B = stream A through CMD as (name, age, gpa);
+C = foreach B generate name, age;
+store C into ':OUTPATH:';#,
+ 'sql' => "select name, age from studenttab10k;",
+ },
+ {
+ # Section 3.3: streaming application reads from file rather than stdin
+ 'num' => 11,
+ 'exectype' => 'local',
+ 'pig' => q#
+define CMD `perl :SCRIPTHOMEPATH:/PigStreaming.pl foo -` input('foo');
+A = load ':INPATH:/singlefile/studenttab10k';
+B = stream A through CMD;
+store B into ':OUTPATH:';#,
+ 'sql' => "select name, age, gpa from studenttab10k;",
+ },
+ {
+ # Section 3.4: streaming application writes single output to a file
+ 'num' => 12,
+ 'exectype' => 'local',
+ 'pig' => q#
+define CMD `perl :SCRIPTHOMEPATH:/PigStreaming.pl - foo :SCRIPTHOMEPATH:/nameMap` output('foo' using PigStreaming);
+A = load ':INPATH:/singlefile/studenttab10k';
+B = foreach A generate $0;
+C = stream B through CMD;
+store C into ':OUTPATH:';#,
+ 'sql' => "select upper(name) from studenttab10k;",
+ },
+ {
+ # Section 3.4: streaming application writes multiple outputs to file
+ 'num' => 13,
+ 'exectype' => 'local',
+ 'pig' => q#
+register :FUNCPATH:/testudf.jar;
+define CMD `perl :SCRIPTHOMEPATH:/PigStreamingDepend.pl - sio_5_1 sio_5_2` input(stdin using org.apache.pig.test.udf.streaming.StreamingDump) output('sio_5_1', 'sio_5_2');
+A = load ':INPATH:/singlefile/studenttab10k';
+B = stream A through CMD;
+store B into ':OUTPATH:';#,
+ 'sql' => "select name, age, gpa from studenttab10k;",
+ },
+ {
+ # Section 3.4: streaming application writes multiple outputs: 1 to file and 1 to stdout
+ 'num' => 14,
+ 'exectype' => 'local',
+ 'pig' => q#
+register :FUNCPATH:/testudf.jar;
+define CMD `perl :SCRIPTHOMEPATH:/PigStreamingDepend.pl - - sio_5_2` input(stdin using org.apache.pig.test.udf.streaming.StreamingDump) output(stdout, 'sio_5_2');
+A = load ':INPATH:/singlefile/studenttab10k';
+B = stream A through CMD;
+store B into ':OUTPATH:';#,
+ 'sql' => "select name, age, gpa from studenttab10k;",
+ },
+ {
+ # Section 4.3: integration with parameter substitution
+ 'num' => 15,
+ 'exectype' => 'local',
+ 'pig_params' => ['-p', qq(script_name='PigStreaming.pl')],
+ 'pig' => q#
+define CMD `perl :SCRIPTHOMEPATH:/$script_name - - :SCRIPTHOMEPATH:/nameMap`;
+A = load ':INPATH:/singlefile/studenttab10k';
+B = foreach A generate $0;
+C = stream B through CMD as (name);
+D = group C by name;
+E = foreach D generate group, COUNT(C);
+store E into ':OUTPATH:';#,
+ 'sql' => "select upper(name) as nm, count(*) from studenttab10k group by nm;",
+ },
+ {
+ # Section 5.1: load/store optimization
+ 'num' => 16,
+ 'exectype' => 'local',
+ 'pig' => q#
+define CMD `perl :SCRIPTHOMEPATH:/PigStreaming.pl`;
+A = load ':INPATH:/singlefile/studenttab10k';
+C = stream A through CMD;
+store C into ':OUTPATH:';#,
+ 'sql' => "select name, age, gpa from studenttab10k;",
+ },
+ {
+ # PIG-272: problem with optimization and intermediate store
+ 'num' => 17,
+ 'exectype' => 'local',
+ 'pig' => q#
+define CMD1 `perl -ne 'print $_;print STDERR "stderr $_";'`;
+define CMD2 `:SCRIPTHOMEPATH:/Split.pl 3` input(stdin using PigStreaming(','));
+A = load ':INPATH:/singlefile/studenttab10k';
+B = stream A through CMD1;
+C = stream B through CMD1;
+D = stream C through CMD2;
+store D into ':OUTPATH:';#,
+ 'sql' => "select name, age, gpa from studenttab10k;",
+ },
+ {
+ # PIG-272: problem with optimization and intermediate store
+ 'num' => 18,
+ 'exectype' => 'local',
+ 'pig' => q#
+define CMD1 `perl -ne 'print $_;'`;
+define CMD2 `:SCRIPTHOMEPATH:/Split.pl 3` input(stdin using PigStreaming(','));
+A = load ':INPATH:/singlefile/studenttab10k';
+B = stream A through CMD1;
+store B into ':OUTPATH:.intermediate';
+C = stream B through CMD1;
+D = stream C through CMD2;
+E = JOIN B by $0, D by $0;
+store E into ':OUTPATH:';#,
+
+ 'sql' => "select A.name, A.age, A.gpa, B.name, B.age, B.gpa from studenttab10k as A join studenttab10k as B using(name);",
+ },
+ ]
+ },
+ ]
+}
+;
+