You are viewing a plain text version of this content. The canonical link for it is here.
Posted to hcatalog-commits@incubator.apache.org by ha...@apache.org on 2011/12/06 20:05:39 UTC
svn commit: r1211077 [3/7] - in /incubator/hcatalog/trunk: ./ conf/
src/test/e2e/hcatalog/ src/test/e2e/hcatalog/conf/
src/test/e2e/hcatalog/deployers/ src/test/e2e/hcatalog/drivers/
src/test/e2e/hcatalog/tests/ src/test/e2e/hcatalog/tools/generate/ sr...
Added: incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/TestDriverHadoop.pm
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/TestDriverHadoop.pm?rev=1211077&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/TestDriverHadoop.pm (added)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/TestDriverHadoop.pm Tue Dec 6 20:05:37 2011
@@ -0,0 +1,735 @@
+package TestDriverHadoop;
+
+############################################################################
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+###############################################################################
+# Test driver for Hadoop (MapReduce job) nightly tests.
+#
+#
+
+use TestDriver;
+use IPC::Run; # don't do qw(run), it screws up TestDriver which also has a run method
+use Digest::MD5 qw(md5_hex);
+use Util;
+use File::Path;
+use Cwd;
+
+use English;
+
+# This driver inherits from the generic TestDriver class.
+our $className = "TestDriver";
+our @ISA       = ($className);
+
+# HARNESS_ROOT is mandatory; the helper tools live underneath it.
+our $ROOT = $ENV{'HARNESS_ROOT'};
+die "ERROR: You must set environment variable HARNESS_ROOT\n"
+    unless defined $ROOT;
+our $toolpath = "$ROOT/libexec/HCatTest";
+
+# Result status strings.
+my $passedStr  = 'passed';
+my $failedStr  = 'failed';
+my $abortedStr = 'aborted';
+my $skippedStr = 'skipped';
+my $dependStr  = 'failed_dependency';
+
+sub new
+{
+    # Build the object through the parent constructor, then re-bless it
+    # into the class we were invoked on (works for both class and
+    # instance invocation).
+    my ($proto) = @_;
+    my $class = ref($proto) || $proto;
+
+    my $self = $class->SUPER::new;
+    return bless($self, $class);
+}
+
+sub replaceParameters
+{
+##!!! Move this to Util.pm
+
+    # Expand the :TOKEN: placeholders found in a command template.
+    #
+    #   $cmd     - template string containing :TOKEN: placeholders
+    #   $outfile - value substituted for :OUTPATH:
+    #   $testCmd - hash ref holding the test configuration values
+    #   $log     - accepted for interface compatibility; not used here
+    #
+    # Returns the expanded command string.  Substitutions are applied in a
+    # fixed order, one after another.
+    my ($self, $cmd, $outfile, $testCmd, $log) = @_;
+
+    # $self
+    $cmd =~ s/:LATESTOUTPUTPATH:/$self->{'latestoutputpath'}/g;
+
+    # $outfile (the original performed this substitution twice)
+    $cmd =~ s/:OUTPATH:/$outfile/g;
+
+    # $ENV
+    $cmd =~ s/:PIGHARNESS:/$ENV{HARNESS_ROOT}/g;
+
+    # $testCmd
+    $cmd =~ s/:INPATH:/$testCmd->{'inpathbase'}/g;
+    $cmd =~ s/:FUNCPATH:/$testCmd->{'funcjarPath'}/g;
+    $cmd =~ s/:PIGPATH:/$testCmd->{'pigpath'}/g;
+    $cmd =~ s/:RUNID:/$testCmd->{'UID'}/g;
+    $cmd =~ s/:USRHOMEPATH:/$testCmd->{'userhomePath'}/g;
+    $cmd =~ s/:MAPREDJARS:/$testCmd->{'mapredjars'}/g;
+    $cmd =~ s/:SCRIPTHOMEPATH:/$testCmd->{'scriptPath'}/g;
+    $cmd =~ s/:DBUSER:/$testCmd->{'dbuser'}/g;
+    $cmd =~ s/:DBNAME:/$testCmd->{'dbdb'}/g;
+#    $cmd =~ s/:LOCALINPATH:/$testCmd->{'localinpathbase'}/g;
+#    $cmd =~ s/:LOCALOUTPATH:/$testCmd->{'localoutpathbase'}/g;
+#    $cmd =~ s/:LOCALTESTPATH:/$testCmd->{'localpathbase'}/g;
+    $cmd =~ s/:BMPATH:/$testCmd->{'benchmarkPath'}/g;
+    $cmd =~ s/:TMP:/$testCmd->{'tmpPath'}/g;
+    $cmd =~ s/:HDFSTMP:/tmp\/$testCmd->{'runid'}/g;
+
+    # Pick the cluster by security mode; an unset mode counts as not secure.
+    my $secure = defined($testCmd->{'hadoopSecurity'})
+        && $testCmd->{'hadoopSecurity'} eq "secure";
+    my $clusterKey = $secure ? 'remoteSecureCluster' : 'remoteNotSecureCluster';
+    $cmd =~ s/:REMOTECLUSTER:/$testCmd->{$clusterKey}/g;
+
+    $cmd =~ s/:THRIFTSERVER:/$testCmd->{'thriftserver'}/g;
+    $cmd =~ s/:HADOOP_CLASSPATH:/$testCmd->{'hadoop_classpath'}/g;
+    $cmd =~ s/:HCAT_JAR:/$testCmd->{'hcatalog.jar'}/g;
+
+    return $cmd;
+}
+
+sub globalSetup
+{
+    # One-time setup for a harness run: derives the run id and the per-run
+    # HDFS/local paths in $globalHash, puts the script directory on PATH and
+    # creates every directory the tests will write to.
+    #
+    #   $globalHash - hash ref with the global configuration (modified)
+    #   $log        - filehandle receiving diagnostics and command output
+    #
+    # Dies if any directory cannot be created.
+    my ($self, $globalHash, $log) = @_;
+
+    # Setup the output path
+    my $me = `whoami`;
+    chomp $me;
+    $globalHash->{'runid'} = $me . "." . time;
+
+    # if "-ignore false" was provided on the command line,
+    # it means do run tests even when marked as 'ignore'
+    if (defined($globalHash->{'ignore'}) && $globalHash->{'ignore'} eq 'false') {
+        $self->{'ignore'} = 'false';
+    }
+
+    $globalHash->{'outpath'} = $globalHash->{'outpathbase'} . "/" . $globalHash->{'runid'} . "/";
+    $globalHash->{'localpath'} = $globalHash->{'localpathbase'} . "/" . $globalHash->{'runid'} . "/";
+
+    # add libexec location to the path
+    if (defined($ENV{'PATH'})) {
+        $ENV{'PATH'} = $globalHash->{'scriptPath'} . ":" . $ENV{'PATH'};
+    }
+    else {
+        $ENV{'PATH'} = $globalHash->{'scriptPath'};
+    }
+
+    # Create the HDFS output directory
+    my @cmd = ($self->getPigCmd($globalHash, $log), '-e', 'mkdir', $globalHash->{'outpath'});
+    print $log "Going to run " . join(" ", @cmd) . "\n";
+    IPC::Run::run(\@cmd, \undef, $log, $log) or die "Cannot create HDFS directory " . $globalHash->{'outpath'} . ": $? - $!\n";
+
+    # Create the local directories: run output, benchmarks, temporary files
+    my @localDirs = (
+        [ 'localpath',     'localpath directory' ],
+        [ 'benchmarkPath', 'benchmark directory' ],
+        [ 'tmpPath',       'temporary directory' ],
+    );
+    foreach my $entry (@localDirs) {
+        my ($key, $label) = @{$entry};
+        IPC::Run::run(['mkdir', '-p', $globalHash->{$key}], \undef, $log, $log) or
+            die "Cannot create $label " . $globalHash->{$key} .
+                " " . "$ERRNO\n";
+    }
+
+    # Create the HDFS temporary directory.  The failure message names this
+    # directory (the original reported the unrelated 'outpath' here).
+    my $hdfsTmp = "tmp/$globalHash->{'runid'}";
+    @cmd = ($self->getPigCmd($globalHash, $log), '-e', 'mkdir', $hdfsTmp);
+    print $log "Going to run " . join(" ", @cmd) . "\n";
+    IPC::Run::run(\@cmd, \undef, $log, $log) or die "Cannot create HDFS directory " . $hdfsTmp . ": $? - $!\n";
+}
+
+sub globalCleanup
+{
+    # This driver has nothing to tear down after a run.
+    return;
+}
+
+
+sub runTest
+{
+    # Run one test: optionally prepare HCatalog, run the hadoop command and,
+    # when 'result_table' is configured, collect one output per entry.
+    #
+    #   $testCmd - hash ref describing the test
+    #   $log     - filehandle receiving diagnostics
+    #
+    # Returns the hash ref produced by runHadoop, extended with 'outputs'
+    # (and 'output' when the test has exactly one store) if 'result_table'
+    # is set.  Dies when the test has no 'hadoop' command.
+    my ($self, $testCmd, $log) = @_;
+    my $subName = (caller(0))[3];
+
+    # Handle the various methods of running used in
+    # the original TestDrivers
+
+    if ( $testCmd->{'hcat_prep'} ) {
+        Util::prepareHCat($self, $testCmd, $log);
+    }
+
+    if ( !$testCmd->{'hadoop'} ) {
+        die "$subName FATAL Did not find a testCmd that I know how to handle";
+    }
+
+    my $result = $self->runHadoop( $testCmd, $log );
+    return $result unless defined($testCmd->{'result_table'});
+
+    # 'result_table' may be a single name or a list of names.
+    my @results = (ref($testCmd->{'result_table'}) eq 'ARRAY')
+        ? @{$testCmd->{'result_table'}}
+        : ($testCmd->{'result_table'});
+
+    my @outputs = ();
+    my $id = 0; # regular output count
+    for (my $i = 0; $i < @results; $i++) {
+        if ($results[$i] ne '?') {
+            # A named table: dump it through Pig and use that as the output.
+            my %modifiedTestCmd = %{$testCmd};
+            my $tableName = $results[$i];
+            $modifiedTestCmd{'num'} = $testCmd->{'num'} . "_" . $i . "_benchmark";
+            $modifiedTestCmd{'pig'} = "a = load '$tableName' using org.apache.hcatalog.pig.HCatLoader(); store a into ':OUTPATH:';";
+            my $r = $self->runPig(\%modifiedTestCmd, $log, 1);
+            $outputs[$i] = $r->{'output'};
+        } else {
+            # '?' means the job wrote a regular output file; fetch that.
+            my $localdir = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".out/$id";
+            my $outfile = $testCmd->{'outpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".out";
+
+            # Copy result file out of hadoop
+            my @baseCmd = $self->getPigCmd($testCmd, $log);
+            $outputs[$i] = $self->postProcessSingleOutputFile($outfile, $localdir, \@baseCmd, $testCmd, $log);
+            $id++;
+        }
+    }
+
+    $result->{'outputs'} = \@outputs;
+    if ($self->countStores($testCmd) == 1) {
+        $result->{'output'} = $outputs[0];
+    }
+
+    return $result;
+}
+
+sub dumpPigTable
+{
+    # Dump an HCatalog table with a generated Pig script and fetch the
+    # result to the local file system.
+    #
+    #   $testCmd - hash ref describing the test
+    #   $table   - name of the table to load through HCatLoader
+    #   $log     - filehandle receiving diagnostics
+    #   $id      - suffix appended to the generated file names
+    #
+    # Returns the path of the sorted local copy of the output.  Dies if the
+    # script cannot be written or the Pig run fails.
+    my ($self, $testCmd, $table, $log, $id) = @_;
+    my $subName = (caller(0))[3];
+
+    # Write the pig script to a file.
+    my $pigfile = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . $id . ".dump.pig";
+    my $outfile = $testCmd->{'outpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . $id . "dump.out";
+
+    open(my $fh, '>', $pigfile) or die "Unable to open file $pigfile to write pig script, $ERRNO\n";
+    print $fh "a = load '$table' using org.apache.hcatalog.pig.HCatLoader(); store a into '$outfile';\n";
+    close($fh) or die "Unable to close file $pigfile, $ERRNO\n";
+
+    # Build the command
+    my @baseCmd = $self->getPigCmd($testCmd, $log);
+    my @cmd = (@baseCmd, $pigfile);
+
+    # Run the command
+    print $log "Setting PIG_CLASSPATH to $ENV{'PIG_CLASSPATH'}\n";
+    print $log "$0::$className::$subName INFO: Going to run pig command: @cmd\n";
+
+    IPC::Run::run(\@cmd, \undef, $log, $log) or
+        die "Failed running $pigfile\n";
+
+    # Get results from the command locally
+    my $localdir = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . $id . ".dump.out";
+
+    return $self->postProcessSingleOutputFile($outfile, $localdir, \@baseCmd, $testCmd, $log);
+}
+
+sub postProcessSingleOutputFile
+{
+    # Copy one HDFS output directory to the local file system, merge its
+    # part files and produce a sorted copy for the benchmark comparison.
+    #
+    #   $outfile  - HDFS path of the output
+    #   $localdir - local directory that receives the copy
+    #   $baseCmd  - array ref with the base pig command
+    #   $testCmd  - hash ref describing the test
+    #   $log      - filehandle receiving diagnostics
+    #
+    # Returns "$localdir/out_sorted"; the merged, unsorted data is left in
+    # "$localdir/out_original".  Dies if the copy from HDFS fails.
+    my ($self, $outfile, $localdir, $baseCmd, $testCmd, $log) = @_;
+    my $subName = (caller(0))[3];
+
+    my @copyCmd = (@{$baseCmd}, '-e', 'copyToLocal', $outfile, $localdir);
+    print $log "$0::$className::$subName INFO: Going to run pig command: @copyCmd\n";
+
+    IPC::Run::run(\@copyCmd, \undef, $log, $log) or die "Cannot copy results from HDFS $outfile to $localdir\n";
+
+    # Sort the result if necessary. Keep the original output in one large file.
+    # Use system not IPC run so that the '*' gets interpolated by the shell.
+
+    # Build command to:
+    # 1. Combine part files
+    my $fppCmd = "cat $localdir/map* $localdir/part* 2>/dev/null";
+
+    # 2. Standardize float precision
+    if (defined $testCmd->{'floatpostprocess'} &&
+        defined $testCmd->{'delimiter'}) {
+        $fppCmd .= " | $toolpath/floatpostprocessor.pl '" .
+            $testCmd->{'delimiter'} . "'";
+    }
+
+    $fppCmd .= " > $localdir/out_original";
+
+    # Run the command.  The exit status is deliberately not checked: cat
+    # reports failure whenever one of the two globs matches nothing.
+    print $log "$fppCmd\n";
+    system($fppCmd);
+
+    # Sort the results for the benchmark compare.
+    my @sortCmd = ('sort', "$localdir/out_original");
+    print $log join(" ", @sortCmd) . "\n";
+    # Record a failed sort in the log instead of ignoring it silently; the
+    # caller still gets the path and the later comparison decides pass/fail.
+    IPC::Run::run(\@sortCmd, '>', "$localdir/out_sorted") or
+        print $log "$0::$className::$subName WARN: sort of $localdir/out_original failed: $?\n";
+
+    return "$localdir/out_sorted";
+}
+
+sub runHadoop
+# Being modified from runPig
+# !!! Works, but need to add other arguments, like queue...???
+{
+    # Run the test's hadoop command through a generated shell script.
+    #
+    #   $testCmd - hash ref describing the test ('hadoop' holds the command)
+    #   $log     - filehandle receiving diagnostics and command output
+    #
+    # Returns a hash ref; when the command references the output path once
+    # (countStores == 1) it contains 'outputs', a one-element array ref with
+    # the sorted local copy of the result.  Dies if a file cannot be written
+    # or the script fails.
+    my ($self, $testCmd, $log) = @_;
+    my $subName = (caller(0))[3];
+
+    my %result;
+
+    # Write the hadoop command to a file.
+    my $hadoopfile = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".hadoop";
+    my $outfile = $testCmd->{'outpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".out";
+
+    my $hadoopcmd = $self->replaceParameters( $testCmd->{'hadoop'}, $outfile, $testCmd, $log );
+
+    # adjust for the leading and trailing new line often seen in the conf file's command directives
+    $hadoopcmd =~ s/^\s*(.*?)\s*$/$1/s;
+
+    open(my $cmdFh, '>', $hadoopfile) or die "Unable to open file $hadoopfile to write hadoop command file, $ERRNO\n";
+    print $cmdFh $hadoopcmd . "\n";
+    close($cmdFh) or die "Unable to close file $hadoopfile, $ERRNO\n";
+
+    # Build the command
+    my @cmd = Util::getHadoopCmd($testCmd);
+
+    # Add command line arguments if they're provided
+    if (defined($testCmd->{'hadoop_cmdline_args'})) {
+        push(@cmd, @{$testCmd->{'hadoop_cmdline_args'}});
+    }
+
+    # Add the test command elements
+    push(@cmd, split(/ +/, $hadoopcmd));
+
+    # Set HADOOP_CLASSPATH environment variable if provided.
+    # NOTE(review): the original expanded 'hadoop_classpath' through
+    # replaceParameters but never used the result; HADOOP_CLASSPATH is taken
+    # from 'hcatalog.jar'.  That behaviour is kept - confirm it is intended.
+    if (defined($testCmd->{'hadoop_classpath'})) {
+        my $cp = $testCmd->{'hcatalog.jar'};
+        $cp =~ s/,/:/g;
+        $ENV{'HADOOP_CLASSPATH'} = $cp;
+    }
+
+    if (defined($testCmd->{'metastore.principal'}) && ($testCmd->{'metastore.principal'} =~ m/\S+/)) {
+        $ENV{'HADOOP_OPTS'} = "-Dhcat.metastore.principal=" . $testCmd->{'metastore.principal'};
+        $ENV{'HADOOP_CLIENT_OPTS'} = "-Dhcat.metastore.principal=" . $testCmd->{'metastore.principal'};
+    }
+
+    # Add su user if provided
+    if (defined($testCmd->{'run_as'})) {
+        my $quoted = '"' . join (" ", @cmd) . '"';
+        @cmd = ("echo", $quoted, "|", "su", $testCmd->{'run_as'});
+    }
+
+    my $script = $hadoopfile . ".sh";
+    open(my $scriptFh, '>', $script) or die "Unable to open file $script to write script, $ERRNO\n";
+    print $scriptFh join (" ", @cmd) . "\n";
+    close($scriptFh) or die "Unable to close file $script, $ERRNO\n";
+
+    # List-form system avoids passing the path through a shell.
+    system('chmod', '+x', $script) == 0 or
+        die "Unable to make $script executable\n";
+
+    # Run the command
+    print $log "$0::$className::$subName INFO: Going to run hadoop command in shell script: $script\n";
+    print $log "$0::$className::$subName INFO: Going to run hadoop command: " . join(" ", @cmd) . "\n";
+    print $log "With HADOOP_CLASSPATH set to " . $ENV{'HADOOP_CLASSPATH'} . " and HADOOP_OPTS set to " . $ENV{'HADOOP_OPTS'} . "\n";
+
+    my @runhadoop = ("$script");
+    IPC::Run::run(\@runhadoop, \undef, $log, $log) or
+        die "Failed running $script\n";
+
+    # The original interpolated an undeclared (always empty) $id here.
+    my $localdir = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".dump.out";
+    my @baseCmd = $self->getPigCmd($testCmd, $log);
+    if ($self->countStores($testCmd) == 1) {
+        my @outputs = ();
+        $outputs[0] = $self->postProcessSingleOutputFile($outfile, $localdir, \@baseCmd, $testCmd, $log);
+        $result{'outputs'} = \@outputs;
+    }
+
+    return \%result;
+} # end sub runHadoop
+
+
+sub compare
+{
+    # Compare the test results against the benchmark results.
+    #
+    #   $testResult      - hash ref returned by runTest
+    #   $benchmarkResult - hash ref returned by generateBenchmark
+    #   $log             - filehandle receiving diagnostics
+    #   $testCmd         - hash ref describing the test (not used here)
+    #
+    # With an 'outputs' list every entry is compared against the benchmark
+    # entry at the same index, and 1 is returned only if all of them match
+    # (0 otherwise, including for an empty list).  Without 'outputs' the
+    # single 'output' entries are compared.
+    my ($self, $testResult, $benchmarkResult, $log, $testCmd) = @_;
+
+    if (!defined($testResult->{'outputs'})) {
+        return $self->compareSingleOutput($testResult, $testResult->{'output'},
+            $benchmarkResult->{'output'}, $log);
+    }
+
+    # Dereference the list: assigning the reference itself produced a
+    # one-element array, so only the first output was ever compared.
+    my @outputs = @{$testResult->{'outputs'}};
+    my $count = @outputs;
+    if ($count == 0) {
+        print $log "No test outputs to compare\n";
+        return 0;
+    }
+
+    my $matched = 0;
+    for (my $id = 0; $id < $count; $id++) {
+        my $testOutput = $outputs[$id];
+        my $benchmarkOutput = ($benchmarkResult->{'outputs'})->[$id];
+        if (!defined($benchmarkOutput)) {
+            print $log "No benchmark output available for test output $id\n";
+            next;
+        }
+        $matched++ if $self->compareSingleOutput($testResult, $testOutput,
+            $benchmarkOutput, $log);
+    }
+
+    return ($matched == $count) ? 1 : 0;
+}
+
+sub generateBenchmark
+{
+    # Generate the benchmark output for every SQL statement of the test.
+    #
+    #   $testCmd - hash ref describing the test; 'sql' is either a single
+    #              statement or an array ref of statements
+    #   $log     - filehandle receiving diagnostics
+    #
+    # Returns a hash ref whose 'outputs' entry lists one benchmark file per
+    # statement, in statement order.
+    my ($self, $testCmd, $log) = @_;
+
+    my %result;
+
+    # Normalise 'sql' to a list.  (The original first dereferenced it
+    # unconditionally as an array, which is wrong for a plain string.)
+    my @SQLQuery = (ref($testCmd->{'sql'}) eq 'ARRAY')
+        ? @{$testCmd->{'sql'}}
+        : ($testCmd->{'sql'});
+
+    my @outfiles = ();
+    for (my $id = 0; $id < @SQLQuery; $id++) {
+        # Benchmark files are numbered starting at 1.
+        push(@outfiles,
+            $self->generateSingleSQLBenchmark($testCmd, $SQLQuery[$id], ($id+1), $log));
+    }
+    $result{'outputs'} = \@outfiles;
+
+    return \%result;
+}
+
+sub generateSingleSQLBenchmark
+{
+    # Run one SQL statement against the benchmark database and post-process
+    # its output.
+    #
+    #   $testCmd - hash ref describing the test
+    #   $sql     - SQL text to run
+    #   $id      - statement number used in the generated file names
+    #   $log     - filehandle receiving diagnostics
+    #
+    # Returns the path produced by postProcessSingleSQLOutputFile.  Dies if
+    # the SQL script or the output file cannot be written.
+    my ($self, $testCmd, $sql, $id, $log) = @_;
+
+    my $sqlfile = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".benchmark.$id.sql";
+    my $outfile = $testCmd->{'benchmarkPath'} . "/" . $testCmd->{'group'} . "_" . $testCmd->{'num'};
+
+    $outfile .= defined($id) ? ".$id" . ".out" : ".out";
+
+    open(my $outfp, '>', $outfile) or
+        die "Unable to open output file $outfile, $!\n";
+
+    open(my $sqlFh, '>', $sqlfile) or
+        die "Unable to open file $sqlfile to write SQL script, $ERRNO\n";
+    print $sqlFh $sql;
+    close($sqlFh) or
+        die "Unable to close file $sqlfile, $ERRNO\n";
+
+    Util::runDbCmd($testCmd, $log, $sqlfile, $outfp);
+
+    # The original stored the exit status in an undeclared global
+    # ($rcs[$i], with $i undefined) that nothing reads; that write is dropped.
+    close($outfp);
+
+    # NOTE(review): no $isBenchmark flag is passed, so the 'nullpostprocess'
+    # step is never applied on this path - confirm that is intended.
+    return $self->postProcessSingleSQLOutputFile($outfile, $testCmd, $log);
+}
+
+sub postProcessSingleSQLOutputFile
+{
+    # Post-process a SQL output file in place and write a sorted copy.
+    #
+    #   $outfile     - file to process
+    #   $testCmd     - hash ref describing the test
+    #   $log         - filehandle receiving diagnostics
+    #   $isBenchmark - true when $outfile is benchmark output
+    #
+    # Filters applied, in order:
+    #   * float postprocessor, when 'floatpostprocess' and 'delimiter' are set
+    #   * removal of the text NULL, when $isBenchmark and 'nullpostprocess'
+    #     are set
+    # Returns "$outfile.sorted".  Dies if a filter cannot be run.
+    my ($self, $outfile, $testCmd, $log, $isBenchmark) = @_;
+
+    my @filters = ();
+
+    # If requested, process the data to smooth over floating point
+    # differences.
+    if (defined $testCmd->{'floatpostprocess'} &&
+        defined $testCmd->{'delimiter'}) {
+        push(@filters, {
+            'name' => 'float postprocessor',
+            'cmd'  => ["$toolpath/floatpostprocessor.pl", $testCmd->{'delimiter'}],
+        });
+    }
+
+    if ($isBenchmark && defined $testCmd->{'nullpostprocess'}) {
+        push(@filters, {
+            'name' => 'null postprocessor',
+            'cmd'  => ["sed", "s/NULL//g"],
+        });
+    }
+
+    foreach my $filter (@filters) {
+        # Move the file to a temp file and run it through the filter,
+        # writing the result back under the original name.
+        my $tmpfile = "$outfile.tmp";
+        link($outfile, $tmpfile) or
+            die "Unable to create temporary file $tmpfile, $!\n";
+        unlink($outfile) or
+            die "Unable to unlink file $outfile, $!\n";
+        open(my $ifh, '<', $tmpfile) or
+            die "Unable to open file $tmpfile, $!\n";
+        open(my $ofh, '>', $outfile) or
+            die "Unable to open file $outfile, $!\n";
+        my @cmd = @{$filter->{'cmd'}};
+        print $log "Going to run [" . join(" ", @cmd) . "]\n";
+        # Each filter reports its own name on failure (the NULL filter used
+        # to claim that the float postprocessor had failed).
+        IPC::Run::run(\@cmd, $ifh, $ofh, $log) or
+            die "Failed to run $filter->{'name'}, $!\n";
+        close($ifh);
+        close($ofh);
+        unlink($tmpfile);
+    }
+
+    # Sort the results for the benchmark compare.
+    my $sortfile = "$outfile.sorted";
+    my @cmd = ("sort", $outfile);
+    print $log "Going to run [" . join(" ", @cmd) . "]\n";
+    IPC::Run::run(\@cmd, '>', "$sortfile");
+
+    return $sortfile;
+}
+
+sub runPig
+{
+    # Write the test's Pig script to a file, run it and optionally fetch the
+    # results.
+    #
+    #   $testCmd     - hash ref describing the test ('pig' holds the script)
+    #   $log         - filehandle receiving diagnostics and command output
+    #   $copyResults - when true, copy the results out of HDFS and
+    #                  post-process them; otherwise report "NO_COPY"
+    #
+    # Returns a hash ref with 'rc' and either 'output' (single store) or
+    # 'outputs' (several stores), plus 'sortArgs' when the test defines
+    # them.  Dies if the script cannot be written or the Pig run fails.
+    my ($self, $testCmd, $log, $copyResults) = @_;
+    my $subName = (caller(0))[3];
+
+    my %result;
+
+    # Write the pig script to a file.
+    my $pigfile = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".pig";
+    my $outfile = $testCmd->{'outpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".out";
+
+    my $pigcmd = $self->replaceParameters( $testCmd->{'pig'}, $outfile, $testCmd, $log );
+
+    open(my $fh, '>', $pigfile) or die "Unable to open file $pigfile to write pig script, $ERRNO\n";
+    print $fh $pigcmd . "\n";
+    close($fh) or die "Unable to close file $pigfile, $ERRNO\n";
+
+    # Build the command
+    my @baseCmd = $self->getPigCmd($testCmd, $log);
+    my @cmd = @baseCmd;
+
+    # Add option -l giving location for secondary logs
+    my $locallog = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".log";
+    push(@cmd, "-logfile", $locallog);
+
+    # Add pig parameters if they're provided
+    if (defined($testCmd->{'pig_params'})) {
+        # Processing :PARAMPATH: in parameters (this rewrites the entries
+        # of $testCmd->{'pig_params'} in place, as before)
+        foreach my $param (@{$testCmd->{'pig_params'}}) {
+            $param =~ s/:PARAMPATH:/$testCmd->{'paramPath'}/g;
+        }
+        push(@cmd, @{$testCmd->{'pig_params'}});
+    }
+
+    push(@cmd, $pigfile);
+
+    # Run the command
+    print $log "Setting PIG_CLASSPATH to $ENV{'PIG_CLASSPATH'}\n";
+    print $log "$0::$className::$subName INFO: Going to run pig command: @cmd\n";
+
+    IPC::Run::run(\@cmd, \undef, $log, $log) or
+        die "Failed running $pigfile\n";
+    $result{'rc'} = $? >> 8;
+
+    # Get results from the command locally
+    my $localdir = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".out";
+    my $stores = $self->countStores($testCmd);
+
+    # single query
+    if ($stores == 1) {
+        if ($copyResults) {
+            $result{'output'} = $self->postProcessSingleOutputFile($outfile, $localdir, \@baseCmd, $testCmd, $log);
+            $result{'originalOutput'} = "$localdir/out_original"; # populated by postProcessSingleOutputFile
+        } else {
+            $result{'output'} = "NO_COPY";
+        }
+    }
+    # multi query
+    else {
+        my @outfiles = ();
+        for (my $id = 1; $id <= ($stores); $id++) {
+            $localdir = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".out/$id";
+            my $localoutfile = $outfile . ".$id";
+
+            # Copy result file out of hadoop
+            my $testOut;
+            if ($copyResults) {
+                $testOut = $self->postProcessSingleOutputFile($localoutfile, $localdir, \@baseCmd, $testCmd, $log);
+            } else {
+                $testOut = "NO_COPY";
+            }
+            push(@outfiles, $testOut);
+        }
+        ##!!! originalOutputs not set! Needed?
+        $result{'outputs'} = \@outfiles;
+    }
+
+    # Compare doesn't get the testCmd hash, so I need to stuff the necessary
+    # info about sorting into the result.
+    if (defined $testCmd->{'sortArgs'} && $testCmd->{'sortArgs'}) {
+        $result{'sortArgs'} = $testCmd->{'sortArgs'};
+    }
+
+    return \%result;
+}
+
+sub getPigCmd($$$)
+{
+    # Build the base pig command line and export the environment it needs.
+    #
+    #   $testCmd - hash ref with the configuration ('pigpath', 'exectype',
+    #              'thriftserver' and the optional classpath/jar entries)
+    #   $log     - filehandle receiving diagnostics
+    #
+    # Side effects: sets $ENV{'PIG_CLASSPATH'} and $ENV{'PIG_OPTS'}; in
+    # local mode makes sure 'java_params' contains "-Xmx1024m".
+    # Returns the command as a list.
+    my ($self, $testCmd, $log) = @_;
+
+    # Set the PIG_CLASSPATH environment variable.  The variable is declared
+    # unconditionally: "my $x ... if COND" has undefined behaviour in Perl
+    # and can carry a value over from an earlier call.
+    my $pcp = defined($testCmd->{'jythonjar'}) ? $testCmd->{'jythonjar'} : '';
+    $pcp .= ":" . $testCmd->{'classpath'} if (defined($testCmd->{'classpath'}));
+    $pcp .= ":" . $testCmd->{'additionaljars'} if (defined($testCmd->{'additionaljars'}));
+    # Only add testconfigpath to PIG_CLASSPATH when not running in local mode
+    $pcp .= ":" . $testCmd->{'testconfigpath'} if ($testCmd->{'exectype'} ne "local"); #&& (! defined $ENV{'HADOOP_HOME'});
+
+    # Set it in our current environment. It will get inherited by the IPC::Run
+    # command.
+    $ENV{'PIG_CLASSPATH'} = $pcp;
+
+    my @pigCmd = ("$testCmd->{'pigpath'}/bin/pig");
+
+    if (defined($testCmd->{'additionaljars'})) {
+        push(@pigCmd, '-Dpig.additional.jars='.$testCmd->{'additionaljars'});
+    }
+
+    if ($testCmd->{'exectype'} eq "local") {
+        # $testCmd is shared between calls, so add the heap setting only
+        # once instead of appending another copy on every call.
+        $testCmd->{'java_params'} = [] unless defined($testCmd->{'java_params'});
+        push(@{$testCmd->{'java_params'}}, "-Xmx1024m")
+            unless grep { $_ eq "-Xmx1024m" } @{$testCmd->{'java_params'}};
+        push(@pigCmd, ("-x", "local"));
+    }
+
+    my $opts = "-Dhcat.metastore.uri=$testCmd->{'thriftserver'}";
+    if (defined($testCmd->{'java_params'})) {
+        $opts = $opts . " " . join(" ", @{$testCmd->{'java_params'}});
+    }
+
+    $ENV{'PIG_OPTS'} = $opts;
+
+    print $log "Returning Pig command " . join(" ", @pigCmd) . "\n";
+    print $log "With PIG_CLASSPATH set to " . $ENV{'PIG_CLASSPATH'} . " and PIG_OPTS set to " . $ENV{'PIG_OPTS'} . "\n";
+    return @pigCmd;
+}
+
+sub compareSingleOutput
+{
+    # Compare one test output file with its benchmark file by checksum and,
+    # when the test defines 'sortArgs', verify the sort order of the
+    # original (unsorted) test output.
+    #
+    #   $testResult      - hash ref returned by the test run
+    #   $testOutput      - path of the test output file
+    #   $benchmarkOutput - path of the benchmark file
+    #   $log             - filehandle receiving diagnostics
+    #
+    # Returns 1 when the checksums match and the sort check (if any)
+    # passes, 0 otherwise.  Dies if cksum cannot be run.
+    my ($self, $testResult, $testOutput, $benchmarkOutput, $log) = @_;
+
+    print $log "testResult: $testResult testOutput: $testOutput benchmarkOutput: $benchmarkOutput\n";
+
+    # cksum the two files to see if they are the same
+    my ($testChksm, $benchmarkChksm);
+    IPC::Run::run((['cat', $testOutput], '|', ['cksum']), \$testChksm,
+        $log) or die "$0: error: cannot run cksum on test results\n";
+    IPC::Run::run((['cat', $benchmarkOutput], '|', ['cksum']),
+        \$benchmarkChksm, $log) or die "$0: error: cannot run cksum on benchmark\n";
+
+    chomp $testChksm;
+    chomp $benchmarkChksm;
+    print $log "test cksum: $testChksm\nbenchmark cksum: $benchmarkChksm\n";
+
+    # Start from an explicit 0 so a mismatch yields 0 rather than undef.
+    my $result = 0;
+    if ($testChksm ne $benchmarkChksm) {
+        print $log "Test output checksum does not match benchmark checksum\n";
+        print $log "Test checksum = <$testChksm>\n";
+        print $log "Expected checksum = <$benchmarkChksm>\n";
+        print $log "RESULTS DIFFER: vimdiff " . cwd . "/$testOutput " . cwd . "/$benchmarkOutput\n";
+    } else {
+        $result = 1;
+    }
+
+    # Now, check if the sort order is specified
+    if (defined($testResult->{'sortArgs'})) {
+        Util::setLocale();
+        my @sortChk = ('sort', '-cs');
+        push(@sortChk, @{$testResult->{'sortArgs'}});
+        push(@sortChk, $testResult->{'originalOutput'});
+        print $log "Going to run sort check command: " . join(" ", @sortChk) . "\n";
+        IPC::Run::run(\@sortChk, \undef, $log, $log);
+        my $sortrc = $?;
+        if ($sortrc) {
+            print $log "Sort check failed\n";
+            $result = 0;
+        }
+    }
+
+    return $result;
+}
+
+##############################################################################
+# Count the number of stores in a Pig Latin script, so we know how many files
+# we need to compare.
+#
+sub countStores($$)
+{
+    # Count the outputs a test is expected to produce.
+    #
+    #   $testCmd - hash ref describing the test
+    #
+    # For a Pig test ('pig' defined) this is the number of lines containing
+    # a "store <alias> into" statement; otherwise it is the number of lines
+    # of the 'hadoop' command that mention OUTPATH.  Always returns a number
+    # (0 when nothing matches or no command is defined).
+    my ($self, $testCmd) = @_;
+
+    my $count = 0;
+
+    if (defined $testCmd->{'pig'}) {
+        # hope they don't have more than one store per line
+        # also note that this won't work if you comment out a store
+        foreach my $line (split(/\n/, $testCmd->{'pig'})) {
+            $count++ if $line =~ /store\s+[a-zA-Z][a-zA-Z0-9_]*\s+into/i;
+        }
+    }
+    elsif (defined $testCmd->{'hadoop'}) {
+        # A line counts once however often it mentions OUTPATH.
+        foreach my $line (split(/\n/, $testCmd->{'hadoop'})) {
+            $count++ if $line =~ /OUTPATH/i;
+        }
+    }
+
+    return $count;
+}
+
+1;
Added: incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/TestDriverHive.pm
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/TestDriverHive.pm?rev=1211077&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/TestDriverHive.pm (added)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/TestDriverHive.pm Tue Dec 6 20:05:37 2011
@@ -0,0 +1,383 @@
+package TestDriverHive;
+
+############################################################################
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+###############################################################################
+# Test driver for hive nightly tests.
+#
+#
+
+use TestDriver;
+use IPC::Run; # don't do qw(run), it screws up TestDriver which also has a run method
+use Digest::MD5 qw(md5_hex);
+use Util;
+use File::Path;
+use Cwd;
+
+use strict;
+use English;
+
+# This driver inherits from the generic TestDriver class.
+our $className = "TestDriver";
+our @ISA       = ($className);
+
+# HARNESS_ROOT is mandatory; the helper tools live underneath it.
+our $ROOT = $ENV{'HARNESS_ROOT'};
+die "ERROR: You must set environment variable HARNESS_ROOT\n"
+    unless defined $ROOT;
+our $toolpath = "$ROOT/libexec/HCatTest";
+
+# Result status strings.
+my $passedStr  = 'passed';
+my $failedStr  = 'failed';
+my $abortedStr = 'aborted';
+my $skippedStr = 'skipped';
+my $dependStr  = 'failed_dependency';
+
+sub new
+{
+    # Build the object through the parent constructor, then re-bless it
+    # into the class we were invoked on (works for both class and
+    # instance invocation).
+    my ($proto) = @_;
+    my $class = ref($proto) || $proto;
+
+    my $self = $class->SUPER::new;
+    return bless($self, $class);
+}
+
+sub replaceParameters
+{
+##!!! Move this to Util.pm
+
+    # Expand the :TOKEN: placeholders in a command template and return the
+    # expanded string.  $log is accepted but not used.
+    my ($self, $cmd, $outfile, $testCmd, $log) = @_;
+
+    # Placeholder => replacement, applied one after another in this order.
+    my @substitutions = (
+        [ ':LATESTOUTPUTPATH:', $self->{'latestoutputpath'} ],   # $self
+        [ ':OUTPATH:',          $outfile ],                      # $outfile
+        [ ':HARNESS:',          $ENV{HARNESS_ROOT} ],            # $ENV
+        [ ':INPATH:',           $testCmd->{'inpathbase'} ],      # $testCmd
+    );
+
+    foreach my $pair (@substitutions) {
+        my ($token, $value) = @{$pair};
+        $cmd =~ s/$token/$value/g;
+    }
+
+    return $cmd;
+}
+
+sub globalSetup
+{
+    # One-time setup for a harness run: configures the Hive properties,
+    # derives the run id and per-run paths in $globalHash and creates the
+    # local, benchmark and results directories.  Dies if a directory cannot
+    # be created.
+    my ($self, $globalHash, $log) = @_;
+    my $subName = (caller(0))[3];
+
+    # Set up values for the metastore
+    Util::setupHiveProperties($globalHash, $log);
+
+    # Setup the output path
+    my $user = `whoami`;
+    chomp $user;
+    $globalHash->{'runid'} = $user . "." . time;
+
+    $globalHash->{'localpath'} =
+        join("/", $globalHash->{'localpathbase'}, $globalHash->{'runid'}) . "/";
+
+    foreach my $dir ([ 'localpath', 'localpath' ], [ 'benchmarkPath', 'benchmark' ]) {
+        my ($key, $label) = @{$dir};
+        IPC::Run::run(['mkdir', '-p', $globalHash->{$key}], \undef, $log, $log) or
+            die "Cannot create $label directory " . $globalHash->{$key} .
+                " " . "$ERRNO\n";
+    }
+
+    # Results of this run go below the run's local path.
+    my $resultsDir = $globalHash->{'localpath'} . "/" . $globalHash->{'resultsPath'};
+    $globalHash->{'thisResultsPath'} = $resultsDir;
+    IPC::Run::run(['mkdir', '-p', $resultsDir], \undef, $log, $log) or
+        die "Cannot create results directory " . $globalHash->{'thisResultsPath'} .
+            " " . "$ERRNO\n";
+}
+
+sub globalCleanup
+{
+    # This driver has nothing to tear down; the arguments are only unpacked.
+    my ($self, $globalHash, $log) = @_;
+}
+
+
+sub runTest
+{
+    # Run the test's SQL through Hive and collect the output files.
+    #
+    #   $testCmd - hash ref describing the test ('sql' holds the script)
+    #   $log     - filehandle receiving diagnostics
+    #
+    # Without 'result_table' the script's own output is the single result.
+    # With 'result_table' the script is run first and then one
+    # "select * from <table>" is run per result table.
+    #
+    # Returns a hash ref with 'rc', 'originalOutput' and 'output' (array
+    # refs, one entry per result) and 'sortArgs' when the test defines
+    # them.  Dies if a script or output file cannot be written.
+    my ($self, $testCmd, $log) = @_;
+
+    my %result;
+
+    my @hivefiles = ();
+    my @outfiles = ();
+    # Write the hive script to a file.
+    $hivefiles[0] = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" .
+        $testCmd->{'num'} . ".0.sql";
+    $outfiles[0] = $testCmd->{'thisResultsPath'} . "/" . $testCmd->{'group'} .
+        "_" . $testCmd->{'num'} . ".0.out";
+
+    open(my $sqlFh, '>', $hivefiles[0]) or
+        die "Unable to open file $hivefiles[0] to write SQL script, $ERRNO\n";
+    print $sqlFh $testCmd->{'sql'} . "\n";
+    close($sqlFh) or
+        die "Unable to close file $hivefiles[0], $ERRNO\n";
+
+    # If the results are written to a table run the command and then
+    # run another Hive command to dump the results of the table.
+    if (defined($testCmd->{'result_table'})) {
+        Util::runHiveCmdFromFile($testCmd, $log, $hivefiles[0]);
+        $result{'rc'} = $? >> 8;
+
+        my @results = (ref($testCmd->{'result_table'}) eq 'ARRAY')
+            ? @{$testCmd->{'result_table'}}
+            : ($testCmd->{'result_table'});
+
+        # The dump scripts replace the original script in @hivefiles.
+        for (my $i = 0; $i < @results; $i++) {
+            $hivefiles[$i] = $testCmd->{'localpath'} .
+                $testCmd->{'group'} . "_" . $testCmd->{'num'} .
+                ".dumptable.$i.sql";
+            $outfiles[$i] = $testCmd->{'thisResultsPath'} . "/" .
+                $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".$i.out";
+            open(my $dumpFh, '>', $hivefiles[$i]) or
+                die "Unable to open file $hivefiles[$i] to write SQL " .
+                    "script, $ERRNO\n";
+            print $dumpFh "select * from " . $results[$i] . ";\n";
+            close($dumpFh) or
+                die "Unable to close file $hivefiles[$i], $ERRNO\n";
+        }
+    }
+
+    my @originalOutputs = ();
+    my @outputs = ();
+    $result{'originalOutput'} = \@originalOutputs;
+    $result{'output'} = \@outputs;
+
+    for (my $i = 0; $i < @hivefiles; $i++) {
+        open(my $outfp, '>', $outfiles[$i]) or
+            die "Unable to open output file $outfiles[$i], $!\n";
+
+        Util::runHiveCmdFromFile($testCmd, $log, $hivefiles[$i], $outfp);
+
+        # Don't overwrite rc if we set it above
+        $result{'rc'} = $? >> 8 unless defined $result{'rc'};
+        close($outfp);
+
+        $originalOutputs[$i] = $outfiles[$i];
+        $outputs[$i] =
+            $self->postProcessSingleOutputFile($outfiles[$i], $testCmd, $log);
+    }
+
+    # Compare doesn't get the testCmd hash, so I need to stuff the necessary
+    # info about sorting into the result.
+    if (defined $testCmd->{'sortArgs'} && $testCmd->{'sortArgs'}) {
+        $result{'sortArgs'} = $testCmd->{'sortArgs'};
+    }
+
+    return \%result;
+}
+
+
+
+sub generateBenchmark
+{
+    # Produce the benchmark output by running the verification SQL against
+    # the benchmark database.
+    #
+    #   $testCmd - hash ref describing the test; 'verify_sql' (a statement
+    #              or an array ref of statements) is used when present,
+    #              otherwise 'sql'
+    #   $log     - filehandle receiving diagnostics
+    #
+    # Returns a hash ref with 'rc' and 'output', array refs holding one
+    # entry per statement.  Dies if a script or output file cannot be
+    # written.
+    my ($self, $testCmd, $log) = @_;
+
+    my %result;
+
+    # Collect the statements to run.
+    my @verifies = ();
+    if (defined $testCmd->{'verify_sql'}) {
+        if (ref($testCmd->{'verify_sql'}) eq "ARRAY") {
+            @verifies = @{$testCmd->{'verify_sql'}};
+        } else {
+            $verifies[0] = $testCmd->{'verify_sql'};
+        }
+    } else {
+        $verifies[0] = $testCmd->{'sql'};
+    }
+
+    my @rcs = ();
+    $result{'rc'} = \@rcs;
+    my @outputs = ();
+    $result{'output'} = \@outputs;
+    for (my $i = 0; $i < @verifies; $i++) {
+        my $sqlfile = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" .
+            $testCmd->{'num'} . ".benchmark.$i.sql";
+        my $outfile = $testCmd->{'benchmarkPath'} . "/" .
+            $testCmd->{'group'} . "_" . $testCmd->{'num'} .
+            ".benchmark.$i.out";
+
+        # Write the SQL to a file.
+        open(my $sqlFh, '>', $sqlfile) or
+            die "Unable to open file $sqlfile to write SQL script, $ERRNO\n";
+        print $sqlFh $verifies[$i];
+        close($sqlFh) or
+            die "Unable to close file $sqlfile, $ERRNO\n";
+
+        open(my $outfp, '>', $outfile) or
+            die "Unable to open output file $outfile, $!\n";
+
+        Util::runDbCmd($testCmd, $log, $sqlfile, $outfp);
+        $rcs[$i] = $? >> 8;
+        close($outfp);
+
+        # The trailing 1 marks the file as benchmark output.
+        $outputs[$i] =
+            $self->postProcessSingleOutputFile($outfile, $testCmd, $log, 1);
+    }
+
+    return \%result;
+}
+
+sub compare
+{
+    # Compare every test output with the benchmark output at the same
+    # index by checksum and, when the test defines 'sortArgs', verify the
+    # sort order of the original test output.
+    #
+    #   $testResult      - hash ref returned by runTest
+    #   $benchmarkResult - hash ref returned by generateBenchmark
+    #   $log             - filehandle receiving diagnostics
+    #   $testCmd         - hash ref describing the test (not used here)
+    #
+    # Returns true when no comparison failed.  Dies when the two sides hold
+    # a different number of outputs or cksum cannot be run.
+    my ($self, $testResult, $benchmarkResult, $log, $testCmd) = @_;
+
+    my $testOutputs      = $testResult->{'output'};
+    my $benchmarkOutputs = $benchmarkResult->{'output'};
+
+    # Make sure we have the same number of results from runTest and
+    # generateBenchmark
+    if (scalar(@{$testOutputs}) != scalar(@{$benchmarkOutputs})) {
+        die "runTest returned " . scalar(@{$testOutputs}) .
+            " results, but generateBenchmark returned " .
+            scalar(@{$benchmarkOutputs}) . "\n";
+    }
+
+    my $totalFailures = 0;
+    for (my $i = 0; $i < @{$testOutputs}; $i++) {
+        # Element access (->[$i]) replaces the one-element array slices
+        # used before.
+        my $testOutput      = $testOutputs->[$i];
+        my $benchmarkOutput = $benchmarkOutputs->[$i];
+
+        # cksum the two files to see if they are the same
+        my ($testChksm, $benchmarkChksm);
+        IPC::Run::run((['cat', $testOutput], '|',
+            ['cksum']), \$testChksm, $log) or
+            die "$0: error: cannot run cksum on test results\n";
+        IPC::Run::run((['cat', $benchmarkOutput], '|',
+            ['cksum']), \$benchmarkChksm, $log) or
+            die "$0: error: cannot run cksum on benchmark\n";
+
+        chomp $testChksm;
+        chomp $benchmarkChksm;
+        print $log
+            "test cksum: $testChksm\nbenchmark cksum: $benchmarkChksm\n";
+
+        if ($testChksm ne $benchmarkChksm) {
+            print $log "Test output $i checksum does not match benchmark " .
+                "checksum\n";
+            print $log "Test $i checksum = <$testChksm>\n";
+            print $log "Expected $i checksum = <$benchmarkChksm>\n";
+            print $log "RESULTS DIFFER: vimdiff " . cwd .
+                "/" . $testOutput . " " . cwd .
+                "/" . $benchmarkOutput . "\n";
+            $totalFailures++;
+        }
+
+        # Now, check if the sort order is specified
+        if (defined($testResult->{'sortArgs'})) {
+            my @sortChk = ('sort', '-cs');
+            push(@sortChk, @{$testResult->{'sortArgs'}});
+            push(@sortChk, $testResult->{'originalOutput'}->[$i]);
+            print $log "Going to run sort check command: " .
+                join(" ", @sortChk) . "\n";
+            IPC::Run::run(\@sortChk, \undef, $log, $log);
+            my $sortrc = $?;
+            if ($sortrc) {
+                print $log "Sort check failed\n";
+                $totalFailures++;
+            }
+        }
+    }
+
+    return $totalFailures == 0;
+}
+
+# Post-process one local output file and return the name of a sorted copy.
+#
+# Steps, in order:
+#   1. if both 'floatpostprocess' and 'delimiter' are defined in $testCmd,
+#      rewrite the file through floatpostprocessor.pl;
+#   2. if $isBenchmark is true and 'nullpostprocess' is defined, rewrite the
+#      file through sed, removing every occurrence of the string NULL;
+#   3. sort the file into "$outfile.sorted".
+#
+# Returns "$outfile.sorted".  Dies if any step fails.
+sub postProcessSingleOutputFile
+{
+    my ($self, $outfile, $testCmd, $log, $isBenchmark) = @_;
+
+    # If requested, process the data to smooth over floating point
+    # differences.
+    if (defined $testCmd->{'floatpostprocess'} &&
+            defined $testCmd->{'delimiter'}) {
+        _filterOutputInPlace($outfile,
+            ["$toolpath/floatpostprocessor.pl", $testCmd->{'delimiter'}],
+            "float postprocessor", $log);
+    }
+
+    if ($isBenchmark && defined $testCmd->{'nullpostprocess'}) {
+        _filterOutputInPlace($outfile, ["sed", "s/NULL//g"],
+            "null postprocessor", $log);
+    }
+
+    # Sort the results for the benchmark compare.
+    my $sortfile = "$outfile.sorted";
+    my @cmd = ("sort", $outfile);
+    print $log "Going to run [" . join(" ", @cmd) . "]\n";
+    # Without this check a failed sort would only surface later, as a
+    # confusing checksum error in compare.
+    IPC::Run::run(\@cmd, '>', $sortfile) or
+        die "Failed to sort $outfile into $sortfile, $!\n";
+
+    return $sortfile;
+}
+
+# Private helper: replace the contents of $outfile with the output of the
+# command @$cmd fed with the file's previous contents.  $what names the
+# filter in the error message.  Dies on any failure.
+sub _filterOutputInPlace
+{
+    my ($outfile, $cmd, $what, $log) = @_;
+
+    # Move the file to a temp file and run it through the filter.
+    my $tmpfile = "$outfile.tmp";
+    link($outfile, $tmpfile) or
+        die "Unable to create temporary file $tmpfile, $!\n";
+    unlink($outfile) or
+        die "Unable to unlink file $outfile, $!\n";
+    open(my $ifh, '<', $tmpfile) or
+        die "Unable to open file $tmpfile, $!\n";
+    open(my $ofh, '>', $outfile) or
+        die "Unable to open file $outfile, $!\n";
+    print $log "Going to run [" . join(" ", @{$cmd}) . "]\n";
+    IPC::Run::run($cmd, $ifh, $ofh, $log) or
+        die "Failed to run $what, $!\n";
+    close($ifh);
+    close($ofh);
+    unlink($tmpfile);
+    return;
+}
+
+
+
+##############################################################################
+# Count the number of stores in a Pig Latin script, so we know how many files
+# we need to compare.
+#
+sub countStores($$)
+{
+    # Arguments: $self, and $testCmd whose 'pig' entry holds the script text.
+    # Returns the number of lines containing "store <alias> into"
+    # (case-insensitive), or 1 when $testCmd->{'notmq'} is defined.
+    my ($self, $testCmd) = @_;
+
+    # Special work around for queries with more than one store that are not
+    # actually multiqueries.
+    return 1 if defined $testCmd->{'notmq'};
+
+    # Start at 0 so that a script without any store (or without any text at
+    # all) yields a numeric 0 instead of undef.
+    my $count = 0;
+    my $script = defined $testCmd->{'pig'} ? $testCmd->{'pig'} : '';
+
+    # hope they don't have more than one store per line
+    # also note that this won't work if you comment out a store
+    foreach my $line (split(/\n/, $script)) {
+        $count++ if $line =~ /store\s+[a-zA-Z][a-zA-Z0-9_]*\s+into/i;
+    }
+
+    return $count;
+}
+
+1;
Added: incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/TestDriverPig.pm
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/TestDriverPig.pm?rev=1211077&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/TestDriverPig.pm (added)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/TestDriverPig.pm Tue Dec 6 20:05:37 2011
@@ -0,0 +1,1000 @@
+package TestDriverPig;
+
+############################################################################
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+###############################################################################
+# Test driver for pig nightly tests.
+#
+#
+
+use TestDriver;
+use IPC::Run; # don't do qw(run), it screws up TestDriver which also has a run method
+use Digest::MD5 qw(md5_hex);
+use Util;
+use File::Path;
+use Cwd;
+
+use English;
+
+# Name of the parent class; this package inherits from it through @ISA.
+our $className= "TestDriver";
+our @ISA = "$className";
+# Root of the harness installation.  Loading this module dies when the
+# HARNESS_ROOT environment variable is not set.
+our $ROOT = (defined $ENV{'HARNESS_ROOT'} ? $ENV{'HARNESS_ROOT'} : die "ERROR: You must set environment variable HARNESS_ROOT\n");
+# Directory holding helper tools such as floatpostprocessor.pl.
+our $toolpath = "$ROOT/libexec/HCatTest";
+
+# Status strings.  NOTE(review): presumably the test states reported to the
+# harness; they are not referenced in the part of the file reviewed here.
+my $passedStr = 'passed';
+my $failedStr = 'failed';
+my $abortedStr = 'aborted';
+my $skippedStr = 'skipped';
+my $dependStr = 'failed_dependency';
+
+# Constructor.  May be invoked on the class name or on an existing instance;
+# the object is built by the parent class and blessed into the resolved
+# class.
+sub new
+{
+    my $invocant = $_[0];
+    my $class = ref($invocant) || $invocant;
+
+    # Call our parent
+    my $self = $class->SUPER::new;
+
+    return bless($self, $class);
+}
+
+# Expand the :NAME: placeholders of a command or script.
+#
+# Arguments: $self (read for 'latestoutputpath'), $cmd (the text to expand),
+# $outfile (value for :OUTPATH:), $testCmd (test hash supplying the other
+# values), $log (unused).
+# Returns the expanded text.
+sub replaceParameters
+{
+##!!! Move this to Util.pm
+
+    my ($self, $cmd, $outfile, $testCmd, $log) = @_;
+
+    # :REMOTECLUSTER: comes from a different key depending on whether the
+    # cluster is secure.
+    my $clusterKey = ($testCmd->{'hadoopSecurity'} eq "secure")
+        ? 'remoteSecureCluster'
+        : 'remoteNotSecureCluster';
+
+    # Placeholder/value pairs, applied strictly in this order.  :OUTPATH: is
+    # listed twice on purpose, mirroring the historical sequence.
+    my @substitutions = (
+        [ ':LATESTOUTPUTPATH:', $self->{'latestoutputpath'} ],
+        [ ':OUTPATH:',          $outfile ],
+        [ ':PIGHARNESS:',       $ENV{HARNESS_ROOT} ],
+        [ ':INPATH:',           $testCmd->{'inpathbase'} ],
+        [ ':OUTPATH:',          $outfile ],
+        [ ':FUNCPATH:',         $testCmd->{'funcjarPath'} ],
+        [ ':PIGPATH:',          $testCmd->{'pigpath'} ],
+        [ ':RUNID:',            $testCmd->{'UID'} ],
+        [ ':USRHOMEPATH:',      $testCmd->{'userhomePath'} ],
+        [ ':MAPREDJARS:',       $testCmd->{'mapredjars'} ],
+        [ ':SCRIPTHOMEPATH:',   $testCmd->{'scriptPath'} ],
+        [ ':DBUSER:',           $testCmd->{'dbuser'} ],
+        [ ':DBNAME:',           $testCmd->{'dbdb'} ],
+        [ ':BMPATH:',           $testCmd->{'benchmarkPath'} ],
+        [ ':TMP:',              $testCmd->{'tmpPath'} ],
+        [ ':HDFSTMP:',          "tmp/" . $testCmd->{'runid'} ],
+        [ ':REMOTECLUSTER:',    $testCmd->{$clusterKey} ],
+    );
+
+    foreach my $pair (@substitutions) {
+        my ($placeholder, $value) = @{$pair};
+        $cmd =~ s/\Q$placeholder\E/$value/g;
+    }
+
+    return $cmd;
+}
+
+# One-time setup executed before the tests.
+#
+# Fills in $globalHash: 'runid' (user name, a dot and the current time),
+# 'outpath' and 'localpath' (their *base values plus the run id).  Copies an
+# 'ignore' value of 'false' onto $self, prepends 'scriptPath' to PATH, and
+# creates the HDFS output directory, the local, benchmark and temporary
+# directories and the HDFS temporary directory tmp/<runid>.
+# Dies if any of the directories cannot be created.
+sub globalSetup
+{
+    my ($self, $globalHash, $log) = @_;
+
+    # Setup the output path
+    my $me = `whoami`;
+    chomp $me;
+    $globalHash->{'runid'} = $me . "." . time;
+
+    # if "-ignore false" was provided on the command line,
+    # it means do run tests even when marked as 'ignore'
+    if(defined($globalHash->{'ignore'}) && $globalHash->{'ignore'} eq 'false')
+    {
+        $self->{'ignore'} = 'false';
+    }
+
+    $globalHash->{'outpath'} = $globalHash->{'outpathbase'} . "/" . $globalHash->{'runid'} . "/";
+    $globalHash->{'localpath'} = $globalHash->{'localpathbase'} . "/" . $globalHash->{'runid'} . "/";
+
+    # add libexec location to the path
+    if (defined($ENV{'PATH'})) {
+        $ENV{'PATH'} = $globalHash->{'scriptPath'} . ":" . $ENV{'PATH'};
+    }
+    else {
+        $ENV{'PATH'} = $globalHash->{'scriptPath'};
+    }
+
+    my @cmd = ($self->getPigCmd($globalHash, $log), '-e', 'mkdir', $globalHash->{'outpath'});
+
+    print $log "Going to run " . join(" ", @cmd) . "\n";
+    IPC::Run::run(\@cmd, \undef, $log, $log) or die "Cannot create HDFS directory " . $globalHash->{'outpath'} . ": $? - $!\n";
+
+    IPC::Run::run(['mkdir', '-p', $globalHash->{'localpath'}], \undef, $log, $log) or
+        die "Cannot create localpath directory " . $globalHash->{'localpath'} .
+        " " . "$ERRNO\n";
+
+    IPC::Run::run(['mkdir', '-p', $globalHash->{'benchmarkPath'}], \undef, $log, $log) or
+        die "Cannot create benchmark directory " . $globalHash->{'benchmarkPath'} .
+        " " . "$ERRNO\n";
+
+    # Create the temporary directory
+    IPC::Run::run(['mkdir', '-p', $globalHash->{'tmpPath'}], \undef, $log, $log) or
+        die "Cannot create temporary directory " . $globalHash->{'tmpPath'} .
+        " " . "$ERRNO\n";
+
+    # Create the HDFS temporary directory.  The error names the directory
+    # actually being created, not the output path.
+    my $hdfsTmp = "tmp/$globalHash->{'runid'}";
+    @cmd = ($self->getPigCmd($globalHash, $log), '-e', 'mkdir', $hdfsTmp);
+    print $log "Going to run " . join(" ", @cmd) . "\n";
+    IPC::Run::run(\@cmd, \undef, $log, $log) or die "Cannot create HDFS directory " . $hdfsTmp . ": $? - $!\n";
+}
+
+# One-time cleanup executed after the tests: removes the local temporary
+# directory (a failure only warns) and the HDFS temporary directory
+# tmp/<runid> (a failure dies).
+sub globalCleanup
+{
+    my ($self, $globalHash, $log) = @_;
+
+    IPC::Run::run(['rm', '-rf', $globalHash->{'tmpPath'}], \undef, $log, $log) or
+        warn "Cannot remove temporary directory " . $globalHash->{'tmpPath'} .
+        " " . "$ERRNO\n";
+
+    # Cleanup the HDFS temporary directory.  The error describes the removal
+    # that failed rather than an unrelated directory creation.
+    my $hdfsTmp = "tmp/$globalHash->{'runid'}";
+    my @cmd = ($self->getPigCmd($globalHash, $log), '-e', 'fs', '-rmr', $hdfsTmp);
+    print $log "Going to run " . join(" ", @cmd) . "\n";
+    IPC::Run::run(\@cmd, \undef, $log, $log) or die "Cannot remove HDFS directory " . $hdfsTmp . ": $? - $!\n";
+}
+
+
+# Run one test and return a hash ref describing its result.
+#
+# Dispatch:
+#   - wrong execution mode: log the skip and return an empty hash ref;
+#   - 'pig' with command line verifications: runPigCmdLine;
+#   - 'pig' with 'result_table': run the script without copying results,
+#     then collect one output per 'result_table' entry (see below);
+#   - 'pig' otherwise: runPig with results copied locally;
+#   - 'script': runScript;
+#   - anything else: die.
+sub runTest
+{
+    my ($self, $testCmd, $log) = @_;
+    my $subName = (caller(0))[3];
+
+    # Check that we should run this test. If the current execution type
+    # doesn't match the execonly flag, then skip this one.
+    if ($self->wrongExecutionMode($testCmd)) {
+        print $log "Skipping test $testCmd->{'group'}" . "_" .
+            $testCmd->{'num'} . " since it is executed only in " .
+            $testCmd->{'execonly'} . " mode and we are executing in " .
+            $testCmd->{'exectype'} . " mode.\n";
+        my %result;
+        return \%result;
+    }
+
+    if ( $testCmd->{'hcat_prep'} ) {
+        Util::prepareHCat($self, $testCmd, $log);
+    }
+    # Handle the various methods of running used in
+    # the original TestDrivers
+
+    if ( $testCmd->{'pig'} && $self->hasCommandLineVerifications( $testCmd, $log) ) {
+        return $self->runPigCmdLine( $testCmd, $log, 1);
+    } elsif( $testCmd->{'pig'} ){
+        # If the results are written to a table run the command and then
+        # run another Pig script to dump the results of the table.
+        my $result;
+        if (defined($testCmd->{'result_table'})) {
+            $result = $self->runPig( $testCmd, $log, 0);
+            my @results = ();
+            my @outputs = ();
+            if (ref($testCmd->{'result_table'}) ne 'ARRAY') {
+                $results[0] = $testCmd->{'result_table'};
+            } else {
+                @results = @{$testCmd->{'result_table'}};
+            }
+
+            my $id = 0; # regular output count
+            for (my $i = 0; $i < @results; $i++) {
+                if ($results[$i] ne '?') {
+                    # A table name: dump the table with a generated script.
+                    # All working variables are lexical so nothing leaks
+                    # into package globals between tests.
+                    my %modifiedTestCmd = %{$testCmd};
+                    my $tableName = $results[$i];
+                    $modifiedTestCmd{'num'} = $testCmd->{'num'} . "_" . $i . "_benchmark";
+                    $modifiedTestCmd{'pig'} = "a = load '$tableName' using org.apache.hcatalog.pig.HCatLoader(); store a into ':OUTPATH:';";
+                    my $r = $self->runPig(\%modifiedTestCmd, $log, 1);
+                    $outputs[$i] = $r->{'output'};
+                } else {
+                    # '?' marks a regular (non-table) output of the script.
+                    # NOTE(review): $id starts at 0 here while runPig numbers
+                    # multi-store outputs from 1 -- confirm this is intended.
+                    my $localdir = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".out/$id";
+                    my $outfile = $testCmd->{'outpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".out";
+
+                    # Copy result file out of hadoop
+                    my @baseCmd = $self->getPigCmd($testCmd, $log);
+                    $outputs[$i] = $self->postProcessSingleOutputFile($outfile, $localdir, \@baseCmd, $testCmd, $log);
+                    $id++;
+                }
+            }
+            $result->{'outputs'}=\@outputs;
+            if ($self->countStores($testCmd)==1) {
+                $result->{'output'}=$outputs[0];
+            }
+        }
+        else {
+            $result = $self->runPig( $testCmd, $log, 1);
+        }
+        return $result;
+    } elsif( $testCmd->{'script'} ){
+        return $self->runScript( $testCmd, $log );
+    } else {
+        die "$subName FATAL Did not find a testCmd that I know how to handle";
+    }
+}
+
+# Run a Pig script through the shell, capturing stdout and stderr in files.
+#
+# Returns a hash ref with 'rc' (exit code), 'output' (the HDFS output path
+# given to the script), 'stdout' and 'stderr' (captured text) and
+# 'stderr_file'.  Dies if the local output directory or the script file
+# cannot be created.
+# Side effect: :PARAMPATH: is expanded in place inside
+# $testCmd->{'pig_params'}.
+sub runPigCmdLine
+{
+    my ($self, $testCmd, $log) = @_;
+    my $subName = (caller(0))[3];
+    my %result;
+
+    # Set up file locations
+    my $pigfile = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".pig";
+    my $outfile = $testCmd->{'outpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".out";
+
+    my $outdir = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".out";
+    my $stdoutfile = "$outdir/stdout";
+    my $stderrfile = "$outdir/stderr";
+
+    # Test $outdir itself; a bareword here would test a file literally named
+    # "outdir" in the current directory.
+    mkpath( [ $outdir ] , 0, 0755) if ( ! -e $outdir );
+    if ( ! -e $outdir ){
+        print $log "$0.$subName FATAL could not mkdir $outdir\n";
+        die "$0.$subName FATAL could not mkdir $outdir\n";
+    }
+
+    # Write the pig script to a file.
+    my $pigcmd = $self->replaceParameters( $testCmd->{'pig'}, $outfile, $testCmd, $log );
+
+    open(my $pigfh, '>', $pigfile) or die "Unable to open file $pigfile to write pig script, $ERRNO\n";
+    print $pigfh $pigcmd . "\n";
+    close($pigfh);
+
+    # Build the command
+    my @baseCmd = $self->getPigCmd($testCmd, $log);
+    my @cmd = @baseCmd;
+
+    # Add option -l giving location for secondary logs
+    ##!!! Should that even be here?
+    my $locallog = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".log";
+    push(@cmd, "-logfile");
+    push(@cmd, $locallog);
+
+    # Add pig parameters if they're provided
+    if (defined($testCmd->{'pig_params'})) {
+        # Processing :PARAMPATH: in parameters
+        foreach my $param (@{$testCmd->{'pig_params'}}) {
+            $param =~ s/:PARAMPATH:/$testCmd->{'paramPath'}/g;
+        }
+        push(@cmd, @{$testCmd->{'pig_params'}});
+    }
+
+    # Add pig file and redirections
+    push(@cmd, $pigfile);
+    my $command= join (" ", @cmd) . " 1> $stdoutfile 2> $stderrfile";
+
+    # Run the command
+    print $log "$0:$subName Going to run command: ($command)\n";
+    print $log "$0:$subName STD OUT IS IN FILE ($stdoutfile)\n";
+    print $log "$0:$subName STD ERROR IS IN FILE ($stderrfile)\n";
+    print $log "$0:$subName PIG SCRIPT CONTAINS ($pigfile): \n$pigcmd\n";
+
+    # The command goes through the shell so that the redirections apply.
+    my @result=`$command`;
+    $result{'rc'} = $? >> 8;
+    $result{'output'} = $outfile;
+    $result{'stdout'} = `cat $stdoutfile`;
+    $result{'stderr'} = `cat $stderrfile`;
+    $result{'stderr_file'} = $stderrfile;
+
+    print $log "STD ERROR CONTAINS:\n$result{'stderr'}\n";
+
+    return \%result;
+}
+
+
+# Run a shell script test, capturing stdout and stderr in files.
+#
+# The script text comes from $testCmd->{'script'} after placeholder
+# expansion; it is written to a .sh file, made executable and executed.
+# Returns a hash ref with 'rc', 'output' (the stdout file), 'stdout',
+# 'stderr' and 'stderr_file'.  Dies if the output directory or the script
+# file cannot be created.
+sub runScript
+{
+    my ($self, $testCmd, $log) = @_;
+    my $subName = (caller(0))[3];
+    my %result;
+
+    # Set up file locations
+    my $script = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".sh";
+    my $outdir = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".out";
+
+    my $outfile = "$outdir/script.out";
+    my $stdoutfile = "$outdir/script.out";
+    my $stderrfile = "$outdir/script.err";
+
+    # Test $outdir itself; a bareword here would test a file literally named
+    # "outdir" in the current directory.
+    mkpath( [ $outdir ] , 0, 0755) if ( ! -e $outdir );
+    if ( ! -e $outdir ){
+        print $log "$0.$subName FATAL could not mkdir $outdir\n";
+        die "$0.$subName FATAL could not mkdir $outdir\n";
+    }
+
+    # Write the script to a file
+    my $cmd = $self->replaceParameters( $testCmd->{'script'}, $outfile, $testCmd, $log );
+
+    open(my $scriptfh, '>', $script) or die "Unable to open file $script to write script, $ERRNO\n";
+    print $scriptfh $cmd . "\n";
+    close($scriptfh);
+
+    my @result=`chmod +x $script`;
+
+    # Build the command
+    my $command= "$script 1> $stdoutfile 2> $stderrfile";
+
+    # Run the script
+    print $log "$0:$subName Going to run command: ($command)\n";
+    print $log "$0:$subName STD OUT IS IN FILE ($stdoutfile)\n";
+    print $log "$0:$subName STD ERROR IS IN FILE ($stderrfile)\n";
+    print $log "$0:$subName SCRIPT CONTAINS ($script): \n$cmd\n";
+
+    @result=`$command`;
+    $result{'rc'} = $? >> 8;
+    $result{'output'} = $outfile;
+    $result{'stdout'} = `cat $stdoutfile`;
+    $result{'stderr'} = `cat $stderrfile`;
+    $result{'stderr_file'} = $stderrfile;
+
+    print $log "STD ERROR CONTAINS:\n$result{'stderr'}\n";
+
+    return \%result;
+}
+
+
+# Build the base Pig command line and export the environment it needs.
+#
+# Arguments: $self, $testCmd (test or global hash), $log (open log handle).
+# Returns the command as a list: "<pigpath>/bin/pig", an optional
+# -Dpig.additional.jars=... and, in local mode, "-x local".
+# Side effects: sets PIG_CLASSPATH and PIG_OPTS in %ENV.  $testCmd is not
+# modified.
+sub getPigCmd($$$)
+{
+    my ($self, $testCmd, $log) = @_;
+
+    # Build the PIG_CLASSPATH value.  The variable is declared
+    # unconditionally: "my $x ... if COND" has undefined behaviour and can
+    # carry the value of a previous call over into this one.
+    my $pcp = '';
+    $pcp .= $testCmd->{'jythonjar'} if (defined($testCmd->{'jythonjar'}));
+    $pcp .= ":" . $testCmd->{'classpath'} if (defined($testCmd->{'classpath'}));
+    $pcp .= ":" . $testCmd->{'additionaljars'} if (defined($testCmd->{'additionaljars'}));
+    # testconfigpath is added for every non-local run; the former
+    # HADOOP_HOME condition is disabled.
+    $pcp .= ":" . $testCmd->{'testconfigpath'} if ($testCmd->{'exectype'} ne "local"); #&& (! defined $ENV{'HADOOP_HOME'});
+
+    # Set it in our current environment. It will get inherited by the IPC::Run
+    # command.
+    $ENV{'PIG_CLASSPATH'} = $pcp;
+
+    my @pigCmd = ("$testCmd->{'pigpath'}/bin/pig");
+
+    if (defined($testCmd->{'additionaljars'})) {
+        push(@pigCmd, '-Dpig.additional.jars='.$testCmd->{'additionaljars'});
+    }
+
+    # Work on a copy of java_params: pushing onto the caller's array added
+    # one more -Xmx1024m to it on every call.
+    my $hasJavaParams = defined($testCmd->{'java_params'});
+    my @javaParams = $hasJavaParams ? @{$testCmd->{'java_params'}} : ();
+
+    if ($testCmd->{'exectype'} eq "local") {
+        push(@javaParams, "-Xmx1024m");
+        push(@pigCmd, ("-x", "local"));
+    }
+
+    my $opts = "-Dhcat.metastore.uri=$testCmd->{'thriftserver'}";
+    if ($hasJavaParams || @javaParams) {
+        $opts = $opts . " " . join(" ", @javaParams);
+    }
+
+    $ENV{'PIG_OPTS'} = $opts;
+
+    print $log "Returning Pig command " . join(" ", @pigCmd) . "\n";
+    print $log "With PIG_CLASSPATH set to " . $ENV{'PIG_CLASSPATH'} . " and PIG_OPTS set to " . $ENV{'PIG_OPTS'} . "\n";
+    return @pigCmd;
+}
+
+# Dump an HCatalog table to HDFS with a generated Pig script and bring the
+# result back to the local file system.
+#
+# Arguments: $self, $testCmd, $table (table name), $log, $id (suffix
+# appended to the test number in the generated file names).
+# Returns the value of postProcessSingleOutputFile for the dumped data.
+# Dies if the script cannot be written or the Pig run fails.
+sub dumpPigTable
+{
+    my ($self, $testCmd, $table, $log, $id) = @_;
+    my $subName = (caller(0))[3];
+
+    # Write the pig script to a file.
+    my $pigfile = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . $id . ".dump.pig";
+    # NOTE(review): unlike the two sibling names this one has no dot before
+    # "dump" -- kept as is, confirm whether that is intended.
+    my $outfile = $testCmd->{'outpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . $id . "dump.out";
+
+    open(my $pigfh, '>', $pigfile) or die "Unable to open file $pigfile to write pig script, $ERRNO\n";
+    print $pigfh "a = load '$table' using org.apache.hcatalog.pig.HCatLoader(); store a into '$outfile';\n";
+    close($pigfh);
+
+    # Build the command
+    my @baseCmd = $self->getPigCmd($testCmd, $log);
+    my @cmd = (@baseCmd, $pigfile);
+
+    # Run the command.  ${className} is delimited explicitly: a bare
+    # "$className::" would interpolate a different, empty variable.
+    print $log "Setting PIG_CLASSPATH to $ENV{'PIG_CLASSPATH'}\n";
+    print $log "$0::${className}::$subName INFO: Going to run pig command: @cmd\n";
+
+    IPC::Run::run(\@cmd, \undef, $log, $log) or
+        die "Failed running $pigfile\n";
+
+    # Get results from the command locally
+    my $localdir = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . $id . ".dump.out";
+
+    return $self->postProcessSingleOutputFile($outfile, $localdir, \@baseCmd, $testCmd, $log);
+}
+
+# Run the test's Pig script and, optionally, copy its results locally.
+#
+# Arguments: $self, $testCmd, $log, $copyResults (true to fetch and
+# post-process the outputs; otherwise "NO_COPY" is recorded instead).
+# Returns a hash ref with 'rc' and
+#   - for a single store: 'output' and, when copied, 'originalOutput';
+#   - for several stores: 'outputs', an array ref with one entry per store;
+# plus 'sortArgs' when the test defines a true 'sortArgs'.
+# Dies if the script cannot be written or the Pig run fails.
+# Side effect: :PARAMPATH: is expanded in place inside
+# $testCmd->{'pig_params'}.
+sub runPig
+{
+    my ($self, $testCmd, $log, $copyResults) = @_;
+    my $subName = (caller(0))[3];
+
+    my %result;
+
+    # Write the pig script to a file.
+    my $pigfile = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".pig";
+    my $outfile = $testCmd->{'outpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".out";
+
+    my $pigcmd = $self->replaceParameters( $testCmd->{'pig'}, $outfile, $testCmd, $log );
+
+    open(my $pigfh, '>', $pigfile) or die "Unable to open file $pigfile to write pig script, $ERRNO\n";
+    print $pigfh $pigcmd . "\n";
+    close($pigfh);
+
+    # Build the command
+    my @baseCmd = $self->getPigCmd($testCmd, $log);
+    my @cmd = @baseCmd;
+
+    # Add option -l giving location for secondary logs
+    my $locallog = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".log";
+    push(@cmd, "-logfile");
+    push(@cmd, $locallog);
+
+    # Add pig parameters if they're provided
+    if (defined($testCmd->{'pig_params'})) {
+        # Processing :PARAMPATH: in parameters
+        foreach my $param (@{$testCmd->{'pig_params'}}) {
+            $param =~ s/:PARAMPATH:/$testCmd->{'paramPath'}/g;
+        }
+        push(@cmd, @{$testCmd->{'pig_params'}});
+    }
+
+    push(@cmd, $pigfile);
+
+    # Run the command.  ${className} is delimited explicitly: a bare
+    # "$className::" would interpolate a different, empty variable.
+    print $log "Setting PIG_CLASSPATH to $ENV{'PIG_CLASSPATH'}\n";
+    print $log "$0::${className}::$subName INFO: Going to run pig command: @cmd\n";
+
+    IPC::Run::run(\@cmd, \undef, $log, $log) or
+        die "Failed running $pigfile\n";
+    $result{'rc'} = $? >> 8;
+
+    # Get results from the command locally
+    my $localoutfile;
+    my $localdir = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".out";
+    my $stores = $self->countStores($testCmd);
+
+    # single query
+    if ($stores == 1) {
+        if ($copyResults) {
+            $result{'output'} = $self->postProcessSingleOutputFile($outfile, $localdir, \@baseCmd, $testCmd, $log);
+            $result{'originalOutput'} = "$localdir/out_original"; # populated by postProcessSingleOutputFile
+        } else {
+            $result{'output'} = "NO_COPY";
+        }
+    }
+    # multi query
+    else {
+        my @outfiles = ();
+        # Store outputs are numbered from 1: "<outfile>.1", "<outfile>.2", ...
+        for (my $id = 1; $id <= ($stores); $id++) {
+            $localdir = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".out/$id";
+            $localoutfile = $outfile . ".$id";
+
+            # Copy result file out of hadoop
+            my $testOut;
+            if ($copyResults) {
+                $testOut = $self->postProcessSingleOutputFile($localoutfile, $localdir, \@baseCmd, $testCmd, $log);
+            } else {
+                $testOut = "NO_COPY";
+            }
+            push(@outfiles, $testOut);
+        }
+        ##!!! originalOutputs not set! Needed?
+        $result{'outputs'} = \@outfiles;
+    }
+
+    # Compare doesn't get the testCmd hash, so I need to stuff the necessary
+    # info about sorting into the result.
+    if (defined $testCmd->{'sortArgs'} && $testCmd->{'sortArgs'}) {
+        $result{'sortArgs'} = $testCmd->{'sortArgs'};
+    }
+
+    return \%result;
+}
+
+# Post-process one local SQL output file and return the name of a sorted
+# copy.
+#
+# Steps, in order:
+#   1. if both 'floatpostprocess' and 'delimiter' are defined in $testCmd,
+#      rewrite the file through floatpostprocessor.pl;
+#   2. if $isBenchmark is true and 'nullpostprocess' is defined, rewrite the
+#      file through sed, removing every occurrence of the string NULL;
+#   3. sort the file into "$outfile.sorted".
+#
+# Returns "$outfile.sorted".  Dies if any step fails.
+sub postProcessSingleSQLOutputFile
+{
+    my ($self, $outfile, $testCmd, $log, $isBenchmark) = @_;
+
+    # If requested, process the data to smooth over floating point
+    # differences.
+    if (defined $testCmd->{'floatpostprocess'} &&
+            defined $testCmd->{'delimiter'}) {
+        _filterSQLOutputInPlace($outfile,
+            ["$toolpath/floatpostprocessor.pl", $testCmd->{'delimiter'}],
+            "float postprocessor", $log);
+    }
+
+    if ($isBenchmark && defined $testCmd->{'nullpostprocess'}) {
+        _filterSQLOutputInPlace($outfile, ["sed", "s/NULL//g"],
+            "null postprocessor", $log);
+    }
+
+    # Sort the results for the benchmark compare.
+    my $sortfile = "$outfile.sorted";
+    my @cmd = ("sort", $outfile);
+    print $log "Going to run [" . join(" ", @cmd) . "]\n";
+    # Without this check a failed sort would only surface later, when the
+    # missing or partial file is compared.
+    IPC::Run::run(\@cmd, '>', $sortfile) or
+        die "Failed to sort $outfile into $sortfile, $!\n";
+
+    return $sortfile;
+}
+
+# Private helper: replace the contents of $outfile with the output of the
+# command @$cmd fed with the file's previous contents.  $what names the
+# filter in the error message.  Dies on any failure.
+sub _filterSQLOutputInPlace
+{
+    my ($outfile, $cmd, $what, $log) = @_;
+
+    # Move the file to a temp file and run it through the filter.
+    my $tmpfile = "$outfile.tmp";
+    link($outfile, $tmpfile) or
+        die "Unable to create temporary file $tmpfile, $!\n";
+    unlink($outfile) or
+        die "Unable to unlink file $outfile, $!\n";
+    open(my $ifh, '<', $tmpfile) or
+        die "Unable to open file $tmpfile, $!\n";
+    open(my $ofh, '>', $outfile) or
+        die "Unable to open file $outfile, $!\n";
+    print $log "Going to run [" . join(" ", @{$cmd}) . "]\n";
+    IPC::Run::run($cmd, $ifh, $ofh, $log) or
+        die "Failed to run $what, $!\n";
+    close($ifh);
+    close($ofh);
+    unlink($tmpfile);
+    return;
+}
+
+# Copy one Pig output from HDFS to $localdir, merge its part files and
+# return the name of a sorted copy.
+#
+# Arguments: $self, $outfile (HDFS path), $localdir (local destination),
+# $baseCmd (array ref, base Pig command), $testCmd, $log.
+# Produces "$localdir/out_original" (the concatenated map*/part* files, run
+# through floatpostprocessor.pl when both 'floatpostprocess' and 'delimiter'
+# are defined) and "$localdir/out_sorted".
+# Returns "$localdir/out_sorted".  Dies if the copy or the sort fails.
+sub postProcessSingleOutputFile
+{
+    my ($self, $outfile, $localdir, $baseCmd, $testCmd, $log) = @_;
+    my $subName = (caller(0))[3];
+
+    my @copyCmd = (@{$baseCmd}, '-e', 'copyToLocal', $outfile, $localdir);
+    # ${className} is delimited explicitly: a bare "$className::" would
+    # interpolate a different, empty variable.
+    print $log "$0::${className}::$subName INFO: Going to run pig command: @copyCmd\n";
+
+    IPC::Run::run(\@copyCmd, \undef, $log, $log) or die "Cannot copy results from HDFS $outfile to $localdir\n";
+
+    # Sort the result if necessary. Keep the original output in one large file.
+    # Use system not IPC run so that the '*' gets interpolated by the shell.
+
+    # Build command to:
+    # 1. Combine part files
+    my $fppCmd = "cat $localdir/map* $localdir/part* 2>/dev/null";
+
+    # 2. Standardize float precision
+    if (defined $testCmd->{'floatpostprocess'} &&
+            defined $testCmd->{'delimiter'}) {
+        $fppCmd .= " | $toolpath/floatpostprocessor.pl '" .
+            $testCmd->{'delimiter'} . "'";
+    }
+
+    $fppCmd .= " > $localdir/out_original";
+
+    # run command.  The status is deliberately not checked: cat reports an
+    # error whenever one of the two globs matches nothing.
+    print $log "$fppCmd\n";
+    system($fppCmd);
+
+    # Sort the results for the benchmark compare.
+    my @sortCmd = ('sort', "$localdir/out_original");
+    print $log join(" ", @sortCmd) . "\n";
+    IPC::Run::run(\@sortCmd, '>', "$localdir/out_sorted") or
+        die "Cannot sort $localdir/out_original into $localdir/out_sorted\n";
+
+    return "$localdir/out_sorted";
+}
+
+# Generate the benchmark output(s) for a test from its SQL.
+#
+# $testCmd->{'sql'} is either a single statement or an array ref of
+# statements.  With exactly one statement the result holds 'output' (one
+# file, generated with an undefined id); otherwise it holds 'outputs', an
+# array ref with one file per statement, generated with ids 1..N.
+# Returns the result hash ref.
+sub generateBenchmark
+{
+    my ($self, $testCmd, $log) = @_;
+
+    my %result;
+
+    # Normalise 'sql' to a list.  It is only dereferenced once it is known
+    # to be an array ref; a scalar statement is taken as is.
+    my @SQLQuery = ();
+    if (ref($testCmd->{'sql'}) ne 'ARRAY') {
+        $SQLQuery[0] = $testCmd->{'sql'};
+    } else {
+        @SQLQuery = @{$testCmd->{'sql'}};
+    }
+
+    if ($#SQLQuery == 0) {
+        my $outfile = $self->generateSingleSQLBenchmark($testCmd, $SQLQuery[0], undef, $log);
+        $result{'output'} = $outfile;
+    } else {
+        my @outfiles = ();
+        for (my $id = 0; $id < ($#SQLQuery + 1); $id++) {
+            my $sql = $SQLQuery[$id];
+            my $outfile = $self->generateSingleSQLBenchmark($testCmd, $sql, ($id+1), $log);
+            push(@outfiles, $outfile);
+        }
+        $result{'outputs'} = \@outfiles;
+    }
+
+    return \%result;
+}
+
+# Run one SQL statement through the database and return its post-processed
+# output file.
+#
+# Arguments: $self, $testCmd, $sql (statement text), $id (statement number,
+# or undef when the test has a single statement), $log.
+# The statement is written to "<localpath><group>_<num>.benchmark.<id>.sql"
+# and its output to "<benchmarkPath>/<group>_<num>[.<id>].out".
+# Returns the value of postProcessSingleSQLOutputFile for that output.
+# Dies if either file cannot be opened.
+sub generateSingleSQLBenchmark
+{
+    my ($self, $testCmd, $sql, $id, $log) = @_;
+
+    # An undefined id contributes an empty string to the script name.
+    my $idPart = defined($id) ? $id : '';
+    my $sqlfile = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".benchmark.$idPart.sql";
+    my $outfile = $testCmd->{'benchmarkPath'} . "/" . $testCmd->{'group'} . "_" . $testCmd->{'num'};
+
+    $outfile .= defined($id) ? ".$id" . ".out" : ".out";
+
+    open(my $outfp, '>', $outfile) or
+        die "Unable to open output file $outfile, $!\n";
+
+    open(my $sqlfp, '>', $sqlfile) or
+        die "Unable to open file $sqlfile to write SQL script, $ERRNO\n";
+    print $sqlfp $sql;
+    close($sqlfp);
+
+    Util::runDbCmd($testCmd, $log, $sqlfile, $outfp);
+    close($outfp);
+
+    # NOTE(review): no $isBenchmark flag is passed, so the 'nullpostprocess'
+    # step is never applied here -- confirm whether that is intended.
+    $outfile =
+        $self->postProcessSingleSQLOutputFile($outfile, $testCmd, $log);
+
+    return $outfile;
+}
+
+# Tell whether the test defines at least one command line verification
+# directive (return code or stdout/stderr expectation).
+# Returns 1 if any of the directives is defined in $testCmd, 0 otherwise.
+sub hasCommandLineVerifications
+{
+    my ($self, $testCmd, $log) = @_;
+
+    my @directives = qw(
+        rc
+        expected_out expected_out_regex
+        expected_err expected_err_regex
+        not_expected_out not_expected_out_regex
+        not_expected_err not_expected_err_regex
+    );
+
+    my $defined = grep { defined $testCmd->{$_} } @directives;
+    return $defined ? 1 : 0;
+}
+
+
+# Decide how a test result is verified and delegate to the matching
+# comparison.
+#
+#   - wrong execution mode: return $self->{'wrong_execution_mode'};
+#   - 'script' tests, or any test with command line verification directives
+#     (rc, regex on out and err...): compareScript, even for a pig test;
+#   - remaining 'pig' tests: comparePig, i.e. compare against the benchmark;
+#   - anything else: log a warning and return 0.
+#
+# Later, should add ability to have same tests both verify with the 'script'
+# directives, and do a benchmark compare, if it was a pig cmd. E.g. 'rc'
+# could still be checked when doing the benchmark compare.
+sub compare
+{
+    my ($self, $testResult, $benchmarkResult, $log, $testCmd) = @_;
+    my $subName = (caller(0))[3];
+
+    # Check that we should run this test. If the current execution type
+    # doesn't match the execonly flag, hand back the special magic value.
+    return $self->{'wrong_execution_mode'}
+        if $self->wrongExecutionMode($testCmd);
+
+    my $scriptStyle = $testCmd->{'script'} ||
+        $self->hasCommandLineVerifications( $testCmd, $log);
+    return $self->compareScript ( $testResult, $log, $testCmd)
+        if $scriptStyle;
+
+    return $self->comparePig ( $testResult, $benchmarkResult, $log, $testCmd)
+        if $testCmd->{'pig'};
+
+    # Should have been caught by runTest, still...
+    print $log "$0.$subName WARNING Did not find a testCmd that I know how to handle\n";
+    return 0;
+}
+
+
+# Verify a test result against the command line verification directives
+# found in $testCmd.
+#
+# Directives: 'rc' (numeric return code), and for each of stdout ("out") and
+# stderr ("err"): expected_<x> / not_expected_<x> (exact string comparison)
+# and expected_<x>_regex / not_expected_<x>_regex (the value is used as a
+# regular expression).  Every check that is present is run and logged.
+#
+# Returns 1 when all checks pass, 0 as soon as one of them fails.
+sub compareScript
+{
+    my ($self, $testResult, $log, $testCmd) = @_;
+    my $subName = (caller(0))[3];
+
+
+    # IMPORTANT NOTES:
+    #
+    # If you are using a regex to compare stdout or stderr
+    # and if the pattern that you are trying to match spans two lines
+    # explicitly use '\n' (without the single quotes) in the regex
+    #
+    # If any verification directives are added here
+    # do remember also to add them to the hasCommandLineVerifications subroutine.
+    #
+    # If the test conf file misspells the directive, you won't be told...
+    #
+
+    my $result = 1; # until proven wrong...
+
+
+    # Return Code
+    if (defined $testCmd->{'rc'}) {
+        print $log "$0::$subName INFO Checking return code " .
+            "against expected <$testCmd->{'rc'}>\n";
+        if ( (! defined $testResult->{'rc'}) || ($testResult->{'rc'} != $testCmd->{'rc'})) {
+            print $log "$0::$subName INFO Check failed: rc = <$testCmd->{'rc'}> expected, test returned rc = <$testResult->{'rc'}>\n";
+            $result = 0;
+        }
+    }
+
+    # Standard Out
+    if (defined $testCmd->{'expected_out'}) {
+        print $log "$0::$subName INFO Checking test stdout " .
+            "as exact match against expected <$testCmd->{'expected_out'}>\n";
+        if ($testResult->{'stdout'} ne $testCmd->{'expected_out'}) {
+            print $log "$0::$subName INFO Check failed: exact match of <$testCmd->{'expected_out'}> expected in stdout: $testResult->{'stdout'}\n";
+            $result = 0;
+        }
+    }
+
+    # The NOT checks report the 'not_expected_*' value they actually test.
+    if (defined $testCmd->{'not_expected_out'}) {
+        print $log "$0::$subName INFO Checking test stdout " .
+            "as NOT exact match against expected <$testCmd->{'not_expected_out'}>\n";
+        if ($testResult->{'stdout'} eq $testCmd->{'not_expected_out'}) {
+            print $log "$0::$subName INFO Check failed: NON-match of <$testCmd->{'not_expected_out'}> expected to stdout: $testResult->{'stdout'}\n";
+            $result = 0;
+        }
+    }
+
+    if (defined $testCmd->{'expected_out_regex'}) {
+        print $log "$0::$subName INFO Checking test stdout " .
+            "for regular expression <$testCmd->{'expected_out_regex'}>\n";
+        if ($testResult->{'stdout'} !~ $testCmd->{'expected_out_regex'}) {
+            print $log "$0::$subName INFO Check failed: regex match of <$testCmd->{'expected_out_regex'}> expected in stdout: $testResult->{'stdout'}\n";
+            $result = 0;
+        }
+    }
+
+    if (defined $testCmd->{'not_expected_out_regex'}) {
+        print $log "$0::$subName INFO Checking test stdout " .
+            "for NON-match of regular expression <$testCmd->{'not_expected_out_regex'}>\n";
+        if ($testResult->{'stdout'} =~ $testCmd->{'not_expected_out_regex'}) {
+            print $log "$0::$subName INFO Check failed: regex NON-match of <$testCmd->{'not_expected_out_regex'}> expected in stdout: $testResult->{'stdout'}\n";
+            $result = 0;
+        }
+    }
+
+    # Standard Error (failures point at the stderr file, not its contents)
+    if (defined $testCmd->{'expected_err'}) {
+        print $log "$0::$subName INFO Checking test stderr " .
+            "as exact match against expected <$testCmd->{'expected_err'}>\n";
+        if ($testResult->{'stderr'} ne $testCmd->{'expected_err'}) {
+            print $log "$0::$subName INFO Check failed: exact match of <$testCmd->{'expected_err'}> expected in stderr: $testResult->{'stderr_file'}\n";
+            $result = 0;
+        }
+    }
+
+    if (defined $testCmd->{'not_expected_err'}) {
+        print $log "$0::$subName INFO Checking test stderr " .
+            "as NOT an exact match against expected <$testCmd->{'not_expected_err'}>\n";
+        if ($testResult->{'stderr'} eq $testCmd->{'not_expected_err'}) {
+            print $log "$0::$subName INFO Check failed: NON-match of <$testCmd->{'not_expected_err'}> expected to stderr: $testResult->{'stderr_file'}\n";
+            $result = 0;
+        }
+    }
+
+    if (defined $testCmd->{'expected_err_regex'}) {
+        print $log "$0::$subName INFO Checking test stderr " .
+            "for regular expression <$testCmd->{'expected_err_regex'}>\n";
+        if ($testResult->{'stderr'} !~ $testCmd->{'expected_err_regex'}) {
+            print $log "$0::$subName INFO Check failed: regex match of <$testCmd->{'expected_err_regex'}> expected in stderr: $testResult->{'stderr_file'}\n";
+            $result = 0;
+        }
+    }
+
+    if (defined $testCmd->{'not_expected_err_regex'}) {
+        print $log "$0::$subName INFO Checking test stderr " .
+            "for NON-match of regular expression <$testCmd->{'not_expected_err_regex'}>\n";
+        if ($testResult->{'stderr'} =~ $testCmd->{'not_expected_err_regex'}) {
+            print $log "$0::$subName INFO Check failed: regex NON-match of <$testCmd->{'not_expected_err_regex'}> expected in stderr: $testResult->{'stderr_file'}\n";
+            $result = 0;
+        }
+    }
+
+    return $result;
+}
+
+
+##############################################################################
+# Compare the output(s) of a test run against the benchmark output(s).
+#
+# Parameters:
+# $testResult      - hash ref for the test run; 'output' is read when the
+#                    script has exactly one store, 'outputs' (array ref)
+#                    otherwise
+# $benchmarkResult - hash ref for the benchmark run, read the same way
+# $log             - file handle handed on to compareSingleOutput
+# $testCmd         - hash ref describing the test, handed to countStores
+#
+# Returns:
+# 1 if every compared output matched its benchmark, 0 otherwise.  When no
+# store is found nothing is compared and 0 is returned.
+#
+sub comparePig
+{
+    my ($self, $testResult, $benchmarkResult, $log, $testCmd) = @_;
+
+    # countStores may hand back undef; treat that the same as "no stores".
+    my $stores = $self->countStores($testCmd) || 0;
+
+    if ($stores == 1) {
+        return $self->compareSingleOutput($testResult, $testResult->{'output'},
+            $benchmarkResult->{'output'}, $log) ? 1 : 0;
+    }
+
+    # Every output is compared, even after a mismatch, so that the log
+    # contains the details for all of them.
+    my $matched = 0;
+    for my $id (0 .. $stores - 1) {
+        my $testOutput      = ($testResult->{'outputs'})->[$id];
+        my $benchmarkOutput = ($benchmarkResult->{'outputs'})->[$id];
+        $matched++ if $self->compareSingleOutput($testResult, $testOutput,
+            $benchmarkOutput, $log);
+    }
+
+    return ($stores > 0 && $matched == $stores) ? 1 : 0;
+}
+
+
+##############################################################################
+# Compare one test output file against its benchmark file by checksum and,
+# when 'sortArgs' is present in the test result, check the sort order too.
+#
+# Parameters:
+# $testResult      - hash ref for the test run; 'sortArgs' (array ref) and
+#                    'originalOutput' are read for the optional sort check
+# $testOutput      - test output file, read with cat
+# $benchmarkOutput - benchmark output file, read with cat
+# $log             - file handle for diagnostics; it is also handed to
+#                    IPC::Run for the commands that are run
+#
+# Returns:
+# 1 if the checksums are equal and the sort check (if requested) succeeds,
+# 0 otherwise.  Dies if the cksum pipeline fails for either file.
+#
+sub compareSingleOutput
+{
+    my ($self, $testResult, $testOutput, $benchmarkOutput, $log) = @_;
+
+    print $log "testResult: $testResult testOutput: $testOutput benchmarkOutput: $benchmarkOutput\n";
+
+    # cksum the two files to see if they are the same
+    my ($testChksm, $benchmarkChksm);
+    IPC::Run::run((['cat', $testOutput], '|', ['cksum']), \$testChksm,
+        $log) or die "$0: error: cannot run cksum on test results\n";
+    IPC::Run::run((['cat', $benchmarkOutput], '|', ['cksum']),
+        \$benchmarkChksm, $log) or die "$0: error: cannot run cksum on benchmark\n";
+
+    chomp $testChksm;
+    chomp $benchmarkChksm;
+    print $log "test cksum: $testChksm\nbenchmark cksum: $benchmarkChksm\n";
+
+    # Start from an explicit 0 so that a mismatch is reported as a defined
+    # false value; callers add this result up numerically.
+    my $result = 0;
+    if ($testChksm ne $benchmarkChksm) {
+        print $log "Test output checksum does not match benchmark checksum\n";
+        print $log "Test checksum = <$testChksm>\n";
+        print $log "Expected checksum = <$benchmarkChksm>\n";
+        print $log "RESULTS DIFFER: vimdiff " . cwd . "/$testOutput " . cwd . "/$benchmarkOutput\n";
+    } else {
+        $result = 1;
+    }
+
+    # Now, check if the sort order is specified
+    if (defined($testResult->{'sortArgs'})) {
+        Util::setLocale();
+        my @sortChk = ('sort', '-cs');
+        push(@sortChk, @{$testResult->{'sortArgs'}});
+        push(@sortChk, $testResult->{'originalOutput'});
+        print $log "Going to run sort check command: " . join(" ", @sortChk) . "\n";
+        IPC::Run::run(\@sortChk, \undef, $log, $log);
+        # A non-zero status from the sort check fails the comparison even
+        # when the checksums matched.
+        my $sortrc = $?;
+        if ($sortrc) {
+            print $log "Sort check failed\n";
+            $result = 0;
+        }
+    }
+
+    return $result;
+}
+
+##############################################################################
+# Count the number of stores in a Pig Latin script, so we know how many files
+# we need to compare.
+#
+sub countStores($$)
+{
+    # $testCmd is a hash ref; 'notmq' and 'pig' are the only entries read.
+    # Returns the number of lines of the 'pig' script that contain a store
+    # statement, or 1 when 'notmq' is defined.
+    my ($self, $testCmd) = @_;
+
+    # Special work around for queries with more than one store that are not
+    # actually multiqueries.
+    if (defined $testCmd->{'notmq'}) {
+        return 1;
+    }
+
+    # Start at 0 so that an empty or missing script yields 0, not undef.
+    my $count  = 0;
+    my $script = defined $testCmd->{'pig'} ? $testCmd->{'pig'} : '';
+
+    # hope they don't have more than one store per line
+    # also note that this won't work if you comment out a store
+    foreach my $line (split(/\n/, $script)) {
+        $count++ if $line =~ /store\s+[a-zA-Z][a-zA-Z0-9_]*\s+into/i;
+    }
+
+    return $count;
+}
+
+##############################################################################
+# Check whether we should be running this test or not.
+#
+sub wrongExecutionMode($$)
+{
+    my ($self, $testCmd) = @_;
+
+    # A test without an 'execonly' restriction is never in the wrong mode.
+    my $requiredMode = $testCmd->{'execonly'};
+    if (!defined $requiredMode) {
+        return !1;
+    }
+
+    # Otherwise the test must be skipped when the restriction differs from
+    # the execution type in use.
+    return $requiredMode ne $testCmd->{'exectype'};
+}
+
+##############################################################################
+# Sub: printGroupResultsXml
+# Print the results for the group using junit xml schema using values from the testStatuses hash.
+#
+# Parameters:
+# $report - the report object to use to generate the report
+# $groupName - the name of the group to report totals for
+# $testStatuses - the hash containing the results for the tests run so far
+# $totalDuration- The total time it took to run the group of tests
+#
+# Returns:
+# None.
+#
+sub printGroupResultsXml
+{
+    my ( $report, $groupName, $testStatuses, $totalDuration) = @_;
+    $totalDuration = 0 if ( !$totalDuration );
+
+    my ($pass, $fail, $abort) = (0, 0, 0);
+
+    # Tally the tests whose key starts with the group name.  The name is
+    # quoted so that it is matched literally, even if it contains regular
+    # expression metacharacters.
+    foreach my $key (keys(%$testStatuses)) {
+        next unless $key =~ /^\Q$groupName\E/;
+
+        my $status = $testStatuses->{$key};
+        $pass++  if $status eq $passedStr;
+        $fail++  if $status eq $failedStr;
+        $abort++ if $status eq $abortedStr;
+    }
+
+    # Only passed, failed and aborted tests make up the total; tests with
+    # any other status are not counted.
+    my $total = $pass + $fail + $abort;
+    $report->totals( $groupName, $total, $fail, $abort, $totalDuration );
+}
+
+1;