You are viewing a plain text version of this content. The canonical link for it is here.
Posted to hcatalog-commits@incubator.apache.org by ha...@apache.org on 2011/12/06 20:05:39 UTC
svn commit: r1211077 [3/7] - in /incubator/hcatalog/trunk: ./ conf/ src/test/e2e/hcatalog/ src/test/e2e/hcatalog/conf/ src/test/e2e/hcatalog/deployers/ src/test/e2e/hcatalog/drivers/ src/test/e2e/hcatalog/tests/ src/test/e2e/hcatalog/tools/generate/ sr...

Added: incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/TestDriverHadoop.pm
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/TestDriverHadoop.pm?rev=1211077&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/TestDriverHadoop.pm (added)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/TestDriverHadoop.pm Tue Dec  6 20:05:37 2011
@@ -0,0 +1,735 @@
+package TestDriverHadoop;
+
+############################################################################           
+#  Licensed to the Apache Software Foundation (ASF) under one or more                  
+#  contributor license agreements.  See the NOTICE file distributed with               
+#  this work for additional information regarding copyright ownership.                 
+#  The ASF licenses this file to You under the Apache License, Version 2.0             
+#  (the "License"); you may not use this file except in compliance with                
+#  the License.  You may obtain a copy of the License at                               
+#                                                                                      
+#      http://www.apache.org/licenses/LICENSE-2.0                                      
+#                                                                                      
+#  Unless required by applicable law or agreed to in writing, software                 
+#  distributed under the License is distributed on an "AS IS" BASIS,                   
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.            
+#  See the License for the specific language governing permissions and                 
+#  limitations under the License.                                                      
+                                                                                       
+###############################################################################
+# Test driver for hadoop nightly tests.
+# 
+#
+
+use TestDriver;
+use IPC::Run; # don't do qw(run), it screws up TestDriver which also has a run method
+use Digest::MD5 qw(md5_hex);
+use Util;
+use File::Path;
+use Cwd;
+
+use English;
+
+# Name of the parent class; it is installed as this package's sole @ISA entry.
+our $className= "TestDriver";
+our @ISA = "$className";
+# Harness root directory, read from the HARNESS_ROOT environment variable.
+# Loading this module dies if the variable is not set.
+our $ROOT = (defined $ENV{'HARNESS_ROOT'} ? $ENV{'HARNESS_ROOT'} : die "ERROR: You must set environment variable HARNESS_ROOT\n");
+# Directory holding helper tools; floatpostprocessor.pl is invoked from here.
+our $toolpath = "$ROOT/libexec/HCatTest";
+
+# Test status labels.  None of these is referenced elsewhere in this module;
+# presumably kept for parity with the other drivers (TODO confirm).
+my $passedStr  = 'passed';
+my $failedStr  = 'failed';
+my $abortedStr = 'aborted';
+my $skippedStr = 'skipped';
+my $dependStr  = 'failed_dependency';
+
+sub new
+{
+    # Constructor.  The invocant may be a class name or an existing object,
+    # in which case the object's class is used.
+    my $invocant = shift;
+    my $class = ref($invocant) ? ref($invocant) : $invocant;
+
+    # Let the parent class build the object, then make sure it is blessed
+    # into this driver's class before handing it back.
+    my $self = $class->SUPER::new;
+    return bless($self, $class);
+}
+
+sub replaceParameters
+{
+##!!! Move this to Util.pm
+
+    # Expand the :NAME: placeholders found in $cmd and return the result.
+    #   $self    - driver object (supplies 'latestoutputpath')
+    #   $cmd     - command/script text containing placeholders
+    #   $outfile - value substituted for :OUTPATH:
+    #   $testCmd - hash ref holding the remaining replacement values
+    #   $log     - accepted for interface compatibility, not used here
+    my ($self, $cmd, $outfile, $testCmd, $log) = @_;
+
+    # The cluster placeholder depends on the configured security mode.
+    my $clusterKey = ($testCmd->{'hadoopSecurity'} eq "secure")
+        ? 'remoteSecureCluster'
+        : 'remoteNotSecureCluster';
+
+    # Placeholder/value pairs, applied strictly in this order.  :OUTPATH:
+    # appears twice because it is expanded both before and after the
+    # environment and input-path placeholders.
+    my @substitutions = (
+        [ ':LATESTOUTPUTPATH:', $self->{'latestoutputpath'} ],
+        [ ':OUTPATH:',          $outfile ],
+        [ ':PIGHARNESS:',       $ENV{HARNESS_ROOT} ],
+        [ ':INPATH:',           $testCmd->{'inpathbase'} ],
+        [ ':OUTPATH:',          $outfile ],
+        [ ':FUNCPATH:',         $testCmd->{'funcjarPath'} ],
+        [ ':PIGPATH:',          $testCmd->{'pigpath'} ],
+        [ ':RUNID:',            $testCmd->{'UID'} ],
+        [ ':USRHOMEPATH:',      $testCmd->{'userhomePath'} ],
+        [ ':MAPREDJARS:',       $testCmd->{'mapredjars'} ],
+        [ ':SCRIPTHOMEPATH:',   $testCmd->{'scriptPath'} ],
+        [ ':DBUSER:',           $testCmd->{'dbuser'} ],
+        [ ':DBNAME:',           $testCmd->{'dbdb'} ],
+        [ ':BMPATH:',           $testCmd->{'benchmarkPath'} ],
+        [ ':TMP:',              $testCmd->{'tmpPath'} ],
+        [ ':HDFSTMP:',          "tmp/$testCmd->{'runid'}" ],
+        [ ':REMOTECLUSTER:',    $testCmd->{$clusterKey} ],
+        [ ':THRIFTSERVER:',     $testCmd->{'thriftserver'} ],
+        [ ':HADOOP_CLASSPATH:', $testCmd->{'hadoop_classpath'} ],
+        [ ':HCAT_JAR:',         $testCmd->{'hcatalog.jar'} ],
+    );
+
+    foreach my $pair (@substitutions) {
+        my ($placeholder, $value) = @{$pair};
+        $cmd =~ s/\Q$placeholder\E/$value/g;
+    }
+
+    return $cmd;
+}
+
+sub globalSetup
+{
+    # One-time setup for a harness run.
+    #   $self       - driver object; 'ignore' is set on it when requested
+    #   $globalHash - global configuration hash ref; 'runid', 'outpath' and
+    #                 'localpath' are filled in here
+    #   $log        - filehandle that receives command output and messages
+    # Dies if any of the required directories cannot be created.
+    my ($self, $globalHash, $log) = @_;
+
+    # Setup the output path
+    my $me = `whoami`;
+    chomp $me;
+    $globalHash->{'runid'} = $me . "." . time;
+
+    # if "-ignore false" was provided on the command line,
+    # it means do run tests even when marked as 'ignore'
+    if(defined($globalHash->{'ignore'}) && $globalHash->{'ignore'} eq 'false')
+    {
+        $self->{'ignore'} = 'false';
+    }
+
+    $globalHash->{'outpath'} = $globalHash->{'outpathbase'} . "/" . $globalHash->{'runid'} . "/";
+    $globalHash->{'localpath'} = $globalHash->{'localpathbase'} . "/" . $globalHash->{'runid'} . "/";
+
+    # add libexec location to the path
+    if (defined($ENV{'PATH'})) {
+        $ENV{'PATH'} = $globalHash->{'scriptPath'} . ":" . $ENV{'PATH'};
+    }
+    else {
+        $ENV{'PATH'} = $globalHash->{'scriptPath'};
+    }
+
+    # Create the HDFS output directory through the pig command line.
+    my @cmd = ($self->getPigCmd($globalHash, $log), '-e', 'mkdir', $globalHash->{'outpath'});
+
+    print $log "Going to run " . join(" ", @cmd) . "\n";
+    IPC::Run::run(\@cmd, \undef, $log, $log) or die "Cannot create HDFS directory " . $globalHash->{'outpath'} . ": $? - $!\n";
+
+    IPC::Run::run(['mkdir', '-p', $globalHash->{'localpath'}], \undef, $log, $log) or
+        die "Cannot create localpath directory " . $globalHash->{'localpath'} .
+        " " . "$ERRNO\n";
+
+    IPC::Run::run(['mkdir', '-p', $globalHash->{'benchmarkPath'}], \undef, $log, $log) or
+        die "Cannot create benchmark directory " .  $globalHash->{'benchmarkPath'} .
+        " " . "$ERRNO\n";
+
+    # Create the temporary directory
+    IPC::Run::run(['mkdir', '-p', $globalHash->{'tmpPath'}], \undef, $log, $log) or
+        die "Cannot create temporary directory " . $globalHash->{'tmpPath'} .
+        " " . "$ERRNO\n";
+
+    # Create the HDFS temporary directory.  The failure message names this
+    # directory rather than the output path.
+    my $hdfsTmpDir = "tmp/$globalHash->{'runid'}";
+    @cmd = ($self->getPigCmd($globalHash, $log), '-e', 'mkdir', $hdfsTmpDir);
+    print $log "Going to run " . join(" ", @cmd) . "\n";
+    IPC::Run::run(\@cmd, \undef, $log, $log) or die "Cannot create HDFS directory " . $hdfsTmpDir . ": $? - $!\n";
+}
+
+sub globalCleanup
+{
+    # Intentionally a no-op: this driver performs no global cleanup.
+    return;
+}
+
+
+sub runTest
+{
+    # Run a single test and collect its output files.
+    #   $self    - driver object
+    #   $testCmd - test configuration hash ref; must carry a 'hadoop' command
+    #   $log     - log filehandle
+    # Returns the hash ref produced by runHadoop.  When 'result_table' is
+    # configured, 'outputs' holds one local result file per entry and
+    # 'output' is set as well if the test has exactly one store.
+    # Dies if the test has no 'hadoop' command.
+    my ($self, $testCmd, $log) = @_;
+    my $subName  = (caller(0))[3];
+
+    # Handle the various methods of running used in
+    # the original TestDrivers
+
+    if ( $testCmd->{'hcat_prep'} ) {
+        Util::prepareHCat($self, $testCmd, $log);
+    }
+
+    if ( !$testCmd->{'hadoop'} ) {
+        die "$subName FATAL Did not find a testCmd that I know how to handle";
+    }
+
+    my $result = $self->runHadoop( $testCmd, $log );
+    return $result unless defined($testCmd->{'result_table'});
+
+    # 'result_table' may be a single value or an array ref of values.
+    my @results = (ref($testCmd->{'result_table'}) eq 'ARRAY')
+        ? @{$testCmd->{'result_table'}}
+        : ($testCmd->{'result_table'});
+    my @outputs = ();
+
+    my $baseName = $testCmd->{'group'} . "_" . $testCmd->{'num'};
+    my $id = 0; # regular output count
+    for (my $i = 0; $i < @results; $i++) {
+        if ($results[$i] ne '?') {
+            # A table name: dump the table with a generated pig script run
+            # against a shallow copy of the test configuration.
+            my %modifiedTestCmd = %{$testCmd};
+            my $tableName = $results[$i];
+            $modifiedTestCmd{'num'} = $testCmd->{'num'} . "_" . $i . "_benchmark";
+            $modifiedTestCmd{'pig'} = "a = load '$tableName' using org.apache.hcatalog.pig.HCatLoader(); store a into ':OUTPATH:';";
+            my $r = $self->runPig(\%modifiedTestCmd, $log, 1);
+            $outputs[$i] = $r->{'output'};
+        } else {
+            # '?' marks a regular output file written by the hadoop job.
+            my $localdir = $testCmd->{'localpath'} . $baseName . ".out/$id";
+            my $outfile = $testCmd->{'outpath'} . $baseName . ".out";
+
+            # Copy result file out of hadoop
+            my @baseCmd = $self->getPigCmd($testCmd, $log);
+            $outputs[$i] = $self->postProcessSingleOutputFile($outfile, $localdir, \@baseCmd, $testCmd, $log);
+            $id++;
+        }
+    }
+
+    $result->{'outputs'}=\@outputs;
+    if ($self->countStores($testCmd)==1) {
+        $result->{'output'}=$outputs[0];
+    }
+    return $result;
+}
+
+sub dumpPigTable
+{
+    # Dump an HCatalog table to HDFS with a generated pig script and copy the
+    # result to the local file system.
+    #   $self    - driver object
+    #   $testCmd - test configuration hash ref
+    #   $table   - name of the table to load through HCatLoader
+    #   $log     - log filehandle
+    #   $id      - suffix appended to the test number in generated file names
+    # Returns whatever postProcessSingleOutputFile returns for the copied
+    # output.  Dies if the script cannot be written or pig fails.
+    my ($self, $testCmd, $table, $log, $id) = @_;
+    my $subName  = (caller(0))[3];
+
+    # Write the pig script to a file.
+    my $baseName = $testCmd->{'group'} . "_" . $testCmd->{'num'} . $id;
+    my $pigfile = $testCmd->{'localpath'} . $baseName . ".dump.pig";
+    my $outfile = $testCmd->{'outpath'} . $baseName . "dump.out";
+
+    open(my $scriptFh, '>', $pigfile) or die "Unable to open file $pigfile to write pig script, $ERRNO\n";
+    print $scriptFh "a = load '$table' using org.apache.hcatalog.pig.HCatLoader(); store a into '$outfile';\n";
+    # Buffered write errors only surface at close, so check it.
+    close($scriptFh) or die "Unable to close file $pigfile, $ERRNO\n";
+
+    # Build the command
+    my @baseCmd = $self->getPigCmd($testCmd, $log);
+    my @cmd = (@baseCmd, $pigfile);
+
+    # Run the command
+    print $log "Setting PIG_CLASSPATH to $ENV{'PIG_CLASSPATH'}\n";
+    print $log "$0::$className::$subName INFO: Going to run pig command: @cmd\n";
+
+    IPC::Run::run(\@cmd, \undef, $log, $log) or
+        die "Failed running $pigfile\n";
+
+    # Get results from the command locally
+    my $localdir = $testCmd->{'localpath'} . $baseName . ".dump.out";
+
+    return $self->postProcessSingleOutputFile($outfile, $localdir, \@baseCmd, $testCmd, $log);
+}
+
+sub postProcessSingleOutputFile
+{
+    # Copy one HDFS output to $localdir, merge its part files into
+    # out_original (optionally normalising float precision) and write a
+    # sorted copy to out_sorted.  Returns the path of the sorted file.
+    #   $baseCmd - array ref with the base pig command line
+    my ($self, $outfile, $localdir, $baseCmd, $testCmd, $log) = @_;
+    my $subName  = (caller(0))[3];
+
+    my $originalFile = "$localdir/out_original";
+    my $sortedFile   = "$localdir/out_sorted";
+
+    # Fetch the output from HDFS.
+    my @copyCmd = (@{$baseCmd}, '-e', 'copyToLocal', $outfile, $localdir);
+    print $log "$0::$className::$subName INFO: Going to run pig command: @copyCmd\n";
+
+    IPC::Run::run(\@copyCmd, \undef, $log, $log) or die "Cannot copy results from HDFS $outfile to $localdir\n";
+
+    # Build a shell pipeline.  system() is used rather than IPC::Run so that
+    # the shell expands the '*' wildcards.
+    # Stage 1: combine the part files.
+    my @stages = ("cat $localdir/map* $localdir/part* 2>/dev/null");
+
+    # Stage 2 (optional): standardize float precision.
+    my $wantFloatFix = defined $testCmd->{'floatpostprocess'}
+        && defined $testCmd->{'delimiter'};
+    if ($wantFloatFix) {
+        push(@stages, "$toolpath/floatpostprocessor.pl '" . $testCmd->{'delimiter'} . "'");
+    }
+
+    # Keep the original (unsorted) output in one large file.
+    my $fppCmd = join(" | ", @stages) . " > $originalFile";
+    print $log "$fppCmd\n";
+    system($fppCmd);
+
+    # Sort the results for the benchmark compare.
+    my @sortCmd = ('sort', $originalFile);
+    print $log join(" ", @sortCmd) . "\n";
+    IPC::Run::run(\@sortCmd, '>', $sortedFile);
+
+    return $sortedFile;
+}
+
+sub runHadoop
+# Being modified from runPig
+# !!! Works, but need to add other arguments, like queue...???
+#
+# Run the test's 'hadoop' command through a generated shell script.
+#   $self    - driver object
+#   $testCmd - test configuration hash ref
+#   $log     - log filehandle
+# Returns a hash ref; when the test has exactly one store, 'outputs' holds
+# the post-processed local result file.  Dies if a file cannot be written or
+# the script fails.
+{
+    my ($self, $testCmd, $log) = @_;
+    my $subName  = (caller(0))[3];
+
+    my %result;
+
+    # Write the hadoop command to a file.
+    my $baseName = $testCmd->{'group'} . "_" . $testCmd->{'num'};
+    my $hadoopfile = $testCmd->{'localpath'} . $baseName . ".hadoop";
+    my $outfile = $testCmd->{'outpath'} . $baseName . ".out";
+
+    my $hadoopcmd = $self->replaceParameters( $testCmd->{'hadoop'}, $outfile, $testCmd, $log );
+
+    # adjust for the leading and trailing new line often seen in the conf file's command directives
+    $hadoopcmd =~ s/^\s*(.*?)\s*$/$1/s;
+
+    open(my $cmdFh, '>', $hadoopfile) or die "Unable to open file $hadoopfile to write hadoop command file, $ERRNO\n";
+    print $cmdFh $hadoopcmd . "\n";
+    close($cmdFh) or die "Unable to close file $hadoopfile, $ERRNO\n";
+
+    # Build the command
+    my @cmd = Util::getHadoopCmd($testCmd);
+
+    # Add command line arguments if they're provided
+    if (defined($testCmd->{'hadoop_cmdline_args'})) {
+        push(@cmd, @{$testCmd->{'hadoop_cmdline_args'}});
+    }
+
+    # Add the test command elements
+    push(@cmd, split(/ +/,$hadoopcmd));
+
+    # Set HADOOP_CLASSPATH environment variable if provided.  The value is
+    # taken from 'hcatalog.jar', with commas turned into path separators.
+    if (defined($testCmd->{'hadoop_classpath'})) {
+        my $cp = $testCmd->{'hcatalog.jar'};
+        $cp =~ s/,/:/g;
+        $ENV{'HADOOP_CLASSPATH'} = $cp;
+    }
+
+    if (defined($testCmd->{'metastore.principal'}) && ($testCmd->{'metastore.principal'} =~ m/\S+/)) {
+        $ENV{'HADOOP_OPTS'} = "-Dhcat.metastore.principal=" . $testCmd->{'metastore.principal'};
+        $ENV{'HADOOP_CLIENT_OPTS'} = "-Dhcat.metastore.principal=" . $testCmd->{'metastore.principal'};
+    }
+
+    # Add su user if provided
+    if (defined($testCmd->{'run_as'})) {
+        my $quoted = '"' . join (" ", @cmd) . '"';
+        @cmd = ("echo", $quoted, "|", "su", $testCmd->{'run_as'});
+    }
+
+    my $script = $hadoopfile . ".sh";
+    open(my $scriptFh, '>', $script) or die "Unable to open file $script to write script, $ERRNO\n";
+    print $scriptFh join (" ", @cmd) . "\n";
+    close($scriptFh) or die "Unable to close file $script, $ERRNO\n";
+    # List-form system keeps the script path away from the shell.
+    system('chmod', '+x', $script) == 0 or
+        die "Unable to make $script executable\n";
+
+    # Run the command
+    print $log "$0::$className::$subName INFO: Going to run hadoop command in shell script: $script\n";
+    print $log "$0::$className::$subName INFO: Going to run hadoop command: " . join(" ", @cmd) . "\n";
+    print $log "With HADOOP_CLASSPATH set to " . $ENV{'HADOOP_CLASSPATH'} . " and HADOOP_OPTS set to " . $ENV{'HADOOP_OPTS'} . "\n";
+
+    my @runhadoop = ("$script");
+    IPC::Run::run(\@runhadoop, \undef, $log, $log) or
+        die "Failed running $script\n";
+
+    my $localdir = $testCmd->{'localpath'} . $baseName . ".dump.out";
+    my @baseCmd = $self->getPigCmd($testCmd, $log);
+    if ($self->countStores($testCmd)==1) {
+        # A fresh lexical array per call, so the results of an earlier test
+        # are not overwritten by a later one.
+        my @outputs = ($self->postProcessSingleOutputFile($outfile, $localdir, \@baseCmd, $testCmd, $log));
+        $result{'outputs'} = \@outputs;
+    }
+
+    return \%result;
+} # end sub runHadoop
+
+
+sub compare
+{
+    # Compare a test result against its benchmark.
+    #   $testResult      - hash ref with either 'outputs' (array ref of
+    #                      files) or a single 'output' file
+    #   $benchmarkResult - hash ref with the matching benchmark file(s)
+    #   $log             - log filehandle
+    #   $testCmd         - accepted for interface compatibility, not used
+    # With 'outputs', returns 1 only if every test output matches the
+    # benchmark output at the same index, otherwise 0.  With a single
+    # 'output', returns whatever compareSingleOutput returns.
+    my ($self, $testResult, $benchmarkResult, $log, $testCmd) = @_;
+
+    if (!defined($testResult->{'outputs'})) {
+        return $self->compareSingleOutput($testResult, $testResult->{'output'},
+                $benchmarkResult->{'output'}, $log);
+    }
+
+    # Dereference the array refs so that every output is compared, not just
+    # the first one.
+    my @testOutputs = @{$testResult->{'outputs'}};
+    my @benchmarkOutputs = defined($benchmarkResult->{'outputs'})
+        ? @{$benchmarkResult->{'outputs'}}
+        : ();
+
+    # Nothing to compare cannot count as a pass.
+    return 0 unless @testOutputs;
+
+    my $matched = 0;
+    for my $id (0 .. $#testOutputs) {
+        if (!defined($benchmarkOutputs[$id])) {
+            print $log "No benchmark output to compare against test output $id\n";
+            next;
+        }
+        $matched++ if $self->compareSingleOutput($testResult, $testOutputs[$id],
+                                                 $benchmarkOutputs[$id], $log);
+    }
+
+    return ($matched == scalar(@testOutputs)) ? 1 : 0;
+}
+
+sub generateBenchmark
+{
+    # Generate one benchmark file per SQL query of the test.
+    #   $self    - driver object
+    #   $testCmd - test configuration; 'sql' is one query or an array ref
+    #   $log     - log filehandle
+    # Returns a hash ref whose 'outputs' entry is an array ref with the
+    # benchmark file of each query, in query order.
+    my ($self, $testCmd, $log) = @_;
+
+    # Normalise 'sql' to a list.  It is only dereferenced once it is known
+    # to be an array ref.
+    my @SQLQuery = (ref($testCmd->{'sql'}) eq 'ARRAY')
+        ? @{$testCmd->{'sql'}}
+        : ($testCmd->{'sql'});
+
+    my @outfiles = ();
+    my $id = 0;
+    foreach my $sql (@SQLQuery) {
+        # Benchmark ids are 1-based.
+        $id++;
+        my $outfile = $self->generateSingleSQLBenchmark($testCmd, $sql, $id, $log);
+        push(@outfiles, $outfile);
+    }
+
+    return { 'outputs' => \@outfiles };
+}
+
+sub generateSingleSQLBenchmark
+{
+    # Run one SQL query against the benchmark database and post-process its
+    # output.
+    #   $self    - driver object
+    #   $testCmd - test configuration hash ref
+    #   $sql     - SQL text to run
+    #   $id      - query number, used in the generated file names
+    #   $log     - log filehandle
+    # Returns the file produced by postProcessSingleSQLOutputFile.
+    # Dies if the output or SQL file cannot be written.
+    my ($self, $testCmd, $sql, $id, $log) = @_;
+
+    my $baseName = $testCmd->{'group'} . "_" . $testCmd->{'num'};
+    my $sqlfile = $testCmd->{'localpath'} . $baseName . ".benchmark.$id.sql";
+    my $outfile = $testCmd->{'benchmarkPath'} . "/" . $baseName;
+
+    $outfile .= defined($id) ? ".$id" . ".out" :  ".out";
+
+    open(my $outfp, '>', $outfile) or
+        die "Unable to open output file $outfile, $!\n";
+
+    open(my $sqlFh, '>', $sqlfile) or
+        die "Unable to open file $sqlfile to write SQL script, $ERRNO\n";
+    print $sqlFh $sql;
+    close($sqlFh) or
+        die "Unable to close file $sqlfile, $ERRNO\n";
+
+    Util::runDbCmd($testCmd, $log, $sqlfile, $outfp);
+    close($outfp);
+
+    # NOTE(review): the $isBenchmark flag is not passed here, so the
+    # 'nullpostprocess' step of postProcessSingleSQLOutputFile is never
+    # applied on this path - confirm whether that is intended.
+    return $self->postProcessSingleSQLOutputFile($outfile, $testCmd, $log);
+}
+
+sub postProcessSingleSQLOutputFile
+{
+    # Post-process the output of a SQL query and sort it for comparison.
+    #   $self        - driver object
+    #   $outfile     - file to process; rewritten in place by the filters
+    #   $testCmd     - test configuration hash ref
+    #   $log         - log filehandle
+    #   $isBenchmark - true to allow the 'nullpostprocess' step
+    # Returns the path of the sorted copy ("$outfile.sorted").
+    # Dies if a filter step fails.
+    my ($self, $outfile, $testCmd, $log, $isBenchmark) = @_;
+
+    # If requested, process the data to smooth over floating point
+    # differences.
+    if (defined $testCmd->{'floatpostprocess'} &&
+            defined $testCmd->{'delimiter'}) {
+        _filterFileInPlace($outfile,
+            ["$toolpath/floatpostprocessor.pl", $testCmd->{'delimiter'}],
+            'float postprocessor', $log);
+    }
+
+    # If requested, strip the literal NULL markers from benchmark output.
+    if ($isBenchmark && defined $testCmd->{'nullpostprocess'}) {
+        _filterFileInPlace($outfile, ["sed", "s/NULL//g"],
+            'null postprocessor', $log);
+    }
+
+    # Sort the results for the benchmark compare.
+    my $sortfile = "$outfile.sorted";
+    my @cmd = ("sort", $outfile);
+    print $log "Going to run [" . join(" ", @cmd) . "]\n";
+    IPC::Run::run(\@cmd, '>', "$sortfile");
+
+    return $sortfile;
+}
+
+# Private helper: rewrite $file with the output of @{$filterCmd}, which is
+# fed the file's previous contents on its standard input.  $filterName is
+# only used in the failure message.
+sub _filterFileInPlace
+{
+    my ($file, $filterCmd, $filterName, $log) = @_;
+
+    # Move the file to a temp file and run it through the filter.
+    my $tmpfile = "$file.tmp";
+    link($file, $tmpfile) or
+        die "Unable to create temporary file $tmpfile, $!\n";
+    unlink($file) or
+        die "Unable to unlink file $file, $!\n";
+    open(my $inFh, '<', $tmpfile) or
+        die "Unable to open file $tmpfile, $!\n";
+    open(my $outFh, '>', $file) or
+        die "Unable to open file $file, $!\n";
+    print $log "Going to run [" . join(" ", @{$filterCmd}) . "]\n";
+    IPC::Run::run($filterCmd, $inFh, $outFh, $log) or
+        die "Failed to run $filterName, $!\n";
+    close($inFh);
+    close($outFh);
+    unlink($tmpfile);
+}
+
+sub runPig
+{
+    # Write the test's pig script to a file, run it and collect the outputs.
+    #   $self        - driver object
+    #   $testCmd     - test configuration hash ref
+    #   $log         - log filehandle
+    #   $copyResults - true to copy results out of HDFS; otherwise every
+    #                  output is reported as "NO_COPY"
+    # Returns a hash ref with 'rc' plus 'output'/'originalOutput' for a
+    # single-store script, or 'outputs' for any other store count; 'sortArgs'
+    # is passed through when set.  Dies if the script cannot be written or
+    # pig fails.
+    my ($self, $testCmd, $log, $copyResults) = @_;
+    my $subName  = (caller(0))[3];
+
+    my %result;
+
+    # Write the pig script to a file.
+    my $baseName = $testCmd->{'group'} . "_" . $testCmd->{'num'};
+    my $pigfile = $testCmd->{'localpath'} . $baseName . ".pig";
+    my $outfile = $testCmd->{'outpath'} . $baseName . ".out";
+
+    my $pigcmd = $self->replaceParameters( $testCmd->{'pig'}, $outfile, $testCmd, $log );
+
+    open(my $scriptFh, '>', $pigfile) or die "Unable to open file $pigfile to write pig script, $ERRNO\n";
+    print $scriptFh $pigcmd . "\n";
+    close($scriptFh) or die "Unable to close file $pigfile, $ERRNO\n";
+
+    # Build the command
+    my @baseCmd = $self->getPigCmd($testCmd, $log);
+    my @cmd = @baseCmd;
+
+    # Add option -l giving location for secondary logs
+    my $locallog = $testCmd->{'localpath'} . $baseName . ".log";
+    push(@cmd, "-logfile", $locallog);
+
+    # Add pig parameters if they're provided
+    if (defined($testCmd->{'pig_params'})) {
+        # Processing :PARAMPATH: in parameters.  Work on copies so that the
+        # caller's configuration is left untouched.
+        foreach my $original (@{$testCmd->{'pig_params'}}) {
+            my $param = $original;
+            $param =~ s/:PARAMPATH:/$testCmd->{'paramPath'}/g;
+            push(@cmd, $param);
+        }
+    }
+
+    push(@cmd, $pigfile);
+
+    # Run the command
+    print $log "Setting PIG_CLASSPATH to $ENV{'PIG_CLASSPATH'}\n";
+    print $log "$0::$className::$subName INFO: Going to run pig command: @cmd\n";
+
+    IPC::Run::run(\@cmd, \undef, $log, $log) or
+        die "Failed running $pigfile\n";
+    $result{'rc'} = $? >> 8;
+
+    # Get results from the command locally
+    my $localdir = $testCmd->{'localpath'} . $baseName . ".out";
+    my $stores = $self->countStores($testCmd);
+
+    # single query
+    if ($stores == 1) {
+        if ($copyResults) {
+            $result{'output'} = $self->postProcessSingleOutputFile($outfile, $localdir, \@baseCmd, $testCmd, $log);
+            $result{'originalOutput'} = "$localdir/out_original"; # populated by postProcessSingleOutputFile
+        } else {
+            $result{'output'} = "NO_COPY";
+        }
+    }
+    # multi query
+    else {
+        my @outfiles = ();
+        for (my $id = 1; $id <= ($stores); $id++) {
+            $localdir = $testCmd->{'localpath'} . $baseName . ".out/$id";
+            my $localoutfile = $outfile . ".$id";
+
+            # Copy result file out of hadoop
+            my $testOut;
+            if ($copyResults) {
+                $testOut = $self->postProcessSingleOutputFile($localoutfile, $localdir, \@baseCmd, $testCmd, $log);
+            } else {
+                $testOut = "NO_COPY";
+            }
+            push(@outfiles, $testOut);
+        }
+        ##!!! originalOutputs not set! Needed?
+        $result{'outputs'} = \@outfiles;
+    }
+
+    # Compare doesn't get the testCmd hash, so I need to stuff the necessary
+    # info about sorting into the result.
+    if (defined $testCmd->{'sortArgs'} && $testCmd->{'sortArgs'}) {
+        $result{'sortArgs'} = $testCmd->{'sortArgs'};
+    }
+
+    return \%result;
+}
+
+sub getPigCmd($$$)
+{
+    # Build the base pig command line and export the environment it needs.
+    #   $self    - driver object (not used)
+    #   $testCmd - configuration hash ref; in local mode "-Xmx1024m" is added
+    #              to its 'java_params' once
+    #   $log     - log filehandle
+    # Sets $ENV{'PIG_CLASSPATH'} and $ENV{'PIG_OPTS'} and returns the command
+    # as a list.
+    my ($self, $testCmd, $log) = @_;
+
+    # set the PIG_CLASSPATH environment variable.  The variable is declared
+    # unconditionally: a "my" under a statement modifier has undefined
+    # behaviour and can keep its value from a previous call.
+    my $pcp = defined($testCmd->{'jythonjar'}) ? $testCmd->{'jythonjar'} : '';
+    $pcp .= ":" . $testCmd->{'classpath'} if (defined($testCmd->{'classpath'}));
+    $pcp .= ":" . $testCmd->{'additionaljars'} if (defined($testCmd->{'additionaljars'}));
+    # testconfigpath is added to PIG_CLASSPATH whenever exectype is not "local"
+    $pcp .= ":" . $testCmd->{'testconfigpath'} if ($testCmd->{'exectype'} ne "local"); #&& (! defined $ENV{'HADOOP_HOME'});
+
+    # Set it in our current environment.  It will get inherited by the IPC::Run
+    # command.
+    $ENV{'PIG_CLASSPATH'} = $pcp;
+
+    my @pigCmd = ("$testCmd->{'pigpath'}/bin/pig");
+
+    if (defined($testCmd->{'additionaljars'})) {
+        push(@pigCmd, '-Dpig.additional.jars='.$testCmd->{'additionaljars'});
+    }
+
+    if ($testCmd->{'exectype'} eq "local") {
+        # Add the heap option only once, so repeated calls do not keep
+        # growing the shared parameter list.
+        my $heapOpt = "-Xmx1024m";
+        my @existing = @{ $testCmd->{'java_params'} || [] };
+        if (!grep { $_ eq $heapOpt } @existing) {
+            push(@{$testCmd->{'java_params'}}, $heapOpt);
+        }
+        push(@pigCmd, ("-x", "local"));
+    }
+
+    my $opts = "-Dhcat.metastore.uri=$testCmd->{'thriftserver'}";
+    if (defined($testCmd->{'java_params'})) {
+        $opts = $opts . " " . join(" ", @{$testCmd->{'java_params'}});
+    }
+
+    $ENV{'PIG_OPTS'} = $opts;
+
+    print $log "Returning Pig command " . join(" ", @pigCmd) . "\n";
+    print $log "With PIG_CLASSPATH set to " . $ENV{'PIG_CLASSPATH'} . " and PIG_OPTS set to " . $ENV{'PIG_OPTS'} . "\n";
+    return @pigCmd;
+}
+
+sub compareSingleOutput
+{
+    # Compare one test output file with its benchmark file by checksum and,
+    # when the result carries 'sortArgs', verify the sort order of the
+    # original output.
+    # Returns 1 when the checksums match, undef when they differ, and 0
+    # whenever the sort check fails.  Dies if a checksum cannot be computed.
+    my ($self, $testResult, $testOutput, $benchmarkOutput, $log) = @_;
+
+    print $log "testResult: $testResult testOutput: $testOutput benchmarkOutput: $benchmarkOutput\n";
+
+    # cksum the two files to see if they are the same
+    my $testSum;
+    IPC::Run::run((['cat', $testOutput], '|', ['cksum']), \$testSum,
+        $log) or die "$0: error: cannot run cksum on test results\n";
+    my $benchmarkSum;
+    IPC::Run::run((['cat', $benchmarkOutput], '|', ['cksum']),
+        \$benchmarkSum, $log) or die "$0: error: cannot run cksum on benchmark\n";
+
+    chomp $testSum;
+    chomp $benchmarkSum;
+    print $log "test cksum: $testSum\nbenchmark cksum: $benchmarkSum\n";
+
+    my $verdict;
+    if ($testSum eq $benchmarkSum) {
+        $verdict = 1;
+    } else {
+        print $log "Test output checksum does not match benchmark checksum\n";
+        print $log "Test checksum = <$testSum>\n";
+        print $log "Expected checksum = <$benchmarkSum>\n";
+        print $log "RESULTS DIFFER: vimdiff " . cwd . "/$testOutput " . cwd . "/$benchmarkOutput\n";
+    }
+
+    # Without sort arguments the checksum verdict is final.
+    return $verdict unless defined($testResult->{'sortArgs'});
+
+    # Now, check that the original output obeys the requested sort order.
+    Util::setLocale();
+    my @sortCheck = ('sort', '-cs', @{$testResult->{'sortArgs'}},
+                     $testResult->{'originalOutput'});
+    print $log "Going to run sort check command: " . join(" ", @sortCheck) . "\n";
+    IPC::Run::run(\@sortCheck, \undef, $log, $log);
+    if ($?) {
+        print $log "Sort check failed\n";
+        $verdict = 0;
+    }
+
+    return $verdict;
+}
+
+##############################################################################
+# Count the number of stores in a Pig Latin script, so we know how many files
+# we need to compare.
+#
+sub countStores($$)
+{
+    # Count the outputs a test is expected to produce.
+    #   $self    - driver object (not used)
+    #   $testCmd - test configuration hash ref
+    # For a 'pig' script, counts the lines containing a
+    # "store <alias> into" statement; otherwise counts the lines of the
+    # 'hadoop' command that mention OUTPATH.  Always returns a number,
+    # 0 when nothing matches or the script is empty.
+    my ($self, $testCmd) = @_;
+
+    my ($script, $storePattern);
+    if (defined $testCmd->{'pig'}) {
+        # hope they don't have more than store per line
+        # also note that this won't work if you comment out a store
+        $script = $testCmd->{'pig'};
+        $storePattern = qr/store\s+[a-zA-Z][a-zA-Z0-9_]*\s+into/i;
+    }
+    else {
+        #defined $testCmd->{'hadoop'}
+        $script = $testCmd->{'hadoop'};
+        $storePattern = qr/OUTPATH/i;
+    }
+
+    my $count = 0;
+    return $count unless defined $script;
+
+    # Each line counts at most once.
+    foreach my $line (split(/\n/, $script)) {
+        $count++ if $line =~ $storePattern;
+    }
+
+    return $count;
+}
+
+1;

Added: incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/TestDriverHive.pm
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/TestDriverHive.pm?rev=1211077&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/TestDriverHive.pm (added)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/TestDriverHive.pm Tue Dec  6 20:05:37 2011
@@ -0,0 +1,383 @@
+package TestDriverHive;
+
+############################################################################
+#  Licensed to the Apache Software Foundation (ASF) under one or more
+#  contributor license agreements.  See the NOTICE file distributed with
+#  this work for additional information regarding copyright ownership.
+#  The ASF licenses this file to You under the Apache License, Version 2.0
+#  (the "License"); you may not use this file except in compliance with
+#  the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+###############################################################################
+# Test driver for hive nightly tests.
+# 
+#
+
+use TestDriver;
+use IPC::Run; # don't do qw(run), it screws up TestDriver which also has a run method
+use Digest::MD5 qw(md5_hex);
+use Util;
+use File::Path;
+use Cwd;
+
+use strict;
+use English;
+
+# Name of the parent class; @ISA is set from it so this package inherits
+# from TestDriver.
+our $className= "TestDriver";
+our @ISA = "$className";
+# Harness root directory, read from the HARNESS_ROOT environment variable.
+# Loading this module dies when that variable is not set.
+our $ROOT = (defined $ENV{'HARNESS_ROOT'} ? $ENV{'HARNESS_ROOT'} : die "ERROR: You must set environment variable HARNESS_ROOT\n");
+# Directory of helper tools; postProcessSingleOutputFile runs
+# floatpostprocessor.pl from here.
+our $toolpath = "$ROOT/libexec/HCatTest";
+
+# Test status strings.  NOTE(review): none of these is referenced in the part
+# of the file reviewed here -- presumably kept for parity with the other
+# drivers; confirm before removing.
+my $passedStr  = 'passed';
+my $failedStr  = 'failed';
+my $abortedStr = 'aborted';
+my $skippedStr = 'skipped';
+my $dependStr  = 'failed_dependency';
+
+# Constructor.  The invocant may be a class name or an existing object; the
+# parent class builds the instance, which is then blessed into the resolved
+# class and returned.
+sub new
+{
+    my $invocant = shift;
+    my $class = ref($invocant) || $invocant;
+
+    # Let the parent create the object, then (re)bless it into our class.
+    my $self = $class->SUPER::new;
+    return bless($self, $class);
+}
+
+# Expands the :TOKEN: placeholders in a command string and returns the result.
+#   $self     - driver object; supplies 'latestoutputpath'
+#   $template - command text containing placeholders
+#   $outpath  - value substituted for :OUTPATH:
+#   $testCmd  - hash ref; supplies 'inpathbase' for :INPATH:
+#   $log      - log handle (not used here)
+# :HARNESS: is replaced with the HARNESS_ROOT environment variable.
+sub replaceParameters
+{
+##!!! Move this to Util.pm
+
+    my ($self, $template, $outpath, $testCmd, $log) = @_;
+
+    # The substitutions run in a fixed order, each over the whole string.
+    for ($template) {
+        s{:LATESTOUTPUTPATH:}{$self->{'latestoutputpath'}}g;
+        s{:OUTPATH:}{$outpath}g;
+        s{:HARNESS:}{$ENV{HARNESS_ROOT}}g;
+        s{:INPATH:}{$testCmd->{'inpathbase'}}g;
+    }
+
+    return $template;
+}
+
+# One-time setup for a Hive test run.  Configures the Hive/metastore
+# properties through Util, derives a run id of the form "<user>.<epoch>",
+# and creates the local, benchmark and results directories, storing
+# 'runid', 'localpath' and 'thisResultsPath' in $globalHash.
+# Dies if any directory cannot be created.
+sub globalSetup
+{
+    my ($self, $globalHash, $log) = @_;
+
+    # Set up values for the metastore
+    Util::setupHiveProperties($globalHash, $log);
+
+    # The run id identifies this run's output directory
+    chomp(my $user = `whoami`);
+    $globalHash->{'runid'} = join(".", $user, time);
+
+    $globalHash->{'localpath'} =
+        "$globalHash->{'localpathbase'}/$globalHash->{'runid'}/";
+
+    # Creates a directory (and any missing parents) or dies naming its role.
+    my $makeDir = sub {
+        my ($what, $dir) = @_;
+        IPC::Run::run(['mkdir', '-p', $dir], \undef, $log, $log) or
+            die "Cannot create $what directory $dir $ERRNO\n";
+    };
+
+    $makeDir->('localpath', $globalHash->{'localpath'});
+    $makeDir->('benchmark', $globalHash->{'benchmarkPath'});
+
+    # The results directory lives below the per-run local path
+    $globalHash->{'thisResultsPath'} =
+        "$globalHash->{'localpath'}/$globalHash->{'resultsPath'}";
+    $makeDir->('results', $globalHash->{'thisResultsPath'});
+}
+
+# Global teardown hook for the Hive driver.  Currently a no-op: it only
+# unpacks its arguments (driver object, global configuration hash, log
+# handle) and removes nothing.
+sub globalCleanup
+{
+    my ($self, $globalHash, $log) = @_;
+}
+
+
+# Runs a single Hive test and collects its output files.
+#
+# The SQL in $testCmd->{'sql'} is written to a script under 'localpath'.
+# When 'result_table' is defined, that script is run first and one
+# "select * from <table>" script per result table replaces it in the list of
+# scripts to run.  Every script in the list is then run with its output
+# captured in a file under 'thisResultsPath' and post-processed by
+# postProcessSingleOutputFile.
+#
+# Returns a hash ref with:
+#   rc             - $? >> 8 as observed after the first Hive invocation
+#   originalOutput - array ref of the raw output file names
+#   output         - array ref of the post-processed output file names
+#   sortArgs       - copied from $testCmd when it holds a true value
+# Dies if a script or output file cannot be opened, or a script file cannot
+# be written completely.
+sub runTest
+{
+    my ($self, $testCmd, $log) = @_;
+
+    my %result;
+
+    my @hivefiles = ();
+    my @outfiles = ();
+    # Write the hive script to a file.
+    $hivefiles[0] = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" .
+        $testCmd->{'num'} . ".0.sql";
+    $outfiles[0] = $testCmd->{'thisResultsPath'} . "/" . $testCmd->{'group'} .
+        "_" .  $testCmd->{'num'} . ".0.out";
+
+    # Three-argument open with a lexical handle: the file name is never
+    # interpreted as a mode, and the handle cannot clash with other code.
+    open(my $sqlfh, '>', $hivefiles[0]) or
+        die "Unable to open file $hivefiles[0] to write SQL script, $ERRNO\n";
+    print $sqlfh $testCmd->{'sql'} . "\n";
+    close($sqlfh) or
+        die "Unable to close file $hivefiles[0], $ERRNO\n";
+
+    # If the results are written to a table run the command and then
+    # run another Hive command to dump the results of the table.
+    if (defined($testCmd->{'result_table'})) {
+        Util::runHiveCmdFromFile($testCmd, $log, $hivefiles[0]);
+        $result{'rc'} = $? >> 8;
+
+        # 'result_table' may be a single table name or an array of names.
+        my @results = ();
+        if (ref($testCmd->{'result_table'}) ne 'ARRAY') {
+            $results[0] = $testCmd->{'result_table'};
+        } else {
+            @results = @{$testCmd->{'result_table'}};
+        }
+        # From here on the script list holds the table-dump scripts; entry 0
+        # (the test script, already run above) is overwritten.
+        for (my $i = 0; $i < @results; $i++) {
+            $hivefiles[$i] = $testCmd->{'localpath'} .
+                $testCmd->{'group'} . "_" .  $testCmd->{'num'} .
+                ".dumptable.$i.sql";
+            $outfiles[$i] = $testCmd->{'thisResultsPath'} . "/" .
+                $testCmd->{'group'} .  "_" .  $testCmd->{'num'} . ".$i.out";
+            open(my $dumpfh, '>', $hivefiles[$i]) or
+                die "Unable to open file $hivefiles[$i] to write SQL " .
+                    "script, $ERRNO\n";
+            print $dumpfh "select * from " . $results[$i] .  ";\n";
+            close($dumpfh) or
+                die "Unable to close file $hivefiles[$i], $ERRNO\n";
+        }
+    }
+
+    my @originalOutputs = ();
+    my @outputs = ();
+    $result{'originalOutput'} = \@originalOutputs;
+    $result{'output'} = \@outputs;
+
+    for (my $i = 0; $i < @hivefiles; $i++) {
+        open(my $outfp, '>', $outfiles[$i]) or
+            die "Unable to open output file $outfiles[$i], $!\n";
+
+        Util::runHiveCmdFromFile($testCmd, $log, $hivefiles[$i], $outfp);
+
+        # Don't overwrite rc if we set it above
+        $result{'rc'} = $? >> 8 unless defined $result{'rc'};
+        close($outfp);
+
+        $originalOutputs[$i] = $outfiles[$i];
+        $outputs[$i] =
+            $self->postProcessSingleOutputFile($outfiles[$i], $testCmd, $log);
+    }
+
+    # Compare doesn't get the testCmd hash, so I need to stuff the necessary
+    # info about sorting into the result.
+    if (defined $testCmd->{'sortArgs'} && $testCmd->{'sortArgs'}) {
+        $result{'sortArgs'} = $testCmd->{'sortArgs'};
+    }
+
+    return \%result;
+}
+
+
+
+# Produces the expected ("benchmark") results for a test by running its
+# verification SQL through Util::runDbCmd.
+#
+# The statements come from $testCmd->{'verify_sql'} (a string or an array
+# ref) or, when that is not defined, from $testCmd->{'sql'}.  Each statement
+# is written to its own script under 'localpath', run with output captured
+# under 'benchmarkPath', and post-processed in benchmark mode.
+#
+# Returns a hash ref with:
+#   rc     - array ref of $? >> 8 as observed after each run
+#   output - array ref of post-processed benchmark file names
+# Dies if a script or output file cannot be opened, or a script file cannot
+# be written completely.
+sub generateBenchmark
+{
+    my ($self, $testCmd, $log) = @_;
+
+    my %result;
+
+    # Collect the SQL statements to verify against.
+    my @verifies = ();
+    if (defined $testCmd->{'verify_sql'}) {
+        if (ref($testCmd->{'verify_sql'}) eq "ARRAY") {
+            @verifies = @{$testCmd->{'verify_sql'}};
+        } else {
+            $verifies[0] = $testCmd->{'verify_sql'};
+        }
+    } else {
+        $verifies[0] = $testCmd->{'sql'};
+    }
+
+    my @rcs = ();
+    $result{'rc'} = \@rcs;
+    my @outputs = ();
+    $result{'output'} = \@outputs;
+    for (my $i = 0; $i < @verifies; $i++) {
+        my $sqlfile = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" .
+            $testCmd->{'num'} . ".benchmark.$i.sql";
+        my $outfile = $testCmd->{'benchmarkPath'} . "/" .
+            $testCmd->{'group'} .  "_" .  $testCmd->{'num'} .
+            ".benchmark.$i.out";
+
+        # Three-argument open with a lexical handle keeps the file name from
+        # being interpreted as part of the open mode.
+        open(my $sqlfh, '>', $sqlfile) or
+            die "Unable to open file $sqlfile to write SQL script, $ERRNO\n";
+        print $sqlfh $verifies[$i];
+        close($sqlfh) or
+            die "Unable to close file $sqlfile, $ERRNO\n";
+
+        open(my $outfp, '>', $outfile) or
+            die "Unable to open output file $outfile, $!\n";
+
+        Util::runDbCmd($testCmd, $log, $sqlfile, $outfp);
+        $rcs[$i] =  $? >> 8;
+        close($outfp);
+
+        # The trailing 1 marks this as benchmark output for the post-processor.
+        $outputs[$i] =
+            $self->postProcessSingleOutputFile($outfile, $testCmd, $log, 1);
+    }
+
+    return \%result;
+}
+
+# Compares the outputs of runTest with those of generateBenchmark.
+#
+# Both results must carry the same number of 'output' files, otherwise this
+# dies.  Each pair of files is compared by cksum; when the test result has
+# 'sortArgs', the matching 'originalOutput' file is additionally checked with
+# "sort -cs".  Every mismatch or failed sort check counts as a failure and is
+# reported to $log.
+#
+# Returns true when there were no failures, false otherwise.
+sub compare
+{
+    my ($self, $testResult, $benchmarkResult, $log, $testCmd) = @_;
+
+    my $testOutputs = $testResult->{'output'};
+    my $benchmarkOutputs = $benchmarkResult->{'output'};
+
+    # Make sure we have the same number of results from runTest and
+    # generateBenchmark
+    my $testCount = scalar(@$testOutputs);
+    my $benchmarkCount = scalar(@$benchmarkOutputs);
+    if ($testCount != $benchmarkCount) {
+        die "runTest returned $testCount results, but generateBenchmark " .
+            "returned $benchmarkCount\n";
+    }
+
+    my $totalFailures = 0;
+    foreach my $i (0 .. $#{$testOutputs}) {
+        my $testFile = $testOutputs->[$i];
+        my $benchmarkFile = $benchmarkOutputs->[$i];
+
+        # cksum the two files to see if they are the same
+        my ($testChksm, $benchmarkChksm);
+        IPC::Run::run(['cat', $testFile], '|', ['cksum'],
+            \$testChksm, $log) or
+            die "$0: error: cannot run cksum on test results\n";
+        IPC::Run::run(['cat', $benchmarkFile], '|', ['cksum'],
+            \$benchmarkChksm, $log) or
+            die "$0: error: cannot run cksum on benchmark\n";
+
+        chomp $testChksm;
+        chomp $benchmarkChksm;
+        print $log
+            "test cksum: $testChksm\nbenchmark cksum: $benchmarkChksm\n";
+
+        unless ($testChksm eq $benchmarkChksm) {
+            my $here = cwd();
+            print $log "Test output $i checksum does not match benchmark " .
+                "checksum\n";
+            print $log "Test $i checksum = <$testChksm>\n";
+            print $log "Expected $i checksum = <$benchmarkChksm>\n";
+            print $log "RESULTS DIFFER: vimdiff $here/$testFile " .
+                "$here/$benchmarkFile\n";
+            $totalFailures++;
+        }
+
+        # Now, check if the sort order is specified
+        next unless defined($testResult->{'sortArgs'});
+
+        my @sortChk = ('sort', '-cs', @{$testResult->{'sortArgs'}},
+            $testResult->{'originalOutput'}->[$i]);
+        print $log "Going to run sort check command: " .
+            join(" ", @sortChk) . "\n";
+        IPC::Run::run(\@sortChk, \undef, $log, $log);
+        if ($?) {
+            print $log "Sort check failed\n";
+            $totalFailures++;
+        }
+    }
+
+    return $totalFailures == 0;
+}
+
+# Private helper: pipes $file through the command in @$cmd and replaces the
+# file's contents with the command's output.
+#   $file - path of the file to rewrite
+#   $cmd  - array ref holding the filter command and its arguments
+#   $what - short name of the filter, used in the failure message
+#   $log  - log handle; also receives the filter's stderr
+# The original is hard-linked to "$file.tmp", unlinked, re-created from the
+# filter output, and the temporary link is removed.  Dies on any failure.
+sub _filterFileInPlace
+{
+    my ($file, $cmd, $what, $log) = @_;
+
+    # Move the file to a temp file and run it through the filter.
+    my $tmpfile = "$file.tmp";
+    link($file, $tmpfile) or
+        die "Unable to create temporary file $tmpfile, $!\n";
+    unlink($file) or
+        die "Unable to unlink file $file, $!\n";
+    open(my $in, '<', $tmpfile) or
+        die "Unable to open file $tmpfile, $!\n";
+    open(my $out, '>', $file) or
+        die "Unable to open file $file, $!\n";
+    print $log "Going to run [" . join(" ", @$cmd) . "]\n";
+    IPC::Run::run($cmd, $in, $out, $log) or
+        die "Failed to run $what postprocessor, $!\n";
+    close($in);
+    close($out);
+    unlink($tmpfile);
+}
+
+# Post-processes one output file and returns the name of its sorted copy.
+#   $outfile     - file to process (rewritten in place by the filters)
+#   $testCmd     - hash ref; 'floatpostprocess' together with 'delimiter'
+#                  enables the float filter, 'nullpostprocess' the NULL filter
+#   $log         - log handle
+#   $isBenchmark - true for benchmark output; the NULL filter runs only then
+# Returns "$outfile.sorted", produced by running sort over the file.
+sub postProcessSingleOutputFile
+{
+    my ($self, $outfile, $testCmd, $log, $isBenchmark) = @_;
+
+    # If requested, process the data to smooth over floating point
+    # differences.
+    if (defined $testCmd->{'floatpostprocess'} &&
+            defined $testCmd->{'delimiter'}) {
+        _filterFileInPlace($outfile,
+            ["$toolpath/floatpostprocessor.pl", $testCmd->{'delimiter'}],
+            'float', $log);
+    }
+
+    # Strip the literal NULL markers from benchmark output.
+    if ($isBenchmark && defined $testCmd->{'nullpostprocess'}) {
+        _filterFileInPlace($outfile, ["sed", "s/NULL//g"], 'null', $log);
+    }
+
+    # Sort the results for the benchmark compare.
+    my $sortfile = "$outfile.sorted";
+    my @cmd = ("sort", $outfile);
+    print $log "Going to run [" . join(" ", @cmd) . "]\n";
+    # A failed sort is recorded in the log rather than ignored silently; the
+    # caller still receives the expected file name.
+    IPC::Run::run(\@cmd, '>', "$sortfile") or
+        print $log "Sorting $outfile into $sortfile failed\n";
+
+    return $sortfile;
+}
+
+
+
+##############################################################################
+# Count the number of stores in a Pig Latin script, so we know how many files
+# we need to compare.
+#
+sub countStores($$)
+{
+    # Returns the number of "store <alias> into" lines found in
+    # $testCmd->{'pig'}, or 1 when $testCmd->{'notmq'} is defined.
+    #   $self    - the driver object (not used here)
+    #   $testCmd - hash ref describing the test
+    # Always returns a number; 0 when 'pig' is missing, empty or has no store.
+    my ($self, $testCmd) = @_;
+
+    # Special work around for queries with more than one store that are not
+    # actually multiqueries.
+    if (defined $testCmd->{'notmq'}) {
+        return 1;
+    }
+
+    # Start at 0 so that a missing or empty script yields 0 instead of undef.
+    my $count = 0;
+    return $count unless defined $testCmd->{'pig'};
+
+    # hope they don't have more than one store per line
+    # also note that this won't work if you comment out a store
+    foreach my $line (split(/\n/, $testCmd->{'pig'})) {
+        $count++ if $line =~ /store\s+[a-zA-Z][a-zA-Z0-9_]*\s+into/i;
+    }
+
+    return $count;
+}
+
+1;

Added: incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/TestDriverPig.pm
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/TestDriverPig.pm?rev=1211077&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/TestDriverPig.pm (added)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/TestDriverPig.pm Tue Dec  6 20:05:37 2011
@@ -0,0 +1,1000 @@
+package TestDriverPig;
+
+############################################################################           
+#  Licensed to the Apache Software Foundation (ASF) under one or more                  
+#  contributor license agreements.  See the NOTICE file distributed with               
+#  this work for additional information regarding copyright ownership.                 
+#  The ASF licenses this file to You under the Apache License, Version 2.0             
+#  (the "License"); you may not use this file except in compliance with                
+#  the License.  You may obtain a copy of the License at                               
+#                                                                                      
+#      http://www.apache.org/licenses/LICENSE-2.0                                      
+#                                                                                      
+#  Unless required by applicable law or agreed to in writing, software                 
+#  distributed under the License is distributed on an "AS IS" BASIS,                   
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.            
+#  See the License for the specific language governing permissions and                 
+#  limitations under the License.                                                      
+                                                                                       
+###############################################################################
+# Test driver for pig nightly tests.
+# 
+#
+
+use TestDriver;
+use IPC::Run; # don't do qw(run), it screws up TestDriver which also has a run method
+use Digest::MD5 qw(md5_hex);
+use Util;
+use File::Path;
+use Cwd;
+
+use English;
+
+# Name of the parent class; @ISA is set from it so this package inherits
+# from TestDriver.  It is also referenced in log messages further down.
+our $className= "TestDriver";
+our @ISA = "$className";
+# Harness root directory, read from the HARNESS_ROOT environment variable.
+# Loading this module dies when that variable is not set.
+our $ROOT = (defined $ENV{'HARNESS_ROOT'} ? $ENV{'HARNESS_ROOT'} : die "ERROR: You must set environment variable HARNESS_ROOT\n");
+# Directory below the harness root; presumably holds helper tools -- not
+# referenced in the part of the file reviewed here (TODO confirm).
+our $toolpath = "$ROOT/libexec/HCatTest";
+
+# Test status strings.  NOTE(review): none of these is referenced in the part
+# of the file reviewed here; confirm usage before removing.
+my $passedStr  = 'passed';
+my $failedStr  = 'failed';
+my $abortedStr = 'aborted';
+my $skippedStr = 'skipped';
+my $dependStr  = 'failed_dependency';
+
+# Constructor: resolves the class from the invocant (a class name or an
+# object), delegates construction to the parent class and returns the
+# instance blessed into the resolved class.
+sub new
+{
+    my ($invocant) = @_;
+    my $class = ref($invocant) ? ref($invocant) : $invocant;
+
+    # Call our parent, then bless its result into this class
+    return bless($class->SUPER::new, $class);
+}
+
+# Expands the :TOKEN: placeholders of a Pig script or shell command and
+# returns the expanded text.
+#   $self    - driver object; supplies 'latestoutputpath'
+#   $text    - text containing placeholders
+#   $outpath - value substituted for :OUTPATH:
+#   $testCmd - hash ref supplying the remaining values
+#   $log     - log handle (not used here)
+# :REMOTECLUSTER: expands to 'remoteSecureCluster' when 'hadoopSecurity' is
+# "secure" and to 'remoteNotSecureCluster' otherwise.
+sub replaceParameters
+{
+##!!! Move this to Util.pm
+
+    my ($self, $text, $outpath, $testCmd, $log) = @_;
+
+    # Values taken from the driver, the output file and the environment
+    $text =~ s{:LATESTOUTPUTPATH:}{$self->{'latestoutputpath'}}g;
+    $text =~ s{:OUTPATH:}{$outpath}g;
+    $text =~ s{:PIGHARNESS:}{$ENV{HARNESS_ROOT}}g;
+
+    # Values taken from the test command
+    $text =~ s{:INPATH:}{$testCmd->{'inpathbase'}}g;
+    $text =~ s{:OUTPATH:}{$outpath}g;
+    $text =~ s{:FUNCPATH:}{$testCmd->{'funcjarPath'}}g;
+    $text =~ s{:PIGPATH:}{$testCmd->{'pigpath'}}g;
+    $text =~ s{:RUNID:}{$testCmd->{'UID'}}g;
+    $text =~ s{:USRHOMEPATH:}{$testCmd->{'userhomePath'}}g;
+    $text =~ s{:MAPREDJARS:}{$testCmd->{'mapredjars'}}g;
+    $text =~ s{:SCRIPTHOMEPATH:}{$testCmd->{'scriptPath'}}g;
+    $text =~ s{:DBUSER:}{$testCmd->{'dbuser'}}g;
+    $text =~ s{:DBNAME:}{$testCmd->{'dbdb'}}g;
+#    $text =~ s/:LOCALINPATH:/$testCmd->{'localinpathbase'}/g;
+#    $text =~ s/:LOCALOUTPATH:/$testCmd->{'localoutpathbase'}/g;
+#    $text =~ s/:LOCALTESTPATH:/$testCmd->{'localpathbase'}/g;
+    $text =~ s{:BMPATH:}{$testCmd->{'benchmarkPath'}}g;
+    $text =~ s{:TMP:}{$testCmd->{'tmpPath'}}g;
+    $text =~ s{:HDFSTMP:}{tmp/$testCmd->{'runid'}}g;
+
+    # Pick the cluster entry that matches the security mode
+    my $clusterKey = $testCmd->{'hadoopSecurity'} eq "secure"
+        ? 'remoteSecureCluster'
+        : 'remoteNotSecureCluster';
+    $text =~ s{:REMOTECLUSTER:}{$testCmd->{$clusterKey}}g;
+
+    return $text;
+}
+
+# One-time setup for a Pig test run.
+#
+# Derives a run id of the form "<user>.<epoch>", honours "-ignore false",
+# computes 'outpath' and 'localpath', prepends 'scriptPath' to PATH, and
+# creates the HDFS output directory, the local, benchmark and temporary
+# directories, and the HDFS temporary directory "tmp/<runid>".
+# Dies if any of these directories cannot be created.
+sub globalSetup
+{
+    my ($self, $globalHash, $log) = @_;
+
+    # Setup the output path
+    my $me = `whoami`;
+    chomp $me;
+    $globalHash->{'runid'} = $me . "." . time;
+
+    # if "-ignore false" was provided on the command line,
+    # it means do run tests even when marked as 'ignore'
+    if(defined($globalHash->{'ignore'}) && $globalHash->{'ignore'} eq 'false')
+    {
+        $self->{'ignore'} = 'false';
+    }
+
+    $globalHash->{'outpath'} = $globalHash->{'outpathbase'} . "/" . $globalHash->{'runid'} . "/";
+    $globalHash->{'localpath'} = $globalHash->{'localpathbase'} . "/" . $globalHash->{'runid'} . "/";
+
+    # add libexec location to the path
+    if (defined($ENV{'PATH'})) {
+        $ENV{'PATH'} = $globalHash->{'scriptPath'} . ":" . $ENV{'PATH'};
+    }
+    else {
+        $ENV{'PATH'} = $globalHash->{'scriptPath'};
+    }
+
+    # Create the HDFS output directory through the pig command line
+    my @cmd = ($self->getPigCmd($globalHash, $log), '-e', 'mkdir', $globalHash->{'outpath'});
+
+    print $log "Going to run " . join(" ", @cmd) . "\n";
+    IPC::Run::run(\@cmd, \undef, $log, $log) or die "Cannot create HDFS directory " . $globalHash->{'outpath'} . ": $? - $!\n";
+
+    IPC::Run::run(['mkdir', '-p', $globalHash->{'localpath'}], \undef, $log, $log) or
+        die "Cannot create localpath directory " . $globalHash->{'localpath'} .
+        " " . "$ERRNO\n";
+
+    IPC::Run::run(['mkdir', '-p', $globalHash->{'benchmarkPath'}], \undef, $log, $log) or
+        die "Cannot create benchmark directory " .  $globalHash->{'benchmarkPath'} .
+        " " . "$ERRNO\n";
+
+    # Create the temporary directory
+    IPC::Run::run(['mkdir', '-p', $globalHash->{'tmpPath'}], \undef, $log, $log) or
+        die "Cannot create temporary directory " . $globalHash->{'tmpPath'} .
+        " " . "$ERRNO\n";
+
+    # Create the HDFS temporary directory
+    my $hdfsTmp = "tmp/$globalHash->{'runid'}";
+    @cmd = ($self->getPigCmd($globalHash, $log), '-e', 'mkdir', $hdfsTmp);
+    print $log "Going to run " . join(" ", @cmd) . "\n";
+    # Name the directory that actually failed, not the output path.
+    IPC::Run::run(\@cmd, \undef, $log, $log) or die "Cannot create HDFS directory " . $hdfsTmp . ": $? - $!\n";
+}
+
+# Global teardown for a Pig test run: removes the local temporary directory
+# ('tmpPath') and the HDFS temporary directory "tmp/<runid>".
+# A failure to remove the local directory only warns; a failure to remove
+# the HDFS directory dies.
+sub globalCleanup
+{
+    my ($self, $globalHash, $log) = @_;
+
+    IPC::Run::run(['rm', '-rf', $globalHash->{'tmpPath'}], \undef, $log, $log) or
+        warn "Cannot remove temporary directory " . $globalHash->{'tmpPath'} .
+        " " . "$ERRNO\n";
+
+    # Cleanup the HDFS temporary directory
+    my $hdfsTmp = "tmp/$globalHash->{'runid'}";
+    my @cmd = ($self->getPigCmd($globalHash, $log), '-e', 'fs', '-rmr', $hdfsTmp);
+    print $log "Going to run " . join(" ", @cmd) . "\n";
+    # Report the removal of the directory that was actually targeted.
+    IPC::Run::run(\@cmd, \undef, $log, $log) or die "Cannot remove HDFS directory " . $hdfsTmp . ": $? - $!\n";
+}
+
+
+# Entry point for running one Pig-driver test.  Dispatches on the contents of
+# $testCmd:
+#   - 'pig' with command-line verifications -> runPigCmdLine
+#   - 'pig' otherwise                       -> runPig (plus dumping of the
+#                                              'result_table' tables, if any)
+#   - 'script'                              -> runScript
+# Returns the result hash ref produced by the chosen runner, an empty hash ref
+# when the test is skipped because of its execution mode, and dies when the
+# test command has none of the known entries.
+sub runTest
+{
+    my ($self, $testCmd, $log) = @_;
+    my $subName  = (caller(0))[3];
+
+    # Check that we should run this test.  If the current execution type
+    # doesn't match the execonly flag, then skip this one.
+    if ($self->wrongExecutionMode($testCmd)) {
+        print $log "Skipping test $testCmd->{'group'}" . "_" .
+            $testCmd->{'num'} . " since it is executed only in " .
+            $testCmd->{'execonly'} . " mode and we are executing in " .
+            $testCmd->{'exectype'} . " mode.\n";
+        my %result;
+        return \%result;
+    }
+
+    if ( $testCmd->{'hcat_prep'} ) {
+        Util::prepareHCat($self, $testCmd, $log);
+    }
+    # Handle the various methods of running used in 
+    # the original TestDrivers
+
+    # NOTE(review): runPigCmdLine unpacks only ($self, $testCmd, $log), so the
+    # trailing 1 passed below is ignored.
+    if ( $testCmd->{'pig'} && $self->hasCommandLineVerifications( $testCmd, $log) ) {
+       return $self->runPigCmdLine( $testCmd, $log, 1);
+    } elsif( $testCmd->{'pig'} ){
+       # If the results are written to a table run the command and then 
+       # run a another Pig script to dump the results of the table.
+       my $result;
+       if (defined($testCmd->{'result_table'})) {
+           # Run the test script itself without copying its results back.
+           $result = $self->runPig( $testCmd, $log, 0);
+           my @results = ();
+           my @outputs = ();
+           # 'result_table' may be a single entry or an array of entries.
+           if (ref($testCmd->{'result_table'}) ne 'ARRAY') {
+               $results[0] = $testCmd->{'result_table'};
+           } else {
+               @results = @{$testCmd->{'result_table'}};
+           }
+           # NOTE(review): $stores is assigned but not used below.
+           my $stores = $self->countStores($testCmd);
+
+           my $id = 0; # regular ouput count
+           for (my $i = 0; $i < @results; $i++) {
+               # An entry other than '?' names a table to dump through
+               # HCatLoader; '?' means the output is a regular store whose
+               # file is fetched instead.
+               if ($results[$i] ne '?') {
+	           my %modifiedTestCmd = %{$testCmd};
+	           # NOTE(review): @pigfiles, @outfiles, $tableName and (below)
+	           # $localdir are not declared with "my" in this sub, so they
+	           # are package variables; @pigfiles and @outfiles are only
+	           # written here, never read.
+	           $pigfiles[$i] = $testCmd->{'localpath'} .
+	               $testCmd->{'group'} . "_" .  $testCmd->{'num'} .
+	               ".dumptable.$i.pig";
+	           $outfiles[$i] = $testCmd->{'thisResultsPath'} . "/" .
+	               $testCmd->{'group'} .  "_" .  $testCmd->{'num'} . ".$i.out";
+                   $tableName = $results[$i];
+	           # Run a derived test command that loads the table and stores
+	           # it to :OUTPATH:, under a distinct test number.
+	           $modifiedTestCmd{'num'} = $testCmd->{'num'} . "_" . $i . "_benchmark";
+                   $modifiedTestCmd{'pig'} = "a = load '$tableName' using org.apache.hcatalog.pig.HCatLoader(); store a into ':OUTPATH:';";
+                   my $r = $self->runPig(\%modifiedTestCmd, $log, 1);
+	           $outputs[$i] = $r->{'output'};
+               } else {
+                   $localdir = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".out/$id";
+                   my $outfile = $testCmd->{'outpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".out";
+
+                   # Copy result file out of hadoop
+                   my @baseCmd = $self->getPigCmd($testCmd, $log);
+                   my $testOut = $self->postProcessSingleOutputFile($outfile, $localdir, \@baseCmd, $testCmd, $log);
+                   $outputs[$i] = $testOut;
+                   $id++;
+               }
+           }
+           $result->{'outputs'}=\@outputs;
+           # With exactly one store, also expose the first output under the
+           # single-result key.
+           if ($self->countStores($testCmd)==1) {
+               $result->{'output'}=$outputs[0];
+           }
+       }
+       else {
+           $result = $self->runPig( $testCmd, $log, 1);
+       }
+       return $result;
+    } elsif(  $testCmd->{'script'} ){
+       return $self->runScript( $testCmd, $log );
+    } else {
+       die "$subName FATAL Did not find a testCmd that I know how to handle";
+    }
+}
+
+# Runs a Pig script through the shell, capturing stdout and stderr in files
+# so that command-line verifications can inspect them.
+#
+# The script in $testCmd->{'pig'} has its placeholders expanded and is written
+# to "<localpath><group>_<num>.pig"; stdout and stderr go to files below
+# "<localpath><group>_<num>.out".
+#
+# Returns a hash ref with:
+#   rc          - exit code of the command
+#   output      - the (HDFS) output path substituted for :OUTPATH:
+#   stdout      - captured standard output
+#   stderr      - captured standard error
+#   stderr_file - name of the file holding standard error
+# Dies if the output directory or the script file cannot be created.
+sub runPigCmdLine
+{
+    my ($self, $testCmd, $log) = @_;
+    my $subName = (caller(0))[3];
+    my %result;
+
+    # Set up file locations
+    my $pigfile = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".pig";
+    my $outfile = $testCmd->{'outpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".out";
+
+    my $outdir  = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".out";
+    my $stdoutfile = "$outdir/stdout";
+    my $stderrfile = "$outdir/stderr";
+
+    # Test the directory held in $outdir, not a file literally named "outdir"
+    # in the current directory.
+    mkpath( [ $outdir ] , 0, 0755) if ( ! -e $outdir );
+    if ( ! -e $outdir ){
+       print $log "$0.$subName FATAL could not mkdir $outdir\n";
+       die "$0.$subName FATAL could not mkdir $outdir\n";
+    }
+
+    # Write the pig script to a file.
+    my $pigcmd = $self->replaceParameters( $testCmd->{'pig'}, $outfile, $testCmd, $log );
+
+    open(my $pigfh, '>', $pigfile) or die "Unable to open file $pigfile to write pig script, $ERRNO\n";
+    print $pigfh $pigcmd . "\n";
+    close($pigfh);
+
+    # Build the command
+    my @baseCmd = $self->getPigCmd($testCmd, $log);
+    my @cmd = @baseCmd;
+
+    # Add option -l giving location for secondary logs
+    ##!!! Should that even be here? 
+    my $locallog = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".log";
+    push(@cmd, "-logfile");
+    push(@cmd, $locallog);
+
+    # Add pig parameters if they're provided
+    if (defined($testCmd->{'pig_params'})) {
+        # Processing :PARAMPATH: in parameters.  $param aliases the array
+        # element, so the expansion is stored back into $testCmd.
+        foreach my $param (@{$testCmd->{'pig_params'}}) {
+            $param =~ s/:PARAMPATH:/$testCmd->{'paramPath'}/g;
+        }
+        push(@cmd, @{$testCmd->{'pig_params'}});
+    }
+
+    # Add pig file and redirections 
+    push(@cmd, $pigfile);
+    my $command= join (" ", @cmd) . " 1> $stdoutfile 2> $stderrfile";
+
+    # Run the command
+    print $log "$0:$subName Going to run command: ($command)\n";
+    print $log "$0:$subName STD OUT IS IN FILE ($stdoutfile)\n";
+    print $log "$0:$subName STD ERROR IS IN FILE ($stderrfile)\n";
+    print $log "$0:$subName PIG SCRIPT CONTAINS ($pigfile):  \n$pigcmd\n";
+
+    # Both output streams are redirected to files, so nothing is captured here.
+    my @result=`$command`;
+    $result{'rc'} = $? >> 8;
+    $result{'output'} = $outfile;
+    $result{'stdout'} = `cat $stdoutfile`;
+    $result{'stderr'} = `cat $stderrfile`;
+    $result{'stderr_file'} = $stderrfile;
+
+    print $log "STD ERROR CONTAINS:\n$result{'stderr'}\n";
+
+    return \%result;
+}
+
+
+# Runs the shell script of a test, capturing stdout and stderr in files.
+#
+# The text in $testCmd->{'script'} has its placeholders expanded, is written
+# to "<localpath><group>_<num>.sh", made executable and run.  Standard output
+# goes to "script.out" and standard error to "script.err" below
+# "<localpath><group>_<num>.out"; "script.out" is also the value substituted
+# for :OUTPATH:.
+#
+# Returns a hash ref with:
+#   rc          - exit code of the script
+#   output      - name of the script.out file
+#   stdout      - captured standard output
+#   stderr      - captured standard error
+#   stderr_file - name of the file holding standard error
+# Dies if the output directory or the script file cannot be created.
+sub runScript
+{
+    my ($self, $testCmd, $log) = @_;
+    my $subName = (caller(0))[3];
+    my %result;
+
+    # Set up file locations
+    my $script = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".sh";
+    my $outdir = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".out";
+
+    my $outfile = "$outdir/script.out";
+    my $stdoutfile = "$outdir/script.out";
+    my $stderrfile = "$outdir/script.err";
+
+    # Test the directory held in $outdir, not a file literally named "outdir"
+    # in the current directory.
+    mkpath( [ $outdir ] , 0, 0755) if ( ! -e $outdir );
+    if ( ! -e $outdir ){
+       print $log "$0.$subName FATAL could not mkdir $outdir\n";
+       die "$0.$subName FATAL could not mkdir $outdir\n";
+    }
+
+    # Write the script to a file
+    my $cmd = $self->replaceParameters( $testCmd->{'script'}, $outfile, $testCmd, $log );
+
+    open(my $scriptfh, '>', $script) or die "Unable to open file $script to write script, $ERRNO\n";
+    print $scriptfh $cmd . "\n";
+    close($scriptfh);
+
+    my @result=`chmod +x $script`;
+
+    # Build the command
+    my $command= "$script 1> $stdoutfile 2> $stderrfile";
+
+    # Run the script
+    print $log "$0:$subName Going to run command: ($command)\n";
+    print $log "$0:$subName STD OUT IS IN FILE ($stdoutfile)\n";
+    print $log "$0:$subName STD ERROR IS IN FILE ($stderrfile)\n";
+    print $log "$0:$subName SCRIPT CONTAINS ($script):  \n$cmd\n";
+
+    # Both output streams are redirected to files, so nothing is captured here.
+    @result=`$command`;
+    $result{'rc'} = $? >> 8;
+    $result{'output'} = $outfile;
+    $result{'stdout'} = `cat $stdoutfile`;
+    $result{'stderr'} = `cat $stderrfile`;
+    $result{'stderr_file'} = $stderrfile;
+
+    print $log "STD ERROR CONTAINS:\n$result{'stderr'}\n";
+
+    return \%result;
+}
+
+
+# Builds the base pig command line and prepares the environment for it.
+#   $self    - driver object (not used here)
+#   $testCmd - hash ref; reads 'jythonjar', 'classpath', 'additionaljars',
+#              'testconfigpath', 'exectype', 'pigpath', 'thriftserver' and
+#              'java_params'
+#   $log     - log handle
+# Side effects: sets $ENV{'PIG_CLASSPATH'} and $ENV{'PIG_OPTS'}; in local
+# mode makes sure 'java_params' in $testCmd contains "-Xmx1024m".
+# Returns the command as a list: the pig executable followed by its options.
+sub getPigCmd($$$)
+{
+    my ($self, $testCmd, $log) = @_;
+
+    my @pigCmd;
+
+    # set the PIG_CLASSPATH environment variable.  The variable is declared
+    # unconditionally: "my ... if ..." has undefined behaviour and let the
+    # value of an earlier call leak into this one.
+    my $pcp = '';
+    $pcp .= $testCmd->{'jythonjar'} if (defined($testCmd->{'jythonjar'}));
+    $pcp .= ":" . $testCmd->{'classpath'} if (defined($testCmd->{'classpath'}));
+    $pcp .= ":" . $testCmd->{'additionaljars'} if (defined($testCmd->{'additionaljars'}));
+    # Add testconfigpath to PIG_CLASSPATH whenever we are not running in
+    # local mode (the HADOOP_HOME check is disabled).
+    $pcp .= ":" . $testCmd->{'testconfigpath'} if ($testCmd->{'exectype'} ne "local"); #&& (! defined $ENV{'HADOOP_HOME'});
+
+    # Set it in our current environment.  It will get inherited by the IPC::Run
+    # command.
+    $ENV{'PIG_CLASSPATH'} = $pcp;
+
+    @pigCmd = ("$testCmd->{'pigpath'}/bin/pig");
+
+    if (defined($testCmd->{'additionaljars'})) {
+        push(@pigCmd, '-Dpig.additional.jars='.$testCmd->{'additionaljars'});
+    }
+
+    if ($testCmd->{'exectype'} eq "local") {
+        # Add the heap setting only once; this sub is called several times
+        # with the same $testCmd and used to append a duplicate on each call.
+        my $hasHeapOpt = grep { $_ eq "-Xmx1024m" } @{$testCmd->{'java_params'} || []};
+        push(@{$testCmd->{'java_params'}}, "-Xmx1024m") unless $hasHeapOpt;
+        push(@pigCmd, ("-x", "local"));
+    }
+
+    my $opts = "-Dhcat.metastore.uri=$testCmd->{'thriftserver'}";
+    if (defined($testCmd->{'java_params'})) {
+        $opts = $opts . " " . join(" ", @{$testCmd->{'java_params'}});
+    }
+
+    $ENV{'PIG_OPTS'} = $opts;
+
+    print $log "Returning Pig command " . join(" ", @pigCmd) . "\n";
+    print $log "With PIG_CLASSPATH set to " . $ENV{'PIG_CLASSPATH'} . " and PIG_OPTS set to " . $ENV{'PIG_OPTS'} . "\n";
+    return @pigCmd;
+}
+
+# Dumps an HCatalog table to a file by running a generated Pig script that
+# loads the table with HCatLoader and stores it.
+#   $testCmd - hash ref describing the test (paths, group, num)
+#   $table   - name of the table to dump
+#   $log     - log handle
+#   $id      - suffix appended to the test number in the generated file names
+# Returns whatever postProcessSingleOutputFile returns for the stored output.
+# Dies if the script cannot be written or the pig command fails.
+sub dumpPigTable
+{
+    my ($self, $testCmd, $table, $log, $id) = @_;
+    my $subName  = (caller(0))[3];
+
+    # Write the pig script to a file.
+    my $pigfile = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . $id . ".dump.pig";
+    my $outfile = $testCmd->{'outpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'}  . $id . "dump.out";
+
+    open(my $pigfh, '>', $pigfile) or die "Unable to open file $pigfile to write pig script, $ERRNO\n";
+    print $pigfh "a = load '$table' using org.apache.hcatalog.pig.HCatLoader(); store a into '$outfile';\n";
+    close($pigfh);
+
+    # Build the command
+    my @baseCmd = $self->getPigCmd($testCmd, $log);
+    my @cmd = @baseCmd;
+
+    push(@cmd, $pigfile);
+
+    # Run the command
+    print $log "Setting PIG_CLASSPATH to $ENV{'PIG_CLASSPATH'}\n";
+    # The braces keep "$className::" from being parsed as a package-qualified
+    # variable name, which left the class name out of the message.
+    print $log "$0::${className}::$subName INFO: Going to run pig command: @cmd\n";
+
+    IPC::Run::run(\@cmd, \undef, $log, $log) or
+        die "Failed running $pigfile\n";
+
+    # Get results from the command locally
+    my $localdir = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . $id . ".dump.out";
+
+    $outfile = $self->postProcessSingleOutputFile($outfile, $localdir, \@baseCmd, $testCmd, $log);
+    return $outfile;
+}
+
+# Runs the Pig script of a test and, optionally, fetches its results.
+#   $testCmd     - hash ref describing the test
+#   $log         - log handle
+#   $copyResults - when true, each stored output is passed through
+#                  postProcessSingleOutputFile; otherwise the placeholder
+#                  "NO_COPY" is recorded instead
+#
+# Returns a hash ref with:
+#   rc             - $? >> 8 after the pig run
+#   output         - single-store scripts: the processed output or "NO_COPY"
+#   originalOutput - single-store scripts with $copyResults: "<localdir>/out_original"
+#   outputs        - multi-store scripts: array ref with one entry per store
+#   sortArgs       - copied from $testCmd when it holds a true value
+# Dies if the script cannot be written or the pig command fails.
+sub runPig
+{
+    my ($self, $testCmd, $log, $copyResults) = @_;
+    my $subName  = (caller(0))[3];
+
+    my %result;
+
+    # Write the pig script to a file.
+    my $pigfile = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".pig";
+    my $outfile = $testCmd->{'outpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".out";
+
+    my $pigcmd = $self->replaceParameters( $testCmd->{'pig'}, $outfile, $testCmd, $log );
+
+    open(my $pigfh, '>', $pigfile) or die "Unable to open file $pigfile to write pig script, $ERRNO\n";
+    print $pigfh $pigcmd . "\n";
+    close($pigfh);
+
+    # Build the command
+    my @baseCmd = $self->getPigCmd($testCmd, $log);
+    my @cmd = @baseCmd;
+
+    # Add option -l giving location for secondary logs
+    my $locallog = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".log";
+    push(@cmd, "-logfile");
+    push(@cmd, $locallog);
+
+    # Add pig parameters if they're provided
+    if (defined($testCmd->{'pig_params'})) {
+        # Processing :PARAMPATH: in parameters.  $param aliases the array
+        # element, so the expansion is stored back into $testCmd.
+        foreach my $param (@{$testCmd->{'pig_params'}}) {
+            $param =~ s/:PARAMPATH:/$testCmd->{'paramPath'}/g;
+        }
+        push(@cmd, @{$testCmd->{'pig_params'}});
+    }
+
+    push(@cmd, $pigfile);
+
+    # Run the command
+    print $log "Setting PIG_CLASSPATH to $ENV{'PIG_CLASSPATH'}\n";
+    # The braces keep "$className::" from being parsed as a package-qualified
+    # variable name, which left the class name out of the message.
+    print $log "$0::${className}::$subName INFO: Going to run pig command: @cmd\n";
+
+    IPC::Run::run(\@cmd, \undef, $log, $log) or
+        die "Failed running $pigfile\n";
+    $result{'rc'} = $? >> 8;
+
+    # Get results from the command locally
+    my $localdir = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".out";
+    my $stores = $self->countStores($testCmd);
+
+    # single query
+    if ($stores == 1) {
+        if ($copyResults) {
+            $result{'output'} = $self->postProcessSingleOutputFile($outfile, $localdir, \@baseCmd, $testCmd, $log);
+            $result{'originalOutput'} = "$localdir/out_original"; # populated by postProcessSingleOutputFile
+        } else {
+            $result{'output'} = "NO_COPY";
+        }
+    }
+    # multi query
+    else {
+        my @outfiles = ();
+        # Store number $id is written to "<outfile>.<id>", counting from 1.
+        for (my $id = 1; $id <= ($stores); $id++) {
+            $localdir = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".out/$id";
+            my $localoutfile = $outfile . ".$id";
+
+            # Copy result file out of hadoop
+            my $testOut;
+            if ($copyResults) {
+              $testOut = $self->postProcessSingleOutputFile($localoutfile, $localdir, \@baseCmd, $testCmd, $log);
+            } else {
+              $testOut = "NO_COPY";
+            }
+            push(@outfiles, $testOut);
+        }
+        ##!!! originalOutputs not set! Needed?
+        $result{'outputs'} = \@outfiles;
+    }
+
+    # Compare doesn't get the testCmd hash, so I need to stuff the necessary
+    # info about sorting into the result.
+    if (defined $testCmd->{'sortArgs'} && $testCmd->{'sortArgs'}) {
+        $result{'sortArgs'} = $testCmd->{'sortArgs'};
+    }
+
+    return \%result;
+}
+
+##############################################################################
+# Post process the output file of one SQL benchmark query and sort it.
+#
+# Parameters:
+# $outfile     - path of the file holding the raw query output; it is
+#                rewritten in place by the optional filters.
+# $testCmd     - hash ref describing the test.  Keys read here:
+#                'floatpostprocess', 'delimiter' and 'nullpostprocess'.
+# $log         - open filehandle for log output.
+# $isBenchmark - when true (and 'nullpostprocess' is defined) the literal
+#                string NULL is stripped from the output.
+#
+# Returns:
+# The path of the sorted copy, "$outfile.sorted".
+#
+# Dies when a filter or the sort cannot be run successfully.
+#
+sub postProcessSingleSQLOutputFile
+{
+    my ($self, $outfile, $testCmd, $log, $isBenchmark) = @_;
+
+    # If requested, process the data to smooth over floating point
+    # differences.
+    if (defined $testCmd->{'floatpostprocess'} &&
+            defined $testCmd->{'delimiter'}) {
+        my @floatCmd = ("$toolpath/floatpostprocessor.pl",
+            $testCmd->{'delimiter'});
+        _filterFileInPlace($outfile, \@floatCmd, 'float postprocessor', $log);
+    }
+
+    # If requested, remove the NULL markers from benchmark output.
+    if ($isBenchmark && defined $testCmd->{'nullpostprocess'}) {
+        my @nullCmd = ("sed", "s/NULL//g");
+        _filterFileInPlace($outfile, \@nullCmd, 'null postprocessor', $log);
+    }
+
+    # Sort the results for the benchmark compare.
+    my $sortfile = "$outfile.sorted";
+    my @cmd = ("sort", $outfile);
+    print $log "Going to run [" . join(" ", @cmd) . "]\n";
+    IPC::Run::run(\@cmd, '>', "$sortfile") or
+        die "Failed to sort $outfile into $sortfile\n";
+
+    return $sortfile;
+}
+
+##############################################################################
+# Private helper: pipe a file through a filter command and store the filter
+# output under the original file name.
+#
+# Parameters:
+# $file - path of the file to rewrite.
+# $cmd  - array ref holding the filter command and its arguments.
+# $what - short description of the filter, used in the error message.
+# $log  - open filehandle; receives the log line and the filter's stderr.
+#
+# Returns:
+# None.  Dies when any step fails.
+#
+sub _filterFileInPlace
+{
+    my ($file, $cmd, $what, $log) = @_;
+
+    # Move the file to a temp file and run through the pre-processor.
+    my $tmpfile = "$file.tmp";
+    link($file, $tmpfile) or
+        die "Unable to create temporary file $tmpfile, $!\n";
+    unlink($file) or
+        die "Unable to unlink file $file, $!\n";
+    open(my $ifh, '<', $tmpfile) or
+        die "Unable to open file $tmpfile, $!\n";
+    open(my $ofh, '>', $file) or
+        die "Unable to open file $file, $!\n";
+    print $log "Going to run [" . join(" ", @{$cmd}) . "]\n";
+    IPC::Run::run($cmd, $ifh, $ofh, $log) or
+        die "Failed to run $what, $!\n";
+    close($ifh);
+    close($ofh);
+    unlink($tmpfile);
+
+    return;
+}
+
+##############################################################################
+# Copy one result directory out of HDFS, merge its part files and sort them.
+#
+# Parameters:
+# $outfile  - HDFS path of the results to copy.
+# $localdir - local directory the results are copied to.
+# $baseCmd  - array ref holding the base pig command line.
+# $testCmd  - hash ref describing the test.  Keys read here:
+#             'floatpostprocess' and 'delimiter'.
+# $log      - open filehandle for log output.
+#
+# Returns:
+# The path of the sorted output, "$localdir/out_sorted".  The unsorted merge
+# is left in "$localdir/out_original".
+#
+# Dies when the copy, the launch of the merge pipeline or the sort fails.
+#
+sub postProcessSingleOutputFile
+{
+    my ($self, $outfile, $localdir, $baseCmd, $testCmd, $log) = @_;
+    my $subName  = (caller(0))[3];
+
+    my @baseCmd = @{$baseCmd};
+    my @copyCmd = @baseCmd;
+    push(@copyCmd, ('-e', 'copyToLocal', $outfile, $localdir));
+    print $log "$0::$className::$subName INFO: Going to run pig command: @copyCmd\n";
+
+    IPC::Run::run(\@copyCmd, \undef, $log, $log) or die "Cannot copy results from HDFS $outfile to $localdir\n";
+
+
+    # Sort the result if necessary.  Keep the original output in one large file.
+    # Use system not IPC run so that the '*' gets interpolated by the shell.
+
+    # Build command to:
+    # 1. Combine part files
+    my $fppCmd = "cat $localdir/map* $localdir/part* 2>/dev/null";
+
+    # 2. Standardize float precision
+    if (defined $testCmd->{'floatpostprocess'} &&
+            defined $testCmd->{'delimiter'}) {
+        $fppCmd .= " | $toolpath/floatpostprocessor.pl '" .
+            $testCmd->{'delimiter'} . "'";
+    }
+
+    $fppCmd .= " > $localdir/out_original";
+
+    # run command
+    print $log "$fppCmd\n";
+    system($fppCmd);
+    # Only a failure to launch the shell is treated as fatal.  The exit
+    # status of the pipeline itself is deliberately not checked: presumably
+    # one of the two globs regularly matches nothing, which is why the
+    # stderr of cat is discarded above.
+    if ($? == -1) {
+        die "Unable to run command to combine results in $localdir, $!\n";
+    }
+
+    # Sort the results for the benchmark compare.
+    my @sortCmd = ('sort', "$localdir/out_original");
+    print $log join(" ", @sortCmd) . "\n";
+    IPC::Run::run(\@sortCmd, '>', "$localdir/out_sorted") or
+        die "Failed to sort $localdir/out_original into $localdir/out_sorted\n";
+
+    return "$localdir/out_sorted";
+}
+
+##############################################################################
+# Generate the benchmark (expected) results for a test by running its SQL.
+#
+# Parameters:
+# $testCmd - hash ref describing the test.  $testCmd->{'sql'} is either one
+#            query string or a reference to an array of query strings.
+# $log     - open filehandle for log output.
+#
+# Returns:
+# A hash ref holding 'output' (one file name) when there is exactly one
+# query, or 'outputs' (array ref of file names, in query order) otherwise.
+#
+sub generateBenchmark
+{
+    my ($self, $testCmd, $log) = @_;
+
+    my %result;
+
+    # Normalise 'sql' into a list.  It must only be dereferenced after the
+    # type check: dereferencing a plain string is fatal under strict refs.
+    my @SQLQuery = (ref($testCmd->{'sql'}) eq 'ARRAY')
+        ? @{$testCmd->{'sql'}}
+        : ($testCmd->{'sql'});
+
+    if (@SQLQuery == 1) {
+        # Single query: the output file name carries no numeric id.
+        my $outfile = $self->generateSingleSQLBenchmark($testCmd, $SQLQuery[0], undef, $log);
+        $result{'output'} = $outfile;
+    } else {
+        # Several queries: ids start at 1.
+        my @outfiles = ();
+        for (my $id = 0; $id < @SQLQuery; $id++) {
+            my $sql = $SQLQuery[$id];
+            my $outfile = $self->generateSingleSQLBenchmark($testCmd, $sql, ($id+1), $log);
+            push(@outfiles, $outfile);
+        }
+        $result{'outputs'} = \@outfiles;
+    }
+
+    return \%result;
+}
+
+##############################################################################
+# Run one SQL query against the database and store its post processed
+# output as benchmark for the test.
+#
+# Parameters:
+# $testCmd - hash ref describing the test.  Keys read here: 'localpath',
+#            'benchmarkPath', 'group' and 'num'.
+# $sql     - text of the SQL query.
+# $id      - number of the query within the test, or undef when the test
+#            has only one query.
+# $log     - open filehandle for log output.
+#
+# Returns:
+# The file name returned by postProcessSingleSQLOutputFile.
+#
+# Dies when the output file or the SQL script file cannot be written.
+#
+sub generateSingleSQLBenchmark
+{
+    my ($self, $testCmd, $sql, $id, $log) = @_;
+
+    my $testName = $testCmd->{'group'} . "_" . $testCmd->{'num'};
+
+    # An undefined id contributes nothing to the script name, which keeps
+    # the name identical to what interpolating undef produced.
+    my $idPart = defined($id) ? $id : '';
+    my $sqlfile = $testCmd->{'localpath'} . $testName . ".benchmark.$idPart.sql";
+
+    my $outfile = $testCmd->{'benchmarkPath'} . "/" . $testName;
+    $outfile .= defined($id) ? ".$id" . ".out" :  ".out";
+
+    open(my $outfp, '>', $outfile) or
+        die "Unable to open output file $outfile, $!\n";
+
+    open(my $sqlfh, '>', $sqlfile) or
+        die "Unable to open file $sqlfile to write SQL script, $!\n";
+    print $sqlfh $sql;
+    # The script is consumed right away, so make sure it was fully written.
+    close($sqlfh) or
+        die "Unable to close file $sqlfile after writing SQL script, $!\n";
+
+    Util::runDbCmd($testCmd, $log, $sqlfile, $outfp);
+
+    close($outfp);
+
+    $outfile =
+        $self->postProcessSingleSQLOutputFile($outfile, $testCmd, $log);
+
+    return $outfile;
+}
+
+##############################################################################
+# Tell whether a test defines any of the command line style verification
+# directives (return code, or match / non-match rules for stdout or stderr).
+#
+# Parameters:
+# $testCmd - hash ref describing the test.
+# $log     - not used here.
+#
+# Returns:
+# 1 when at least one directive has a defined value, 0 otherwise.
+#
+sub hasCommandLineVerifications
+{
+    my ($self, $testCmd, $log) = @_;
+
+    my @directives = qw(
+        rc
+        expected_out        expected_out_regex
+        expected_err        expected_err_regex
+        not_expected_out    not_expected_out_regex
+        not_expected_err    not_expected_err_regex
+    );
+
+    # grep in scalar context yields the number of directives that are set.
+    my $present = grep { defined $testCmd->{$_} } @directives;
+
+    return $present ? 1 : 0;
+}
+
+
+##############################################################################
+# Compare the results of a test run against what the test expects, picking
+# the comparison that fits the test.
+#
+# Parameters:
+# $testResult      - hash ref with the results of the test run.
+# $benchmarkResult - hash ref with the benchmark results.
+# $log             - open filehandle for log output.
+# $testCmd         - hash ref describing the test.
+#
+# Returns:
+# $self->{'wrong_execution_mode'} when the test must be skipped, otherwise
+# whatever compareScript or comparePig returns, or 0 when the test has
+# neither a 'script' nor a 'pig' command nor any verification directive.
+#
+sub compare
+{
+    my ($self, $testResult, $benchmarkResult, $log, $testCmd) = @_;
+    my $subName  = (caller(0))[3];
+
+    # Check that we should run this test.  If the current execution type
+    # doesn't match the execonly flag, then skip this one and hand back
+    # the special magic value.
+    return $self->{'wrong_execution_mode'}
+        if $self->wrongExecutionMode($testCmd);
+
+    # For now, if the test has
+    # - testCmd pig, and 'sql' for benchmark, then use comparePig, i.e. using benchmark
+    # - any verification directives formerly used by CmdLine or Script drivers (rc, regex on out and err...)
+    #   then use compareScript even if testCmd is "pig"
+    # - testCmd script, then use compareScript
+    # - testCmd pig, and none of the above, then use comparePig
+    #
+    # Later, should add ability to have same tests both verify with the 'script' directives,
+    # and do a benchmark compare, if it was a pig cmd. E.g. 'rc' could still be checked when
+    # doing the benchmark compare.
+    my $wantsScriptChecks = $testCmd->{'script'}
+        || $self->hasCommandLineVerifications( $testCmd, $log);
+
+    return $self->compareScript ( $testResult, $log, $testCmd)
+        if $wantsScriptChecks;
+
+    return $self->comparePig ( $testResult, $benchmarkResult, $log, $testCmd)
+        if $testCmd->{'pig'};
+
+    # Should have been caught by runTest, still...
+    print $log "$0.$subName WARNING Did not find a testCmd that I know how to handle\n";
+    return 0;
+}
+
+
+##############################################################################
+# Verify the return code, stdout and stderr of a test run against the
+# verification directives of the test.
+#
+# Parameters:
+# $testResult - hash ref with the results of the run.  Keys read here: 'rc',
+#               'stdout', 'stderr' and (for log messages) 'stderr_file'.
+# $log        - open filehandle for log output.
+# $testCmd    - hash ref describing the test.  Directives read here: 'rc',
+#               'expected_out', 'not_expected_out', 'expected_out_regex',
+#               'not_expected_out_regex', 'expected_err', 'not_expected_err',
+#               'expected_err_regex' and 'not_expected_err_regex'.
+#
+# Returns:
+# 1 when every directive that is defined holds, 0 otherwise.  All directives
+# are checked, even after the first failure, so that the log is complete.
+#
+sub compareScript
+{
+    my ($self, $testResult, $log, $testCmd) = @_;
+    my $subName  = (caller(0))[3];
+
+
+    # IMPORTANT NOTES:
+    #
+    # If you are using a regex to compare stdout or stderr
+    # and if the pattern that you are trying to match spans two lines
+    # explicitly use '\n' (without the single quotes) in the regex
+    #
+    # If any verification directives are added here
+    # do remember also to add them to the hasCommandLineVerifications subroutine.
+    #
+    # If the test conf file misspells the directive, you won't be told...
+    #
+
+    my $result = 1;  # until proven wrong...
+
+
+    # Return Code
+    if (defined $testCmd->{'rc'}) {
+        print $log "$0::$subName INFO Checking return code " .
+                   "against expected <$testCmd->{'rc'}>\n";
+        if ( (! defined $testResult->{'rc'}) || ($testResult->{'rc'} != $testCmd->{'rc'})) {
+            # A missing return code is reported as an empty value.
+            my $actualRc = defined($testResult->{'rc'}) ? $testResult->{'rc'} : '';
+            print $log "$0::$subName INFO Check failed: rc = <$testCmd->{'rc'}> expected, test returned rc = <$actualRc>\n";
+            $result = 0;
+        }
+    }
+
+    # Standard Out
+    if (defined $testCmd->{'expected_out'}) {
+        print $log "$0::$subName INFO Checking test stdout " .
+                "as exact match against expected <$testCmd->{'expected_out'}>\n";
+        if ($testResult->{'stdout'} ne $testCmd->{'expected_out'}) {
+            print $log "$0::$subName INFO Check failed: exact match of <$testCmd->{'expected_out'}> expected in stdout: $testResult->{'stdout'}\n";
+            $result = 0;
+        }
+    }
+
+    if (defined $testCmd->{'not_expected_out'}) {
+        # The messages report the 'not_expected_out' value that is checked.
+        print $log "$0::$subName INFO Checking test stdout " .
+                "as NOT exact match against expected <$testCmd->{'not_expected_out'}>\n";
+        if ($testResult->{'stdout'} eq $testCmd->{'not_expected_out'}) {
+            print $log "$0::$subName INFO Check failed: NON-match of <$testCmd->{'not_expected_out'}> expected to stdout: $testResult->{'stdout'}\n";
+            $result = 0;
+        }
+    }
+
+    if (defined $testCmd->{'expected_out_regex'}) {
+        print $log "$0::$subName INFO Checking test stdout " .
+                "for regular expression <$testCmd->{'expected_out_regex'}>\n";
+        if ($testResult->{'stdout'} !~ $testCmd->{'expected_out_regex'}) {
+            print $log "$0::$subName INFO Check failed: regex match of <$testCmd->{'expected_out_regex'}> expected in stdout: $testResult->{'stdout'}\n";
+            $result = 0;
+        }
+    }
+
+    if (defined $testCmd->{'not_expected_out_regex'}) {
+        print $log "$0::$subName INFO Checking test stdout " .
+                "for NON-match of regular expression <$testCmd->{'not_expected_out_regex'}>\n";
+        if ($testResult->{'stdout'} =~ $testCmd->{'not_expected_out_regex'}) {
+            print $log "$0::$subName INFO Check failed: regex NON-match of <$testCmd->{'not_expected_out_regex'}> expected in stdout: $testResult->{'stdout'}\n";
+            $result = 0;
+        }
+    }
+
+    # Standard Error
+    # The failure messages name $testResult->{'stderr_file'} rather than
+    # repeating the stderr text itself.
+    if (defined $testCmd->{'expected_err'}) {
+        print $log "$0::$subName INFO Checking test stderr " .
+                "as exact match against expected <$testCmd->{'expected_err'}>\n";
+        if ($testResult->{'stderr'} ne $testCmd->{'expected_err'}) {
+            print $log "$0::$subName INFO Check failed: exact match of <$testCmd->{'expected_err'}> expected in stderr: $testResult->{'stderr_file'}\n";
+            $result = 0;
+        }
+    }
+
+    if (defined $testCmd->{'not_expected_err'}) {
+        # The messages report the 'not_expected_err' value that is checked.
+        print $log "$0::$subName INFO Checking test stderr " .
+                "as NOT an exact match against expected <$testCmd->{'not_expected_err'}>\n";
+        if ($testResult->{'stderr'} eq $testCmd->{'not_expected_err'}) {
+            print $log "$0::$subName INFO Check failed: NON-match of <$testCmd->{'not_expected_err'}> expected to stderr: $testResult->{'stderr_file'}\n";
+            $result = 0;
+        }
+    }
+
+    if (defined $testCmd->{'expected_err_regex'}) {
+        print $log "$0::$subName INFO Checking test stderr " .
+                "for regular expression <$testCmd->{'expected_err_regex'}>\n";
+        if ($testResult->{'stderr'} !~ $testCmd->{'expected_err_regex'}) {
+            print $log "$0::$subName INFO Check failed: regex match of <$testCmd->{'expected_err_regex'}> expected in stderr: $testResult->{'stderr_file'}\n";
+            $result = 0;
+        }
+    }
+
+    if (defined $testCmd->{'not_expected_err_regex'}) {
+        print $log "$0::$subName INFO Checking test stderr " .
+                "for NON-match of regular expression <$testCmd->{'not_expected_err_regex'}>\n";
+        if ($testResult->{'stderr'} =~ $testCmd->{'not_expected_err_regex'}) {
+            print $log "$0::$subName INFO Check failed: regex NON-match of <$testCmd->{'not_expected_err_regex'}> expected in stderr: $testResult->{'stderr_file'}\n";
+            $result = 0;
+        }
+    }
+
+    return $result;
+}
+
+
+##############################################################################
+# Compare the output of a pig test against its benchmark, one comparison
+# per store statement of the script.
+#
+# Parameters:
+# $testResult      - hash ref with the test results: 'output' for a single
+#                    store, 'outputs' (array ref) otherwise.
+# $benchmarkResult - hash ref with the benchmark results, same layout.
+# $log             - open filehandle for log output.
+# $testCmd         - hash ref describing the test.
+#
+# Returns:
+# For a single store whatever compareSingleOutput returns; for several
+# stores 1 when every output matches and 0 otherwise.  Nothing is compared,
+# and the result stays undefined, when no store is counted at all.
+#
+sub comparePig
+{
+    my ($self, $testResult, $benchmarkResult, $log, $testCmd) = @_;
+    my $subName  = (caller(0))[3];
+
+    my $stores = $self->countStores($testCmd);
+    my $verdict;
+
+    if ($stores != 1) {
+        # Multi query: count the outputs that match their benchmark.
+        my $matches = 0;
+        foreach my $idx (0 .. $stores - 1) {
+            my $actual   = ($testResult->{'outputs'})->[$idx];
+            my $expected = ($benchmarkResult->{'outputs'})->[$idx];
+            $matches += $self->compareSingleOutput($testResult, $actual,
+                                                   $expected, $log);
+            # Kept inside the loop on purpose: without any store the
+            # verdict has to remain undefined.
+            $verdict = ($matches == ($stores)) ? 1 : 0;
+        }
+    } else {
+        # Single query
+        $verdict = $self->compareSingleOutput($testResult, $testResult->{'output'},
+                $benchmarkResult->{'output'}, $log);
+    }
+
+    return $verdict;
+}
+
+
+##############################################################################
+# Compare one output file of a test against its benchmark file by checksum
+# and, when the test defines sort arguments, check the sort order too.
+#
+# Parameters:
+# $testResult      - hash ref with the test results.  Keys read here:
+#                    'sortArgs' (array ref, optional) and 'originalOutput'.
+# $testOutput      - path of the test output file.
+# $benchmarkOutput - path of the benchmark file.
+# $log             - open filehandle for log output.
+#
+# Returns:
+# 1 when the checksums are equal and the optional sort check succeeds,
+# 0 otherwise.
+#
+# Dies when cksum cannot be run on either file.
+#
+sub compareSingleOutput
+{
+    my ($self, $testResult, $testOutput, $benchmarkOutput, $log) = @_;
+
+    print $log "testResult: $testResult testOutput: $testOutput benchmarkOutput: $benchmarkOutput\n";
+
+    # cksum the two files to see if they are the same
+    my ($testChksm, $benchmarkChksm);
+    IPC::Run::run((['cat', $testOutput], '|', ['cksum']), \$testChksm,
+        $log) or die "$0: error: cannot run cksum on test results\n";
+    IPC::Run::run((['cat', $benchmarkOutput], '|', ['cksum']),
+        \$benchmarkChksm, $log) or die "$0: error: cannot run cksum on benchmark\n";
+
+    chomp $testChksm;
+    chomp $benchmarkChksm;
+    print $log "test cksum: $testChksm\nbenchmark cksum: $benchmarkChksm\n";
+
+    # Start from a defined failure value so that callers which add up the
+    # results never see undef.
+    my $result = 0;
+    if ($testChksm ne $benchmarkChksm) {
+        print $log "Test output checksum does not match benchmark checksum\n";
+        print $log "Test checksum = <$testChksm>\n";
+        print $log "Expected checksum = <$benchmarkChksm>\n";
+        print $log "RESULTS DIFFER: vimdiff " . cwd() . "/$testOutput " . cwd() . "/$benchmarkOutput\n";
+    } else {
+        $result = 1;
+    }
+
+    # Now, check if the sort order is specified
+    if (defined($testResult->{'sortArgs'})) {
+        Util::setLocale();
+        my @sortChk = ('sort', '-cs');
+        push(@sortChk, @{$testResult->{'sortArgs'}});
+        push(@sortChk, $testResult->{'originalOutput'});
+        print $log "Going to run sort check command: " . join(" ", @sortChk) . "\n";
+        # The return value of run() is not used; the exit status of the
+        # sort check is read from $? instead.
+        IPC::Run::run(\@sortChk, \undef, $log, $log);
+        my $sortrc = $?;
+        if ($sortrc) {
+            print $log "Sort check failed\n";
+            $result = 0;
+        }
+    }
+
+    return $result;
+}
+
+##############################################################################
+# Count the number of stores in a Pig Latin script, so we know how many files
+# we need to compare.
+#
+# Parameters:
+# $testCmd - hash ref describing the test.  Keys read here: 'notmq' and
+#            'pig' (the text of the script).
+#
+# Returns:
+# 1 when 'notmq' is defined; otherwise the number of script lines holding a
+# "store <alias> into" statement, which is 0 when there is none or when the
+# test has no 'pig' script.
+#
+sub countStores($$)
+{
+    my ($self, $testCmd) = @_;
+
+    # Special work around for queries with more than one store that are not
+    # actually multiqueries.
+    if (defined $testCmd->{'notmq'}) {
+        return 1;
+    }
+
+    # Always return a number, even when nothing is found.
+    my $count = 0;
+    return $count unless defined $testCmd->{'pig'};
+
+    # hope they don't have more than one store per line
+    # also note that this won't work if you comment out a store
+    foreach my $line (split(/\n/, $testCmd->{'pig'})) {
+        $count++ if $line =~ /store\s+[a-zA-Z][a-zA-Z0-9_]*\s+into/i;
+    }
+
+    return $count;
+}
+
+##############################################################################
+# Check whether this test must be skipped because it is restricted to an
+# execution type other than the current one.
+#
+# Parameters:
+# $testCmd - hash ref describing the test.  Keys read here: 'execonly' and
+#            'exectype'.
+#
+# Returns:
+# True when 'execonly' is defined and differs from 'exectype', false
+# otherwise.
+#
+sub wrongExecutionMode($$)
+{
+    my ($self, $testCmd) = @_;
+
+    my $restrictedTo = $testCmd->{'execonly'};
+
+    # A test without the execonly flag runs in every execution mode.
+    return (defined($restrictedTo) &&
+            $restrictedTo ne $testCmd->{'exectype'});
+}
+
+##############################################################################
+#  Sub: printGroupResultsXml
+#  Report the totals of one group of tests, computed from the testStatuses
+#  hash, through the report object.
+#
+# Parameters:
+# $report       - the report object to use to generate the report; its
+#                 totals method is called
+# $groupName    - the name of the group to report totals for; every test
+#                 whose key starts with this name belongs to the group
+# $testStatuses - the hash containing the results for the tests run so far
+# $totalDuration- The total time it took to run the group of tests; a false
+#                 value is reported as 0
+#
+# Returns:
+# None.
+#
+sub printGroupResultsXml
+{
+    my ( $report, $groupName, $testStatuses,  $totalDuration) = @_;
+    $totalDuration = 0 unless $totalDuration;
+
+    my %tally = ('pass' => 0, 'fail' => 0, 'abort' => 0, 'depend' => 0);
+
+    foreach my $testName (keys(%$testStatuses)) {
+        next unless $testName =~ /^$groupName/;
+
+        my $status = $testStatuses->{$testName};
+        $tally{'pass'}++   if $status eq $passedStr;
+        $tally{'fail'}++   if $status eq $failedStr;
+        $tally{'abort'}++  if $status eq $abortedStr;
+        $tally{'depend'}++ if $status eq $dependStr;
+    }
+
+    # Tests skipped because of a failed dependency are tallied but are not
+    # part of the reported total.
+    my $total = $tally{'pass'} + $tally{'fail'} + $tally{'abort'};
+    $report->totals( $groupName, $total, $tally{'fail'}, $tally{'abort'}, $totalDuration );
+
+}
+
+1;