You are viewing a plain text version of this content. The canonical link for it is here.
Posted to hcatalog-commits@incubator.apache.org by ha...@apache.org on 2011/12/06 20:05:39 UTC

svn commit: r1211077 [4/7] - in /incubator/hcatalog/trunk: ./ conf/ src/test/e2e/hcatalog/ src/test/e2e/hcatalog/conf/ src/test/e2e/hcatalog/deployers/ src/test/e2e/hcatalog/drivers/ src/test/e2e/hcatalog/tests/ src/test/e2e/hcatalog/tools/generate/ sr...

Modified: incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/Util.pm
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/Util.pm?rev=1211077&r1=1211076&r2=1211077&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/Util.pm (original)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/Util.pm Tue Dec  6 20:05:37 2011
@@ -26,6 +26,289 @@
 
 package Util;
 
+use IPC::Run qw(run);
+use Log::Log4perl qw(:easy);
+
+sub prepareHCat
+{
+    # Write the test's 'hcat_prep' SQL to a script file under 'localpath'
+    # and run that script through hcat (see runHCatCmdFromFile).
+    my ($self, $testCmd, $log) = @_;
+    my $outfile = $testCmd->{'outpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".out";
+    # NOTE(review): the substituted text in $hcatCmd is never used; the raw
+    # 'hcat_prep' string is what gets written below - confirm intent.
+    my $hcatCmd = $self->replaceParameters( $testCmd->{'hcat_prep'}, $outfile, $testCmd, $log);
+
+    my $sqlfile = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" .
+        $testCmd->{'num'} . ".0.sql";
+
+    # Report the OS error through $!; $ERRNO would require 'use English'.
+    # Three-arg open keeps the file name from being parsed as a mode.
+    open(my $fh, '>', $sqlfile) or
+        die "Unable to open file $sqlfile to write SQL script, $!\n";
+    print {$fh} $testCmd->{'hcat_prep'} . "\n";
+    close($fh) or die "Unable to close file $sqlfile, $!\n";
+    Util::runHCatCmdFromFile($testCmd, $log, $sqlfile);
+}
+
+##############################################################################
+#  Sub: setupHiveProperties
+#  Ensure the metastore values needed to set Hive Java properties are present.
+#  cfg - hash reference of settings, updated in place: metastore_connection
+#    is always written, plus metastore_uri (thrift) or metastore_driver (Derby).
+#  log - open file handle; receives an explanation before the fatal error.
+#  Returns:
+#  Nothing.  Dies if metastore_thrift is 1 and metastore_host is unset or empty.
+sub  setupHiveProperties($$)
+{
+    my ($cfg, $log) = @_;
+
+    # Thrift metastore: host is mandatory, password and port get defaults.
+    if (defined($cfg->{'metastore_thrift'}) && $cfg->{'metastore_thrift'} == 1) {
+        if (! defined $cfg->{'metastore_host'} || $cfg->{'metastore_host'} eq "") {
+            print $log "When using thrift, you must set the key " .
+                " 'metastore_host' to the machine your metastore is on\n";
+            die "metastore_host is not set in existing.conf\n";
+        }
+
+        $cfg->{'metastore_connection'} =
+            "jdbc:$cfg->{'metastore_db'}://$cfg->{'metastore_host'}/hivemetastoredb?createDatabaseIfNotExist=true";
+   
+        if (! defined $cfg->{'metastore_passwd'} || $cfg->{'metastore_passwd'} eq "") {
+            $cfg->{'metastore_passwd'} = 'hive';    # default when none configured
+        }
+
+        if (! defined $cfg->{'metastore_port'} || $cfg->{'metastore_port'} eq "") {
+            $cfg->{'metastore_port'} = '9933';    # default when none configured
+        }
+
+        $cfg->{'metastore_uri'} =
+            "thrift://$cfg->{'metastore_host'}:$cfg->{'metastore_port'}";
+    } else {    # thrift not requested: use the embedded Derby driver
+        $cfg->{'metastore_connection'} =
+            "jdbc:derby:;databaseName=metastore_db;create=true";
+        $cfg->{'metastore_driver'} = "org.apache.derby.jdbc.EmbeddedDriver";
+    }
+}
+
+sub getHadoopCmd    # Return the base hadoop command as a list: binary, then optional --config <dir>.
+{
+    my ( $properties ) = @_;    # hash reference; reads 'hadoopbin' and 'testconfigpath'
+
+    my $subName        = (caller(0))[3];    # this sub's own name, used in messages
+    my @baseCmd;
+
+    die "$0.$subName: null properties" if (! $properties );
+
+    my $cmd;
+
+    $cmd = $properties->{'hadoopbin'};
+    if ( ! -x "$cmd" ) {    # configured binary is not executable: fall back to PATH lookup
+      print STDERR "\n$0::$subName WARNING: Can't find hadoop command: $cmd\n";
+      $cmd = `which hadoop`;
+      chomp $cmd;
+      print STDERR "$0::$subName WARNING: Instead using command: $cmd\n";
+    }
+    if ( ! -x "$cmd" ) {    # neither the configured path nor the fallback is executable
+      die "\n$0::$subName FATAL: Hadoop command does not exist: $cmd\n";
+    }
+    push (@baseCmd, $cmd);
+
+    push (@baseCmd, '--config', $properties->{'testconfigpath'}) if defined($properties->{'testconfigpath'});
+
+    return @baseCmd;
+}
+
+##############################################################################
+#  Sub: runHiveCmdFromFile
+#
+#  Run the provided file using the Hive command line.
+#
+#  cfg - hash reference of test settings ('hivehome', 'thriftserver', ...)
+#  log - reference to the log file, should be an open file pointer
+#  sql - name of file containing SQL to run.  Optional, if present -f $sql
+#    will be appended to the command.
+#  outfile - open file pointer (or variable reference) to write stdout to for
+#    this test.  Optional, will be written to $log if this value is not
+#    provided.
+#  errfile - open file pointer (or variable reference) to write stderr to for
+#    this test.  Optional, will be written to $log if this value is not
+#    provided.
+#  noFailOnFail - if true, do not fail when the Hive command returns non-zero
+#    value.
+#  Returns:
+#  True on success; on failure the exit code if noFailOnFail, otherwise dies.
+sub runHiveCmdFromFile($$;$$$$)
+{
+    my ($cfg, $log, $sql, $outfile, $errfile, $noFailOnFail) = @_;
+
+    if (!defined($ENV{'HADOOP_HOME'})) {
+        die "Cannot run hive when HADOOP_HOME environment variable is not set.";
+    }
+
+    $outfile = $log if (!defined($outfile));
+    $errfile = $log if (!defined($errfile));
+
+    my @cmd = ("$cfg->{'hivehome'}/bin/hive");
+
+    # Add all of the modified properties we want to set
+    push(@cmd, "--hiveconf", "hive.metastore.uris=$cfg->{'thriftserver'}");
+    push(@cmd, "--hiveconf", "hive.metastore.local=false");
+
+    # Enable SASL only when a principal is set, non-blank, and not the
+    # unexpanded '${metastore.principal}' placeholder.
+    if( defined($cfg->{'metastore.principal'}) && ($cfg->{'metastore.principal'} =~ m/\S+/)
+        &&  ($cfg->{'metastore.principal'} ne '${metastore.principal}')){
+        push(@cmd, "--hiveconf", "hive.metastore.sasl.enabled=true",  "--hiveconf", "hive.metastore.kerberos.principal=$cfg->{'metastore.principal'}");
+    } else {
+        push(@cmd, "--hiveconf", "hive.metastore.sasl.enabled=false");
+    }
+
+    if (defined($cfg->{'additionaljarspath'})) {
+        $ENV{'HIVE_AUX_JARS_PATH'} = $cfg->{'additionaljarspath'};
+    }
+
+    if (defined($cfg->{'hiveconf'})) {
+        foreach my $hc (@{$cfg->{'hiveconf'}}) {
+            push(@cmd, "--hiveconf", $hc);
+        }
+    }
+
+    if (defined($cfg->{'hivecmdargs'})) {
+        push(@cmd, @{$cfg->{'hivecmdargs'}});
+    }
+
+    if (defined($cfg->{'hiveops'})) {
+        $ENV{'HIVE_OPTS'} = join(" ", @{$cfg->{'hiveops'}});
+    }
+
+    $ENV{'HIVE_HOME'} = $cfg->{'hivehome'};
+
+    my $envStr;    # HADOOP*/HIVE* environment, rendered for the log line only
+    for my $k (keys(%ENV)) {
+        $envStr .= $k . "=" . $ENV{$k} . " " if ($k =~ /HADOOP/ || $k =~ /HIVE/);
+    }
+    $envStr .= " ";
+
+    if (defined($sql)) {
+        push(@cmd, "-f", $sql);
+    }
+    print $log "Going to run hive command [" . join(" ", @cmd) .
+        "] with environment set to [$envStr]\n";
+    my $runrc = run(\@cmd, \undef, $outfile, $errfile);
+    my $rc = $? >> 8;    # exit code portion of the child's wait status
+
+    return $runrc if $runrc; # success
+
+    if (defined($noFailOnFail) && $noFailOnFail) {
+        return $rc;
+    } else {
+        die "Failed running hive command [" . join(" ", @cmd) . "]\n";
+    }
+}
+
+#############################################################################
+#  Sub: runHCatCmdFromFile
+#
+#  Run the provided file using the hcat command line.
+#
+#  cfg - hash reference of test settings; 'hcathome' locates bin/hcat
+#  log - reference to the log file, should be an open file pointer
+#  sql - name of file containing SQL to run.  Optional, if present -f $sql
+#    will be appended to the command.
+#  outfile - open file pointer (or variable reference) to write stdout to for
+#    this test.  Optional, will be written to $log if this value is not
+#    provided.
+#  errfile - open file pointer (or variable reference) to write stderr to for
+#    this test.  Optional, will be written to $log if this value is not
+#    provided.
+#  noFailOnFail - if true, do not fail when the hcat command returns non-zero
+#    value.
+#  Returns:
+#  True on success; on failure the exit code if noFailOnFail, otherwise dies.
+sub runHCatCmdFromFile($$;$$$$)
+{
+    my ($cfg, $log, $sql, $outfile, $errfile, $noFailOnFail) = @_;
+
+    if (!defined($ENV{'HADOOP_HOME'})) {
+        die "Cannot run hive when HADOOP_HOME environment variable is not set.";
+    }
+
+    $outfile = $log if (!defined($outfile));
+    $errfile = $log if (!defined($errfile));
+
+    # Blank out HADOOP_CLASSPATH (set to the empty string rather than deleted)
+    $ENV{'HADOOP_CLASSPATH'} = "";
+
+    my @cmd;
+    if (defined($sql)) {
+        @cmd = ("$cfg->{'hcathome'}/bin/hcat", "-f", $sql);
+    } else {
+        @cmd = ("$cfg->{'hcathome'}/bin/hcat");
+    }
+
+    my $envStr;    # HADOOP*/HIVE* environment, rendered for the log line only
+    for my $k (keys(%ENV)) {
+        $envStr .= $k . "=" . $ENV{$k} . " " if ($k =~ /HADOOP/ || $k =~ /HIVE/);
+    }
+    $envStr .= " ";
+    print $log "Going to run hcat command [" . join(" ", @cmd) .
+        "] with environment set to [$envStr]\n";
+    my $runrc = run(\@cmd, \undef, $outfile, $errfile);
+    my $rc = $? >> 8;    # exit code portion of the child's wait status
+
+    return $runrc if $runrc; # success
+
+    if (defined($noFailOnFail) && $noFailOnFail) {
+        return $rc;
+    } else {
+        die "Failed running hcat command [" . join(" ", @cmd) . "]\n";
+    }
+}
+
+##############################################################################
+#  Sub: runDbCmd
+#
+#  Run the provided mysql command
+#
+#  Returns:
+#  Nothing
+sub runDbCmd($$$;$)
+{
+    # Feed the SQL in $sqlfile to the mysql client on stdin; stdout goes to
+    # $outfile (default: $log), stderr to $log.  Dies if the file cannot be
+    # opened or the mysql run fails.
+    my ($cfg, $log, $sqlfile, $outfile) = @_;
+    $outfile = $log if (!defined($outfile));
+
+    open(my $sqlfh, '<', $sqlfile) or die "Unable to open $sqlfile for reading, $!\n";
+    my @cmd = ('mysql', '-u', $cfg->{'dbuser'}, '-D', $cfg->{'dbdb'},
+        '-h', $cfg->{'dbhost'}, "--password=$cfg->{'dbpasswd'}",
+        "--skip-column-names");
+    # Text used in the log and error message, with the password masked out.
+    my $shown = join(" ", map { ($_ =~ /^--password=/) ? '--password=****' : $_ } @cmd);
+    print $log "Going to run [$shown] passing in [$sqlfile]\n";
+    run(\@cmd, $sqlfh, $outfile, $log) or die "Failed running $shown\n";
+    close($sqlfh);
+}
+
+#  Sub: runHadoopCmd
+#
+#  Run the provided hadoop command
+#
+#  Returns:
+#  Nothing
+sub runHadoopCmd($$$)
+{
+    my ($cfg, $log, $c) = @_;    # $cfg is accepted but not used here
+    # Fail early, like runHiveCmdFromFile, instead of trying "/bin/hadoop".
+    die "Cannot run hadoop when HADOOP_HOME environment variable is not set.\n"
+        if (!defined($ENV{'HADOOP_HOME'}));
+
+    my @cmd = ("$ENV{'HADOOP_HOME'}/bin/hadoop", split(' ', $c));
+    print $log "Going to run [" . join(" ", @cmd) . "]\n";
+    run(\@cmd, \undef, $log, $log) or
+        die "Failed running " . join(" ", @cmd) . "\n";
+}
+
 ##############################################################################
 #  Sub: localTime
 # 
@@ -111,35 +394,6 @@ sub execCmd() {
     return @result;
 }
 
-sub getHadoopCmd
-{
-    my ( $properties ) = @_;
-
-    my $subName        = (caller(0))[3];
-    my @baseCmd;
-
-    die "$0.$subName: null properties" if (! $properties );
-
-    my $cmd;
-
-    $cmd = $properties->{'gridstack.root'} . "/hadoop/current/bin/hadoop";
-    if ( ! -x "$cmd" ) {
-      print STDERR "\n$0::$subName WARNING: Can't find hadoop command: $cmd\n";
-      $cmd = `which hadoop`;
-      chomp $cmd;
-      print STDERR "$0::$subName WARNING: Instead using command: $cmd\n";
-    }
-    if ( ! -x "$cmd" ) {
-      die "\n$0::$subName FATAL: Hadoop command does not exist: $cmd\n";
-    }
-    push (@baseCmd, $cmd);
-
-    push (@baseCmd, '--config', $properties->{'testconfigpath'}) if defined($properties->{'testconfigpath'});
-
-    return @baseCmd;
-}
-
-
 sub getHiveCmd
 {
     my ( $properties ) = @_;
@@ -187,6 +441,17 @@ sub getHCatCmd
     return @baseCmd;
 }
 
+sub show_call_stack {
+    # Debug aid: print the chain of callers, one frame per line, to STDOUT.
+    my $max_depth = 30;
+    my $i = 1;
+    print("--- Begin stack trace ---\n");
+    while ( (my @call_details = (caller($i++))) && ($i<$max_depth) ) {
+        # caller() list: index 1 = file, 2 = line, 3 = subroutine name.
+        print("$call_details[1] line $call_details[2] in function $call_details[3]\n");
+    }
+    print("--- End stack trace ---\n");
+}
 
 
 sub getPigCmd
@@ -198,6 +463,7 @@ sub getPigCmd
     my @baseCmd;
     die "$0.$subName: null properties" if (! $properties );
 
+show_call_stack();
     #UGLY HACK for pig sql support
     if ( $jarkey =~ /testsql/ ) {
 
@@ -217,6 +483,7 @@ sub getPigCmd
       # This allows for testing of the pig script as installed, and for testin of
       # the pig script's options, including error testing. 
 
+print 'use-pig.pl?????';
 
       $cmd = $properties->{'gridstack.root'} . "/pig/" . $properties->{'pigTestBuildName'} . "/bin/pig";
       if ( ! -x "$cmd" ) {
@@ -241,6 +508,7 @@ sub getPigCmd
     } else {
         $cmd="java";
 
+print 'not use-pig.pl?????';
         # Set JAVA options
 
         # User can provide only one of

Added: incubator/hcatalog/trunk/src/test/e2e/hcatalog/tests/hadoop.conf
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/tests/hadoop.conf?rev=1211077&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/tests/hadoop.conf (added)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/tests/hadoop.conf Tue Dec  6 20:05:37 2011
@@ -0,0 +1,161 @@
+#!/home/y/bin/perl
+
+	#
+	# Do
+	# egrep '^#|name.*=>' hadoop.conf | egrep -v '^#!|egrep' | less
+	# to get an outline of this test conf file
+	#
+	
+  # Has a couple of Hive set directives:
+  #   set hive.exec.dynamic.partition.mode=nonstrict;
+  #   set hive.exec.dynamic.partition=true;
+
+
+$cfg = {
+        'driver' => 'Hadoop',
+		'groups' => [
+# This first group should be moved to deployer ?
+		{
+			'name' => 'Hadoop_Checkin',
+			'tests' => [
+				{
+				 'num' => 1
+				,'hadoop' => q\
+jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.SimpleRead -libjars :HCAT_JAR: :THRIFTSERVER: studenttab10k :OUTPATH:
+\,
+                                ,'sql' => q\select name, gpa from studenttab10k;\
+                                ,'floatpostprocess' => 1
+                                ,'delimiter' => '	'
+				}, 
+                                {
+                                 'num' => 2
+                                ,'hcat_prep'=>q\drop table if exists hadoop_checkin_2;
+create table hadoop_checkin_2 (name string, age int, gpa double) STORED AS TEXTFILE;\
+                                ,'hadoop' => q\
+jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.ReadWrite -libjars :HCAT_JAR: :THRIFTSERVER: studenttab10k hadoop_checkin_2
+\,
+                                ,'result_table' => 'hadoop_checkin_2'
+                                ,'sql' => q\select * from studenttab10k;\
+                                ,'floatpostprocess' => 1
+                                ,'delimiter' => '	'
+                                },
+                                {
+                                 'num' => 3
+                                ,'hcat_prep'=>q\drop table if exists hadoop_checkin_3;
+create table hadoop_checkin_3 (name string, cnt int) STORED AS TEXTFILE;\
+                                ,'hadoop' => q\
+jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.GroupByAge -libjars :HCAT_JAR: :THRIFTSERVER: studenttab10k hadoop_checkin_3
+\,
+                                ,'result_table' => 'hadoop_checkin_3'
+                                ,'sql' => q\select age, count(*) from studenttab10k group by age;\
+                                ,'floatpostprocess' => 1
+                                ,'delimiter' => '	'
+                                },
+			],
+ 		}, # end g
+                {
+                        'name' => 'Hadoop_Read',
+                        'tests' => [
+                                {
+                                 'num' => 1
+                                ,'hadoop' => q\
+jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.ReadText -libjars :HCAT_JAR: :THRIFTSERVER: all100k :OUTPATH:
+\,
+                                ,'sql' => q\select * from all100k;\
+                                ,'floatpostprocess' => 1
+                                ,'delimiter' => '	'
+                                },
+                                {
+                                 'num' => 2
+                                ,'hadoop' => q\
+jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.ReadJson -libjars :HCAT_JAR: :THRIFTSERVER: all100kjson :OUTPATH:
+\,
+                                ,'sql' => q\select s, i, d from all100kjson;\
+                                ,'floatpostprocess' => 1
+                                ,'delimiter' => '	'
+                                },
+                                {
+                                 'num' => 3
+                                ,'hadoop' => q\
+jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.ReadRC -libjars :HCAT_JAR: :THRIFTSERVER: all100krc :OUTPATH:
+\,
+                                ,'sql' => q\select * from all100krc;\
+                                ,'floatpostprocess' => 1
+                                ,'delimiter' => '	'
+                                },
+                        ],
+                }, # end g
+                {
+                        'name' => 'Hadoop_Write',
+                        'tests' => [
+                                {
+                                 'num' => 1
+                                ,'hcat_prep'=>q\
+drop table if exists hadoop_write_1;
+create table hadoop_write_1(
+            t tinyint,
+            si smallint,
+            i int,
+            b bigint,
+            f float,
+            d double,
+            s string)
+        row format delimited
+        fields terminated by ':'
+        stored as textfile;\
+                                ,'hadoop' => q\
+jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.WriteText -libjars :HCAT_JAR: :THRIFTSERVER: all100k hadoop_write_1
+\,
+                                ,'result_table' => 'hadoop_write_1'
+                                ,'sql' => q\select * from all100k;\
+                                ,'floatpostprocess' => 1
+                                ,'delimiter' => '	'
+                                },
+                                {
+                                 'num' => 2
+                                ,'hcat_prep' => q\
+drop table if exists hadoop_write_2;
+create table hadoop_write_2(
+            s string,
+            i int,
+            d double,
+            m map<string, string>,
+            bb array<struct<a: int, b: string>>)
+            STORED AS INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat'
+            INPUTDRIVER 'org.apache.hcatalog.pig.drivers.LoadFuncBasedInputDriver' OUTPUTDRIVER 'org.apache.hcatalog.pig.drivers.StoreFuncBasedOutputDriver'
+            TBLPROPERTIES ('hcat.pig.loader'='org.apache.pig.builtin.JsonLoader', 'hcat.pig.storer'='org.apache.pig.builtin.JsonStorage', 'hcat.pig.loader.args'=
+'s:chararray, i:int, d:double, m:map[chararray], bb:{t:(a:int, b:chararray)}', 'hcat.pig.args.delimiter'='	');\
+                                ,'hadoop' => q\
+jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.WriteJson -libjars :HCAT_JAR: :THRIFTSERVER: all100kjson hadoop_write_2
+\,
+                                ,'result_table' => 'hadoop_write_2'
+                                ,'sql' => q\select s, i, d, '', '' from all100kjson;\
+                                ,'floatpostprocess' => 1
+                                ,'delimiter' => '	'
+                                },
+                                {
+                                 'num' => 3
+                                ,'hcat_prep' => q\
+drop table if exists hadoop_write_3;
+create table hadoop_write_3(
+            name string,
+            age int,
+            gpa double)
+stored as rcfile
+TBLPROPERTIES (
+    'hcat.isd'='org.apache.hcatalog.rcfile.RCFileInputDriver',
+    'hcat.osd'='org.apache.hcatalog.rcfile.RCFileOutputDriver'
+);
+\,
+                                ,'hadoop' => q\
+jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.WriteRC -libjars :HCAT_JAR: :THRIFTSERVER: all100krc hadoop_write_3
+\,
+                                ,'result_table' => 'hadoop_write_3'
+                                ,'sql' => q\select * from all100krc;\
+                                ,'floatpostprocess' => 1
+                                ,'delimiter' => '	'
+                                },
+                        ],
+                }, # end g
+         ]
+}