Posted to hcatalog-commits@incubator.apache.org by ha...@apache.org on 2011/12/06 20:05:39 UTC
svn commit: r1211077 [4/7] - in /incubator/hcatalog/trunk: ./ conf/
src/test/e2e/hcatalog/ src/test/e2e/hcatalog/conf/
src/test/e2e/hcatalog/deployers/ src/test/e2e/hcatalog/drivers/
src/test/e2e/hcatalog/tests/ src/test/e2e/hcatalog/tools/generate/ sr...
Modified: incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/Util.pm
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/Util.pm?rev=1211077&r1=1211076&r2=1211077&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/Util.pm (original)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/Util.pm Tue Dec 6 20:05:37 2011
@@ -26,6 +26,289 @@
package Util;
+use IPC::Run qw(run);
+use Log::Log4perl qw(:easy);
+
+sub prepareHCat
+{
+ my ($self, $testCmd, $log) = @_;
+ my $outfile = $testCmd->{'outpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".out";
+ my $hcatCmd = $self->replaceParameters( $testCmd->{'hcat_prep'}, $outfile, $testCmd, $log);
+
+ my @hivefiles = ();
+ my @outfiles = ();
+ # Write the hive script to a file.
+ $hivefiles[0] = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" .
+ $testCmd->{'num'} . ".0.sql";
+ $outfiles[0] = $testCmd->{'thisResultsPath'} . "/" . $testCmd->{'group'} .
+ "_" . $testCmd->{'num'} . ".0.out";
+
+ open(FH, "> $hivefiles[0]") or
+ die "Unable to open file $hivefiles[0] to write SQL script, $!\n";
+ print FH $testCmd->{'hcat_prep'} . "\n";
+ close(FH);
+
+ Util::runHCatCmdFromFile($testCmd, $log, $hivefiles[0]);
+}
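+
+# A minimal usage sketch for prepareHCat (hypothetical values, not part of
+# this patch; assumes a driver object that provides replaceParameters).
+# The keys shown are the ones the sub actually reads:
+#
+#   my $testCmd = {
+#       'hcat_prep'       => 'drop table if exists t1; create table t1 (a int);',
+#       'outpath'         => '/tmp/out/',
+#       'localpath'       => '/tmp/local/',
+#       'thisResultsPath' => '/tmp/results',
+#       'group'           => 'Hadoop_Checkin',
+#       'num'             => 1,
+#   };
+#   $driver->prepareHCat($testCmd, $log);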
+
+##############################################################################
+# Sub: setupHiveProperties
+#
+# Ensure that the necessary values are set in the config so that the Hive
+# Java properties can be set.
+#
+# Returns:
+# Nothing
+sub setupHiveProperties($$)
+{
+ my ($cfg, $log) = @_;
+
+ # Set up values for the metastore
+ if (defined($cfg->{'metastore_thrift'}) && $cfg->{'metastore_thrift'} == 1) {
+ if (! defined $cfg->{'metastore_host'} || $cfg->{'metastore_host'} eq "") {
+ print $log "When using thrift, you must set the key " .
+ " 'metastore_host' to the machine your metastore is on\n";
+ die "metastore_host is not set in existing.conf\n";
+ }
+
+ $cfg->{'metastore_connection'} =
+ "jdbc:$cfg->{'metastore_db'}://$cfg->{'metastore_host'}/hivemetastoredb?createDatabaseIfNotExist=true";
+
+ if (! defined $cfg->{'metastore_passwd'} || $cfg->{'metastore_passwd'} eq "") {
+ $cfg->{'metastore_passwd'} = 'hive';
+ }
+
+ if (! defined $cfg->{'metastore_port'} || $cfg->{'metastore_port'} eq "") {
+ $cfg->{'metastore_port'} = '9933';
+ }
+
+ $cfg->{'metastore_uri'} =
+ "thrift://$cfg->{'metastore_host'}:$cfg->{'metastore_port'}";
+ } else {
+ $cfg->{'metastore_connection'} =
+ "jdbc:derby:;databaseName=metastore_db;create=true";
+ $cfg->{'metastore_driver'} = "org.apache.derby.jdbc.EmbeddedDriver";
+ }
+}
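+
+# Sketch of what setupHiveProperties derives (hypothetical config values, not
+# part of this patch): with metastore_thrift=1, metastore_db='mysql' and
+# metastore_host='meta.example.com', the sub fills in
+#   metastore_connection => 'jdbc:mysql://meta.example.com/hivemetastoredb?createDatabaseIfNotExist=true'
+#   metastore_uri        => 'thrift://meta.example.com:9933'  # 9933 is the default port
+# and otherwise falls back to the embedded Derby connection string.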
+
+sub getHadoopCmd
+{
+ my ( $properties ) = @_;
+
+ my $subName = (caller(0))[3];
+ my @baseCmd;
+
+ die "$0.$subName: null properties" if (! $properties );
+
+ my $cmd;
+
+ $cmd = $properties->{'hadoopbin'};
+ if ( ! -x "$cmd" ) {
+ print STDERR "\n$0::$subName WARNING: Can't find hadoop command: $cmd\n";
+ $cmd = `which hadoop`;
+ chomp $cmd;
+ print STDERR "$0::$subName WARNING: Instead using command: $cmd\n";
+ }
+ if ( ! -x "$cmd" ) {
+ die "\n$0::$subName FATAL: Hadoop command does not exist: $cmd\n";
+ }
+ push (@baseCmd, $cmd);
+
+ push (@baseCmd, '--config', $properties->{'testconfigpath'}) if defined($properties->{'testconfigpath'});
+
+ return @baseCmd;
+}
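+
+# Example use of getHadoopCmd (hypothetical property values, not part of this
+# patch): build the base hadoop command, then append the desired sub-command.
+#
+#   my @cmd = Util::getHadoopCmd({ 'hadoopbin' => '/usr/bin/hadoop',
+#                                  'testconfigpath' => '/etc/hadoop/conf' });
+#   push(@cmd, 'fs', '-ls', '/user');
+#   run(\@cmd, \undef, \*STDOUT, \*STDERR);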
+
+##############################################################################
+# Sub: runHiveCmdFromFile
+#
+# Run the provided file using the Hive command line.
+#
+# cfg - reference to the test configuration hash
+# log - reference to the log file, should be an open file pointer
+# sql - name of file containing SQL to run. Optional, if present -f $sql
+# will be appended to the command.
+# outfile - open file pointer (or variable reference) to write stdout to for
+# this test. Optional, will be written to $log if this value is not
+# provided.
+# errfile - open file pointer (or variable reference) to write stderr to for
+# this test. Optional, will be written to $log if this value is not
+# provided.
+# noFailOnFail - if true, do not die when the Hive command returns a non-zero
+# value; return that value instead.
+# Returns:
+# True if the command succeeds; the command's exit code if it fails and
+# noFailOnFail is set; otherwise dies.
+sub runHiveCmdFromFile($$;$$$$)
+{
+ my ($cfg, $log, $sql, $outfile, $errfile, $noFailOnFail) = @_;
+
+ if (!defined($ENV{'HADOOP_HOME'})) {
+ die "Cannot run hive when HADOOP_HOME environment variable is not set.";
+ }
+
+ $outfile = $log if (!defined($outfile));
+ $errfile = $log if (!defined($errfile));
+
+ my @cmd = ("$cfg->{'hivehome'}/bin/hive");
+
+ # Add all of the modified properties we want to set
+ push(@cmd, "--hiveconf", "hive.metastore.uris=$cfg->{'thriftserver'}");
+ push(@cmd, "--hiveconf", "hive.metastore.local=false");
+
+ if( defined($cfg->{'metastore.principal'}) && ($cfg->{'metastore.principal'} =~ m/\S+/)
+ && ($cfg->{'metastore.principal'} ne '${metastore.principal}')){
+ push(@cmd, "--hiveconf", "hive.metastore.sasl.enabled=true", "--hiveconf", "hive.metastore.kerberos.principal=$cfg->{'metastore.principal'}");
+ } else {
+ push(@cmd, "--hiveconf", "hive.metastore.sasl.enabled=false");
+ }
+
+ if (defined($cfg->{'additionaljarspath'})) {
+ $ENV{'HIVE_AUX_JARS_PATH'} = $cfg->{'additionaljarspath'};
+ }
+
+ if (defined($cfg->{'hiveconf'})) {
+ foreach my $hc (@{$cfg->{'hiveconf'}}) {
+ push(@cmd, "--hiveconf", $hc);
+ }
+ }
+
+ if (defined($cfg->{'hivecmdargs'})) {
+ push(@cmd, @{$cfg->{'hivecmdargs'}});
+ }
+
+ if (defined($cfg->{'hiveops'})) {
+ $ENV{'HIVE_OPTS'} = join(" ", @{$cfg->{'hiveops'}});
+ }
+
+ $ENV{'HIVE_HOME'} = $cfg->{'hivehome'};
+
+ my $envStr;
+ for my $k (keys(%ENV)) {
+ $envStr .= $k . "=" . $ENV{$k} . " " if ($k =~ /HADOOP/ || $k =~ /HIVE/);
+ }
+ $envStr .= " ";
+
+ if (defined($sql)) {
+ push(@cmd, "-f", $sql);
+ }
+ print $log "Going to run hive command [" . join(" ", @cmd) .
+ "] with environment set to [$envStr]\n";
+ my $runrc = run(\@cmd, \undef, $outfile, $errfile);
+ my $rc = $? >> 8;
+
+ return $runrc if $runrc; # success
+
+ if (defined($noFailOnFail) && $noFailOnFail) {
+ return $rc;
+ } else {
+ die "Failed running hive command [" . join(" ", @cmd) . "]\n";
+ }
+}
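+
+# Typical call (hypothetical path, not part of this patch): capture stdout in
+# a string and tolerate a non-zero exit from Hive.
+#
+#   my $stdout = "";
+#   my $status = Util::runHiveCmdFromFile($cfg, $log, "/tmp/query.sql",
+#                                         \$stdout, undef, 1);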
+
+#############################################################################
+# Sub: runHCatCmdFromFile
+#
+# Run the provided file using the HCatalog (hcat) command line.
+#
+# cfg - reference to the test configuration hash
+# log - reference to the log file, should be an open file pointer
+# sql - name of file containing SQL to run. Optional, if present -f $sql
+# will be appended to the command.
+# outfile - open file pointer (or variable reference) to write stdout to for
+# this test. Optional, will be written to $log if this value is not
+# provided.
+# errfile - open file pointer (or variable reference) to write stderr to for
+# this test. Optional, will be written to $log if this value is not
+# provided.
+# noFailOnFail - if true, do not die when the hcat command returns a non-zero
+# value; return that value instead.
+# Returns:
+# True if the command succeeds; the command's exit code if it fails and
+# noFailOnFail is set; otherwise dies.
+sub runHCatCmdFromFile($$;$$$$)
+{
+ my ($cfg, $log, $sql, $outfile, $errfile, $noFailOnFail) = @_;
+
+ if (!defined($ENV{'HADOOP_HOME'})) {
+ die "Cannot run hive when HADOOP_HOME environment variable is not set.";
+ }
+
+ $outfile = $log if (!defined($outfile));
+ $errfile = $log if (!defined($errfile));
+
+ # unset HADOOP_CLASSPATH
+ $ENV{'HADOOP_CLASSPATH'} = "";
+
+ my @cmd;
+ if (defined($sql)) {
+ @cmd = ("$cfg->{'hcathome'}/bin/hcat", "-f", $sql);
+ } else {
+ @cmd = ("$cfg->{'hcathome'}/bin/hcat");
+ }
+
+ my $envStr;
+ for my $k (keys(%ENV)) {
+ $envStr .= $k . "=" . $ENV{$k} . " " if ($k =~ /HADOOP/ || $k =~ /HIVE/);
+ }
+ $envStr .= " ";
+ print $log "Going to run hcat command [" . join(" ", @cmd) .
+ "] with environment set to [$envStr]\n";
+ my $runrc = run(\@cmd, \undef, $outfile, $errfile);
+ my $rc = $? >> 8;
+
+ return $runrc if $runrc; # success
+
+ if (defined($noFailOnFail) && $noFailOnFail) {
+ return $rc;
+ } else {
+ die "Failed running hcat command [" . join(" ", @cmd) . "]\n";
+ }
+}
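+
+# Analogous to runHiveCmdFromFile above; a minimal call (hypothetical path,
+# not part of this patch):
+#
+#   Util::runHCatCmdFromFile($cfg, $log, "/tmp/prep.hcat");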
+
+##############################################################################
+# Sub: runDbCmd
+#
+# Run the provided SQL file through the mysql command line.
+#
+# Returns:
+# Nothing
+sub runDbCmd($$$;$)
+{
+ my ($cfg, $log, $sqlfile, $outfile) = @_;
+
+ $outfile = $log if (!defined($outfile));
+
+ open(SQL, "< $sqlfile") or die "Unable to open $sqlfile for reading, $!\n";
+
+ my @cmd = ('mysql', '-u', $cfg->{'dbuser'}, '-D', $cfg->{'dbdb'},
+ '-h', $cfg->{'dbhost'}, "--password=$cfg->{'dbpasswd'}",
+ "--skip-column-names");
+
+ print $log "Going to run [" . join(" ", @cmd) . "] passing in [$sqlfile]\n";
+
+ run(\@cmd, \*SQL, $outfile, $log) or
+ die "Failed running " . join(" ", @cmd) . "\n";
+ close(SQL);
+}
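+
+# Example (hypothetical values, not part of this patch): run a SQL file
+# against the verification database and collect stdout for later comparison.
+#
+#   my $dbout = "";
+#   Util::runDbCmd($cfg, $log, "/tmp/expected.sql", \$dbout);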
+
+# Sub: runHadoopCmd
+#
+# Run the provided hadoop command
+#
+# Returns:
+# Nothing
+sub runHadoopCmd($$$)
+{
+ my ($cfg, $log, $c) = @_;
+
+ my @cmd = ("$ENV{'HADOOP_HOME'}/bin/hadoop");
+ push(@cmd, split(' ', $c));
+
+ print $log "Going to run [" . join(" ", @cmd) . "]\n";
+
+ run(\@cmd, \undef, $log, $log) or
+ die "Failed running " . join(" ", @cmd) . "\n";
+}
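+
+# Example (hypothetical command string, not part of this patch):
+#
+#   Util::runHadoopCmd($cfg, $log, "fs -ls /user/hcat");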
+
##############################################################################
# Sub: localTime
#
@@ -111,35 +394,6 @@ sub execCmd() {
return @result;
}
-sub getHadoopCmd
-{
- my ( $properties ) = @_;
-
- my $subName = (caller(0))[3];
- my @baseCmd;
-
- die "$0.$subName: null properties" if (! $properties );
-
- my $cmd;
-
- $cmd = $properties->{'gridstack.root'} . "/hadoop/current/bin/hadoop";
- if ( ! -x "$cmd" ) {
- print STDERR "\n$0::$subName WARNING: Can't find hadoop command: $cmd\n";
- $cmd = `which hadoop`;
- chomp $cmd;
- print STDERR "$0::$subName WARNING: Instead using command: $cmd\n";
- }
- if ( ! -x "$cmd" ) {
- die "\n$0::$subName FATAL: Hadoop command does not exist: $cmd\n";
- }
- push (@baseCmd, $cmd);
-
- push (@baseCmd, '--config', $properties->{'testconfigpath'}) if defined($properties->{'testconfigpath'});
-
- return @baseCmd;
-}
-
-
sub getHiveCmd
{
my ( $properties ) = @_;
@@ -187,6 +441,17 @@ sub getHCatCmd
return @baseCmd;
}
+sub show_call_stack {
+ my ( $path, $line, $subr );
+ my $max_depth = 30;
+ my $i = 1;
+ print("--- Begin stack trace ---");
+ while ( (my @call_details = (caller($i++))) && ($i<$max_depth) ) {
+ print("$call_details[1] line $call_details[2] in function $
++call_details[3]");
+ print("--- End stack trace ---");
+ }
+}
sub getPigCmd
@@ -198,6 +463,7 @@ sub getPigCmd
my @baseCmd;
die "$0.$subName: null properties" if (! $properties );
+show_call_stack();
#UGLY HACK for pig sql support
if ( $jarkey =~ /testsql/ ) {
@@ -217,6 +483,7 @@ sub getPigCmd
# This allows for testing of the pig script as installed, and for testin of
# the pig script's options, including error testing.
+print 'use-pig.pl?????';
$cmd = $properties->{'gridstack.root'} . "/pig/" . $properties->{'pigTestBuildName'} . "/bin/pig";
if ( ! -x "$cmd" ) {
@@ -241,6 +508,7 @@ sub getPigCmd
} else {
$cmd="java";
+print 'not use-pig.pl?????';
# Set JAVA options
# User can provide only one of
Added: incubator/hcatalog/trunk/src/test/e2e/hcatalog/tests/hadoop.conf
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/tests/hadoop.conf?rev=1211077&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/tests/hadoop.conf (added)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/tests/hadoop.conf Tue Dec 6 20:05:37 2011
@@ -0,0 +1,161 @@
+#!/home/y/bin/perl
+
+ #
+ # Do
+ # egrep '^#|name.*=>' hcat.conf | egrep -v '^#!|egrep' | less
+ # to get an outline of this test conf file
+ #
+
+ # Has a couple of Hive set directives:
+ # set hive.exec.dynamic.partition.mode=nonstrict;
+ # set hive.exec.dynamic.partition=true;
+
+
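+ # Shape of a single test entry in this file (a sketch derived from the
+ # entries below; only 'num' and 'hadoop' appear in every test):
+ #
+ # {
+ #   'num'               => <test number within the group>
+ #   ,'hcat_prep'        => q\<DDL run through hcat before the test>\
+ #   ,'hadoop'           => q\<hadoop jar command; :PARAM: placeholders are filled in by the harness>\
+ #   ,'result_table'     => '<table whose contents are verified>'
+ #   ,'sql'              => q\<verification query run against the benchmark database>\
+ #   ,'floatpostprocess' => 1
+ #   ,'delimiter'        => ' '
+ # },
+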
+$cfg = {
+ 'driver' => 'Hadoop',
+ 'groups' => [
+# This first group should be moved to deployer ?
+ {
+ 'name' => 'Hadoop_Checkin',
+ 'tests' => [
+ {
+ 'num' => 1
+ ,'hadoop' => q\
+jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.SimpleRead -libjars :HCAT_JAR: :THRIFTSERVER: studenttab10k :OUTPATH:
+\,
+ ,'sql' => q\select name, gpa from studenttab10k;\
+ ,'floatpostprocess' => 1
+ ,'delimiter' => ' '
+ },
+ {
+ 'num' => 2
+ ,'hcat_prep'=>q\drop table if exists hadoop_checkin_2;
+create table hadoop_checkin_2 (name string, age int, gpa double) STORED AS TEXTFILE;\
+ ,'hadoop' => q\
+jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.ReadWrite -libjars :HCAT_JAR: :THRIFTSERVER: studenttab10k hadoop_checkin_2
+\,
+ ,'result_table' => 'hadoop_checkin_2'
+ ,'sql' => q\select * from studenttab10k;\
+ ,'floatpostprocess' => 1
+ ,'delimiter' => ' '
+ },
+ {
+ 'num' => 3
+ ,'hcat_prep'=>q\drop table if exists hadoop_checkin_3;
+create table hadoop_checkin_3 (name string, cnt int) STORED AS TEXTFILE;\
+ ,'hadoop' => q\
+jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.GroupByAge -libjars :HCAT_JAR: :THRIFTSERVER: studenttab10k hadoop_checkin_3
+\,
+ ,'result_table' => 'hadoop_checkin_3'
+ ,'sql' => q\select age, count(*) from studenttab10k group by age;\
+ ,'floatpostprocess' => 1
+ ,'delimiter' => ' '
+ },
+ ],
+ }, # end g
+ {
+ 'name' => 'Hadoop_Read',
+ 'tests' => [
+ {
+ 'num' => 1
+ ,'hadoop' => q\
+jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.ReadText -libjars :HCAT_JAR: :THRIFTSERVER: all100k :OUTPATH:
+\,
+ ,'sql' => q\select * from all100k;\
+ ,'floatpostprocess' => 1
+ ,'delimiter' => ' '
+ },
+ {
+ 'num' => 2
+ ,'hadoop' => q\
+jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.ReadJson -libjars :HCAT_JAR: :THRIFTSERVER: all100kjson :OUTPATH:
+\,
+ ,'sql' => q\select s, i, d from all100kjson;\
+ ,'floatpostprocess' => 1
+ ,'delimiter' => ' '
+ },
+ {
+ 'num' => 3
+ ,'hadoop' => q\
+jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.ReadRC -libjars :HCAT_JAR: :THRIFTSERVER: all100krc :OUTPATH:
+\,
+ ,'sql' => q\select * from all100krc;\
+ ,'floatpostprocess' => 1
+ ,'delimiter' => ' '
+ },
+ ],
+ }, # end g
+ {
+ 'name' => 'Hadoop_Write',
+ 'tests' => [
+ {
+ 'num' => 1
+ ,'hcat_prep'=>q\
+drop table if exists hadoop_write_1;
+create table hadoop_write_1(
+ t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ s string)
+ row format delimited
+ fields terminated by ':'
+ stored as textfile;\
+ ,'hadoop' => q\
+jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.WriteText -libjars :HCAT_JAR: :THRIFTSERVER: all100k hadoop_write_1
+\,
+ ,'result_table' => 'hadoop_write_1'
+ ,'sql' => q\select * from all100k;\
+ ,'floatpostprocess' => 1
+ ,'delimiter' => ' '
+ },
+ {
+ 'num' => 2
+ ,'hcat_prep' => q\
+drop table if exists hadoop_write_2;
+create table hadoop_write_2(
+ s string,
+ i int,
+ d double,
+ m map<string, string>,
+ bb array<struct<a: int, b: string>>)
+ STORED AS INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat'
+ INPUTDRIVER 'org.apache.hcatalog.pig.drivers.LoadFuncBasedInputDriver' OUTPUTDRIVER 'org.apache.hcatalog.pig.drivers.StoreFuncBasedOutputDriver'
+ TBLPROPERTIES ('hcat.pig.loader'='org.apache.pig.builtin.JsonLoader', 'hcat.pig.storer'='org.apache.pig.builtin.JsonStorage', 'hcat.pig.loader.args'=
+'s:chararray, i:int, d:double, m:map[chararray], bb:{t:(a:int, b:chararray)}', 'hcat.pig.args.delimiter'=' ');\
+ ,'hadoop' => q\
+jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.WriteJson -libjars :HCAT_JAR: :THRIFTSERVER: all100kjson hadoop_write_2
+\,
+ ,'result_table' => 'hadoop_write_2'
+ ,'sql' => q\select s, i, d, '', '' from all100kjson;\
+ ,'floatpostprocess' => 1
+ ,'delimiter' => ' '
+ },
+ {
+ 'num' => 3
+ ,'hcat_prep' => q\
+drop table if exists hadoop_write_3;
+create table hadoop_write_3(
+ name string,
+ age int,
+ gpa double)
+stored as rcfile
+TBLPROPERTIES (
+ 'hcat.isd'='org.apache.hcatalog.rcfile.RCFileInputDriver',
+ 'hcat.osd'='org.apache.hcatalog.rcfile.RCFileOutputDriver'
+);
+\,
+ ,'hadoop' => q\
+jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.WriteRC -libjars :HCAT_JAR: :THRIFTSERVER: all100krc hadoop_write_3
+\,
+ ,'result_table' => 'hadoop_write_3'
+ ,'sql' => q\select * from all100krc;\
+ ,'floatpostprocess' => 1
+ ,'delimiter' => ' '
+ },
+ ],
+ }, # end g
+ ]
+}