You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ga...@apache.org on 2011/05/10 19:39:37 UTC
svn commit: r1101560 - in /pig/trunk: CHANGES.txt
test/e2e/harness/pig_test_harness.pl test/e2e/pig/Makefile
test/e2e/pig/conf/default.conf test/e2e/pig/conf/existing_deployer.conf
test/e2e/pig/deployers/ test/e2e/pig/deployers/ExistingClusterDeployer.pm
Author: gates
Date: Tue May 10 17:39:36 2011
New Revision: 1101560
URL: http://svn.apache.org/viewvc?rev=1101560&view=rev
Log:
PIG-1994: e2e test harness deployment implementation for existing cluster
Added:
pig/trunk/test/e2e/pig/conf/existing_deployer.conf
pig/trunk/test/e2e/pig/deployers/
pig/trunk/test/e2e/pig/deployers/ExistingClusterDeployer.pm
Modified:
pig/trunk/CHANGES.txt
pig/trunk/test/e2e/harness/pig_test_harness.pl
pig/trunk/test/e2e/pig/Makefile
pig/trunk/test/e2e/pig/conf/default.conf
Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1101560&r1=1101559&r2=1101560&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Tue May 10 17:39:36 2011
@@ -24,6 +24,9 @@ INCOMPATIBLE CHANGES
IMPROVEMENTS
+PIG-1994: e2e test harness deployment implementation for existing cluster
+(gates)
+
PIG-2036: [piggybank] Set header delimiter in PigStorageSchema (mmoeller via dvryaboy)
PIG-1949: e2e test harness should use bin/pig rather than calling java
Modified: pig/trunk/test/e2e/harness/pig_test_harness.pl
URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/harness/pig_test_harness.pl?rev=1101560&r1=1101559&r2=1101560&view=diff
==============================================================================
--- pig/trunk/test/e2e/harness/pig_test_harness.pl (original)
+++ pig/trunk/test/e2e/harness/pig_test_harness.pl Tue May 10 17:39:36 2011
@@ -383,6 +383,12 @@ if ($deploy) {
print $log "INFO: $0 at ".__LINE__." : Loading configuration file $deploycfg\n";
my $cfg = readCfg($deploycfg);
+ # Copy the global config into our cfg
+ foreach(keys(%$globalCfg)) {
+ next if $_ eq 'file';
+ $cfg->{$_} = $globalCfg->{$_}; #foreach(keys(%$globalCfg));
+ }
+
# Instantiate the TestDeployer
my $deployer = TestDeployerFactory::getTestDeployer($cfg);
die "FATAL: $0: Deployer does not exist\n" if ( !$deployer );
@@ -467,7 +473,7 @@ foreach my $arg (@ARGV) {
# Copy contents of global config file into hash.
foreach(keys(%$globalCfg)) {
next if $_ eq 'file';
- $cfg->{$_} = $globalCfg->{$_} foreach(keys(%$globalCfg));
+ $cfg->{$_} = $globalCfg->{$_}; # foreach(keys(%$globalCfg));
print $log "\nINFO $0: $_=".$cfg->{$_};
}
print $log "\n";
Modified: pig/trunk/test/e2e/pig/Makefile
URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/Makefile?rev=1101560&r1=1101559&r2=1101560&view=diff
==============================================================================
--- pig/trunk/test/e2e/pig/Makefile (original)
+++ pig/trunk/test/e2e/pig/Makefile Tue May 10 17:39:36 2011
@@ -18,6 +18,10 @@ DRIVER_DIR = ./drivers
DRIVERS = $(DRIVER_DIR)/TestDriverPig.pm $(DRIVER_DIR)/Util.pm
+DEPLOYER_DIR = ./deployers
+
+DEPLOYERS = $(DEPLOYER_DIR)/ExistingClusterDeployer.pm
+
TEST_DIR = ./tests
TESTS = $(TEST_DIR)/bigdata.conf $(TEST_DIR)/cmdline.conf \
@@ -28,7 +32,7 @@ TESTS = $(TEST_DIR)/bigdata.conf $(TEST
CONF_DIR = ./conf
-CONF_FILES = $(CONF_DIR)/default.conf
+CONF_FILES = $(CONF_DIR)/default.conf $(CONF_DIR)/existing_deployer.conf
TOOL_DIR = ./tools
@@ -36,7 +40,8 @@ TEST_TOOL_DIR = $(TOOL_DIR)/test
GEN_TOOL_DIR = $(TOOL_DIR)/generate
-TOOLS = $(TEST_TOOL_DIR)/explainchk.pl $(TEST_TOOL_DIR)/floatpostprocessor.pl
+TOOLS = $(TEST_TOOL_DIR)/explainchk.pl $(TEST_TOOL_DIR)/floatpostprocessor.pl \
+ $(GEN_TOOL_DIR)/generate_data.pl
ARCHIVE = pigtests.tar
@@ -100,8 +105,8 @@ LIB_DIR = lib
STREAMING_DIR = streaming
-$(ARCHIVE): $(TESTS) $(DRIVERS) $(CONF_FILES) $(TOOLS)
- tar cf $(ARCHIVE) $(DRIVERS) $(TESTS) $(CONF_FILES) $(TOOLS)
+$(ARCHIVE): $(TESTS) $(DRIVERS) $(DEPLOYERS) $(CONF_FILES) $(TOOLS)
+ tar cf $(ARCHIVE) $(DRIVERS) $(DEPLOYERS) $(TESTS) $(CONF_FILES) $(TOOLS)
# Note, you must set PH_PIG_JARFILE environment variable
build_udfs:
@@ -111,7 +116,7 @@ build_udfs:
"before building the UDFs"; 1; fi)
(cd $(JAVA_UDF_DIR); ant -Dpig.jarfile=$${PH_PIG}/pig.jar)
-test: $(ARCHIVE) build_udfs
+pretest: $(ARCHIVE) build_udfs
@(if [ "$${PH_CLUSTER}x" == "x" ] ; then echo \
"You must set the environment variable PH_CLUSTER" \
"to the directory that contains your hadoop-site.xml" \
@@ -126,6 +131,7 @@ test: $(ARCHIVE) build_udfs
(cd $(TEST_DIST_DIR); tar xf $(ARCHIVE))
(cd $(TEST_DIST_DIR); tar xf pigharness.tar)
(cd $(TEST_DIST_DIR); mv drivers/*.pm .)
+ (cd $(TEST_DIST_DIR); mv deployers/*.pm .)
mkdir -p $(TEST_DIST_DIR)/libexec/PigTest
(cd $(TEST_DIST_DIR); cp $(TOOLS) libexec/PigTest)
chmod +x $(TEST_DIST_DIR)/libexec/PigTest/*
@@ -139,9 +145,20 @@ test: $(ARCHIVE) build_udfs
cp $(LIB_DIR)/* $(TEST_DIST_DIR)/lib
cp $(STREAMING_DIR)/* $(TEST_DIST_DIR)/libexec
chmod +x $(TEST_DIST_DIR)/libexec/*.pl
+
+test: pretest
(cd $(TEST_DIST_DIR); PIG_HARNESS_ROOT=. PH_LOCAL=. PH_OUT=. PH_ROOT=. \
./pig_test_harness.pl $(TESTS_TO_RUN) $(TESTS))
+deployandtest: pretest
+ (if [ "$${DEPLOYCFG}x" == "x" ] ; then echo \
+ "You must set the environment variable DEPLOYCFG to the config file " \
+ "for your deployment before running deploy"; 1; fi)
+ (cd $(TEST_DIST_DIR); PIG_HARNESS_ROOT=. PH_LOCAL=. PH_OUT=. PH_ROOT=. \
+ ./pig_test_harness.pl -deploycfg $(DEPLOYCFG) -deploy $(TESTS_TO_RUN) \
+ $(TESTS))
+
+
clean:
rm -f $(ARCHIVE)
rm -rf $(TEST_DIST_DIR)
Modified: pig/trunk/test/e2e/pig/conf/default.conf
URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/conf/default.conf?rev=1101560&r1=1101559&r2=1101560&view=diff
==============================================================================
--- pig/trunk/test/e2e/pig/conf/default.conf (original)
+++ pig/trunk/test/e2e/pig/conf/default.conf Tue May 10 17:39:36 2011
@@ -53,7 +53,7 @@ $cfg = {
#DATABASE
- , 'dbdb' => 'pigtest'
+ , 'dbdb' => 'pigtest2'
, 'dbuser' => 'pigtester'
, 'dbowner' => 'hadoopqa'
, 'dbgroup' => 'users'
Added: pig/trunk/test/e2e/pig/conf/existing_deployer.conf
URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/conf/existing_deployer.conf?rev=1101560&view=auto
==============================================================================
--- pig/trunk/test/e2e/pig/conf/existing_deployer.conf (added)
+++ pig/trunk/test/e2e/pig/conf/existing_deployer.conf Tue May 10 17:39:36 2011
@@ -0,0 +1,40 @@
+#!/usr/bin/env perl
+
+############################################################################
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+###############################################################################
+# Test for TestHarness itself.
+#
+#
+
+$cfg = {
+ 'deployer' => 'ExistingClusterDeployer',
+
+ # hadoop values
+ 'hadoopdir' => $ENV{'PH_CLUSTER'},
+
+ # db values
+# 'dbuser' => 'pigtester',
+# 'dbdb' => 'pigtest2',
+
+ # generate values
+ 'gentool' => './libexec/PigTest/generate_data.pl',
+}
+;
+
+
+
Added: pig/trunk/test/e2e/pig/deployers/ExistingClusterDeployer.pm
URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/deployers/ExistingClusterDeployer.pm?rev=1101560&view=auto
==============================================================================
--- pig/trunk/test/e2e/pig/deployers/ExistingClusterDeployer.pm (added)
+++ pig/trunk/test/e2e/pig/deployers/ExistingClusterDeployer.pm Tue May 10 17:39:36 2011
@@ -0,0 +1,354 @@
+############################################################################
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+package ExistingClusterDeployer;
+
+use IPC::Run qw(run);
+use TestDeployer;
+
+use strict;
+use English;
+
+###########################################################################
+# Class: ExistingClusterDeployer
+# Deploy the Pig harness to a cluster and database that already exists.
+
+##############################################################################
+# Sub: new
+# Constructor
+#
+# Parameters:
+# None
+#
+# Returns:
+# None.
+sub new
+{
+ my $proto = shift;
+ my $class = ref($proto) || $proto;
+ my $self = {};
+
+ bless($self, $class);
+
+ return $self;
+}
+
+##############################################################################
+# Sub: checkPrerequisites
+# Check any prerequisites before a deployment is begun. For example if a
+# particular deployment required the use of a database system it could
+# check here that the db was installed and accessible.
+#
+# Parameters:
+# globalHash - hash from config file, including deployment config
+# log - log file handle
+#
+# Returns:
+# None
+#
+sub checkPrerequisites
+{
+ my ($self, $cfg, $log) = @_;
+
+ # They must have declared the directory for their Hadoop installation
+ if (! defined $cfg->{'hadoopdir'} || $cfg->{'hadoopdir'} eq "") {
+ print $log "You must set the key 'hadoopdir' to your Hadoop directory "
+ . "in existing.conf\n";
+ die "hadoopdir is not set in existing.conf\n";
+ }
+
+ # Run a quick and easy Hadoop command to make sure we can
+ $self->runHadoopCmd($cfg, $log, "fs -ls /");
+
+ # Make sure the database is installed and set up
+ $self->runDbCmd($cfg, $log, 0, "create table test_table(test_col int);");
+ $self->runDbCmd($cfg, $log, 0, "drop table test_table;");
+
+}
+
+##############################################################################
+# Sub: deploy
+# Deploy any required packages
+# This is a no-op in this case because we're assuming both the cluster and the
+# database already exist
+#
+# Parameters:
+# globalHash - hash from config file, including deployment config
+# log - log file handle
+#
+# Returns:
+# None
+#
+sub deploy
+{
+}
+
+##############################################################################
+# Sub: start
+# Start any software modules that are needed.
+# This is a no-op in this case because we're assuming both the cluster and the
+# database already exist
+#
+# Parameters:
+# globalHash - hash from config file, including deployment config
+# log - log file handle
+#
+# Returns:
+# None
+#
+sub start
+{
+}
+
+##############################################################################
+# Sub: generateData
+# Generate any data needed for this test run.
+#
+# Parameters:
+# globalHash - hash from config file, including deployment config
+# log - log file handle
+#
+# Returns:
+# None
+#
+sub generateData
+{
+ my ($self, $cfg, $log) = @_;
+ my @tables = (
+ {
+ 'name' => "studenttab10k",
+ 'filetype' => "studenttab",
+ 'rows' => 10000,
+ 'hdfs' => "singlefile/studenttab10k",
+ }, {
+ 'name' => "votertab10k",
+ 'filetype' => "votertab",
+ 'rows' => 10000,
+ 'hdfs' => "singlefile/votertab10k",
+ }, {
+ 'name' => "studentcolon10k",
+ 'filetype' => "studentcolon",
+ 'rows' => 10000,
+ 'hdfs' => "singlefile/studentcolon10k",
+ }, {
+ 'name' => "textdoc",
+ 'filetype' => "textdoc",
+ 'rows' => 10000,
+ 'hdfs' => "singlefile/textdoc",
+ }, {
+ 'name' => "reg1459894",
+ 'filetype' => "reg1459894",
+ 'rows' => 1000,
+ 'hdfs' => "singlefile/reg1459894",
+ }, {
+ 'name' => "studenttabdir10k",
+ 'filetype' => "studenttab",
+ 'rows' => 10000,
+ 'hdfs' => "dir/studenttab10k",
+ }, {
+ 'name' => "studenttabsomegood",
+ 'filetype' => "studenttab",
+ 'rows' => 1000,
+ 'hdfs' => "glob/star/somegood/studenttab",
+ }, {
+ 'name' => "studenttabmoregood",
+ 'filetype' => "studenttab",
+ 'rows' => 1001,
+ 'hdfs' => "glob/star/moregood/studenttab",
+ }, {
+ 'name' => "studenttabbad",
+ 'filetype' => "studenttab",
+ 'rows' => 1002,
+ 'hdfs' => "glob/star/bad/studenttab",
+ }, {
+ 'name' => "fileexists",
+ 'filetype' => "studenttab",
+ 'rows' => 1,
+ 'hdfs' => "singlefile/fileexists",
+ }, {
+ 'name' => "studenttab20m",
+ 'filetype' => "studenttab",
+ 'rows' => 20000000,
+ 'hdfs' => "singlefile/studenttab20m",
+ }, {
+ 'name' => "unicode100",
+ 'filetype' => "unicode",
+ 'rows' => 100,
+ 'hdfs' => "singlefile/unicode100",
+ }, {
+ 'name' => "studentctrla10k",
+ 'filetype' => "studentctrla",
+ 'rows' => 10000,
+ 'hdfs' => "singlefile/studentctrla10k",
+ }, {
+ 'name' => "studentcomplextab10k",
+ 'filetype' => "studentcomplextab",
+ 'rows' => 10000,
+ 'hdfs' => "singlefile/studentcomplextab10k",
+ }, {
+ 'name' => "studentnulltab10k",
+ 'filetype' => "studentnulltab",
+ 'rows' => 10000,
+ 'hdfs' => "singlefile/studentnulltab10k",
+ }, {
+ 'name' => "voternulltab10k",
+ 'filetype' => "voternulltab",
+ 'rows' => 10000,
+ 'hdfs' => "singlefile/voternulltab10k",
+ },
+ );
+
+ # Create the HDFS directories
+ $self->runHadoopCmd($cfg, $log, "fs -mkdir $cfg->{'inpathbase'}");
+
+ foreach my $table (@tables) {
+ print "Generating data for $table->{'name'}\n";
+ # Generate the data
+ my @cmd = ($cfg->{'gentool'}, $table->{'filetype'}, $table->{'rows'},
+ $table->{'name'});
+ $self->runCmd($log, \@cmd);
+
+ # Copy the data to HDFS
+ my $hadoop = "fs -copyFromLocal $table->{'name'} ".
+ "$cfg->{'inpathbase'}/$table->{'hdfs'}";
+ $self->runHadoopCmd($cfg, $log, $hadoop);
+
+ # Load the data in the database
+ my $sql = "-f $table->{'name'}.sql";
+ $self->runDbCmd($cfg, $log, 1, $sql);
+ }
+}
+
+##############################################################################
+# Sub: confirmDeployment
+# Run checks to confirm that the deployment was successful. When this is
+# done the testing environment should be ready to run.
+#
+# Parameters:
+# globalHash - hash from config file, including deployment config
+# log - log file handle
+#
+# Returns:
+# Nothing
+# This method should die with an appropriate error message if there is
+# an issue.
+#
+sub confirmDeployment
+{
+}
+
+##############################################################################
+# Sub: deleteData
+# Remove any data created that will not be removed by undeploying.
+#
+# Paramaters:
+# globalHash - hash from config file, including deployment config
+# log - log file handle
+#
+# Returns:
+# None
+#
+sub deleteData
+{
+}
+
+##############################################################################
+# Sub: stop
+# Stop any servers or systems that are no longer needed once testing is
+# completed.
+#
+# Parameters:
+# globalHash - hash from config file, including deployment config
+# log - log file handle
+#
+# Returns:
+# None
+#
+sub stop
+{
+}
+
+##############################################################################
+# Sub: undeploy
+# Remove any packages that were installed as part of the deployment.
+#
+# Parameters:
+# globalHash - hash from config file, including deployment config
+# log - log file handle
+#
+# Returns:
+# None
+#
+sub undeploy
+{
+}
+
+##############################################################################
+# Sub: confirmUndeployment
+# Run checks to confirm that the undeployment was successful. When this is
+# done anything that must be turned off or removed should be turned off or
+# removed.
+#
+# Parameters:
+# globalHash - hash from config file, including deployment config
+# log - log file handle
+#
+# Returns:
+# Nothing
+# This method should die with an appropriate error message if there is
+# an issue.
+#
+sub confirmUndeployment
+{
+ die "$0 INFO : confirmUndeployment is a virtual function!";
+}
+
+sub runHadoopCmd($$$$)
+{
+ my ($self, $cfg, $log, $c) = @_;
+
+ # set the HADOOP_CLASSPATH environment variable
+ $ENV{'HADOOP_CLASSPATH'} = "$cfg->{'hadoopdir'}/conf";
+
+ my @cmd = ("$cfg->{'hadoopdir'}/bin/hadoop");
+ push(@cmd, split(' ', $c));
+
+ $self->runCmd($log, \@cmd);
+}
+
+sub runDbCmd($$$$)
+{
+ my ($self, $cfg, $log, $isfile, $sql) = @_;
+
+ my $switch = $isfile ? 'f' : 'c';
+
+ my @cmd = ('psql', '-U', $cfg->{'dbuser'}, '-d', $cfg->{'dbdb'},
+ "-$switch", $sql);
+
+ $self->runCmd($log, \@cmd);
+}
+
+sub runCmd($$$)
+{
+ my ($self, $log, $cmd) = @_;
+
+ print $log "Going to run " . join(" ", @$cmd) . "\n";
+
+ run($cmd, \undef, $log, $log) or
+ die "Failed running " . join(" ", @$cmd) . "\n";
+}
+
+1;