Posted to commits@pig.apache.org by ga...@apache.org on 2011/05/10 19:39:37 UTC

svn commit: r1101560 - in /pig/trunk: CHANGES.txt test/e2e/harness/pig_test_harness.pl test/e2e/pig/Makefile test/e2e/pig/conf/default.conf test/e2e/pig/conf/existing_deployer.conf test/e2e/pig/deployers/ test/e2e/pig/deployers/ExistingClusterDeployer.pm

Author: gates
Date: Tue May 10 17:39:36 2011
New Revision: 1101560

URL: http://svn.apache.org/viewvc?rev=1101560&view=rev
Log:
PIG-1994: e2e test harness deployment implementation for existing cluster

Added:
    pig/trunk/test/e2e/pig/conf/existing_deployer.conf
    pig/trunk/test/e2e/pig/deployers/
    pig/trunk/test/e2e/pig/deployers/ExistingClusterDeployer.pm
Modified:
    pig/trunk/CHANGES.txt
    pig/trunk/test/e2e/harness/pig_test_harness.pl
    pig/trunk/test/e2e/pig/Makefile
    pig/trunk/test/e2e/pig/conf/default.conf
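
For context, the harness loads the file named by -deploycfg and hands it to TestDeployerFactory,
which instantiates a deployer class; the new deployandtest Makefile target drives that path with
DEPLOYCFG pointing at the deployment config, e.g. conf/existing_deployer.conf. A minimal sketch of
the wiring, assuming the factory keys off the 'deployer' entry as the added config suggests:

    # Sketch only: how the -deploy path is expected to pick up the new deployer.
    my $cfg = readCfg("conf/existing_deployer.conf");    # sets 'deployer' => 'ExistingClusterDeployer'
    my $deployer = TestDeployerFactory::getTestDeployer($cfg);
    die "FATAL: Deployer does not exist\n" if (!$deployer);
    $deployer->checkPrerequisites($cfg, $log);           # $log is the harness log handle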

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1101560&r1=1101559&r2=1101560&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Tue May 10 17:39:36 2011
@@ -24,6 +24,9 @@ INCOMPATIBLE CHANGES
 
 IMPROVEMENTS
 
+PIG-1994: e2e test harness deployment implementation for existing cluster
+(gates)
+
 PIG-2036: [piggybank] Set header delimiter in PigStorageSchema (mmoeller via dvryaboy)
 
 PIG-1949: e2e test harness should use bin/pig rather than calling java

Modified: pig/trunk/test/e2e/harness/pig_test_harness.pl
URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/harness/pig_test_harness.pl?rev=1101560&r1=1101559&r2=1101560&view=diff
==============================================================================
--- pig/trunk/test/e2e/harness/pig_test_harness.pl (original)
+++ pig/trunk/test/e2e/harness/pig_test_harness.pl Tue May 10 17:39:36 2011
@@ -383,6 +383,12 @@ if ($deploy) {
     print $log "INFO: $0 at ".__LINE__." : Loading configuration file $deploycfg\n";
     my $cfg = readCfg($deploycfg);
 
+	# Copy the global config into our cfg
+	foreach(keys(%$globalCfg)) {
+		next if $_ eq 'file';
+		$cfg->{$_} = $globalCfg->{$_}; #foreach(keys(%$globalCfg));
+	}
+
     # Instantiate the TestDeployer
     my $deployer = TestDeployerFactory::getTestDeployer($cfg);
     die "FATAL: $0: Deployer does not exist\n" if ( !$deployer );
@@ -467,7 +473,7 @@ foreach my $arg (@ARGV) {
 	# Copy contents of global config file into hash.
 	foreach(keys(%$globalCfg)) {
 		next if $_ eq 'file';
-		$cfg->{$_} = $globalCfg->{$_} foreach(keys(%$globalCfg));
+		$cfg->{$_} = $globalCfg->{$_}; # foreach(keys(%$globalCfg));
 		print $log "\nINFO $0: $_=".$cfg->{$_};
 	}
 	print $log "\n"; 
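
The second hunk fixes a subtle bug: the assignment already sits inside a foreach over the global
config's keys, so the trailing foreach(keys(%$globalCfg)) statement modifier copied every key once
per outer iteration and defeated the next-if-'file' guard. The first hunk reuses the corrected
idiom so the -deploy path also sees the global config. A standalone sketch of the merge, using the
same variable names as the harness:

    # Copy every key except 'file' from the global config into the active config;
    # one assignment per key, no nested foreach modifier.
    foreach my $key (keys(%$globalCfg)) {
        next if $key eq 'file';
        $cfg->{$key} = $globalCfg->{$key};
    }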

Modified: pig/trunk/test/e2e/pig/Makefile
URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/Makefile?rev=1101560&r1=1101559&r2=1101560&view=diff
==============================================================================
--- pig/trunk/test/e2e/pig/Makefile (original)
+++ pig/trunk/test/e2e/pig/Makefile Tue May 10 17:39:36 2011
@@ -18,6 +18,10 @@ DRIVER_DIR = ./drivers
 
 DRIVERS = $(DRIVER_DIR)/TestDriverPig.pm $(DRIVER_DIR)/Util.pm
 
+DEPLOYER_DIR = ./deployers
+
+DEPLOYERS = $(DEPLOYER_DIR)/ExistingClusterDeployer.pm
+
 TEST_DIR = ./tests
 
 TESTS = $(TEST_DIR)/bigdata.conf  $(TEST_DIR)/cmdline.conf \
@@ -28,7 +32,7 @@ TESTS = $(TEST_DIR)/bigdata.conf  $(TEST
 
 CONF_DIR = ./conf
 
-CONF_FILES = $(CONF_DIR)/default.conf
+CONF_FILES = $(CONF_DIR)/default.conf $(CONF_DIR)/existing_deployer.conf
 
 TOOL_DIR = ./tools
 
@@ -36,7 +40,8 @@ TEST_TOOL_DIR = $(TOOL_DIR)/test
 
 GEN_TOOL_DIR = $(TOOL_DIR)/generate
 
-TOOLS = $(TEST_TOOL_DIR)/explainchk.pl  $(TEST_TOOL_DIR)/floatpostprocessor.pl 
+TOOLS = $(TEST_TOOL_DIR)/explainchk.pl  $(TEST_TOOL_DIR)/floatpostprocessor.pl \
+        $(GEN_TOOL_DIR)/generate_data.pl
 
 ARCHIVE = pigtests.tar
 
@@ -100,8 +105,8 @@ LIB_DIR      = lib
 
 STREAMING_DIR = streaming
 
-$(ARCHIVE): $(TESTS) $(DRIVERS) $(CONF_FILES) $(TOOLS)
-	tar cf $(ARCHIVE) $(DRIVERS) $(TESTS) $(CONF_FILES) $(TOOLS)
+$(ARCHIVE): $(TESTS) $(DRIVERS) $(DEPLOYERS) $(CONF_FILES) $(TOOLS)
+	tar cf $(ARCHIVE) $(DRIVERS) $(DEPLOYERS) $(TESTS) $(CONF_FILES) $(TOOLS)
 
 # Note, you must set PH_PIG_JARFILE environment variable
 build_udfs: 
@@ -111,7 +116,7 @@ build_udfs: 
 		"before building the UDFs"; 1; fi)
 	(cd $(JAVA_UDF_DIR); ant -Dpig.jarfile=$${PH_PIG}/pig.jar)
 
-test: $(ARCHIVE) build_udfs
+pretest: $(ARCHIVE) build_udfs
 	@(if [ "$${PH_CLUSTER}x" == "x" ] ; then echo \
 	 	"You must set the environment variable PH_CLUSTER" \
 		"to the directory that contains your hadoop-site.xml" \
@@ -126,6 +131,7 @@ test: $(ARCHIVE) build_udfs
 	(cd $(TEST_DIST_DIR); tar xf $(ARCHIVE))
 	(cd $(TEST_DIST_DIR); tar xf pigharness.tar)
 	(cd $(TEST_DIST_DIR); mv drivers/*.pm .)
+	(cd $(TEST_DIST_DIR); mv deployers/*.pm .)
 	mkdir -p $(TEST_DIST_DIR)/libexec/PigTest
 	(cd $(TEST_DIST_DIR); cp $(TOOLS) libexec/PigTest)
 	chmod +x $(TEST_DIST_DIR)/libexec/PigTest/*
@@ -139,9 +145,20 @@ test: $(ARCHIVE) build_udfs
 	cp $(LIB_DIR)/* $(TEST_DIST_DIR)/lib
 	cp $(STREAMING_DIR)/* $(TEST_DIST_DIR)/libexec
 	chmod +x $(TEST_DIST_DIR)/libexec/*.pl
+
+test: pretest
 	(cd $(TEST_DIST_DIR); PIG_HARNESS_ROOT=. PH_LOCAL=. PH_OUT=. PH_ROOT=. \
 		./pig_test_harness.pl $(TESTS_TO_RUN) $(TESTS))
 
+deployandtest: pretest
+	(if [ "$${DEPLOYCFG}x" == "x" ] ; then echo \
+	 	"You must set the environment variable DEPLOYCFG to the config file " \
+		"for your deployment before running deploy"; 1; fi)
+	(cd $(TEST_DIST_DIR); PIG_HARNESS_ROOT=. PH_LOCAL=. PH_OUT=. PH_ROOT=. \
+		./pig_test_harness.pl -deploycfg $(DEPLOYCFG) -deploy $(TESTS_TO_RUN) \
+		$(TESTS))
+
+
 clean:
 	rm -f $(ARCHIVE)
 	rm -rf $(TEST_DIST_DIR)

Modified: pig/trunk/test/e2e/pig/conf/default.conf
URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/conf/default.conf?rev=1101560&r1=1101559&r2=1101560&view=diff
==============================================================================
--- pig/trunk/test/e2e/pig/conf/default.conf (original)
+++ pig/trunk/test/e2e/pig/conf/default.conf Tue May 10 17:39:36 2011
@@ -53,7 +53,7 @@ $cfg = {
 
 
     #DATABASE
-    , 'dbdb'             => 'pigtest'
+    , 'dbdb'             => 'pigtest2'
     , 'dbuser'           => 'pigtester'
     , 'dbowner'      => 'hadoopqa'
     , 'dbgroup'      => 'users'

Added: pig/trunk/test/e2e/pig/conf/existing_deployer.conf
URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/conf/existing_deployer.conf?rev=1101560&view=auto
==============================================================================
--- pig/trunk/test/e2e/pig/conf/existing_deployer.conf (added)
+++ pig/trunk/test/e2e/pig/conf/existing_deployer.conf Tue May 10 17:39:36 2011
@@ -0,0 +1,40 @@
+#!/usr/bin/env perl
+
+############################################################################           
+#  Licensed to the Apache Software Foundation (ASF) under one or more                  
+#  contributor license agreements.  See the NOTICE file distributed with               
+#  this work for additional information regarding copyright ownership.                 
+#  The ASF licenses this file to You under the Apache License, Version 2.0             
+#  (the "License"); you may not use this file except in compliance with                
+#  the License.  You may obtain a copy of the License at                               
+#                                                                                      
+#      http://www.apache.org/licenses/LICENSE-2.0                                      
+#                                                                                      
+#  Unless required by applicable law or agreed to in writing, software                 
+#  distributed under the License is distributed on an "AS IS" BASIS,                   
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.            
+#  See the License for the specific language governing permissions and                 
+#  limitations under the License.                                                      
+                                                                                       
+###############################################################################
+# Deployment configuration for running the Pig e2e tests against an existing cluster and database.
+#
+#
+
+$cfg = {
+	'deployer' => 'ExistingClusterDeployer',
+	
+	# hadoop values
+	'hadoopdir'   => $ENV{'PH_CLUSTER'},
+
+	# db values
+# 	'dbuser' => 'pigtester',
+# 	'dbdb'   => 'pigtest2',
+
+    # generate values
+    'gentool' => './libexec/PigTest/generate_data.pl',
+}
+;
+
+
+
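
This file only needs the deployer-specific keys; the harness change above copies the global config
over it, so database settings such as 'dbuser' and 'dbdb' (left commented out here) are expected to
arrive from default.conf. A hedged sketch of roughly what ExistingClusterDeployer ends up seeing,
assuming default.conf is the global config in effect:

    # Sketch of the merged hash, not actual harness output.
    my $merged = {
        'deployer'  => 'ExistingClusterDeployer',
        'hadoopdir' => $ENV{'PH_CLUSTER'},
        'gentool'   => './libexec/PigTest/generate_data.pl',
        'dbuser'    => 'pigtester',    # supplied by default.conf
        'dbdb'      => 'pigtest2',     # supplied by default.conf
        # ...plus the rest of default.conf, presumably including 'inpathbase',
        # which generateData() relies on.
    };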

Added: pig/trunk/test/e2e/pig/deployers/ExistingClusterDeployer.pm
URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/deployers/ExistingClusterDeployer.pm?rev=1101560&view=auto
==============================================================================
--- pig/trunk/test/e2e/pig/deployers/ExistingClusterDeployer.pm (added)
+++ pig/trunk/test/e2e/pig/deployers/ExistingClusterDeployer.pm Tue May 10 17:39:36 2011
@@ -0,0 +1,354 @@
+############################################################################           
+#  Licensed to the Apache Software Foundation (ASF) under one or more                  
+#  contributor license agreements.  See the NOTICE file distributed with               
+#  this work for additional information regarding copyright ownership.                 
+#  The ASF licenses this file to You under the Apache License, Version 2.0             
+#  (the "License"); you may not use this file except in compliance with                
+#  the License.  You may obtain a copy of the License at                               
+#                                                                                      
+#      http://www.apache.org/licenses/LICENSE-2.0                                      
+#                                                                                      
+#  Unless required by applicable law or agreed to in writing, software                 
+#  distributed under the License is distributed on an "AS IS" BASIS,                   
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.            
+#  See the License for the specific language governing permissions and                 
+#  limitations under the License.                                                      
+                                                                                       
+package ExistingClusterDeployer;
+
+use IPC::Run qw(run);
+use TestDeployer;
+
+use strict;
+use English;
+
+###########################################################################
+# Class: ExistingClusterDeployer
+# Deploy the Pig harness to a cluster and database that already exist.
+
+##############################################################################
+# Sub: new
+# Constructor
+#
+# Parameters:
+# None
+#
+# Returns:
+# None.
+sub new
+{
+    my $proto = shift;
+    my $class = ref($proto) || $proto;
+    my $self = {};
+
+    bless($self, $class);
+
+    return $self;
+}
+
+##############################################################################
+# Sub: checkPrerequisites
+# Check any prerequisites before a deployment is begun.  For example, if a
+# particular deployment requires the use of a database system, it could
+# check here that the db is installed and accessible.
+#
+# Parameters:
+# globalHash - hash from config file, including deployment config
+# log - log file handle
+#
+# Returns:
+# None
+#
+sub checkPrerequisites
+{
+    my ($self, $cfg, $log) = @_;
+
+    # They must have declared the directory for their Hadoop installation
+    if (! defined $cfg->{'hadoopdir'} || $cfg->{'hadoopdir'} eq "") {
+        print $log "You must set the key 'hadoopdir' to your Hadoop directory "
+            . "in existing_deployer.conf\n";
+        die "hadoopdir is not set in existing_deployer.conf\n";
+    }
+
+    # Run a quick and easy Hadoop command to make sure we can
+    $self->runHadoopCmd($cfg, $log, "fs -ls /");
+
+    # Make sure the database is installed and set up
+    $self->runDbCmd($cfg, $log, 0, "create table test_table(test_col int);");
+    $self->runDbCmd($cfg, $log, 0, "drop table test_table;");
+    
+}
+
+##############################################################################
+# Sub: deploy
+# Deploy any required packages.
+# This is a no-op in this case because we're assuming both the cluster and the
+# database already exist.
+#
+# Parameters:
+# globalHash - hash from config file, including deployment config
+# log - log file handle
+#
+# Returns:
+# None
+#
+sub deploy
+{
+}
+
+##############################################################################
+# Sub: start
+# Start any software modules that are needed.
+# This is a no-op in this case because we're assuming both the cluster and the
+# database already exist.
+#
+# Parameters:
+# globalHash - hash from config file, including deployment config
+# log - log file handle
+#
+# Returns:
+# None
+#
+sub start
+{
+}
+
+##############################################################################
+# Sub: generateData
+# Generate any data needed for this test run.
+#
+# Parameters:
+# globalHash - hash from config file, including deployment config
+# log - log file handle
+#
+# Returns:
+# None
+#
+sub generateData
+{
+    my ($self, $cfg, $log) = @_;
+    my @tables = (
+        {
+            'name' => "studenttab10k",
+            'filetype' => "studenttab",
+            'rows' => 10000,
+            'hdfs' => "singlefile/studenttab10k",
+        }, {
+            'name' => "votertab10k",
+            'filetype' => "votertab",
+            'rows' => 10000,
+            'hdfs' => "singlefile/votertab10k",
+        }, {
+            'name' => "studentcolon10k",
+            'filetype' => "studentcolon",
+            'rows' => 10000,
+            'hdfs' => "singlefile/studentcolon10k",
+        }, {
+            'name' => "textdoc",
+            'filetype' => "textdoc",
+            'rows' => 10000,
+            'hdfs' => "singlefile/textdoc",
+        }, {
+            'name' => "reg1459894",
+            'filetype' => "reg1459894",
+            'rows' => 1000,
+            'hdfs' => "singlefile/reg1459894",
+        }, {
+            'name' => "studenttabdir10k",
+            'filetype' => "studenttab",
+            'rows' => 10000,
+            'hdfs' => "dir/studenttab10k",
+        }, {
+            'name' => "studenttabsomegood",
+            'filetype' => "studenttab",
+            'rows' => 1000,
+            'hdfs' => "glob/star/somegood/studenttab",
+        }, {
+            'name' => "studenttabmoregood",
+            'filetype' => "studenttab",
+            'rows' => 1001,
+            'hdfs' => "glob/star/moregood/studenttab",
+        }, {
+            'name' => "studenttabbad",
+            'filetype' => "studenttab",
+            'rows' => 1002,
+            'hdfs' => "glob/star/bad/studenttab",
+        }, {
+            'name' => "fileexists",
+            'filetype' => "studenttab",
+            'rows' => 1,
+            'hdfs' => "singlefile/fileexists",
+        }, {
+            'name' => "studenttab20m",
+            'filetype' => "studenttab",
+            'rows' => 20000000,
+            'hdfs' => "singlefile/studenttab20m",
+        }, {
+            'name' => "unicode100",
+            'filetype' => "unicode",
+            'rows' => 100,
+            'hdfs' => "singlefile/unicode100",
+        }, {
+            'name' => "studentctrla10k",
+            'filetype' => "studentctrla",
+            'rows' => 10000,
+            'hdfs' => "singlefile/studentctrla10k",
+        }, {
+            'name' => "studentcomplextab10k",
+            'filetype' => "studentcomplextab",
+            'rows' => 10000,
+            'hdfs' => "singlefile/studentcomplextab10k",
+        }, {
+            'name' => "studentnulltab10k",
+            'filetype' => "studentnulltab",
+            'rows' => 10000,
+            'hdfs' => "singlefile/studentnulltab10k",
+        }, {
+            'name' => "voternulltab10k",
+            'filetype' => "voternulltab",
+            'rows' => 10000,
+            'hdfs' => "singlefile/voternulltab10k",
+        },
+    );
+
+	# Create the HDFS directories
+	$self->runHadoopCmd($cfg, $log, "fs -mkdir $cfg->{'inpathbase'}");
+
+    foreach my $table (@tables) {
+		print "Generating data for $table->{'name'}\n";
+		# Generate the data
+        my @cmd = ($cfg->{'gentool'}, $table->{'filetype'}, $table->{'rows'},
+            $table->{'name'});
+		$self->runCmd($log, \@cmd);
+
+		# Copy the data to HDFS
+		my $hadoop = "fs -copyFromLocal $table->{'name'} ".
+			"$cfg->{'inpathbase'}/$table->{'hdfs'}";
+		$self->runHadoopCmd($cfg, $log, $hadoop);
+
+		# Load the data in the database
+		my $sql = "-f $table->{'name'}.sql";
+		$self->runDbCmd($cfg, $log, 1, $sql);
+    }
+}
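
generateData() runs three steps for every entry in @tables: invoke the generate tool, copy the
output into HDFS under 'inpathbase', and load <name>.sql into the database (so the tool is assumed
to write that SQL file alongside the data). A hypothetical sketch of describing one more dataset
with the same hash layout; the name and row count below are invented:

    # Illustration only -- this entry is not part of the commit.
    push(@tables, {
        'name'     => "studenttab5k",            # invented name
        'filetype' => "studenttab",
        'rows'     => 5000,
        'hdfs'     => "singlefile/studenttab5k",
    });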
+
+##############################################################################
+# Sub: confirmDeployment
+# Run checks to confirm that the deployment was successful.  When this is
+# done, the testing environment should be ready to run.
+#
+# Parameters:
+# globalHash - hash from config file, including deployment config
+# log - log file handle
+#
+# Returns:
+# Nothing
+# This method should die with an appropriate error message if there is 
+# an issue.
+#
+sub confirmDeployment
+{
+}
+
+##############################################################################
+# Sub: deleteData
+# Remove any data created that will not be removed by undeploying.
+#
+# Parameters:
+# globalHash - hash from config file, including deployment config
+# log - log file handle
+#
+# Returns:
+# None
+#
+sub deleteData
+{
+}
+
+##############################################################################
+# Sub: stop
+# Stop any servers or systems that are no longer needed once testing is
+# completed.
+#
+# Parameters:
+# globalHash - hash from config file, including deployment config
+# log - log file handle
+#
+# Returns:
+# None
+#
+sub stop
+{
+}
+
+##############################################################################
+# Sub: undeploy
+# Remove any packages that were installed as part of the deployment.
+#
+# Paramaters:
+# globalHash - hash from config file, including deployment config
+# log - log file handle
+#
+# Returns:
+# None
+#
+sub undeploy
+{
+}
+
+##############################################################################
+# Sub: confirmUndeployment
+# Run checks to confirm that the undeployment was successful.  When this is
+# done, anything that had to be turned off or removed should have been turned
+# off or removed.
+#
+# Parameters:
+# globalHash - hash from config file, including deployment config
+# log - log file handle
+#
+# Returns:
+# Nothing
+# This method should die with an appropriate error message if there is 
+# an issue.
+#
+sub confirmUndeployment
+{
+    die "$0 INFO : confirmUndeployment is a virtual function!";
+}
+
+sub runHadoopCmd($$$$)
+{
+    my ($self, $cfg, $log, $c) = @_;
+
+    # set the HADOOP_CLASSPATH environment variable so hadoop picks up the cluster's conf dir
+    $ENV{'HADOOP_CLASSPATH'} = "$cfg->{'hadoopdir'}/conf";
+                          
+    my @cmd = ("$cfg->{'hadoopdir'}/bin/hadoop");
+    push(@cmd, split(' ', $c));
+
+    $self->runCmd($log, \@cmd);
+}
+
+sub runDbCmd($$$$)
+{
+    my ($self, $cfg, $log, $isfile, $sql) = @_;
+
+	my $switch = $isfile ? 'f' : 'c';
+
+    my @cmd = ('psql', '-U', $cfg->{'dbuser'}, '-d', $cfg->{'dbdb'},
+		"-$switch", $sql);
+
+    $self->runCmd($log, \@cmd);
+}
+
+sub runCmd($$$)
+{
+    my ($self, $log, $cmd) = @_;
+
+    print $log "Going to run " . join(" ", @$cmd) . "\n";
+
+    run($cmd, \undef, $log, $log) or
+        die "Failed running " . join(" ", @$cmd) . "\n";
+}
+
+1;
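
For orientation, a hedged sketch of how the harness's -deploy path might exercise this module (the
real call sequence lives in pig_test_harness.pl and TestDeployer, not here); $cfg and $log stand in
for the merged config hash and the harness log handle:

    # Sketch only; relies solely on the method signatures defined above.
    use ExistingClusterDeployer;

    my $deployer = ExistingClusterDeployer->new();
    $deployer->checkPrerequisites($cfg, $log);   # dies if hadoopdir or the database is unusable
    $deployer->deploy($cfg, $log);               # no-op for an existing cluster
    $deployer->start($cfg, $log);                # no-op for an existing cluster
    $deployer->generateData($cfg, $log);         # generates, uploads, and loads the test datasets
    $deployer->confirmDeployment($cfg, $log);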