You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ga...@apache.org on 2011/09/01 20:05:38 UTC
svn commit: r1164189 - in /pig/trunk: CHANGES.txt test/e2e/pig/whirr/
test/e2e/pig/whirr/README test/e2e/pig/whirr/pigtest.properties
test/e2e/pig/whirr/whirr_test_patch.sh
Author: gates
Date: Thu Sep 1 18:05:38 2011
New Revision: 1164189
URL: http://svn.apache.org/viewvc?rev=1164189&view=rev
Log:
PIG-2249 Enable pig e2e testing on EC2
Added:
pig/trunk/test/e2e/pig/whirr/
pig/trunk/test/e2e/pig/whirr/README
pig/trunk/test/e2e/pig/whirr/pigtest.properties (with props)
pig/trunk/test/e2e/pig/whirr/whirr_test_patch.sh
Modified:
pig/trunk/CHANGES.txt
Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1164189&r1=1164188&r2=1164189&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Thu Sep 1 18:05:38 2011
@@ -24,6 +24,8 @@ INCOMPATIBLE CHANGES
IMPROVEMENTS
+PIG-2249: Enable pig e2e testing on EC2 (gates)
+
PIG-2256: Upgrade Avro dependency to 1.5.3 (tucu00 via dvryaboy)
PIG-604: Kill the Pig job should kill all associated Hadoop Jobs (daijy)
Added: pig/trunk/test/e2e/pig/whirr/README
URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/whirr/README?rev=1164189&view=auto
==============================================================================
--- pig/trunk/test/e2e/pig/whirr/README (added)
+++ pig/trunk/test/e2e/pig/whirr/README Thu Sep 1 18:05:38 2011
@@ -0,0 +1,108 @@
+-------------------------------------------------------------------------
+| Licensed to the Apache Software Foundation (ASF) under one
+| or more contributor license agreements. See the NOTICE file
+| distributed with this work for additional information
+| regarding copyright ownership. The ASF licenses this file
+| to you under the Apache License, Version 2.0 (the
+| "License"); you may not use this file except in compliance
+| with the License. You may obtain a copy of the License at
+|
+| http://www.apache.org/licenses/LICENSE-2.0
+|
+| Unless required by applicable law or agreed to in writing, software
+| distributed under the License is distributed on an "AS IS" BASIS,
+| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+| See the License for the specific language governing permissions and
+| limitations under the License.
+-------------------------------------------------------------------------
+
+The tools in this directory can be used to run Pig's end-to-end tests on
+Amazon via Apache Whirr. This is useful for those who do not have a cluster
+available to run them on. In the following text any value that starts
+"your_" is a value you should fill in.
+
+Prerequisites:
+1) An account in Amazon's AWS. http://aws.amazon.com/
+2) An Amazon Access Key ID and Secret Access Key. These are not ssh keys.
+ See
+ http://aws-portal.amazon.com/gp/aws/developer/account/index.html?action=access-key
+ Under Access Credentials, you need an Access Key.
+3) An RSA SSH key pair that is passphraseless. You may want to generate a
+ pair just for use with the tool to avoid forcing your regular ssh key
+ pair to be passphraseless. They must be RSA, Whirr does not work with any
+ of the other key types. You can generate a pair with the command:
+ ssh-keygen -f your_private_rsa_key_file -t rsa -P ''
+ where your_private_rsa_key_file is the file to store the private key in.
+4) Apache Whirr version 0.5 or later. You can download it from
+ http://www.apache.org/dyn/closer.cgi/incubator/whirr/
+
+To Start a Cluster:
+export AWS_ACCESS_KEY_ID=your_amazon_access_key
+export AWS_SECRET_ACCESS_KEY=your_secret_amazon_access_key
+export SSH_PRIVATE_KEY_FILE=your_private_rsa_key_file
+cd your_path_to_apache_whirr/bin
+./whirr launch-cluster --config your_path_to_pig_trunk/test/e2e/pig/whirr/pigtest.properties
+
+This will take ~5 minutes and spew various messages on your screen.
+
+DO NOT FORGET TO SHUTDOWN YOUR CLUSTER (see below) (unless you think Amazon
+is a worthy cause and wish to donate your extra cash to them).
+
+Running the tests:
+Open the file ~/.whirr/pigtest/hadoop-site.xml and find the line that has
+"mapred.job.tracker". The next line should have the hostname that is
+running your Job Tracker. Copy that host name, but NOT the port numbers
+(i.e. the :nnnn, where nnnn is 9001 or something similar). This value will be
+referred to as "your_namenode".
+
+cd your_path_to_pig_src
+scp -i your_private_rsa_key_file test/e2e/pig/whirr/whirr_test_patch.sh your_namenode:~
+
+if you have a patch you want to run
+ scp -i your_private_rsa_key_file your_patch your_namenode:~
+
+ssh -i your_private_rsa_key_file your_namenode
+
+Now you can run the whirr_test_patch to run some or all of the tests against
+trunk or against your patch. To run all the tests against trunk, do
+./whirr_test_patch.sh
+
+To apply your patch and then run the tests, do
+./whirr_test_patch.sh -p your_patch
+
+To run just some of the tests, do
+./whirr_test_patch.sh -t Checkin
+
+Multiple -t options can be passed. It takes test group names or individual
+test names just as the -Dtests.to.run option takes in "ant test-e2e".
+
+whirr_test_patch is not idempotent. It downloads necessary packages, checks
+out trunk, applies your patch if appropriate, and generates the test data and
+loads it into your cluster. Once you have run it successfully once, you should
+not run it again. If you wish to do additional testing, cd to src/trunk and run
+the end-to-end tests via ant as you normally would.
+
+Initial setup takes around 5 minutes. Running all of the nightly tests
+currently (August 2011) takes about 10 hours. When you are just testing a
+patch for submission you are not expected to run the full suite of tests.
+Checkin, plus any tests you've added, plus all that cover the area of your
+change is sufficient.
+
+Shutting down your cluster:
+In the same shell you started the cluster:
+./whirr destroy-cluster --config your_path_to_pig_trunk/test/e2e/pig/whirr/pigtest.properties
+
+Notes:
+1) As noted above, running all of the tests takes about 10 hours. Once you
+ setup your cluster, you are paying for at least one hour. You should
+ easily be able to run a handful of tests in this time to test your
+ patch.
+2) This sets up a cluster with 1 machine as Name Node/Job Tracker and 3
+ Data Nodes/Task Trackers. It uses m1.large images. This is adequate
+ for Pig functional tests, but not for performance testing.
+3) The pigtest.properties file defaults to us-east, which has lower
+ rental rates than us-west.
+4) You can monitor your Amazon EC2 usage (including billing) at
+ https://console.aws.amazon.com/ec2/ Personally I am paranoid and always
+ check this after shutdown to make sure I'm not still paying for a
+ cluster.
Added: pig/trunk/test/e2e/pig/whirr/pigtest.properties
URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/whirr/pigtest.properties?rev=1164189&view=auto
==============================================================================
--- pig/trunk/test/e2e/pig/whirr/pigtest.properties (added)
+++ pig/trunk/test/e2e/pig/whirr/pigtest.properties Thu Sep 1 18:05:38 2011
@@ -0,0 +1,56 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+#
+# Hadoop Cluster for testing Pig on AWS EC2
+#
+
+# Read the Configuration Guide for more info:
+# http://incubator.apache.org/whirr/configuration-guide.html
+
+# Change the cluster name here
+whirr.cluster-name=pigtest
+
+# Change the number of machines in the cluster here
+whirr.instance-templates=1 hadoop-namenode+hadoop-jobtracker,3 hadoop-datanode+hadoop-tasktracker
+
+# For EC2 set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables.
+whirr.provider=aws-ec2
+whirr.identity=${env:AWS_ACCESS_KEY_ID}
+whirr.credential=${env:AWS_SECRET_ACCESS_KEY}
+
+# The size of the instance to use. See http://aws.amazon.com/ec2/instance-types/
+whirr.hardware-id=m1.large
+# Ubuntu 10.04 LTS Lucid. See http://alestic.com/
+# whirr.image-id=us-east-1/ami-da0cf8b3
+# If you choose a different location, make sure whirr.image-id is updated too
+whirr.location-id=us-east-1
+
+# By default use the user system SSH keys. Override them here.
+whirr.private-key-file=${env:SSH_PRIVATE_KEY_FILE}
+whirr.public-key-file=${env:SSH_PRIVATE_KEY_FILE}.pub
+
+# Expert: override Hadoop properties by setting properties with the prefix
+# hadoop-common, hadoop-hdfs, hadoop-mapreduce to set Common, HDFS, MapReduce
+# site properties, respectively. The prefix is removed by Whirr, so that for
+# example, setting
+# hadoop-common.fs.trash.interval=1440
+# will result in fs.trash.interval being set to 1440 in core-site.xml.
+
+# Expert: specify the version of Hadoop to install.
+#whirr.hadoop.version=0.20.2
+#whirr.hadoop.tarball.url=http://archive.apache.org/dist/hadoop/core/hadoop-${whirr.hadoop.version}/hadoop-${whirr.hadoop.version}.tar.gz
Propchange: pig/trunk/test/e2e/pig/whirr/pigtest.properties
------------------------------------------------------------------------------
svn:executable = *
Added: pig/trunk/test/e2e/pig/whirr/whirr_test_patch.sh
URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/whirr/whirr_test_patch.sh?rev=1164189&view=auto
==============================================================================
--- pig/trunk/test/e2e/pig/whirr/whirr_test_patch.sh (added)
+++ pig/trunk/test/e2e/pig/whirr/whirr_test_patch.sh Thu Sep 1 18:05:38 2011
@@ -0,0 +1,142 @@
+#!/bin/bash
+
+##########################################################################
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Sets up a fresh Whirr-provisioned EC2 node to run Pig's end-to-end tests:
+# downloads ant and an old Pig release, installs the CPAN modules the test
+# harness needs, checks out Pig trunk, optionally applies a patch, then
+# deploys the test data and runs the e2e tests.
+# Usage: whirr_test_patch.sh [-t testtorun]... [-p patchfile]
+# NOTE(review): not idempotent -- the mkdir calls below fail on a second
+# run in the same directory (see the accompanying README).
+
+# Baseline Pig release and ant version to download, plus the Hadoop conf
+# directory and binary as laid out on a Whirr-built cluster node.
+OLD_PIG=0.8.1
+ANT_VERSION=1.8.2
+HADOOP_CONF=/etc/hadoop/conf
+HADOOP_BIN=/usr/local/hadoop/bin/hadoop
+
+# Parse the command line: -t (repeatable) accumulates test names/groups;
+# -p/--patch names a patch file to apply to trunk before testing.
+while (( $# > 0 ))
+do
+ if [[ $1 = "-t" ]]
+ then
+ TESTS_TO_RUN="${TESTS_TO_RUN} -t $2"
+ shift; shift
+ elif [[ $1 == "-p" || $1 == "--patch" ]]
+ then
+ PATCH=$2
+ shift; shift
+ else
+ echo Usage: $0 [-t testtorun ] [ -p patchfile ]
+ echo "\t -t can be given multiple times"
+# NOTE(review): bash's builtin echo does not expand \t without -e, so the
+# backslash-t above prints literally.
+ exit 1
+ fi
+done
+
+# Wrap the accumulated -t options into a single ant property.
+# NOTE(review): the escaped quotes become literal characters in the value,
+# and ${TESTS_TO_RUN} is expanded unquoted in the ant invocation at the
+# bottom -- verify ant receives the intended -Dtests.to.run value.
+if [[ -n ${TESTS_TO_RUN} ]]
+then
+ TESTS_TO_RUN="-Dtests.to.run=\"$TESTS_TO_RUN\""
+fi
+
+# Download ant and old version of Pig
+mkdir tools
+rc=$?
+if (( $rc != 0 ))
+then
+ echo Failed to create tools directory
+ exit $rc
+fi
+
+cd tools
+wget http://archive.apache.org/dist//ant/binaries/apache-ant-${ANT_VERSION}-bin.tar.gz
+rc=$?
+if (( $rc != 0 ))
+then
+ echo Failed to fetch ant tarball
+ exit $rc
+fi
+
+tar zxf apache-ant-${ANT_VERSION}-bin.tar.gz
+rc=$?
+if (( $rc != 0 ))
+then
+ echo Failed to untar ant tarball
+ exit $rc
+fi
+
+# The "old" Pig release is used by the e2e harness to generate expected
+# results to compare trunk's output against.
+wget http://archive.apache.org/dist//pig/pig-${OLD_PIG}/pig-${OLD_PIG}.tar.gz
+rc=$?
+if (( $rc != 0 ))
+then
+ echo Failed to fetch old pig tarball
+ exit $rc
+fi
+
+tar zxf pig-${OLD_PIG}.tar.gz
+rc=$?
+if (( $rc != 0 ))
+then
+ echo Failed to untar old pig tarball
+ exit $rc
+fi
+
+# Fetch needed CPAN modules
+cd ..
+sudo cpan IPC::Run # need to find a way to make this headless
+rc=$?
+if (( $rc != 0 ))
+then
+ echo Failed to fetch IPC::Run
+ exit $rc
+fi
+
+sudo cpan DBI
+rc=$?
+if (( $rc != 0 ))
+then
+ echo Failed to fetch DBI
+ exit $rc
+fi
+
+# Fetch the source
+mkdir src
+rc=$?
+if (( $rc != 0 ))
+then
+ echo Failed to create src directory
+ exit $rc
+fi
+
+cd src
+svn co http://svn.apache.org/repos/asf/pig/trunk
+rc=$?
+if (( $rc != 0 ))
+then
+ echo Failed to checkout code from svn
+ exit $rc
+fi
+
+cd trunk
+
+# Apply the user's patch against the fresh checkout, if one was given.
+if [[ -n ${PATCH} ]]
+then
+ patch -p0 < ${PATCH}
+ rc=$?
+ if (( $rc != 0 ))
+ then
+ echo Failed to apply patch ${PATCH}
+ exit $rc
+ fi
+fi
+
+# Deploy the e2e test data to the cluster, then run the requested tests.
+# NOTE(review): the deploy step's exit status is not checked before
+# test-e2e is run.
+${HOME}/tools/apache-ant-${ANT_VERSION}/bin/ant -Dharness.old.pig=${HOME}/tools/pig-${OLD_PIG} -Dharness.cluster.conf=${HADOOP_CONF} -Dharness.cluster.bin=${HADOOP_BIN} test-e2e-deploy
+${HOME}/tools/apache-ant-${ANT_VERSION}/bin/ant -Dharness.old.pig=${HOME}/tools/pig-${OLD_PIG} -Dharness.cluster.conf=${HADOOP_CONF} -Dharness.cluster.bin=${HADOOP_BIN} ${TESTS_TO_RUN} test-e2e
+
+