You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ga...@apache.org on 2011/09/01 20:05:38 UTC

svn commit: r1164189 - in /pig/trunk: CHANGES.txt test/e2e/pig/whirr/ test/e2e/pig/whirr/README test/e2e/pig/whirr/pigtest.properties test/e2e/pig/whirr/whirr_test_patch.sh

Author: gates
Date: Thu Sep  1 18:05:38 2011
New Revision: 1164189

URL: http://svn.apache.org/viewvc?rev=1164189&view=rev
Log:
PIG-2249 Enable pig e2e testing on EC2

Added:
    pig/trunk/test/e2e/pig/whirr/
    pig/trunk/test/e2e/pig/whirr/README
    pig/trunk/test/e2e/pig/whirr/pigtest.properties   (with props)
    pig/trunk/test/e2e/pig/whirr/whirr_test_patch.sh
Modified:
    pig/trunk/CHANGES.txt

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1164189&r1=1164188&r2=1164189&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Thu Sep  1 18:05:38 2011
@@ -24,6 +24,8 @@ INCOMPATIBLE CHANGES
 
 IMPROVEMENTS
 
+PIG-2249: Enable pig e2e testing on EC2 (gates)
+
 PIG-2256: Upgrade Avro dependency to 1.5.3 (tucu00 via dvryaboy)
 
 PIG-604: Kill the Pig job should kill all associated Hadoop Jobs (daijy)

Added: pig/trunk/test/e2e/pig/whirr/README
URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/whirr/README?rev=1164189&view=auto
==============================================================================
--- pig/trunk/test/e2e/pig/whirr/README (added)
+++ pig/trunk/test/e2e/pig/whirr/README Thu Sep  1 18:05:38 2011
@@ -0,0 +1,108 @@
+-------------------------------------------------------------------------
+| Licensed to the Apache Software Foundation (ASF) under one
+| or more contributor license agreements.  See the NOTICE file
+| distributed with this work for additional information
+| regarding copyright ownership.  The ASF licenses this file
+| to you under the Apache License, Version 2.0 (the
+| "License"); you may not use this file except in compliance
+| with the License.  You may obtain a copy of the License at
+|  
+|     http://www.apache.org/licenses/LICENSE-2.0
+|  
+| Unless required by applicable law or agreed to in writing, software
+| distributed under the License is distributed on an "AS IS" BASIS,
+| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+| See the License for the specific language governing permissions and
+| limitations under the License.
+-------------------------------------------------------------------------
+ 
+The tools in this directory can be used to run Pig's end-to-end tests on
+Amazon via Apache Whirr.  This is useful for those who do not have a cluster
+available to run them on.  In the following text any value that starts
+"your_" is a value you should fill in.
+
+Prerequisites:
+1)  An account in Amazon's AWS.  http://aws.amazon.com/
+2)  An Amazon Access Key ID and Secret Access Key.  These are not ssh keys.
+    See
+    http://aws-portal.amazon.com/gp/aws/developer/account/index.html?action=access-key
+    Under Access Credentials, you need an Access Key.
+3)  An RSA SSH key pair that is passphraseless.  You may want to generate a
+    pair just for use with the tool to avoid forcing your regular ssh key
+    pair to be passphraseless.  They must be RSA, Whirr does not work with any
+    of the other key types.  You can generate a pair with the command:
+        ssh-keygen -f your_private_rsa_key_file -t rsa -P ''
+    where your_private_rsa_key_file is the file to store the private key in.
+4)  Apache Whirr version 0.5 or later.  You can download it from
+    http://www.apache.org/dyn/closer.cgi/incubator/whirr/
+
+To Start a Cluster:
+export AWS_ACCESS_KEY_ID=your_amazon_access_key
+export AWS_SECRET_ACCESS_KEY=your_secret_amazon_access_key
+export SSH_PRIVATE_KEY_FILE=your_private_rsa_key_file
+cd your_path_to_apache_whirr/bin
+./whirr launch-cluster --config your_path_to_pig_trunk/test/e2e/pig/whirr/pigtest.properties
+
+This will take ~5 minutes and spew various messages on your screen.
+
+DO NOT FORGET TO SHUT DOWN YOUR CLUSTER (see below) (unless you consider
+Amazon a worthy cause and wish to donate your extra cash to them).
+
+Running the tests:
+Open the file ~/.whirr/pigtest/hadoop-site.xml and find the line that has
+"mapred.job.tracker".  The next line should have the hostname that is
+running your Job Tracker.  Copy that host name, but NOT the port number
+(i.e. the :nnnn where nnnn is 9001 or something similar).  This value will
+be referred to as "your_namenode" (the Name Node runs on that same machine).
+
+cd your_path_to_pig_trunk
+scp -i your_private_rsa_key_file test/e2e/pig/whirr/whirr_test_patch.sh your_namenode:~
+
+if you have a patch you want to run
+    scp -i your_private_rsa_key_file your_patch your_namenode:~
+
+ssh -i your_private_rsa_key_file your_namenode
+
+Now you can run the whirr_test_patch to run some or all of the tests against
+trunk or against your patch.  To run all the tests against trunk, do
+./whirr_test_patch.sh
+
+To apply your patch and then run the tests, do
+./whirr_test_patch.sh -p your_patch
+
+To run just some of the tests, do
+./whirr_test_patch.sh -t Checkin
+
+Multiple -t options can be passed.  It takes test group names or individual
+test names just as the -Dtests.to.run option takes in "ant test-e2e".
+
+whirr_test_patch is not idempotent.  It downloads necessary packages, checks
+out trunk, applies your patch if appropriate, and generates the test data and
+loads into your cluster.  Once you have successfully run it once, you should
+not run it again.  If you wish to do additional testing cd src/trunk and run
+the end-to-end tests via ant as you normally would.
+
+Initial setup takes around 5 minutes.  Running all of the nightly tests
+currently (August 2011) takes about 10 hours.  When you are just testing a
+patch for submission you are not expected to run the full suite of tests.
+Checkin, plus any tests you've added, plus all that cover the area of your
+change is sufficient.
+
+Shutting down your cluster:
+In the same shell you started the cluster:
+./whirr destroy-cluster --config your_path_to_pig_trunk/test/e2e/pig/whirr/pigtest.properties
+
+Notes:
+1)  As noted above, running all of the tests takes about 10 hours.  Once you
+    setup your cluster, you are paying for at least one hour.  You should
+    easily be able to run a handful of tests in this time to test your
+    patch.
+2)  This sets up a cluster with 1 machine as Name Node/Job Tracker and 3
+    Data Nodes/Task Trackers.  It uses m1.large images.  This is adequate
+    for Pig functional tests, but not for performance testing.
+3)  The pigtest.properties file defaults to us-east, which has lower
+    rental rates than us-west.
+4)  You can monitor your Amazon EC2 usage (including billing) at
+    https://console.aws.amazon.com/ec2/  Personally I am paranoid and always
+    check this after shutdown to make sure I'm not still paying for a
+    cluster.

Added: pig/trunk/test/e2e/pig/whirr/pigtest.properties
URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/whirr/pigtest.properties?rev=1164189&view=auto
==============================================================================
--- pig/trunk/test/e2e/pig/whirr/pigtest.properties (added)
+++ pig/trunk/test/e2e/pig/whirr/pigtest.properties Thu Sep  1 18:05:38 2011
@@ -0,0 +1,56 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+#
+# Hadoop Cluster for testing Pig on AWS EC2
+# 
+
+# Read the Configuration Guide for more info:
+# http://incubator.apache.org/whirr/configuration-guide.html
+
+# Change the cluster name here
+whirr.cluster-name=pigtest
+
+# Change the number of machines in the cluster here
+whirr.instance-templates=1 hadoop-namenode+hadoop-jobtracker,3 hadoop-datanode+hadoop-tasktracker
+
+# For EC2 set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables.
+whirr.provider=aws-ec2
+whirr.identity=${env:AWS_ACCESS_KEY_ID}
+whirr.credential=${env:AWS_SECRET_ACCESS_KEY}
+
+# The size of the instance to use. See http://aws.amazon.com/ec2/instance-types/
+whirr.hardware-id=m1.large
+# Ubuntu 10.04 LTS Lucid. See http://alestic.com/
+# whirr.image-id=us-east-1/ami-da0cf8b3
+# If you choose a different location, make sure whirr.image-id is updated too
+whirr.location-id=us-east-1
+
+# By default use the user system SSH keys. Override them here.
+whirr.private-key-file=${env:SSH_PRIVATE_KEY_FILE}
+whirr.public-key-file=${env:SSH_PRIVATE_KEY_FILE}.pub
+
+# Expert: override Hadoop properties by setting properties with the prefix
+# hadoop-common, hadoop-hdfs, hadoop-mapreduce to set Common, HDFS, MapReduce
+# site properties, respectively. The prefix is removed by Whirr, so that for
+# example, setting 
+# hadoop-common.fs.trash.interval=1440
+# will result in fs.trash.interval being set to 1440 in core-site.xml.
+
+# Expert: specify the version of Hadoop to install.
+#whirr.hadoop.version=0.20.2
+#whirr.hadoop.tarball.url=http://archive.apache.org/dist/hadoop/core/hadoop-${whirr.hadoop.version}/hadoop-${whirr.hadoop.version}.tar.gz

Propchange: pig/trunk/test/e2e/pig/whirr/pigtest.properties
------------------------------------------------------------------------------
    svn:executable = *

Added: pig/trunk/test/e2e/pig/whirr/whirr_test_patch.sh
URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/whirr/whirr_test_patch.sh?rev=1164189&view=auto
==============================================================================
--- pig/trunk/test/e2e/pig/whirr/whirr_test_patch.sh (added)
+++ pig/trunk/test/e2e/pig/whirr/whirr_test_patch.sh Thu Sep  1 18:05:38 2011
@@ -0,0 +1,142 @@
+#!/bin/bash
+
+##########################################################################
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+OLD_PIG=0.8.1
+ANT_VERSION=1.8.2
+HADOOP_CONF=/etc/hadoop/conf
+HADOOP_BIN=/usr/local/hadoop/bin/hadoop
+
+while (( $# > 0 ))
+do
+    if [[ $1 = "-t" ]]
+    then
+        TESTS_TO_RUN="${TESTS_TO_RUN} -t $2"
+        shift; shift
+    elif [[ $1 == "-p" || $1 == "--patch" ]]
+    then
+        PATCH=$2
+        shift; shift
+    else
+        echo "Usage: $0 [ -t testtorun ] [ -p patchfile ]"
+        echo -e "\t -t can be given multiple times"
+        exit 1
+    fi
+done
+
+if [[ -n ${TESTS_TO_RUN} ]]
+then
+    TESTS_TO_RUN="-Dtests.to.run=\"$TESTS_TO_RUN\""
+fi
+
+# Download ant and old version of Pig
+mkdir tools
+rc=$?
+if (( $rc != 0 ))
+then
+    echo Failed to create tools directory
+    exit $rc
+fi
+
+cd tools
+wget http://archive.apache.org/dist/ant/binaries/apache-ant-${ANT_VERSION}-bin.tar.gz
+rc=$?
+if (( $rc != 0 ))
+then
+    echo Failed to fetch ant tarball
+    exit $rc
+fi
+
+tar zxf apache-ant-${ANT_VERSION}-bin.tar.gz 
+rc=$?
+if (( $rc != 0 ))
+then
+    echo Failed to untar ant tarball
+    exit $rc
+fi
+
+wget http://archive.apache.org/dist/pig/pig-${OLD_PIG}/pig-${OLD_PIG}.tar.gz
+rc=$?
+if (( $rc != 0 ))
+then
+    echo Failed to fetch old pig tarball
+    exit $rc
+fi
+
+tar zxf pig-${OLD_PIG}.tar.gz 
+rc=$?
+if (( $rc != 0 ))
+then
+    echo Failed to untar old pig tarball
+    exit $rc
+fi
+
+# Fetch needed CPAN modules
+cd ..
+sudo cpan IPC::Run # need to find a way to make this headless
+rc=$?
+if (( $rc != 0 ))
+then
+    echo Failed to fetch IPC::Run
+    exit $rc
+fi
+
+sudo cpan DBI
+rc=$?
+if (( $rc != 0 ))
+then
+    echo Failed to fetch DBI
+    exit $rc
+fi
+
+# Fetch the source
+mkdir src
+rc=$?
+if (( $rc != 0 ))
+then
+    echo Failed to create src directory
+    exit $rc
+fi
+
+cd src
+svn co http://svn.apache.org/repos/asf/pig/trunk 
+rc=$?
+if (( $rc != 0 ))
+then
+    echo Failed to checkout code from svn
+    exit $rc
+fi
+
+cd trunk
+
+if [[ -n ${PATCH} ]] 
+then
+    patch -p0 < ${PATCH}
+    rc=$?
+    if (( $rc != 0 ))
+    then
+        echo Failed to apply patch ${PATCH}
+        exit $rc
+    fi
+fi
+
+${HOME}/tools/apache-ant-${ANT_VERSION}/bin/ant -Dharness.old.pig=${HOME}/tools/pig-${OLD_PIG} -Dharness.cluster.conf=${HADOOP_CONF} -Dharness.cluster.bin=${HADOOP_BIN} test-e2e-deploy
+${HOME}/tools/apache-ant-${ANT_VERSION}/bin/ant -Dharness.old.pig=${HOME}/tools/pig-${OLD_PIG} -Dharness.cluster.conf=${HADOOP_CONF} -Dharness.cluster.bin=${HADOOP_BIN} ${TESTS_TO_RUN} test-e2e
+
+