You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ga...@apache.org on 2008/03/17 18:41:02 UTC

svn commit: r637987 - in /incubator/pig/trunk: CHANGES.txt bin/ bin/pig bin/startHOD.expect scripts/pig.pl scripts/startHOD.expect

Author: gates
Date: Mon Mar 17 10:40:59 2008
New Revision: 637987

URL: http://svn.apache.org/viewvc?rev=637987&view=rev
Log:
Removed Yahoo specific scripts/pig.pl, replaced with generic bash script bin/pig.  Moved startHOD.expect to bin.

Added:
    incubator/pig/trunk/bin/
    incubator/pig/trunk/bin/pig
    incubator/pig/trunk/bin/startHOD.expect
Removed:
    incubator/pig/trunk/scripts/pig.pl
    incubator/pig/trunk/scripts/startHOD.expect
Modified:
    incubator/pig/trunk/CHANGES.txt

Modified: incubator/pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/incubator/pig/trunk/CHANGES.txt?rev=637987&r1=637986&r2=637987&view=diff
==============================================================================
--- incubator/pig/trunk/CHANGES.txt (original)
+++ incubator/pig/trunk/CHANGES.txt Mon Mar 17 10:40:59 2008
@@ -163,3 +163,6 @@
 
     PIG-129: making sure that temp files are stored in task's home dir and
     cleaned up
+
+    PIG-115: Removed Yahoo specific scripts/pig.pl, replaced with generic
+    bash script bin/pig.  Moved startHOD.expect to bin (joa23 via gates).

Added: incubator/pig/trunk/bin/pig
URL: http://svn.apache.org/viewvc/incubator/pig/trunk/bin/pig?rev=637987&view=auto
==============================================================================
--- incubator/pig/trunk/bin/pig (added)
+++ incubator/pig/trunk/bin/pig Mon Mar 17 10:40:59 2008
@@ -0,0 +1,183 @@
+#!/usr/bin/env bash
+#
+# The Pig command script
+#
+# Usage: pig [--config confdir] [-secretDebugCmd] [pig arguments...]
+#
+#     --config confdir   Use confdir as the conf dir.  Must be the first
+#                        argument; it is consumed here and not passed to Pig.
+#
+#     -secretDebugCmd    Dry run: print the java command line instead of
+#                        executing it.  Not passed to Pig.
+#
+# Environment Variables
+#
+#     JAVA_HOME          The java implementation to use.  Required.
+#
+#     PIG_CLASSPATH      Extra Java CLASSPATH entries, appended last.
+#
+#     PIG_HEAPSIZE       The maximum amount of heap to use, in MB.
+#                        Default is 1000.
+#
+#     PIG_OPTS           Extra Java runtime options.
+#
+#     PIG_CONF_DIR       Alternate conf dir. Default is ${PIG_HOME}/conf.
+#
+#     PIG_LOG_DIR        Log directory. Default is ${PIG_HOME}/logs.
+#
+#     PIG_LOGFILE        Log file name. Default is pig.log.
+#
+#     PIG_ROOT_LOGGER    The root appender. Default is INFO,console,DRFA.
+#
+#     PIG_HADOOP_VERSION Version of hadoop to run with.  Default is 15 (0.15).
+
+cygwin=false
+case "`uname`" in
+CYGWIN*) cygwin=true;;
+esac
+
+# resolve links - $0 may be a softlink
+this="$0"
+while [ -h "$this" ]; do
+    ls=`ls -ld "$this"`
+    link=`expr "$ls" : '.*-> \(.*\)$'`
+    if expr "$link" : '.*/.*' > /dev/null; then
+        this="$link"
+    else
+        this=`dirname "$this"`/"$link"
+    fi
+done
+
+# convert relative path to absolute path
+bin=`dirname "$this"`
+script=`basename "$this"`
+bin=`unset CDPATH; cd "$bin"; pwd`
+this="$bin/$script"
+
+# the root of the Pig installation
+PIG_HOME="$(dirname "$this")/.."
+export PIG_HOME
+
+# check to see if the conf dir is given as an optional argument.  This is
+# done before the remaining arguments are collected so that "--config dir"
+# is not handed on to Pig.
+if [ $# -gt 1 ]
+then
+    if [ "--config" = "$1" ]
+    then
+        shift
+        PIG_CONF_DIR=$1
+        shift
+    fi
+fi
+
+# filter command line parameters; an array keeps arguments that contain
+# whitespace intact
+debug=false
+remaining=()
+for f in "$@"; do
+    if [ "$f" = "-secretDebugCmd" ]; then
+        debug=true
+    else
+        remaining[${#remaining[@]}]="$f"
+    fi
+done
+
+# Allow alternate conf dir location.
+PIG_CONF_DIR="${PIG_CONF_DIR:-$PIG_HOME/conf}"
+
+if [ -f "${PIG_CONF_DIR}/pig-env.sh" ]; then
+    . "${PIG_CONF_DIR}/pig-env.sh"
+fi
+
+# some Java parameters
+if [ "$JAVA_HOME" = "" ]; then
+    echo "Error: JAVA_HOME is not set." >&2
+    exit 1
+fi
+
+JAVA=$JAVA_HOME/bin/java
+JAVA_HEAP_MAX=-Xmx1000m
+
+# check envvars which might override default args
+if [ "$PIG_HEAPSIZE" != "" ]; then
+    JAVA_HEAP_MAX="-Xmx${PIG_HEAPSIZE}m"
+fi
+
+# CLASSPATH initially contains $PIG_CONF_DIR
+CLASSPATH="${PIG_CONF_DIR}"
+CLASSPATH="${CLASSPATH}:$JAVA_HOME/lib/tools.jar"
+
+# for developers, add Pig classes to CLASSPATH
+if [ -d "$PIG_HOME/build/classes" ]; then
+    CLASSPATH="${CLASSPATH}:$PIG_HOME/build/classes"
+fi
+if [ -d "$PIG_HOME/build/test/classes" ]; then
+    CLASSPATH="${CLASSPATH}:$PIG_HOME/build/test/classes"
+fi
+
+# for releases, add core pig to CLASSPATH; during development the pig jar
+# might be in build.  Quoting keeps paths with spaces intact, and the -f
+# test skips a pattern that matched nothing.
+for f in "$PIG_HOME"/pig-*-core.jar "$PIG_HOME"/build/pig-*-core.jar; do
+    if [ -f "$f" ]; then
+        CLASSPATH="${CLASSPATH}:$f"
+    fi
+done
+
+# Set the version for Hadoop, default to 15
+PIG_HADOOP_VERSION="${PIG_HADOOP_VERSION:-15}"
+# add libs to CLASSPATH.  There can be more than one version of the hadoop
+# libraries in the lib dir, so don't blindly add them all.  Only add the one
+# that matches PIG_HADOOP_VERSION.  Only the file name is examined, so that
+# a PIG_HOME path containing "hadoop" does not hide every jar.
+for f in "$PIG_HOME"/lib/*.jar; do
+    [ -f "$f" ] || continue
+    case "${f##*/}" in
+    *"hadoop${PIG_HADOOP_VERSION}.jar")
+        CLASSPATH="${CLASSPATH}:$f";;
+    *hadoop*)
+        ;;
+    *)
+        CLASSPATH="${CLASSPATH}:$f";;
+    esac
+done
+
+# add user-specified CLASSPATH last
+if [ "$PIG_CLASSPATH" != "" ]; then
+    CLASSPATH="${CLASSPATH}:${PIG_CLASSPATH}"
+fi
+
+# default log directory & file
+if [ "$PIG_LOG_DIR" = "" ]; then
+    PIG_LOG_DIR="$PIG_HOME/logs"
+fi
+if [ "$PIG_LOGFILE" = "" ]; then
+    PIG_LOGFILE='pig.log'
+fi
+
+# cygwin path translation
+if $cygwin; then
+    CLASSPATH=`cygpath -p -w "$CLASSPATH"`
+    PIG_HOME=`cygpath -d "$PIG_HOME"`
+    PIG_LOG_DIR=`cygpath -d "$PIG_LOG_DIR"`
+fi
+
+CLASS=org.apache.pig.Main
+
+# PIG_OPTS is deliberately left unquoted so that it splits into separate
+# options; the options added here stay whole even if a path contains spaces
+java_opts=($PIG_OPTS
+    "-Dpig.log.dir=$PIG_LOG_DIR"
+    "-Dpig.log.file=$PIG_LOGFILE"
+    "-Dpig.home.dir=$PIG_HOME"
+    "-Dpig.root.logger=${PIG_ROOT_LOGGER:-INFO,console,DRFA}")
+
+# run it
+if [ "$debug" = "true" ]; then
+    echo "dry run:"
+    echo "$JAVA" "$JAVA_HEAP_MAX" "${java_opts[@]}" -classpath "$CLASSPATH" "$CLASS" "${remaining[@]}"
+    echo
+else
+    exec "$JAVA" "$JAVA_HEAP_MAX" "${java_opts[@]}" -classpath "$CLASSPATH" "$CLASS" "${remaining[@]}"
+fi

Added: incubator/pig/trunk/bin/startHOD.expect
URL: http://svn.apache.org/viewvc/incubator/pig/trunk/bin/startHOD.expect?rev=637987&view=auto
==============================================================================
--- incubator/pig/trunk/bin/startHOD.expect (added)
+++ incubator/pig/trunk/bin/startHOD.expect Mon Mar 17 10:40:59 2008
@@ -0,0 +1,91 @@
+#!/usr/bin/expect
+#
+# This is a wretched expect script to start up HOD and scrape the necessary
+# information we need to run Pig. Tragically, we can't just pipe HOD's output
+# into a script, so we have to use expect. Also the real information we need
+# is not given to us on stdout; rather, we get the name of the configuration
+# file with the information we need on stdout. We have to write actual TCL to
+# parse the file.
+#
+
+#
+# Ask the spawned HOD shell to exit, wait up to 20 seconds for its
+# "do not CTL-C" message, then quit.  Used for signals and for EOF on stdin.
+#
+proc handleExit {} {
+	send "exit\n"
+	set timeout 20
+	expect "do not CTL-C"
+	puts "Exiting"
+	exit
+}
+
+trap handleExit {SIGINT SIGTERM SIGHUP SIGABRT SIGPIPE}
+
+#
+# Quick and dirty parser: return the <value> of the property called name in
+# the Hadoop configuration file, or "" if the file has no such property.
+#
+proc extractProperty {file name} {
+	set fh [open $file r]
+	set contents [read $fh]
+	close $fh
+	# the dots in the property name must match literally
+	set re ">"
+	append re [string map [list . {\.}] $name] {</name>[^<]*<value>([^<]*)</value>}
+	set value ""
+	regexp -- $re $contents match value
+	return $value
+}
+
+#
+# Extract the value of mapred.job.tracker
+#
+proc extractMapRedHostPort {file} {
+	return [extractProperty $file mapred.job.tracker]
+}
+
+#
+# Extract the value of fs.default.name
+#
+proc extractDFSHostPort {file} {
+	return [extractProperty $file fs.default.name]
+}
+
+# default to "-m 15" unless the caller supplied a -m option
+set mOpt {"-m" "15"}
+foreach i $argv {
+	if {$i == "-m"} {
+		set mOpt {};
+	}
+}
+
+log_user 0
+set timeout -1
+# eval splices every argument in as its own word, however many there are
+set args [concat $argv $mOpt]
+eval [list spawn -ignore SIGHUP /export/crawlspace/kryptonite/hod/current/bin/hod -n] $args
+
+expect "HDFS UI on "
+expect "\n"
+puts -nonewline "hdfsUI: $expect_out(buffer)"
+
+expect "Mapred UI on "
+expect "\n"
+puts -nonewline "mapredUI: $expect_out(buffer)"
+
+expect "Hadoop config file in: "
+expect "\n"
+puts -nonewline "hadoopConf: $expect_out(buffer)"
+
+set confFile [string trim $expect_out(buffer)]
+puts "hdfs: [extractDFSHostPort $confFile]\r"
+puts "mapred: [extractMapRedHostPort $confFile]\r"
+
+#
+# Now just wait forever. Eventually we will be ruthlessly killed.
+#
+expect_user {
+	eof { handleExit }
+	timeout {exp_continue}
+}