Posted to commits@whirr.apache.org by to...@apache.org on 2010/08/30 21:54:26 UTC
svn commit: r990923 - in /incubator/whirr/trunk: ./
core/src/main/java/org/apache/whirr/service/ scripts/cloudera/
scripts/cloudera/cdh/
services/hadoop/src/main/java/org/apache/whirr/service/hadoop/
Author: tomwhite
Date: Mon Aug 30 19:54:26 2010
New Revision: 990923
URL: http://svn.apache.org/viewvc?rev=990923&view=rev
Log:
WHIRR-54. Implement service/cdh.
Added:
incubator/whirr/trunk/scripts/cloudera/
incubator/whirr/trunk/scripts/cloudera/cdh/
incubator/whirr/trunk/scripts/cloudera/cdh/install (with props)
Modified:
incubator/whirr/trunk/CHANGES.txt
incubator/whirr/trunk/core/src/main/java/org/apache/whirr/service/ClusterSpec.java
incubator/whirr/trunk/services/hadoop/src/main/java/org/apache/whirr/service/hadoop/HadoopService.java
Modified: incubator/whirr/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/incubator/whirr/trunk/CHANGES.txt?rev=990923&r1=990922&r2=990923&view=diff
==============================================================================
--- incubator/whirr/trunk/CHANGES.txt (original)
+++ incubator/whirr/trunk/CHANGES.txt Mon Aug 30 19:54:26 2010
@@ -8,6 +8,9 @@ Trunk (unreleased changes)
WHIRR-33. Add a CLI. (tomwhite)
+ WHIRR-54. Implement service/cdh. To run CDH rather than Apache Hadoop, set
+ the property whirr.hadoop-install-runurl to cloudera/cdh/install. (tomwhite)
+
IMPROVEMENTS
WHIRR-2. Import initial Java source code. (tomwhite)
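
For example, a client-side properties file read by ClusterSpec.fromConfiguration
can select CDH. The property name and value come from this patch; the file name
and the heredoc below are an illustrative sketch only, not part of the commit:

    # append the CDH selector to a hypothetical cluster properties file
    cat >> hadoop.properties <<EOF
    whirr.hadoop-install-runurl=cloudera/cdh/install
    EOF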
Modified: incubator/whirr/trunk/core/src/main/java/org/apache/whirr/service/ClusterSpec.java
URL: http://svn.apache.org/viewvc/incubator/whirr/trunk/core/src/main/java/org/apache/whirr/service/ClusterSpec.java?rev=990923&r1=990922&r2=990923&view=diff
==============================================================================
--- incubator/whirr/trunk/core/src/main/java/org/apache/whirr/service/ClusterSpec.java (original)
+++ incubator/whirr/trunk/core/src/main/java/org/apache/whirr/service/ClusterSpec.java Mon Aug 30 19:54:26 2010
@@ -32,6 +32,7 @@ import java.util.List;
import java.util.Set;
import org.apache.commons.configuration.Configuration;
+import org.apache.commons.configuration.PropertiesConfiguration;
/**
* This class represents the specification of a cluster. It is used to describe
@@ -137,6 +138,7 @@ public class ClusterSpec {
private String clusterName;
private String secretKeyFile;
private List<String> clientCidrs = Lists.newArrayList();
+ private Configuration config = new PropertiesConfiguration();
public static ClusterSpec fromConfiguration(Configuration config) {
ClusterSpec spec = new ClusterSpec();
@@ -150,6 +152,7 @@ public class ClusterSpec {
spec.setClusterName(config.getString(Property.CLUSTER_NAME.getConfigName()));
spec.setSecretKeyFile(config.getString(Property.SECRET_KEY_FILE.getConfigName()));
spec.setClientCidrs(config.getList(Property.CLIENT_CIDRS.getConfigName()));
+ spec.config = config;
return spec;
}
@@ -225,6 +228,11 @@ public class ClusterSpec {
}
//
+
+ public Configuration getConfiguration() {
+ return config;
+ }
+
public String readPrivateKey() throws IOException {
return Files.toString(new File(getSecretKeyFile()), Charsets.UTF_8);
}
Added: incubator/whirr/trunk/scripts/cloudera/cdh/install
URL: http://svn.apache.org/viewvc/incubator/whirr/trunk/scripts/cloudera/cdh/install?rev=990923&view=auto
==============================================================================
--- incubator/whirr/trunk/scripts/cloudera/cdh/install (added)
+++ incubator/whirr/trunk/scripts/cloudera/cdh/install Mon Aug 30 19:54:26 2010
@@ -0,0 +1,495 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Install CDH.
+#
+
+set -x
+set -e
+
+################################################################################
+# Initialize variables
+################################################################################
+
+ROLES=$1
+shift
+
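+# Invoked (via runurl) with a comma-separated role list and options; per the
+# HadoopService change in this commit, e.g.:
+#   install nn,jt -c ec2                     # master: namenode + jobtracker
+#   install dn,tt -n <nn-host> -j <jt-host>  # worker: datanode + tasktracker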
+NN_HOST=
+JT_HOST=
+CLOUD_PROVIDER=
+while getopts "n:j:c:" OPTION; do
+ case $OPTION in
+ n)
+ NN_HOST="$OPTARG"
+ ;;
+ j)
+ JT_HOST="$OPTARG"
+ ;;
+ c)
+ CLOUD_PROVIDER="$OPTARG"
+ ;;
+ esac
+done
+
+case $CLOUD_PROVIDER in
+ ec2)
+ # Use public hostname for EC2
+ SELF_HOST=`wget -q -O - http://169.254.169.254/latest/meta-data/public-hostname`
+ ;;
+ *)
+ SELF_HOST=`/sbin/ifconfig eth0 | grep 'inet addr:' | cut -d: -f2 | awk '{ print $1}'`
+ ;;
+esac
+
+REPO=${REPO:-cdh3}
+HADOOP=hadoop-${HADOOP_VERSION:-0.20}
+HADOOP_CONF_DIR=/etc/$HADOOP/conf.dist
+for role in $(echo "$ROLES" | tr "," "\n"); do
+ case $role in
+ nn)
+ NN_HOST=$SELF_HOST
+ ;;
+ jt)
+ JT_HOST=$SELF_HOST
+ ;;
+ esac
+done
+
+function update_repo() {
+ if which dpkg &> /dev/null; then
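+ # NB: this apt source is pinned to the Ubuntu Lucid release (lucid-$REPO).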
+ cat > /etc/apt/sources.list.d/cloudera.list <<EOF
+deb http://archive.cloudera.com/debian lucid-$REPO contrib
+deb-src http://archive.cloudera.com/debian lucid-$REPO contrib
+EOF
+ curl -s http://archive.cloudera.com/debian/archive.key | sudo apt-key add -
+ sudo apt-get update
+ elif which rpm &> /dev/null; then
+ rm -f /etc/yum.repos.d/cloudera.repo
+ REPO_NUMBER=`echo $REPO | sed -e 's/cdh\([0-9][0-9]*\)/\1/'`
+ cat > /etc/yum.repos.d/cloudera-$REPO.repo <<EOF
+[cloudera-$REPO]
+name=Cloudera's Distribution for Hadoop, Version $REPO_NUMBER
+mirrorlist=http://archive.cloudera.com/redhat/cdh/$REPO_NUMBER/mirrors
+gpgkey = http://archive.cloudera.com/redhat/cdh/RPM-GPG-KEY-cloudera
+gpgcheck = 0
+EOF
+ yum update -y yum
+ fi
+}
+
+# Install a list of packages on Debian or Red Hat, as appropriate
+function install_packages() {
+ if which dpkg &> /dev/null; then
+ apt-get update
+ apt-get -y install $@
+ elif which rpm &> /dev/null; then
+ yum install -y $@
+ else
+ echo "No package manager found."
+ fi
+}
+
+# Install Hadoop packages and dependencies
+function install_hadoop() {
+ if which dpkg &> /dev/null; then
+ apt-get update
+ apt-get -y install $HADOOP
+ cp -r /etc/$HADOOP/conf.empty $HADOOP_CONF_DIR
+ update-alternatives --install /etc/$HADOOP/conf $HADOOP-conf $HADOOP_CONF_DIR 90
+ elif which rpm &> /dev/null; then
+ yum install -y $HADOOP
+ cp -r /etc/$HADOOP/conf.empty $HADOOP_CONF_DIR
+ alternatives --install /etc/$HADOOP/conf $HADOOP-conf $HADOOP_CONF_DIR 90
+ fi
+}
+
+function prep_disk() {
+ mount=$1
+ device=$2
+ automount=${3:-false}
+
+ echo "warning: ERASING CONTENTS OF $device"
+ mkfs.xfs -f $device
+ if [ ! -e $mount ]; then
+ mkdir $mount
+ fi
+ mount -o defaults,noatime $device $mount
+ if $automount ; then
+ echo "$device $mount xfs defaults,noatime 0 0" >> /etc/fstab
+ fi
+}
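+# Illustrative usage only -- prep_disk is defined but not called in this script:
+#   prep_disk /data /dev/sdb true  # mkfs.xfs /dev/sdb, mount at /data, persist in fstab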
+
+function make_hadoop_dirs {
+ for mount in "$@"; do
+ if [ ! -e $mount/hadoop ]; then
+ mkdir -p $mount/hadoop
+ chown hadoop:hadoop $mount/hadoop
+ fi
+ done
+}
+
+# Configure Hadoop by setting up disks and site file
+function configure_hadoop() {
+
+ MOUNT=/data
+ FIRST_MOUNT=$MOUNT
+ DFS_NAME_DIR=$MOUNT/hadoop/hdfs/name
+ FS_CHECKPOINT_DIR=$MOUNT/hadoop/hdfs/secondary
+ DFS_DATA_DIR=$MOUNT/hadoop/hdfs/data
+ MAPRED_LOCAL_DIR=$MOUNT/hadoop/mapred/local
+ MAX_MAP_TASKS=2
+ MAX_REDUCE_TASKS=1
+ CHILD_OPTS=-Xmx550m
+ CHILD_ULIMIT=1126400
+ DFS_REPLICATION=${DFS_REPLICATION:-3} # referenced in hadoop-site.xml below; default to 3
+ TMP_DIR=$MOUNT/tmp/hadoop-\${user.name}
+
+ mkdir -p $MOUNT/hadoop
+ chown hadoop:hadoop $MOUNT/hadoop
+ mkdir $MOUNT/tmp
+ chmod a+rwxt $MOUNT/tmp
+
+ ##############################################################################
+ # Modify this section to customize your Hadoop cluster.
+ ##############################################################################
+ cat > $HADOOP_CONF_DIR/hadoop-site.xml <<EOF
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<configuration>
+<property>
+ <name>dfs.block.size</name>
+ <value>134217728</value>
+ <final>true</final>
+</property>
+<property>
+ <name>dfs.data.dir</name>
+ <value>$DFS_DATA_DIR</value>
+ <final>true</final>
+</property>
+<property>
+ <name>dfs.datanode.du.reserved</name>
+ <value>1073741824</value>
+ <final>true</final>
+</property>
+<property>
+ <name>dfs.datanode.handler.count</name>
+ <value>3</value>
+ <final>true</final>
+</property>
+<!--property>
+ <name>dfs.hosts</name>
+ <value>$HADOOP_CONF_DIR/dfs.hosts</value>
+ <final>true</final>
+</property-->
+<!--property>
+ <name>dfs.hosts.exclude</name>
+ <value>$HADOOP_CONF_DIR/dfs.hosts.exclude</value>
+ <final>true</final>
+</property-->
+<property>
+ <name>dfs.name.dir</name>
+ <value>$DFS_NAME_DIR</value>
+ <final>true</final>
+</property>
+<property>
+ <name>dfs.namenode.handler.count</name>
+ <value>5</value>
+ <final>true</final>
+</property>
+<property>
+ <name>dfs.permissions</name>
+ <value>true</value>
+ <final>true</final>
+</property>
+<property>
+ <name>dfs.replication</name>
+ <value>$DFS_REPLICATION</value>
+</property>
+<property>
+ <name>fs.checkpoint.dir</name>
+ <value>$FS_CHECKPOINT_DIR</value>
+ <final>true</final>
+</property>
+<property>
+ <name>fs.default.name</name>
+ <value>hdfs://$NN_HOST:8020/</value>
+</property>
+<property>
+ <name>fs.trash.interval</name>
+ <value>1440</value>
+ <final>true</final>
+</property>
+<property>
+ <name>hadoop.tmp.dir</name>
+ <value>$MOUNT/tmp/hadoop-\${user.name}</value>
+ <final>true</final>
+</property>
+<property>
+ <name>io.file.buffer.size</name>
+ <value>65536</value>
+</property>
+<property>
+ <name>mapred.child.java.opts</name>
+ <value>$CHILD_OPTS</value>
+</property>
+<property>
+ <name>mapred.child.ulimit</name>
+ <value>$CHILD_ULIMIT</value>
+ <final>true</final>
+</property>
+<property>
+ <name>mapred.job.tracker</name>
+ <value>$JT_HOST:8021</value>
+</property>
+<property>
+ <name>mapred.job.tracker.handler.count</name>
+ <value>5</value>
+ <final>true</final>
+</property>
+<property>
+ <name>mapred.local.dir</name>
+ <value>$MAPRED_LOCAL_DIR</value>
+ <final>true</final>
+</property>
+<property>
+ <name>mapred.map.tasks.speculative.execution</name>
+ <value>true</value>
+</property>
+<property>
+ <name>mapred.reduce.parallel.copies</name>
+ <value>10</value>
+</property>
+<property>
+ <name>mapred.reduce.tasks</name>
+ <value>10</value>
+</property>
+<property>
+ <name>mapred.reduce.tasks.speculative.execution</name>
+ <value>false</value>
+</property>
+<property>
+ <name>mapred.submit.replication</name>
+ <value>10</value>
+</property>
+<property>
+ <name>mapred.system.dir</name>
+ <value>/hadoop/system/mapred</value>
+</property>
+<property>
+ <name>mapred.tasktracker.map.tasks.maximum</name>
+ <value>$MAX_MAP_TASKS</value>
+ <final>true</final>
+</property>
+<property>
+ <name>mapred.tasktracker.reduce.tasks.maximum</name>
+ <value>$MAX_REDUCE_TASKS</value>
+ <final>true</final>
+</property>
+<property>
+ <name>tasktracker.http.threads</name>
+ <value>46</value>
+ <final>true</final>
+</property>
+<property>
+ <name>mapred.compress.map.output</name>
+ <value>true</value>
+</property>
+<property>
+ <name>mapred.output.compression.type</name>
+ <value>BLOCK</value>
+</property>
+<property>
+ <name>hadoop.rpc.socket.factory.class.default</name>
+ <value>org.apache.hadoop.net.StandardSocketFactory</value>
+ <final>true</final>
+</property>
+<property>
+ <name>hadoop.rpc.socket.factory.class.ClientProtocol</name>
+ <value></value>
+ <final>true</final>
+</property>
+<property>
+ <name>hadoop.rpc.socket.factory.class.JobSubmissionProtocol</name>
+ <value></value>
+ <final>true</final>
+</property>
+<property>
+ <name>io.compression.codecs</name>
+ <value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec</value>
+</property>
+<!-- Hue configuration -->
+<property>
+ <name>dfs.namenode.plugins</name>
+ <value>org.apache.hadoop.thriftfs.NamenodePlugin</value>
+ <description>Comma-separated list of namenode plug-ins to be activated.
+ </description>
+</property>
+<property>
+ <name>dfs.datanode.plugins</name>
+ <value>org.apache.hadoop.thriftfs.DatanodePlugin</value>
+ <description>Comma-separated list of datanode plug-ins to be activated.
+ </description>
+</property>
+<property>
+ <name>dfs.thrift.address</name>
+ <value>0.0.0.0:9090</value>
+</property>
+<property>
+ <name>jobtracker.thrift.address</name>
+ <value>0.0.0.0:9290</value>
+</property>
+<property>
+ <name>mapred.jobtracker.plugins</name>
+ <value>org.apache.hadoop.thriftfs.ThriftJobTrackerPlugin</value>
+ <description>Comma-separated list of jobtracker plug-ins to be activated.</description>
+</property>
+</configuration>
+EOF
+
+ # Expose /metrics URL endpoint
+ cat > $HADOOP_CONF_DIR/hadoop-metrics.properties <<EOF
+# Exposes /metrics URL endpoint for metrics information.
+dfs.class=org.apache.hadoop.metrics.spi.NoEmitMetricsContext
+mapred.class=org.apache.hadoop.metrics.spi.NoEmitMetricsContext
+jvm.class=org.apache.hadoop.metrics.spi.NoEmitMetricsContext
+rpc.class=org.apache.hadoop.metrics.spi.NoEmitMetricsContext
+EOF
+
+ # Keep PID files in a non-temporary directory
+ sed -i -e "s|# export HADOOP_PID_DIR=.*|export HADOOP_PID_DIR=/var/run/hadoop|" \
+ $HADOOP_CONF_DIR/hadoop-env.sh
+ mkdir -p /var/run/hadoop
+ chown -R hadoop:hadoop /var/run/hadoop
+
+ # Set SSH options within the cluster
+ sed -i -e 's|# export HADOOP_SSH_OPTS=.*|export HADOOP_SSH_OPTS="-o StrictHostKeyChecking=no"|' \
+ $HADOOP_CONF_DIR/hadoop-env.sh
+
+ # Disable IPv6
+ sed -i -e 's|# export HADOOP_OPTS=.*|export HADOOP_OPTS="-Djava.net.preferIPv4Stack=true"|' \
+ $HADOOP_CONF_DIR/hadoop-env.sh
+
+ # Hadoop logs should be on the data partition ($MOUNT)
+ sed -i -e 's|# export HADOOP_LOG_DIR=.*|export HADOOP_LOG_DIR=/var/log/hadoop/logs|' \
+ $HADOOP_CONF_DIR/hadoop-env.sh
+ rm -rf /var/log/hadoop
+ mkdir -p $MOUNT/hadoop/logs
+ chown hadoop:hadoop $MOUNT/hadoop/logs
+ ln -s $MOUNT/hadoop/logs /var/log/hadoop
+ chown -R hadoop:hadoop /var/log/hadoop
+
+}
+
+function install_hue() {
+ if which dpkg &> /dev/null; then
+ apt-get -y install hue-common
+ apt-get -y install hue-useradmin hue-jobsub hue-beeswax
+ elif which rpm &> /dev/null; then
+ yum install -y hue-common
+ yum install -y hue-useradmin hue-jobsub hue-beeswax
+ fi
+
+ # Configure hue
+ sed -i -e "s|http_port=8088|http_port=80|" /etc/hue/hue.ini
+
+ # Hue logs should be on the data partition ($MOUNT)
+ rm -rf /var/log/hue
+ mkdir -p $MOUNT/hue/logs
+ chown hue:hue $MOUNT/hue/logs
+ ln -s $MOUNT/hue/logs /var/log/hue
+ chown -R hue:hue /var/log/hue
+}
+
+function install_hue_plugins() {
+ if which dpkg &> /dev/null; then
+ apt-get -y install hue-plugins
+ elif which rpm &> /dev/null; then
+ yum install -y hue-plugins
+ fi
+}
+
+function start_hue() {
+ /etc/init.d/hue start
+}
+
+function start_namenode() {
+ if which dpkg &> /dev/null; then
+ AS_HADOOP="su -s /bin/bash - hadoop -c"
+ # Format HDFS
+ [ ! -e $FIRST_MOUNT/hadoop/hdfs ] && $AS_HADOOP "$HADOOP namenode -format"
+ update-rc.d $HADOOP-namenode defaults
+ update-rc.d $HADOOP-secondarynamenode defaults
+ elif which rpm &> /dev/null; then
+ AS_HADOOP="/sbin/runuser -s /bin/bash - hadoop -c"
+ # Format HDFS
+ [ ! -e $FIRST_MOUNT/hadoop/hdfs ] && $AS_HADOOP "$HADOOP namenode -format"
+ chkconfig --add $HADOOP-namenode
+ chkconfig --add $HADOOP-secondarynamenode
+ fi
+
+ service $HADOOP-namenode start
+ service $HADOOP-secondarynamenode start
+
+ $AS_HADOOP "$HADOOP dfsadmin -safemode wait"
+ $AS_HADOOP "/usr/bin/$HADOOP fs -mkdir /user"
+ # The following is questionable, as it allows a user to delete another user's
+ # directory. It's needed to allow users to create their own user directories.
+ $AS_HADOOP "/usr/bin/$HADOOP fs -chmod +w /user"
+
+ # Create a temporary directory for Pig and Hive, and the Hive warehouse, in HDFS
+ $AS_HADOOP "/usr/bin/$HADOOP fs -mkdir /tmp"
+ $AS_HADOOP "/usr/bin/$HADOOP fs -chmod +w /tmp"
+ $AS_HADOOP "/usr/bin/$HADOOP fs -mkdir /user/hive/warehouse"
+ $AS_HADOOP "/usr/bin/$HADOOP fs -chmod +w /user/hive/warehouse"
+}
+
+function start_daemon() {
+ daemon=$1
+ if which dpkg &> /dev/null; then
+ update-rc.d $HADOOP-$daemon defaults
+ elif which rpm &> /dev/null; then
+ chkconfig --add $HADOOP-$daemon
+ fi
+ service $HADOOP-$daemon start
+}
+
+update_repo
+install_hadoop
+configure_hadoop
+install_hue_plugins
+
+for role in $(echo "$ROLES" | tr "," "\n"); do
+ case $role in
+ nn)
+ install_hue
+ start_namenode
+ start_hue
+ ;;
+ snn)
+ start_daemon secondarynamenode
+ ;;
+ jt)
+ start_daemon jobtracker
+ ;;
+ dn)
+ start_daemon datanode
+ ;;
+ tt)
+ start_daemon tasktracker
+ ;;
+ esac
+done
+
Propchange: incubator/whirr/trunk/scripts/cloudera/cdh/install
------------------------------------------------------------------------------
svn:eol-style = native
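
Together with the HadoopService change below, runurl resolves these script paths
against a base URL. A sketch of what a master node's composed boot script runs
when whirr.hadoop-install-runurl is set to cloudera/cdh/install; the base URL
shown is an assumption for illustration, not part of this patch:

    # illustrative expansion of RunUrlBuilder.runUrls(...) on the master
    runurl http://whirr.s3.amazonaws.com/sun/java/install
    runurl http://whirr.s3.amazonaws.com/cloudera/cdh/install nn,jt -c ec2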
Modified: incubator/whirr/trunk/services/hadoop/src/main/java/org/apache/whirr/service/hadoop/HadoopService.java
URL: http://svn.apache.org/viewvc/incubator/whirr/trunk/services/hadoop/src/main/java/org/apache/whirr/service/hadoop/HadoopService.java?rev=990923&r1=990922&r2=990923&view=diff
==============================================================================
--- incubator/whirr/trunk/services/hadoop/src/main/java/org/apache/whirr/service/hadoop/HadoopService.java (original)
+++ incubator/whirr/trunk/services/hadoop/src/main/java/org/apache/whirr/service/hadoop/HadoopService.java Mon Aug 30 19:54:26 2010
@@ -59,6 +59,7 @@ public class HadoopService extends Servi
public static final Set<String> MASTER_ROLE = Sets.newHashSet("nn", "jt");
public static final Set<String> WORKER_ROLE = Sets.newHashSet("dn", "tt");
+ public static final int WEB_PORT = 80; // Hue web UI (configured in cloudera/cdh/install)
public static final int NAMENODE_PORT = 8020;
public static final int JOBTRACKER_PORT = 8021;
public static final int NAMENODE_WEB_UI_PORT = 50070;
@@ -77,9 +78,12 @@ public class HadoopService extends Servi
// Launch Hadoop "master" (NN and JT)
// deal with user packages and autoshutdown with extra runurls
+ String hadoopInstallRunUrl = clusterSpec.getConfiguration().getString(
+ "whirr.hadoop-install-runurl", "apache/hadoop/install");
byte[] nnjtBootScript = RunUrlBuilder.runUrls(
"sun/java/install",
- String.format("apache/hadoop/install nn,jt -c %s", clusterSpec.getProvider()));
+ String.format("%s nn,jt -c %s", hadoopInstallRunUrl,
+ clusterSpec.getProvider()));
TemplateBuilder masterTemplateBuilder = computeService.templateBuilder()
.osFamily(UBUNTU)
@@ -111,6 +115,8 @@ public class HadoopService extends Servi
InetAddress jobtrackerPublicAddress = InetAddress.getByName(Iterables.get(node.getPublicAddresses(),0));
FirewallSettings.authorizeIngress(computeServiceContext, node, clusterSpec,
+ WEB_PORT);
+ FirewallSettings.authorizeIngress(computeServiceContext, node, clusterSpec,
NAMENODE_WEB_UI_PORT);
FirewallSettings.authorizeIngress(computeServiceContext, node, clusterSpec,
JOBTRACKER_WEB_UI_PORT);
@@ -128,7 +134,7 @@ public class HadoopService extends Servi
// Launch slaves (DN and TT)
byte[] slaveBootScript = RunUrlBuilder.runUrls(
"sun/java/install",
- String.format("apache/hadoop/install dn,tt -n %s -j %s",
+ String.format("cloudera/cdh/install dn,tt -n %s -j %s",
namenodePublicAddress.getHostName(),
jobtrackerPublicAddress.getHostName()));
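
Once the master is up, the FirewallSettings calls above leave its web UIs
reachable. Illustrative smoke checks only; the hostname is a placeholder:

    curl -s http://master.example.com:50070/ | head -n 5  # namenode web UI (NAMENODE_WEB_UI_PORT)
    curl -s http://master.example.com:80/ | head -n 5     # Hue, on WEB_PORT opened above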