Posted to commits@whirr.apache.org by as...@apache.org on 2011/06/03 00:46:52 UTC
svn commit: r1130860 [1/2] - in /incubator/whirr/trunk: ./ src/site/
src/site/confluence/ src/site/xdoc/ src/site/xdoc/contrib/
src/site/xdoc/contrib/python/
Author: asavu
Date: Thu Jun 2 22:46:51 2011
New Revision: 1130860
URL: http://svn.apache.org/viewvc?rev=1130860&view=rev
Log:
WHIRR-320. Convert site documentation to xdoc format (asavu)
Added:
incubator/whirr/trunk/src/site/xdoc/api-guide.xml (with props)
incubator/whirr/trunk/src/site/xdoc/configuration-guide.xml (with props)
incubator/whirr/trunk/src/site/xdoc/contrib/
incubator/whirr/trunk/src/site/xdoc/contrib/python/
incubator/whirr/trunk/src/site/xdoc/contrib/python/automatically-shutting-down-a-cluster.xml (with props)
incubator/whirr/trunk/src/site/xdoc/contrib/python/configuring-and-running.xml (with props)
incubator/whirr/trunk/src/site/xdoc/contrib/python/installation.xml (with props)
incubator/whirr/trunk/src/site/xdoc/contrib/python/launching-a-cluster.xml (with props)
incubator/whirr/trunk/src/site/xdoc/contrib/python/running-mapreduce-jobs.xml (with props)
incubator/whirr/trunk/src/site/xdoc/contrib/python/running-zookeeper.xml (with props)
incubator/whirr/trunk/src/site/xdoc/contrib/python/terminating-a-cluster.xml (with props)
incubator/whirr/trunk/src/site/xdoc/contrib/python/using-command-line-options.xml (with props)
incubator/whirr/trunk/src/site/xdoc/contrib/python/using-persistent-clusters.xml (with props)
incubator/whirr/trunk/src/site/xdoc/faq.xml (with props)
incubator/whirr/trunk/src/site/xdoc/index.xml (with props)
incubator/whirr/trunk/src/site/xdoc/quick-start-guide.xml (with props)
incubator/whirr/trunk/src/site/xdoc/whirr-in-5-minutes.xml (with props)
Removed:
incubator/whirr/trunk/src/site/confluence/
Modified:
incubator/whirr/trunk/CHANGES.txt
incubator/whirr/trunk/pom.xml
incubator/whirr/trunk/src/site/site.xml
incubator/whirr/trunk/src/site/xdoc/release-notes.xml
Modified: incubator/whirr/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/incubator/whirr/trunk/CHANGES.txt?rev=1130860&r1=1130859&r2=1130860&view=diff
==============================================================================
--- incubator/whirr/trunk/CHANGES.txt (original)
+++ incubator/whirr/trunk/CHANGES.txt Thu Jun 2 22:46:51 2011
@@ -10,6 +10,8 @@ Trunk (unreleased changes)
WHIRR-311. Allow services to register new CLI commands (asavu)
+ WHIRR-320. Convert site documentation to xdoc format (asavu)
+
BUG FIXES
WHIRR-315. Temporary override Providers#withIds until jclouds
Modified: incubator/whirr/trunk/pom.xml
URL: http://svn.apache.org/viewvc/incubator/whirr/trunk/pom.xml?rev=1130860&r1=1130859&r2=1130860&view=diff
==============================================================================
--- incubator/whirr/trunk/pom.xml (original)
+++ incubator/whirr/trunk/pom.xml Thu Jun 2 22:46:51 2011
@@ -355,10 +355,10 @@
<exclude>.git/**</exclude>
<exclude>.gitignore</exclude>
<exclude>**/*.json</exclude>
- <exclude>**/*.confluence</exclude>
<exclude>**/src/main/resources/version-banner.txt</exclude>
<exclude>docs/**</exclude>
<exclude>**/*.log*</exclude>
+ <exclude>.idea/**</exclude>
<exclude>src/site/resources/images/whirr-logo.ai</exclude> <!-- binary -->
</excludes>
</configuration>
Modified: incubator/whirr/trunk/src/site/site.xml
URL: http://svn.apache.org/viewvc/incubator/whirr/trunk/src/site/site.xml?rev=1130860&r1=1130859&r2=1130860&view=diff
==============================================================================
--- incubator/whirr/trunk/src/site/site.xml (original)
+++ incubator/whirr/trunk/src/site/site.xml Thu Jun 2 22:46:51 2011
@@ -83,7 +83,7 @@
<item name="Terminating a Cluster" href="contrib/python/terminating-a-cluster.html"/>
<item name="Automatically Shutting Down a Cluster" href="contrib/python/automatically-shutting-down-a-cluster.html"/>
<item name="Using Command Line Options" href="contrib/python/using-command-line-options.html"/>
- <item name="Running ZooKeeper" href="contrib/python/running-zooKeeper.html"/>
+ <item name="Running ZooKeeper" href="contrib/python/running-zookeeper.html"/>
</menu>
</body>
Added: incubator/whirr/trunk/src/site/xdoc/api-guide.xml
URL: http://svn.apache.org/viewvc/incubator/whirr/trunk/src/site/xdoc/api-guide.xml?rev=1130860&view=auto
==============================================================================
--- incubator/whirr/trunk/src/site/xdoc/api-guide.xml (added)
+++ incubator/whirr/trunk/src/site/xdoc/api-guide.xml Thu Jun 2 22:46:51 2011
@@ -0,0 +1,38 @@
+<?xml version="1.0" encoding="iso-8859-1"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<document xmlns="http://maven.apache.org/XDOC/2.0"
+xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+xsi:schemaLocation="http://maven.apache.org/XDOC/2.0 http://maven.apache.org/xsd/xdoc-2.0.xsd">
+ <properties></properties>
+ <body>
+ <section name="API Guide"></section>
+ <p>Whirr provides a Java API for stopping and starting clusters. Please see the
+ <a href="apidocs/index.html">javadoc</a> and the unit test source code for how to achieve
+ this.</p>
+
+ <p>The downloaded artifact contains an example project that shows how Whirr can be used
+ for various tasks. Check the <i>examples</i> subfolder.</p>
+
+ <p>You can execute an example by running:
+ <source>$ ./bin/example [example-name]</source></p>
+
+ <p>There's also some example code at
+ <a class="externalLink" href="http://github.com/hammer/whirr-demo">
+ http://github.com/hammer/whirr-demo</a>.</p>
+ </body>
+</document>
Propchange: incubator/whirr/trunk/src/site/xdoc/api-guide.xml
------------------------------------------------------------------------------
svn:eol-style = native
Added: incubator/whirr/trunk/src/site/xdoc/configuration-guide.xml
URL: http://svn.apache.org/viewvc/incubator/whirr/trunk/src/site/xdoc/configuration-guide.xml?rev=1130860&view=auto
==============================================================================
--- incubator/whirr/trunk/src/site/xdoc/configuration-guide.xml (added)
+++ incubator/whirr/trunk/src/site/xdoc/configuration-guide.xml Thu Jun 2 22:46:51 2011
@@ -0,0 +1,750 @@
+<?xml version="1.0" encoding="iso-8859-1"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<document xmlns="http://maven.apache.org/XDOC/2.0"
+xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+xsi:schemaLocation="http://maven.apache.org/XDOC/2.0 http://maven.apache.org/xsd/xdoc-2.0.xsd">
+ <properties></properties>
+ <body>
+ <section name="Configuration Guide"></section>
+ <p>Whirr is configured using a properties file, and optionally using command line arguments
+ when using the CLI. Command line arguments take precedence over properties specified in a
+ properties file.</p>
+ <p>For example working configurations, please see the recipes in the
+ <i>recipes</i> directory of the distribution.</p>
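A minimal working configuration, sketched from the options documented below (the cluster name, role counts, and credential placeholders are illustrative assumptions, not defaults):

```properties
# whirr.properties -- pass to the CLI with --config whirr.properties
# One namenode+jobtracker instance and five datanode+tasktracker instances
whirr.cluster-name=myhadoopcluster
whirr.instance-templates=1 nn+jt,5 dn+tt
whirr.provider=aws-ec2
# Placeholders -- substitute your real cloud credentials
whirr.identity=YOUR_AWS_ACCESS_KEY_ID
whirr.credential=YOUR_AWS_SECRET_ACCESS_KEY
whirr.private-key-file=${sys:user.home}/.ssh/id_rsa
whirr.public-key-file=${sys:user.home}/.ssh/id_rsa.pub
```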
+ <subsection name="General Options"></subsection>
+ <table border="0">
+ <tr valign="top">
+ <th>
+ <b>Name</b>
+ </th>
+ <th>
+ <b>Command line option</b>
+ </th>
+ <th>
+ <b>Default</b>
+ </th>
+ <th>
+ <b>Description</b>
+ </th>
+ </tr>
+ <tr valign="top">
+ <td>
+ <tt>whirr.config</tt>
+ </td>
+ <td>
+ <tt>--config</tt>
+ </td>
+ <td>none</td>
+ <td>The filename of a properties file containing the properties in this table.
+ Note that Whirr properties specified in this file all have a
+ <tt>whirr.</tt> prefix.</td>
+ </tr>
+ <tr valign="top">
+ <td>
+ <tt>whirr.service-name</tt>
+ </td>
+ <td>
+ <tt>--service-name</tt>
+ </td>
+ <td>The default service for launching clusters</td>
+ <td>The name of the service to use. You only need to set this if you want to
+ use a non-standard service launcher.</td>
+ </tr>
+ <tr valign="top">
+ <td>
+ <tt>whirr.cluster-name</tt>
+ </td>
+ <td>
+ <tt>--cluster-name</tt>
+ </td>
+ <td>none</td>
+ <td>The name of the cluster to operate on. E.g.
+ <tt>hadoopcluster</tt>. The cluster name is used to tag the instances in some
+ cloud-specific way. For example, in Amazon it is used to form the security group name.</td>
+ </tr>
+ </table>
+ <subsection name="Instance Templates Options"></subsection>
+ <table border="0">
+ <tr valign="top">
+ <th>
+ <b>Name</b>
+ </th>
+ <th>
+ <b>Command line option</b>
+ </th>
+ <th>
+ <b>Default</b>
+ </th>
+ <th>
+ <b>Description</b>
+ </th>
+ </tr>
+ <tr valign="top">
+ <td>
+ <tt>whirr.instance-templates</tt>
+ </td>
+ <td>
+ <tt>--instance-templates</tt>
+ </td>
+ <td>none</td>
+ <td>The number of instances to launch for each set of roles in a service.
+ E.g.
+ <tt>1 nn+jt,10 dn+tt</tt> means one instance with the roles
+ <tt>nn</tt> (namenode) and
+ <tt>jt</tt> (jobtracker), and ten instances each with the roles
+ <tt>dn</tt> (datanode) and
+ <tt>tt</tt> (tasktracker). Note that currently a role may only be specified in a single
+ group.</td>
+ </tr>
+ <tr valign="top">
+ <td>
+ <tt>whirr.instance-templates-max-percent-failures</tt>
+ </td>
+ <td>
+ <tt>--instance-templates-max-percent-failures</tt>
+ </td>
+ <td>none</td>
+ <td>The minimum percentage of successfully started instances for each set of
+ roles. E.g.
+ <tt>100 nn+jt,60 dn+tt</tt> means that all instances with the roles
+ <tt>nn</tt> (namenode) and
+ <tt>jt</tt> (jobtracker) must start successfully, and at least 60% of the instances with
+ the roles
+ <tt>dn</tt> (datanode) and
+ <tt>tt</tt> (tasktracker) must start successfully; otherwise a retry step is initiated for
+ the number of nodes missing per role compared to the
+ <tt>instance-templates</tt> value. If after the retry the percentage of successfully
+ started instances is still below the limit, the cluster startup is considered failed. In a
+ valid cluster startup, with or without the retry mechanism, all failed nodes are
+ cleaned up immediately. Only a completely failed cluster may leave unterminated failed
+ nodes. The default value is 100 for each role, in which case this parameter need not be
+ used at all. To lower the limit from 100% to 60% for only the
+ <tt>dn</tt> (datanode) and
+ <tt>tt</tt> (tasktracker) roles, specify
+ <tt>60 dn+tt</tt> for the parameter and omit the leading
+ <tt>100 nn+jt</tt>.</td>
+ </tr>
+ <tr valign="top">
+ <td>
+ <tt>whirr.instance-templates-minimum-number-of-instances</tt>
+ </td>
+ <td>
+ <tt>--instance-templates-minimum-number-of-instances</tt>
+ </td>
+ <td>none</td>
+ <td>The minimum number of successfully started instances for each set of
+ roles. E.g.
+ <tt>1 nn+jt,6 dn+tt</tt> means that 1 instance with the roles
+ <tt>nn</tt> (namenode) and
+ <tt>jt</tt> (jobtracker) must start successfully, and 6 instances with the roles
+ <tt>dn</tt> (datanode) and
+ <tt>tt</tt> (tasktracker) must start successfully; otherwise a retry step is initiated for
+ the number of nodes missing per role compared to the
+ <tt>instance-templates</tt> value. If after the retry the number of successfully started
+ instances is still below the limit, the cluster startup is considered failed. In a
+ valid cluster startup, with or without the retry mechanism, all failed nodes are
+ cleaned up immediately. Only a completely failed cluster may leave unterminated failed
+ nodes. Note that you may specify only
+ <tt>6 dn+tt</tt>, in which case the limit is applied only to the specified roles.
+ The default value is 100 for each role, in which case this parameter need not be used at
+ all. To lower the limit for only the
+ <tt>dn</tt> (datanode) and
+ <tt>tt</tt> (tasktracker) roles, specify
+ <tt>6 dn+tt</tt> for the parameter, skipping the
+ <tt>1 nn+jt</tt>.</td>
+ </tr>
+ <tr valign="top">
+ <td>
+ <tt>whirr.max-startup-retries</tt>
+ </td>
+ <td>
+ <tt>--max-startup-retries</tt>
+ </td>
+ <td>
+ <tt>1</tt>
+ </td>
+ <td>The number of retries in case of insufficient successfully started
+ instances.</td>
+ </tr>
+ </table>
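Putting the options above together, a hedged sketch of a failure-tolerant cluster configuration (the role counts and percentages are examples only, not recommended values):

```properties
whirr.instance-templates=1 nn+jt,10 dn+tt
# All nn+jt instances must start; 60% of dn+tt instances is acceptable
whirr.instance-templates-max-percent-failures=60 dn+tt
# Equivalently, as an absolute count: at least 6 of the 10 dn+tt instances
whirr.instance-templates-minimum-number-of-instances=6 dn+tt
# Retry once before declaring the cluster startup failed
whirr.max-startup-retries=1
```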
+ <subsection name="Cloud Provider Options"></subsection>
+ <table border="0">
+ <tr valign="top">
+ <th>
+ <b>Name</b>
+ </th>
+ <th>
+ <b>Command line option</b>
+ </th>
+ <th>
+ <b>Default</b>
+ </th>
+ <th>
+ <b>Description</b>
+ </th>
+ </tr>
+ <tr valign="top">
+ <td>
+ <tt>whirr.provider</tt>
+ </td>
+ <td>
+ <tt>--provider</tt>
+ </td>
+ <td>
+ <tt>aws-ec2</tt>
+ </td>
+ <td>The name of the cloud provider. See the
+ <a href="#cloud-provider-config">table below</a> for possible provider names.</td>
+ </tr>
+ <tr valign="top">
+ <td>
+ <tt>whirr.location-id</tt>
+ </td>
+ <td>
+ <tt>--location-id</tt>
+ </td>
+ <td>none</td>
+ <td>The location to launch instances in. If not specified then an arbitrary
+ location will be chosen.</td>
+ </tr>
+ <tr valign="top">
+ <td>
+ <tt>whirr.identity</tt>
+ </td>
+ <td>
+ <tt>--identity</tt>
+ </td>
+ <td>none</td>
+ <td>The cloud identity. See the
+ <a href="#cloud-provider-config">table below</a> for how this maps to the credentials for
+ your provider.</td>
+ </tr>
+ <tr valign="top">
+ <td>
+ <tt>whirr.credential</tt>
+ </td>
+ <td>
+ <tt>--credential</tt>
+ </td>
+ <td>none</td>
+ <td>The cloud credential. See the
+ <a href="#cloud-provider-config">table below</a> for how this maps to the credentials for
+ your provider.</td>
+ </tr>
+ <tr valign="top">
+ <td>
+ <tt>whirr.login-user</tt>
+ </td>
+ <td>
+ <tt>--login-user</tt>
+ </td>
+ <td>none</td>
+ <td>Override the default login user used to bootstrap Whirr. E.g.
+ <tt>ubuntu</tt> or <tt>myuser:mypass</tt>.</td>
+ </tr>
+ </table>
+ <subsection name="BlobStore Provider Options"></subsection>
+ <table border="0">
+ <tr valign="top">
+ <th>
+ <b>Name</b>
+ </th>
+ <th>
+ <b>Command line option</b>
+ </th>
+ <th>
+ <b>Default</b>
+ </th>
+ <th>
+ <b>Description</b>
+ </th>
+ </tr>
+ <tr valign="top">
+ <td>
+ <tt>whirr.blobstore-provider</tt>
+ </td>
+ <td>
+ <tt>--blobstore-provider</tt>
+ </td>
+ <td>Computed from
+ <tt>whirr.provider</tt></td>
+ <td>The name of the blobstore provider. All jclouds blobstore providers are
+ supported.</td>
+ </tr>
+ <tr valign="top">
+ <td>
+ <tt>whirr.blobstore-identity</tt>
+ </td>
+ <td>
+ <tt>--blobstore-identity</tt>
+ </td>
+ <td>
+ <tt>whirr.identity</tt>
+ </td>
+ <td>The blobstore identity. See the
+ <a href="#cloud-provider-config">table below</a> for how this maps to the credentials for
+ your provider.</td>
+ </tr>
+ <tr valign="top">
+ <td>
+ <tt>whirr.blobstore-credential</tt>
+ </td>
+ <td>
+ <tt>--blobstore-credential</tt>
+ </td>
+ <td>
+ <tt>whirr.credential</tt>
+ </td>
+ <td>The blobstore credential. See the
+ <a href="#cloud-provider-config">table below</a> for how this maps to the credentials for
+ your provider.</td>
+ </tr>
+ <tr valign="top">
+ <td>
+ <tt>whirr.blobstore-location-id</tt>
+ </td>
+ <td>
+ <tt>--blobstore-location-id</tt>
+ </td>
+ <td>As close as possible to the compute nodes</td>
+ <td>The blobstore location ID</td>
+ </tr>
+ </table>
+ <subsection name="Cluster State Store Options"></subsection>
+ <table border="0">
+ <tr valign="top">
+ <th>
+ <b>Name</b>
+ </th>
+ <th>
+ <b>Command line option</b>
+ </th>
+ <th>
+ <b>Default</b>
+ </th>
+ <th>
+ <b>Description</b>
+ </th>
+ </tr>
+ <tr valign="top">
+ <td>
+ <tt>whirr.state-store</tt>
+ </td>
+ <td>
+ <tt>--state-store</tt>
+ </td>
+ <td>local</td>
+ <td>What kind of store to use for cluster state (local, blob or none).</td>
+ </tr>
+ <tr valign="top">
+ <td>
+ <tt>whirr.state-store-container</tt>
+ </td>
+ <td>
+ <tt>--state-store-container</tt>
+ </td>
+ <td>none</td>
+ <td>The container in which to store cluster state. Valid only for the blob state
+ store.</td>
+ </tr>
+ <tr valign="top">
+ <td>
+ <tt>whirr.state-store-blob</tt>
+ </td>
+ <td>
+ <tt>--state-store-blob</tt>
+ </td>
+ <td>whirr-&lt;
+ <tt>whirr.cluster-name</tt>&gt;</td>
+ <td>Blob name for state storage. Valid only for the blob state store.</td>
+ </tr>
+ </table>
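For example, to keep cluster state in a blobstore rather than on the local machine (the container and blob names here are illustrative, not defaults):

```properties
whirr.state-store=blob
whirr.state-store-container=my-whirr-state
# Optional; defaults to whirr-<cluster-name>
whirr.state-store-blob=whirr-myhadoopcluster
```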
+ <subsection name="Instance Login Options"></subsection>
+ <table border="0">
+ <tr valign="top">
+ <th>
+ <b>Name</b>
+ </th>
+ <th>
+ <b>Command line option</b>
+ </th>
+ <th>
+ <b>Default</b>
+ </th>
+ <th>
+ <b>Description</b>
+ </th>
+ </tr>
+ <tr valign="top">
+ <td>
+ <tt>whirr.cluster-user</tt>
+ </td>
+ <td>
+ <tt>--cluster-user</tt>
+ </td>
+ <td>Current local user</td>
+ <td>The name of the user that Whirr will create on all instances. This is
+ the user you should use to access the cluster.</td>
+ </tr>
+ <tr valign="top">
+ <td>
+ <tt>whirr.private-key-file</tt>
+ </td>
+ <td>
+ <tt>--private-key-file</tt>
+ </td>
+ <td>
+ <i>~/.ssh/id_rsa</i>
+ </td>
+ <td>The filename of the private RSA SSH key used to connect to instances.
+ Note: the public/private key must be set together, and must be passwordless.</td>
+ </tr>
+ <tr valign="top">
+ <td>
+ <tt>whirr.public-key-file</tt>
+ </td>
+ <td>
+ <tt>--public-key-file</tt>
+ </td>
+ <td>
+ <i>~/.ssh/id_rsa</i>.pub</td>
+ <td>The filename of the public RSA SSH key used to connect to instances.
+ Note: the public/private key must be set together, and must be passwordless.</td>
+ </tr>
+ </table>
+ <subsection name="Image and Hardware Selection Options"></subsection>
+ <table border="0">
+ <tr valign="top">
+ <th>
+ <b>Name</b>
+ </th>
+ <th>
+ <b>Command line option</b>
+ </th>
+ <th>
+ <b>Default</b>
+ </th>
+ <th>
+ <b>Description</b>
+ </th>
+ </tr>
+ <tr valign="top">
+ <td>
+ <tt>whirr.image-id</tt>
+ </td>
+ <td>
+ <tt>--image-id</tt>
+ </td>
+ <td>none</td>
+ <td>The ID of the image to use for instances. If not specified then a
+ vanilla Linux image is chosen.</td>
+ </tr>
+ <tr valign="top">
+ <td>
+ <tt>whirr.hardware-id</tt>
+ </td>
+ <td>
+ <tt>--hardware-id</tt>
+ </td>
+ <td>none</td>
+ <td>The type of hardware to use for the instance. This must be compatible
+ with the image ID.</td>
+ </tr>
+ <tr valign="top">
+ <td>
+ <tt>whirr.hardware-min-ram</tt>
+ </td>
+ <td>
+ <tt>--hardware-min-ram</tt>
+ </td>
+ <td>1024</td>
+ <td>The minimum amount of RAM, in megabytes, that each instance should have.</td>
+ </tr>
+ </table>
+ <subsection name="Firewall Options"></subsection>
+ <table border="0">
+ <tr valign="top">
+ <th>
+ <b>Name</b>
+ </th>
+ <th>
+ <b>Command line option</b>
+ </th>
+ <th>
+ <b>Default</b>
+ </th>
+ <th>
+ <b>Description</b>
+ </th>
+ </tr>
+ <tr valign="top">
+ <td>
+ <tt>whirr.client-cidrs</tt>
+ </td>
+ <td>
+ <tt>--client-cidrs</tt>
+ </td>
+ <td>none</td>
+ <td>A comma-separated list of
+ <a class="externalLink" href="http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing">
+ CIDR</a> blocks. E.g.
+ <tt>208.128.0.0/11,108.128.0.0/11</tt></td>
+ </tr>
+ </table>
+ <p>
+ <a name="cloud-provider-config"></a>
+ </p>
+ <subsection name="Cloud provider specific configuration"></subsection>
+ <table border="0">
+ <tr valign="top">
+ <th>
+ <b>Compute Service Provider</b>
+ </th>
+ <th>
+ <b>
+ <tt>whirr.provider</tt>
+ </b>
+ </th>
+ <th>
+ <b>
+ <tt>whirr.identity</tt>
+ </b>
+ </th>
+ <th>
+ <b>
+ <tt>whirr.credential</tt>
+ </b>
+ </th>
+ <th>
+ <b>Notes</b>
+ </th>
+ </tr>
+ <tr valign="top">
+ <td>Amazon EC2</td>
+ <td>
+ <tt>aws-ec2</tt>
+ </td>
+ <td>Access Key ID</td>
+ <td>Secret Access Key</td>
+ <td>Used to form the security group name (via a jclouds tag).</td>
+ </tr>
+ <tr valign="top">
+ <td>Rackspace Cloud Servers</td>
+ <td>
+ <tt>cloudservers-us</tt>
+ </td>
+ <td>Username</td>
+ <td>API Key</td>
+ <td>Warning: clusters do not run behind a firewall.</td>
+ </tr>
+ </table>
+ <p>
+ <a name="comparison-with-python"></a>
+ </p>
+ <section name="Comparison with Python"></section>
+ <p>See
+ <a href="contrib/python/using-command-line-options.html">Using Command Line Options</a>.</p>
+ <table border="0">
+ <tr valign="top">
+ <th>
+ <b>Python</b>
+ </th>
+ <th>
+ <b>Java</b>
+ </th>
+ <th>
+ <b>Notes</b>
+ </th>
+ </tr>
+ <tr valign="top">
+ <td>
+ <tt>config-dir</tt>
+ </td>
+ <td>
+ <tt>whirr.config</tt>
+ </td>
+ <td></td>
+ </tr>
+ <tr valign="top">
+ <td>
+ <tt>service</tt>
+ </td>
+ <td>
+ <tt>whirr.service-name</tt>
+ </td>
+ <td></td>
+ </tr>
+ <tr valign="top">
+ <td>none</td>
+ <td>
+ <tt>whirr.cluster-name</tt>
+ </td>
+ <td>Specified as a positional argument on the Python CLI.</td>
+ </tr>
+ <tr valign="top">
+ <td>none</td>
+ <td>
+ <tt>whirr.instance-templates</tt>
+ </td>
+ <td>Specified as positional arguments on the Python CLI.</td>
+ </tr>
+ <tr valign="top">
+ <td>
+ <tt>cloud-provider</tt>
+ </td>
+ <td>
+ <tt>whirr.provider</tt>
+ </td>
+ <td></td>
+ </tr>
+ <tr valign="top">
+ <td>none</td>
+ <td>
+ <tt>whirr.identity</tt>
+ </td>
+ <td>Specified using environment variables for Python. E.g.
+ <tt>AWS_ACCESS_KEY_ID</tt>,
+ <tt>RACKSPACE_KEY</tt></td>
+ </tr>
+ <tr valign="top">
+ <td>none</td>
+ <td>
+ <tt>whirr.credential</tt>
+ </td>
+ <td>Specified using environment variables for Python. E.g.
+ <tt>AWS_SECRET_ACCESS_KEY</tt>,
+ <tt>RACKSPACE_SECRET</tt></td>
+ </tr>
+ <tr valign="top">
+ <td>
+ <tt>private-key</tt>
+ </td>
+ <td>
+ <tt>whirr.private-key-file</tt>
+ </td>
+ <td></td>
+ </tr>
+ <tr valign="top">
+ <td>
+ <tt>public-key</tt>
+ </td>
+ <td>
+ <tt>whirr.public-key-file</tt>
+ </td>
+ <td></td>
+ </tr>
+ <tr valign="top">
+ <td>
+ <tt>client-cidr</tt>
+ </td>
+ <td>
+ <tt>whirr.client-cidrs</tt>
+ </td>
+ <td>Python's
+ <tt>client-cidr</tt> option may be repeated multiple times, whereas Java's
+ <tt>whirr.client-cidrs</tt> contains comma-separated CIDRs.</td>
+ </tr>
+ <tr valign="top">
+ <td>none</td>
+ <td>
+ <tt>whirr.run-url-base</tt>
+ </td>
+ <td>Specified using
+ <tt>user-data-file</tt> in Python.</td>
+ </tr>
+ <tr valign="top">
+ <td>
+ <tt>image-id</tt>
+ </td>
+ <td>
+ <tt>whirr.image-id</tt>
+ </td>
+ <td></td>
+ </tr>
+ <tr valign="top">
+ <td>
+ <tt>instance-type</tt>
+ </td>
+ <td>
+ <tt>whirr.hardware-id</tt>
+ </td>
+ <td></td>
+ </tr>
+ <tr valign="top">
+ <td>
+ <tt>availability-zone</tt>
+ </td>
+ <td>
+ <tt>whirr.location-id</tt>
+ </td>
+ <td>Location is more general than availability zone.</td>
+ </tr>
+ <tr valign="top">
+ <td>
+ <tt>security-group</tt>
+ </td>
+ <td>none</td>
+ <td>Amazon-specific. However, Amazon users may wish to start a cluster in
+ additional security groups, which isn't currently supported in Java.</td>
+ </tr>
+ <tr valign="top">
+ <td>
+ <tt>env</tt>
+ </td>
+ <td>none</td>
+ <td>May not be needed in Java with runurls.</td>
+ </tr>
+ <tr valign="top">
+ <td>
+ <tt>user-data-file</tt>
+ </td>
+ <td>none</td>
+ <td>Amazon-specific. Use runurls.</td>
+ </tr>
+ <tr valign="top">
+ <td>
+ <tt>key-name</tt>
+ </td>
+ <td>none</td>
+ <td>Amazon-specific. Jclouds generates a new key for clusters.</td>
+ </tr>
+ <tr valign="top">
+ <td>
+ <tt>user-packages</tt>
+ </td>
+ <td>none</td>
+ <td>Implement by allowing arbitrary runurls.</td>
+ </tr>
+ <tr valign="top">
+ <td>
+ <tt>auto-shutdown</tt>
+ </td>
+ <td>none</td>
+ <td>Implement by allowing arbitrary runurls.</td>
+ </tr>
+ <tr valign="top">
+ <td>
+ <tt>ssh-options</tt>
+ </td>
+ <td>none</td>
+ <td>Jclouds handles SSH, so not needed in Java.</td>
+ </tr>
+ </table>
+ </body>
+</document>
Propchange: incubator/whirr/trunk/src/site/xdoc/configuration-guide.xml
------------------------------------------------------------------------------
svn:eol-style = native
Added: incubator/whirr/trunk/src/site/xdoc/contrib/python/automatically-shutting-down-a-cluster.xml
URL: http://svn.apache.org/viewvc/incubator/whirr/trunk/src/site/xdoc/contrib/python/automatically-shutting-down-a-cluster.xml?rev=1130860&view=auto
==============================================================================
--- incubator/whirr/trunk/src/site/xdoc/contrib/python/automatically-shutting-down-a-cluster.xml (added)
+++ incubator/whirr/trunk/src/site/xdoc/contrib/python/automatically-shutting-down-a-cluster.xml Thu Jun 2 22:46:51 2011
@@ -0,0 +1,45 @@
+<?xml version="1.0" encoding="iso-8859-1"?>
+<document xmlns="http://maven.apache.org/XDOC/2.0"
+xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+xsi:schemaLocation="http://maven.apache.org/XDOC/2.0 http://maven.apache.org/xsd/xdoc-2.0.xsd">
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+ <properties></properties>
+ <body>
+ <section name="Automatically Shutting Down a Cluster"></section>
+ <p>You can use the
+ <tt>--auto-shutdown</tt> option to automatically terminate a cluster a specified number of
+ minutes after launch. This is useful for short-lived clusters where the jobs complete in a
+ known amount of time.</p>
+ <p>
+ <b>To configure the automatic shutdown (for example, 50 minutes after launch):</b>
+ </p>
+ <source>hadoop-ec2 launch-cluster --auto-shutdown 50 my-hadoop-cluster 2</source>
+ <p>You can also use the configuration property
+ <tt>auto_shutdown</tt> in the configuration file; for example, to shut down 50 minutes after
+ launch, you would use
+ <tt>auto_shutdown=50</tt>.</p>
+ <p>
+ <b>To cancel the automatic shutdown:</b>
+ </p>
+ <source>
+% hadoop-ec2 exec my-hadoop-cluster shutdown -c
+% hadoop-ec2 update-slaves-file my-hadoop-cluster
+% hadoop-ec2 exec my-hadoop-cluster /usr/lib/hadoop/bin/slaves.sh shutdown -c
+</source>
+ </body>
+</document>
Propchange: incubator/whirr/trunk/src/site/xdoc/contrib/python/automatically-shutting-down-a-cluster.xml
------------------------------------------------------------------------------
svn:eol-style = native
Added: incubator/whirr/trunk/src/site/xdoc/contrib/python/configuring-and-running.xml
URL: http://svn.apache.org/viewvc/incubator/whirr/trunk/src/site/xdoc/contrib/python/configuring-and-running.xml?rev=1130860&view=auto
==============================================================================
--- incubator/whirr/trunk/src/site/xdoc/contrib/python/configuring-and-running.xml (added)
+++ incubator/whirr/trunk/src/site/xdoc/contrib/python/configuring-and-running.xml Thu Jun 2 22:46:51 2011
@@ -0,0 +1,140 @@
+<?xml version="1.0" encoding="iso-8859-1"?>
+<document xmlns="http://maven.apache.org/XDOC/2.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+xsi:schemaLocation="http://maven.apache.org/XDOC/2.0 http://maven.apache.org/xsd/xdoc-2.0.xsd">
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+ <properties></properties>
+ <body>
+ <section name="Configuring and Running"></section>
+
+ <subsection name="Setting Environment Variables to Specify AWS Credentials"></subsection>
+
+ <p>You must specify your AWS credentials when using the cloud scripts (see
+ <a href="../../faq.html#how-do-i-find-my-cloud-credentials">How do I find my cloud credentials?</a>). The simplest
+ way to do this is to set the following environment variables (see
+ <a class="externalLink" href="http://code.google.com/p/boto/wiki/BotoConfig">this page</a> for other options):</p>
+ <ul>
+ <li>
+ <tt>AWS_ACCESS_KEY_ID</tt>: Your AWS Access Key ID</li>
+ <li>
+ <tt>AWS_SECRET_ACCESS_KEY</tt>: Your AWS Secret Access Key</li>
+ </ul>
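For example, in a Bourne-compatible shell (the key values below are the placeholder credentials from Amazon's documentation, not real keys):

```shell
# Export AWS credentials so the hadoop-ec2 cloud scripts (read via boto) can find them
export AWS_ACCESS_KEY_ID="AKIAIOSFODNN7EXAMPLE"
export AWS_SECRET_ACCESS_KEY="wJalrXUtnFBEMI/K7MDENG/bPxRicEXAMPLEKEY"
```

Add the two lines to your shell profile if you want them to persist across sessions.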
+
+ <subsection name="Configuring the Python Cloud Scripts"></subsection>
+
+ <p>To configure the scripts, create a directory called
+ <i>.hadoop-cloud</i> in your home directory (note the leading period "."). In that directory, create a file called
+ <i>clusters.cfg</i> that contains a section for each cluster you want to control. The following example shows how to
+ specify an i386 Ubuntu OS as the AMI in a
+ <i>clusters.cfg</i> file.</p>
+ <source>
+[my-hadoop-cluster]
+image_id=ami-ed59bf84
+instance_type=c1.medium
+key_name=tom
+availability_zone=us-east-1c
+private_key=/path/to/private/key/file
+ssh_options=-i %(private_key)s -o StrictHostKeyChecking=no
+</source>
+ <p>You can select a suitable AMI from the following table:</p>
+ <table border="0">
+ <tr valign="top">
+ <th>
+ <b>AMI (bucket/name)</b>
+ </th>
+ <th>
+ <b>ID</b>
+ </th>
+ <th>
+ <b>OS</b>
+ </th>
+ </tr>
+ <tr valign="top">
+ <td>cloudera-ec2-hadoop-images/cloudera-hadoop-ubuntu-20090623-i386</td>
+ <td>ami-ed59bf84</td>
+ <td>Ubuntu 8.10 (Intrepid)</td>
+ </tr>
+ <tr valign="top">
+ <td>cloudera-ec2-hadoop-images/cloudera-hadoop-ubuntu-20090623-x8664</td>
+ <td>ami-8759bfee</td>
+ <td>Ubuntu 8.10 (Intrepid)</td>
+ </tr>
+ <tr valign="top">
+ <td>cloudera-ec2-hadoop-images/cloudera-hadoop-fedora-20090623-i386</td>
+ <td>ami-6159bf08</td>
+ <td>Fedora release 8 (Werewolf)</td>
+ </tr>
+ <tr valign="top">
+ <td>cloudera-ec2-hadoop-images/cloudera-hadoop-fedora-20090623-x8664</td>
+ <td>ami-2359bf4a</td>
+ <td>Fedora release 8 (Werewolf)</td>
+ </tr>
+ </table>
+ <p></p>
+ <p>If you wish to use
+ <a class="externalLink" href="http://www.cloudera.com/hadoop/">CDH</a> instead of Apache Hadoop, use the following
+ configuration:</p>
+ <source>
+[my-hadoop-cluster]
+image_id=ami-2d4aa444
+instance_type=c1.medium
+key_name=tom
+availability_zone=us-east-1c
+private_key=/path/to/private/key/file
+ssh_options=-i %(private_key)s -o StrictHostKeyChecking=no
+user_data_file=http://archive.cloudera.com/cloud/ec2/cdh3/hadoop-ec2-init-remote.sh
+</source>
+ <p>Note that this example uses CDH3, as specified by the
+ <tt>user_data_file</tt> property (the version of Hadoop to install is determined by this script). For CDH, use one
+ of the AMIs from this table:</p>
+ <table border="0">
+ <tr valign="top">
+ <th>
+ <b>AMI (bucket/name)</b>
+ </th>
+ <th>
+ <b>ID</b>
+ </th>
+ <th>
+ <b>OS</b>
+ </th>
+ <th>
+ <b>Notes</b>
+ </th>
+ </tr>
+ <tr valign="top">
+ <td>ubuntu-images/ubuntu-lucid-10.04-i386-server-20100427.1</td>
+ <td>ami-2d4aa444</td>
+ <td>Ubuntu 10.04 (Lucid)</td>
+ <td>This AMI is suitable for use with CDH3b2 onwards. See http://alestic.com/</td>
+ </tr>
+ <tr valign="top">
+ <td>ubuntu-images/ubuntu-lucid-10.04-amd64-server-20100427.1</td>
+ <td>ami-fd4aa494</td>
+ <td>Ubuntu 10.04 (Lucid)</td>
+ <td>This AMI is suitable for use with CDH3b2 onwards. See http://alestic.com/</td>
+ </tr>
+ </table>
+
+ <subsection name="Running a Basic Cloud Script"></subsection>
+
+ <p>After specifying an AMI, you can run the
+ <tt>hadoop-ec2</tt> script. It will display usage instructions when you invoke it without arguments.</p>
+ <p>You can test that the script can connect to your cloud provider by typing:</p>
+ <source>% hadoop-ec2 list</source>
+ </body>
+</document>
Propchange: incubator/whirr/trunk/src/site/xdoc/contrib/python/configuring-and-running.xml
------------------------------------------------------------------------------
svn:eol-style = native
Added: incubator/whirr/trunk/src/site/xdoc/contrib/python/installation.xml
URL: http://svn.apache.org/viewvc/incubator/whirr/trunk/src/site/xdoc/contrib/python/installation.xml?rev=1130860&view=auto
==============================================================================
--- incubator/whirr/trunk/src/site/xdoc/contrib/python/installation.xml (added)
+++ incubator/whirr/trunk/src/site/xdoc/contrib/python/installation.xml Thu Jun 2 22:46:51 2011
@@ -0,0 +1,56 @@
+<?xml version="1.0" encoding="iso-8859-1"?>
+<document xmlns="http://maven.apache.org/XDOC/2.0"
+xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+xsi:schemaLocation="http://maven.apache.org/XDOC/2.0 http://maven.apache.org/xsd/xdoc-2.0.xsd">
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+ <properties></properties>
+ <body>
+ <section name="Installation"></section>
+ <p>The Python cloud scripts enable you to run Hadoop on cloud providers. A working cluster
+ starts with a single command. This is ideal for running temporary Hadoop clusters to carry
+ out a proof of concept, or to run a few one-time jobs. Currently, the scripts support Amazon
+ EC2 only, but other cloud providers may be supported in the future.</p>
+ <p>Amazon Machine Images (AMIs) and associated launch scripts are provided that make it easy to
+ run Hadoop on EC2. Note that the AMIs contain only base packages (such as Java), and not a
+ particular version of Hadoop because Hadoop is installed at launch time.</p>
+ <p>
+ <b>In this section, command lines that start with
+ <tt>#</tt> are executed on a cloud instance, and command lines starting with a
+ <tt>%</tt> are executed on your workstation.</b>
+ </p>
+ <subsection name="Installing the Python Cloud Scripts"></subsection>
+ <p>The following prerequisites apply to using the Python cloud scripts:</p>
+ <ul>
+ <li>Python 2.5</li>
+ <li>boto 1.8d</li>
+ <li>simplejson 2.0.9</li>
+ </ul>
+ <p>You can install boto and simplejson by using
+ <a class="externalLink" href="http://pypi.python.org/pypi/setuptools">easy_install</a>:</p>
+ <source>
+% easy_install "simplejson==2.0.9"
+% easy_install "boto==1.8d"
+</source>
+ <p></p>
+ <p>Alternatively, you might like to use the python-boto and python-simplejson RPM and Debian
+ packages.</p>
+ <p>The Python cloud scripts are packaged in the source tarball. Unpack the tarball on your
+ system. The scripts are in
+ <i>contrib/python/src/py</i>. For convenience, you can add this directory to your path.</p>
+ </body>
+</document>
Propchange: incubator/whirr/trunk/src/site/xdoc/contrib/python/installation.xml
------------------------------------------------------------------------------
svn:eol-style = native
Added: incubator/whirr/trunk/src/site/xdoc/contrib/python/launching-a-cluster.xml
URL: http://svn.apache.org/viewvc/incubator/whirr/trunk/src/site/xdoc/contrib/python/launching-a-cluster.xml?rev=1130860&view=auto
==============================================================================
--- incubator/whirr/trunk/src/site/xdoc/contrib/python/launching-a-cluster.xml (added)
+++ incubator/whirr/trunk/src/site/xdoc/contrib/python/launching-a-cluster.xml Thu Jun 2 22:46:51 2011
@@ -0,0 +1,69 @@
+<?xml version="1.0" encoding="iso-8859-1"?>
+<document xmlns="http://maven.apache.org/XDOC/2.0"
+xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+xsi:schemaLocation="http://maven.apache.org/XDOC/2.0 http://maven.apache.org/xsd/xdoc-2.0.xsd">
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+ <properties></properties>
+ <body>
+ <section name="Launching a Cluster"></section>
+ <p>After you install the client scripts and enter your EC2 account information, starting a
+ Hadoop cluster with 10 nodes is easy with a single command.
+ <br />
+ <br />To launch a cluster called "my-hadoop-cluster" with 10 worker (slave) nodes, use this
+ command:</p>
+ <source>% hadoop-ec2 launch-cluster my-hadoop-cluster 10</source>
+ <p>
+ <br />This command boots the master node and 10 worker nodes. The master node runs the
+ Namenode, secondary Namenode, and Jobtracker, and each worker node runs a Datanode and a
+ Tasktracker.</p>
+ <p>Equivalently, you can launch the cluster by using this command syntax:</p>
+ <source>% hadoop-ec2 launch-cluster my-hadoop-cluster 1 nn,snn,jt 10 dn,tt</source>
+ <p>Note that by using this syntax, you can also launch a split Namenode/Jobtracker cluster. For
+ example:</p>
+ <source>% hadoop-ec2 launch-cluster my-hadoop-cluster 1 nn,snn 1 jt 10 dn,tt</source>
+ <p>After the nodes have started and the Hadoop cluster is operational, the console will display
+ a message such as:</p>
+ <source>Browse the cluster at http://ec2-xxx-xxx-xxx-xxx.compute-1.amazonaws.com/</source>
+ <p>You can access Hadoop's web UI at the URL in the message. By default, port 80 is opened for
+ access from your client machine. You can change the firewall settings (to allow access from a
+ network, rather than just a single machine, for example) by using the Amazon EC2 command line
+ tools, or by using a tool such as
+ <a class="externalLink"
+ href="http://developer.amazonwebservices.com/connect/entry.jspa?externalID=609">
+ ElasticFox</a>. The security group to change is the one named
+ <tt><cluster-name>-<role></tt>. For example, for the Namenode in the cluster
+ started above, it would be
+ <tt>my-hadoop-cluster-nn</tt>.</p>
+ <p>For security reasons, traffic from the network your client is running on is proxied through
+ the master node of the cluster using an SSH tunnel (a SOCKS proxy on port 6666).</p>
+ <p>To set up the proxy, run the following command:</p>
+ <source>% eval `hadoop-ec2 proxy my-hadoop-cluster`</source>
+ <p>Note the backticks, which are used to evaluate the result of the command. This allows you to
+ stop the proxy later on (from the same terminal):</p>
+ <source>% kill $HADOOP_CLOUD_PROXY_PID</source>
+ <p>Web browsers need to be configured to use this proxy too, so you can view pages served by
+ worker nodes in the cluster. The most convenient way to do this is to use a
+ <a class="externalLink" href="http://en.wikipedia.org/wiki/Proxy_auto-config">proxy auto-config
+ (PAC) file</a>, such as
+ <a class="externalLink" href="http://apache-hadoop-ec2.s3.amazonaws.com/proxy.pac">this
+ one</a> for Hadoop EC2 clusters.</p>
+ <p>If you are using Firefox, then you may find
+ <a class="externalLink" href="http://foxyproxy.mozdev.org/">FoxyProxy</a> useful for managing
+ PAC files.</p>
+ </body>
+</document>
Propchange: incubator/whirr/trunk/src/site/xdoc/contrib/python/launching-a-cluster.xml
------------------------------------------------------------------------------
svn:eol-style = native
Added: incubator/whirr/trunk/src/site/xdoc/contrib/python/running-mapreduce-jobs.xml
URL: http://svn.apache.org/viewvc/incubator/whirr/trunk/src/site/xdoc/contrib/python/running-mapreduce-jobs.xml?rev=1130860&view=auto
==============================================================================
--- incubator/whirr/trunk/src/site/xdoc/contrib/python/running-mapreduce-jobs.xml (added)
+++ incubator/whirr/trunk/src/site/xdoc/contrib/python/running-mapreduce-jobs.xml Thu Jun 2 22:46:51 2011
@@ -0,0 +1,63 @@
+<?xml version="1.0" encoding="iso-8859-1"?>
+<document xmlns="http://maven.apache.org/XDOC/2.0"
+xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+xsi:schemaLocation="http://maven.apache.org/XDOC/2.0 http://maven.apache.org/xsd/xdoc-2.0.xsd">
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+ <properties></properties>
+ <body>
+ <section name="Running MapReduce Jobs"></section>
+ <p>After you launch a cluster, a
+ <tt>hadoop-site.xml</tt> file is created in the directory
+ <tt>~/.hadoop-cloud/<cluster-name></tt>. You can use this to connect to the cluster by
+ setting the
+ <tt>HADOOP_CONF_DIR</tt> environment variable. (It is also possible to set the configuration
+ file to use by passing it as a
+ <tt>-conf</tt> option to Hadoop Tools):</p>
+ <source>% export HADOOP_CONF_DIR=~/.hadoop-cloud/my-hadoop-cluster</source>
+ <p>
+ <b>To browse HDFS:</b>
+ </p>
+ <source>% hadoop fs -ls /</source>
+ <p>Note that the version of Hadoop installed locally should match the version installed on the
+ cluster.
+ <br />
+ <br />
+ <b>To run a job locally:</b></p>
+ <source>
+% hadoop fs -mkdir input                          # create an input directory
+% hadoop fs -put $HADOOP_HOME/LICENSE.txt input   # copy a file there
+% hadoop jar $HADOOP_HOME/hadoop-*examples*.jar wordcount input output
+% hadoop fs -cat output/part-* | head
+</source>
+ <p>The preceding examples assume that you installed Hadoop on your local machine. But you can
+ also run jobs within the cluster.
+ <br />
+ <br />
+ <b>To run jobs within the cluster:</b></p>
+ <p>1. Log into the Namenode:</p>
+ <source>% hadoop-ec2 login my-hadoop-cluster</source>
+ <p>2. Run the job:</p>
+ <source>
+# hadoop fs -mkdir input
+# hadoop fs -put /etc/hadoop/conf/*.xml input
+# hadoop jar /usr/lib/hadoop/hadoop-*-examples.jar grep input output 'dfs[a-z.]+'
+# hadoop fs -cat output/part-* | head
+</source>
+ </body>
+</document>
Propchange: incubator/whirr/trunk/src/site/xdoc/contrib/python/running-mapreduce-jobs.xml
------------------------------------------------------------------------------
svn:eol-style = native
Added: incubator/whirr/trunk/src/site/xdoc/contrib/python/running-zookeeper.xml
URL: http://svn.apache.org/viewvc/incubator/whirr/trunk/src/site/xdoc/contrib/python/running-zookeeper.xml?rev=1130860&view=auto
==============================================================================
--- incubator/whirr/trunk/src/site/xdoc/contrib/python/running-zookeeper.xml (added)
+++ incubator/whirr/trunk/src/site/xdoc/contrib/python/running-zookeeper.xml Thu Jun 2 22:46:51 2011
@@ -0,0 +1,46 @@
+<?xml version="1.0" encoding="iso-8859-1"?>
+<document xmlns="http://maven.apache.org/XDOC/2.0"
+xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+xsi:schemaLocation="http://maven.apache.org/XDOC/2.0 http://maven.apache.org/xsd/xdoc-2.0.xsd">
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+ <properties></properties>
+ <body>
+ <section name="Running Apache ZooKeeper"></section>
+ <p>The main use of the Python Cloud Scripts is to run Hadoop clusters, but you can also run
+ other services such as Apache ZooKeeper.</p>
+ <p>
+ <b>To run Apache ZooKeeper, set the
+ <tt>service</tt> parameter to
+ <tt>zookeeper</tt>:</b>
+ </p>
+ <source>
+[my-zookeeper-cluster]
+service=zookeeper
+image_id=ami-ed59bf84
+instance_type=m1.small
+key_name=tom
+availability_zone=us-east-1c
+public_key=/path/to/public/key/file
+private_key=/path/to/private/key/file
+</source>
+ <p>
+ <b>To launch a three-node ZooKeeper ensemble:</b>
+ </p>
+ <source>% ./hadoop-ec2 launch-cluster my-zookeeper-cluster 3 zk</source>
+ </body>
+</document>
Propchange: incubator/whirr/trunk/src/site/xdoc/contrib/python/running-zookeeper.xml
------------------------------------------------------------------------------
svn:eol-style = native
Added: incubator/whirr/trunk/src/site/xdoc/contrib/python/terminating-a-cluster.xml
URL: http://svn.apache.org/viewvc/incubator/whirr/trunk/src/site/xdoc/contrib/python/terminating-a-cluster.xml?rev=1130860&view=auto
==============================================================================
--- incubator/whirr/trunk/src/site/xdoc/contrib/python/terminating-a-cluster.xml (added)
+++ incubator/whirr/trunk/src/site/xdoc/contrib/python/terminating-a-cluster.xml Thu Jun 2 22:46:51 2011
@@ -0,0 +1,38 @@
+<?xml version="1.0" encoding="iso-8859-1"?>
+<document xmlns="http://maven.apache.org/XDOC/2.0"
+xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+xsi:schemaLocation="http://maven.apache.org/XDOC/2.0 http://maven.apache.org/xsd/xdoc-2.0.xsd">
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+ <properties></properties>
+ <body>
+ <section name="Terminating a Cluster"></section>
+ <p>When you are done using your cluster, you can terminate all instances in it.</p>
+ <p>
+ <b>WARNING: All data will be deleted when you terminate the cluster, unless you are using
+ EBS.</b>
+ </p>
+ <p>
+ <b>To terminate a cluster:</b>
+ </p>
+ <source>% hadoop-ec2 terminate-cluster my-hadoop-cluster</source>
+ <p>
+ <b>To delete the EC2 security groups:</b>
+ </p>
+ <source>% hadoop-ec2 delete-cluster my-hadoop-cluster</source>
+ </body>
+</document>
Propchange: incubator/whirr/trunk/src/site/xdoc/contrib/python/terminating-a-cluster.xml
------------------------------------------------------------------------------
svn:eol-style = native
Added: incubator/whirr/trunk/src/site/xdoc/contrib/python/using-command-line-options.xml
URL: http://svn.apache.org/viewvc/incubator/whirr/trunk/src/site/xdoc/contrib/python/using-command-line-options.xml?rev=1130860&view=auto
==============================================================================
--- incubator/whirr/trunk/src/site/xdoc/contrib/python/using-command-line-options.xml (added)
+++ incubator/whirr/trunk/src/site/xdoc/contrib/python/using-command-line-options.xml Thu Jun 2 22:46:51 2011
@@ -0,0 +1,95 @@
+<?xml version="1.0" encoding="iso-8859-1"?>
+<document xmlns="http://maven.apache.org/XDOC/2.0"
+xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+xsi:schemaLocation="http://maven.apache.org/XDOC/2.0 http://maven.apache.org/xsd/xdoc-2.0.xsd">
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+ <properties></properties>
+ <body>
+
+ <section name="Using Command Line Options"></section>
+
+ <p>It is possible to specify options on the command line when you launch a cluster. The options
+ take precedence over any settings specified in the configuration file.</p>
+ <p>For example, the following command launches a 10-node cluster using a specified image and
+ instance type, overriding the equivalent settings (if any) that are in the
+ <tt>my-hadoop-cluster</tt> section of the configuration file. Note that words in options are
+ separated by hyphens (
+ <tt>--instance-type</tt> ) while the corresponding configuration parameters are separated by
+ underscores (
+ <tt>instance_type</tt> ).</p>
+ <source>
+% hadoop-ec2 launch-cluster --image-id ami-2359bf4a --instance-type c1.xlarge my-hadoop-cluster 10
+</source>
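The hyphen/underscore correspondence is a simple mechanical rule; a hypothetical one-line helper (illustrative only, not code from the scripts) makes it explicit:

```python
def flag_to_config_key(flag):
    """Map a command-line flag such as --instance-type to its
    configuration-file counterpart, instance_type."""
    return flag.lstrip("-").replace("-", "_")

print(flag_to_config_key("--instance-type"))   # instance_type
print(flag_to_config_key("--user-data-file"))  # user_data_file
```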
+ <p>If there are options that you want to specify multiple times, you can set them in the
+ configuration file by separating the values with newlines (and leading whitespace). For
+ example:</p>
+ <source>
+env=AWS_ACCESS_KEY_ID=...
+ AWS_SECRET_ACCESS_KEY=...
+</source>
+ <p>The scripts install Hadoop from a tarball (or, in the case of CDH, from RPMs or Debian
+ packages, depending on the OS) at instance boot time.</p>
+ <p>By default, Apache Hadoop 0.20.1 is installed. To run a different version of Hadoop, change
+ the
+ <tt>user_data_file</tt> setting.</p>
+ <p>For example, to use the latest version of CDH3 add the following parameter:</p>
+ <source>--user-data-file http://archive.cloudera.com/cloud/ec2/cdh3/hadoop-ec2-init-remote.sh</source>
+ <p>By default, the latest version of the specified CDH release series is used. To use a
+ particular release of CDH, set the
+ <tt>REPO</tt> environment variable (via
+ <tt>--env</tt>), in addition to setting
+ <tt>user_data_file</tt>. For example, to specify the Beta 1 release of CDH3:</p>
+ <source>--env REPO=cdh3b1</source>
+ <p>For this release, Hadoop configuration files can be found in
+ <tt>/etc/hadoop/conf</tt> and logs are in
+ <tt>/var/log/hadoop</tt> .</p>
+
+ <subsection name="Customization"></subsection>
+
+ <p>You can specify a list of packages to install on every instance at boot time by using the
+ <tt>--user-packages</tt> command-line option or the
+ <tt>user_packages</tt> configuration parameter. Packages should be space-separated. Note that
+ package names should reflect the package manager being used to install them (
+ <tt>yum</tt> or
+ <tt>apt-get</tt> depending on the OS).</p>
+ <p>For example, to install RPMs for R and git:</p>
+ <source>% hadoop-ec2 launch-cluster --user-packages 'R git-core' my-hadoop-cluster 10</source>
+ <p>You have full control over the script that is run when each instance boots. The default
+ script,
+ <tt>hadoop-ec2-init-remote.sh</tt> , may be used as a starting point to add extra configuration
+ or customization of the instance. Make a copy of the script in your home directory, or
+ somewhere similar, and set the
+ <tt>--user-data-file</tt> command-line option (or the
+ <tt>user_data_file</tt> configuration parameter) to point to the (modified) copy. This option
+ may also point to an arbitrary URL, which makes it easy to share scripts.</p>
+ <p>For CDH, use the script located at
+ <a class="externalLink"
+ href="http://archive.cloudera.com/cloud/ec2/cdh3/hadoop-ec2-init-remote.sh">
+ http://archive.cloudera.com/cloud/ec2/cdh3/hadoop-ec2-init-remote.sh</a></p>
+ <p>The
+ <tt>hadoop-ec2</tt> script will replace
+ <tt>%ENV%</tt> in your user data script with
+ <tt>USER_PACKAGES</tt> ,
+ <tt>AUTO_SHUTDOWN</tt> , and
+ <tt>EBS_MAPPINGS</tt> , as well as extra parameters supplied using the
+ <tt>--env</tt> command-line flag.</p>
+ <p>Another way of customizing the instance, which may be more appropriate for larger changes,
+ is to create your own image.</p>
+ <p>It's possible to use any image, as long as it satisfies both of the following
+ conditions:</p>
+ <ul>
+ <li>Runs (gzip compressed) user data on boot</li>
+ <li>Has Java installed</li>
+ </ul>
+ </body>
+</document>
Propchange: incubator/whirr/trunk/src/site/xdoc/contrib/python/using-command-line-options.xml
------------------------------------------------------------------------------
svn:eol-style = native
Added: incubator/whirr/trunk/src/site/xdoc/contrib/python/using-persistent-clusters.xml
URL: http://svn.apache.org/viewvc/incubator/whirr/trunk/src/site/xdoc/contrib/python/using-persistent-clusters.xml?rev=1130860&view=auto
==============================================================================
--- incubator/whirr/trunk/src/site/xdoc/contrib/python/using-persistent-clusters.xml (added)
+++ incubator/whirr/trunk/src/site/xdoc/contrib/python/using-persistent-clusters.xml Thu Jun 2 22:46:51 2011
@@ -0,0 +1,120 @@
+<?xml version="1.0" encoding="iso-8859-1"?>
+<document xmlns="http://maven.apache.org/XDOC/2.0"
+xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+xsi:schemaLocation="http://maven.apache.org/XDOC/2.0 http://maven.apache.org/xsd/xdoc-2.0.xsd">
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+ <properties></properties>
+ <body>
+ <section name="Using Persistent Clusters"></section>
+ <p>
+ <b>Support for Amazon Elastic Block Storage (EBS) is a Beta feature.</b>
+ </p>
+ <p>When not in use, an EBS cluster can surrender unneeded EC2 instances, then restart later and
+ continue where it left off. Users no longer need to copy large volumes of data from S3 to local
+ disk on the EC2 instance; data persists reliably and independently in Amazon's EBS, saving
+ compute costs.</p>
+ <p>
+ <b>Schematic showing how the cluster is set up:</b>
+ </p>
+ <img src="../../images/persistent-ec2.png" alt="" />
+ <p>
+ <b>To Use Persistent Cluster with EBS Storage</b>
+ </p>
+ <ol style="list-style-type: decimal">
+ <li>Create a new section called
+ <tt>my-ebs-cluster</tt> in the
+ <tt>~/.hadoop-cloud/clusters.cfg</tt> file.</li>
+ <li>Create storage for the new cluster by creating a temporary EBS volume of size 100GiB,
+ formatting it, and saving it as a snapshot in S3. This way, you only have to do the
+ formatting once.</li>
+ </ol>
+ <source>% hadoop-ec2 create-formatted-snapshot my-ebs-cluster 100</source>
+ <p>You create storage for a single Namenode and for two Datanodes. The volumes to create are
+ described in a JSON spec file, which references the snapshot you just created. Here are the
+ contents of a JSON file called
+ <tt>my-ebs-cluster-storage-spec.json</tt>:</p>
+ <p>
+ <b>Example contents of my-ebs-cluster-storage-spec.json</b>
+ </p>
+ <source>
+{'dn': [{'device': '/dev/sdj',
+ 'mount_point': '/ebs1',
+ 'size_gb': '100',
+ 'snapshot_id': 'snap-268e704f'},
+ {'device': '/dev/sdk',
+ 'mount_point': '/ebs2',
+ 'size_gb': '100',
+ 'snapshot_id': 'snap-268e704f'}],
+ 'nn': [{'device': '/dev/sdj',
+ 'mount_point': '/ebs1',
+ 'size_gb': '100',
+ 'snapshot_id': 'snap-268e704f'},
+ {'device': '/dev/sdk',
+ 'mount_point': '/ebs2',
+ 'size_gb': '100',
+ 'snapshot_id': 'snap-268e704f'}]}
+</source>
+ <p>Each role (
+ <tt>nn</tt> and
+ <tt>dn</tt>) is the key to an array of volume specifications. In this example, each role has
+ two devices (
+ <tt>/dev/sdj</tt> and
+ <tt>/dev/sdk</tt>) with different mount points, each generated from an EBS snapshot. The
+ snapshot is the formatted one created earlier, so the volumes you create are
+ pre-formatted. The size of each volume must match the size of that snapshot.</p>
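Because each role repeats near-identical volume entries, the spec can also be generated programmatically. The following is a hypothetical Python sketch (the helper names are illustrative, not part of the scripts) that reproduces the example spec above:

```python
import json

def volume(device, mount_point, size_gb, snapshot_id):
    # One volume entry; sizes are strings, matching the example spec
    return {"device": device, "mount_point": mount_point,
            "size_gb": str(size_gb), "snapshot_id": snapshot_id}

def storage_spec(roles, snapshot_id, size_gb=100):
    # Same two devices and mount points per role as in the example
    return {role: [volume("/dev/sdj", "/ebs1", size_gb, snapshot_id),
                   volume("/dev/sdk", "/ebs2", size_gb, snapshot_id)]
            for role in roles}

spec = storage_spec(["nn", "dn"], "snap-268e704f")
print(json.dumps(spec, indent=2))
```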
+ <p>
+ <b>To use this file to create actual volumes:</b>
+ </p>
+ <source>
+% hadoop-ec2 create-storage my-ebs-cluster nn 1 my-ebs-cluster-storage-spec.json
+% hadoop-ec2 create-storage my-ebs-cluster dn 2 my-ebs-cluster-storage-spec.json
+</source>
+ <p>
+ <b>To start the cluster with two slave nodes:</b>
+ </p>
+ <source>% hadoop-ec2 launch-cluster my-ebs-cluster 1 nn,snn,jt 2 dn,tt</source>
+ <p>
+ <b>To login and run a job which creates some output:</b>
+ </p>
+ <source>
+% hadoop-ec2 login my-ebs-cluster
+# hadoop fs -mkdir input
+# hadoop fs -put /etc/hadoop/conf/*.xml input
+# hadoop jar /usr/lib/hadoop/hadoop-*-examples.jar grep input output 'dfs[a-z.]+'
+</source>
+ <p>
+ <b>To view the output:</b>
+ </p>
+ <source># hadoop fs -cat output/part-* | head</source>
+ <p>
+ <b>To shut down the cluster:</b>
+ </p>
+ <source>% hadoop-ec2 terminate-cluster my-ebs-cluster</source>
+ <p>
+ <b>To restart the cluster and login (after a short delay):</b>
+ </p>
+ <source>
+% hadoop-ec2 launch-cluster my-ebs-cluster 2
+% hadoop-ec2 login my-ebs-cluster
+</source>
+ <p>
+ <b>The output from the job you ran before should still be there:</b>
+ </p>
+ <source># hadoop fs -cat output/part-* | head</source>
+ </body>
+</document>
Propchange: incubator/whirr/trunk/src/site/xdoc/contrib/python/using-persistent-clusters.xml
------------------------------------------------------------------------------
svn:eol-style = native
Added: incubator/whirr/trunk/src/site/xdoc/faq.xml
URL: http://svn.apache.org/viewvc/incubator/whirr/trunk/src/site/xdoc/faq.xml?rev=1130860&view=auto
==============================================================================
--- incubator/whirr/trunk/src/site/xdoc/faq.xml (added)
+++ incubator/whirr/trunk/src/site/xdoc/faq.xml Thu Jun 2 22:46:51 2011
@@ -0,0 +1,209 @@
+<?xml version="1.0" encoding="iso-8859-1"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<document xmlns="http://maven.apache.org/XDOC/2.0"
+xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+xsi:schemaLocation="http://maven.apache.org/XDOC/2.0 http://maven.apache.org/xsd/xdoc-2.0.xsd">
+ <properties></properties>
+ <body>
+ <section name="Frequently Asked Questions"></section>
+ <p>
+ <a name="how-do-i-find-my-cloud-credentials"></a>
+ </p>
+
+ <subsection name="How do I find my cloud credentials?"></subsection>
+
+ <p>On EC2:</p>
+ <ol style="list-style-type: decimal">
+ <li>Go to
+ <a class="externalLink"
+ href="http://aws-portal.amazon.com/gp/aws/developer/account/index.html?action=access-key">
+ http://aws-portal.amazon.com/gp/aws/developer/account/index.html?action=access-key</a> </li>
+ <li>Log in, if prompted</li>
+ <li>Find your Access Key ID and Secret Access Key in the "Access Credentials" section, under
+ the "Access Keys" tab. You will have to click "Show" to see the text of your secret access
+ key.</li>
+ </ol>
+ <p>Another good resource is
+ <a class="externalLink" href="http://alestic.com/2009/11/ec2-credentials">Understanding Access
+ Credentials for AWS/EC2</a> by Eric Hammond.</p>
+
+ <subsection name="Can I specify my own private key?"></subsection>
+
+ <p>Yes, by setting
+ <tt>whirr.private-key-file</tt> (or
+ <tt>--private-key-file</tt> on the command line). You should also set
+ <tt>whirr.public-key-file</tt> (
+ <tt>--public-key-file</tt> ) at the same time.</p>
+ <p>Private keys must not have a passphrase associated with them. You can check this with:</p>
+ <source>grep ENCRYPTED ~/.ssh/id_rsa</source>
+ <p>If there is no passphrase then there will be no match.</p>
+
+ <subsection name="How do I access my cluster from a different network?"></subsection>
+
+ <p>By default, access to clusters is restricted to the single IP address of the machine
+ starting the cluster, as determined by
+ <a class="externalLink" href="http://checkip.amazonaws.com/">Amazon's check IP service</a> .
+ However, some networks report multiple origin IP addresses (e.g. they round-robin between them
+ by connection), which may cause problems if the address used for later connections is different
+ to the one reported at the time of the first connection.</p>
+ <p>A related problem is when you wish to access the cluster from a different network to the one
+ it was launched from.</p>
+ <p>In these cases you can specify the IP addresses of the machines that may connect to the
+ cluster by setting the
+ <tt>client-cidrs</tt> property to a comma-separated list of
+ <a class="externalLink" href="http://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing">
+ CIDR</a> blocks.</p>
+ <p>For example,
+ <tt>208.128.0.0/16,38.102.147.107/32</tt> would allow access from the
+ <tt>208.128.0.0</tt> class B network, and the (single) IP address 38.102.147.107.</p>
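+ <p>In a cluster properties file this would be written as:</p>
+ <source>
+whirr.client-cidrs=208.128.0.0/16,38.102.147.107/32
+</source>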
+
+ <subsection name="How can I start a cluster in a particular location?"></subsection>
+
+ <p>By default clusters are started in an arbitrary location (e.g. region or data center). You
+ can control the location by setting
+ <tt>location-id</tt> (see the
+ <a href="configuration-guide.html">configuration guide</a> for details).</p>
+ <p>For example, in EC2, setting
+ <tt>location-id</tt> to
+ <tt>us-east-1</tt> would start the cluster in the US-East region, while setting it to
+ <tt>us-east-1a</tt> (note the final
+ <tt>a</tt> ) would start the cluster in that particular availability zone (
+ <tt>us-east-1a</tt> ) in the US-East region.</p>
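+ <p>In a cluster properties file this looks like:</p>
+ <source>
+whirr.location-id=us-east-1
+</source>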
+
+ <subsection name="How can I use a custom image? How can I control the cloud hardware used?">
+ </subsection>
+
+ <p>The default image used is dependent on the Cloud provider, the hardware, and the service.
+ Whirr tries to find an image with Ubuntu Server and at least 1024 MB of RAM.</p>
+ <p>Use
+ <tt>image-id</tt> to specify the image used, and
+ <tt>hardware-id</tt> to specify the hardware. Both are cloud-specific.</p>
+ <p>You can specify the amount of RAM in a cloud agnostic way by setting a value for
+ <tt>hardware-min-ram</tt> .</p>
+ <p>In addition, on EC2 you need to set
+ <tt>jclouds.ec2.ami-owners</tt> to include the AMI owner if it is
+ not Amazon, Alestic, Canonical, or RightScale.</p>
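+ <p>Putting these together, a sketch of the relevant properties (the image and hardware IDs
+ shown are EC2-specific examples, and the AMI owner ID is a placeholder):</p>
+ <source>
+whirr.image-id=us-east-1/ami-da0cf8b3
+whirr.hardware-id=m1.large
+# or, in a cloud-agnostic way:
+whirr.hardware-min-ram=1024
+jclouds.ec2.ami-owners=999999999999
+</source>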
+
+ <subsection name="How do I log in to a node in the cluster?"></subsection>
+
+ <p>On EC2, if you know the node's address you can do:</p>
+ <source>ssh -i ~/.ssh/id_rsa &lt;whirr.cluster-user&gt;@host</source>
+ <p>This assumes that you use the default private key; if this is not the case then specify the
+ one you used at cluster launch.</p>
+ <p>
+ <tt>whirr.cluster-user</tt> defaults to the name of the local user running Whirr.</p>
+ <p>
+ <a name="how-can-i-modify-the-instance-installation-and-configuration-scripts"></a>
+ </p>
+
+ <subsection name="How can I modify the instance installation and configuration scripts?">
+ </subsection>
+
+ <p>The scripts to install and configure cloud instances are searched for on the classpath.</p>
+ <p>(Note that in versions prior to 0.4.0 scripts were downloaded from S3 by default, and could
+ be overridden by setting
+ <tt>run-url-base</tt> . This property no longer has any effect, so you should instead use the
+ approach explained below.)</p>
+ <p>If you want to change the scripts, place a modified copy of them in a
+ <i>functions</i> directory in Whirr's installation directory. The original versions of the
+ scripts can be found in the
+ <i>functions</i> directories in the source trees.</p>
+ <p>For example, to override the Hadoop scripts, do the following:</p>
+ <source>
+cd $WHIRR_HOME
+mkdir functions
+cp services/hadoop/src/main/resources/functions/* functions
+</source>
+<p>Then make your changes to the copies in
+<i>functions</i>.</p>
+<p>The first port of call for debugging the scripts that run on a cloud instance is the
+<i>whirr.log</i> file in the directory from which you launched the
+<i>whirr</i> CLI.</p>
+<p>The script output in this log file may be truncated, but you can see the complete output by
+logging into the node on which the script ran (see "How do I log in to a node in the cluster?"
+above) and looking in the
+<i>/tmp/bootstrap</i> or <i>/tmp/configure</i> directories for the script itself, and the
+standard output and standard error logs.</p>
+
+<subsection name="How do I specify the service version and other service properties?">
+</subsection>
+
+<p>Some services have a property to control the version number of the software to be installed.
+This is typically achieved by setting the property
+<tt>whirr.&lt;service-name&gt;.tarball.url</tt>. Similarly, some services can have arbitrary
+service properties set.</p>
+<p>See the samples in the
+<i>recipes</i> directory for details for a particular service.</p>
+<p>In cases where neither of these configuration controls is supported, you may modify the
+scripts to install a particular version of the service, or to change the service properties
+from the defaults. See "How can I modify the instance installation and configuration scripts?"
+above for details on how to override the scripts.</p>
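+<p>For example, to pin the Hadoop version (the URL below is illustrative; substitute the
+tarball you need):</p>
+<source>
+whirr.hadoop.tarball.url=http://archive.apache.org/dist/hadoop/core/hadoop-0.20.2/hadoop-0.20.2.tar.gz
+</source>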
+
+<subsection name="How can I install custom packages?"></subsection>
+
+<p>You can install extra software by modifying the scripts that run on the cloud instances. See
+"How can I modify the instance installation and configuration scripts?" above.</p>
+
+<subsection name="How do I run Cloudera's Distribution for Hadoop?"></subsection>
+
+<p>You can run CDH rather than Apache Hadoop by running the Hadoop service and setting the
+<tt>whirr.hadoop-install-function</tt> and
+<tt>whirr.hadoop-configure-function</tt> properties. See the
+<i>recipes</i> directory in the distribution for samples.</p>
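+<p>A minimal sketch of the two properties (see the CDH recipes in the distribution for
+complete, tested configurations):</p>
+<source>
+whirr.hadoop-install-function=install_cdh_hadoop
+whirr.hadoop-configure-function=configure_cdh_hadoop
+</source>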
+<p>
+<a name="other-services"></a>
+</p>
+
+<subsection name="How do I run a Cassandra/HBase/ZooKeeper cluster?"></subsection>
+
+<p>See the
+<i>recipes</i> directory in the distribution for samples.</p>
+
+<subsection name="How do I automatically tear down a cluster after a fixed time?"></subsection>
+
+<p>It's often convenient to terminate a cluster a fixed time after launch. This is the case for
+test clusters, for example. You can achieve this by scheduling the destroy command using the
+<tt>at</tt> command from your local machine.</p>
+<p>
+<b>WARNING: The machine from which you issued the
+<tt>at</tt> command must be running (and able to contact the cloud provider) at the time it
+runs.</b>
+</p>
+<source>% echo 'bin/whirr destroy-cluster --config hadoop.properties' | at 'now + 50 min'</source>
+ <p>Note that issuing a
+ <tt>shutdown</tt> command on an instance may simply stop the instance, which is not sufficient
+ to fully terminate the instance, in which case you would continue to be charged for it. This is
+ the case for EBS boot instances, for example.</p>
+ <p>You can read more about this technique on
+ <a class="externalLink" href="http://alestic.com/2010/09/ec2-instance-termination">Eric
+ Hammond's blog</a> .</p>
+ <p>Also, Mac OS X users might find
+ <a class="externalLink"
+ href="http://superuser.com/questions/43678/mac-os-x-at-command-not-working">this thread</a> a
+ useful reference for the
+ <tt>at</tt> command.</p>
+
+ <subsection name="How do I start a machine without having a cluster role?"></subsection>
+
+ <p>Sometimes you need to provision machines in the same cluster without assigning them a
+ specific role. For this you can use "noop" as the role name when specifying the instance
+ templates.</p>
+ <source>
+whirr.instance-templates=3 zookeeper,1 noop
+# will start three machines with zookeeper and one machine just with the OS
+</source>
+ </body>
+</document>
Propchange: incubator/whirr/trunk/src/site/xdoc/faq.xml
------------------------------------------------------------------------------
svn:eol-style = native
Added: incubator/whirr/trunk/src/site/xdoc/index.xml
URL: http://svn.apache.org/viewvc/incubator/whirr/trunk/src/site/xdoc/index.xml?rev=1130860&view=auto
==============================================================================
--- incubator/whirr/trunk/src/site/xdoc/index.xml (added)
+++ incubator/whirr/trunk/src/site/xdoc/index.xml Thu Jun 2 22:46:51 2011
@@ -0,0 +1,136 @@
+<?xml version="1.0" encoding="iso-8859-1"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<document xmlns="http://maven.apache.org/XDOC/2.0"
+xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+xsi:schemaLocation="http://maven.apache.org/XDOC/2.0 http://maven.apache.org/xsd/xdoc-2.0.xsd">
+ <properties></properties>
+ <body>
+
+ <section name="Apache Whirr"></section>
+ <p>Apache Whirr is a set of libraries for running cloud services. Whirr is currently in the
+ <a class="externalLink" href="http://incubator.apache.org/">Apache Incubator</a> .</p>
+ <p>Whirr provides:</p>
+ <ul>
+ <li>A cloud-neutral way to run services. You don't have to worry about the idiosyncrasies of
+ each provider.</li>
+ <li>A common service API. The details of provisioning are particular to the service.</li>
+ <li>Smart defaults for services. You can get a properly configured system running quickly,
+ while still being able to override settings as needed.</li>
+ </ul>
+ <p>You can also use Whirr as a command line tool for deploying clusters.</p>
+ <p>Find out more about Whirr at our
+ <a class="externalLink" href="https://cwiki.apache.org/confluence/display/WHIRR">wiki</a> .</p>
+
+ <subsection name="Getting Started"></subsection>
+
+ <p>You can use Whirr's CLI or APIs to
+ <a href="quick-start-guide.html">get started with Whirr</a> .</p>
+ <p>There is also an
+ <a href="faq.html">FAQ</a> which covers how to achieve common tasks with Whirr, and a
+ <a href="configuration-guide.html">configuration guide</a> for reference.</p>
+
+ <subsection name="Getting Involved"></subsection>
+
+ <p>Have you got a suggestion for improving Whirr? It's easy to
+ <a class="externalLink"
+ href="https://cwiki.apache.org/confluence/display/WHIRR/How+To+Contribute">get
+ involved</a> .</p>
+
+ <subsection name="Which services and cloud providers are supported?"></subsection>
+
+ <p>Whirr uses
+ <a class="externalLink" href="http://code.google.com/p/jclouds/">jclouds</a> for provisioning,
+ so in principle it should support all the cloud providers that jclouds supports. The following
+ table shows the cloud provider and service combinations that have been tested.</p>
+ <table align="center" border="0">
+ <tr valign="top">
+ <th align="center">
+ <b>Cloud provider</b>
+ </th>
+ <th align="center">
+ <b>Cassandra</b>
+ </th>
+ <th align="center">
+ <b>Hadoop</b>
+ </th>
+ <th align="center">
+ <b>ZooKeeper</b>
+ </th>
+ <th align="center">
+ <b>HBase</b>
+ </th>
+ <th align="center">
+ <b>elasticsearch</b>
+ </th>
+ <th align="center">
+ <b>Voldemort</b>
+ </th>
+ </tr>
+ <tr valign="top">
+ <td align="center">Amazon EC2</td>
+ <td align="center">Yes</td>
+ <td align="center">Yes</td>
+ <td align="center">Yes</td>
+ <td align="center">Yes</td>
+ <td align="center">Yes</td>
+ <td align="center">Yes</td>
+ </tr>
+ <tr valign="top">
+ <td align="center">Rackspace Cloud Servers</td>
+ <td align="center">Yes</td>
+ <td align="center">Yes</td>
+ <td align="center">Yes</td>
+ <td align="center">Yes</td>
+ <td align="center">Yes</td>
+ <td align="center">Yes</td>
+ </tr>
+ </table>
+ <p>For development and local testing we also support the BYON (bring your own nodes)
+ jclouds provider. Check the
+ <i>recipes</i> folder for a configuration sample.</p>
+
+ <subsection name="What server operating systems are supported?"></subsection>
+
+ <p>Each release is tested by running the integration tests on Ubuntu Server 10.04. All setup
+ scripts should also work on CentOS 5.x, but we don't have a formal testing procedure in place
+ right now.</p>
+
+ <subsection name="Download"></subsection>
+
+ <p>Download a release of Whirr from a
+ <a class="externalLink" href="http://www.apache.org/dyn/closer.cgi/incubator/whirr/">nearby
+ mirror</a> .</p>
+
+ <subsection name="History"></subsection>
+
+ <p>The code that would become Whirr started out in 2007 as some
+ <a class="externalLink" href="https://issues.apache.org/jira/browse/HADOOP-884">bash scripts in
+ Apache Hadoop</a> for running Hadoop clusters on EC2. Later the scripts were
+ <a class="externalLink" href="https://issues.apache.org/jira/browse/WHIRR-3">ported to
+ Python</a> for extra features (such as EBS support) and a wider range of cloud providers. These
+ Python scripts are available today in Whirr under
+ <i>contrib/python</i>.</p>
+ <p>In May 2010 the
+ <a class="externalLink" href="http://incubator.apache.org/whirr">Apache Whirr
+ Incubator</a> project was started to give a home to the existing work that had been done, but
+ also to create a Java version using
+ <a class="externalLink" href="http://code.google.com/p/jclouds/">jclouds</a> as the cloud
+ provisioning library. jclouds supports many providers and has a rich API for running code
+ on instances, so it provides a solid foundation on which to build Whirr.</p>
+ </body>
+</document>
Propchange: incubator/whirr/trunk/src/site/xdoc/index.xml
------------------------------------------------------------------------------
svn:eol-style = native