You are viewing a plain text version of this content. The canonical link for it is here.
Posted to mapreduce-commits@hadoop.apache.org by yh...@apache.org on 2009/12/23 12:06:16 UTC
svn commit: r893472 - in /hadoop/mapreduce/branches/branch-0.21: ./ conf/
src/contrib/ src/contrib/capacity-scheduler/
src/contrib/capacity-scheduler/src/java/
src/docs/src/documentation/content/xdocs/
src/java/org/apache/hadoop/mapred/tools/
Author: yhemanth
Date: Wed Dec 23 11:06:15 2009
New Revision: 893472
URL: http://svn.apache.org/viewvc?rev=893472&view=rev
Log:
Merge -c 893469 from trunk to branch 0.21 to fix MAPREDUCE-1009.
Added:
hadoop/mapreduce/branches/branch-0.21/build-utils.xml
- copied unchanged from r893469, hadoop/mapreduce/trunk/build-utils.xml
hadoop/mapreduce/branches/branch-0.21/src/contrib/capacity-scheduler/src/java/mapred-queues.xml.template
- copied unchanged from r893469, hadoop/mapreduce/trunk/src/contrib/capacity-scheduler/src/java/mapred-queues.xml.template
Modified:
hadoop/mapreduce/branches/branch-0.21/.gitignore
hadoop/mapreduce/branches/branch-0.21/CHANGES.txt
hadoop/mapreduce/branches/branch-0.21/build.xml
hadoop/mapreduce/branches/branch-0.21/conf/capacity-scheduler.xml.template
hadoop/mapreduce/branches/branch-0.21/conf/mapred-queues.xml.template
hadoop/mapreduce/branches/branch-0.21/src/contrib/build-contrib.xml
hadoop/mapreduce/branches/branch-0.21/src/contrib/build.xml
hadoop/mapreduce/branches/branch-0.21/src/contrib/capacity-scheduler/build.xml
hadoop/mapreduce/branches/branch-0.21/src/contrib/capacity-scheduler/src/java/ (props changed)
hadoop/mapreduce/branches/branch-0.21/src/docs/src/documentation/content/xdocs/capacity_scheduler.xml
hadoop/mapreduce/branches/branch-0.21/src/docs/src/documentation/content/xdocs/cluster_setup.xml
hadoop/mapreduce/branches/branch-0.21/src/docs/src/documentation/content/xdocs/commands_manual.xml
hadoop/mapreduce/branches/branch-0.21/src/docs/src/documentation/content/xdocs/mapred_tutorial.xml
hadoop/mapreduce/branches/branch-0.21/src/docs/src/documentation/content/xdocs/site.xml
hadoop/mapreduce/branches/branch-0.21/src/java/org/apache/hadoop/mapred/tools/MRAdmin.java
Modified: hadoop/mapreduce/branches/branch-0.21/.gitignore
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/branch-0.21/.gitignore?rev=893472&r1=893471&r2=893472&view=diff
==============================================================================
--- hadoop/mapreduce/branches/branch-0.21/.gitignore (original)
+++ hadoop/mapreduce/branches/branch-0.21/.gitignore Wed Dec 23 11:06:15 2009
@@ -36,6 +36,7 @@
conf/mapred-queues.xml
docs/api/
logs/
+src/contrib/capacity-scheduler/src/java/mapred-queues.xml
src/contrib/index/conf/index-config.xml
src/docs/build
src/docs/cn/build
Modified: hadoop/mapreduce/branches/branch-0.21/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/branch-0.21/CHANGES.txt?rev=893472&r1=893471&r2=893472&view=diff
==============================================================================
--- hadoop/mapreduce/branches/branch-0.21/CHANGES.txt (original)
+++ hadoop/mapreduce/branches/branch-0.21/CHANGES.txt Wed Dec 23 11:06:15 2009
@@ -902,3 +902,6 @@
MAPREDUCE-1286. Remove quoting from client opts in TaskRunner. (Yuri
Pradkin via cdouglas)
+
+ MAPREDUCE-1009. Update forrest documentation describing hierarchical
+ queues. (Vinod Kumar Vavilapalli via yhemanth)
Modified: hadoop/mapreduce/branches/branch-0.21/build.xml
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/branch-0.21/build.xml?rev=893472&r1=893471&r2=893472&view=diff
==============================================================================
--- hadoop/mapreduce/branches/branch-0.21/build.xml (original)
+++ hadoop/mapreduce/branches/branch-0.21/build.xml Wed Dec 23 11:06:15 2009
@@ -21,6 +21,8 @@
xmlns:artifact="urn:maven-artifact-ant"
xmlns:ivy="antlib:org.apache.ivy.ant">
+ <import file="build-utils.xml" />
+
<!-- Load all the default properties, and any the user wants -->
<!-- to contribute (without having to type -D or edit this file -->
<property file="${user.home}/build.properties" />
@@ -760,6 +762,8 @@
<!-- ================================================================== -->
<target name="docs" depends="forrest.check" description="Generate forrest-based documentation. To use, specify -Dforrest.home=&lt;base of Apache Forrest installation&gt; on the command line." if="forrest.home">
+ <copy file="${conf.dir}/mapred-queues.xml.template"
+ tofile="${build.docs}/mapred-queues.xml"/>
<exec dir="${docs.src}" executable="${forrest.home}/bin/forrest"
failonerror="true">
<env key="JAVA_HOME" value="${java5.home}"/>
@@ -771,16 +775,12 @@
<style basedir="${mapred.src.dir}" destdir="${build.docs}"
includes="mapred-default.xml" style="conf/configuration.xsl"/>
<antcall target="changes-to-html"/>
+ <subant target="docs">
+ <property name="build.docs" value="${build.docs}"/>
+ <fileset file="${contrib.dir}/build.xml"/>
+ </subant>
</target>
- <target name="forrest.check" unless="forrest.home" depends="java5.check">
- <fail message="'forrest.home' is not defined. Please pass -Dforrest.home=&lt;base of Apache Forrest installation&gt; to Ant on the command-line." />
- </target>
-
- <target name="java5.check" unless="java5.home">
- <fail message="'java5.home' is not defined. Forrest requires Java 5. Please pass -Djava5.home=&lt;base of Java 5 distribution&gt; to Ant on the command-line." />
- </target>
-
<target name="javadoc-dev" depends="compile, ivy-retrieve-javadoc" description="Generate javadoc for hadoop developers">
<mkdir dir="${build.javadoc.dev}"/>
<javadoc
Modified: hadoop/mapreduce/branches/branch-0.21/conf/capacity-scheduler.xml.template
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/branch-0.21/conf/capacity-scheduler.xml.template?rev=893472&r1=893471&r2=893472&view=diff
==============================================================================
--- hadoop/mapreduce/branches/branch-0.21/conf/capacity-scheduler.xml.template (original)
+++ hadoop/mapreduce/branches/branch-0.21/conf/capacity-scheduler.xml.template Wed Dec 23 11:06:15 2009
@@ -1,77 +1,38 @@
<?xml version="1.0"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- This is one of the configuration files for capacity-scheduler
+ (org.apache.hadoop.mapred.CapacityTaskScheduler), a TaskScheduler
+ for Map/Reduce system. The other configuration file is
+ conf/mapred-queues.xml which it shares with the framework for
+ configuring queues in the system. -->
+
+<!-- This file can be used to configure (1) job-initialization-poller
+ related properties and (2) the default values for various properties
+ for all the queues.-->
+
+<configuration>
+ <!-- The default configuration settings for the capacity task scheduler -->
+ <!-- The default values would be applied to all the queues which don't have -->
+ <!-- the appropriate property for the particular queue configured in the -->
+ <!-- queue-configuration file conf/mapred-queues.xml -->
-<!-- This is the configuration file for the resource manager in Hadoop. -->
-<!-- You can configure various scheduling parameters related to queues. -->
-<!-- The properties for a queue follow a naming convention,such as, -->
-<!-- mapred.capacity-scheduler.queue.<queue-name>.property-name. -->
-
-<configuration>
-
- <property>
- <name>mapred.capacity-scheduler.queue.default.capacity</name>
- <value>100</value>
- <description>Percentage of the number of slots in the cluster that are
- to be available for jobs in this queue.
- </description>
- </property>
-
- <property>
- <name>mapred.capacity-scheduler.queue.default.subQueues</name>
- <value></value>
- <description>Sub-queues are queues configured within queues.
- They provide a mechanism for administrators to link logically related queues
- Sub-queues can be nested. So there can be queues within a sub-queue.
- </description>
- </property>
-
- <property>
- <name>mapred.capacity-scheduler.queue.default.maximum-capacity</name>
- <value>-1</value>
- <description>
- maximum-capacity-stretch defines a limit beyond which a sub-queue cannot use the capacity of its parent queue.
- This provides a means to limit how much excess capacity a sub-queue can use. By default, there is no limit.
- The maximum-capacity-stretch of a queue can only be greater than or equal to its minimum capacity.
- Default value of 100 implies , sub-queue can use complete capacity of its parent.
- This property could be to curtail certain jobs which are long running in nature from occupying more than a
- certain percentage of the cluster, which in the absence of pre-emption, could lead to capacity guarantees of
- other queues being affected.
- </description>
- </property>
-
- <property>
- <name>mapred.capacity-scheduler.queue.default.supports-priority</name>
- <value>false</value>
- <description>If true, priorities of jobs will be taken into
- account in scheduling decisions.
- </description>
- </property>
-
- <property>
- <name>mapred.capacity-scheduler.queue.default.minimum-user-limit-percent</name>
- <value>100</value>
- <description> Each queue enforces a limit on the percentage of resources
- allocated to a user at any given time, if there is competition for them.
- This user limit can vary between a minimum and maximum value. The former
- depends on the number of users who have submitted jobs, and the latter is
- set to this property value. For example, suppose the value of this
- property is 25. If two users have submitted jobs to a queue, no single
- user can use more than 50% of the queue resources. If a third user submits
- a job, no single user can use more than 33% of the queue resources. With 4
- or more users, no user can use more than 25% of the queue's resources. A
- value of 100 implies no user limits are imposed.
- </description>
- </property>
- <property>
- <name>mapred.capacity-scheduler.queue.default.maximum-initialized-jobs-per-user</name>
- <value>2</value>
- <description>The maximum number of jobs to be pre-initialized for a user
- of the job queue.
- </description>
- </property>
-
- <!-- The default configuration settings for the capacity task scheduler -->
- <!-- The default values would be applied to all the queues which don't have -->
- <!-- the appropriate property for the particular queue -->
<property>
<name>mapred.capacity-scheduler.default-supports-priority</name>
<value>false</value>
Modified: hadoop/mapreduce/branches/branch-0.21/conf/mapred-queues.xml.template
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/branch-0.21/conf/mapred-queues.xml.template?rev=893472&r1=893471&r2=893472&view=diff
==============================================================================
--- hadoop/mapreduce/branches/branch-0.21/conf/mapred-queues.xml.template (original)
+++ hadoop/mapreduce/branches/branch-0.21/conf/mapred-queues.xml.template Wed Dec 23 11:06:15 2009
@@ -1,4 +1,20 @@
<?xml version="1.0"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
<!-- This is the template for queue configuration. The format supports nesting of
queues within queues - a feature called hierarchical queues. All queues are
defined within the 'queues' tag which is the top level element for this
Modified: hadoop/mapreduce/branches/branch-0.21/src/contrib/build-contrib.xml
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/branch-0.21/src/contrib/build-contrib.xml?rev=893472&r1=893471&r2=893472&view=diff
==============================================================================
--- hadoop/mapreduce/branches/branch-0.21/src/contrib/build-contrib.xml (original)
+++ hadoop/mapreduce/branches/branch-0.21/src/contrib/build-contrib.xml Wed Dec 23 11:06:15 2009
@@ -21,6 +21,8 @@
<project name="hadoopbuildcontrib" xmlns:ivy="antlib:org.apache.ivy.ant">
+ <import file="../../build-utils.xml" />
+
<property name="name" value="${ant.project.name}"/>
<property name="root" value="${basedir}"/>
@@ -259,6 +261,10 @@
<antcall target="checkfailure"/>
</target>
+ <target name="docs" depends="forrest.check" description="Generate forrest-based documentation. To use, specify -Dforrest.home=&lt;base of Apache Forrest installation&gt; on the command line." if="forrest.home">
+ <!-- Nothing by default -->
+ </target>
+
<target name="checkfailure" if="tests.failed">
<touch file="${build.contrib.dir}/testsfailed"/>
<fail unless="continueOnFailure">Contrib Tests failed!</fail>
Modified: hadoop/mapreduce/branches/branch-0.21/src/contrib/build.xml
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/branch-0.21/src/contrib/build.xml?rev=893472&r1=893471&r2=893472&view=diff
==============================================================================
--- hadoop/mapreduce/branches/branch-0.21/src/contrib/build.xml (original)
+++ hadoop/mapreduce/branches/branch-0.21/src/contrib/build.xml Wed Dec 23 11:06:15 2009
@@ -64,6 +64,12 @@
<fail if="testsfailed">Tests failed!</fail>
</target>
+ <target name="docs">
+ <subant target="docs">
+ <fileset dir="." includes="capacity-scheduler/build.xml"/>
+ </subant>
+ </target>
+
<!-- ====================================================== -->
<!-- Clean all the contribs. -->
<!-- ====================================================== -->
Modified: hadoop/mapreduce/branches/branch-0.21/src/contrib/capacity-scheduler/build.xml
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/branch-0.21/src/contrib/capacity-scheduler/build.xml?rev=893472&r1=893471&r2=893472&view=diff
==============================================================================
--- hadoop/mapreduce/branches/branch-0.21/src/contrib/capacity-scheduler/build.xml (original)
+++ hadoop/mapreduce/branches/branch-0.21/src/contrib/capacity-scheduler/build.xml Wed Dec 23 11:06:15 2009
@@ -25,4 +25,12 @@
<import file="../build-contrib.xml"/>
+ <target name="docs" depends="forrest.check" description="Generate forrest-based documentation. To use, specify -Dforrest.home=&lt;base of Apache Forrest installation&gt; on the command line." if="forrest.home">
+ <copy file="src/java/mapred-queues.xml.template"
+ tofile="${build.docs}/mapred-queues-capacity-scheduler.xml"/>
+ <xslt in="${conf.dir}/capacity-scheduler.xml.template"
+ out="${build.docs}/capacity-scheduler-conf.html"
+ style="${conf.dir}/configuration.xsl"/>
+ </target>
+
</project>
Propchange: hadoop/mapreduce/branches/branch-0.21/src/contrib/capacity-scheduler/src/java/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Wed Dec 23 11:06:15 2009
@@ -0,0 +1 @@
+mapred-queues.xml
Modified: hadoop/mapreduce/branches/branch-0.21/src/docs/src/documentation/content/xdocs/capacity_scheduler.xml
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/branch-0.21/src/docs/src/documentation/content/xdocs/capacity_scheduler.xml?rev=893472&r1=893471&r2=893472&view=diff
==============================================================================
--- hadoop/mapreduce/branches/branch-0.21/src/docs/src/documentation/content/xdocs/capacity_scheduler.xml (original)
+++ hadoop/mapreduce/branches/branch-0.21/src/docs/src/documentation/content/xdocs/capacity_scheduler.xml Wed Dec 23 11:06:15 2009
@@ -40,7 +40,8 @@
<p>The Capacity Scheduler supports the following features:</p>
<ul>
<li>
- Multiple queues, where a job is submitted to a queue.
+ Multiple queues, possibly hierarchical/recursive, where a job is
+ submitted to a queue.
</li>
<li>
Queues are allocated a fraction of the capacity of the grid in the
@@ -72,11 +73,23 @@
competition for them.
</li>
<li>
+ Queues can use idle resources of other queues. In order to prevent
+ monopolizing of resources by particular queues, each queue can be
+ set a cap on the maximum number of resources it can expand to in
+ the presence of idle resources in other queues of the cluster.
+ </li>
+ <li>
Support for memory-intensive jobs, wherein a job can optionally
specify higher memory-requirements than the default, and the tasks
of the job will only be run on TaskTrackers that have enough memory
to spare.
</li>
+ <li>
+ Support for refreshing/reloading some of the queue-properties
+ without restarting the JobTracker, taking advantage of the
+ <a href="cluster_setup.html#Refreshing+queue+configuration">
+ queue-refresh</a> feature in the framework.
+ </li>
</ul>
</section>
@@ -144,54 +157,113 @@
<section>
<title>Setting Up Queues</title>
<p>
- You can define multiple queues to which users can submit jobs with
- the Capacity Scheduler. To define multiple queues, you should edit
- the site configuration for Hadoop and modify the
- <em>mapreduce.jobtracker.taskscheduler.queue.names</em> property.
- </p>
- <p>
- You can also configure ACLs for controlling which users or groups
- have access to the queues.
- </p>
- <p>
- For more details, see
- <a href="http://hadoop.apache.org/common/docs/current/cluster_setup.html#Configuring+the+Hadoop+Daemons">Configuring the Hadoop Daemons</a>.
- </p>
+ You can define multiple, possibly hierarchical queues to which users
+ can submit jobs with the Capacity Scheduler. To define queues,
+ various properties should be set in two configuration files -
+ <a href="cluster_setup.html#mapred-queues.xml">mapred-queues.xml</a>
+ and
+ <a href="ext:capacity-scheduler-conf">conf/capacity-scheduler.xml</a>
+ .</p>
+ <p><em>conf/capacity-scheduler.xml</em> can be used to configure (1)
+ job-initialization-poller related properties and (2) the
+ default values for various properties in the queues</p>
+ <p><em>conf/mapred-queues.xml</em> contains the actual queue
+ configuration including (1) framework specific properties like ACLs
+ for controlling which users or groups have access to the queues and
+ state of the queues and (2) the scheduler specific properties for
+ each queue. If any of these scheduler specific properties are
+ missing and not configured for a queue, then the properties in
+ <em>conf/capacity-scheduler.xml</em> are used to set default values.
+ More details about the properties that can be configured, and their
+ semantics is mentioned below. Also, a default template for
+ mapred-queues.xml tailored for using with
+ Capacity-scheduler can be found
+ <a href="ext:mapred-queues-capacity-scheduler">here</a>.</p>
</section>
<section>
<title>Configuring Properties for Queues</title>
<p>The Capacity Scheduler can be configured with several properties
- for each queue that control the behavior of the Scheduler. This
- configuration is in the <em>conf/capacity-scheduler.xml</em>. By
+ for each queue that control the behavior of the Scheduler. As
+ described above, this scheduler specific configuration has to be in
+ the <em>conf/mapred-queues.xml</em> along with the rest of the
+ framework specific configuration. By
default, the configuration is set up for one queue, named
<em>default</em>.</p>
- <p>To specify a property for a queue that is defined in the site
- configuration, you should use the property name as
- <em>mapred.capacity-scheduler.queue.&lt;queue-name&gt;.&lt;property-name&gt;</em>.
- </p>
- <p>For example, to define the property <em>capacity</em>
- for queue named <em>research</em>, you should specify the property
- name as
- <em>mapred.capacity-scheduler.queue.research.capacity</em>.
+ <p>To specify a property for a specific queue that is defined in the
+ mapred-queues.xml, you should set the corresponding property in a
+ &lt;property&gt; tag explained
+ <a href="cluster_setup.html#property_tag">here</a>.
</p>
<p>The properties defined for queues and their descriptions are
listed in the table below:</p>
<table>
- <tr><th>Name</th><th>Description</th></tr>
- <tr><td>mapred.capacity-scheduler.queue.&lt;queue-<br/>name&gt;.capacity</td>
- <td>Percentage of the number of slots in the cluster that are made
- to be available for jobs in this queue. The sum of capacities
- for all queues should be less than or equal 100.</td>
+ <tr>
+ <th>Name</th>
+ <th>
+ <a href="commands_manual.html#RefreshQueues">
+ Refresh-able?</a>
+ </th>
+ <th>Applicable to?</th>
+ <th>Description</th>
+ </tr>
+ <tr>
+ <td>capacity</td>
+ <td>Yes</td>
+ <td>Container queues as well as leaf queues</td>
+ <td>For a root-level container queue, this is the percentage of the
+ number of slots in the cluster that will be available for all its
+ immediate children together. For a root-level leaf-queue, this is
+ the percentage of the number of slots in the cluster that will be
+ available for all its jobs. For a non-root level container queue,
+ this is the percentage of the number of slots in its parent queue
+ that will be available for all its children together. For a
+ non-root-level leaf queue, this is the percentage of the number of
+ slots in its parent queue that will be available for jobs in this
+ queue. The sum of capacities for all children of a container queue
+ should be less than or equal 100. The sum of capacities of all the
+ root-level queues should be less than or equal to 100.
+ </td>
+ </tr>
+ <tr>
+ <td>maximum-capacity</td>
+ <td>Yes</td>
+ <td>Container queues as well as leaf queues</td>
+ <td>
+ A limit in percentage beyond which a non-root-level queue cannot use
+ the capacity of its parent queue; for a root-level queue, this is
+ the limit in percentage beyond which it cannot use the
+ cluster-capacity. This property provides a means to limit how much
+ excess capacity a queue can use. It can be used to prevent queues
+ with long running jobs from occupying more than a certain percentage
+ of the parent-queue or the cluster, which, in the absence of
+ pre-emption, can lead to capacity guarantees of other queues getting
+ affected.
+
+ The maximum-capacity of a queue can only be greater than or equal to
+ its capacity. By default, there is no limit for a queue. For a
+ non-root-level queue this means it can occupy till the
+ maximum-capacity of its parent, for a root-level queue, it means that
+ it can occupy the whole cluster. A value of 100 implies that a queue
+ can use the complete capacity of its parent, or the complete
+ cluster-capacity in case of root-level-queues.
+ </td>
</tr>
- <tr><td>mapred.capacity-scheduler.queue.&lt;queue-<br/>name&gt;.supports-priority</td>
+ <tr>
+ <td>supports-priority</td>
+ <td>No</td>
+ <td>Leaf queues only</td>
<td>If true, priorities of jobs will be taken into account in scheduling
- decisions.</td>
+ decisions.
+ </td>
</tr>
- <tr><td>mapred.capacity-scheduler.queue.&lt;queue-<br/>name&gt;.minimum-user-limit-percent</td>
+ <tr>
+ <td>minimum-user-limit-percent</td>
+ <td>Yes</td>
+ <td>Leaf queues only</td>
<td>Each queue enforces a limit on the percentage of resources
allocated to a user at any given time, if there is competition
for them. This user limit can vary between a minimum and maximum
@@ -202,9 +274,25 @@
of the queue resources. If a third user submits a job, no single
user can use more than 33% of the queue resources. With 4 or more
users, no user can use more than 25% of the queue's resources. A
- value of 100 implies no user limits are imposed.</td>
+ value of 100 implies no user limits are imposed.
+ </td>
</tr>
+ <tr>
+ <td>maximum-initialized-jobs-per-user</td>
+ <td>Yes</td>
+ <td>Leaf queues only</td>
+ <td>
+ Maximum number of jobs which are allowed to be pre-initialized for
+ a particular user in the queue. Once a job is scheduled, i.e.
+ it starts running, then that job is not considered
+ while scheduler computes the maximum job a user is allowed to
+ initialize.
+ </td>
+ </tr>
</table>
+ <p>See <a href="ext:mapred-queues-capacity-scheduler">
+ this configuration file</a> for a default configuration of queues in
+ capacity-scheduler.</p>
</section>
<section>
@@ -296,18 +384,6 @@
<tr><th>Name</th><th>Description</th></tr>
<tr>
<td>
- mapred.capacity-scheduler.queue.&lt;queue-<br/>name&gt;.maximum-initialized-jobs-per-user
- </td>
- <td>
- Maximum number of jobs which are allowed to be pre-initialized for
- a particular user in the queue. Once a job is scheduled, i.e.
- it starts running, then that job is not considered
- while scheduler computes the maximum job a user is allowed to
- initialize.
- </td>
- </tr>
- <tr>
- <td>
mapred.capacity-scheduler.init-poll-interval
</td>
<td>
Modified: hadoop/mapreduce/branches/branch-0.21/src/docs/src/documentation/content/xdocs/cluster_setup.xml
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/branch-0.21/src/docs/src/documentation/content/xdocs/cluster_setup.xml?rev=893472&r1=893471&r2=893472&view=diff
==============================================================================
--- hadoop/mapreduce/branches/branch-0.21/src/docs/src/documentation/content/xdocs/cluster_setup.xml (original)
+++ hadoop/mapreduce/branches/branch-0.21/src/docs/src/documentation/content/xdocs/cluster_setup.xml Wed Dec 23 11:06:15 2009
@@ -33,7 +33,7 @@
Hadoop clusters ranging from a few nodes to extremely large clusters with
thousands of nodes.</p>
<p>
- To play with Hadoop, you may first want to install Hadoop on a single machine (see <a href="quickstart.html"> Hadoop Quick Start</a>).
+ To play with Hadoop, you may first want to install Hadoop on a single machine (see <a href="ext:single-node-setup"> Hadoop Quick Start</a>).
</p>
</section>
@@ -42,11 +42,11 @@
<ol>
<li>
- Make sure all <a href="quickstart.html#PreReqs">requisite</a> software
+ Make sure all <a href="ext:single-node-setup/PreReqs">requisite</a> software
is installed on all nodes in your cluster.
</li>
<li>
- <a href="quickstart.html#Download">Get</a> the Hadoop software.
+ <a href="ext:single-node-setup/Download">Get</a> the Hadoop software.
</li>
</ol>
</section>
@@ -81,15 +81,17 @@
<ol>
<li>
Read-only default configuration -
- <a href="ext:core-default">src/core/core-default.xml</a>,
- <a href="ext:hdfs-default">src/hdfs/hdfs-default.xml</a> and
- <a href="ext:mapred-default">src/mapred/mapred-default.xml</a>.
+ <a href="ext:common-default">src/core/core-default.xml</a>,
+ <a href="ext:hdfs-default">src/hdfs/hdfs-default.xml</a>,
+ <a href="ext:mapred-default">src/mapred/mapred-default.xml</a> and
+ <a href="ext:mapred-queues">conf/mapred-queues.xml.template</a>.
</li>
<li>
Site-specific configuration -
- <em>conf/core-site.xml</em>,
- <em>conf/hdfs-site.xml</em> and
- <em>conf/mapred-site.xml</em>.
+ <a href="#core-site.xml">conf/core-site.xml</a>,
+ <a href="#hdfs-site.xml">conf/hdfs-site.xml</a>,
+ <a href="#mapred-site.xml">conf/mapred-site.xml</a> and
+ <a href="#mapred-queues.xml">conf/mapred-queues.xml</a>.
</li>
</ol>
@@ -163,9 +165,8 @@
<title>Configuring the Hadoop Daemons</title>
<p>This section deals with important parameters to be specified in the
- following:
- <br/>
- <code>conf/core-site.xml</code>:</p>
+ following:</p>
+ <anchor id="core-site.xml"/><p><code>conf/core-site.xml</code>:</p>
<table>
<tr>
@@ -180,7 +181,7 @@
</tr>
</table>
- <p><br/><code>conf/hdfs-site.xml</code>:</p>
+ <anchor id="hdfs-site.xml"/><p><code>conf/hdfs-site.xml</code>:</p>
<table>
<tr>
@@ -212,7 +213,7 @@
</tr>
</table>
- <p><br/><code>conf/mapred-site.xml</code>:</p>
+ <anchor id="mapred-site.xml"/><p><code>conf/mapred-site.xml</code>:</p>
<table>
<tr>
@@ -271,83 +272,321 @@
TaskTrackers.
</td>
</tr>
- <tr>
- <td>mapred.queue.names</td>
- <td>Comma separated list of queues to which jobs can be submitted.</td>
- <td>
- The Map/Reduce system always supports atleast one queue
- with the name as <em>default</em>. Hence, this parameter's
- value should always contain the string <em>default</em>.
- Some job schedulers supported in Hadoop, like the
- <a href="capacity_scheduler.html">Capacity
- Scheduler</a>, support multiple queues. If such a scheduler is
- being used, the list of configured queue names must be
- specified here. Once queues are defined, users can submit
- jobs to a queue using the property name
- <em>mapreduce.job.queuename</em> in the job configuration.
- There could be a separate
- configuration file for configuring properties of these
- queues that is managed by the scheduler.
- Refer to the documentation of the scheduler for information on
- the same.
- </td>
- </tr>
- <tr>
- <td>mapred.acls.enabled</td>
- <td>Specifies whether ACLs are supported for controlling job
- submission and administration</td>
- <td>
- If <em>true</em>, ACLs would be checked while submitting
- and administering jobs. ACLs can be specified using the
- configuration parameters of the form
- <em>mapred.queue.queue-name.acl-name</em>, defined below.
- </td>
- </tr>
- </table>
-
- <p><br/><code> conf/mapred-queue-acls.xml</code></p>
-
- <table>
- <tr>
- <th>Parameter</th>
- <th>Value</th>
- <th>Notes</th>
- </tr>
- <tr>
- <td>mapred.queue.<em>queue-name</em>.acl-submit-job</td>
- <td>List of users and groups that can submit jobs to the
- specified <em>queue-name</em>.</td>
- <td>
- The list of users and groups are both comma separated
- list of names. The two lists are separated by a blank.
- Example: <em>user1,user2 group1,group2</em>.
- If you wish to define only a list of groups, provide
- a blank at the beginning of the value.
- </td>
- </tr>
- <tr>
- <td>mapred.queue.<em>queue-name</em>.acl-administer-job</td>
- <td>List of users and groups that can change the priority
- or kill jobs that have been submitted to the
- specified <em>queue-name</em>.</td>
- <td>
- The list of users and groups are both comma separated
- list of names. The two lists are separated by a blank.
- Example: <em>user1,user2 group1,group2</em>.
- If you wish to define only a list of groups, provide
- a blank at the beginning of the value. Note that an
- owner of a job can always change the priority or kill
- his/her own job, irrespective of the ACLs.
- </td>
- </tr>
- </table>
-
+ </table>
<p>Typically all the above parameters are marked as
<a href="ext:api/org/apache/hadoop/conf/configuration/final_parameters">
final</a> to ensure that they cannot be overridden by user-applications.
</p>
+ <anchor id="mapred-queues.xml"/><p><code>conf/mapred-queues.xml
+ </code>:</p>
+ <p>This file is used to configure the queues in the Map/Reduce
+ system. Queues are abstract entities in the JobTracker that can be
+ used to manage collections of jobs. They provide a way for
+ administrators to organize jobs in specific ways and to enforce
+ certain policies on such collections, thus providing varying
+ levels of administrative control and management functions on jobs.
+ </p>
+ <p>One can imagine the following sample scenarios:</p>
+ <ul>
+ <li> Jobs submitted by a particular group of users can all be
+ submitted to one queue. </li>
+ <li> Long running jobs in an organization can be submitted to a
+ queue. </li>
+ <li> Short running jobs can be submitted to a queue and the number
+ of jobs that can run concurrently can be restricted. </li>
+ </ul>
+ <p>The usage of queues is closely tied to the scheduler configured
+ at the JobTracker via <em>mapreduce.jobtracker.taskscheduler</em>.
+ The degree of support of queues depends on the scheduler used. Some
+ schedulers support a single queue, while others support more complex
+ configurations. Schedulers also implement the policies that apply
+ to jobs in a queue. Some schedulers, such as the Fairshare scheduler,
+ implement their own mechanisms for collections of jobs and do not rely
+ on queues provided by the framework. The administrators are
+ encouraged to refer to the documentation of the scheduler they are
+ interested in for determining the level of support for queues.</p>
+ <p>The Map/Reduce framework supports some basic operations on queues
+ such as job submission to a specific queue, access control for queues,
+ queue states, viewing configured queues and their properties
+ and refresh of queue properties. In order to fully implement some of
+ these operations, the framework takes the help of the configured
+ scheduler.</p>
+ <p>The following types of queue configurations are possible:</p>
+ <ul>
+ <li> Single queue: The default configuration in Map/Reduce consists
+ of a single queue, as supported by the default scheduler. All jobs
+ are submitted to this default queue which maintains jobs in a priority
+ based FIFO order.</li>
+ <li> Multiple single level queues: Multiple queues are defined, and
+ jobs can be submitted to any of these queues. Different policies
+ can be applied to these queues by schedulers that support this
+ configuration to provide a better level of support. For example,
+ the <a href="capacity_scheduler.html">capacity scheduler</a>
+ provides ways of configuring different
+ capacity and fairness guarantees on these queues.</li>
+ <li> Hierarchical queues: Hierarchical queues are a configuration in
+ which queues can contain other queues within them recursively. The
+ queues that contain other queues are referred to as
+ container queues. Queues that do not contain other queues are
+ referred to as leaf or job queues. Jobs can only be submitted to leaf
+ queues. Hierarchical queues can potentially offer a higher level
+ of control to administrators, as schedulers can now build a
+ hierarchy of policies where policies applicable to a container
+ queue can provide context for policies applicable to queues it
+ contains. It also opens up possibilities for delegating queue
+ administration where administration of queues in a container queue
+ can be turned over to a different set of administrators, within
+ the context provided by the container queue. For example, the
+ <a href="capacity_scheduler.html">capacity scheduler</a>
+ uses hierarchical queues to partition capacity of a cluster
+ among container queues, and allows the queues they contain to divide
+ that capacity in more ways.</li>
+ </ul>
+
+ <p>Most of the configuration of the queues can be refreshed/reloaded
+ without restarting the Map/Reduce sub-system by editing this
+ configuration file as described in the section on
+ <a href="commands_manual.html#RefreshQueues">reloading queue
+ configuration</a>.
+ Not all configuration properties can be reloaded, of course,
+ as the description of each property below explains.</p>
+
+ <p>The format of conf/mapred-queues.xml is different from the other
+ configuration files, supporting nested configuration
+ elements to support hierarchical queues. The format is as follows:
+ </p>
+
+ <source>
+ <queues aclsEnabled="$aclsEnabled">
+ <queue>
+ <name>$queue-name</name>
+ <state>$state</state>
+ <queue>
+ <name>$child-queue1</name>
+ <properties>
+ <property key="$key" value="$value"/>
+ ...
+ </properties>
+ <queue>
+ <name>$grand-child-queue1</name>
+ ...
+ </queue>
+ </queue>
+ <queue>
+ <name>$child-queue2</name>
+ ...
+ </queue>
+ ...
+ ...
+ ...
+ <queue>
+ <name>$leaf-queue</name>
+ <acl-submit-job>$acls</acl-submit-job>
+ <acl-administer-jobs>$acls</acl-administer-jobs>
+ <properties>
+ <property key="$key" value="$value"/>
+ ...
+ </properties>
+ </queue>
+ </queue>
+ </queues>
+ </source>
+ <table>
+ <tr>
+ <th>Tag/Attribute</th>
+ <th>Value</th>
+ <th>
+ <a href="commands_manual.html#RefreshQueues">Refresh-able?</a>
+ </th>
+ <th>Notes</th>
+ </tr>
+
+ <tr>
+ <td><anchor id="queues_tag"/>queues</td>
+ <td>Root element of the configuration file.</td>
+ <td>Not applicable</td>
+ <td>All the queues are nested inside this root element of the
+ file. There can be only one root queues element in the file.</td>
+ </tr>
+
+ <tr>
+ <td>aclsEnabled</td>
+ <td>Boolean attribute to the
+ <a href="#queues_tag"><em><queues></em></a> tag
+ specifying whether ACLs are supported for controlling job
+ submission and administration for <em>all</em> the queues
+ configured.
+ </td>
+ <td>Yes</td>
+ <td>If <em>false</em>, ACLs are ignored for <em>all</em> the
+ configured queues. <br/><br/>
+ If <em>true</em>, the user and group details of the user
+ are checked against the configured ACLs of the corresponding
+ job-queue while submitting and administering jobs. ACLs can be
+ specified for each queue using the queue-specific tags
+ "acl-$acl_name", defined below. ACLs are checked only against
+ the job-queues, i.e. the leaf-level queues; ACLs configured
+ for the rest of the queues in the hierarchy are ignored.
+ </td>
+ </tr>
+
+ <tr>
+ <td><anchor id="queue_tag"/>queue</td>
+ <td>A child element of the
+ <a href="#queues_tag"><em><queues></em></a> tag or another
+ <a href="#queue_tag"><em><queue></em></a>. Denotes a queue
+ in the system.
+ </td>
+ <td>Not applicable</td>
+ <td>Queues can be hierarchical and so this element can contain
+ children of this same type.</td>
+ </tr>
+
+ <tr>
+ <td>name</td>
+ <td>Child element of a
+ <a href="#queue_tag"><em><queue></em></a> specifying the
+ name of the queue.</td>
+ <td>No</td>
+ <td>Name of the queue cannot contain the character <em>":"</em>
+ which is reserved as the queue-name delimiter when addressing a
+ queue in a hierarchy.</td>
+ </tr>
+
+ <tr>
+ <td>state</td>
+ <td>Child element of a
+ <a href="#queue_tag"><em><queue></em></a> specifying the
+ state of the queue.
+ </td>
+ <td>Yes</td>
+ <td>Each queue has a corresponding state. A queue in
+ <em>'running'</em> state can accept new jobs, while a queue in
+ <em>'stopped'</em> state will stop accepting any new jobs. State
+ is defined and respected by the framework only for the
+ leaf-level queues and is ignored for all other queues.
+ <br/><br/>
+ The state of the queue can be viewed from the command line using
+ <code>'bin/mapred queue'</code> command and also on the Web
+ UI.<br/><br/>
+ Administrators can stop and start queues at runtime using the
+ feature of <a href="commands_manual.html#RefreshQueues">reloading
+ queue configuration</a>. If a queue is stopped at runtime, it
+ will complete all the existing running jobs and will stop
+ accepting any new jobs.
+ </td>
+ </tr>
+
+ <tr>
+ <td>acl-submit-job</td>
+ <td>Child element of a
+ <a href="#queue_tag"><em><queue></em></a> specifying the
+ list of users and groups that can submit jobs to the specified
+ queue.</td>
+ <td>Yes</td>
+ <td>
+ Applicable only to leaf-queues.<br/><br/>
+ The list of users and groups are both comma separated
+ list of names. The two lists are separated by a blank.
+ Example: <em>user1,user2 group1,group2</em>.
+ If you wish to define only a list of groups, provide
+ a blank at the beginning of the value.
+ <br/><br/>
+ </td>
+ </tr>
+
+ <tr>
+ <td>acl-administer-jobs</td>
+ <td>Child element of a
+ <a href="#queue_tag"><em><queue></em></a> specifying the
+ list of users and groups that can change the priority of a job
+ or kill a job that has been submitted to the specified queue.
+ </td>
+ <td>Yes</td>
+ <td>
+ Applicable only to leaf-queues.<br/><br/>
+ The list of users and groups are both comma separated
+ list of names. The two lists are separated by a blank.
+ Example: <em>user1,user2 group1,group2</em>.
+ If you wish to define only a list of groups, provide
+ a blank at the beginning of the value. Note that an
+ owner of a job can always change the priority or kill
+ his/her own job, irrespective of the ACLs.
+ </td>
+ </tr>
+
+ <tr>
+ <td><anchor id="properties_tag"/>properties</td>
+ <td>Child element of a
+ <a href="#queue_tag"><em><queue></em></a> specifying the
+ scheduler specific properties.</td>
+ <td>Not applicable</td>
+ <td>The scheduler specific properties are the children of this
+ element specified as a group of <property> tags described
+ below. The JobTracker completely ignores these properties. These
+ can be used as per-queue properties needed by the scheduler
+ being configured. Please look at the scheduler specific
+ documentation as to how these properties are used by that
+ particular scheduler.
+ </td>
+ </tr>
+
+ <tr>
+ <td><anchor id="property_tag"/>property</td>
+ <td>Child element of
+ <a href="#properties_tag"><em><properties></em></a> for a
+ specific queue.</td>
+ <td>Not applicable</td>
+ <td>A single scheduler specific queue-property. Ignored by
+ the JobTracker and used by the scheduler that is configured.</td>
+ </tr>
+
+ <tr>
+ <td>key</td>
+ <td>Attribute of a
+ <a href="#property_tag"><em><property></em></a> for a
+ specific queue.</td>
+ <td>Scheduler-specific</td>
+ <td>The name of a single scheduler specific queue-property.</td>
+ </tr>
+
+ <tr>
+ <td>value</td>
+ <td>Attribute of a
+ <a href="#property_tag"><em><property></em></a> for a
+ specific queue.</td>
+ <td>Scheduler-specific</td>
+ <td>The value of a single scheduler specific queue-property.
+ The value can be anything; its interpretation is left to the
+ scheduler that is configured.</td>
+ </tr>
+
+ </table>
+
+ <p>Once the queues are configured properly and the Map/Reduce
+ system is up and running, from the command line one can
+ <a href="commands_manual.html#QueuesList">get the list
+ of queues</a> and
+ <a href="commands_manual.html#QueuesInfo">obtain
+ information specific to each queue</a>. This information is also
+ available from the web UI. On the web UI, queue information can be
+ seen by going to queueinfo.jsp, linked to from the queues table-cell
+ in the cluster-summary table. The queueinfo.jsp prints the hierarchy
+ of queues as well as the specific information for each queue.
+ </p>
+
+ <p> Users can submit jobs only to a
+ leaf-level queue by specifying the fully-qualified queue-name for
+ the property name <em>mapreduce.job.queuename</em> in the job
+ configuration. The character ':' is the queue-name delimiter and so,
+ for example, if one wants to submit to a configured job-queue 'Queue-C'
+ which is one of the sub-queues of 'Queue-B' which in-turn is a
+ sub-queue of 'Queue-A', then the job configuration should contain
+ property <em>mapreduce.job.queuename</em> set to the <em>
+ <value>Queue-A:Queue-B:Queue-C</value></em></p>
+ </section>
<section>
<title>Real-World Cluster Configurations</title>
@@ -881,7 +1120,6 @@
<code>$ bin/hadoop job -history all output-dir</code><br/></p>
</section>
</section>
- </section>
<p>Once all the necessary configuration is complete, distribute the files
to the <code>HADOOP_CONF_DIR</code> directory on all the machines,
@@ -952,7 +1190,7 @@
and starts the <code>TaskTracker</code> daemon on all the listed slaves.
</p>
</section>
-
+
<section>
<title>Hadoop Shutdown</title>
Modified: hadoop/mapreduce/branches/branch-0.21/src/docs/src/documentation/content/xdocs/commands_manual.xml
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/branch-0.21/src/docs/src/documentation/content/xdocs/commands_manual.xml?rev=893472&r1=893471&r2=893472&view=diff
==============================================================================
--- hadoop/mapreduce/branches/branch-0.21/src/docs/src/documentation/content/xdocs/commands_manual.xml (original)
+++ hadoop/mapreduce/branches/branch-0.21/src/docs/src/documentation/content/xdocs/commands_manual.xml Wed Dec 23 11:06:15 2009
@@ -369,13 +369,13 @@
<th> COMMAND_OPTION </th><th> Description </th>
</tr>
<tr>
- <td><code>-list</code> </td>
+ <td><anchor id="QueuesList"/><code>-list</code> </td>
<td>Gets the list of Job Queues configured in the system, along with the scheduling information
associated with the job queues.
</td>
</tr>
<tr>
- <td><code>-info <job-queue-name> [-showJobs]</code></td>
+ <td><anchor id="QueuesInfo"/><code>-info <job-queue-name> [-showJobs]</code></td>
<td>
Displays the job queue information and associated scheduling information of a particular
job queue. If the -showJobs option is present, a list of jobs submitted to the particular job
@@ -581,16 +581,61 @@
<p>Runs MR admin client</p>
<p><code>Usage: hadoop mradmin [</code>
<a href="commands_manual.html#Generic+Options">GENERIC_OPTIONS</a>
- <code>] [-refreshQueueAcls] </code></p>
+ <code>] [-refreshServiceAcl] [-refreshQueues] [-refreshNodes] [-help [cmd]] </code></p>
<table>
<tr>
<th> COMMAND_OPTION </th><th> Description </th>
</tr>
<tr>
- <td><code>-refreshQueueAcls</code></td>
- <td> Refresh the queue acls used by Hadoop, to check access during submissions
- and administration of the job by the user. The properties present in
- <code>mapred-queue-acls.xml</code> is reloaded by the queue manager.</td>
+ <td><code>-refreshServiceAcl</code></td>
+ <td> Reload the service-level authorization policies. Jobtracker
+ will reload the authorization policy file.</td>
+ </tr>
+ <tr>
+ <td><anchor id="RefreshQueues"/><code>-refreshQueues</code></td>
+ <td><p> Reload the queues' configuration at the JobTracker.
+ Most of the configuration of the queues can be refreshed/reloaded
+ without restarting the Map/Reduce sub-system. Administrators
+ typically own the
+ <a href="cluster_setup.html#mapred-queues.xml">
+ <em>conf/mapred-queues.xml</em></a>
+ file, can edit it while the JobTracker is still running, and can do
+ a reload by running this command.</p>
+ <p>It should be noted that while trying to refresh queues'
+ configuration, one cannot change the hierarchy of queues itself.
+ This means no operation that involves a change in either the
+ hierarchy structure itself or the queues' names will be allowed.
+ Only selected properties of queues can be changed during refresh.
+ For example, new queues cannot be added dynamically, neither can an
+ existing queue be deleted.</p>
+ <p>If during a reload of queue configuration,
+ a syntactic or semantic error is made during the editing of the
+ configuration file, the refresh command fails with an exception that
+ is printed on the standard output of this command, thus informing the
+ requester with any helpful messages of what has gone wrong during
+ the edit/reload. Importantly, the existing queue configuration is
+ untouched and the system is left in a consistent state.
+ </p>
+ <p>As described in the
+ <a href="cluster_setup.html#mapred-queues.xml"><em>
+ conf/mapred-queues.xml</em></a> section, the
+ <a href="cluster_setup.html#properties_tag"><em>
+ <properties></em></a> tag in the queue configuration file can
+ also be used to specify per-queue properties needed by the scheduler.
+ When the framework's queue configuration is reloaded using this
+ command, this scheduler specific configuration will also be reloaded,
+ provided the scheduler being configured supports this reload.
+ Please see the documentation of the particular scheduler in use.</p>
+ </td>
+ </tr>
+ <tr>
+ <td><code>-refreshNodes</code></td>
+ <td> Refresh the hosts information at the jobtracker.</td>
+ </tr>
+ <tr>
+ <td><code>-help [cmd]</code></td>
+ <td>Displays help for the given command or all commands if none
+ is specified.</td>
</tr>
</table>
</section>
Modified: hadoop/mapreduce/branches/branch-0.21/src/docs/src/documentation/content/xdocs/mapred_tutorial.xml
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/branch-0.21/src/docs/src/documentation/content/xdocs/mapred_tutorial.xml?rev=893472&r1=893471&r2=893472&view=diff
==============================================================================
--- hadoop/mapreduce/branches/branch-0.21/src/docs/src/documentation/content/xdocs/mapred_tutorial.xml (original)
+++ hadoop/mapreduce/branches/branch-0.21/src/docs/src/documentation/content/xdocs/mapred_tutorial.xml Wed Dec 23 11:06:15 2009
@@ -41,10 +41,10 @@
</p>
<ul>
<li>
- <a href="http://hadoop.apache.org/common/docs/current/single_node_setup.html">Single Node Setup</a> for first-time users.
+ <a href="ext:single-node-setup">Single Node Setup</a> for first-time users.
</li>
<li>
- <a href="http://hadoop.apache.org/common/docs/current/cluster_setup.html">Cluster Setup</a> for large, distributed clusters.
+ <a href="cluster_setup.html">Cluster Setup</a> for large, distributed clusters.
</li>
</ul>
</section>
@@ -152,8 +152,8 @@
occurrences of each word in a given input set.</p>
<p>This example works with a
- pseudo-distributed (<a href="http://hadoop.apache.org/common/docs/current/single_node_setup.html#SingleNodeSetup">Single Node Setup</a>)
- or fully-distributed (<a href="http://hadoop.apache.org/common/docs/current/cluster_setup.html">Cluster Setup</a>)
+ pseudo-distributed (<a href="ext:single-node-setup">Single Node Setup</a>)
+ or fully-distributed (<a href="cluster_setup.html">Cluster Setup</a>)
Hadoop installation.</p>
<section>
@@ -1301,7 +1301,7 @@
<p>Note: <code>mapred.{map|reduce}.child.java.opts</code> are used only
for configuring the launched child tasks from task tracker. Configuring
the memory options for daemons is documented under
- <a href="http://hadoop.apache.org/common/docs/current/cluster_setup.html#Configuring+the+Environment+of+the+Hadoop+Daemons">
+ <a href="cluster_setup.html#Configuring+the+Environment+of+the+Hadoop+Daemons">
Configuring the Environment of the Hadoop Daemons</a> (Cluster Setup).</p>
<p>The memory available to some parts of the framework is also
@@ -2410,8 +2410,8 @@
<p>This example needs the HDFS to be up and running, especially for the
<code>DistributedCache</code>-related features. Hence it only works with a
- pseudo-distributed (<a href="http://hadoop.apache.org/common/docs/current/single_node_setup.html#SingleNodeSetup">Single Node Setup</a>)
- or fully-distributed (<a href="http://hadoop.apache.org/common/docs/current/cluster_setup.html#Fully-Distributed+Operation">Cluster Setup</a>)
+ pseudo-distributed (<a href="ext:single-node-setup">Single Node Setup</a>)
+ or fully-distributed (<a href="cluster_setup.html#Fully-Distributed+Operation">Cluster Setup</a>)
Hadoop installation.</p>
<section>
Modified: hadoop/mapreduce/branches/branch-0.21/src/docs/src/documentation/content/xdocs/site.xml
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/branch-0.21/src/docs/src/documentation/content/xdocs/site.xml?rev=893472&r1=893471&r2=893472&view=diff
==============================================================================
--- hadoop/mapreduce/branches/branch-0.21/src/docs/src/documentation/content/xdocs/site.xml (original)
+++ hadoop/mapreduce/branches/branch-0.21/src/docs/src/documentation/content/xdocs/site.xml Wed Dec 23 11:06:15 2009
@@ -34,6 +34,7 @@
<docs label="Getting Started">
<overview label="Overview" href="index.html" />
+ <setup label="Cluster Setup" href="cluster_setup.html"/>
<mapred label="MapReduce Tutorial" href="mapred_tutorial.html" />
<streaming label="Hadoop Streaming" href="streaming.html" />
</docs>
@@ -71,11 +72,18 @@
<jira href="http://hadoop.apache.org/mapreduce/issue_tracking.html"/>
<wiki href="http://wiki.apache.org/hadoop/MapReduce" />
<faq href="http://wiki.apache.org/hadoop/MapReduce/FAQ" />
-
<common-default href="http://hadoop.apache.org/common/docs/current/common-default.html" />
<hdfs-default href="http://hadoop.apache.org/hdfs/docs/current/hdfs-default.html" />
<mapred-default href="http://hadoop.apache.org/mapreduce/docs/current/mapred-default.html" />
-
+ <mapred-queues href="http://hadoop.apache.org/mapreduce/docs/current/mapred-queues.xml" />
+ <mapred-queues-capacity-scheduler href="http://hadoop.apache.org/mapreduce/docs/current/mapred-queues-capacity-scheduler.xml" />
+ <capacity-scheduler-conf href="http://hadoop.apache.org/mapreduce/docs/current/capacity-scheduler-conf.html" />
+
+ <single-node-setup href="http://hadoop.apache.org/common/docs/current/single_node_setup.html">
+ <PreReqs href="#PreReqs" />
+ <Download href="#Download" />
+ </single-node-setup>
+
<zlib href="http://www.zlib.net/" />
<gzip href="http://www.gzip.org/" />
<bzip href="http://www.bzip.org/" />
Modified: hadoop/mapreduce/branches/branch-0.21/src/java/org/apache/hadoop/mapred/tools/MRAdmin.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/branch-0.21/src/java/org/apache/hadoop/mapred/tools/MRAdmin.java?rev=893472&r1=893471&r2=893472&view=diff
==============================================================================
--- hadoop/mapreduce/branches/branch-0.21/src/java/org/apache/hadoop/mapred/tools/MRAdmin.java (original)
+++ hadoop/mapreduce/branches/branch-0.21/src/java/org/apache/hadoop/mapred/tools/MRAdmin.java Wed Dec 23 11:06:15 2009
@@ -60,8 +60,9 @@
"\t\tJobtracker will reload the authorization policy file.\n";
String refreshQueues =
- "-refreshQueues: Reload the queue acls and state.\n"
- + "\t\tJobTracker will reload the mapred-queues.xml file.\n";
+ "-refreshQueues: Reload the queues' acls, states and "
+ + "scheduler specific properties.\n"
+ + "\t\tJobTracker will reload the mapred-queues configuration file.\n";
String refreshNodes =
"-refreshNodes: Refresh the hosts information at the jobtracker.\n";