You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by sm...@apache.org on 2013/08/16 22:57:14 UTC
git commit: AMBARI-2938. Update stack definition for MAPREDUCE2.
(smohanty)
Updated Branches:
refs/heads/trunk 2ebffb165 -> 8bf8f9e96
AMBARI-2938. Update stack definition for MAPREDUCE2. (smohanty)
Project: http://git-wip-us.apache.org/repos/asf/incubator-ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-ambari/commit/8bf8f9e9
Tree: http://git-wip-us.apache.org/repos/asf/incubator-ambari/tree/8bf8f9e9
Diff: http://git-wip-us.apache.org/repos/asf/incubator-ambari/diff/8bf8f9e9
Branch: refs/heads/trunk
Commit: 8bf8f9e96e7ce5cee8a67217c945e4ed87b740c7
Parents: 2ebffb1
Author: Sumit Mohanty <sm...@hortonworks.com>
Authored: Fri Aug 16 13:56:22 2013 -0700
Committer: Sumit Mohanty <sm...@hortonworks.com>
Committed: Fri Aug 16 13:56:22 2013 -0700
----------------------------------------------------------------------
.../MAPREDUCE2/configuration/mapred-site.xml | 231 +++++++++++--------
1 file changed, 135 insertions(+), 96 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-ambari/blob/8bf8f9e9/ambari-server/src/main/resources/stacks/HDP/2.0.5/services/MAPREDUCE2/configuration/mapred-site.xml
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.5/services/MAPREDUCE2/configuration/mapred-site.xml b/ambari-server/src/main/resources/stacks/HDP/2.0.5/services/MAPREDUCE2/configuration/mapred-site.xml
index 51e3e4d..900c917 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.0.5/services/MAPREDUCE2/configuration/mapred-site.xml
+++ b/ambari-server/src/main/resources/stacks/HDP/2.0.5/services/MAPREDUCE2/configuration/mapred-site.xml
@@ -22,32 +22,41 @@
<configuration xmlns:xi="http://www.w3.org/2001/XInclude">
-<!-- i/o properties -->
+ <!-- i/o properties -->
<property>
<name>mapreduce.task.io.sort.mb</name>
<value>100</value>
- <description>No description</description>
+ <description>
+ The total amount of buffer memory to use while sorting files, in megabytes. By default, gives each merge stream
+ 1MB, which should minimize seeks
+ </description>
</property>
<property>
<name>mapreduce.map.sort.spill.percent</name>
<value>0.1</value>
- <description>No description</description>
+ <description>
+ The soft limit in the serialization buffer. Once reached, a thread will begin to spill the contents to disk
+ in the background. Note that collection will not block if this threshold is exceeded while a spill is already
+ in progress, so spills may be larger than this threshold when it is set to less than .5
+ </description>
</property>
<property>
<name>mapreduce.task.io.sort.factor</name>
<value>100</value>
- <description>No description</description>
+ <description>
+ The number of streams to merge at once while sorting files. This determines the number of open file handles
+ </description>
</property>
-<!-- map/reduce properties -->
+ <!-- map/reduce properties -->
<property>
<name>mapreduce.jobtracker.system.dir</name>
<value></value>
- <description>No description</description>
+ <description>The directory where MapReduce stores control files</description>
<final>true</final>
</property>
@@ -55,68 +64,80 @@
<!-- cluster specific -->
<name>mapreduce.cluster.local.dir</name>
<value></value>
- <description>No description</description>
+ <description>
+ The local directory where MapReduce stores intermediate data files. May be a comma-separated list of
+ directories on different devices in order to spread disk i/o. Directories that do not exist are ignored
+ </description>
<final>true</final>
</property>
<property>
<name>mapreduce.reduce.shuffle.parallelcopies</name>
<value>30</value>
- <description>No description</description>
+ <description>
+ The default number of parallel transfers run by reduce during the copy (shuffle) phase
+ </description>
</property>
<property>
<name>mapreduce.tasktracker.map.tasks.maximum</name>
<value></value>
- <description>No description</description>
+ <description>
+ The maximum number of map tasks that will be run simultaneously by a task tracker
+ </description>
</property>
<property>
<name>mapreduce.map.speculative</name>
<value>false</value>
- <description>If true, then multiple instances of some map tasks
- may be executed in parallel.</description>
+ <description>
+ If true, then multiple instances of some map tasks may be executed in parallel
+ </description>
</property>
<property>
<name>mapreduce.reduce.speculative</name>
<value>false</value>
- <description>If true, then multiple instances of some reduce tasks
- may be executed in parallel.</description>
+ <description>
+ If true, then multiple instances of some reduce tasks may be executed in parallel
+ </description>
</property>
<property>
<name>mapreduce.job.reduce.slowstart.completedmaps</name>
<value>0.05</value>
+ <description>
+ Fraction of the number of maps in the job which should be complete before reduces are scheduled for the job
+ </description>
</property>
<property>
<name>mapreduce.reduce.merge.inmem.threshold</name>
<value>1000</value>
<description>The threshold, in terms of the number of files
- for the in-memory merge process. When we accumulate threshold number of files
- we initiate the in-memory merge and spill to disk. A value of 0 or less than
- 0 indicates we want to DON'T have any threshold and instead depend only on
- the ramfs's memory consumption to trigger the merge.
- </description>
+ for the in-memory merge process. When we accumulate threshold number of files
+ we initiate the in-memory merge and spill to disk. A value of 0 or less than
+ 0 indicates we DON'T want to have any threshold and instead depend only on
+ the ramfs's memory consumption to trigger the merge.
+ </description>
</property>
<property>
<name>mapreduce.reduce.shuffle.merge.percent</name>
<value>0.66</value>
<description>The usage threshold at which an in-memory merge will be
- initiated, expressed as a percentage of the total memory allocated to
- storing in-memory map outputs, as defined by
- mapreduce.reduce.shuffle.input.buffer.percent.
- </description>
+ initiated, expressed as a percentage of the total memory allocated to
+ storing in-memory map outputs, as defined by
+ mapreduce.reduce.shuffle.input.buffer.percent.
+ </description>
</property>
<property>
<name>mapreduce.reduce.shuffle.input.buffer.percent</name>
<value>0.7</value>
<description>The percentage of memory to be allocated from the maximum heap
- size to storing map outputs during the shuffle.
- </description>
+ size to storing map outputs during the shuffle.
+ </description>
</property>
<property>
@@ -127,38 +148,42 @@
</description>
</property>
-<property>
- <name>mapreduce.output.fileoutputformat.compress.type</name>
- <value>BLOCK</value>
- <description>If the job outputs are to compressed as SequenceFiles, how should
- they be compressed? Should be one of NONE, RECORD or BLOCK.
- </description>
-</property>
+ <property>
+ <name>mapreduce.output.fileoutputformat.compress.type</name>
+ <value>BLOCK</value>
+ <description>If the job outputs are to be compressed as SequenceFiles, how should
+ they be compressed? Should be one of NONE, RECORD or BLOCK.
+ </description>
+ </property>
<property>
<name>mapreduce.reduce.input.buffer.percent</name>
<value>0.0</value>
<description>The percentage of memory- relative to the maximum heap size- to
- retain map outputs during the reduce. When the shuffle is concluded, any
- remaining map outputs in memory must consume less than this threshold before
- the reduce can begin.
- </description>
+ retain map outputs during the reduce. When the shuffle is concluded, any
+ remaining map outputs in memory must consume less than this threshold before
+ the reduce can begin.
+ </description>
</property>
- <property>
- <name>mapreduce.reduce.input.limit</name>
- <value>10737418240</value>
- <description>The limit on the input size of the reduce. (This value
- is 10 Gb.) If the estimated input size of the reduce is greater than
- this value, job is failed. A value of -1 means that there is no limit
- set. </description>
-</property>
+ <property>
+ <name>mapreduce.reduce.input.limit</name>
+ <value>10737418240</value>
+ <description>The limit on the input size of the reduce. (This value
+ is 10 GB.) If the estimated input size of the reduce is greater than
+ this value, the job is failed. A value of -1 means that there is no limit
+ set.
+ </description>
+ </property>
<!-- copied from kryptonite configuration -->
<property>
<name>mapreduce.map.output.compress</name>
<value></value>
+ <description>
+ Should the outputs of the maps be compressed before being sent across the network. Uses SequenceFile compression
+ </description>
</property>
@@ -166,15 +191,18 @@
<name>mapreduce.task.timeout</name>
<value>600000</value>
<description>The number of milliseconds before a task will be
- terminated if it neither reads an input, writes an output, nor
- updates its status string.
- </description>
+ terminated if it neither reads an input, writes an output, nor
+ updates its status string.
+ </description>
</property>
<property>
<name>mapred.child.java.opts</name>
<value>-Xmx512m</value>
- <description>No description</description>
+ <description>
+ Java opts for the task tracker child processes. The following symbol, if present, will be interpolated: @taskid@
+ is replaced by current TaskID. Any other occurrences of '@' will go unchanged
+ </description>
</property>
<property>
@@ -203,11 +231,19 @@
<property>
<name>mapreduce.tasktracker.healthchecker.script.path</name>
<value></value>
+ <description>
+ Absolute path to the script which is periodically run by the node health monitoring service to determine if
+ the node is healthy or not. If the value of this key is empty or the file does not exist in the location
+ configured here, the node health monitoring service is not started.
+ </description>
</property>
<property>
<name>mapreduce.tasktracker.healthchecker.script.timeout</name>
<value>60000</value>
+ <description>
+ Time after which the node health script should be killed if unresponsive and considered to have failed
+ </description>
</property>
<property>
@@ -216,55 +252,58 @@
<description>The filename of the keytab for the task tracker</description>
</property>
- <property>
- <name>mapreduce.jobhistory.keytab.file</name>
+ <property>
+ <name>mapreduce.jobhistory.keytab.file</name>
<!-- cluster variant -->
- <value></value>
- <description>The keytab for the job history server principal.</description>
- </property>
-
-<property>
- <name>mapreduce.shuffle.port</name>
- <value>8081</value>
- <description>Default port that the ShuffleHandler will run on. ShuffleHandler is a service run at the NodeManager to facilitate transfers of intermediate Map outputs to requesting Reducers.</description>
-</property>
-
-<property>
- <name>mapreduce.jobhistory.intermediate-done-dir</name>
- <value>/mr-history/tmp</value>
- <description>Directory where history files are written by MapReduce jobs.</description>
-</property>
-
-<property>
- <name>mapreduce.jobhistory.done-dir</name>
- <value>/mr-history/done</value>
- <description>Directory where history files are managed by the MR JobHistory Server.</description>
-</property>
-
-<property>
- <name>mapreduce.jobhistory.address</name>
- <value>localhost:10020</value>
- <description>Enter your JobHistoryServer hostname.</description>
-</property>
-
-<property>
- <name>mapreduce.jobhistory.webapp.address</name>
- <value>localhost:19888</value>
- <description>Enter your JobHistoryServer hostname.</description>
-</property>
-
-<property>
- <name>mapreduce.framework.name</name>
- <value>yarn</value>
- <description>No description</description>
-</property>
-
-<property>
- <name>yarn.app.mapreduce.am.staging-dir</name>
- <value>/user</value>
- <description>
- The staging dir used while submitting jobs.
- </description>
-</property>
+ <value></value>
+ <description>The keytab for the job history server principal.</description>
+ </property>
+
+ <property>
+ <name>mapreduce.shuffle.port</name>
+ <value>8081</value>
+ <description>
+ Default port that the ShuffleHandler will run on. ShuffleHandler is a service run at the NodeManager
+ to facilitate transfers of intermediate Map outputs to requesting Reducers.
+ </description>
+ </property>
+
+ <property>
+ <name>mapreduce.jobhistory.intermediate-done-dir</name>
+ <value>/mr-history/tmp</value>
+ <description>Directory where history files are written by MapReduce jobs.</description>
+ </property>
+
+ <property>
+ <name>mapreduce.jobhistory.done-dir</name>
+ <value>/mr-history/done</value>
+ <description>Directory where history files are managed by the MR JobHistory Server.</description>
+ </property>
+
+ <property>
+ <name>mapreduce.jobhistory.address</name>
+ <value>localhost:10020</value>
+ <description>Enter your JobHistoryServer hostname and port.</description>
+ </property>
+
+ <property>
+ <name>mapreduce.jobhistory.webapp.address</name>
+ <value>localhost:19888</value>
+ <description>Enter your JobHistoryServer web application hostname and port.</description>
+ </property>
+
+ <property>
+ <name>mapreduce.framework.name</name>
+ <value>yarn</value>
+ <description>The runtime framework for executing MapReduce jobs. Can be one of local, classic or yarn</description>
+ </property>
+
+ <property>
+ <name>yarn.app.mapreduce.am.staging-dir</name>
+ <value>/user</value>
+ <description>
+ The staging dir used while submitting jobs.
+ </description>
+ </property>
</configuration>