You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@bigtop.apache.org by of...@apache.org on 2015/03/02 22:10:16 UTC
bigtop git commit: BIGTOP-1686: Update and clean mapred template and namespace
Repository: bigtop
Updated Branches:
refs/heads/master 3dd00010e -> b2225cfdb
BIGTOP-1686: Update and clean mapred template and namespace
Update the mapred-site.xml template to current names for settings.
Remove outdated settings. Make more settings adjustable and move their
defaults into the Puppet class.
Change variable names to consistently resemble the setting names.
Signed-off-by: Olaf Flebbe <of...@oflebbe.de>
Project: http://git-wip-us.apache.org/repos/asf/bigtop/repo
Commit: http://git-wip-us.apache.org/repos/asf/bigtop/commit/b2225cfd
Tree: http://git-wip-us.apache.org/repos/asf/bigtop/tree/b2225cfd
Diff: http://git-wip-us.apache.org/repos/asf/bigtop/diff/b2225cfd
Branch: refs/heads/master
Commit: b2225cfdb218d1920da46dd18edbfcbafb7e4c36
Parents: 3dd0001
Author: Michael Weiser <m....@science-computing.de>
Authored: Thu Feb 19 17:34:22 2015 +0100
Committer: Olaf Flebbe <of...@oflebbe.de>
Committed: Mon Mar 2 22:09:27 2015 +0100
----------------------------------------------------------------------
.../puppet/hieradata/bigtop/cluster.yaml | 4 +-
.../puppet/modules/hadoop/manifests/init.pp | 44 ++--
.../modules/hadoop/templates/mapred-site.xml | 224 ++++++-------------
3 files changed, 91 insertions(+), 181 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/bigtop/blob/b2225cfd/bigtop-deploy/puppet/hieradata/bigtop/cluster.yaml
----------------------------------------------------------------------
diff --git a/bigtop-deploy/puppet/hieradata/bigtop/cluster.yaml b/bigtop-deploy/puppet/hieradata/bigtop/cluster.yaml
index 28c9449..2751d33 100644
--- a/bigtop-deploy/puppet/hieradata/bigtop/cluster.yaml
+++ b/bigtop-deploy/puppet/hieradata/bigtop/cluster.yaml
@@ -61,8 +61,8 @@ hadoop::common_yarn::hadoop_rm_host: "%{hiera('bigtop::hadoop_head_node')}"
# actually default but needed for hue::server::rm_port here
hadoop::common_yarn::hadoop_rm_port: "8032"
-hadoop::common_mapred_app::hadoop_hs_host: "%{hiera('bigtop::hadoop_head_node')}"
-hadoop::common_mapred_app::hadoop_jobtracker_host: "%{hiera('bigtop::hadoop_head_node')}"
+hadoop::common_mapred_app::jobtracker_host: "%{hiera('bigtop::hadoop_head_node')}"
+hadoop::common_mapred_app::mapreduce_jobhistory_host: "%{hiera('bigtop::hadoop_head_node')}"
# actually default but needed for hue::server::webhdfs_url here
hadoop::httpfs::hadoop_httpfs_port: "14000"
http://git-wip-us.apache.org/repos/asf/bigtop/blob/b2225cfd/bigtop-deploy/puppet/modules/hadoop/manifests/init.pp
----------------------------------------------------------------------
diff --git a/bigtop-deploy/puppet/modules/hadoop/manifests/init.pp b/bigtop-deploy/puppet/modules/hadoop/manifests/init.pp
index eaca730..a3c94db 100644
--- a/bigtop-deploy/puppet/modules/hadoop/manifests/init.pp
+++ b/bigtop-deploy/puppet/modules/hadoop/manifests/init.pp
@@ -227,28 +227,30 @@ class hadoop ($hadoop_security_authentication = "simple",
}
class common_mapred_app (
- $hadoop_config_io_sort_factor = undef,
- $hadoop_config_io_sort_mb = undef,
- $hadoop_config_mapred_child_ulimit = undef,
- $hadoop_config_mapred_fairscheduler_assignmultiple = undef,
- $hadoop_config_mapred_fairscheduler_sizebasedweight = undef,
- $hadoop_config_mapred_job_tracker_handler_count = undef,
- $hadoop_config_mapred_reduce_parallel_copies = undef,
- $hadoop_config_mapred_reduce_slowstart_completed_maps = undef,
- $hadoop_config_mapred_reduce_tasks_speculative_execution = undef,
- $hadoop_config_tasktracker_http_threads = undef,
- $hadoop_config_use_compression = undef,
- $hadoop_hs_host = undef,
- $hadoop_hs_port = "10020",
- $hadoop_hs_webapp_port = "19888",
- $hadoop_jobtracker_fairscheduler_weightadjuster = undef,
- $hadoop_jobtracker_host,
- $hadoop_jobtracker_port = "8021",
- $hadoop_jobtracker_taskscheduler = undef,
- $hadoop_mapred_jobtracker_plugins = "",
- $hadoop_mapred_tasktracker_plugins = "",
- $mapred_acls_enabled = undef,
+ $mapreduce_cluster_acls_enabled = undef,
+ $mapreduce_jobtracker_taskscheduler = undef,
+ $mapreduce_jobhistory_host = undef,
+ $mapreduce_jobhistory_port = "10020",
+ $mapreduce_jobhistory_webapp_port = "19888",
+ $mapreduce_framework_name = "yarn",
+ $jobtracker_host,
+ $jobtracker_port = "8021",
$mapred_data_dirs = suffix($hadoop::hadoop_storage_dirs, "/mapred"),
+ $mapreduce_cluster_temp_dir = "/mapred/system",
+ $mapreduce_jobtracker_system_dir = "/mapred/system",
+ $mapreduce_jobtracker_staging_root_dir = "/user",
+ $yarn_app_mapreduce_am_staging_dir = "/user",
+ $mapreduce_task_io_sort_factor = 64, # 10 default
+ $mapreduce_task_io_sort_mb = 256, # 100 default
+ $mapreduce_reduce_shuffle_parallelcopies = undef, # 5 is default
+ # processorcount == facter fact; both slot counts are floored at 1
+ $mapreduce_tasktracker_map_tasks_maximum = inline_template("<%= [1, @processorcount.to_i * 0.80].max.round %>"), # 80% of cores, the factor the template hard-coded before this change
+ $mapreduce_tasktracker_reduce_tasks_maximum = inline_template("<%= [1, @processorcount.to_i * 0.20].max.round %>"), # 20% of cores, the factor the template hard-coded before this change
+ $mapreduce_tasktracker_http_threads = 60, # 40 default
+ $mapreduce_output_fileoutputformat_compress_type = "BLOCK", # "RECORD" default
+ $mapreduce_map_output_compress = undef,
+ $mapreduce_job_reduce_slowstart_completedmaps = undef,
+ $mapred_jobtracker_plugins = "",
$hadoop_security_authentication = $hadoop::hadoop_security_authentication,
$kerberos_realm = $hadoop::kerberos_realm,
) inherits hadoop {
http://git-wip-us.apache.org/repos/asf/bigtop/blob/b2225cfd/bigtop-deploy/puppet/modules/hadoop/templates/mapred-site.xml
----------------------------------------------------------------------
diff --git a/bigtop-deploy/puppet/modules/hadoop/templates/mapred-site.xml b/bigtop-deploy/puppet/modules/hadoop/templates/mapred-site.xml
index 5bf9777..d9e842f 100644
--- a/bigtop-deploy/puppet/modules/hadoop/templates/mapred-site.xml
+++ b/bigtop-deploy/puppet/modules/hadoop/templates/mapred-site.xml
@@ -42,7 +42,7 @@
<name>mapreduce.jobtracker.keytab.file</name>
<value>/etc/mapred.keytab</value> <!-- path to the MapReduce keytab -->
</property>
-
+
<!-- TaskTracker security configs -->
<property>
<name>mapreduce.tasktracker.kerberos.principal</name>
@@ -56,7 +56,7 @@
<name>mapreduce.tasktracker.keytab.file</name>
<value>/etc/mapred.keytab</value> <!-- path to the MapReduce keytab -->
</property>
-
+
<!-- TaskController settings -->
<property>
<name>mapreduce.tasktracker.taskcontroller</name>
@@ -66,63 +66,42 @@
<name>mapreduce.tasktracker.group</name>
<value>mapred</value>
</property>
-<% end %>
-<% if @mapred_acls_enabled %>
+<% end %>
+<% if @mapreduce_cluster_acls_enabled %>
<property>
<name>mapreduce.cluster.acls.enabled</name>
- <value><%= @mapred_acls_enabled %></value>
+ <value><%= @mapreduce_cluster_acls_enabled %></value>
</property>
-<% end %>
+<% end %>
<!-- specify JobTracker TaskScheduler -->
-<% if @hadoop_jobtracker_taskscheduler %>
+<% if @mapreduce_jobtracker_taskscheduler %><%# emit the property only when the class parameter is set %>
<property>
<name>mapreduce.jobtracker.taskscheduler</name>
- <value><%= @hadoop_jobtracker_taskscheduler %></value>
+ <value><%= @mapreduce_jobtracker_taskscheduler %></value>
</property>
-<% end %>
-
-<% if @hadoop_config_mapred_fairscheduler_assignmultiple %>
- <property>
- <name>mapred.fairscheduler.assignmultiple</name>
- <value><%= @hadoop_config_mapred_fairscheduler_assignmultiple %></value>
- </property>
-<% end %>
-<% if @hadoop_config_mapred_fairscheduler_sizebasedweight %>
- <property>
- <name>mapred.fairscheduler.sizebasedweight</name>
- <value><%= @hadoop_config_mapred_fairscheduler_assignmultiple %></value>
- </property>
<% end %>
-
-<% if @hadoop_jobtracker_fairscheduler_weightadjuster %>
- <property>
- <name>mapred.fairscheduler.weightadjuster</name>
- <value><%= @hadoop_jobtracker_fairscheduler_weightadjuster %></value>
- </property>
-<% end %>
-
-<% if @hadoop_hs_host %>
+<% if @mapreduce_jobhistory_host %>
<property>
<name>mapreduce.jobhistory.address</name>
- <value><%= @hadoop_hs_host %>:<%= @hadoop_hs_port %></value>
+ <value><%= @mapreduce_jobhistory_host %>:<%= @mapreduce_jobhistory_port %></value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
- <value><%= @hadoop_hs_host %>:<%= @hadoop_hs_webapp_port %></value>
+ <value><%= @mapreduce_jobhistory_host %>:<%= @mapreduce_jobhistory_webapp_port %></value>
</property>
-<% end %>
+<% end %>
<property>
<name>mapreduce.framework.name</name>
- <value>yarn</value>
+ <value><%= @mapreduce_framework_name %></value>
</property>
<property>
<name>mapreduce.jobtracker.address</name>
- <value><%= @hadoop_jobtracker_host %>:<%= @hadoop_jobtracker_port%></value>
+ <value><%= @jobtracker_host %>:<%= @jobtracker_port%></value>
</property>
<property>
@@ -131,189 +110,118 @@
<final>true</final>
</property>
- <!-- property>
+<% if @mapreduce_cluster_temp_dir -%>
+ <property>
<name>mapreduce.cluster.temp.dir</name>
- <value>/mapred/system</value>
- </property -->
+ <value><%= @mapreduce_cluster_temp_dir %></value>
+ </property>
+<% end -%>
+<% if @mapreduce_jobtracker_system_dir -%><%# value must come from the same parameter the guard tests -%>
<property>
<name>mapreduce.jobtracker.system.dir</name>
- <value>/mapred/system</value>
+ <value><%= @mapreduce_jobtracker_system_dir %></value>
</property>
+<% end -%>
+<% if @mapreduce_jobtracker_staging_root_dir -%>
<property>
<name>mapreduce.jobtracker.staging.root.dir</name>
- <value>/user</value>
+ <value><%= @mapreduce_jobtracker_staging_root_dir %></value>
</property>
+<% end -%>
+<% if @yarn_app_mapreduce_am_staging_dir -%>
<property>
<name>yarn.app.mapreduce.am.staging-dir</name>
- <value>/user</value>
+ <value><%= @yarn_app_mapreduce_am_staging_dir %></value>
</property>
+<% end -%>
<property>
<name>mapred.child.java.opts</name>
<value>-Xmx1024m</value>
</property>
-<% if @hadoop_config_mapred_child_ulimit %>
- <property>
- <!-- set this to ~1.5x the heap size in mapred.child.java.opts -->
- <name>mapred.child.ulimit</name>
- <value><%= @hadoop_config_mapred_child_ulimit %></value>
- </property>
-<% else %>
- <property>
- <!-- set this to ~1.5x the heap size in mapred.child.java.opts -->
- <name>mapred.child.ulimit</name>
- <value>unlimited</value>
- </property>
-<% end %>
-
-<% if @hadoop_config_io_sort_mb %>
- <property>
- <name>io.sort.mb</name>
- <value><%= @hadoop_config_io_sort_mb %></value>
- </property>
-<% else %>
- <property>
- <name>io.sort.mb</name>
- <value>256</value>
- </property>
-<% end %>
-
-<% if @hadoop_config_io_sort_factor %>
- <property>
- <name>io.sort.factor</name>
- <value><%= @hadoop_config_io_sort_factor %></value>
- </property>
-<% else %>
+<% if @mapreduce_task_io_sort_mb -%>
<property>
- <name>io.sort.factor</name>
- <value>64</value>
+ <name>mapreduce.task.io.sort.mb</name>
+ <value><%= @mapreduce_task_io_sort_mb %></value>
</property>
-<% end %>
-<% if @hadoop_config_mapred_job_tracker_handler_count %>
+<% end -%>
+<% if @mapreduce_task_io_sort_factor -%>
<property>
- <name>mapred.job.tracker.handler.count</name>
- <value><%= @hadoop_config_mapred_job_tracker_handler_count %></value>
- <final>true</final>
+ <name>mapreduce.task.io.sort.factor</name>
+ <value><%= @mapreduce_task_io_sort_factor %></value>
</property>
-<% else %>
- <property>
- <name>mapred.job.tracker.handler.count</name>
- <value>10</value>
- <final>true</final>
- </property>
-<% end %>
+<% end -%>
+<% if @mapreduce_reduce_shuffle_parallelcopies -%>
<property>
- <name>mapred.map.tasks.speculative.execution</name>
- <value>true</value>
- </property>
-
-<% if @hadoop_config_mapred_reduce_parallel_copies %>
- <property>
- <name>mapred.reduce.parallel.copies</name>
+ <name>mapreduce.reduce.shuffle.parallelcopies</name>
<!-- set this to somewhere between sqrt(nodes) and nodes/2.
for <20 nodes, set == |nodes| -->
- <value><%= @hadoop_config_mapred_reduce_parallel_copies %></value>
+ <value><%= @mapreduce_reduce_shuffle_parallelcopies %></value>
</property>
-<% else %>
- <property>
- <name>mapred.reduce.parallel.copies</name>
- <!-- set this to somewhere between sqrt(nodes) and nodes/2.
- for <20 nodes, set == |nodes| -->
- <value>5</value>
- </property>
-<% end %>
+<% end -%>
+<% if @mapreduce_tasktracker_map_tasks_maximum -%>
<property>
- <name>mapred.reduce.tasks</name>
- <!-- set to numnodes * mapred.tasktracker.reduce.tasks.maximum -->
- <value>30</value>
- </property>
-
-<% if @hadoop_config_mapred_reduce_tasks_speculative_execution %>
- <property>
- <name>mapred.reduce.tasks.speculative.execution</name>
- <value><%= @hadoop_config_mapred_reduce_tasks_speculative_execution %></value>
- </property>
-<% else %>
- <property>
- <name>mapred.reduce.tasks.speculative.execution</name>
- <value>false</value>
- </property>
-<% end %>
-
- <property>
- <name>mapred.tasktracker.map.tasks.maximum</name>
+ <name>mapreduce.tasktracker.map.tasks.maximum</name>
<!-- see other kb entry about this one. -->
- <value><%= [1, @processorcount.to_i * 0.80].max.round %></value>
+ <value><%= @mapreduce_tasktracker_map_tasks_maximum %></value>
<final>true</final>
</property>
+<% end -%>
+<% if @mapreduce_tasktracker_reduce_tasks_maximum -%>
<property>
- <name>mapred.tasktracker.reduce.tasks.maximum</name>
+ <name>mapreduce.tasktracker.reduce.tasks.maximum</name>
<!-- see other kb entry about this one. -->
- <value><%= [1, @processorcount.to_i * 0.20].max.round %></value>
+ <value><%= @mapreduce_tasktracker_reduce_tasks_maximum %></value>
<final>true</final>
</property>
-<% if @hadoop_config_tasktracker_http_threads %>
+<% end -%>
+<% if @mapreduce_tasktracker_http_threads -%>
<property>
- <name>tasktracker.http.threads</name>
- <value><%= @hadoop_config_tasktracker_http_threads %></value>
+ <name>mapreduce.tasktracker.http.threads</name>
+ <value><%= @mapreduce_tasktracker_http_threads %></value>
<final>true</final>
</property>
-<% else %>
- <property>
- <name>tasktracker.http.threads</name>
- <value>60</value>
- <final>true</final>
- </property>
-<% end %>
+<% end -%>
+<% if @mapreduce_output_fileoutputformat_compress_type -%>
<property>
- <name>mapred.output.compression.type</name>
- <value>BLOCK</value>
+ <name>mapreduce.output.fileoutputformat.compress.type</name>
+ <value><%= @mapreduce_output_fileoutputformat_compress_type %></value>
<description>If the job outputs are to compressed as
SequenceFiles, how should they be compressed? Should be one of
NONE, RECORD or BLOCK.</description>
</property>
-<% if @hadoop_config_use_compression %>
+<% end -%>
+<% if @mapreduce_map_output_compress -%>
<property>
- <name>mapred.compress.map.output</name>
- <value><%= @hadoop_config_use_compression %></value>
+ <name>mapreduce.map.output.compress</name>
+ <value><%= @mapreduce_map_output_compress %></value>
</property>
-<% else %>
- <property>
- <name>mapred.compress.map.output</name>
- <value>false</value>
- </property>
-<% end %>
-<% if @hadoop_config_mapred_reduce_slowstart_completed_maps %>
+<% end -%>
+<% if @mapreduce_job_reduce_slowstart_completedmaps -%>
<property>
- <name>mapred.reduce.slowstart.completed.maps</name>
- <value><%= @hadoop_config_mapred_reduce_slowstart_completed_maps %></value>
+ <name>mapreduce.job.reduce.slowstart.completedmaps</name>
+ <value><%= @mapreduce_job_reduce_slowstart_completedmaps %></value>
</property>
-<% end %>
-<% if @hadoop_mapred_jobtracker_plugins %>
+<% end -%>
+<% if @mapred_jobtracker_plugins -%>
<property>
<name>mapred.jobtracker.plugins</name>
- <value><%= @hadoop_mapred_jobtracker_plugins %></value>
+ <value><%= @mapred_jobtracker_plugins %></value>
<description>Comma-separated list of jobtracker plug-ins to be activated.</description>
</property>
-<% end %>
-<% if @hadoop_mapred_tasktracker_plugins %>
- <property>
- <name>mapred.tasktracker.instrumentation</name>
- <value><%= @hadoop_mapred_tasktracker_plugins %></value>
- </property>
-<% end %>
+<% end -%>
</configuration>