You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@bigtop.apache.org by of...@apache.org on 2015/03/02 22:10:16 UTC

bigtop git commit: BIGTOP-1686: Update and clean mapred template and namespace

Repository: bigtop
Updated Branches:
  refs/heads/master 3dd00010e -> b2225cfdb


BIGTOP-1686: Update and clean mapred template and namespace

Update the mapred-site.xml template to current names for settings.
Remove outdated settings. Make more settings adjustable and move their
defaults into the Puppet class.

Change variable names to consistently resemble the setting names.

Signed-off-by: Olaf Flebbe <of...@oflebbe.de>


Project: http://git-wip-us.apache.org/repos/asf/bigtop/repo
Commit: http://git-wip-us.apache.org/repos/asf/bigtop/commit/b2225cfd
Tree: http://git-wip-us.apache.org/repos/asf/bigtop/tree/b2225cfd
Diff: http://git-wip-us.apache.org/repos/asf/bigtop/diff/b2225cfd

Branch: refs/heads/master
Commit: b2225cfdb218d1920da46dd18edbfcbafb7e4c36
Parents: 3dd0001
Author: Michael Weiser <m....@science-computing.de>
Authored: Thu Feb 19 17:34:22 2015 +0100
Committer: Olaf Flebbe <of...@oflebbe.de>
Committed: Mon Mar 2 22:09:27 2015 +0100

----------------------------------------------------------------------
 .../puppet/hieradata/bigtop/cluster.yaml        |   4 +-
 .../puppet/modules/hadoop/manifests/init.pp     |  44 ++--
 .../modules/hadoop/templates/mapred-site.xml    | 224 ++++++-------------
 3 files changed, 91 insertions(+), 181 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bigtop/blob/b2225cfd/bigtop-deploy/puppet/hieradata/bigtop/cluster.yaml
----------------------------------------------------------------------
diff --git a/bigtop-deploy/puppet/hieradata/bigtop/cluster.yaml b/bigtop-deploy/puppet/hieradata/bigtop/cluster.yaml
index 28c9449..2751d33 100644
--- a/bigtop-deploy/puppet/hieradata/bigtop/cluster.yaml
+++ b/bigtop-deploy/puppet/hieradata/bigtop/cluster.yaml
@@ -61,8 +61,8 @@ hadoop::common_yarn::hadoop_rm_host: "%{hiera('bigtop::hadoop_head_node')}"
 # actually default but needed for hue::server::rm_port here
 hadoop::common_yarn::hadoop_rm_port: "8032"
 
-hadoop::common_mapred_app::hadoop_hs_host: "%{hiera('bigtop::hadoop_head_node')}"
-hadoop::common_mapred_app::hadoop_jobtracker_host: "%{hiera('bigtop::hadoop_head_node')}"
+hadoop::common_mapred_app::jobtracker_host: "%{hiera('bigtop::hadoop_head_node')}"
+hadoop::common_mapred_app::mapreduce_jobhistory_host: "%{hiera('bigtop::hadoop_head_node')}"
 
 # actually default but needed for hue::server::webhdfs_url here
 hadoop::httpfs::hadoop_httpfs_port: "14000"

http://git-wip-us.apache.org/repos/asf/bigtop/blob/b2225cfd/bigtop-deploy/puppet/modules/hadoop/manifests/init.pp
----------------------------------------------------------------------
diff --git a/bigtop-deploy/puppet/modules/hadoop/manifests/init.pp b/bigtop-deploy/puppet/modules/hadoop/manifests/init.pp
index eaca730..a3c94db 100644
--- a/bigtop-deploy/puppet/modules/hadoop/manifests/init.pp
+++ b/bigtop-deploy/puppet/modules/hadoop/manifests/init.pp
@@ -227,28 +227,30 @@ class hadoop ($hadoop_security_authentication = "simple",
   }
 
   class common_mapred_app (
-      $hadoop_config_io_sort_factor = undef,
-      $hadoop_config_io_sort_mb = undef,
-      $hadoop_config_mapred_child_ulimit = undef,
-      $hadoop_config_mapred_fairscheduler_assignmultiple = undef,
-      $hadoop_config_mapred_fairscheduler_sizebasedweight = undef,
-      $hadoop_config_mapred_job_tracker_handler_count = undef,
-      $hadoop_config_mapred_reduce_parallel_copies = undef,
-      $hadoop_config_mapred_reduce_slowstart_completed_maps = undef,
-      $hadoop_config_mapred_reduce_tasks_speculative_execution = undef,
-      $hadoop_config_tasktracker_http_threads = undef,
-      $hadoop_config_use_compression = undef,
-      $hadoop_hs_host = undef,
-      $hadoop_hs_port = "10020",
-      $hadoop_hs_webapp_port = "19888",
-      $hadoop_jobtracker_fairscheduler_weightadjuster = undef,
-      $hadoop_jobtracker_host,
-      $hadoop_jobtracker_port = "8021",
-      $hadoop_jobtracker_taskscheduler = undef,
-      $hadoop_mapred_jobtracker_plugins = "",
-      $hadoop_mapred_tasktracker_plugins = "",
-      $mapred_acls_enabled = undef,
+      $mapreduce_cluster_acls_enabled = undef,
+      $mapreduce_jobtracker_taskscheduler = undef,
+      $mapreduce_jobhistory_host = undef,
+      $mapreduce_jobhistory_port = "10020",
+      $mapreduce_jobhistory_webapp_port = "19888",
+      $mapreduce_framework_name = "yarn",
+      $jobtracker_host,
+      $jobtracker_port = "8021",
       $mapred_data_dirs = suffix($hadoop::hadoop_storage_dirs, "/mapred"),
+      $mapreduce_cluster_temp_dir = "/mapred/system",
+      $mapreduce_jobtracker_system_dir = "/mapred/system",
+      $mapreduce_jobtracker_staging_root_dir = "/user",
+      $yarn_app_mapreduce_am_staging_dir = "/user",
+      $mapreduce_task_io_sort_factor = 64,              # 10 default
+      $mapreduce_task_io_sort_mb = 256,                 # 100 default
+      $mapreduce_reduce_shuffle_parallelcopies = undef, # 5 is default
+      # processorcount == facter fact; defaults keep the template's former 80%/20% map/reduce split
+      $mapreduce_tasktracker_map_tasks_maximum = inline_template("<%= [1, @processorcount.to_i * 0.80].max.round %>"),
+      $mapreduce_tasktracker_reduce_tasks_maximum = inline_template("<%= [1, @processorcount.to_i * 0.20].max.round %>"),
+      $mapreduce_tasktracker_http_threads = 60,         # 40 default
+      $mapreduce_output_fileoutputformat_compress_type = "BLOCK", # "RECORD" default
+      $mapreduce_map_output_compress = undef,
+      $mapreduce_job_reduce_slowstart_completedmaps = undef,
+      $mapred_jobtracker_plugins = "",
       $hadoop_security_authentication = $hadoop::hadoop_security_authentication,
       $kerberos_realm = $hadoop::kerberos_realm,
   ) inherits hadoop {

http://git-wip-us.apache.org/repos/asf/bigtop/blob/b2225cfd/bigtop-deploy/puppet/modules/hadoop/templates/mapred-site.xml
----------------------------------------------------------------------
diff --git a/bigtop-deploy/puppet/modules/hadoop/templates/mapred-site.xml b/bigtop-deploy/puppet/modules/hadoop/templates/mapred-site.xml
index 5bf9777..d9e842f 100644
--- a/bigtop-deploy/puppet/modules/hadoop/templates/mapred-site.xml
+++ b/bigtop-deploy/puppet/modules/hadoop/templates/mapred-site.xml
@@ -42,7 +42,7 @@
     <name>mapreduce.jobtracker.keytab.file</name>
     <value>/etc/mapred.keytab</value> <!-- path to the MapReduce keytab -->
   </property>
-  
+
   <!-- TaskTracker security configs -->
   <property>
     <name>mapreduce.tasktracker.kerberos.principal</name>
@@ -56,7 +56,7 @@
     <name>mapreduce.tasktracker.keytab.file</name>
     <value>/etc/mapred.keytab</value> <!-- path to the MapReduce keytab -->
   </property>
-  
+
   <!-- TaskController settings -->
   <property>
     <name>mapreduce.tasktracker.taskcontroller</name>
@@ -66,63 +66,42 @@
     <name>mapreduce.tasktracker.group</name>
     <value>mapred</value>
   </property>
-<% end %>
 
-<% if @mapred_acls_enabled %>
+<% end %>
+<% if @mapreduce_cluster_acls_enabled %>
   <property>
     <name>mapreduce.cluster.acls.enabled</name>
-    <value><%= @mapred_acls_enabled %></value>
+    <value><%= @mapreduce_cluster_acls_enabled %></value>
   </property>
-<% end %>
 
+<% end %>
 <!-- specify JobTracker TaskScheduler -->
-<% if @hadoop_jobtracker_taskscheduler %>
+<% if @mapreduce_jobtracker_taskscheduler %>
   <property>
     <name>mapreduce.jobtracker.taskscheduler</name>
-    <value><%= @hadoop_jobtracker_taskscheduler %></value>
+    <value><%= @mapreduce_jobtracker_taskscheduler %></value>
   </property>
-<% end %>
-
-<% if @hadoop_config_mapred_fairscheduler_assignmultiple %>
-  <property>
-    <name>mapred.fairscheduler.assignmultiple</name>
-    <value><%= @hadoop_config_mapred_fairscheduler_assignmultiple %></value>
-  </property>
-<% end %>
 
-<% if @hadoop_config_mapred_fairscheduler_sizebasedweight %>
-  <property>
-    <name>mapred.fairscheduler.sizebasedweight</name>
-    <value><%= @hadoop_config_mapred_fairscheduler_assignmultiple %></value>
-  </property>
 <% end %>
-
-<% if @hadoop_jobtracker_fairscheduler_weightadjuster %>
-  <property>
-    <name>mapred.fairscheduler.weightadjuster</name>
-    <value><%= @hadoop_jobtracker_fairscheduler_weightadjuster %></value>
-  </property>
-<% end %>
-
-<% if @hadoop_hs_host %>
+<% if @mapreduce_jobhistory_host %>
   <property>
     <name>mapreduce.jobhistory.address</name>
-    <value><%= @hadoop_hs_host %>:<%= @hadoop_hs_port %></value>
+    <value><%= @mapreduce_jobhistory_host %>:<%= @mapreduce_jobhistory_port %></value>
   </property>
   <property>
     <name>mapreduce.jobhistory.webapp.address</name>
-    <value><%= @hadoop_hs_host %>:<%= @hadoop_hs_webapp_port %></value>
+    <value><%= @mapreduce_jobhistory_host %>:<%= @mapreduce_jobhistory_webapp_port %></value>
   </property>
-<% end %>
 
+<% end %>
   <property>
     <name>mapreduce.framework.name</name>
-    <value>yarn</value>
+    <value><%= @mapreduce_framework_name %></value>
   </property>
 
   <property>
     <name>mapreduce.jobtracker.address</name>
-    <value><%= @hadoop_jobtracker_host %>:<%= @hadoop_jobtracker_port%></value>
+    <value><%= @jobtracker_host %>:<%= @jobtracker_port%></value>
   </property>
 
   <property>
@@ -131,189 +110,118 @@
     <final>true</final>
   </property>
 
-  <!-- property>
+<% if @mapreduce_cluster_temp_dir -%>
+  <property>
     <name>mapreduce.cluster.temp.dir</name>
-    <value>/mapred/system</value>
-  </property -->
+    <value><%= @mapreduce_cluster_temp_dir %></value>
+  </property>
 
+<% end -%>
+<% if @mapreduce_jobtracker_system_dir -%>
   <property>
     <name>mapreduce.jobtracker.system.dir</name>
-    <value>/mapred/system</value>
+    <value><%= @mapreduce_jobtracker_system_dir %></value>
   </property>
 
+<% end -%>
+<% if @mapreduce_jobtracker_staging_root_dir -%>
   <property>
     <name>mapreduce.jobtracker.staging.root.dir</name>
-    <value>/user</value>
+    <value><%= @mapreduce_jobtracker_staging_root_dir %></value>
   </property>
 
+<% end -%>
+<% if @yarn_app_mapreduce_am_staging_dir -%>
   <property>
     <name>yarn.app.mapreduce.am.staging-dir</name>
-    <value>/user</value>
+    <value><%= @yarn_app_mapreduce_am_staging_dir %></value>
   </property>
 
+<% end -%>
   <property>
     <name>mapred.child.java.opts</name>
     <value>-Xmx1024m</value>
   </property>
 
-<% if @hadoop_config_mapred_child_ulimit %>
-  <property>
-    <!-- set this to ~1.5x the heap size in mapred.child.java.opts -->
-    <name>mapred.child.ulimit</name>
-    <value><%= @hadoop_config_mapred_child_ulimit %></value>
-  </property>
-<% else %>
-  <property>
-    <!-- set this to ~1.5x the heap size in mapred.child.java.opts -->
-    <name>mapred.child.ulimit</name>
-    <value>unlimited</value>
-  </property>
-<% end %>
-
-<% if @hadoop_config_io_sort_mb %>
-  <property>
-    <name>io.sort.mb</name>
-    <value><%= @hadoop_config_io_sort_mb %></value>
-  </property>
-<% else %>
-  <property>
-    <name>io.sort.mb</name>
-    <value>256</value>
-  </property>
-<% end %>
-
-<% if @hadoop_config_io_sort_factor %>
-  <property>
-    <name>io.sort.factor</name>
-    <value><%= @hadoop_config_io_sort_factor %></value>
-  </property>
-<% else %>
+<% if @mapreduce_task_io_sort_mb -%>
   <property>
-    <name>io.sort.factor</name>
-    <value>64</value>
+    <name>mapreduce.task.io.sort.mb</name>
+    <value><%= @mapreduce_task_io_sort_mb %></value>
   </property>
-<% end %>
 
-<% if @hadoop_config_mapred_job_tracker_handler_count %>
+<% end -%>
+<% if @mapreduce_task_io_sort_factor -%>
   <property>
-    <name>mapred.job.tracker.handler.count</name>
-    <value><%= @hadoop_config_mapred_job_tracker_handler_count %></value>
-    <final>true</final>
+    <name>mapreduce.task.io.sort.factor</name>
+    <value><%= @mapreduce_task_io_sort_factor %></value>
   </property>
-<% else %>
-  <property>
-    <name>mapred.job.tracker.handler.count</name>
-    <value>10</value>
-    <final>true</final>
-  </property>
-<% end %>
 
+<% end -%>
+<% if @mapreduce_reduce_shuffle_parallelcopies -%>
   <property>
-    <name>mapred.map.tasks.speculative.execution</name>
-    <value>true</value>
-  </property>
-  
-<% if @hadoop_config_mapred_reduce_parallel_copies %>
-  <property>
-    <name>mapred.reduce.parallel.copies</name>
+    <name>mapreduce.reduce.shuffle.parallelcopies</name>
     <!-- set this to somewhere between sqrt(nodes) and nodes/2.
     for <20 nodes, set == |nodes| -->
-    <value><%= @hadoop_config_mapred_reduce_parallel_copies %></value>
+    <value><%= @mapreduce_reduce_shuffle_parallelcopies %></value>
   </property>
-<% else %>
-  <property>
-    <name>mapred.reduce.parallel.copies</name>
-    <!-- set this to somewhere between sqrt(nodes) and nodes/2.
-    for <20 nodes, set == |nodes| -->
-    <value>5</value>
-  </property>
-<% end %>
 
+<% end -%>
+<% if @mapreduce_tasktracker_map_tasks_maximum -%>
   <property>
-    <name>mapred.reduce.tasks</name>
-    <!-- set to numnodes * mapred.tasktracker.reduce.tasks.maximum -->
-    <value>30</value>
-  </property>
-
-<% if @hadoop_config_mapred_reduce_tasks_speculative_execution %>
-  <property>
-    <name>mapred.reduce.tasks.speculative.execution</name>
-    <value><%= @hadoop_config_mapred_reduce_tasks_speculative_execution %></value>
-  </property>
-<% else %>
-  <property>
-    <name>mapred.reduce.tasks.speculative.execution</name>
-    <value>false</value>
-  </property>
-<% end %>
-
-  <property>
-    <name>mapred.tasktracker.map.tasks.maximum</name>
+    <name>mapreduce.tasktracker.map.tasks.maximum</name>
     <!-- see other kb entry about this one. -->
-    <value><%= [1, @processorcount.to_i * 0.80].max.round %></value>
+    <value><%= @mapreduce_tasktracker_map_tasks_maximum %></value>
     <final>true</final>
   </property>
 
+<% end -%>
+<% if @mapreduce_tasktracker_reduce_tasks_maximum -%>
   <property>
-    <name>mapred.tasktracker.reduce.tasks.maximum</name>
+    <name>mapreduce.tasktracker.reduce.tasks.maximum</name>
     <!-- see other kb entry about this one. -->
-    <value><%= [1, @processorcount.to_i * 0.20].max.round %></value>
+    <value><%= @mapreduce_tasktracker_reduce_tasks_maximum %></value>
     <final>true</final>
   </property>
 
-<% if @hadoop_config_tasktracker_http_threads %>
+<% end -%>
+<% if @mapreduce_tasktracker_http_threads -%>
   <property>
-    <name>tasktracker.http.threads</name>
-    <value><%= @hadoop_config_tasktracker_http_threads %></value>
+    <name>mapreduce.tasktracker.http.threads</name>
+    <value><%= @mapreduce_tasktracker_http_threads %></value>
     <final>true</final>
   </property>
-<% else %>
-  <property>
-    <name>tasktracker.http.threads</name>
-    <value>60</value>
-    <final>true</final>
-  </property>
-<% end %>
 
+<% end -%>
+<% if @mapreduce_output_fileoutputformat_compress_type -%>
   <property>
-    <name>mapred.output.compression.type</name>
-    <value>BLOCK</value>
+    <name>mapreduce.output.fileoutputformat.compress.type</name>
+    <value><%= @mapreduce_output_fileoutputformat_compress_type %></value>
     <description>If the job outputs are to compressed as
     SequenceFiles, how should they be compressed? Should be one of
     NONE, RECORD or BLOCK.</description>
   </property>
 
-<% if @hadoop_config_use_compression %>
+<% end -%>
+<% if @mapreduce_map_output_compress -%>
   <property>
-    <name>mapred.compress.map.output</name>
-    <value><%= @hadoop_config_use_compression %></value>
+    <name>mapreduce.map.output.compress</name>
+    <value><%= @mapreduce_map_output_compress %></value>
   </property>
-<% else %>
-  <property>
-    <name>mapred.compress.map.output</name>
-    <value>false</value>
-  </property>
-<% end %>
 
-<% if @hadoop_config_mapred_reduce_slowstart_completed_maps %>
+<% end -%>
+<% if @mapreduce_job_reduce_slowstart_completedmaps -%>
   <property>
-    <name>mapred.reduce.slowstart.completed.maps</name>
-    <value><%= @hadoop_config_mapred_reduce_slowstart_completed_maps %></value>
+    <name>mapreduce.job.reduce.slowstart.completedmaps</name>
+    <value><%= @mapreduce_job_reduce_slowstart_completedmaps %></value>
   </property>
-<% end %>
 
-<% if @hadoop_mapred_jobtracker_plugins %>
+<% end -%>
+<% if @mapred_jobtracker_plugins -%>
   <property>
     <name>mapred.jobtracker.plugins</name>
-    <value><%= @hadoop_mapred_jobtracker_plugins %></value>
+    <value><%= @mapred_jobtracker_plugins %></value>
     <description>Comma-separated list of jobtracker plug-ins to be activated.</description>
   </property>
-<% end %>
-<% if @hadoop_mapred_tasktracker_plugins %>
-  <property>
-    <name>mapred.tasktracker.instrumentation</name>
-    <value><%= @hadoop_mapred_tasktracker_plugins %></value>
-  </property>
-<% end %>
 
+<% end -%>
 </configuration>