You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by ma...@apache.org on 2013/02/15 00:02:19 UTC
svn commit: r1446382 - in /incubator/ambari/trunk: ./ ambari-agent/src/main/puppet/modules/hdp-hadoop/manifests/ ambari-agent/src/main/puppet/modules/hdp-nagios/manifests/ ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ ambari-serve...

Author: mahadev
Date: Thu Feb 14 23:02:18 2013
New Revision: 1446382

URL: http://svn.apache.org/r1446382
Log:
AMBARI-1433. Allow capacity scheduler to be configurable via the API's. (mahadev)

Added:
    incubator/ambari/trunk/ambari-server/src/main/resources/stacks/HDP/1.2.0/services/MAPREDUCE/configuration/capacity-scheduler.xml
    incubator/ambari/trunk/ambari-server/src/main/resources/stacks/HDPLocal/1.2.0/services/MAPREDUCE/configuration/capacity-scheduler.xml
Modified:
    incubator/ambari/trunk/.gitignore
    incubator/ambari/trunk/CHANGES.txt
    incubator/ambari/trunk/ambari-agent/src/main/puppet/modules/hdp-hadoop/manifests/init.pp
    incubator/ambari/trunk/ambari-agent/src/main/puppet/modules/hdp-nagios/manifests/server.pp
    incubator/ambari/trunk/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionManager.java
    incubator/ambari/trunk/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionScheduler.java
    incubator/ambari/trunk/ambari-server/src/main/python/ambari-server.py

Modified: incubator/ambari/trunk/.gitignore
URL: http://svn.apache.org/viewvc/incubator/ambari/trunk/.gitignore?rev=1446382&r1=1446381&r2=1446382&view=diff
==============================================================================
--- incubator/ambari/trunk/.gitignore (original)
+++ incubator/ambari/trunk/.gitignore Thu Feb 14 23:02:18 2013
@@ -7,6 +7,7 @@
 target
 /ambari-server/derby.log
 /ambari-server/pass.txt
+/ambari-web/npm-debug.log
 /ambari-web/public/
 /ambari-web/node_modules/
 *.pyc

Modified: incubator/ambari/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/incubator/ambari/trunk/CHANGES.txt?rev=1446382&r1=1446381&r2=1446382&view=diff
==============================================================================
--- incubator/ambari/trunk/CHANGES.txt (original)
+++ incubator/ambari/trunk/CHANGES.txt Thu Feb 14 23:02:18 2013
@@ -278,6 +278,9 @@ Trunk (unreleased changes):
 
  AMBARI-1411. Missing unit test coverage for resource providers. (tbeerbower)
 
+ AMBARI-1433. Allow capacity scheduler to be configurable via the API's.
+ (mahadev)
+
  BUG FIXES
 
  AMBARI-1431. Hosts table no longer allows sorting. (yusaku)

Modified: incubator/ambari/trunk/ambari-agent/src/main/puppet/modules/hdp-hadoop/manifests/init.pp
URL: http://svn.apache.org/viewvc/incubator/ambari/trunk/ambari-agent/src/main/puppet/modules/hdp-hadoop/manifests/init.pp?rev=1446382&r1=1446381&r2=1446382&view=diff
==============================================================================
--- incubator/ambari/trunk/ambari-agent/src/main/puppet/modules/hdp-hadoop/manifests/init.pp (original)
+++ incubator/ambari/trunk/ambari-agent/src/main/puppet/modules/hdp-hadoop/manifests/init.pp Thu Feb 14 23:02:18 2013
@@ -101,19 +101,8 @@ debug('##Configs generation for hdp-hado
       configuration => $configuration['capacity-scheduler'],
       owner => $hdp-hadoop::params::hdfs_user,
       group => $hdp::params::user_group,
-      replace => true,
     }
-  } else { #   This file will just be a static file for now. - BUG-3195
-    file {"capacity-scheduler.xml":
-      ensure  => present,
-      source => "puppet:///modules/hdp-hadoop/capacity-scheduler.xml",
-      mode => '0744',
-      path => "${hdp-hadoop::params::conf_dir}/capacity-scheduler.xml",
-      owner => $hdp-hadoop::params::mapred_user,
-      group => $hdp::params::user_group,
-      replace => true,
-    }
-  }
+  } 
 
 
   if has_key($configuration, 'hdfs-site') {

Modified: incubator/ambari/trunk/ambari-agent/src/main/puppet/modules/hdp-nagios/manifests/server.pp
URL: http://svn.apache.org/viewvc/incubator/ambari/trunk/ambari-agent/src/main/puppet/modules/hdp-nagios/manifests/server.pp?rev=1446382&r1=1446381&r2=1446382&view=diff
==============================================================================
--- incubator/ambari/trunk/ambari-agent/src/main/puppet/modules/hdp-nagios/manifests/server.pp (original)
+++ incubator/ambari/trunk/ambari-agent/src/main/puppet/modules/hdp-nagios/manifests/server.pp Thu Feb 14 23:02:18 2013
@@ -149,7 +149,7 @@ class hdp-nagios::server(
     if ($service_state == 'installed_and_configured') {
       $webserver_state = 'restart'
     } elsif ($service_state == 'running') {
-      $webserver_state = 'running'
+      $webserver_state = 'restart'
     } else {
       # We are never stopping httpd
       #$webserver_state = $service_state

Modified: incubator/ambari/trunk/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionManager.java
URL: http://svn.apache.org/viewvc/incubator/ambari/trunk/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionManager.java?rev=1446382&r1=1446381&r2=1446382&view=diff
==============================================================================
--- incubator/ambari/trunk/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionManager.java (original)
+++ incubator/ambari/trunk/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionManager.java Thu Feb 14 23:02:18 2013
@@ -17,11 +17,9 @@
  */
 package org.apache.ambari.server.actionmanager;
 
-import java.util.Collection;
-import java.util.List;
-import java.util.Set;
-import java.util.concurrent.atomic.AtomicLong;
-
+import com.google.inject.Inject;
+import com.google.inject.Singleton;
+import com.google.inject.name.Named;
 import org.apache.ambari.server.agent.ActionQueue;
 import org.apache.ambari.server.agent.CommandReport;
 import org.apache.ambari.server.controller.HostsMap;
@@ -30,9 +28,10 @@ import org.apache.ambari.server.utils.St
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import com.google.inject.Inject;
-import com.google.inject.Singleton;
-import com.google.inject.name.Named;
+import java.util.Collection;
+import java.util.List;
+import java.util.Set;
+import java.util.concurrent.atomic.AtomicLong;
 
 
 /**
@@ -75,6 +74,9 @@ public class ActionManager {
       LOG.info("Persisting stage into db: " + s.toString());
     }
     db.persistActions(stages);
+
+    // Now scheduler should process actions
+    scheduler.awake();
   }
 
   public List<Stage> getRequestStatus(long requestId) {

Modified: incubator/ambari/trunk/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionScheduler.java
URL: http://svn.apache.org/viewvc/incubator/ambari/trunk/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionScheduler.java?rev=1446382&r1=1446381&r2=1446382&view=diff
==============================================================================
--- incubator/ambari/trunk/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionScheduler.java (original)
+++ incubator/ambari/trunk/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionScheduler.java Thu Feb 14 23:02:18 2013
@@ -17,32 +17,25 @@
  */
 package org.apache.ambari.server.actionmanager;
 
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.TreeMap;
-
 import org.apache.ambari.server.AmbariException;
 import org.apache.ambari.server.Role;
 import org.apache.ambari.server.ServiceComponentNotFoundException;
 import org.apache.ambari.server.agent.ActionQueue;
 import org.apache.ambari.server.agent.ExecutionCommand;
 import org.apache.ambari.server.controller.HostsMap;
-import org.apache.ambari.server.state.Cluster;
-import org.apache.ambari.server.state.Clusters;
-import org.apache.ambari.server.state.Service;
-import org.apache.ambari.server.state.ServiceComponent;
-import org.apache.ambari.server.state.ServiceComponentHost;
+import org.apache.ambari.server.state.*;
 import org.apache.ambari.server.state.fsm.InvalidStateTransitionException;
 import org.apache.ambari.server.state.svccomphost.ServiceComponentHostOpFailedEvent;
-import org.apache.ambari.server.utils.StageUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-//This class encapsulates the action scheduler thread.
-//Action schedule frequently looks at action database and determines if
-//there is an action that can be scheduled.
+import java.util.*;
+
+/**
+ * This class encapsulates the action scheduler thread.
+ * The action scheduler frequently looks at the action database and
+ * determines if there is an action that can be scheduled.
+ */
 class ActionScheduler implements Runnable {
 
   private static Logger LOG = LoggerFactory.getLogger(ActionScheduler.class);
@@ -56,6 +49,14 @@ class ActionScheduler implements Runnabl
   private final Clusters fsmObject;
   private boolean taskTimeoutAdjustment = true;
   private final HostsMap hostsMap;
+  private final Object wakeupSyncObject = new Object();
+
+  /**
+   * true if scheduler should run ASAP.
+   * We need this flag to avoid sleeping when an awake() request
+   * arrives while a scheduler iteration is already running.
+   */
+  private boolean activeAwakeRequest = false;
 
   public ActionScheduler(long sleepTimeMilliSec, long actionTimeoutMilliSec,
       ActionDBAccessor db, ActionQueue actionQueue, Clusters fsmObject,
@@ -79,11 +80,28 @@ class ActionScheduler implements Runnabl
     schedulerThread.interrupt();
   }
 
+  /**
+   * Asks the scheduler to make a run ASAP rather than waiting out its
+   * sleep interval (for example, to process desired configs of SCHs).
+   * Should be called from a thread other than the scheduler thread.
+   * The request is recorded in activeAwakeRequest before the wake-up
+   * monitor is notified, so a request that arrives while the scheduler
+   * is busy with an iteration is picked up before its next wait.
+   * The method only sets that flag and calls notify(), so it returns
+   * quickly.
+   */
+  public void awake() {
+    synchronized (wakeupSyncObject) {
+      activeAwakeRequest = true;
+      wakeupSyncObject.notify();
+    }
+  }
+  }
+
   @Override
   public void run() {
     while (shouldRun) {
       try {
-        Thread.sleep(sleepTime);
+        synchronized (wakeupSyncObject) {
+          if (!activeAwakeRequest) {
+              wakeupSyncObject.wait(sleepTime);
+          }
+          activeAwakeRequest = false;
+        }
         doWork();
       } catch (InterruptedException ex) {
         LOG.warn("Scheduler thread is interrupted going to stop", ex);

Modified: incubator/ambari/trunk/ambari-server/src/main/python/ambari-server.py
URL: http://svn.apache.org/viewvc/incubator/ambari/trunk/ambari-server/src/main/python/ambari-server.py?rev=1446382&r1=1446381&r2=1446382&view=diff
==============================================================================
--- incubator/ambari/trunk/ambari-server/src/main/python/ambari-server.py (original)
+++ incubator/ambari/trunk/ambari-server/src/main/python/ambari-server.py Thu Feb 14 23:02:18 2013
@@ -420,7 +420,7 @@ def download_jdk(args):
 
   try:
     jdk_url = properties['jdk.url']
-    resources_dir = properties['resources.dir']
+    resources_dir = properties['resources.dir']  
   except (KeyError), e:
     print 'Property ' + str(e) + ' is not defined at ' + conf_file
     return -1
@@ -432,7 +432,7 @@ def download_jdk(args):
       #Get Header from url,to get file size then
       retcode, out, err = run_os_command(size_command)
       if out.find("Content-Length") == -1:
-        print "Request headr doesn't contain Content-Length";
+        print "Request header doesn't contain Content-Length";
         return -1
       start_with = int(out.find("Content-Length") + len("Content-Length") + 2)
       end_with = out.find("\r\n", start_with)
@@ -458,7 +458,43 @@ def download_jdk(args):
       return -1
   else:
     print "JDK already exists using " + dest_file
+  
+  try:
+     out = install_jdk(dest_file)
+     jdk_version = re.search('Creating (jdk.*)/jre', out).group(1)
+  except Exception, e:
+     print "Installation of JDK was failed: %s\n" % e.message
+     file_exists = os.path.isfile(dest_file)
+     if file_exists:
+        ok = get_YN_input("JDK found at "+dest_file+". "
+                    "Would you like to re-download the JDK [y/n] (y)? ", True)
+        if (ok == False):
+           print "Unable to install JDK. Please remove JDK file found at "+ dest_file +" and re-run Ambari Server setup" 
+           return -1
+        else:
+           track_jdk(JDK_LOCAL_FILENAME, jdk_url, dest_file)
+           print 'Successfully re-downloaded JDK distribution to ' + dest_file 
+           try:
+               out = install_jdk(dest_file)
+               jdk_version = re.search('Creating (jdk.*)/jre', out).group(1)
+           except Exception, e:
+               print "Installation of JDK was failed: %s\n" % e.message
+               print "Unable to install JDK. Please remove JDK, file found at "+ dest_file +" and re-run Ambari Server setup" 
+               return -1              
+  
+     else:
+         print "Unable to install JDK. File "+ dest_file +"does not exist, please re-run Ambari Server setup"
+         return -1
+  
+  print "Successfully installed JDK to {0}/{1}".\
+      format(JDK_INSTALL_DIR, jdk_version)
+  write_property(JAVA_HOME_PROPERTY, "{0}/{1}".
+      format(JDK_INSTALL_DIR, jdk_version))
+  return 0
+
+class RetCodeException(Exception): pass
 
+def install_jdk(dest_file):
   ok = get_YN_input("To install the Oracle JDK you must accept the "
                     "license terms found at "
                     "http://www.oracle.com/technetwork/java/javase/"
@@ -475,14 +511,9 @@ def download_jdk(args):
   retcode, out, err = run_os_command(MAKE_FILE_EXECUTABLE_CMD.format(dest_file))
   retcode, out, err = run_os_command(dest_file + ' -noregister')
   os.chdir(savedPath)
-  jdk_version = re.search('Creating (jdk.*)/jre', out).group(1)
-  print "Successfully installed JDK to {0}/{1}".\
-      format(JDK_INSTALL_DIR, jdk_version)
-  write_property(JAVA_HOME_PROPERTY, "{0}/{1}".
-      format(JDK_INSTALL_DIR, jdk_version))
-  return 0
-
-
+  if (retcode != 0):
+       raise RetCodeException("Installation JDK returned code %s" % retcode) 
+  return out  
 
 def get_postgre_status():
   retcode, out, err = run_os_command(PG_ST_CMD)
@@ -1097,4 +1128,4 @@ class Properties(object):
 
 
 if __name__ == "__main__":
-  main()
+  main()
\ No newline at end of file

Added: incubator/ambari/trunk/ambari-server/src/main/resources/stacks/HDP/1.2.0/services/MAPREDUCE/configuration/capacity-scheduler.xml
URL: http://svn.apache.org/viewvc/incubator/ambari/trunk/ambari-server/src/main/resources/stacks/HDP/1.2.0/services/MAPREDUCE/configuration/capacity-scheduler.xml?rev=1446382&view=auto
==============================================================================
--- incubator/ambari/trunk/ambari-server/src/main/resources/stacks/HDP/1.2.0/services/MAPREDUCE/configuration/capacity-scheduler.xml (added)
+++ incubator/ambari/trunk/ambari-server/src/main/resources/stacks/HDP/1.2.0/services/MAPREDUCE/configuration/capacity-scheduler.xml Thu Feb 14 23:02:18 2013
@@ -0,0 +1,195 @@
+<?xml version="1.0"?>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<!-- This is the configuration file for the resource manager in Hadoop. -->
+<!-- You can configure various scheduling parameters related to queues. -->
+<!-- The properties for a queue follow a naming convention, such as -->
+<!-- mapred.capacity-scheduler.queue.<queue-name>.property-name. -->
+
+<configuration>
+
+  <property>
+    <name>mapred.capacity-scheduler.maximum-system-jobs</name>
+    <value>3000</value>
+    <description>Maximum number of jobs in the system which can be initialized,
+     concurrently, by the CapacityScheduler.
+    </description>    
+  </property>
+  
+  <property>
+    <name>mapred.capacity-scheduler.queue.default.capacity</name>
+    <value>100</value>
+    <description>Percentage of the number of slots in the cluster that are
+      to be available for jobs in this queue.
+    </description>    
+  </property>
+  
+  <property>
+    <name>mapred.capacity-scheduler.queue.default.maximum-capacity</name>
+    <value>-1</value>
+    <description>
+	maximum-capacity defines a limit beyond which a queue cannot use the capacity of the cluster.
+	This provides a means to limit how much excess capacity a queue can use. By default, there is no limit.
+	The maximum-capacity of a queue can only be greater than or equal to its minimum capacity.
+        Default value of -1 implies a queue can use complete capacity of the cluster.
+
+        This property could be used to curtail certain jobs which are long running in nature from occupying more than a 
+        certain percentage of the cluster, which in the absence of pre-emption, could lead to capacity guarantees of 
+        other queues being affected.
+        
+        One important thing to note is that maximum-capacity is a percentage, so based on the cluster's capacity
+        the max capacity would change. So if a large number of nodes or racks get added to the cluster, max capacity in 
+        absolute terms would increase accordingly.
+    </description>    
+  </property>
+  
+  <property>
+    <name>mapred.capacity-scheduler.queue.default.supports-priority</name>
+    <value>false</value>
+    <description>If true, priorities of jobs will be taken into 
+      account in scheduling decisions.
+    </description>
+  </property>
+
+  <property>
+    <name>mapred.capacity-scheduler.queue.default.minimum-user-limit-percent</name>
+    <value>100</value>
+    <description> Each queue enforces a limit on the percentage of resources 
+    allocated to a user at any given time, if there is competition for them. 
+    This user limit can vary between a minimum and maximum value. The former
+    is set to this property value, and the latter depends on the number of
+    users who have submitted jobs. For example, suppose the value of this 
+    property is 25. If two users have submitted jobs to a queue, no single 
+    user can use more than 50% of the queue resources. If a third user submits
+    a job, no single user can use more than 33% of the queue resources. With 4 
+    or more users, no user can use more than 25% of the queue's resources. A 
+    value of 100 implies no user limits are imposed. 
+    </description>
+  </property>
+  
+  <property>
+    <name>mapred.capacity-scheduler.queue.default.user-limit-factor</name>
+    <value>1</value>
+    <description>The multiple of the queue capacity which can be configured to 
+    allow a single user to acquire more slots. 
+    </description>
+  </property>
+
+  <property>
+    <name>mapred.capacity-scheduler.queue.default.maximum-initialized-active-tasks</name>
+    <value>200000</value>
+    <description>The maximum number of tasks, across all jobs in the queue, 
+    which can be initialized concurrently. Once the queue's jobs exceed this 
+    limit they will be queued on disk.  
+    </description>
+  </property>
+
+  <property>
+    <name>mapred.capacity-scheduler.queue.default.maximum-initialized-active-tasks-per-user</name>
+    <value>100000</value>
+    <description>The maximum number of tasks per-user, across all of the 
+    user's jobs in the queue, which can be initialized concurrently. Once the 
+    user's jobs exceed this limit they will be queued on disk.  
+    </description>
+  </property>
+
+  <property>
+    <name>mapred.capacity-scheduler.queue.default.init-accept-jobs-factor</name>
+    <value>10</value>
+    <description>The multiple of (maximum-system-jobs * queue-capacity) used to 
+    determine the number of jobs which are accepted by the scheduler.  
+    </description>
+  </property>
+
+  <!-- The default configuration settings for the capacity task scheduler -->
+  <!-- The default values would be applied to all the queues which don't have -->
+  <!-- the appropriate property for the particular queue -->
+  <property>
+    <name>mapred.capacity-scheduler.default-supports-priority</name>
+    <value>false</value>
+    <description>If true, priorities of jobs will be taken into 
+      account in scheduling decisions by default in a job queue.
+    </description>
+  </property>
+  
+  <property>
+    <name>mapred.capacity-scheduler.default-minimum-user-limit-percent</name>
+    <value>100</value>
+    <description>The percentage of the resources limited to a particular user
+      for the job queue at any given point of time by default.
+    </description>
+  </property>
+
+
+  <property>
+    <name>mapred.capacity-scheduler.default-user-limit-factor</name>
+    <value>1</value>
+    <description>The default multiple of queue-capacity which is used to 
+    determine the amount of slots a single user can consume concurrently.
+    </description>
+  </property>
+
+  <property>
+    <name>mapred.capacity-scheduler.default-maximum-active-tasks-per-queue</name>
+    <value>200000</value>
+    <description>The default maximum number of tasks, across all jobs in the 
+    queue, which can be initialized concurrently. Once the queue's jobs exceed 
+    this limit they will be queued on disk.  
+    </description>
+  </property>
+
+  <property>
+    <name>mapred.capacity-scheduler.default-maximum-active-tasks-per-user</name>
+    <value>100000</value>
+    <description>The default maximum number of tasks per-user, across all of 
+    the user's jobs in the queue, which can be initialized concurrently. Once 
+    the user's jobs exceed this limit they will be queued on disk.  
+    </description>
+  </property>
+
+  <property>
+    <name>mapred.capacity-scheduler.default-init-accept-jobs-factor</name>
+    <value>10</value>
+    <description>The default multiple of (maximum-system-jobs * queue-capacity) 
+    used to determine the number of jobs which are accepted by the scheduler.  
+    </description>
+  </property>
+
+  <!-- Capacity scheduler Job Initialization configuration parameters -->
+  <property>
+    <name>mapred.capacity-scheduler.init-poll-interval</name>
+    <value>5000</value>
+    <description>The amount of time in milliseconds which is used to poll 
+    the job queues for jobs to initialize.
+    </description>
+  </property>
+  <property>
+    <name>mapred.capacity-scheduler.init-worker-threads</name>
+    <value>5</value>
+    <description>Number of worker threads which would be used by
+    Initialization poller to initialize jobs in a set of queue.
+    If number mentioned in property is equal to number of job queues
+    then a single thread would initialize jobs in a queue. If lesser
+    then a thread would get a set of queues assigned. If the number
+    is greater then number of threads would be equal to number of 
+    job queues.
+    </description>
+  </property>
+
+</configuration>

Added: incubator/ambari/trunk/ambari-server/src/main/resources/stacks/HDPLocal/1.2.0/services/MAPREDUCE/configuration/capacity-scheduler.xml
URL: http://svn.apache.org/viewvc/incubator/ambari/trunk/ambari-server/src/main/resources/stacks/HDPLocal/1.2.0/services/MAPREDUCE/configuration/capacity-scheduler.xml?rev=1446382&view=auto
==============================================================================
--- incubator/ambari/trunk/ambari-server/src/main/resources/stacks/HDPLocal/1.2.0/services/MAPREDUCE/configuration/capacity-scheduler.xml (added)
+++ incubator/ambari/trunk/ambari-server/src/main/resources/stacks/HDPLocal/1.2.0/services/MAPREDUCE/configuration/capacity-scheduler.xml Thu Feb 14 23:02:18 2013
@@ -0,0 +1,195 @@
+<?xml version="1.0"?>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<!-- This is the configuration file for the resource manager in Hadoop. -->
+<!-- You can configure various scheduling parameters related to queues. -->
+<!-- The properties for a queue follow a naming convention, such as -->
+<!-- mapred.capacity-scheduler.queue.<queue-name>.property-name. -->
+
+<configuration>
+
+  <property>
+    <name>mapred.capacity-scheduler.maximum-system-jobs</name>
+    <value>3000</value>
+    <description>Maximum number of jobs in the system which can be initialized,
+     concurrently, by the CapacityScheduler.
+    </description>    
+  </property>
+  
+  <property>
+    <name>mapred.capacity-scheduler.queue.default.capacity</name>
+    <value>100</value>
+    <description>Percentage of the number of slots in the cluster that are
+      to be available for jobs in this queue.
+    </description>    
+  </property>
+  
+  <property>
+    <name>mapred.capacity-scheduler.queue.default.maximum-capacity</name>
+    <value>-1</value>
+    <description>
+	maximum-capacity defines a limit beyond which a queue cannot use the capacity of the cluster.
+	This provides a means to limit how much excess capacity a queue can use. By default, there is no limit.
+	The maximum-capacity of a queue can only be greater than or equal to its minimum capacity.
+        Default value of -1 implies a queue can use complete capacity of the cluster.
+
+        This property could be used to curtail certain jobs which are long running in nature from occupying more than a 
+        certain percentage of the cluster, which in the absence of pre-emption, could lead to capacity guarantees of 
+        other queues being affected.
+        
+        One important thing to note is that maximum-capacity is a percentage, so based on the cluster's capacity
+        the max capacity would change. So if a large number of nodes or racks get added to the cluster, max capacity in 
+        absolute terms would increase accordingly.
+    </description>    
+  </property>
+  
+  <property>
+    <name>mapred.capacity-scheduler.queue.default.supports-priority</name>
+    <value>false</value>
+    <description>If true, priorities of jobs will be taken into 
+      account in scheduling decisions.
+    </description>
+  </property>
+
+  <property>
+    <name>mapred.capacity-scheduler.queue.default.minimum-user-limit-percent</name>
+    <value>100</value>
+    <description> Each queue enforces a limit on the percentage of resources 
+    allocated to a user at any given time, if there is competition for them. 
+    This user limit can vary between a minimum and maximum value. The former
+    is set to this property value, and the latter depends on the number of
+    users who have submitted jobs. For example, suppose the value of this 
+    property is 25. If two users have submitted jobs to a queue, no single 
+    user can use more than 50% of the queue resources. If a third user submits
+    a job, no single user can use more than 33% of the queue resources. With 4 
+    or more users, no user can use more than 25% of the queue's resources. A 
+    value of 100 implies no user limits are imposed. 
+    </description>
+  </property>
+  
+  <property>
+    <name>mapred.capacity-scheduler.queue.default.user-limit-factor</name>
+    <value>1</value>
+    <description>The multiple of the queue capacity which can be configured to 
+    allow a single user to acquire more slots. 
+    </description>
+  </property>
+
+  <property>
+    <name>mapred.capacity-scheduler.queue.default.maximum-initialized-active-tasks</name>
+    <value>200000</value>
+    <description>The maximum number of tasks, across all jobs in the queue, 
+    which can be initialized concurrently. Once the queue's jobs exceed this 
+    limit they will be queued on disk.  
+    </description>
+  </property>
+
+  <property>
+    <name>mapred.capacity-scheduler.queue.default.maximum-initialized-active-tasks-per-user</name>
+    <value>100000</value>
+    <description>The maximum number of tasks per-user, across all of the 
+    user's jobs in the queue, which can be initialized concurrently. Once the 
+    user's jobs exceed this limit they will be queued on disk.  
+    </description>
+  </property>
+
+  <property>
+    <name>mapred.capacity-scheduler.queue.default.init-accept-jobs-factor</name>
+    <value>10</value>
+    <description>The multiple of (maximum-system-jobs * queue-capacity) used to 
+    determine the number of jobs which are accepted by the scheduler.  
+    </description>
+  </property>
+
+  <!-- The default configuration settings for the capacity task scheduler -->
+  <!-- The default values would be applied to all the queues which don't have -->
+  <!-- the appropriate property for the particular queue -->
+  <property>
+    <name>mapred.capacity-scheduler.default-supports-priority</name>
+    <value>false</value>
+    <description>If true, priorities of jobs will be taken into 
+      account in scheduling decisions by default in a job queue.
+    </description>
+  </property>
+  
+  <property>
+    <name>mapred.capacity-scheduler.default-minimum-user-limit-percent</name>
+    <value>100</value>
+    <description>The percentage of the resources limited to a particular user
+      for the job queue at any given point of time by default.
+    </description>
+  </property>
+
+
+  <property>
+    <name>mapred.capacity-scheduler.default-user-limit-factor</name>
+    <value>1</value>
+    <description>The default multiple of queue-capacity which is used to 
+    determine the amount of slots a single user can consume concurrently.
+    </description>
+  </property>
+
+  <property>
+    <name>mapred.capacity-scheduler.default-maximum-active-tasks-per-queue</name>
+    <value>200000</value>
+    <description>The default maximum number of tasks, across all jobs in the 
+    queue, which can be initialized concurrently. Once the queue's jobs exceed 
+    this limit they will be queued on disk.  
+    </description>
+  </property>
+
+  <property>
+    <name>mapred.capacity-scheduler.default-maximum-active-tasks-per-user</name>
+    <value>100000</value>
+    <description>The default maximum number of tasks per-user, across all of 
+    the user's jobs in the queue, which can be initialized concurrently. Once 
+    the user's jobs exceed this limit they will be queued on disk.  
+    </description>
+  </property>
+
+  <property>
+    <name>mapred.capacity-scheduler.default-init-accept-jobs-factor</name>
+    <value>10</value>
+    <description>The default multiple of (maximum-system-jobs * queue-capacity) 
+    used to determine the number of jobs which are accepted by the scheduler.  
+    </description>
+  </property>
+
+  <!-- Capacity scheduler Job Initialization configuration parameters -->
+  <property>
+    <name>mapred.capacity-scheduler.init-poll-interval</name>
+    <value>5000</value>
+    <description>The amount of time in milliseconds which is used to poll 
+    the job queues for jobs to initialize.
+    </description>
+  </property>
+  <property>
+    <name>mapred.capacity-scheduler.init-worker-threads</name>
+    <value>5</value>
+    <description>Number of worker threads which would be used by
+    Initialization poller to initialize jobs in a set of queue.
+    If number mentioned in property is equal to number of job queues
+    then a single thread would initialize jobs in a queue. If lesser
+    then a thread would get a set of queues assigned. If the number
+    is greater then number of threads would be equal to number of 
+    job queues.
+    </description>
+  </property>
+
+</configuration>