Posted to commits@ambari.apache.org by jl...@apache.org on 2017/01/25 17:40:58 UTC

ambari git commit: AMBARI-19690: NM Memory can end up being too high on nodes with many components (jluniya)

Repository: ambari
Updated Branches:
  refs/heads/trunk e55523012 -> 6a8115572


AMBARI-19690: NM Memory can end up being too high on nodes with many components (jluniya)


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/6a811557
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/6a811557
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/6a811557

Branch: refs/heads/trunk
Commit: 6a8115572b328785532aed27c1dc44a1bac17a01
Parents: e555230
Author: Jayush Luniya <jl...@hortonworks.com>
Authored: Wed Jan 25 09:40:56 2017 -0800
Committer: Jayush Luniya <jl...@hortonworks.com>
Committed: Wed Jan 25 09:40:56 2017 -0800

----------------------------------------------------------------------
 .../stacks/HDP/2.0.6/services/stack_advisor.py  |  60 +++++++++-
 .../stacks/HDP/2.5/services/stack_advisor.py    |  33 +-----
 .../src/main/resources/stacks/stack_advisor.py  |  18 +++
 .../stacks/2.0.6/common/test_stack_advisor.py   | 113 ++++++++++++++++++-
 .../stacks/2.5/common/test_stack_advisor.py     |   4 +-
 5 files changed, 191 insertions(+), 37 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/6a811557/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/stack_advisor.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/stack_advisor.py b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/stack_advisor.py
index 7ed1b77..55f3d30 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/stack_advisor.py
+++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/stack_advisor.py
@@ -1350,6 +1350,35 @@ class HDP206StackAdvisor(DefaultStackAdvisor):
         totalMemoryRequired += self.formatXmxSizeToBytes(heapsize)
     return totalMemoryRequired
 
+  def get_yarn_nm_mem_in_mb(self, services, configurations):
+    """
+    Gets YARN NodeManager memory in MB (yarn.nodemanager.resource.memory-mb).
+    Reads from:
+      - configurations (the output of the current Stack Advisor invocation), if the value was changed there and
+        services["changed-configurations"] is empty, else
+      - services['configurations'] (the input).
+
+    services["changed-configurations"] would be empty if the Stack Advisor call is made from Blueprints (1st invocation).
+    Subsequent Stack Advisor calls will have it non-empty. We do this because in subsequent invocations, even if Stack
+    Advisor calculates this value (in configurations), it is ultimately not recommended, so the 'input' value survives.
+    """
+    yarn_nm_mem_in_mb = None
+
+    yarn_site = getServicesSiteProperties(services, "yarn-site")
+    yarn_site_properties = getSiteProperties(configurations, "yarn-site")
+
+    # Check if services["changed-configurations"] is empty and 'yarn.nodemanager.resource.memory-mb' was modified in the current Stack Advisor invocation.
+    if not ("changed-configurations" in services and services["changed-configurations"]) and yarn_site_properties and 'yarn.nodemanager.resource.memory-mb' in yarn_site_properties:
+      yarn_nm_mem_in_mb = float(yarn_site_properties['yarn.nodemanager.resource.memory-mb'])
+    elif yarn_site and 'yarn.nodemanager.resource.memory-mb' in yarn_site:
+      # Check if 'yarn.nodemanager.resource.memory-mb' is input in services array.
+      yarn_nm_mem_in_mb = float(yarn_site['yarn.nodemanager.resource.memory-mb'])
+
+    if yarn_nm_mem_in_mb is None or yarn_nm_mem_in_mb <= 0.0:
+      Logger.warning("'yarn.nodemanager.resource.memory-mb' current value : {0}. Expected value : > 0".format(yarn_nm_mem_in_mb))
+
+    return yarn_nm_mem_in_mb
+
   def getPreferredMountPoints(self, hostInfo):
 
     # '/etc/resolv.conf', '/etc/hostname', '/etc/hosts' are docker specific mount points
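
To illustrate the read precedence documented in get_yarn_nm_mem_in_mb above, here is a minimal standalone sketch of the same logic, assuming plain dicts in place of the full Stack Advisor request/response objects (the helper name read_nm_mem_mb and the sample payloads are hypothetical, not part of the patch):

    # Sketch of the read precedence: prefer this invocation's recommended
    # output only on the first (Blueprint) call, i.e. when nothing has been
    # user-changed yet; otherwise the input value survives.
    PROP = 'yarn.nodemanager.resource.memory-mb'

    def read_nm_mem_mb(services, configurations):
        output_site = configurations.get('yarn-site', {}).get('properties', {})
        input_site = services.get('configurations', {}).get('yarn-site', {}).get('properties', {})
        changed = services.get('changed-configurations') or []
        if not changed and PROP in output_site:
            return float(output_site[PROP])
        if PROP in input_site:
            return float(input_site[PROP])
        return None

    # First Blueprint invocation: changed-configurations is empty, so the
    # freshly computed output value (10240) wins over the input (8192).
    services = {'configurations': {'yarn-site': {'properties': {PROP: '8192'}}},
                'changed-configurations': []}
    configurations = {'yarn-site': {'properties': {PROP: '10240'}}}
    print(read_nm_mem_mb(services, configurations))  # -> 10240.0
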
@@ -1438,10 +1467,37 @@ class HDP206StackAdvisor(DefaultStackAdvisor):
 
   def validateYARNConfigurations(self, properties, recommendedDefaults, configurations, services, hosts):
     clusterEnv = getSiteProperties(configurations, "cluster-env")
-    validationItems = [ {"config-name": 'yarn.nodemanager.resource.memory-mb', "item": self.validatorLessThenDefaultValue(properties, recommendedDefaults, 'yarn.nodemanager.resource.memory-mb')},
+
+    validationItems = [ {"config-name": 'yarn.nodemanager.resource.memory-mb', "item": self.validatorGreaterThenDefaultValue(properties, recommendedDefaults, 'yarn.nodemanager.resource.memory-mb')},
                         {"config-name": 'yarn.scheduler.minimum-allocation-mb', "item": self.validatorLessThenDefaultValue(properties, recommendedDefaults, 'yarn.scheduler.minimum-allocation-mb')},
                         {"config-name": 'yarn.nodemanager.linux-container-executor.group', "item": self.validatorEqualsPropertyItem(properties, "yarn.nodemanager.linux-container-executor.group", clusterEnv, "user_group")},
-                        {"config-name": 'yarn.scheduler.maximum-allocation-mb', "item": self.validatorLessThenDefaultValue(properties, recommendedDefaults, 'yarn.scheduler.maximum-allocation-mb')} ]
+                        {"config-name": 'yarn.scheduler.maximum-allocation-mb', "item": self.validatorGreaterThenDefaultValue(properties, recommendedDefaults, 'yarn.scheduler.maximum-allocation-mb')} ]
+    nmMemory = int(self.get_yarn_nm_mem_in_mb(services, configurations) or 0)
+    if "items" in hosts and len(hosts["items"]) > 0:
+      nodeManagerHosts = self.getHostsWithComponent("YARN", "NODEMANAGER", services, hosts)
+      nmLowMemoryHosts = []
+      # The NodeManager host with the least memory is generally used in calculations, since a value that fits there also works on larger hosts.
+      if nodeManagerHosts is not None and len(nodeManagerHosts) > 0:
+        for nmHost in nodeManagerHosts:
+          nmHostName = nmHost["Hosts"]["host_name"]
+          componentNames = []
+          for service in services["services"]:
+            for component in service["components"]:
+              if not self.isClientComponent(component) and component["StackServiceComponents"]["hostnames"] is not None:
+                if nmHostName in component["StackServiceComponents"]["hostnames"]:
+                  componentNames.append(component["StackServiceComponents"]["component_name"])
+          requiredMemory = self.getMemorySizeRequired(services, componentNames, configurations)
+          unusedMemory = int((nmHost["Hosts"]["total_mem"] * 1024 - requiredMemory) / (1024 * 1024)) # in MB; total_mem is reported in KB
+          if nmMemory > unusedMemory:
+            nmLowMemoryHosts.append(nmHostName)
+
+        if len(nmLowMemoryHosts) > 0:
+          validationItems.append({"config-name": "yarn.nodemanager.resource.memory-mb",
+            "item": self.getWarnItem(
+                "Node manager hosts with high memory usage found (examples : {0}). Consider reducing the allocated "
+                "memory for containers or moving other co-located components "
+                "to a different host.".format(",".join(nmLowMemoryHosts[:3])))})
+
     return self.toConfigurationValidationProblems(validationItems, "yarn-site")
 
   def validateYARNEnvConfigurations(self, properties, recommendedDefaults, configurations, services, hosts):
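
The net effect of the validator hunk above is a per-host budget check: flag any NodeManager host whose unused memory cannot cover the configured NM size. A minimal sketch of that arithmetic, assuming host memory arrives in KB (as total_mem does here) and required memory in bytes (the tuple-based input and sample numbers are hypothetical):

    # Flag NodeManager hosts whose unused memory is below the configured
    # yarn.nodemanager.resource.memory-mb.
    def low_memory_nm_hosts(nm_hosts, nm_mem_mb):
        # nm_hosts: list of (host_name, total_mem_kb, required_bytes) tuples.
        flagged = []
        for host_name, total_mem_kb, required_bytes in nm_hosts:
            # total_mem is reported in KB; normalize everything to MB.
            unused_mb = int((total_mem_kb * 1024 - required_bytes) / (1024 * 1024))
            if nm_mem_mb > unused_mb:
                flagged.append(host_name)
        return flagged

    # A 12 GB host with 1 GB already required leaves 11264 MB unused,
    # so an NM configured for 12288 MB gets flagged.
    print(low_memory_nm_hosts([('host1', 12582912, 1024 ** 3)], 12288))  # -> ['host1']
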

http://git-wip-us.apache.org/repos/asf/ambari/blob/6a811557/ambari-server/src/main/resources/stacks/HDP/2.5/services/stack_advisor.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.5/services/stack_advisor.py b/ambari-server/src/main/resources/stacks/HDP/2.5/services/stack_advisor.py
index d2c0459..17f0c59 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.5/services/stack_advisor.py
+++ b/ambari-server/src/main/resources/stacks/HDP/2.5/services/stack_advisor.py
@@ -109,7 +109,7 @@ class HDP25StackAdvisor(HDP24StackAdvisor):
       "ATLAS": {"application-properties": self.validateAtlasConfigurations},
       "HIVE": {"hive-interactive-env": self.validateHiveInteractiveEnvConfigurations,
                "hive-interactive-site": self.validateHiveInteractiveSiteConfigurations},
-      "YARN": {"yarn-site": self.validateYarnConfigurations},
+      "YARN": {"yarn-site": self.validateYARNConfigurations},
       "RANGER": {"ranger-tagsync-site": self.validateRangerTagsyncConfigurations},
       "SPARK2": {"spark2-defaults": self.validateSpark2Defaults,
                  "spark2-thrift-sparkconf": self.validateSpark2ThriftSparkConf},
@@ -247,7 +247,7 @@ class HDP25StackAdvisor(HDP24StackAdvisor):
     ]
     return self.toConfigurationValidationProblems(validationItems, "spark2-thrift-sparkconf")
 
-  def validateYarnConfigurations(self, properties, recommendedDefaults, configurations, services, hosts):
+  def validateYARNConfigurations(self, properties, recommendedDefaults, configurations, services, hosts):
     parentValidationProblems = super(HDP25StackAdvisor, self).validateYARNConfigurations(properties, recommendedDefaults, configurations, services, hosts)
     yarn_site_properties = self.getSiteProperties(configurations, "yarn-site")
     servicesList = [service["StackServices"]["service_name"] for service in services["services"]]
@@ -1361,35 +1361,6 @@ class HDP25StackAdvisor(HDP24StackAdvisor):
     if yarn_min_container_size < 256:
       return 256
 
-  def get_yarn_nm_mem_in_mb(self, services, configurations):
-    """
-    Gets YARN NodeManager memory in MB (yarn.nodemanager.resource.memory-mb).
-    Reads from:
-      - configurations (if changed as part of current Stack Advisor invocation (output)), and services["changed-configurations"]
-        is empty, else
-      - services['configurations'] (input).
-
-    services["changed-configurations"] would be empty is Stack Advisor call if made from Blueprints (1st invocation). Subsequent
-    Stack Advisor calls will have it non-empty. We do this because in subsequent invocations, even if Stack Advsior calculates this
-    value (configurations), it is finally not recommended, making 'input' value to survive.
-    """
-    yarn_nm_mem_in_mb = None
-
-    yarn_site = self.getServicesSiteProperties(services, "yarn-site")
-    yarn_site_properties = self.getSiteProperties(configurations, "yarn-site")
-
-    # Check if services["changed-configurations"] is empty and 'yarn.nodemanager.resource.memory-mb' is modified in current ST invocation.
-    if not services["changed-configurations"] and yarn_site_properties and 'yarn.nodemanager.resource.memory-mb' in yarn_site_properties:
-      yarn_nm_mem_in_mb = float(yarn_site_properties['yarn.nodemanager.resource.memory-mb'])
-    elif yarn_site and 'yarn.nodemanager.resource.memory-mb' in yarn_site:
-      # Check if 'yarn.nodemanager.resource.memory-mb' is input in services array.
-        yarn_nm_mem_in_mb = float(yarn_site['yarn.nodemanager.resource.memory-mb'])
-
-    if yarn_nm_mem_in_mb <= 0.0:
-      Logger.warning("'yarn.nodemanager.resource.memory-mb' current value : {0}. Expected value : > 0".format(yarn_nm_mem_in_mb))
-
-    return yarn_nm_mem_in_mb
-
   def calculate_tez_am_container_size(self, services, total_cluster_capacity):
     """
     Calculates Tez App Master container size (tez.am.resource.memory.mb) for tez_hive2/tez-site on initialization if values read is 0.
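
The rename above is what lets the 2.5 advisor participate cleanly in the base class's validation chain: it now overrides the same method name HDP206StackAdvisor defines and runs the parent checks first via super(). A minimal sketch of that override pattern (the class bodies are illustrative stand-ins, not the real advisor code):

    # Sketch of the validator override chain enabled by the rename: the
    # subclass extends, rather than shadows, the base validation.
    class HDP206StackAdvisor(object):
        def validateYARNConfigurations(self, properties):
            return ['base yarn-site checks']

    class HDP25StackAdvisor(HDP206StackAdvisor):
        def validateYARNConfigurations(self, properties):
            problems = super(HDP25StackAdvisor, self).validateYARNConfigurations(properties)
            problems.append('2.5-specific yarn-site checks')
            return problems

    print(HDP25StackAdvisor().validateYARNConfigurations({}))
    # -> ['base yarn-site checks', '2.5-specific yarn-site checks']
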

http://git-wip-us.apache.org/repos/asf/ambari/blob/6a811557/ambari-server/src/main/resources/stacks/stack_advisor.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/stack_advisor.py b/ambari-server/src/main/resources/stacks/stack_advisor.py
index ad3b510..6fb014e 100644
--- a/ambari-server/src/main/resources/stacks/stack_advisor.py
+++ b/ambari-server/src/main/resources/stacks/stack_advisor.py
@@ -2151,6 +2151,24 @@ class DefaultStackAdvisor(StackAdvisor):
       return self.getWarnItem("Value is less than the recommended default of {0}".format(defaultValue))
     return None
 
+  def validatorGreaterThenDefaultValue(self, properties, recommendedDefaults, propertyName):
+    if propertyName not in recommendedDefaults:
+      # If a property name exists in say hbase-env and hbase-site (which is allowed), then it will exist in the
+      # "properties" dictionary, but not necessarily in the "recommendedDefaults" dictionary". In this case, ignore it.
+      return None
+
+    if propertyName not in properties:
+      return self.getErrorItem("Value should be set")
+    value = self.to_number(properties[propertyName])
+    if value is None:
+      return self.getErrorItem("Value should be integer")
+    defaultValue = self.to_number(recommendedDefaults[propertyName])
+    if defaultValue is None:
+      return None
+    if value > defaultValue:
+      return self.getWarnItem("Value is greater than the recommended default of {0}".format(defaultValue))
+    return None
+
   def validatorEqualsPropertyItem(self, properties1, propertyName1,
                                   properties2, propertyName2,
                                   emptyAllowed=False):
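
For a standalone view of the new validatorGreaterThenDefaultValue above, a minimal sketch of its decision table, with simplified stand-ins for to_number, getWarnItem and getErrorItem (the stand-ins are hypothetical; the real helpers return richer item dicts):

    # Sketch of the greater-than-default validator's decision table.
    def validator_greater_than_default(properties, recommended, name):
        if name not in recommended:
            return None  # no recommended default for this property: ignore it
        if name not in properties:
            return ('ERROR', 'Value should be set')
        try:
            value = int(properties[name])
        except ValueError:
            return ('ERROR', 'Value should be integer')
        try:
            default = int(recommended[name])
        except ValueError:
            return None
        if value > default:
            return ('WARN', 'Value is greater than the recommended default of %s' % default)
        return None

    print(validator_greater_than_default(
        {'yarn.scheduler.maximum-allocation-mb': '12288'},
        {'yarn.scheduler.maximum-allocation-mb': '10240'},
        'yarn.scheduler.maximum-allocation-mb'))
    # -> ('WARN', 'Value is greater than the recommended default of 10240')
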

http://git-wip-us.apache.org/repos/asf/ambari/blob/6a811557/ambari-server/src/test/python/stacks/2.0.6/common/test_stack_advisor.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/python/stacks/2.0.6/common/test_stack_advisor.py b/ambari-server/src/test/python/stacks/2.0.6/common/test_stack_advisor.py
index ff25512..a6931c5 100644
--- a/ambari-server/src/test/python/stacks/2.0.6/common/test_stack_advisor.py
+++ b/ambari-server/src/test/python/stacks/2.0.6/common/test_stack_advisor.py
@@ -776,7 +776,7 @@ class TestHDP206StackAdvisor(TestCase):
 
     # Test - Cluster data with 2 hosts - pick minimum memory
     servicesList.append("YARN")
-    services = services = {"services":
+    services = {"services":
                   [{"StackServices":
                       {"service_name" : "YARN",
                        "service_version" : "2.6.0.2.2"
@@ -3401,7 +3401,41 @@ class TestHDP206StackAdvisor(TestCase):
         }
       }
     }
-    services = {'configurations': {} }
+
+    services = {"services":
+      [{"StackServices":
+        {"service_name" : "YARN",
+          "service_version" : "2.6.0.2.2"
+        },
+        "components":[
+          {
+            "StackServiceComponents":{
+              "advertise_version":"true",
+              "cardinality":"1+",
+              "component_category":"SLAVE",
+              "component_name":"NODEMANAGER",
+              "custom_commands":[
+
+              ],
+              "display_name":"NodeManager",
+              "is_client":"false",
+              "is_master":"false",
+              "service_name":"YARN",
+              "stack_name":"HDP",
+              "stack_version":"2.2",
+              "hostnames":[
+                "host1",
+                "host2"
+              ]
+            },
+            "dependencies":[
+            ]
+          }
+        ],
+      }],
+      "configurations": {}
+    }
+
     recommendedDefaults = {'yarn.nodemanager.resource.memory-mb' : '12288',
       'yarn.scheduler.minimum-allocation-mb' : '3072',
       'yarn.nodemanager.linux-container-executor.group': 'hadoop',
@@ -3413,3 +3447,78 @@ class TestHDP206StackAdvisor(TestCase):
 
     res = self.stackAdvisor.validateYARNConfigurations(properties, recommendedDefaults, configurations, services, {})
     self.assertFalse(res)
+
+    hosts = {
+      "items" : [
+        {
+          "Hosts" : {
+            "host_name" : "host1",
+            "cpu_count" : 2,
+            "total_mem" : 12582912,
+            "disk_info" : [
+              {
+                "available" : "21052800",
+                "device" : "/dev/vda1",
+                "used" : "3303636",
+                "percent" : "14%",
+                "size" : "25666616",
+                "type" : "ext4",
+                "mountpoint" : "/"
+              },
+              {
+                "available" : "244732200",
+                "device" : "/dev/vdb",
+                "used" : "60508",
+                "percent" : "1%",
+                "size" : "257899908",
+                "type" : "ext4",
+                "mountpoint" : "/grid/0"
+              }
+            ]
+          }
+        }
+      ]
+    }
+    # Cluster RAM = 12 GB (12582912 KB)
+    # YARN NodeManager HeapSize = 1024 MB (default)
+    # Max Container Allocation = 11264 MB (user set to 12288)
+    expectedItems = [
+      {
+        'config-type':  'yarn-site',
+        'message': 'Node manager hosts with high memory usage found (examples : host1). '
+                   'Consider reducing the allocated memory for containers or '
+                   'moving other co-located components to a different host.',
+        'type': 'configuration',
+        'config-name': 'yarn.nodemanager.resource.memory-mb',
+        'level': 'WARN'
+      }
+    ]
+    items = self.stackAdvisor.validateYARNConfigurations(properties, recommendedDefaults, configurations, services, hosts)
+    self.assertEquals(expectedItems, items)
+
+
+    recommendedDefaults = {'yarn.nodemanager.resource.memory-mb' : '10240',
+      'yarn.scheduler.minimum-allocation-mb' : '3072',
+      'yarn.nodemanager.linux-container-executor.group': 'hadoop',
+      'yarn.scheduler.maximum-allocation-mb': '10240'}
+
+    expectedItems = [
+      {
+        'config-type':  'yarn-site',
+        'message': 'Value is greater than the recommended default of 10240',
+        'type': 'configuration',
+        'config-name': 'yarn.nodemanager.resource.memory-mb',
+        'level': 'WARN'
+      },
+      {
+        'config-type':  'yarn-site',
+        'message': 'Value is greater than the recommended default of 10240',
+        'type': 'configuration',
+        'config-name': 'yarn.scheduler.maximum-allocation-mb',
+        'level': 'WARN'
+      }
+    ]
+
+    items = self.stackAdvisor.validateYARNConfigurations(properties, recommendedDefaults, configurations, services, {})
+    self.assertEquals(expectedItems, items)
+
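
The numbers behind the first expected warning above can be checked directly; a quick arithmetic sketch (assuming, per the inline test comment, that getMemorySizeRequired contributes only the 1024 MB default NodeManager heap):

    # Quick check of the memory math behind the expected WARN item.
    total_mem_kb = 12582912                  # from the hosts payload: 12 GB
    required_bytes = 1024 * 1024 * 1024      # 1024 MB default NodeManager heap
    unused_mb = int((total_mem_kb * 1024 - required_bytes) / (1024 * 1024))
    print(unused_mb)                         # -> 11264
    print(12288 > unused_mb)                 # -> True: 12288 MB triggers the warning
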

http://git-wip-us.apache.org/repos/asf/ambari/blob/6a811557/ambari-server/src/test/python/stacks/2.5/common/test_stack_advisor.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/python/stacks/2.5/common/test_stack_advisor.py b/ambari-server/src/test/python/stacks/2.5/common/test_stack_advisor.py
index ad962fd..a53cb25 100644
--- a/ambari-server/src/test/python/stacks/2.5/common/test_stack_advisor.py
+++ b/ambari-server/src/test/python/stacks/2.5/common/test_stack_advisor.py
@@ -444,7 +444,7 @@ class TestHDP25StackAdvisor(TestCase):
     self.assertEquals(validations[0], expected)
 
 
-  def test_validateYarnConfigurations(self):
+  def test_validateYARNConfigurations(self):
     properties = {'enable_hive_interactive': 'true',
                   'hive.tez.container.size': '2048', "yarn.nodemanager.linux-container-executor.group": "hadoop"}
     recommendedDefaults = {'enable_hive_interactive': 'true',
@@ -473,7 +473,7 @@ class TestHDP25StackAdvisor(TestCase):
     res_expected = [
       {'config-type': 'yarn-site', 'message': 'While enabling HIVE_SERVER_INTERACTIVE it is recommended that you enable work preserving restart in YARN.', 'type': 'configuration', 'config-name': 'yarn.resourcemanager.work-preserving-recovery.enabled', 'level': 'WARN'}
     ]
-    res = self.stackAdvisor.validateYarnConfigurations(properties, recommendedDefaults, configurations, services, {})
+    res = self.stackAdvisor.validateYARNConfigurations(properties, recommendedDefaults, configurations, services, {})
     self.assertEquals(res, res_expected)
     pass