Posted to commits@ambari.apache.org by jl...@apache.org on 2017/01/25 17:40:58 UTC
ambari git commit: AMBARI-19690: NM Memory can end up being too high on nodes with many components (jluniya)
Repository: ambari
Updated Branches:
refs/heads/trunk e55523012 -> 6a8115572
AMBARI-19690: NM Memory can end up being too high on nodes with many components (jluniya)
Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/6a811557
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/6a811557
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/6a811557
Branch: refs/heads/trunk
Commit: 6a8115572b328785532aed27c1dc44a1bac17a01
Parents: e555230
Author: Jayush Luniya <jl...@hortonworks.com>
Authored: Wed Jan 25 09:40:56 2017 -0800
Committer: Jayush Luniya <jl...@hortonworks.com>
Committed: Wed Jan 25 09:40:56 2017 -0800
----------------------------------------------------------------------
.../stacks/HDP/2.0.6/services/stack_advisor.py | 60 +++++++++-
.../stacks/HDP/2.5/services/stack_advisor.py | 33 +-----
.../src/main/resources/stacks/stack_advisor.py | 18 +++
.../stacks/2.0.6/common/test_stack_advisor.py | 113 ++++++++++++++++++-
.../stacks/2.5/common/test_stack_advisor.py | 4 +-
5 files changed, 191 insertions(+), 37 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/ambari/blob/6a811557/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/stack_advisor.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/stack_advisor.py b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/stack_advisor.py
index 7ed1b77..55f3d30 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/stack_advisor.py
+++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/stack_advisor.py
@@ -1350,6 +1350,35 @@ class HDP206StackAdvisor(DefaultStackAdvisor):
totalMemoryRequired += self.formatXmxSizeToBytes(heapsize)
return totalMemoryRequired
+ def get_yarn_nm_mem_in_mb(self, services, configurations):
+ """
+ Gets YARN NodeManager memory in MB (yarn.nodemanager.resource.memory-mb).
+ Reads from:
+ - configurations (the output of the current Stack Advisor invocation), if the value was changed there and
+ services["changed-configurations"] is empty, else
+ - services['configurations'] (the input).
+
+ services["changed-configurations"] is empty when the Stack Advisor call is made from Blueprints (1st invocation). Subsequent
+ Stack Advisor calls will have it non-empty. We do this because in subsequent invocations, even if Stack Advisor calculates this
+ value (in configurations), it is ultimately not recommended, so the 'input' value survives.
+ """
+ yarn_nm_mem_in_mb = None
+
+ yarn_site = getServicesSiteProperties(services, "yarn-site")
+ yarn_site_properties = getSiteProperties(configurations, "yarn-site")
+
+ # Check if services["changed-configurations"] is empty and 'yarn.nodemanager.resource.memory-mb' is modified in current ST invocation.
+ if not ("changed-configurations" in services and services["changed-configurations"]) and yarn_site_properties and 'yarn.nodemanager.resource.memory-mb' in yarn_site_properties:
+ yarn_nm_mem_in_mb = float(yarn_site_properties['yarn.nodemanager.resource.memory-mb'])
+ elif yarn_site and 'yarn.nodemanager.resource.memory-mb' in yarn_site:
+ # Check if 'yarn.nodemanager.resource.memory-mb' is input in services array.
+ yarn_nm_mem_in_mb = float(yarn_site['yarn.nodemanager.resource.memory-mb'])
+
+ if yarn_nm_mem_in_mb <= 0.0:
+ Logger.warning("'yarn.nodemanager.resource.memory-mb' current value : {0}. Expected value : > 0".format(yarn_nm_mem_in_mb))
+
+ return yarn_nm_mem_in_mb
+
def getPreferredMountPoints(self, hostInfo):
# '/etc/resolv.conf', '/etc/hostname', '/etc/hosts' are docker specific mount points
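A minimal, self-contained sketch of the precedence rule that the get_yarn_nm_mem_in_mb docstring above describes. The helper name pick_nm_mem_in_mb and the sample payloads are hypothetical, for illustration only; they are not part of the commit.

PROP = 'yarn.nodemanager.resource.memory-mb'

def pick_nm_mem_in_mb(services, configurations):
    out_site = configurations.get('yarn-site', {}).get('properties', {})
    in_site = services.get('configurations', {}).get('yarn-site', {}).get('properties', {})
    if not services.get('changed-configurations') and PROP in out_site:
        return float(out_site[PROP])  # 1st (Blueprint) invocation: freshly computed output wins
    if PROP in in_site:
        return float(in_site[PROP])   # subsequent invocations: the input value survives
    return None

# 1st invocation: changed-configurations is empty, so the output value (4096) is used.
services = {'changed-configurations': [],
            'configurations': {'yarn-site': {'properties': {PROP: '2048'}}}}
configurations = {'yarn-site': {'properties': {PROP: '4096'}}}
assert pick_nm_mem_in_mb(services, configurations) == 4096.0

# Later invocation: changed-configurations is non-empty, so the input value (2048) survives.
services['changed-configurations'] = [{'type': 'yarn-site', 'name': PROP}]
assert pick_nm_mem_in_mb(services, configurations) == 2048.0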
@@ -1438,10 +1467,37 @@ class HDP206StackAdvisor(DefaultStackAdvisor):
def validateYARNConfigurations(self, properties, recommendedDefaults, configurations, services, hosts):
clusterEnv = getSiteProperties(configurations, "cluster-env")
- validationItems = [ {"config-name": 'yarn.nodemanager.resource.memory-mb', "item": self.validatorLessThenDefaultValue(properties, recommendedDefaults, 'yarn.nodemanager.resource.memory-mb')},
+
+ validationItems = [ {"config-name": 'yarn.nodemanager.resource.memory-mb', "item": self.validatorGreaterThenDefaultValue(properties, recommendedDefaults, 'yarn.nodemanager.resource.memory-mb')},
{"config-name": 'yarn.scheduler.minimum-allocation-mb', "item": self.validatorLessThenDefaultValue(properties, recommendedDefaults, 'yarn.scheduler.minimum-allocation-mb')},
{"config-name": 'yarn.nodemanager.linux-container-executor.group', "item": self.validatorEqualsPropertyItem(properties, "yarn.nodemanager.linux-container-executor.group", clusterEnv, "user_group")},
- {"config-name": 'yarn.scheduler.maximum-allocation-mb', "item": self.validatorLessThenDefaultValue(properties, recommendedDefaults, 'yarn.scheduler.maximum-allocation-mb')} ]
+ {"config-name": 'yarn.scheduler.maximum-allocation-mb', "item": self.validatorGreaterThenDefaultValue(properties, recommendedDefaults, 'yarn.scheduler.maximum-allocation-mb')} ]
+ nmMemory = int(self.get_yarn_nm_mem_in_mb(services, configurations))
+ if "items" in hosts and len(hosts["items"]) > 0:
+ nodeManagerHosts = self.getHostsWithComponent("YARN", "NODEMANAGER", services, hosts)
+ nmLowMemoryHosts = []
+ # The NodeManager host with the least memory is generally used in calculations, since a value that fits there will also work on larger hosts.
+ if nodeManagerHosts is not None and len(nodeManagerHosts) > 0:
+ for nmHost in nodeManagerHosts:
+ nmHostName = nmHost["Hosts"]["host_name"]
+ componentNames = []
+ for service in services["services"]:
+ for component in service["components"]:
+ if not self.isClientComponent(component) and component["StackServiceComponents"]["hostnames"] is not None:
+ if nmHostName in component["StackServiceComponents"]["hostnames"]:
+ componentNames.append(component["StackServiceComponents"]["component_name"])
+ requiredMemory = self.getMemorySizeRequired(services, componentNames, configurations)
+ unusedMemory = int((nmHost["Hosts"]["total_mem"] * 1024 - requiredMemory)/ (1024 * 1024)) # in MB
+ if nmMemory > unusedMemory:
+ nmLowMemoryHosts.append(nmHostName)
+
+ if len(nmLowMemoryHosts) > 0:
+ validationItems.append({"config-name": "yarn.nodemanager.resource.memory-mb",
+ "item": self.getWarnItem(
+ "Node manager hosts with high memory usage found (examples : {0}). Consider reducing the allocated "
+ "memory for containers or moving other co-located components "
+ "to a different host.".format(",".join(nmLowMemoryHosts[:3])))})
+
return self.toConfigurationValidationProblems(validationItems, "yarn-site")
def validateYARNEnvConfigurations(self, properties, recommendedDefaults, configurations, services, hosts):
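To make the arithmetic of the new per-host check concrete: a condensed, runnable sketch of the loop above, with getMemorySizeRequired replaced by a precomputed map. Host entries mirror the hosts['items'] shape used in the tests further down; the numbers are toy data, not from the commit.

def find_low_memory_nm_hosts(nm_mem_mb, nm_hosts, required_bytes_by_host):
    # nm_hosts entries are shaped like hosts['items']; 'total_mem' is reported in KB.
    low = []
    for host in nm_hosts:
        name = host['Hosts']['host_name']
        total_bytes = host['Hosts']['total_mem'] * 1024                    # KB -> bytes
        unused_mb = int((total_bytes - required_bytes_by_host[name]) / (1024 * 1024))
        if nm_mem_mb > unused_mb:                                          # NM asks for more than what is left
            low.append(name)
    return low

nm_hosts = [{'Hosts': {'host_name': 'host1', 'total_mem': 12582912}}]  # 12 GB, in KB
required = {'host1': 1024 * 1024 * 1024}                               # 1 GB of co-located heaps
print(find_low_memory_nm_hosts(12288, nm_hosts, required))             # ['host1'] -> WARN item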
http://git-wip-us.apache.org/repos/asf/ambari/blob/6a811557/ambari-server/src/main/resources/stacks/HDP/2.5/services/stack_advisor.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.5/services/stack_advisor.py b/ambari-server/src/main/resources/stacks/HDP/2.5/services/stack_advisor.py
index d2c0459..17f0c59 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.5/services/stack_advisor.py
+++ b/ambari-server/src/main/resources/stacks/HDP/2.5/services/stack_advisor.py
@@ -109,7 +109,7 @@ class HDP25StackAdvisor(HDP24StackAdvisor):
"ATLAS": {"application-properties": self.validateAtlasConfigurations},
"HIVE": {"hive-interactive-env": self.validateHiveInteractiveEnvConfigurations,
"hive-interactive-site": self.validateHiveInteractiveSiteConfigurations},
- "YARN": {"yarn-site": self.validateYarnConfigurations},
+ "YARN": {"yarn-site": self.validateYARNConfigurations},
"RANGER": {"ranger-tagsync-site": self.validateRangerTagsyncConfigurations},
"SPARK2": {"spark2-defaults": self.validateSpark2Defaults,
"spark2-thrift-sparkconf": self.validateSpark2ThriftSparkConf},
@@ -247,7 +247,7 @@ class HDP25StackAdvisor(HDP24StackAdvisor):
]
return self.toConfigurationValidationProblems(validationItems, "spark2-thrift-sparkconf")
- def validateYarnConfigurations(self, properties, recommendedDefaults, configurations, services, hosts):
+ def validateYARNConfigurations(self, properties, recommendedDefaults, configurations, services, hosts):
parentValidationProblems = super(HDP25StackAdvisor, self).validateYARNConfigurations(properties, recommendedDefaults, configurations, services, hosts)
yarn_site_properties = self.getSiteProperties(configurations, "yarn-site")
servicesList = [service["StackServices"]["service_name"] for service in services["services"]]
@@ -1361,35 +1361,6 @@ class HDP25StackAdvisor(HDP24StackAdvisor):
if yarn_min_container_size < 256:
return 256
- def get_yarn_nm_mem_in_mb(self, services, configurations):
- """
- Gets YARN NodeManager memory in MB (yarn.nodemanager.resource.memory-mb).
- Reads from:
- - configurations (if changed as part of current Stack Advisor invocation (output)), and services["changed-configurations"]
- is empty, else
- - services['configurations'] (input).
-
- services["changed-configurations"] would be empty is Stack Advisor call if made from Blueprints (1st invocation). Subsequent
- Stack Advisor calls will have it non-empty. We do this because in subsequent invocations, even if Stack Advsior calculates this
- value (configurations), it is finally not recommended, making 'input' value to survive.
- """
- yarn_nm_mem_in_mb = None
-
- yarn_site = self.getServicesSiteProperties(services, "yarn-site")
- yarn_site_properties = self.getSiteProperties(configurations, "yarn-site")
-
- # Check if services["changed-configurations"] is empty and 'yarn.nodemanager.resource.memory-mb' is modified in current ST invocation.
- if not services["changed-configurations"] and yarn_site_properties and 'yarn.nodemanager.resource.memory-mb' in yarn_site_properties:
- yarn_nm_mem_in_mb = float(yarn_site_properties['yarn.nodemanager.resource.memory-mb'])
- elif yarn_site and 'yarn.nodemanager.resource.memory-mb' in yarn_site:
- # Check if 'yarn.nodemanager.resource.memory-mb' is input in services array.
- yarn_nm_mem_in_mb = float(yarn_site['yarn.nodemanager.resource.memory-mb'])
-
- if yarn_nm_mem_in_mb <= 0.0:
- Logger.warning("'yarn.nodemanager.resource.memory-mb' current value : {0}. Expected value : > 0".format(yarn_nm_mem_in_mb))
-
- return yarn_nm_mem_in_mb
-
def calculate_tez_am_container_size(self, services, total_cluster_capacity):
"""
Calculates Tez App Master container size (tez.am.resource.memory.mb) for tez_hive2/tez-site on initialization if values read is 0.
http://git-wip-us.apache.org/repos/asf/ambari/blob/6a811557/ambari-server/src/main/resources/stacks/stack_advisor.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/stack_advisor.py b/ambari-server/src/main/resources/stacks/stack_advisor.py
index ad3b510..6fb014e 100644
--- a/ambari-server/src/main/resources/stacks/stack_advisor.py
+++ b/ambari-server/src/main/resources/stacks/stack_advisor.py
@@ -2151,6 +2151,24 @@ class DefaultStackAdvisor(StackAdvisor):
return self.getWarnItem("Value is less than the recommended default of {0}".format(defaultValue))
return None
+ def validatorGreaterThenDefaultValue(self, properties, recommendedDefaults, propertyName):
+ if propertyName not in recommendedDefaults:
+ # If a property name exists in, say, both hbase-env and hbase-site (which is allowed), then it will exist in the
+ # "properties" dictionary, but not necessarily in the "recommendedDefaults" dictionary. In this case, ignore it.
+ return None
+
+ if not propertyName in properties:
+ return self.getErrorItem("Value should be set")
+ value = self.to_number(properties[propertyName])
+ if value is None:
+ return self.getErrorItem("Value should be integer")
+ defaultValue = self.to_number(recommendedDefaults[propertyName])
+ if defaultValue is None:
+ return None
+ if value > defaultValue:
+ return self.getWarnItem("Value is greater than the recommended default of {0}".format(defaultValue))
+ return None
+
def validatorEqualsPropertyItem(self, properties1, propertyName1,
properties2, propertyName2,
emptyAllowed=False):
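Restating the new validator's contract as a toy standalone function (the real method lives on DefaultStackAdvisor and builds items via getWarnItem/getErrorItem; the dict shapes below are illustrative, not the actual item format):

def to_number(s):
    try:
        return int(s)
    except ValueError:
        return None

def validator_greater_than_default(properties, defaults, name):
    if name not in defaults:
        return None                              # no recommendation to compare against; ignore
    if name not in properties:
        return {'level': 'ERROR', 'message': 'Value should be set'}
    value, default = to_number(properties[name]), to_number(defaults[name])
    if value is None:
        return {'level': 'ERROR', 'message': 'Value should be integer'}
    if default is None or value <= default:
        return None                              # at or below the recommendation: no problem
    return {'level': 'WARN',
            'message': 'Value is greater than the recommended default of %s' % default}

print(validator_greater_than_default(
    {'yarn.scheduler.maximum-allocation-mb': '12288'},
    {'yarn.scheduler.maximum-allocation-mb': '10240'},
    'yarn.scheduler.maximum-allocation-mb'))
# -> {'level': 'WARN', 'message': 'Value is greater than the recommended default of 10240'}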
http://git-wip-us.apache.org/repos/asf/ambari/blob/6a811557/ambari-server/src/test/python/stacks/2.0.6/common/test_stack_advisor.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/python/stacks/2.0.6/common/test_stack_advisor.py b/ambari-server/src/test/python/stacks/2.0.6/common/test_stack_advisor.py
index ff25512..a6931c5 100644
--- a/ambari-server/src/test/python/stacks/2.0.6/common/test_stack_advisor.py
+++ b/ambari-server/src/test/python/stacks/2.0.6/common/test_stack_advisor.py
@@ -776,7 +776,7 @@ class TestHDP206StackAdvisor(TestCase):
# Test - Cluster data with 2 hosts - pick minimum memory
servicesList.append("YARN")
- services = services = {"services":
+ services = {"services":
[{"StackServices":
{"service_name" : "YARN",
"service_version" : "2.6.0.2.2"
@@ -3401,7 +3401,41 @@ class TestHDP206StackAdvisor(TestCase):
}
}
}
- services = {'configurations': {} }
+
+ services = {"services":
+ [{"StackServices":
+ {"service_name" : "YARN",
+ "service_version" : "2.6.0.2.2"
+ },
+ "components":[
+ {
+ "StackServiceComponents":{
+ "advertise_version":"true",
+ "cardinality":"1+",
+ "component_category":"SLAVE",
+ "component_name":"NODEMANAGER",
+ "custom_commands":[
+
+ ],
+ "display_name":"NodeManager",
+ "is_client":"false",
+ "is_master":"false",
+ "service_name":"YARN",
+ "stack_name":"HDP",
+ "stack_version":"2.2",
+ "hostnames":[
+ "host1",
+ "host2"
+ ]
+ },
+ "dependencies":[
+ ]
+ }
+ ],
+ }],
+ "configurations": {}
+ }
+
recommendedDefaults = {'yarn.nodemanager.resource.memory-mb' : '12288',
'yarn.scheduler.minimum-allocation-mb' : '3072',
'yarn.nodemanager.linux-container-executor.group': 'hadoop',
@@ -3413,3 +3447,78 @@ class TestHDP206StackAdvisor(TestCase):
res = self.stackAdvisor.validateYARNConfigurations(properties, recommendedDefaults, configurations, services, {})
self.assertFalse(res)
+
+ hosts = {
+ "items" : [
+ {
+ "Hosts" : {
+ "host_name" : "host1",
+ "cpu_count" : 2,
+ "total_mem" : 12582912,
+ "disk_info" : [
+ {
+ "available" : "21052800",
+ "device" : "/dev/vda1",
+ "used" : "3303636",
+ "percent" : "14%",
+ "size" : "25666616",
+ "type" : "ext4",
+ "mountpoint" : "/"
+ },
+ {
+ "available" : "244732200",
+ "device" : "/dev/vdb",
+ "used" : "60508",
+ "percent" : "1%",
+ "size" : "257899908",
+ "type" : "ext4",
+ "mountpoint" : "/grid/0"
+ }
+ ]
+ }
+ }
+ ]
+ }
+ # Cluster RAM = 12 GB (12582912 KB)
+ # YARN NodeManager HeapSize = 1024 MB (default)
+ # Max Container Allocation = 11264 MB (user set to 12288)
+ expectedItems = [
+ {
+ 'config-type': 'yarn-site',
+ 'message': 'Node manager hosts with high memory usage found (examples : host1). '
+ 'Consider reducing the allocated memory for containers or '
+ 'moving other co-located components to a different host.',
+ 'type': 'configuration',
+ 'config-name': 'yarn.nodemanager.resource.memory-mb',
+ 'level': 'WARN'
+ }
+ ]
+ items = self.stackAdvisor.validateYARNConfigurations(properties, recommendedDefaults, configurations, services, hosts)
+ self.assertEquals(expectedItems, items)
+
+
+ recommendedDefaults = {'yarn.nodemanager.resource.memory-mb' : '10240',
+ 'yarn.scheduler.minimum-allocation-mb' : '3072',
+ 'yarn.nodemanager.linux-container-executor.group': 'hadoop',
+ 'yarn.scheduler.maximum-allocation-mb': '10240'}
+
+ expectedItems = [
+ {
+ 'config-type': 'yarn-site',
+ 'message': 'Value is greater than the recommended default of 10240',
+ 'type': 'configuration',
+ 'config-name': 'yarn.nodemanager.resource.memory-mb',
+ 'level': 'WARN'
+ },
+ {
+ 'config-type': 'yarn-site',
+ 'message': 'Value is greater than the recommended default of 10240',
+ 'type': 'configuration',
+ 'config-name': 'yarn.scheduler.maximum-allocation-mb',
+ 'level': 'WARN'
+ }
+ ]
+
+ items = self.stackAdvisor.validateYARNConfigurations(properties, recommendedDefaults, configurations, services, {})
+ self.assertEquals(expectedItems, items)
+
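For the record, the arithmetic behind the first expected WARN item above (it matches the comment block next to the hosts dict):

total_mem_kb   = 12582912                 # host1 total_mem from hosts['items'] (12 GB)
required_bytes = 1024 * 1024 * 1024       # 1024 MB default NodeManager heap
unused_mb = int((total_mem_kb * 1024 - required_bytes) / (1024 * 1024))
assert unused_mb == 11264                 # 'Max Container Allocation = 11264 MB'
assert 12288 > unused_mb                  # the user-set 12288 MB exceeds it, so host1 is flagged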
http://git-wip-us.apache.org/repos/asf/ambari/blob/6a811557/ambari-server/src/test/python/stacks/2.5/common/test_stack_advisor.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/python/stacks/2.5/common/test_stack_advisor.py b/ambari-server/src/test/python/stacks/2.5/common/test_stack_advisor.py
index ad962fd..a53cb25 100644
--- a/ambari-server/src/test/python/stacks/2.5/common/test_stack_advisor.py
+++ b/ambari-server/src/test/python/stacks/2.5/common/test_stack_advisor.py
@@ -444,7 +444,7 @@ class TestHDP25StackAdvisor(TestCase):
self.assertEquals(validations[0], expected)
- def test_validateYarnConfigurations(self):
+ def test_validateYARNConfigurations(self):
properties = {'enable_hive_interactive': 'true',
'hive.tez.container.size': '2048', "yarn.nodemanager.linux-container-executor.group": "hadoop"}
recommendedDefaults = {'enable_hive_interactive': 'true',
@@ -473,7 +473,7 @@ class TestHDP25StackAdvisor(TestCase):
res_expected = [
{'config-type': 'yarn-site', 'message': 'While enabling HIVE_SERVER_INTERACTIVE it is recommended that you enable work preserving restart in YARN.', 'type': 'configuration', 'config-name': 'yarn.resourcemanager.work-preserving-recovery.enabled', 'level': 'WARN'}
]
- res = self.stackAdvisor.validateYarnConfigurations(properties, recommendedDefaults, configurations, services, {})
+ res = self.stackAdvisor.validateYARNConfigurations(properties, recommendedDefaults, configurations, services, {})
self.assertEquals(res, res_expected)
pass