Posted to commits@ambari.apache.org by ds...@apache.org on 2015/04/21 10:56:12 UTC

ambari git commit: AMBARI-10602 HDFS namenode_opt_newsize and namenode_opt_maxnewsize not using number of datanodes in calculation (dsen)

Repository: ambari
Updated Branches:
  refs/heads/trunk 766cb64ef -> c3e0771e4


AMBARI-10602 HDFS namenode_opt_newsize and namenode_opt_maxnewsize not using number of datanodes in calculation (dsen)
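
In short: the HDP 2.2 stack advisor previously sized the NameNode heap and new
generation from total cluster RAM alone. With this change it estimates the
expected number of HDFS objects from the DataNode count and the number of data
directories per DataNode, then picks heap and new-generation sizes from a
lookup table. A minimal sketch of that rule, with illustrative names (the real
logic is in HDP22StackAdvisor.recommendHDFSConfigurations in the diff below):

    def estimate_nn_sizes(num_datanodes, data_dirs_per_node):
        # ~100,000 files per disk, expressed in millions (Python 2 integer division)
        files_millions = num_datanodes * data_dirs_per_node / 10
        # (upper bound in millions of files, heap MB, new-generation MB)
        table = [(1, 1024, 128), (5, 3072, 512), (10, 5376, 768),
                 (20, 9984, 1280), (30, 14848, 2048), (40, 19456, 2560),
                 (50, 24320, 3072), (70, 33536, 4352), (100, 47872, 6144),
                 (125, 59648, 7680), (150, 71424, 8960)]
        for upper, heap_mb, new_gen_mb in table:
            if files_millions < upper:
                return heap_mb, new_gen_mb
        return 94976, 8960  # 150M files or more

    # e.g. estimate_nn_sizes(202, 4) -> (47872, 6144), matching the updated 2.2 test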


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/c3e0771e
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/c3e0771e
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/c3e0771e

Branch: refs/heads/trunk
Commit: c3e0771e40fb255ff69bb6cbc70495435486512e
Parents: 766cb64
Author: Dmytro Sen <ds...@apache.org>
Authored: Tue Apr 21 11:55:56 2015 +0300
Committer: Dmytro Sen <ds...@apache.org>
Committed: Tue Apr 21 11:55:56 2015 +0300

----------------------------------------------------------------------
 .../stacks/HDP/2.0.6/services/stack_advisor.py  |   5 +-
 .../stacks/HDP/2.2/services/stack_advisor.py    |  59 ++++++--
 .../stacks/2.0.6/common/test_stack_advisor.py   | 149 +++++++++++++++++++
 .../stacks/2.2/common/test_stack_advisor.py     |  58 ++++----
 4 files changed, 233 insertions(+), 38 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/c3e0771e/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/stack_advisor.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/stack_advisor.py b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/stack_advisor.py
index 7af01a4..0b54b38 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/stack_advisor.py
+++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/stack_advisor.py
@@ -213,9 +213,8 @@ class HDP206StackAdvisor(DefaultStackAdvisor):
       service = [serviceEntry for serviceEntry in services["services"] if serviceEntry["StackServices"]["service_name"] == serviceName][0]
       components = [componentEntry for componentEntry in service["components"] if componentEntry["StackServiceComponents"]["component_name"] == componentName]
       if (len(components) > 0 and len(components[0]["StackServiceComponents"]["hostnames"]) > 0):
-        # component available - determine hosts and memory
-        componentHostname = components[0]["StackServiceComponents"]["hostnames"][0]
-        componentHosts = [host for host in hosts["items"] if host["Hosts"]["host_name"] == componentHostname]
+        componentHostnames = components[0]["StackServiceComponents"]["hostnames"]
+        componentHosts = [host for host in hosts["items"] if host["Hosts"]["host_name"] in componentHostnames]
         return componentHosts
     return []
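
The hunk above widens getHostsWithComponent: it used to match only the first
hostname registered for a component, so multi-host components such as a fleet
of DataNodes collapsed to a single host. Illustratively (hypothetical data):

    hostnames = ["host1", "host2"]
    items = [{"Hosts": {"host_name": "host1"}}, {"Hosts": {"host_name": "host2"}}]
    # old: [h for h in items if h["Hosts"]["host_name"] == hostnames[0]]  -> 1 host
    # new: [h for h in items if h["Hosts"]["host_name"] in hostnames]     -> 2 hosts

The DataNode-count logic added to the 2.2 advisor below depends on receiving
the full list.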
 

http://git-wip-us.apache.org/repos/asf/ambari/blob/c3e0771e/ambari-server/src/main/resources/stacks/HDP/2.2/services/stack_advisor.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.2/services/stack_advisor.py b/ambari-server/src/main/resources/stacks/HDP/2.2/services/stack_advisor.py
index 7f163dd..6289e6a 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.2/services/stack_advisor.py
+++ b/ambari-server/src/main/resources/stacks/HDP/2.2/services/stack_advisor.py
@@ -98,26 +98,67 @@ class HDP22StackAdvisor(HDP21StackAdvisor):
     putHdfsSiteProperty("dfs.namenode.safemode.threshold-pct", "0.99f" if len(namenodeHosts) > 1 else "1.0f")
 
     putHdfsEnvProperty = self.putProperty(configurations, "hadoop-env", services)
-    putHdfsEnvProperty('namenode_heapsize', max(int(clusterData['totalAvailableRam'] / 2), 1024))
-    putHdfsEnvProperty('namenode_opt_newsize', max(int(clusterData['totalAvailableRam'] / 8), 128))
-    putHdfsEnvProperty('namenode_opt_maxnewsize', max(int(clusterData['totalAvailableRam'] / 8), 256))
-
-    # Property Attributes
     putHdfsEnvPropertyAttribute = self.putPropertyAttribute(configurations, "hadoop-env")
+
+    nn_max_heapsize = None
     if (namenodeHosts is not None and len(namenodeHosts) > 0):
       if len(namenodeHosts) > 1:
-        namenode_heapsize = min(int(namenodeHosts[0]["Hosts"]["total_mem"]), int(namenodeHosts[1]["Hosts"]["total_mem"])) / 1024
+        nn_max_heapsize = min(int(namenodeHosts[0]["Hosts"]["total_mem"]), int(namenodeHosts[1]["Hosts"]["total_mem"])) / 1024
       else:
-        namenode_heapsize = int(namenodeHosts[0]["Hosts"]["total_mem"] / 1024) # total_mem in kb
+        nn_max_heapsize = int(namenodeHosts[0]["Hosts"]["total_mem"] / 1024) # total_mem in kb
 
-      putHdfsEnvPropertyAttribute('namenode_heapsize', 'maximum', namenode_heapsize)
+      putHdfsEnvPropertyAttribute('namenode_heapsize', 'maximum', nn_max_heapsize)
+
+    # Old fallback values (may be overridden below based on DataNode information)
+    putHdfsEnvProperty('namenode_heapsize', max(int(clusterData['totalAvailableRam'] / 2), 1024))
+    putHdfsEnvProperty('namenode_opt_newsize', max(int(clusterData['totalAvailableRam'] / 8), 128))
+    putHdfsEnvProperty('namenode_opt_maxnewsize', max(int(clusterData['totalAvailableRam'] / 8), 256))
 
     datanodeHosts = self.getHostsWithComponent("HDFS", "DATANODE", services, hosts)
-    if (datanodeHosts is not None and len(datanodeHosts)>0):
+    if datanodeHosts is not None and len(datanodeHosts) > 0:
       min_datanode_ram_kb = 1073741824 # 1 TB
       for datanode in datanodeHosts:
         ram_kb = datanode['Hosts']['total_mem']
         min_datanode_ram_kb = min(min_datanode_ram_kb, ram_kb)
+
+      datanodeFilesM = len(datanodeHosts)*dataDirsCount/10 # in millions; # of files = # of disks * 100,000
+      nn_memory_configs = [
+        {'nn_heap':1024,  'nn_opt':128},
+        {'nn_heap':3072,  'nn_opt':512},
+        {'nn_heap':5376,  'nn_opt':768},
+        {'nn_heap':9984,  'nn_opt':1280},
+        {'nn_heap':14848, 'nn_opt':2048},
+        {'nn_heap':19456, 'nn_opt':2560},
+        {'nn_heap':24320, 'nn_opt':3072},
+        {'nn_heap':33536, 'nn_opt':4352},
+        {'nn_heap':47872, 'nn_opt':6144},
+        {'nn_heap':59648, 'nn_opt':7680},
+        {'nn_heap':71424, 'nn_opt':8960},
+        {'nn_heap':94976, 'nn_opt':8960}
+      ]
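+      # Exactly one of the range tests below is True; indexing the dict with
+      # key 1 (== True in Python 2) therefore selects the matching row.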
+      index = {
+        datanodeFilesM < 1 : 0,
+        1 <= datanodeFilesM < 5 : 1,
+        5 <= datanodeFilesM < 10 : 2,
+        10 <= datanodeFilesM < 20 : 3,
+        20 <= datanodeFilesM < 30 : 4,
+        30 <= datanodeFilesM < 40 : 5,
+        40 <= datanodeFilesM < 50 : 6,
+        50 <= datanodeFilesM < 70 : 7,
+        70 <= datanodeFilesM < 100 : 8,
+        100 <= datanodeFilesM < 125 : 9,
+        125 <= datanodeFilesM < 150 : 10,
+        150 <= datanodeFilesM : 11
+      }[1]
+
+      nn_memory_config = nn_memory_configs[index]
+
+      # override with new values if applicable
+      if nn_max_heapsize is not None and nn_max_heapsize <= nn_memory_config['nn_heap']:
+        putHdfsEnvProperty('namenode_heapsize', nn_memory_config['nn_heap'])
+        putHdfsEnvProperty('namenode_opt_newsize', nn_memory_config['nn_opt'])
+        putHdfsEnvProperty('namenode_opt_maxnewsize', nn_memory_config['nn_opt'])
+
       putHdfsEnvPropertyAttribute('dtnode_heapsize', 'maximum', int(min_datanode_ram_kb/1024))
 
     putHdfsSitePropertyAttribute = self.putPropertyAttribute(configurations, "hdfs-site")
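
Worked example for the hunk above, assuming Python 2 integer division and the
202-DataNode fixture used by the updated 2.2 test further below:

    # 4 entries in dfs.datanode.data.dir: 202 * 4 / 10 = 80 -> 70 <= 80 < 100
    #   -> index 8 -> namenode_heapsize 47872, namenode_opt_(max)newsize 6144
    # 1 entry  in dfs.datanode.data.dir: 202 * 1 / 10 = 20 -> 20 <= 20 < 30
    #   -> index 4 -> namenode_heapsize 14848, namenode_opt_(max)newsize 2048

The table values only replace the RAM-based fallbacks when nn_max_heapsize was
computed from the NameNode hosts and does not exceed the selected nn_heap.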

http://git-wip-us.apache.org/repos/asf/ambari/blob/c3e0771e/ambari-server/src/test/python/stacks/2.0.6/common/test_stack_advisor.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/python/stacks/2.0.6/common/test_stack_advisor.py b/ambari-server/src/test/python/stacks/2.0.6/common/test_stack_advisor.py
index 1f5549b..ef6def1 100644
--- a/ambari-server/src/test/python/stacks/2.0.6/common/test_stack_advisor.py
+++ b/ambari-server/src/test/python/stacks/2.0.6/common/test_stack_advisor.py
@@ -585,3 +585,152 @@ class TestHDP206StackAdvisor(TestCase):
 
     res = self.stackAdvisor.validateHDFSConfigurationsEnv(properties, recommendedDefaults, configurations, '', '')
     self.assertEquals(res, res_expected)
+
+  def test_getHostsWithComponent(self):
+    services = {"services":
+                  [{"StackServices":
+                      {"service_name" : "HDFS",
+                       "service_version" : "2.6.0.2.2"
+                      },
+                    "components":[
+                      {
+                        "href":"/api/v1/stacks/HDP/versions/2.2/services/HDFS/components/DATANODE",
+                        "StackServiceComponents":{
+                          "advertise_version":"true",
+                          "cardinality":"1+",
+                          "component_category":"SLAVE",
+                          "component_name":"DATANODE",
+                          "custom_commands":[
+
+                          ],
+                          "display_name":"DataNode",
+                          "is_client":"false",
+                          "is_master":"false",
+                          "service_name":"HDFS",
+                          "stack_name":"HDP",
+                          "stack_version":"2.2",
+                          "hostnames":[
+                            "host1",
+                            "host2"
+                          ]
+                        },
+                        "dependencies":[
+
+                        ]
+                      },
+                      {
+                        "href":"/api/v1/stacks/HDP/versions/2.2/services/HDFS/components/JOURNALNODE",
+                        "StackServiceComponents":{
+                          "advertise_version":"true",
+                          "cardinality":"0+",
+                          "component_category":"SLAVE",
+                          "component_name":"JOURNALNODE",
+                          "custom_commands":[
+
+                          ],
+                          "display_name":"JournalNode",
+                          "is_client":"false",
+                          "is_master":"false",
+                          "service_name":"HDFS",
+                          "stack_name":"HDP",
+                          "stack_version":"2.2",
+                          "hostnames":[
+                            "host1"
+                          ]
+                        },
+                        "dependencies":[
+                          {
+                            "href":"/api/v1/stacks/HDP/versions/2.2/services/HDFS/components/JOURNALNODE/dependencies/HDFS_CLIENT",
+                            "Dependencies":{
+                              "component_name":"HDFS_CLIENT",
+                              "dependent_component_name":"JOURNALNODE",
+                              "dependent_service_name":"HDFS",
+                              "stack_name":"HDP",
+                              "stack_version":"2.2"
+                            }
+                          }
+                        ]
+                      },
+                      {
+                        "href":"/api/v1/stacks/HDP/versions/2.2/services/HDFS/components/NAMENODE",
+                        "StackServiceComponents":{
+                          "advertise_version":"true",
+                          "cardinality":"1-2",
+                          "component_category":"MASTER",
+                          "component_name":"NAMENODE",
+                          "custom_commands":[
+                            "DECOMMISSION",
+                            "REBALANCEHDFS"
+                          ],
+                          "display_name":"NameNode",
+                          "is_client":"false",
+                          "is_master":"true",
+                          "service_name":"HDFS",
+                          "stack_name":"HDP",
+                          "stack_version":"2.2",
+                          "hostnames":[
+                            "host2"
+                          ]
+                        },
+                        "dependencies":[
+
+                        ]
+                      },
+                      ],
+                    }],
+                "configurations": {}
+    }
+    hosts = {
+      "items" : [
+        {
+          "href" : "/api/v1/hosts/host1",
+          "Hosts" : {
+            "cpu_count" : 1,
+            "host_name" : "host1",
+            "os_arch" : "x86_64",
+            "os_type" : "centos6",
+            "ph_cpu_count" : 1,
+            "public_host_name" : "host1",
+            "rack_info" : "/default-rack",
+            "total_mem" : 2097152
+          }
+        },
+        {
+          "href" : "/api/v1/hosts/host2",
+          "Hosts" : {
+            "cpu_count" : 1,
+            "host_name" : "host2",
+            "os_arch" : "x86_64",
+            "os_type" : "centos6",
+            "ph_cpu_count" : 1,
+            "public_host_name" : "host2",
+            "rack_info" : "/default-rack",
+            "total_mem" : 1048576
+          }
+        },
+        ]
+    }
+
+    datanodes = self.stackAdvisor.getHostsWithComponent("HDFS", "DATANODE", services, hosts)
+    self.assertEquals(len(datanodes), 2)
+    self.assertEquals(datanodes, hosts["items"])
+    datanode = self.stackAdvisor.getHostWithComponent("HDFS", "DATANODE", services, hosts)
+    self.assertEquals(datanode, hosts["items"][0])
+    namenodes = self.stackAdvisor.getHostsWithComponent("HDFS", "NAMENODE", services, hosts)
+    self.assertEquals(len(namenodes), 1)
+    # [host2]
+    self.assertEquals(namenodes, [hosts["items"][1]])
+    namenode = self.stackAdvisor.getHostWithComponent("HDFS", "NAMENODE", services, hosts)
+    # host2
+    self.assertEquals(namenode, hosts["items"][1])
+
+    # not installed
+    nodemanager = self.stackAdvisor.getHostWithComponent("YARN", "NODEMANAGER", services, hosts)
+    self.assertEquals(nodemanager, None)
+
+    # unknown component
+    unknown_component = self.stackAdvisor.getHostWithComponent("YARN", "UNKNOWN", services, hosts)
+    self.assertEquals(unknown_component, None)
+    # unknown service
+    unknown_component = self.stackAdvisor.getHostWithComponent("UNKNOWN", "NODEMANAGER", services, hosts)
+    self.assertEquals(unknown_component, None)
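+
+    # Shape of the two helpers exercised above, for reference:
+    #   getHostsWithComponent(service, component, services, hosts) -> list of host dicts
+    #   getHostWithComponent(service, component, services, hosts)  -> first host dict, or None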

http://git-wip-us.apache.org/repos/asf/ambari/blob/c3e0771e/ambari-server/src/test/python/stacks/2.2/common/test_stack_advisor.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/python/stacks/2.2/common/test_stack_advisor.py b/ambari-server/src/test/python/stacks/2.2/common/test_stack_advisor.py
index 60fb33a..0453b7a 100644
--- a/ambari-server/src/test/python/stacks/2.2/common/test_stack_advisor.py
+++ b/ambari-server/src/test/python/stacks/2.2/common/test_stack_advisor.py
@@ -1444,8 +1444,8 @@ class TestHDP22StackAdvisor(TestCase):
       'hadoop-env': {
         'properties': {
           'namenode_heapsize': '1024',
-          'namenode_opt_newsize' : '256',
-          'namenode_opt_maxnewsize' : '256'
+          'namenode_opt_newsize' : '128',
+          'namenode_opt_maxnewsize' : '128'
         },
         'property_attributes': {
           'dtnode_heapsize': {'maximum': '2048'},
@@ -1558,30 +1558,6 @@ class TestHDP22StackAdvisor(TestCase):
 
                           ]
                         },
-                        {
-                          "href":"/api/v1/stacks/HDP/versions/2.2/services/HDFS/components/SECONDARY_NAMENODE",
-                          "StackServiceComponents":{
-                            "advertise_version":"true",
-                            "cardinality":"1",
-                            "component_category":"MASTER",
-                            "component_name":"SECONDARY_NAMENODE",
-                            "custom_commands":[
-
-                            ],
-                            "display_name":"SNameNode",
-                            "is_client":"false",
-                            "is_master":"true",
-                            "service_name":"HDFS",
-                            "stack_name":"HDP",
-                            "stack_version":"2.2",
-                            "hostnames":[
-                              "host1"
-                            ]
-                          },
-                          "dependencies":[
-
-                          ]
-                        },
                       ],
                     }],
                 "configurations": configurations
@@ -1619,6 +1595,36 @@ class TestHDP22StackAdvisor(TestCase):
 
     self.stackAdvisor.recommendHDFSConfigurations(configurations, clusterData, services, hosts)
     self.assertEquals(configurations, expected)
+    # namenode heapsize depends on # of datanodes
+    datanode_hostnames = services["services"][0]["components"][0]["StackServiceComponents"]["hostnames"] # datanode hostnames
+    for i in xrange(200):
+      hostname = "datanode" + str(i)
+      datanode_hostnames.append(hostname)
+      hosts['items'].append(
+        {
+          "href" : "/api/v1/hosts/" + hostname,
+          "Hosts" : {
+            "cpu_count" : 1,
+            "host_name" : hostname,
+            "os_arch" : "x86_64",
+            "os_type" : "centos6",
+            "ph_cpu_count" : 1,
+            "public_host_name" : hostname,
+            "rack_info" : "/default-rack",
+            "total_mem" : 2097152
+          }
+        }
+      )
+    self.stackAdvisor.recommendHDFSConfigurations(configurations, clusterData, services, hosts)
+    self.assertEquals(configurations["hadoop-env"]["properties"]["namenode_heapsize"], "47872")
+    self.assertEquals(configurations["hadoop-env"]["properties"]["namenode_opt_newsize"], "6144")
+    self.assertEquals(configurations["hadoop-env"]["properties"]["namenode_opt_maxnewsize"], "6144")
+    # namenode_heapsize also depends on the number of disks used by the datanode
+    configurations["hdfs-site"]["properties"]["dfs.datanode.data.dir"] = "/path1"
+    self.stackAdvisor.recommendHDFSConfigurations(configurations, clusterData, services, hosts)
+    self.assertEquals(configurations["hadoop-env"]["properties"]["namenode_heapsize"], "14848")
+    self.assertEquals(configurations["hadoop-env"]["properties"]["namenode_opt_newsize"], "2048")
+    self.assertEquals(configurations["hadoop-env"]["properties"]["namenode_opt_maxnewsize"], "2048")
 
   def test_validateHDFSConfigurationsEnv(self):
     configurations = {}