You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by bh...@apache.org on 2016/03/19 20:08:34 UTC

ambari git commit: AMBARI-15449: HAWQ hdfs-client / output.replace-datanode-on-failure should be set to true by default (adenissov via bhuvnesh2703)

Repository: ambari
Updated Branches:
  refs/heads/branch-2.2 4ae094997 -> d2e6c9c67


AMBARI-15449: HAWQ hdfs-client / output.replace-datanode-on-failure should be set to true by default (adenissov via bhuvnesh2703)


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/d2e6c9c6
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/d2e6c9c6
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/d2e6c9c6

Branch: refs/heads/branch-2.2
Commit: d2e6c9c676756a52b866a44aa4152c5ae74efc7f
Parents: 4ae0949
Author: Bhuvnesh Chaudhary <bc...@pivotal.io>
Authored: Sat Mar 19 12:09:05 2016 -0700
Committer: Bhuvnesh Chaudhary <bc...@pivotal.io>
Committed: Sat Mar 19 12:09:05 2016 -0700

----------------------------------------------------------------------
 .../HAWQ/2.0.0/configuration/hawq-site.xml      | 18 +++--
 .../HAWQ/2.0.0/configuration/hdfs-client.xml    |  7 +-
 .../stacks/HDP/2.3/services/stack_advisor.py    | 81 +++++++++++++------
 .../stacks/2.3/common/test_stack_advisor.py     | 85 +++++++++++++++++---
 4 files changed, 144 insertions(+), 47 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/d2e6c9c6/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/configuration/hawq-site.xml
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/configuration/hawq-site.xml b/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/configuration/hawq-site.xml
index 93ad3fe..ec7275f 100644
--- a/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/configuration/hawq-site.xml
+++ b/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/configuration/hawq-site.xml
@@ -303,8 +303,10 @@
     <display-name>Default buckets for Hash Distributed tables</display-name>
     <value>6</value>
     <description>
-     The default number of virtual segments per query for hash distributed tables that can be used for query execution in a HAWQ segment when Resource Manager ('hawq_global_rm_type') is set to Standalone ('none').
-      The default is 6.
+      The default number of buckets when creating a hash distributed table, if not defined explicitly when creating the table.
+      When running a query on a hash distributed table, the query resource is fixed and equal to the number of buckets.
+      The default set during installation is the number of HAWQ Segments multiplied by 6.
+      Adjust the value when the number of HAWQ Segments changes in the cluster.
     </description>
     <value-attributes>
       <type>int</type>
@@ -319,9 +321,9 @@
     <display-name>Virtual Segments Limit per Query (per Segment Node)</display-name>
     <value>6</value>
     <description>
-      Not recommended to change this parameter.
-       The maximum  number of virtual segments per query per Segment Node that can be used for query execution in a HAWQ segment when Resource Manager ('hawq_global_rm_type') is set to Standalone ('none').
-      The default is 6.
+      The maximum number of virtual segments per query per segment node when querying randomly distributed tables, external tables, and tables with user defined functions.
+      This parameter is used to limit the maximum resource usage on each segment node.
+      The default is 6. Changing this parameter is not recommended.
     </description>
     <value-attributes>
       <type>int</type>
@@ -336,9 +338,9 @@
     <display-name>Virtual Segments Limit per Query (Total)</display-name>
     <value>512</value>
     <description>
-      Not recommended to change this parameter.
-      The maximum total number of virtual segments per query that can be used for query execution in a HAWQ segment when Resource Manager ('hawq_global_rm_type') is set to Standalone ('none').
-      The default is 512.
+      The maximum number of virtual segments per query across the cluster.
+      This parameter is used to limit the maximum resource usage per query across the cluster regardless of the number of HAWQ Segments.
+      The default value is 512. Changing this parameter is not recommended.
     </description>
     <value-attributes>
       <type>int</type>

http://git-wip-us.apache.org/repos/asf/ambari/blob/d2e6c9c6/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/configuration/hdfs-client.xml
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/configuration/hdfs-client.xml b/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/configuration/hdfs-client.xml
index c2a756a..6754fcd 100644
--- a/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/configuration/hdfs-client.xml
+++ b/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/configuration/hdfs-client.xml
@@ -323,10 +323,11 @@
 
   <property>
     <name>output.replace-datanode-on-failure</name>
-    <value>false</value>
+    <value>true</value>
     <description>
-      Determines whether the client adds a new DataNode to pipeline if the number of nodes in the pipeline is less than the specified number of replicas.
-      The default is true.
+      Determines whether the client adds a new DataNode to the pipeline if the number of nodes in the pipeline
+      is less than the specified number of replicas. The default is true.
+      For test environments with 4 or fewer HAWQ Segments, the value should be false.
     </description>
     <value-attributes>
       <type>boolean</type>

http://git-wip-us.apache.org/repos/asf/ambari/blob/d2e6c9c6/ambari-server/src/main/resources/stacks/HDP/2.3/services/stack_advisor.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.3/services/stack_advisor.py b/ambari-server/src/main/resources/stacks/HDP/2.3/services/stack_advisor.py
index c0bc4e2..171a6b6 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.3/services/stack_advisor.py
+++ b/ambari-server/src/main/resources/stacks/HDP/2.3/services/stack_advisor.py
@@ -662,30 +662,39 @@ class HDP23StackAdvisor(HDP22StackAdvisor):
 
 
   def recommendHAWQConfigurations(self, configurations, clusterData, services, hosts):
-    if "hawq-site" not in services["configurations"]:
-      return
-    hawq_site = services["configurations"]["hawq-site"]["properties"]
-    putHawqSiteProperty = self.putProperty(configurations, "hawq-site", services)
-    componentsListList = [service["components"] for service in services["services"]]
-    componentsList = [item["StackServiceComponents"] for sublist in componentsListList for item in sublist]
-    servicesList = [service["StackServices"]["service_name"] for service in services["services"]]
-
-    # remove master port when master is colocated with Ambari server
-    if self.isHawqMasterComponentOnAmbariServer(services) and "hawq_master_address_port" in hawq_site:
+    if any(x in services["configurations"] for x in ["hawq-site", "hdfs-client"]):
+      componentsListList = [service["components"] for service in services["services"]]
+      componentsList = [item["StackServiceComponents"] for sublist in componentsListList for item in sublist]
+      servicesList = [service["StackServices"]["service_name"] for service in services["services"]]
+      numSegments = len(self.__getHosts(componentsList, "HAWQSEGMENT"))
+
+    if "hawq-site" in services["configurations"]:
+      hawq_site = services["configurations"]["hawq-site"]["properties"]
+      putHawqSiteProperty = self.putProperty(configurations, "hawq-site", services)
+
+      # remove master port when master is colocated with Ambari server
+      if self.isHawqMasterComponentOnAmbariServer(services) and "hawq_master_address_port" in hawq_site:
         putHawqSiteProperty('hawq_master_address_port', '')
 
-    # calculate optimal number of virtual segments
-    numSegments = len(self.__getHosts(componentsList, "HAWQSEGMENT"))
-    # update default if segments are deployed
-    if numSegments and "default_hash_table_bucket_number" in hawq_site:
-      putHawqSiteProperty('default_hash_table_bucket_number', numSegments * 6)
+      # update default if segments are deployed
+      if numSegments and "default_hash_table_bucket_number" in hawq_site:
+        putHawqSiteProperty('default_hash_table_bucket_number', numSegments * 6)
+
+      # update YARN RM urls with the values from yarn-site if YARN is installed
+      if "YARN" in servicesList and "yarn-site" in services["configurations"]:
+        yarn_site = services["configurations"]["yarn-site"]["properties"]
+        for hs_prop, ys_prop in self.getHAWQYARNPropertyMapping().items():
+          if hs_prop in hawq_site and ys_prop in yarn_site:
+            putHawqSiteProperty(hs_prop, yarn_site[ys_prop])
+
+    # set output.replace-datanode-on-failure in HAWQ hdfs-client depending on the cluster size
+    if "hdfs-client" in services["configurations"]:
+      hdfs_client = services["configurations"]["hdfs-client"]["properties"]
+      if "output.replace-datanode-on-failure" in hdfs_client:
+        propertyValue = "true" if numSegments > 4 else "false"
+        putHdfsClientProperty = self.putProperty(configurations, "hdfs-client", services)
+        putHdfsClientProperty("output.replace-datanode-on-failure", propertyValue)
 
-    # update YARN RM urls with the values from yarn-site if YARN is installed
-    if "YARN" in servicesList and "yarn-site" in services["configurations"]:
-      yarn_site = services["configurations"]["yarn-site"]["properties"]
-      for hs_prop, ys_prop in self.getHAWQYARNPropertyMapping().items():
-        if hs_prop in hawq_site and ys_prop in yarn_site:
-          putHawqSiteProperty(hs_prop, yarn_site[ys_prop])
 
   def getServiceConfigurationValidators(self):
     parentValidators = super(HDP23StackAdvisor, self).getServiceConfigurationValidators()
@@ -696,7 +705,8 @@ class HDP23StackAdvisor(HDP22StackAdvisor):
       "HBASE": {"hbase-site": self.validateHBASEConfigurations},
       "KAKFA": {"kafka-broker": self.validateKAFKAConfigurations},
       "YARN": {"yarn-site": self.validateYARNConfigurations},
-      "HAWQ": {"hawq-site": self.validateHAWQConfigurations}
+      "HAWQ": {"hawq-site": self.validateHAWQSiteConfigurations,
+               "hdfs-client": self.validateHAWQHdfsClientConfigurations}
     }
     self.mergeValidators(parentValidators, childValidators)
     return parentValidators
@@ -912,7 +922,7 @@ class HDP23StackAdvisor(HDP22StackAdvisor):
                               "It is not advisable to have " + display_name + " at " + root_dir +". Consider creating a sub directory for HAWQ")})
 
 
-  def validateHAWQConfigurations(self, properties, recommendedDefaults, configurations, services, hosts):
+  def validateHAWQSiteConfigurations(self, properties, recommendedDefaults, configurations, services, hosts):
     hawq_site = properties
     validationItems = []
 
@@ -952,7 +962,30 @@ class HDP23StackAdvisor(HDP22StackAdvisor):
       validationItems.append({"config-name": HAWQ_GLOBAL_RM_TYPE, "item": self.getErrorItem(message)})
 
     return self.toConfigurationValidationProblems(validationItems, "hawq-site")
-  
+
+  def validateHAWQHdfsClientConfigurations(self, properties, recommendedDefaults, configurations, services, hosts):
+    hdfs_client = properties
+    validationItems = []
+
+    # check HAWQ hdfs-client output.replace-datanode-on-failure property
+    PROP_NAME = "output.replace-datanode-on-failure"
+    if PROP_NAME in hdfs_client:
+      value = hdfs_client[PROP_NAME].upper()
+      componentsListList = [service["components"] for service in services["services"]]
+      componentsList = [item["StackServiceComponents"] for sublist in componentsListList for item in sublist]
+      numSegments = len(self.__getHosts(componentsList, "HAWQSEGMENT"))
+
+      message = None
+      limit = 4
+      if numSegments > limit and value != 'TRUE':
+        message = "{0} should be set to true (checked) for clusters with more than {1} HAWQ Segments"
+      elif numSegments <= limit and value != 'FALSE':
+        message = "{0} should be set to false (unchecked) for clusters with {1} or less HAWQ Segments"
+
+      if message:
+        validationItems.append({"config-name": PROP_NAME, "item": self.getWarnItem(message.format(PROP_NAME, str(limit)))})
+
+    return self.toConfigurationValidationProblems(validationItems, "hdfs-client")
   
   def isComponentUsingCardinalityForLayout(self, componentName):
     return componentName in ['NFS_GATEWAY', 'PHOENIX_QUERY_SERVER', 'SPARK_THRIFTSERVER']

http://git-wip-us.apache.org/repos/asf/ambari/blob/d2e6c9c6/ambari-server/src/test/python/stacks/2.3/common/test_stack_advisor.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/python/stacks/2.3/common/test_stack_advisor.py b/ambari-server/src/test/python/stacks/2.3/common/test_stack_advisor.py
index 4f0b4b7..b5cf1a6 100644
--- a/ambari-server/src/test/python/stacks/2.3/common/test_stack_advisor.py
+++ b/ambari-server/src/test/python/stacks/2.3/common/test_stack_advisor.py
@@ -1462,6 +1462,9 @@ class TestHDP23StackAdvisor(TestCase):
     services["configurations"]["hawq-site"] = {"properties": {"default_hash_table_bucket_number": "24",
                                                               "hawq_rm_yarn_address": "localhost:8032",
                                                               "hawq_rm_yarn_scheduler_address": "localhost:8030"}}
+
+    services["configurations"]["hdfs-client"] = {"properties": {"output.replace-datanode-on-failure": "true"}}
+
     services["configurations"]["yarn-site"] = {"properties": {"yarn.resourcemanager.address": "host1:8050",
                                                               "yarn.resourcemanager.scheduler.address": "host1:8030"}}
     services["services"].append({"StackServices" : {"service_name" : "YARN"}, "components":[]})
@@ -1472,6 +1475,7 @@ class TestHDP23StackAdvisor(TestCase):
     self.assertEquals(len(hawqSegmentComponent["hostnames"]), 3)
     self.stackAdvisor.recommendHAWQConfigurations(configurations, clusterData, services, None)
     self.assertEquals(configurations["hawq-site"]["properties"]["default_hash_table_bucket_number"], str(3 * 6))
+    self.assertEquals(configurations["hdfs-client"]["properties"]["output.replace-datanode-on-failure"], "false")
 
     # check derived properties
     self.assertEquals(configurations["hawq-site"]["properties"]["hawq_rm_yarn_address"], "host1:8050")
@@ -1481,14 +1485,15 @@ class TestHDP23StackAdvisor(TestCase):
     hawqSegmentComponent["hostnames"] = ["host" + str(i) for i in range(100)]
     self.stackAdvisor.recommendHAWQConfigurations(configurations, clusterData, services, None)
     self.assertEquals(configurations["hawq-site"]["properties"]["default_hash_table_bucket_number"], str(100 * 6))
+    self.assertEquals(configurations["hdfs-client"]["properties"]["output.replace-datanode-on-failure"], "true")
 
-    # Test 4 - with no segments
+    # Test 3 - with no segments
     configurations = {}
     services["configurations"]["hawq-site"] = {"properties":{'hawq-site': {'properties': {}}}}
     hawqSegmentComponent["hostnames"] = []
     self.stackAdvisor.recommendHAWQConfigurations(configurations, clusterData, services, None)
-    self.assertEquals(configurations, {'hawq-site': {'properties': {}}})
-
+    self.assertEquals(configurations, {'hdfs-client': {'properties': {'output.replace-datanode-on-failure': 'false'}},
+                                       'hawq-site': {'properties': {}}})
 
   def test_validateHiveConfigurations(self):
     properties = {"hive_security_authorization": "None",
@@ -1757,7 +1762,7 @@ class TestHDP23StackAdvisor(TestCase):
     self.assertEqual(len(problems), 0)
 
 
-  def test_validateHAWQConfigurations(self):
+  def test_validateHAWQSiteConfigurations(self):
     services = self.load_json("services-hawq-3-hosts.json")
     # setup default configuration values
     # Test hawq_rm_yarn_address and hawq_rm_scheduler_address are set correctly
@@ -1788,7 +1793,7 @@ class TestHDP23StackAdvisor(TestCase):
       }
     }
 
-    problems = self.stackAdvisor.validateHAWQConfigurations(properties, defaults, configurations, services, hosts)
+    problems = self.stackAdvisor.validateHAWQSiteConfigurations(properties, defaults, configurations, services, hosts)
     problems_dict = {}
     for problem in problems:
       problems_dict[problem['config-name']] = problem
@@ -1826,7 +1831,7 @@ class TestHDP23StackAdvisor(TestCase):
                           "level": "ERROR"
                     } ]
     """
-    problems = self.stackAdvisor.validateHAWQConfigurations(properties, defaults, services["configurations"], services, hosts)
+    problems = self.stackAdvisor.validateHAWQSiteConfigurations(properties, defaults, services["configurations"], services, hosts)
     self.assertEqual(len(problems), 1)
     expected = {
       "config-type": "hawq-site",
@@ -1840,7 +1845,7 @@ class TestHDP23StackAdvisor(TestCase):
     # case 2: hawq_global_rm_type is set as yarn, and YARN service is installed. No validation errors expected.
     services["services"].append({"StackServices" : {"service_name" : "YARN"}, "components":[]})
 
-    problems = self.stackAdvisor.validateHAWQConfigurations(properties, defaults, services["configurations"], services, hosts)
+    problems = self.stackAdvisor.validateHAWQSiteConfigurations(properties, defaults, services["configurations"], services, hosts)
     self.assertEqual(len(problems), 0)
 
     # Test HAWQ Master port conflict with Ambari Server Postgres port
@@ -1853,7 +1858,7 @@ class TestHDP23StackAdvisor(TestCase):
           {"hawq_master_address_port": "5432"}
       }
     }
-    problems = self.stackAdvisor.validateHAWQConfigurations(properties, defaults, configurations, services, hosts)
+    problems = self.stackAdvisor.validateHAWQSiteConfigurations(properties, defaults, configurations, services, hosts)
     self.assertEqual(len(problems), 1)
     expected = {
       "config-name": "hawq_master_address_port",
@@ -1868,17 +1873,73 @@ class TestHDP23StackAdvisor(TestCase):
     # case 2: HAWQ Master is placed on Ambari Server and HAWQ Master port is different from  Ambari Server Postgres Port
     self.stackAdvisor.isHawqMasterComponentOnAmbariServer = MagicMock(return_value=True)
     configurations["hawq-site"]["properties"]["hawq_master_address_port"] = "10432"
-    problems = self.stackAdvisor.validateHAWQConfigurations(properties, defaults, configurations, services, hosts)
+    problems = self.stackAdvisor.validateHAWQSiteConfigurations(properties, defaults, configurations, services, hosts)
     self.assertEqual(len(problems), 0)
 
     # case 3: HAWQ Master is not placed on Ambari Server and HAWQ Master port is same as  Ambari Server Postgres Port
     self.stackAdvisor.isHawqMasterComponentOnAmbariServer = MagicMock(return_value=False)
     configurations["hawq-site"]["properties"]["hawq_master_address_port"] = "5432"
-    problems = self.stackAdvisor.validateHAWQConfigurations(properties, defaults, configurations, services, hosts)
+    problems = self.stackAdvisor.validateHAWQSiteConfigurations(properties, defaults, configurations, services, hosts)
     self.assertEqual(len(problems), 0)
 
     # case 4: HAWQ Master is not placed on Ambari Server and HAWQ Master port is different from  Ambari Server Postgres Port
     self.stackAdvisor.isHawqMasterComponentOnAmbariServer = MagicMock(return_value=False)
     configurations["hawq-site"]["properties"]["hawq_master_address_port"] = "10432"
-    problems = self.stackAdvisor.validateHAWQConfigurations(properties, defaults, configurations, services, hosts)
-    self.assertEqual(len(problems), 0)
\ No newline at end of file
+    problems = self.stackAdvisor.validateHAWQSiteConfigurations(properties, defaults, configurations, services, hosts)
+    self.assertEqual(len(problems), 0)
+
+  def test_validateHAWQHdfsClientConfigurations(self):
+    services = {
+      "services":  [
+        { "StackServices": {"service_name": "HAWQ"},
+          "components": [{
+            "StackServiceComponents": {
+              "component_name": "HAWQSEGMENT",
+              "hostnames": []
+            }}]
+          }],
+      "configurations": {}
+    }
+    # setup default configuration values
+    configurations = services["configurations"]
+    configurations["hdfs-client"] = {"properties": {"output.replace-datanode-on-failure": "true"}}
+    properties = configurations["hdfs-client"]["properties"]
+    defaults = {}
+    hosts = {}
+
+    # 1. Try with no hosts
+    expected = {
+        'config-type': 'hdfs-client',
+        'message': 'output.replace-datanode-on-failure should be set to false (unchecked) for clusters with 4 or less HAWQ Segments',
+        'type': 'configuration',
+        'config-name': 'output.replace-datanode-on-failure',
+        'level': 'WARN'
+    }
+
+    problems = self.stackAdvisor.validateHAWQHdfsClientConfigurations(properties, defaults, configurations, services, hosts)
+    self.assertEqual(len(problems), 1)
+    self.assertEqual(problems[0], expected)
+
+    # 2. Try with 4 hosts
+    services["services"][0]["components"][0]["StackServiceComponents"]["hostnames"] = ["host1", "host2", "host3", "host4"]
+    problems = self.stackAdvisor.validateHAWQHdfsClientConfigurations(properties, defaults, configurations, services, hosts)
+    self.assertEqual(len(problems), 1)
+    self.assertEqual(problems[0], expected)
+
+    # 3. Try with 5 hosts - default value
+    services["services"][0]["components"][0]["StackServiceComponents"]["hostnames"] = ["host1", "host2", "host3", "host4", "host5"]
+    problems = self.stackAdvisor.validateHAWQHdfsClientConfigurations(properties, defaults, configurations, services, hosts)
+    self.assertEqual(len(problems), 0)
+
+    # 4. Try with 5 hosts
+    properties = {"output.replace-datanode-on-failure": "false"}
+    expected = {
+      'config-type': 'hdfs-client',
+      'message': 'output.replace-datanode-on-failure should be set to true (checked) for clusters with more than 4 HAWQ Segments',
+      'type': 'configuration',
+      'config-name': 'output.replace-datanode-on-failure',
+      'level': 'WARN'
+    }
+    problems = self.stackAdvisor.validateHAWQHdfsClientConfigurations(properties, defaults, configurations, services, hosts)
+    self.assertEqual(len(problems), 1)
+    self.assertEqual(problems[0], expected)