Posted to commits@ambari.apache.org by ja...@apache.org on 2015/12/08 20:37:35 UTC

ambari git commit: AMBARI-14029: HAWQ support on HDFS HA (mithmatt via jaoki)

Repository: ambari
Updated Branches:
  refs/heads/trunk cd5693277 -> 7d3320db8


AMBARI-14029: HAWQ support on HDFS HA (mithmatt via jaoki)


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/7d3320db
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/7d3320db
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/7d3320db

Branch: refs/heads/trunk
Commit: 7d3320db8adf2ebdbfde06620add162b48968a78
Parents: cd56932
Author: Jun Aoki <ja...@apache.org>
Authored: Tue Dec 8 11:37:27 2015 -0800
Committer: Jun Aoki <ja...@apache.org>
Committed: Tue Dec 8 11:37:27 2015 -0800

----------------------------------------------------------------------
 .../HAWQ/2.0.0/configuration/hdfs-client.xml    | 280 +++++++++++++++++++
 .../HAWQ/2.0.0/package/scripts/common.py        |  70 +++--
 .../HAWQ/2.0.0/package/scripts/params.py        |  30 +-
 3 files changed, 333 insertions(+), 47 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/7d3320db/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/configuration/hdfs-client.xml
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/configuration/hdfs-client.xml b/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/configuration/hdfs-client.xml
new file mode 100644
index 0000000..053f108
--- /dev/null
+++ b/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/configuration/hdfs-client.xml
@@ -0,0 +1,280 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<configuration>
+
+	<property>
+		<name>rpc.client.timeout</name>
+		<value>3600000</value>
+		<description>
+		The timeout interval of an RPC invocation in milliseconds. The default is set to 3600000 (1 hour).
+		</description>
+	</property>
+
+	<property>
+		<name>rpc.client.connect.tcpnodelay</name>
+		<value>true</value>
+		<description>
+		This parameter indicates whether TCP_NODELAY is set to true for connecting to the RPC server. The default is set to true.
+		</description>
+	</property>
+
+	<property>
+		<name>rpc.client.max.idle</name>
+		<value>10000</value>
+		<description>
+		The max idle time of an RPC connection in milliseconds. The default is set to 10000 (10 seconds).
+		</description>
+	</property>
+
+	<property>
+		<name>rpc.client.ping.interval</name>
+		<value>10000</value>
+		<description>
+		The periodic interval in milliseconds at which the RPC client sends a heartbeat to the server. 0 indicates disabled. The default is set to 10000 (10 seconds).
+		</description>
+	</property>
+
+	<property>
+		<name>rpc.client.connect.timeout</name>
+		<value>600000</value>
+		<description>
+		The timeout interval in milliseconds when the RPC client is trying to set up the connection. The default is set to 600000 (10 minutes).
+		</description>
+	</property>
+
+	<property>
+		<name>rpc.client.connect.retry</name>
+		<value>10</value>
+		<description>
+		The maximum number of times to retry if the RPC client fails to set up the connection to the server. The default is set to 10.
+		</description>
+	</property>
+
+	<property>
+		<name>rpc.client.read.timeout</name>
+		<value>3600000</value>
+		<description>
+		The timeout interval in milliseconds when the RPC client is trying to read from the server. The default is set to 3600000 (1 hour).
+		</description>
+	</property>
+
+	<property>
+		<name>rpc.client.write.timeout</name>
+		<value>3600000</value>
+		<description>
+		The timeout interval in milliseconds when the RPC client is trying to write to the server. The default is set to 3600000 (1 hour).
+		</description>
+	</property>
+
+	<property>
+		<name>rpc.client.socket.linger.timeout</name>
+		<value>-1</value>
+		<description>
+		The value to be set for socket SO_LINGER when connecting to the RPC server. The default value is -1.
+		</description>
+	</property>
+
+	<property>
+		<name>dfs.client.read.shortcircuit</name>
+		<value>true</value>
+		<description>
+		If set to true, the datanode is bypassed when reading file blocks whenever the block and the client are on the same node. The default is set to true.
+		</description>
+	</property>
+
+	<property>
+		<name>dfs.default.replica</name>
+		<value>3</value>
+		<description>
+		The default number of replicas. The default is set to 3.
+		</description>
+	</property>
+
+	<property>
+		<name>dfs.prefetchsize</name>
+		<value>10</value>
+		<description>
+		The default number of blocks for which information will be pre-fetched. The default is set to 10.
+		</description>
+	</property>
+
+	<property>
+		<name>dfs.client.failover.max.attempts</name>
+		<value>15</value>
+		<description>
+		The maximum number of times to retry when the dfs client is trying to issue an RPC call, if multiple namenodes are configured. The default is set to 15.
+		</description>
+	</property>
+
+	<property>
+		<name>dfs.default.blocksize</name>
+		<value>134217728</value>
+		<description>
+		The default block size in bytes. The default is set to 134217728 (128 MB).
+		</description>
+	</property>
+
+	<property>
+		<name>dfs.client.log.severity</name>
+		<value>INFO</value>
+		<description>
+		The minimal log severity level; valid values include FATAL, ERROR, INFO, DEBUG1, DEBUG2, and DEBUG3. The default is set to INFO.
+		</description>
+	</property>
+
+	<property>
+		<name>input.connect.timeout</name>
+		<value>600000</value>
+		<description>
+		The timeout interval in milliseconds when the input stream is trying to set up the connection to a datanode. The default is set to 600000 (10 minutes).
+		</description>
+	</property>
+
+	<property>
+		<name>input.read.timeout</name>
+		<value>3600000</value>
+		<description>
+		The timeout interval in milliseconds when the input stream is trying to read from a datanode. The default is set to 3600000 (1 hour).
+		</description>
+	</property>
+
+	<property>
+		<name>input.write.timeout</name>
+		<value>3600000</value>
+		<description>
+		The timeout interval in milliseconds when the input stream is trying to write to a datanode. The default is set to 3600000 (1 hour).
+		</description>
+	</property>
+
+	<property>
+		<name>input.localread.default.buffersize</name>
+		<value>2097152</value>
+		<description>
+		The buffer size in bytes used to hold the data from the file block and to verify the checksum.
+		It is only used when 'dfs.client.read.shortcircuit' is set to true. The default is set to 1048576 (1 MB).
+		</description>
+	</property>
+
+	<property>
+		<name>input.localread.blockinfo.cachesize</name>
+		<value>1000</value>
+		<description>
+		The size of the file block path information cache in bytes. The default is set to 1000 bytes.
+		</description>
+	</property>
+
+	<property>
+		<name>input.read.getblockinfo.retry</name>
+		<value>3</value>
+		<description>
+		The maximum number of times to retry when the client fails to get block information from the namenode. The default is set to 3.
+		</description>
+	</property>
+
+	<property>
+		<name>output.replace-datanode-on-failure</name>
+		<value>false</value>
+		<description>
+		If set to true, the client adds a new datanode into the pipeline if the number of nodes in the pipeline is less than the specified number of replicas. The default is set to true.
+		</description>
+	</property>
+
+	<property>
+		<name>output.default.chunksize</name>
+		<value>512</value>
+		<description>
+		The chunk size (in bytes) in the pipeline. The default is set to 512 bytes.
+		</description>
+	</property>
+
+	<property>
+		<name>output.default.packetsize</name>
+		<value>65536</value>
+		<description>
+		The packet size (in bytes) in the pipeline. The default is set to 65536 (64 kB).
+		</description>
+	</property>
+
+	<property>
+		<name>output.default.write.retry</name>
+		<value>10</value>
+		<description>
+		The maximum number of times to retry when the client fails to set up the pipeline. The default is set to 10.
+		</description>
+	</property>
+
+	<property>
+		<name>output.connect.timeout</name>
+		<value>600000</value>
+		<description>
+		The timeout interval in milliseconds when the output stream is trying to set up the connection to a datanode. The default is set to 600000 (10 minutes).
+		</description>
+	</property>
+
+	<property>
+		<name>output.read.timeout</name>
+		<value>3600000</value>
+		<description>
+		The timeout interval in milliseconds when the output stream is trying to read from a datanode. The default is set to 3600000 (1 hour).
+		</description>
+	</property>
+
+	<property>
+		<name>output.write.timeout</name>
+		<value>3600000</value>
+		<description>
+		The timeout interval in milliseconds when the output stream is trying to write to a datanode. The default is set to 3600000 (1 hour).
+		</description>
+	</property>
+
+	<property>
+		<name>output.packetpool.size</name>
+		<value>1024</value>
+		<description>
+		The maximum number of packets in a file's packet pool. The default is set to 1024.
+		</description>
+	</property>
+
+	<property>
+		<name>output.close.timeout</name>
+		<value>900000</value>
+		<description>
+		The timeout interval in milliseconds when closing an output stream. The default is set to 900000 (15 minutes).
+		</description>
+	</property>
+
+	<property>
+		<name>dfs.domain.socket.path</name>
+		<value>/var/lib/hadoop-hdfs/dn_socket</value>
+		<description>
+		Optional: This is the path to a UNIX domain socket that will be used for communication between the datanode and local HDFS clients.
+		If the string "_PORT" is present in this path, it will be replaced by the TCP port of the datanode.
+		</description>
+	</property>
+
+	<property>
+		<name>dfs.client.use.legacy.blockreader.local</name>
+		<value>false</value>
+		<description>
+		If this configuration is set to true, the legacy short-circuit reader implementation based on HDFS-2246 is used.
+		This implementation is used on platforms other than Linux that do not have the new implementation based on HDFS-347.
+		</description>
+	</property>
+
+</configuration>
\ No newline at end of file
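
The hdfs-client.xml above follows the standard Hadoop name/value/description layout, so its contents can be inspected with nothing beyond the Python standard library. The sketch below is only an illustration and is not part of this commit: the file path and the helper name are assumptions, although common.py already imports xml.etree.ElementTree for similar parsing.

    import xml.etree.ElementTree as ET

    def print_hdfs_client_properties(path="/usr/local/hawq/etc/hdfs-client.xml"):
        # Parse a Hadoop-style configuration file and list its name/value pairs.
        # The path above is a hypothetical install location, not one defined by this patch.
        tree = ET.parse(path)
        for prop in tree.getroot().findall("property"):
            print("{0} = {1}".format(prop.findtext("name"), prop.findtext("value")))

    if __name__ == "__main__":
        print_hdfs_client_properties()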

http://git-wip-us.apache.org/repos/asf/ambari/blob/7d3320db/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/package/scripts/common.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/package/scripts/common.py b/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/package/scripts/common.py
index c62ac12..f44b576 100644
--- a/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/package/scripts/common.py
+++ b/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/package/scripts/common.py
@@ -27,7 +27,6 @@ from resource_management.core.logger import Logger
 from resource_management.core.system import System
 from resource_management.core.exceptions import Fail
 from resource_management.core.resources.accounts import Group, User
-from resource_management.core.source import Template
 import xml.etree.ElementTree as ET
 
 import utils
@@ -79,19 +78,42 @@ def setup_common_configurations():
   """
   Sets up the config files common to master, standby and segment nodes.
   """
+  __update_hdfs_client()
+  __update_yarn_client()
+  __update_hawq_site()
+  __set_osparams()
+
+def __update_hdfs_client():
+  """
+  Writes hdfs-client.xml on the local filesystem on HAWQ nodes.
+  If HDFS HA is enabled, appends the related parameters to hdfs-client.xml.
+  """
   import params
 
-  substituted_conf_dict = __substitute_hostnames_in_hawq_site()
-  XmlConfig("hawq-site.xml",
+  hdfs_client_dict = params.hdfs_client.copy()
+  dfs_nameservice = params.hdfs_site.get('dfs.nameservices')
+
+  # Add the additional parameters required for HDFS HA, if HDFS HA is enabled.
+  # Temporary logic; it will move to ambari-web so these parameters are exposed in the UI once HDFS HA is enabled.
+  if dfs_nameservice:
+    ha_namenodes = 'dfs.ha.namenodes.{0}'.format(dfs_nameservice)
+    ha_nn_list = [ha_nn.strip() for ha_nn in params.hdfs_site[ha_namenodes].split(',')]
+    required_keys = ('dfs.nameservices', ha_namenodes,
+                     'dfs.namenode.rpc-address.{0}.{1}'.format(dfs_nameservice, ha_nn_list[0]),
+                     'dfs.namenode.http-address.{0}.{1}'.format(dfs_nameservice, ha_nn_list[0]),
+                     'dfs.namenode.rpc-address.{0}.{1}'.format(dfs_nameservice, ha_nn_list[1]),
+                     'dfs.namenode.http-address.{0}.{1}'.format(dfs_nameservice, ha_nn_list[1]))
+
+    for key in required_keys:
+      hdfs_client_dict[key] = params.hdfs_site[key]
+
+  XmlConfig("hdfs-client.xml",
             conf_dir=constants.hawq_config_dir,
-            configurations=substituted_conf_dict,
-            configuration_attributes=params.config['configuration_attributes']['hawq-site'],
+            configurations=ConfigDictionary(hdfs_client_dict),
+            configuration_attributes=params.config['configuration_attributes']['hdfs-client'],
             owner=constants.hawq_user,
             group=constants.hawq_group,
             mode=0644)
-  if "yarn-site" in params.config["configurations"]:
-    __update_yarn_client()
-  __set_osparams()
 
 
 def __update_yarn_client():
@@ -146,33 +168,19 @@ def __update_yarn_client():
             mode=0644)
 
 
-def __substitute_hostnames_in_hawq_site():
+def __update_hawq_site():
   """
-  Temporary function to replace localhost with actual HAWQ component hostnames.
-  This function will be in place till the entire HAWQ plugin code along with the UI
-  changes are submitted to the trunk.
+  Sets up hawq-site.xml
   """
   import params
 
-  LOCALHOST = "localhost"
-  
-  # in case there is no standby
-  hawqstandby_host_desired_value = params.hawqstandby_host if params.hawqstandby_host is not None else 'none' 
-  
-  substituted_hawq_site = params.hawq_site.copy()
-  hawq_site_property_map = {"hawq_master_address_host": params.hawqmaster_host,
-                            "hawq_standby_address_host": hawqstandby_host_desired_value,
-                            "hawq_rm_yarn_address": params.rm_host,
-                            "hawq_rm_yarn_scheduler_address": params.rm_host,
-                            "hawq_dfs_url": params.namenode_host
-                            }
-
-  for property, desired_value in hawq_site_property_map.iteritems():
-    if desired_value is not None:
-      # Replace localhost with required component hostname
-      substituted_hawq_site[property] = re.sub(LOCALHOST, desired_value, substituted_hawq_site[property])
-
-  return substituted_hawq_site
+  XmlConfig("hawq-site.xml",
+            conf_dir=constants.hawq_config_dir,
+            configurations=params.hawq_site,
+            configuration_attributes=params.config['configuration_attributes']['hawq-site'],
+            owner=constants.hawq_user,
+            group=constants.hawq_group,
+            mode=0644)
 
 
 def __set_osparams():
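
To make the HA branch of __update_hdfs_client easier to follow in isolation, here is a minimal standalone sketch of the same key-derivation logic. The hdfs_site values below (nameservice id, namenode ids, hostnames, and ports) are made up for illustration; only the derivation mirrors the code above.

    # Hypothetical hdfs-site values for a cluster with HDFS HA enabled.
    hdfs_site = {
        'dfs.nameservices': 'mycluster',
        'dfs.ha.namenodes.mycluster': 'nn1, nn2',
        'dfs.namenode.rpc-address.mycluster.nn1': 'c6401.example.com:8020',
        'dfs.namenode.http-address.mycluster.nn1': 'c6401.example.com:50070',
        'dfs.namenode.rpc-address.mycluster.nn2': 'c6402.example.com:8020',
        'dfs.namenode.http-address.mycluster.nn2': 'c6402.example.com:50070',
    }

    hdfs_client_dict = {}
    dfs_nameservice = hdfs_site.get('dfs.nameservices')

    if dfs_nameservice:
        ha_namenodes = 'dfs.ha.namenodes.{0}'.format(dfs_nameservice)
        ha_nn_list = [nn.strip() for nn in hdfs_site[ha_namenodes].split(',')]
        # The six keys copied from hdfs-site into hdfs-client.xml when HA is on.
        required_keys = ('dfs.nameservices', ha_namenodes,
                         'dfs.namenode.rpc-address.{0}.{1}'.format(dfs_nameservice, ha_nn_list[0]),
                         'dfs.namenode.http-address.{0}.{1}'.format(dfs_nameservice, ha_nn_list[0]),
                         'dfs.namenode.rpc-address.{0}.{1}'.format(dfs_nameservice, ha_nn_list[1]),
                         'dfs.namenode.http-address.{0}.{1}'.format(dfs_nameservice, ha_nn_list[1]))
        for key in required_keys:
            hdfs_client_dict[key] = hdfs_site[key]

    # With HDFS HA disabled (no dfs.nameservices), the dictionary stays empty and
    # hdfs-client.xml is rendered from the hdfs-client configuration type alone.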

http://git-wip-us.apache.org/repos/asf/ambari/blob/7d3320db/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/package/scripts/params.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/package/scripts/params.py b/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/package/scripts/params.py
index c8e97b8..8d9de6e 100644
--- a/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/package/scripts/params.py
+++ b/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/package/scripts/params.py
@@ -34,53 +34,51 @@ def __get_component_host(component):
   return component_host
 
 
-def __get_namenode_host():
-  """
-  Gets the namenode host; active namenode in case of HA
-  """
-  namenode_host = __get_component_host('namenode_host')
-  
-  # hostname of the active HDFS HA Namenode (only used when HA is enabled)
-  dfs_ha_namenode_active = default('/configurations/hadoop-env/dfs_ha_initial_namenode_active', None)
-  if dfs_ha_namenode_active is not None:
-    namenode_host = dfs_ha_namenode_active
-  return namenode_host
-
-
 hostname = config['hostname']
 
 # Users and Groups
 hdfs_superuser = config['configurations']['hadoop-env']['hdfs_user']
 user_group = config['configurations']['cluster-env']['user_group']
 
+
 # HAWQ Hostnames
 hawqmaster_host = __get_component_host('hawqmaster_hosts')
 hawqstandby_host = __get_component_host('hawqstandby_hosts')
 hawqsegment_hosts = default('/clusterHostInfo/hawqsegment_hosts', [])
 
+
 # HDFS
 hdfs_site = config['configurations']['hdfs-site']
 default_fs = config['configurations']['core-site']['fs.defaultFS']
 
 # HDFSResource partial function
-HdfsResource = functools.partial(HdfsResource, user=hdfs_superuser, hdfs_site=hdfs_site, default_fs=default_fs)
+HdfsResource = functools.partial(HdfsResource,
+                                 user=hdfs_superuser,
+                                 hdfs_site=hdfs_site,
+                                 default_fs=default_fs)
 
-namenode_host= __get_namenode_host()
 
 # YARN
 # Note: YARN is not mandatory for HAWQ. It is required only when the users set HAWQ to use YARN as resource manager
 rm_host = __get_component_host('rm_host')
 yarn_ha_enabled = default('/configurations/yarn-site/yarn.resourcemanager.ha.enabled', False)
 
+
 # Config files
 gpcheck_content = config['configurations']['gpcheck-env']['content']
 # database user limits
 hawq_limits = config['configurations']['hawq-limits-env']
 # sysctl parameters
 hawq_sysctl = config['configurations']['hawq-sysctl-env']
-
+# hawq config
 hawq_site = config['configurations']['hawq-site']
+# hdfs-client for enabling HAWQ to work with HDFS namenode HA
+hdfs_client = config['configurations']['hdfs-client']
+# yarn-client for enabling HAWQ to work with YARN resource manager HA
 yarn_client = config['configurations']['yarn-client']
+
+
+# Directories and ports
 hawq_master_dir = hawq_site.get('hawq_master_directory')
 hawq_segment_dir = hawq_site.get('hawq_segment_directory')
 hawq_master_temp_dir = hawq_site.get('hawq_master_temp_directory')
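
The reformatted HdfsResource assignment above uses functools.partial to pre-bind the cluster-wide keyword arguments (user, hdfs_site, default_fs) once, so later call sites only pass per-resource arguments. A small hedged illustration of the pattern follows, with a made-up stand-in function instead of the real HdfsResource resource class; the argument values are invented for the example.

    import functools

    def hdfs_resource(path, action=None, user=None, hdfs_site=None, default_fs=None):
        # Stand-in for the real HdfsResource; just reports what would be managed.
        print("{0} {1} as {2} on {3}".format(action, path, user, default_fs))

    # Pre-bind the cluster-wide keyword arguments once, as params.py does.
    HdfsResource = functools.partial(hdfs_resource,
                                     user='hdfs',
                                     hdfs_site={'dfs.nameservices': 'mycluster'},
                                     default_fs='hdfs://mycluster')

    # Call sites then supply only the per-resource arguments.
    HdfsResource('/hawq_default', action='create_on_execute')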