You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@bigtop.apache.org by co...@apache.org on 2016/03/18 03:22:45 UTC

[13/13] bigtop git commit: BIGTOP-2325. Deployment recipes for HAWQ

BIGTOP-2325. Deployment recipes for HAWQ

Initial draft for puppet recipes


Project: http://git-wip-us.apache.org/repos/asf/bigtop/repo
Commit: http://git-wip-us.apache.org/repos/asf/bigtop/commit/18370b22
Tree: http://git-wip-us.apache.org/repos/asf/bigtop/tree/18370b22
Diff: http://git-wip-us.apache.org/repos/asf/bigtop/diff/18370b22

Branch: refs/heads/BIGTOP-2320
Commit: 18370b22df9a761287fe4bdbfba5eade90c73455
Parents: b8ab393
Author: Konstantin Boudnik <co...@apache.org>
Authored: Tue Feb 16 21:27:27 2016 +0300
Committer: Konstantin Boudnik <co...@apache.org>
Committed: Thu Mar 17 19:20:01 2016 -0700

----------------------------------------------------------------------
 .../puppet/hieradata/bigtop/cluster.yaml        |   6 +
 bigtop-deploy/puppet/manifests/cluster.pp       |   4 +
 .../puppet/modules/hawq/templates/gpcheck.cnf   |  58 ++++
 .../puppet/modules/hawq/templates/hawq-site.xml | 158 +++++++++
 .../puppet/modules/hawq/templates/hawq.default  |  40 +++
 .../modules/hawq/templates/hdfs-client.xml      | 331 +++++++++++++++++++
 .../puppet/modules/hawq/templates/sysctl.conf   |  24 ++
 .../modules/hawq/templates/yarn-client.xml      | 123 +++++++
 bigtop-deploy/puppet/modules/hawq/tests/init.pp |  16 +
 .../src/common/hadoop/init-hcfs.json            |   1 +
 bigtop-packages/src/common/hawq/hawq-master.svc |   4 +-
 bigtop-packages/src/common/hawq/hawq.default    |  10 +
 bigtop-packages/src/deb/hawq/rules              |   4 +-
 bigtop.bom                                      |   2 +-
 14 files changed, 776 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bigtop/blob/18370b22/bigtop-deploy/puppet/hieradata/bigtop/cluster.yaml
----------------------------------------------------------------------
diff --git a/bigtop-deploy/puppet/hieradata/bigtop/cluster.yaml b/bigtop-deploy/puppet/hieradata/bigtop/cluster.yaml
index de98502..cf8ffcb 100644
--- a/bigtop-deploy/puppet/hieradata/bigtop/cluster.yaml
+++ b/bigtop-deploy/puppet/hieradata/bigtop/cluster.yaml
@@ -174,3 +174,9 @@ zeppelin::server::spark_master_url: "yarn-client"
 zeppelin::server::hiveserver2_url: "jdbc:hive2://%{hiera('hadoop-hive::common::hiveserver2_host')}:%{hiera('hadoop-hive::common::hiveserver2_port')}"
 zeppelin::server::hiveserver2_user: "%{hiera('bigtop::hiveserver2_user')}"
 zeppelin::server::hiveserver2_password: "%{hiera('bigtop::hiveserver2_password')}"
+
+# hawq: the master is co-located with the Hadoop head node unless overridden
+bigtop::hawq_master_node: "%{hiera('bigtop::hadoop_head_node')}"
+bigtop::hawq_master_port: "5432"
+bigtop::hawq_master_datadir: "/var/run/hawq/work/masterdd"
+bigtop::hawq_segment_datadir: "/var/run/hawq/work/segmentdd"

http://git-wip-us.apache.org/repos/asf/bigtop/blob/18370b22/bigtop-deploy/puppet/manifests/cluster.pp
----------------------------------------------------------------------
diff --git a/bigtop-deploy/puppet/manifests/cluster.pp b/bigtop-deploy/puppet/manifests/cluster.pp
index a0be567..f80ef5a 100644
--- a/bigtop-deploy/puppet/manifests/cluster.pp
+++ b/bigtop-deploy/puppet/manifests/cluster.pp
@@ -105,6 +105,9 @@ $roles_map = {
   zeppelin => {
     master => ["zeppelin-server"],
   },
+  hawq => {
+    master => ["hawq"],
+  },
 }
 
 class hadoop_cluster_node (
@@ -159,6 +162,7 @@ class node_with_roles ($roles = hiera("bigtop::roles")) inherits hadoop_cluster_
     "hadoop_hive",
     "hadoop_oozie",
     "hadoop_pig",
+    "hawq",
     "sqoop2",
     "hadoop_zookeeper",
     "hcatalog",

http://git-wip-us.apache.org/repos/asf/bigtop/blob/18370b22/bigtop-deploy/puppet/modules/hawq/templates/gpcheck.cnf
----------------------------------------------------------------------
diff --git a/bigtop-deploy/puppet/modules/hawq/templates/gpcheck.cnf b/bigtop-deploy/puppet/modules/hawq/templates/gpcheck.cnf
new file mode 100644
index 0000000..11ae02f
--- /dev/null
+++ b/bigtop-deploy/puppet/modules/hawq/templates/gpcheck.cnf
@@ -0,0 +1,58 @@
+[global]
+configfile_version = 4
+
+[linux.mount]
+mount.points = /
+
+[linux.sysctl]
+sysctl.kernel.shmmax = 500000000
+sysctl.kernel.shmmni = 4096
+sysctl.kernel.shmall = 4000000000
+sysctl.kernel.sem = 250 512000 100 2048
+sysctl.kernel.sysrq = 1
+sysctl.kernel.core_uses_pid = 1
+sysctl.kernel.msgmnb = 65536
+sysctl.kernel.msgmax = 65536
+sysctl.kernel.msgmni = 2048
+sysctl.net.ipv4.tcp_syncookies = 0
+sysctl.net.ipv4.ip_forward = 0
+sysctl.net.ipv4.conf.default.accept_source_route = 0
+sysctl.net.ipv4.tcp_tw_recycle = 1
+sysctl.net.ipv4.tcp_max_syn_backlog = 200000
+sysctl.net.ipv4.conf.all.arp_filter = 1
+sysctl.net.ipv4.ip_local_port_range = 1281 65535
+sysctl.net.core.netdev_max_backlog = 200000
+sysctl.vm.overcommit_memory = 2
+sysctl.fs.nr_open = 3000000
+sysctl.kernel.threads-max = 798720
+sysctl.kernel.pid_max = 798720
+# increase network
+sysctl.net.core.rmem_max = 2097152
+sysctl.net.core.wmem_max = 2097152
+
+[linux.limits]
+soft.nofile = 2900000
+hard.nofile = 2900000
+soft.nproc  = 131072
+hard.nproc  = 131072
+
+[linux.diskusage]
+diskusage.monitor.mounts = /
+diskusage.monitor.usagemax = 90%
+
+[hdfs]
+dfs.mem.namenode.heap = 40960
+dfs.mem.datanode.heap = 6144
+# in hdfs-site.xml
+dfs.support.append = true
+dfs.client.enable.read.from.local = true
+dfs.block.local-path-access.user = gpadmin
+dfs.datanode.max.transfer.threads = 40960
+dfs.client.socket-timeout = 300000000
+dfs.datanode.socket.write.timeout = 7200000
+dfs.namenode.handler.count = 60
+ipc.server.handler.queue.size = 3300
+dfs.datanode.handler.count = 60
+ipc.client.connection.maxidletime = 3600000
+dfs.namenode.accesstime.precision = -1
+

http://git-wip-us.apache.org/repos/asf/bigtop/blob/18370b22/bigtop-deploy/puppet/modules/hawq/templates/hawq-site.xml
----------------------------------------------------------------------
diff --git a/bigtop-deploy/puppet/modules/hawq/templates/hawq-site.xml b/bigtop-deploy/puppet/modules/hawq/templates/hawq-site.xml
new file mode 100644
index 0000000..713fa40
--- /dev/null
+++ b/bigtop-deploy/puppet/modules/hawq/templates/hawq-site.xml
@@ -0,0 +1,158 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+<configuration>
+  <property>
+    <name>hawq_master_address_host</name>
+    <value><%= @hawq_head %></value>
+    <description>The host name of hawq master.</description>
+  </property>
+
+  <property>
+    <name>hawq_master_address_port</name>
+    <value><%= @hawq_head_port %></value>
+    <description>The port of hawq master.</description>
+  </property>
+
+  <property>
+    <name>hawq_standby_address_host</name>
+    <value>none</value>
+    <description>The host name of hawq standby master.</description>
+  </property>
+
+  <property>
+    <name>hawq_segment_address_port</name>
+    <value>40000</value>
+    <description>The port of hawq segment.</description>
+  </property>
+
+  <property>
+    <name>hawq_dfs_url</name>
+    <value><%= @hadoop_head_node %>:<%= @hadoop_namenode_port %>/hawq_default</value>
+    <description>URL for accessing HDFS.</description>
+  </property>
+
+  <property>
+    <name>hawq_master_directory</name>
+    <value><%= @hawq_masterdata_dir %></value>
+    <description>The directory of hawq master.</description>
+  </property>
+
+  <property>
+    <name>hawq_segment_directory</name>
+    <value><%= @hawq_segmentdata_dir %></value>
+    <description>The directory of hawq segment.</description>
+  </property>
+
+  <property>
+    <name>hawq_master_temp_directory</name>
+    <value>/tmp</value>
+    <description>The temporary directory reserved for hawq master.</description>
+  </property>
+
+  <property>
+    <name>hawq_segment_temp_directory</name>
+    <value>/tmp</value>
+    <description>The temporary directory reserved for hawq segment.</description>
+  </property>
+
+  <!-- HAWQ resource manager parameters -->
+  <property>
+    <name>hawq_global_rm_type</name>
+    <value>none</value>
+    <description>The resource manager type to start for allocating resource.
+    'none' means hawq resource manager exclusively uses whole
+    cluster; 'yarn' means hawq resource manager contacts YARN
+    resource manager to negotiate resource.
+    </description>
+  </property>
+
+  <property>
+    <name>hawq_rm_memory_limit_perseg</name>
+    <value>64GB</value>
+    <description>The limit of memory usage in a hawq segment when
+    hawq_global_rm_type is set 'none'.
+    </description>
+  </property>
+
+  <property>
+    <name>hawq_rm_nvcore_limit_perseg</name>
+    <value>16</value>
+    <description>The limit of virtual core usage in a hawq segment when
+    hawq_global_rm_type is set 'none'.
+    </description>
+  </property>
+
+  <property>
+    <name>hawq_rm_yarn_address</name>
+    <value><%= @hawq_yarn_rm_host %>:<%= @hawq_yarn_rm_port %></value>
+    <description>The address of YARN resource manager server.</description>
+  </property>
+
+  <property>
+    <name>hawq_rm_yarn_scheduler_address</name>
+    <value>localhost:8030</value>
+    <description>The address of YARN scheduler server.</description>
+  </property>
+
+  <property>
+    <name>hawq_rm_yarn_queue_name</name>
+    <value>default</value>
+    <description>The YARN queue name to register hawq resource manager.</description>
+  </property>
+
+  <property>
+    <name>hawq_rm_yarn_app_name</name>
+    <value>hawq</value>
+    <description>The application name to register hawq resource manager in YARN.</description>
+  </property>
+  <!-- HAWQ resource manager parameters end here. -->
+
+  <!-- HAWQ resource enforcement parameters -->
+  <property>
+    <name>hawq_re_cpu_enable</name>
+    <value>false</value>
+    <description>The control to enable/disable CPU resource enforcement.</description>
+  </property>
+
+  <property>
+    <name>hawq_re_cgroup_mount_point</name>
+    <value>/sys/fs/cgroup</value>
+    <description>The mount point of CGroup file system for resource enforcement.
+    For example, /sys/fs/cgroup/cpu/hawq for CPU sub-system.
+    </description>
+  </property>
+
+  <property>
+    <name>hawq_re_cgroup_hierarchy_name</name>
+    <value>hawq</value>
+    <description>The name of the hierarchy to accommodate CGroup directories/files for resource enforcement.
+    For example, /sys/fs/cgroup/cpu/hawq for CPU sub-system.
+    </description>
+  </property>
+
+  <property>
+    <name>hawq_rm_nvseg_perquery_perseg_limit</name>
+    <value>8</value>
+    <description>This is something that init tries to push in</description>
+  </property>
+  <!-- HAWQ resource enforcement parameters end here. -->
+</configuration>

http://git-wip-us.apache.org/repos/asf/bigtop/blob/18370b22/bigtop-deploy/puppet/modules/hawq/templates/hawq.default
----------------------------------------------------------------------
diff --git a/bigtop-deploy/puppet/modules/hawq/templates/hawq.default b/bigtop-deploy/puppet/modules/hawq/templates/hawq.default
new file mode 100644
index 0000000..8190260
--- /dev/null
+++ b/bigtop-deploy/puppet/modules/hawq/templates/hawq.default
@@ -0,0 +1,40 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+export HAWQ_HOME="/usr/lib/hawq"
+## Due to some weird scripting, hawq doesn't work without GPHOME
+export GPHOME=$HAWQ_HOME
+export HAWQ_CONF_DIR="/etc/hawq/conf"
+
+export HAWQ_PID_DIR="/var/run/hawq"
+export HAWQ_LOG_DIR="/var/log/hawq"
+export HAWQ_WORK_DIR="/var/run/hawq/work"
+export HAWQ_IDENT_STRING=hawq
+
+export PATH=$HAWQ_HOME/bin:$HAWQ_HOME/ext/python/bin:$PATH
+export LD_LIBRARY_PATH=$HAWQ_HOME/lib:$HAWQ_HOME/ext/python/lib:$LD_LIBRARY_PATH
+export PYTHONPATH=$HAWQ_HOME/lib/python:$HAWQ_HOME/lib/python/pygresql:$PYTHONPATH
+export OPENSSL_CONF=$HAWQ_CONF_DIR/openssl.cnf
+export LIBHDFS3_CONF=$HAWQ_CONF_DIR/hdfs-client.xml
+export LIBYARN_CONF=$HAWQ_CONF_DIR/yarn-client.xml
+export HAWQSITE_CONF=$HAWQ_CONF_DIR/hawq-site.xml
+
+export HAWQ_MASTER_PORT=5432
+export HAWQ_SEGMENT_PORT=40000
+export HAWQ_MASTERDATA_DIR=<%= @hawq_masterdata_dir %>
+export HAWQ_SEGMENTDATA_DIR=<%= @hawq_segmentdata_dir %>
+
+export HAWQ_TIMEOUT=30
+export HAWQ_SHUTDOWN_MODE=smart

http://git-wip-us.apache.org/repos/asf/bigtop/blob/18370b22/bigtop-deploy/puppet/modules/hawq/templates/hdfs-client.xml
----------------------------------------------------------------------
diff --git a/bigtop-deploy/puppet/modules/hawq/templates/hdfs-client.xml b/bigtop-deploy/puppet/modules/hawq/templates/hdfs-client.xml
new file mode 100644
index 0000000..3f08696
--- /dev/null
+++ b/bigtop-deploy/puppet/modules/hawq/templates/hdfs-client.xml
@@ -0,0 +1,331 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+
+<configuration>
+
+  <!-- KDC
+       <property>
+       <name>hadoop.security.authentication</name>
+       <value>kerberos</value>
+       </property>
+       KDC -->
+
+  <!-- HA
+       <property>
+       <name>dfs.nameservices</name>
+       <value>phdcluster</value>
+       </property>
+
+       <property>
+       <name>dfs.ha.namenodes.phdcluster</name>
+       <value>nn1,nn2</value>
+       </property>
+
+       <property>
+       <name>dfs.namenode.rpc-address.phdcluster.nn1</name>
+       <value>mdw:9000</value>
+       </property>
+
+       <property>
+       <name>dfs.namenode.rpc-address.phdcluster.nn2</name>
+       <value>smdw:9000</value>
+       </property>
+
+<property>
+<name>dfs.namenode.http-address.phdcluster.nn1</name>
+<value>mdw:50070</value>
+</property>
+
+<property>
+<name>dfs.namenode.http-address.phdcluster.nn2</name>
+<value>smdw:50070</value>
+</property>
+
+HA -->
+
+  <!-- RPC client configuration -->
+  <property>
+    <name>rpc.client.timeout</name>
+    <value>3600000</value>
+    <description>
+      timeout interval of a RPC invocation in millisecond. default is 3600000.
+    </description>
+  </property>
+  <property>
+    <name>rpc.client.connect.tcpnodelay</name>
+    <value>true</value>
+    <description>
+      whether set socket TCP_NODELAY to true when connect to RPC server. default is true.
+    </description>
+  </property>
+
+  <property>
+    <name>rpc.client.max.idle</name>
+    <value>10000</value>
+    <description>
+      the max idle time of a RPC connection in millisecond. default is 10000.
+    </description>
+  </property>
+
+  <property>
+    <name>rpc.client.ping.interval</name>
+    <value>10000</value>
+    <description>
+      the interval which the RPC client send a heart beat to server. 0 means disable, default is 10000.
+    </description>
+  </property>
+
+  <property>
+    <name>rpc.client.connect.timeout</name>
+    <value>600000</value>
+    <description>
+      the timeout interval in millisecond when the RPC client is trying to setup the connection. default is 600000.
+    </description>
+  </property>
+
+  <property>
+    <name>rpc.client.connect.retry</name>
+    <value>10</value>
+    <description>
+      the max retry times if the RPC client fail to setup the connection to server. default is 10.
+    </description>
+  </property>
+
+  <property>
+    <name>rpc.client.read.timeout</name>
+    <value>3600000</value>
+    <description>
+      the timeout interval in millisecond when the RPC client is trying to read from server. default is 3600000.
+    </description>
+  </property>
+
+  <property>
+    <name>rpc.client.write.timeout</name>
+    <value>3600000</value>
+    <description>
+      the timeout interval in millisecond when the RPC client is trying to write to server. default is 3600000.
+    </description>
+  </property>
+
+  <property>
+    <name>rpc.client.socket.linger.timeout</name>
+    <value>-1</value>
+    <description>
+      set value to socket SO_LINGER when connect to RPC server. -1 means default OS value. default is -1.
+    </description>
+  </property>
+
+  <!-- dfs client configuration -->
+  <property>
+    <name>dfs.client.read.shortcircuit</name>
+    <value>true</value>
+    <description>
+      whether reading block file bypass datanode if the block and the client are on the same node. default is true.
+    </description>
+  </property>
+
+  <property>
+    <name>dfs.default.replica</name>
+    <value>3</value>
+    <description>
+      the default number of replica. default is 3.
+    </description>
+  </property>
+
+  <property>
+    <name>dfs.prefetchsize</name>
+    <value>10</value>
+    <description>
+      the default number of blocks which information will be prefetched. default is 10.
+    </description>
+  </property>
+
+  <property>
+    <name>dfs.client.failover.max.attempts</name>
+    <value>15</value>
+    <description>
+      if multiple namenodes are configured, it is the max retry times when the dfs client tries to issue a RPC call. default is 15.
+    </description>
+  </property>
+
+  <property>
+    <name>dfs.default.blocksize</name>
+    <value>134217728</value>
+    <description>
+      default block size. default is 134217728.
+    </description>
+  </property>
+
+  <property>
+    <name>dfs.client.log.severity</name>
+    <value>INFO</value>
+    <description>
+      the minimal log severity level, valid values include FATAL, ERROR, INFO, DEBUG1, DEBUG2, DEBUG3. default is INFO.
+    </description>
+  </property>
+
+  <!-- input client configuration -->
+  <property>
+    <name>input.connect.timeout</name>
+    <value>600000</value>
+    <description>
+      the timeout interval in millisecond when the input stream is trying to setup the connection to datanode. default is 600000.
+    </description>
+  </property>
+
+  <property>
+    <name>input.read.timeout</name>
+    <value>3600000</value>
+    <description>
+      the timeout interval in millisecond when the input stream is trying to read from datanode. default is 3600000.
+    </description>
+  </property>
+
+  <property>
+    <name>input.write.timeout</name>
+    <value>3600000</value>
+    <description>
+      the timeout interval in millisecond when the input stream is trying to write to datanode. default is 3600000.
+    </description>
+  </property>
+
+  <property>
+    <name>input.localread.default.buffersize</name>
+    <value>2097152</value>
+    <description>
+      number of bytes of the buffer which is used to hold the data from block file and verify checksum.
+      it is only used when "dfs.client.read.shortcircuit" is set to true. default is 1048576.
+    </description>
+  </property>
+
+  <property>
+    <name>input.localread.blockinfo.cachesize</name>
+    <value>1000</value>
+    <description>
+      the size of block file path information cache. default is 1000.
+    </description>
+  </property>
+
+  <property>
+    <name>input.read.getblockinfo.retry</name>
+    <value>3</value>
+    <description>
+      the max retry times when the client fail to get block information from namenode. default is 3.
+    </description>
+  </property>
+
+  <!-- output client configuration -->
+  <property>
+    <name>output.replace-datanode-on-failure</name>
+    <value>false</value>
+    <description>
+      whether the client adds a new datanode into the pipeline if the number of nodes in the pipeline is less than the specified number of replicas. default is true.
+    </description>
+  </property>
+
+  <property>
+    <name>output.default.chunksize</name>
+    <value>512</value>
+    <description>
+      the number of bytes of a chunk in pipeline. default is 512.
+    </description>
+  </property>
+
+  <property>
+    <name>output.default.packetsize</name>
+    <value>65536</value>
+    <description>
+      the number of bytes of a packet in pipeline. default is 65536.
+    </description>
+  </property>
+
+  <property>
+    <name>output.default.write.retry</name>
+    <value>10</value>
+    <description>
+      the max retry times when the client fail to setup the pipeline. default is 10.
+    </description>
+  </property>
+
+  <property>
+    <name>output.connect.timeout</name>
+    <value>600000</value>
+    <description>
+      the timeout interval in millisecond when the output stream is trying to setup the connection to datanode. default is 600000.
+    </description>
+  </property>
+
+  <property>
+    <name>output.read.timeout</name>
+    <value>3600000</value>
+    <description>
+      the timeout interval in millisecond when the output stream is trying to read from datanode. default is 3600000.
+    </description>
+  </property>
+
+  <property>
+    <name>output.write.timeout</name>
+    <value>3600000</value>
+    <description>
+      the timeout interval in millisecond when the output stream is trying to write to datanode. default is 3600000.
+    </description>
+  </property>
+
+  <property>
+    <name>output.packetpool.size</name>
+    <value>1024</value>
+    <description>
+      the max number of packets in a file's packet pool. default is 1024.
+    </description>
+  </property>
+
+  <property>
+    <name>output.close.timeout</name>
+    <value>900000</value>
+    <description>
+      the timeout interval in millisecond when close an output stream. default is 900000.
+    </description>
+  </property>
+
+  <property>
+    <name>dfs.domain.socket.path</name>
+    <value>/var/lib/hadoop-hdfs/dn_socket</value>
+    <description>
+      Optional.  This is a path to a UNIX domain socket that will be used for
+      communication between the DataNode and local HDFS clients.
+      If the string "_PORT" is present in this path, it will be replaced by the
+      TCP port of the DataNode.
+    </description>
+  </property>
+
+  <property>
+    <name>dfs.client.use.legacy.blockreader.local</name>
+    <value>false</value>
+    <description>
+      Legacy short-circuit reader implementation based on HDFS-2246 is used
+      if this configuration parameter is true.
+      This is for the platforms other than Linux
+      where the new implementation based on HDFS-347 is not available.
+    </description>
+  </property>
+
+</configuration>

http://git-wip-us.apache.org/repos/asf/bigtop/blob/18370b22/bigtop-deploy/puppet/modules/hawq/templates/sysctl.conf
----------------------------------------------------------------------
diff --git a/bigtop-deploy/puppet/modules/hawq/templates/sysctl.conf b/bigtop-deploy/puppet/modules/hawq/templates/sysctl.conf
new file mode 100644
index 0000000..2ac1598
--- /dev/null
+++ b/bigtop-deploy/puppet/modules/hawq/templates/sysctl.conf
@@ -0,0 +1,24 @@
+kernel.shmmax = 1000000000
+kernel.shmmni = 4096
+kernel.shmall = 4000000000
+kernel.sem = 250 512000 100 2048
+kernel.sysrq = 1
+kernel.core_uses_pid = 1
+kernel.msgmnb = 65536
+kernel.msgmax = 65536
+kernel.msgmni = 2048
+net.ipv4.tcp_syncookies = 0
+net.ipv4.ip_forward = 0
+net.ipv4.conf.default.accept_source_route = 0
+net.ipv4.tcp_tw_recycle = 1
+net.ipv4.tcp_max_syn_backlog = 200000
+net.ipv4.conf.all.arp_filter = 1
+net.ipv4.ip_local_port_range = 1281 65535
+net.core.netdev_max_backlog = 200000
+vm.overcommit_memory = 2
+fs.nr_open = 3000000
+kernel.threads-max = 798720
+kernel.pid_max = 798720
+# increase network
+net.core.rmem_max=2097152
+net.core.wmem_max=2097152

http://git-wip-us.apache.org/repos/asf/bigtop/blob/18370b22/bigtop-deploy/puppet/modules/hawq/templates/yarn-client.xml
----------------------------------------------------------------------
diff --git a/bigtop-deploy/puppet/modules/hawq/templates/yarn-client.xml b/bigtop-deploy/puppet/modules/hawq/templates/yarn-client.xml
new file mode 100644
index 0000000..a5f9df7
--- /dev/null
+++ b/bigtop-deploy/puppet/modules/hawq/templates/yarn-client.xml
@@ -0,0 +1,123 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+
+<configuration>
+
+  <!-- KDC
+       <property>
+       <name>hadoop.security.authentication</name>
+       <value>kerberos</value>
+       </property>
+       KDC -->
+
+  <!-- HA
+       <property>
+       <name>yarn.resourcemanager.ha</name>
+       <value>%RESOURCEMANAGER%:8032,%RESOURCEMANAGER2%:8032</value>
+       </property>
+       <property>
+       <name>yarn.resourcemanager.scheduler.ha</name>
+       <value>%RESOURCEMANAGER%:8030,%RESOURCEMANAGER2%:8030</value>
+       </property>
+       HA -->
+
+  <!-- RPC client configuration -->
+  <property>
+    <name>rpc.client.timeout</name>
+    <value>3600000</value>
+    <description>
+      timeout interval of a RPC invocation in millisecond. default is 3600000.
+    </description>
+  </property>
+
+  <property>
+    <name>rpc.client.connect.tcpnodelay</name>
+    <value>true</value>
+    <description>
+      whether set socket TCP_NODELAY to true when connect to RPC server. default is true.
+    </description>
+  </property>
+
+  <property>
+    <name>rpc.client.max.idle</name>
+    <value>10000</value>
+    <description>
+      the max idle time of a RPC connection in millisecond. default is 10000.
+    </description>
+  </property>
+
+  <property>
+    <name>rpc.client.ping.interval</name>
+    <value>10000</value>
+    <description>
+      the interval which the RPC client send a heart beat to server. 0 means disable, default is 10000.
+    </description>
+  </property>
+
+  <property>
+    <name>rpc.client.connect.timeout</name>
+    <value>600000</value>
+    <description>
+      the timeout interval in millisecond when the RPC client is trying to setup the connection. default is 600000.
+    </description>
+  </property>
+
+  <property>
+    <name>rpc.client.connect.retry</name>
+    <value>10</value>
+    <description>
+      the max retry times if the RPC client fail to setup the connection to server. default is 10.
+    </description>
+  </property>
+
+  <property>
+    <name>rpc.client.read.timeout</name>
+    <value>3600000</value>
+    <description>
+      the timeout interval in millisecond when the RPC client is trying to read from server. default is 3600000.
+    </description>
+  </property>
+
+  <property>
+    <name>rpc.client.write.timeout</name>
+    <value>3600000</value>
+    <description>
+      the timeout interval in millisecond when the RPC client is trying to write to server. default is 3600000.
+    </description>
+  </property>
+
+  <property>
+    <name>rpc.client.socket.linger.timeout</name>
+    <value>-1</value>
+    <description>
+      set value to socket SO_LINGER when connect to RPC server. -1 means default OS value. default is -1.
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.client.failover.max.attempts</name>
+    <value>15</value>
+    <description>
+      if multiple resource managers are configured, it is the max retry times when the yarn client tries to issue a RPC call. default is 15.
+    </description>
+  </property>
+</configuration>

http://git-wip-us.apache.org/repos/asf/bigtop/blob/18370b22/bigtop-deploy/puppet/modules/hawq/tests/init.pp
----------------------------------------------------------------------
diff --git a/bigtop-deploy/puppet/modules/hawq/tests/init.pp b/bigtop-deploy/puppet/modules/hawq/tests/init.pp
new file mode 100644
index 0000000..37a5a08
--- /dev/null
+++ b/bigtop-deploy/puppet/modules/hawq/tests/init.pp
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+hawq::cluster_node { "test-hawq-node": }

http://git-wip-us.apache.org/repos/asf/bigtop/blob/18370b22/bigtop-packages/src/common/hadoop/init-hcfs.json
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/common/hadoop/init-hcfs.json b/bigtop-packages/src/common/hadoop/init-hcfs.json
index f71f385..dea5f90 100644
--- a/bigtop-packages/src/common/hadoop/init-hcfs.json
+++ b/bigtop-packages/src/common/hadoop/init-hcfs.json
@@ -64,6 +64,7 @@
     ["/tmp/hadoop-yarn","777","mapred","mapred"],
     ["/var/log/hadoop-yarn/apps","1777","yarn","mapred"],
     ["/hbase",null,"hbase","hbase"],
+    ["/hawq_default","755","hawq","hawq"],
     ["/solr",null,"solr","solr"],
     ["/benchmarks","777",null,null],
     ["/user","755","HCFS_SUPER_USER",null],

http://git-wip-us.apache.org/repos/asf/bigtop/blob/18370b22/bigtop-packages/src/common/hawq/hawq-master.svc
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/common/hawq/hawq-master.svc b/bigtop-packages/src/common/hawq/hawq-master.svc
index 4fa5097..e5aca0d 100644
--- a/bigtop-packages/src/common/hawq/hawq-master.svc
+++ b/bigtop-packages/src/common/hawq/hawq-master.svc
@@ -34,7 +34,7 @@ start() {
         exit 0
     fi
 
-    su -s /bin/bash ${SVC_USER} -c "${EXEC_PATH} start $SVC_OPTS"
+    su -s /bin/bash ${SVC_USER} -c "${EXEC_PATH} start master $SVC_OPTS"
 
     checkstatusofproc
     RETVAL=$?
@@ -51,7 +51,7 @@ cat <<'__EOT__'
 stop() {
     log_success_msg "Stopping $DESC (${DAEMON}): "
 
-    su -s /bin/bash ${SVC_USER} -c "${EXEC_PATH} stop $SVC_OPTS"
+    su -s /bin/bash ${SVC_USER} -c "${EXEC_PATH} stop master $SVC_OPTS"
 
     sleep 3
     RETVAL=$?

http://git-wip-us.apache.org/repos/asf/bigtop/blob/18370b22/bigtop-packages/src/common/hawq/hawq.default
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/common/hawq/hawq.default b/bigtop-packages/src/common/hawq/hawq.default
index f8cf6e1..574e845 100644
--- a/bigtop-packages/src/common/hawq/hawq.default
+++ b/bigtop-packages/src/common/hawq/hawq.default
@@ -21,6 +21,8 @@ export HAWQ_LOG_DIR="/var/log/hawq"
 export HAWQ_WORK_DIR="/var/run/hawq/work"
 export HAWQ_IDENT_STRING=hawq
 
+export HAWQ_DATA_DIR=$HAWQ_WORK_DIR/hawq-data-directory
+
 export PATH=$HAWQ_HOME/bin:$HAWQ_HOME/ext/python/bin:$PATH
 export LD_LIBRARY_PATH=$HAWQ_HOME/lib:$HAWQ_HOME/ext/python/lib:$LD_LIBRARY_PATH
 export PYTHONPATH=$HAWQ_HOME/lib/python:$HAWQ_HOME/lib/python/pygresql:$PYTHONPATH
@@ -28,3 +30,11 @@ export OPENSSL_CONF=$HAWQ_CONF_DIR/openssl.cnf
 export LIBHDFS3_CONF=$HAWQ_CONF_DIR/hdfs-client.xml
 export LIBYARN_CONF=$HAWQ_CONF_DIR/yarn-client.xml
 export HAWQSITE_CONF=$HAWQ_CONF_DIR/hawq-site.xml
+
+export HAWQ_MASTER_PORT=5432
+export HAWQ_SEGMENT_PORT=40000
+export HAWQ_MASTERDATA_DIR=$HAWQ_DATA_DIR/masterdd
+export HAWQ_SEGMENTDATA_DIR=$HAWQ_DATA_DIR/segmentdd
+
+export HAWQ_TIMEOUT=30
+export HAWQ_SHUTDOWN_MODE=smart

http://git-wip-us.apache.org/repos/asf/bigtop/blob/18370b22/bigtop-packages/src/deb/hawq/rules
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/deb/hawq/rules b/bigtop-packages/src/deb/hawq/rules
index b4a3c9a..1e256a1 100755
--- a/bigtop-packages/src/deb/hawq/rules
+++ b/bigtop-packages/src/deb/hawq/rules
@@ -30,7 +30,7 @@ hawq_pkg_name=hawq
 	dh $@
 
 override_dh_auto_build:
-	bash debian/do-component-build 
+	bash debian/do-component-build
 
 
 override_dh_auto_install:
@@ -46,7 +46,7 @@ override_dh_auto_install:
 	bash debian/init.d.tmpl debian/hawq-master.svc deb debian/${hawq_pkg_name}-master-service.init
 	bash debian/init.d.tmpl debian/hawq-segment.svc deb debian/${hawq_pkg_name}-segment-service.init
 
-## Let's override the auto_configure and auto_clean to make sure existing 
+## Let's override the auto_configure and auto_clean to make sure existing
 ## top-level Makefile doesn't interfere with the package creation
 override_dh_auto_configure:
 

http://git-wip-us.apache.org/repos/asf/bigtop/blob/18370b22/bigtop.bom
----------------------------------------------------------------------
diff --git a/bigtop.bom b/bigtop.bom
index f8a6878..5670546 100644
--- a/bigtop.bom
+++ b/bigtop.bom
@@ -90,7 +90,7 @@ bigtop {
     zookeeper:['hadoop', 'hbase'],
     hadoop:['ignite-hadoop', 'hbase', 'crunch', 'pig', 'hive', 'tez', 'sqoop', 'sqoop2',
       'oozie', 'mahout', 'flume', 'giraph', 'solr', 'crunch', 'spark',
-      'phoenix', 'tachyon', 'kafka', 'ycsb', 'kite', 'hama', 'zeppelin',
+      'phoenix', 'tachyon', 'kafka', 'ycsb', 'kite', 'hama', 'zeppelin', 'hawq',
       'tajo', 'apex'
     ],
     hbase:['phoenix','giraph','ycsb'],