You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by sw...@apache.org on 2016/12/01 08:14:10 UTC
[27/50] ambari git commit: AMBARI-19003. Perf: Fix deploy-gce-perf-cluster.py to deploy separate server onto own cluster with different settings for more cores and MySQL DB (alejandro)
AMBARI-19003. Perf: Fix deploy-gce-perf-cluster.py to deploy separate server onto own cluster with different settings for more cores and MySQL DB (alejandro)
Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/052da577
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/052da577
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/052da577
Branch: refs/heads/branch-feature-AMBARI-18901
Commit: 052da577bcf5c3130337b7e783bda2b9c0207127
Parents: f8bfa05
Author: Alejandro Fernandez <af...@hortonworks.com>
Authored: Tue Nov 29 12:43:58 2016 -0800
Committer: Alejandro Fernandez <af...@hortonworks.com>
Committed: Tue Nov 29 12:43:58 2016 -0800
----------------------------------------------------------------------
contrib/utils/perf/deploy-gce-perf-cluster.py | 243 ++++++++++++++-------
1 file changed, 169 insertions(+), 74 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/ambari/blob/052da577/contrib/utils/perf/deploy-gce-perf-cluster.py
----------------------------------------------------------------------
diff --git a/contrib/utils/perf/deploy-gce-perf-cluster.py b/contrib/utils/perf/deploy-gce-perf-cluster.py
index 95ed98f..4737c6f 100644
--- a/contrib/utils/perf/deploy-gce-perf-cluster.py
+++ b/contrib/utils/perf/deploy-gce-perf-cluster.py
@@ -148,6 +148,7 @@ class SCP:
return {"exitstatus": scpstat.returncode, "log": log, "errormsg": errorMsg}
+
# main method to parse arguments from user and start work
def main():
parser = argparse.ArgumentParser(
@@ -178,6 +179,30 @@ def main():
args = parser.parse_args()
do_work(args)
+
+def do_work(args):
+ """
+ Check that all required args are passed in. If so, deploy the cluster.
+ :param args: Command line args
+ """
+ if not args.controller:
+ raise Exception("GCE controller ip address is not defined!")
+
+ if not args.key:
+ raise Exception("Path to gce ssh key is not defined!")
+
+ if not args.cluster_suffix:
+ raise Exception("Cluster name suffix is not defined!")
+
+ if not args.agent_prefix:
+ raise Exception("Agent name prefix is not defined!")
+
+ if not args.agents_count:
+ raise Exception("Agents count for whole cluster is not defined (will put 50 Agents per VM)!")
+
+ deploy_cluster(args)
+
+
def deploy_cluster(args):
"""
Process cluster deployment
@@ -186,49 +211,54 @@ def deploy_cluster(args):
# When dividing, need to get the ceil.
number_of_nodes = ((args.agents_count - 1) / NUMBER_OF_AGENTS_ON_HOST) + 1
- # trying to create cluster with needed params
- print "Creating cluster {0}-{1} with {2} large nodes on centos6...".format(cluster_prefix, args.cluster_suffix, str(number_of_nodes))
- execute_command(args, args.controller, "/usr/sbin/gce up {0}-{1} {2} --centos6 --large".format(cluster_prefix, args.cluster_suffix, str(number_of_nodes)),
- "Failed to create cluster, probably not enough resources!", "-tt")
-
- # VMs are not accessible immediately
- time.sleep(10)
+ # In case of an error after creating VMs, can simply comment out this function to run again without creating VMs.
+ create_vms(args, number_of_nodes)
# getting list of vms information like hostname and ip address
print "Getting list of virtual machines from cluster..."
# Dictionary from host name to IP
- vms = get_vms_list(args)
+ (server_dict, agents_dict) = get_vms_list(args)
# check number of nodes in cluster to be the same as user asked
print "Checking count of created nodes in cluster..."
- if not vms or len(vms) < number_of_nodes:
- raise Exception("Cannot bring up enough nodes. Requested {0}, but got {1}. Probably not enough resources!".format(number_of_nodes, len(vms)))
+ if not agents_dict or len(agents_dict) < number_of_nodes:
+ raise Exception("Cannot bring up enough nodes. Requested {0}, but got {1}. Probably not enough resources!".format(number_of_nodes, len(agents_dict)))
- print "GCE cluster was successfully created!"
- pretty_print_vms(vms)
+ print "GCE cluster was successfully created!\n"
# installing/starting ambari-server and ambari-agents on each host
- server_host_name = sorted(vms.items())[0][0]
- server_installed = False
+ server_item = server_dict.items()[0]
+ server_host_name = server_item[0]
+ server_ip = server_item[1]
+ print "=========================="
+ print "Server Hostname: %s" % server_host_name
+ print "Server IP: %s" % server_ip
+ print "==========================\n"
+
+ # Sort the agents by hostname into a list.
+ sorted_agents = sort_hosts(agents_dict)
+ pretty_print_vms(sorted_agents)
- print "Creating server.sh script (which will be executed on server to install/configure/start ambari-server and ambari-agent)..."
- create_server_script(args, server_host_name)
+ print "Creating server.sh script (which will be executed on server to install/configure/start ambari-server)..."
+ create_server_script(server_host_name)
print "Creating agent.sh script (which will be executed on agent hosts to install/configure/start ambari-agent..."
- create_agent_script(args, server_host_name)
+ create_agent_script(server_host_name)
time.sleep(10)
+ prepare_server(args, server_host_name, server_ip)
+
# If the user asks for a number of agents that is not a multiple of 50, then only create how many are needed instead
# of 50 on every VM.
num_agents_left_to_create = args.agents_count
-
start_num = 1
- for (hostname, ip) in sorted(vms.items()):
+
+ for (hostname, ip) in sorted_agents:
num_agents_on_this_host = min(num_agents_left_to_create, NUMBER_OF_AGENTS_ON_HOST)
print "=========================="
- print "Working on VM {0} that will contain hosts %d - %d".format(hostname, start_num, start_num + num_agents_on_this_host - 1)
+ print "Working on VM {0} that will contain hosts {1} - {2}".format(hostname, start_num, start_num + num_agents_on_this_host - 1)
# The agent multiplier config will be different on each VM.
@@ -236,90 +266,117 @@ def deploy_cluster(args):
start_num += num_agents_on_this_host
num_agents_left_to_create -= num_agents_on_this_host
- if not server_installed:
- remote_path = "/server.sh"
- local_path = "server.sh"
- print "Copying server.sh to {0}...".format(hostname)
- put_file(args, ip, local_path, remote_path, "Failed to copy file!")
-
- print "Generating agent-multiplier.conf"
- execute_command(args, ip, cmd_generate_multiplier_conf, "Failed to generate agent-multiplier.conf on host {0}".format(hostname))
-
- print "Executing remote ssh command (set correct permissions and start executing server.sh in separate process) on {0}...".format(hostname)
- execute_command(args, ip, "cd /; chmod 777 server.sh; nohup ./server.sh >/server.log 2>&1 &",
- "Install/configure/start server script failed!")
- server_installed = True
- else:
- remote_path = "/agent.sh"
- local_path = "agent.sh"
- print "Copying agent.sh to {0}...".format(hostname)
- put_file(args, ip, local_path, remote_path, "Failed to copy file!")
-
- print "Generating agent-multiplier.conf"
- execute_command(args, ip, cmd_generate_multiplier_conf, "Failed to generate agent-multiplier.conf on host {0}".format(hostname))
-
- print "Executing remote ssh command (set correct permissions and start executing agent.sh in separate process) on {0}...".format(hostname)
- execute_command(args, ip, "cd /; chmod 777 agent.sh; nohup ./agent.sh >/agent.log 2>&1 &",
- "Install/configure start agent script failed!")
+ prepare_agent(args, hostname, ip, cmd_generate_multiplier_conf)
+ pass
print "All scripts where successfully copied and started on all hosts. " \
"\nPay attention that server.sh script need 5 minutes to finish and agent.sh need 3 minutes!"
-def do_work(args):
+def create_vms(args, number_of_nodes):
"""
- Check that all required args are passed in. If so, deploy the cluster.
+ Request the server and VMs for the agents from GCE.
:param args: Command line args
+ :param number_of_nodes: Number of VMs to request.
"""
- if not args.controller:
- raise Exception("GCE controller ip address is not defined!")
+ print "Creating server VM {0}-server-{1} with xxlarge nodes on centos6...".format(cluster_prefix, args.cluster_suffix)
+ execute_command(args, args.controller, "/usr/sbin/gce up {0}-server-{1} 1 --centos6 --xxlarge".format(cluster_prefix, args.cluster_suffix),
+ "Failed to create server, probably not enough resources!", "-tt")
+ time.sleep(10)
- if not args.key:
- raise Exception("Path to gce ssh key is not defined!")
+ # trying to create cluster with needed params
+ print "Creating agent VMs {0}-agent-{1} with {2} large nodes on centos6...".format(cluster_prefix, args.cluster_suffix, str(number_of_nodes))
+ execute_command(args, args.controller, "/usr/sbin/gce up {0}-agent-{1} {2} --centos6 --large".format(cluster_prefix, args.cluster_suffix, str(number_of_nodes)),
+ "Failed to create cluster VMs, probably not enough resources!", "-tt")
- if not args.cluster_suffix:
- raise Exception("Cluster name suffix is not defined!")
+ # VMs are not accessible immediately
+ time.sleep(10)
- if not args.agent_prefix:
- raise Exception("Agent name prefix is not defined!")
- if not args.agents_count:
- raise Exception("Agents count for whole cluster is not defined (will put 50 Agents per VM)!")
+def prepare_server(args, hostname, ip):
+ remote_path = "/server.sh"
+ local_path = "server.sh"
+ print "Copying server.sh to {0}...".format(hostname)
+ put_file(args, ip, local_path, remote_path, "Failed to copy file!")
- deploy_cluster(args)
+ print "Executing remote ssh command (set correct permissions and start executing server.sh in separate process) on {0}...".format(hostname)
+ execute_command(args, ip, "cd /; chmod 777 server.sh; nohup ./server.sh >/server.log 2>&1 &",
+ "Install/configure/start server script failed!")
-def create_server_script(args, server_host_name):
+def prepare_agent(args, hostname, ip, cmd_generate_multiplier_conf):
+ remote_path = "/agent.sh"
+ local_path = "agent.sh"
+ print "Copying agent.sh to {0}...".format(hostname)
+ put_file(args, ip, local_path, remote_path, "Failed to copy file!")
+
+ print "Generating agent-multiplier.conf"
+ execute_command(args, ip, cmd_generate_multiplier_conf, "Failed to generate agent-multiplier.conf on host {0}".format(hostname))
+
+ print "Executing remote ssh command (set correct permissions and start executing agent.sh in separate process) on {0}...".format(hostname)
+ execute_command(args, ip, "cd /; chmod 777 agent.sh; nohup ./agent.sh >/agent.log 2>&1 &",
+ "Install/configure start agent script failed!")
+
+
+def create_server_script(server_host_name):
"""
Creating server.sh script in the same dir where current script is located
server.sh script will install, configure and start ambari-server and ambari-agent on host
- :param args: Command line args
:param server_host_name: Server host name
"""
+ # ambari-server setup <options> may not work properly, so doing several calls like
+ # echo "arg=value" >> .../ambari.properties
+
contents = "#!/bin/bash\n" + \
"wget -O /etc/yum.repos.d/ambari.repo {0}\n".format(ambari_repo_file_url) + \
- "yum clean all; yum install git ambari-server ambari-agent -y\n" + \
- "cd /home; git clone https://github.com/apache/ambari.git\n" + \
+ "yum clean all; yum install git ambari-server -y\n" + \
+ "mkdir /home ; cd /home ; git clone https://github.com/apache/ambari.git\n" + \
"cp -r /home/ambari/ambari-server/src/main/resources/stacks/PERF /var/lib/ambari-server/resources/stacks/PERF\n" + \
"cp -r /home/ambari/ambari-server/src/main/resources/stacks/PERF /var/lib/ambari-agent/cache/stacks/PERF\n" + \
+ "\n" + \
+ "\n" + \
+ "yum install mysql-connector-java* -y\n" + \
+ "yum install mysql-server -y\n" + \
+ "service mysqld start\n" + \
+ "mysql -uroot -e \"CREATE DATABASE ambari;\"\n" + \
+ "mysql -uroot -e \"SOURCE /var/lib/ambari-server/resources/Ambari-DDL-MySQL-CREATE.sql;\" ambari\n" + \
+ "mysql -uroot -e \"CREATE USER 'ambari'@'%' IDENTIFIED BY 'bigdata';\"\n" + \
+ "mysql -uroot -e \"GRANT ALL PRIVILEGES ON *.* TO 'ambari'@'%%';\"\n" + \
+ "mysql -uroot -e \"CREATE USER 'ambari'@'localhost' IDENTIFIED BY 'bigdata';\"\n" + \
+ "mysql -uroot -e \"GRANT ALL PRIVILEGES ON *.* TO 'ambari'@'localhost';\"\n" + \
+ "mysql -uroot -e \"CREATE USER 'ambari'@'{0}' IDENTIFIED BY 'bigdata';\"\n".format(server_host_name) + \
+ "mysql -uroot -e \"GRANT ALL PRIVILEGES ON *.* TO 'ambari'@'{0}';\"\n".format(server_host_name) + \
+ "mysql -uroot -e \"FLUSH PRIVILEGES;\"\n" + \
+ "\n" + \
+ "\n" + \
"ambari-server setup -s\n" + \
+ "ambari-server setup --database mysql --jdbc-db=mysql --jdbc-driver=/usr/share/java/mysql-connector-java.jar --databasehost=localhost --databaseport=3306 --databasename=ambari --databaseusername=ambari --databasepassword=bigdata\n" + \
+ "sed -i -e 's/=postgres/=mysql/g' /etc/ambari-server/conf/ambari.properties\n" + \
+ "sed -i -e 's/server.persistence.type=local/server.persistence.type=remote/g' /etc/ambari-server/conf/ambari.properties\n" + \
+ "sed -i -e 's/local.database.user=postgres//g' /etc/ambari-server/conf/ambari.properties\n" + \
+ "sed -i -e 's/server.jdbc.postgres.schema=ambari//g' /etc/ambari-server/conf/ambari.properties\n" + \
"sed -i -e 's/false/true/g' /var/lib/ambari-server/resources/stacks/PERF/1.0/metainfo.xml\n" + \
+ "\n" + \
+ "echo 'server.jdbc.driver=com.mysql.jdbc.Driver' >> /etc/ambari-server/conf/ambari.properties\n" + \
+ "echo 'server.jdbc.rca.url=jdbc:mysql://{0}:3306/ambari' >> /etc/ambari-server/conf/ambari.properties\n".format(server_host_name) + \
+ "echo 'server.jdbc.rca.driver=com.mysql.jdbc.Driver' >> /etc/ambari-server/conf/ambari.properties\n" + \
+ "echo 'server.jdbc.url=jdbc:mysql://{0}:3306/ambari' >> /etc/ambari-server/conf/ambari.properties\n".format(server_host_name) + \
+ "echo 'server.jdbc.port=3306' >> /etc/ambari-server/conf/ambari.properties\n" + \
+ "echo 'server.jdbc.hostname=localhost' >> /etc/ambari-server/conf/ambari.properties\n" + \
+ "echo 'server.jdbc.driver.path=/usr/share/java/mysql-connector-java.jar' >> /etc/ambari-server/conf/ambari.properties\n" + \
+ "\n" + \
"ambari-server start --skip-database-check\n" + \
- "sed -i -e 's/hostname=localhost/hostname={0}/g' /etc/ambari-agent/conf/ambari-agent.ini\n".format(server_host_name) + \
- "sed -i -e 's/agent]/agent]\\nhostname_script={0}\\npublic_hostname_script={1}\\n/1' /etc/ambari-agent/conf/ambari-agent.ini\n".format(hostname_script, public_hostname_script) + \
- "python /home/ambari/ambari-agent/conf/unix/agent-multiplier.py start\n" + \
"exit 0"
with open("server.sh", "w") as f:
f.write(contents)
-def create_agent_script(args, server_host_name):
+def create_agent_script(server_host_name):
"""
Creating agent.sh script in the same dir where current script is located
agent.sh script will install, configure and start ambari-agent on host
- :param args: Command line args
:param server_host_name: Server host name
"""
@@ -327,7 +384,7 @@ def create_agent_script(args, server_host_name):
contents = "#!/bin/bash\n" + \
"wget -O /etc/yum.repos.d/ambari.repo {0}\n".format(ambari_repo_file_url) + \
"yum clean all; yum install git ambari-agent -y\n" + \
- "cd /home; git clone https://github.com/apache/ambari.git\n" + \
+ "mkdir /home ; cd /home; git clone https://github.com/apache/ambari.git\n" + \
"cp -r /home/ambari/ambari-server/src/main/resources/stacks/PERF /var/lib/ambari-agent/cache/stacks/PERF\n" + \
"sed -i -e 's/hostname=localhost/hostname={0}/g' /etc/ambari-agent/conf/ambari-agent.ini\n".format(server_host_name) + \
"sed -i -e 's/agent]/agent]\\nhostname_script={0}\\npublic_hostname_script={1}\\n/1' /etc/ambari-agent/conf/ambari-agent.ini\n".format(hostname_script, public_hostname_script) + \
@@ -381,12 +438,27 @@ def put_file(args, ip, local_file, remote_file, fail_message, login='root'):
def get_vms_list(args):
"""
+ Get tuple of (x, y) where
+ x = dictionary from single server host name to ip
+ y = dictionary from multiple agent host names to ip
+ :param args: Command line arguments
+ :return: Tuple of dictionaries of hostnames and ip for server and agents.
+ """
+ # Get the server.
+ server = __get_vms_list_from_name(args, "{0}-server-{1}".format(cluster_prefix, args.cluster_suffix))
+
+ # Get the agents
+ agents = __get_vms_list_from_name(args, "{0}-agent-{1}".format(cluster_prefix, args.cluster_suffix))
+
+ return (server, agents)
+
+def __get_vms_list_from_name(args, cluster_name):
+ """
Method to parse "gce fqdn {cluster-name}" command output and get hosts and ips pairs for every host in cluster
:param args: Command line args
:return: Mapping of VM host name to ip.
"""
-
- gce_fqdb_cmd = '/usr/sbin/gce fqdn {0}-{1}'.format(cluster_prefix, args.cluster_suffix)
+ gce_fqdb_cmd = '/usr/sbin/gce fqdn {0}'.format(cluster_name)
out = execute_command(args, args.controller, gce_fqdb_cmd, "Failed to get VMs list!", "-tt")
lines = out.split('\n')
#print "LINES=" + str(lines)
@@ -405,13 +477,36 @@ def get_vms_list(args):
raise Exception('Cannot parse "{0}"'.format(lines))
+def sort_hosts(hosts):
+ """
+ Sort the hosts by name and take into account the numbers.
+ :param hosts: Dictionary from host name (e.g., perf-9-test, perf-62-test), to the IP
+ :return: Sorted list of tuples
+ """
+ host_names = hosts.keys()
+ sorted_host_tuples = [(None, None),] * len(hosts)
+
+ pattern = re.compile(".*?-agent-.*?(\d+)")
+ for host_name in host_names:
+ m = pattern.match(host_name)
+ if m and len(m.groups()) == 1:
+ number = int(m.group(1))
+ ip = hosts[host_name]
+ sorted_host_tuples[number - 1] = (host_name, ip)
+
+ return sorted_host_tuples
+
+
def pretty_print_vms(vms):
- print "----------------------------"
- print "Server IP: {0}".format(sorted(vms.items())[0][1])
+ """
+ Pretty print the VMs hostnames
+ :param vms: List of tuples (hostname, ip)
+ """
+ print "=========================="
print "Hostnames of nodes in cluster:"
- for (hostname, ip) in sorted(vms.items()):
+ for (hostname, ip) in vms:
print hostname
- print "----------------------------"
+ print "==========================\n"
if __name__ == "__main__":