You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@libcloud.apache.org by to...@apache.org on 2011/06/16 23:41:06 UTC
svn commit: r1136692 - /libcloud/trunk/libcloud/compute/base.py
Author: tomaz
Date: Thu Jun 16 21:41:05 2011
New Revision: 1136692
URL: http://svn.apache.org/viewvc?rev=1136692&view=rev
Log:
Refactor deploy_node - split it into 3 functions and make it more clear. Still
needs tests.
Modified:
libcloud/trunk/libcloud/compute/base.py
Modified: libcloud/trunk/libcloud/compute/base.py
URL: http://svn.apache.org/viewvc/libcloud/trunk/libcloud/compute/base.py?rev=1136692&r1=1136691&r2=1136692&view=diff
==============================================================================
--- libcloud/trunk/libcloud/compute/base.py (original)
+++ libcloud/trunk/libcloud/compute/base.py Thu Jun 16 21:41:05 2011
@@ -31,6 +31,7 @@ from libcloud.compute.ssh import SSHClie
from libcloud.common.base import ConnectionKey, ConnectionUserAndKey
from libcloud.httplib_ssl import LibcloudHTTPSConnection
from libcloud.common.base import LibcloudHTTPConnection
+from libcloud.common.types import LibcloudError
__all__ = [
"Node",
@@ -500,9 +501,6 @@ class NodeDriver(object):
existing implementation should be able to handle most such.
"""
# TODO: support ssh keys
- # FIX: this method is too long and complicated
- WAIT_PERIOD=3
- TIMEOUT=60 * 15 # 15 minutes
password = None
if 'generates_password' not in self.features["create_node"]:
@@ -515,72 +513,150 @@ class NodeDriver(object):
password = kwargs['auth'].password
node = self.create_node(**kwargs)
+
try:
- if 'generates_password' in self.features["create_node"]:
+ if 'generates_password' in self.features['create_node']:
password = node.extra.get('password')
- start = time.time()
- end = start + TIMEOUT
- while time.time() < end:
- # need to wait until we get a public IP address.
- # TODO: there must be a better way of doing this
- time.sleep(WAIT_PERIOD)
- nodes = self.list_nodes()
- nodes = filter(lambda n: n.uuid == node.uuid, nodes)
- if len(nodes) == 0:
- raise DeploymentError(
- node,
- ("Booted node[%s] " % node
- + "is missing from list_nodes."))
- if len(nodes) > 1:
- raise DeploymentError(
- node,
- ("Booted single node[%s], " % node
- + "but multiple nodes have same UUID"))
-
- node = nodes[0]
-
- if (node.public_ip is not None
- and node.public_ip != ""
- and node.state == NodeState.RUNNING):
- continue
-
- ssh_username = kwargs.get('ssh_username', 'root')
- ssh_port = kwargs.get('ssh_port', 22)
- ssh_timeout = kwargs.get('ssh_timeout', 20)
+ # Wait until node is up and running and has public IP assigned
+ node = self._wait_until_running(node=node, wait_period=3,
+ timeout=15*60)
+
+ ssh_username = kwargs.get('ssh_username', 'root')
+ ssh_port = kwargs.get('ssh_port', 22)
+ ssh_timeout = kwargs.get('ssh_timeout', 10)
- client = SSHClient(hostname=node.public_ip[0],
+ ssh_client = SSHClient(hostname=node.public_ip[0],
port=ssh_port, username=ssh_username,
password=password,
timeout=ssh_timeout)
- while time.time() < end:
- try:
- client.connect()
- except (IOError, socket.gaierror, socket.error), e:
- # Retry if a connection is refused or timeout
- # occured
- client.close()
- time.sleep(WAIT_PERIOD)
- continue
-
- max_tries, tries = 3, 0
- while tries < max_tries:
- try:
- n = kwargs["deploy"].run(node, client)
- client.close()
- raise
- except Exception, e:
- tries += 1
- if tries >= max_tries:
- raise DeploymentError(node,
- 'Failed after %d tries' % (max_tries))
+ # Connect to the SSH server running on the node
+ ssh_client = self._ssh_client_connect(ssh_client=ssh_client,
+ timeout=300)
+
+ # Execute the deployment task
+ node = self._run_deployment_script(task=kwargs['deploy'],
+ node=node,
+ ssh_client=ssh_client,
+ max_tries=3)
- except DeploymentError:
- raise
except Exception, e:
raise DeploymentError(node, e)
- return n
+ return node
+
+ def _run_deployment_script(self, task, node, ssh_client, max_tries=3):
+ """
+ Run the deployment script on the provided node. At this point it is
+ assumed that SSH connection has already been established.
+
+ @keyword task: Deployment task to run on the node.
+ @type task: C{Deployment}
+
+ @keyword node: Node to operate one
+ @type node: C{Node}
+
+ @keyword ssh_client: A configured and connected SSHClient instance
+ @type ssh_client: C{SSHClient}
+
+ @keyword max_tries: How many times to retry if a deployment fails
+ before giving up (default is 3)
+ @type max_tries: C{int}
+
+ @return: C{Node} Node instance on success.
+ """
+ tries = 0
+ while tries < max_tries:
+ try:
+ node = task.run(node, ssh_client)
+ ssh_client.close()
+ except Exception:
+ tries += 1
+ if tries >= max_tries:
+ raise LibcloudError(value='Failed after %d tries'
+ % (max_tries), driver=self)
+ else:
+ return node
+
+ def _ssh_client_connect(self, ssh_client, timeout=300):
+ """
+ Try to connect to the remote SSH server. If a connection times out or is
+ refused it is retried up to timeout number of seconds.
+
+ @keyword ssh_client: A configured SSHClient instance
+ @type ssh_client: C{SSHClient}
+
+ @keyword timeout: How many seconds to wait before timing out
+ (default is 600)
+ @type timeout: C{int}
+
+ @return: C{SSHClient} on success
+ """
+ start = time.time()
+ end = start + timeout
+
+ while time.time() < end:
+ try:
+ ssh_client.connect()
+ except (IOError, socket.gaierror, socket.error):
+ # Retry if a connection is refused or timeout
+ # occurred
+ ssh_client.close()
+ continue
+ else:
+ return ssh_client
+
+ raise LibcloudError(value='Could not connect to the remote SSH ' +
+ 'server. Giving up.', driver=self)
+
+ def _wait_until_running(self, node, wait_period=3, timeout=600):
+ """
+ Block until node is fully booted and has an IP address assigned.
+
+ @keyword node: Node instance.
+ @type node: C{Node}
+
+ @keyword wait_period: How many seconds to between each loop iteration
+ (default is 3)
+ @type wait_period: C{int}
+
+ @keyword timeout: How many seconds to wait before timing out
+ (default is 600)
+ @type timeout: C{int}
+
+ @return: C{Node} Node instance on success.
+ """
+ start = time.time()
+ end = start + timeout
+
+ while time.time() < end:
+ time.sleep(wait_period)
+ nodes = self.list_nodes()
+ nodes = filter(lambda n: n.uuid == node.uuid, nodes)
+
+ if len(nodes) == 0:
+ raise DeploymentError(
+ node,
+ ("Booted node[%s] " % node
+ + "is missing from list_nodes."))
+
+ if len(nodes) > 1:
+ raise DeploymentError(
+ node,
+ ("Booted single node[%s], " % node
+ + "but multiple nodes have same UUID"))
+
+ node = nodes[0]
+
+ if (node.public_ip is not None
+ and node.public_ip != ""
+ and node.state == NodeState.RUNNING):
+ return node
+ else:
+ continue
+
+ raise LibcloudError(value='Timed out after %s seconds' % (timeout),
+ driver=self)
def _get_size_price(self, size_id):
return get_size_price(driver_type='compute',