You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@aurora.apache.org by dm...@apache.org on 2014/11/14 22:59:26 UTC

incubator-aurora git commit: Move zookeeper connection off the main thread to prevent client deadlocks.

Repository: incubator-aurora
Updated Branches:
  refs/heads/master 2450a1e22 -> 8741cdbdf


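Move zookeeper connection off the main thread to prevent client deadlocks. Kazoo can wait indefinitely for the result of the serverset evaluation, which stops users from killing the client with a keyboard interrupt; the evaluation now runs in a separate daemon thread under a deadline, and a timeout is reported as CouldNotConnect.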

Reviewed at https://reviews.apache.org/r/27698/


Project: http://git-wip-us.apache.org/repos/asf/incubator-aurora/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-aurora/commit/8741cdbd
Tree: http://git-wip-us.apache.org/repos/asf/incubator-aurora/tree/8741cdbd
Diff: http://git-wip-us.apache.org/repos/asf/incubator-aurora/diff/8741cdbd

Branch: refs/heads/master
Commit: 8741cdbdf4c63a95da33f6871d2f3982b0f0edf6
Parents: 2450a1e
Author: David McLaughlin <da...@dmclaughlin.com>
Authored: Fri Nov 14 13:59:13 2014 -0800
Committer: David McLaughlin <da...@dmclaughlin.com>
Committed: Fri Nov 14 13:59:13 2014 -0800

----------------------------------------------------------------------
 .../aurora/client/api/scheduler_client.py       | 20 +++++++++++++++++---
 .../aurora/client/api/test_scheduler_client.py  |  3 ++-
 2 files changed, 19 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/8741cdbd/src/main/python/apache/aurora/client/api/scheduler_client.py
----------------------------------------------------------------------
diff --git a/src/main/python/apache/aurora/client/api/scheduler_client.py b/src/main/python/apache/aurora/client/api/scheduler_client.py
index 3a851cc..4ae18f0 100644
--- a/src/main/python/apache/aurora/client/api/scheduler_client.py
+++ b/src/main/python/apache/aurora/client/api/scheduler_client.py
@@ -21,6 +21,7 @@ from pystachio import Default, Integer, String
 from thrift.protocol import TJSONProtocol
 from thrift.transport import TTransport
 from twitter.common import log
+from twitter.common.concurrent import deadline, Timeout
 from twitter.common.quantity import Amount, Time
 from twitter.common.zookeeper.kazoo_client import TwitterKazooClient
 from twitter.common.zookeeper.serverset import ServerSet
@@ -116,22 +117,35 @@ class ZookeeperSchedulerClient(SchedulerClient):
     zk = TwitterKazooClient.make(str('%s:%s' % (cluster.zk, port)), verbose=verbose)
     return zk, ServerSet(zk, cluster.scheduler_zk_path, **kw)
 
-  def __init__(self, cluster, port=2181, verbose=False):
+  def __init__(self, cluster, port=2181, verbose=False, _deadline=deadline):
     SchedulerClient.__init__(self, verbose=verbose)
     self._cluster = cluster
     self._zkport = port
     self._endpoint = None
     self._uri = None
+    self._deadline = _deadline
 
   def _resolve(self):
     """Resolve the uri associated with this scheduler from zookeeper."""
     joined = threading.Event()
     def on_join(elements):
       joined.set()
+
     zk, serverset = self.get_scheduler_serverset(self._cluster, verbose=self._verbose,
-        port=self._zkport, on_join=on_join)
+      port=self._zkport, on_join=on_join)
+
     joined.wait(timeout=self.SERVERSET_TIMEOUT.as_(Time.SECONDS))
-    serverset_endpoints = list(serverset)
+
+    try:
+      # Need to perform this operation in a separate thread, because kazoo will wait for the
+      # result of this serverset evaluation indefinitely, which will prevent people killing
+      # the client with keyboard interrupts.
+      serverset_endpoints = self._deadline(lambda: list(serverset),
+        timeout=self.SERVERSET_TIMEOUT.as_(Time.SECONDS), daemon=True, propagate=True)
+    except Timeout:
+      raise self.CouldNotConnect("Failed to connect to Zookeeper within %d seconds." %
+        self.SERVERSET_TIMEOUT.as_(Time.SECONDS))
+
     if len(serverset_endpoints) == 0:
       raise self.CouldNotConnect('No schedulers detected in %s!' % self._cluster.name)
     instance = serverset_endpoints[0]

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/8741cdbd/src/test/python/apache/aurora/client/api/test_scheduler_client.py
----------------------------------------------------------------------
diff --git a/src/test/python/apache/aurora/client/api/test_scheduler_client.py b/src/test/python/apache/aurora/client/api/test_scheduler_client.py
index 1f1c6e0..b3232e9 100644
--- a/src/test/python/apache/aurora/client/api/test_scheduler_client.py
+++ b/src/test/python/apache/aurora/client/api/test_scheduler_client.py
@@ -354,7 +354,8 @@ def test_url_when_not_connected_and_cluster_has_no_proxy_url(scheme):
   service_endpoints = [ServiceInstance.unpack(service_json)]
 
   def make_mock_client(proxy_url):
-    client = scheduler_client.ZookeeperSchedulerClient(Cluster(proxy_url=proxy_url))
+    client = scheduler_client.ZookeeperSchedulerClient(Cluster(proxy_url=proxy_url),
+        _deadline=lambda x, **kws: x())
     client.get_scheduler_serverset = mock.MagicMock(return_value=(mock_zk, service_endpoints))
     client.SERVERSET_TIMEOUT = Amount(0, Time.SECONDS)
     client._connect_scheduler = mock.MagicMock()