You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by al...@apache.org on 2018/02/05 16:02:00 UTC

[5/6] mesos git commit: Fixed connection refused error in IOSwitchboard for unix socket.

Fixed connection refused error in IOSwitchboard for unix socket.

Once IOSwitchboard's unix socket appears in the file system, the agent
can try to connect to it. If the agent attempts to connect to the
socket before `listen()` has been called, then it gets connection
refused error. To address this, we initialize a unix socket using a
provisional path and rename it after `listen()` has been called.

Review: https://reviews.apache.org/r/65261/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/474411fa
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/474411fa
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/474411fa

Branch: refs/heads/1.5.x
Commit: 474411fab68ffdb87c48d7bca449ff3e41cc4c1b
Parents: f877886
Author: Andrei Budnik <ab...@mesosphere.com>
Authored: Thu Jan 25 01:19:06 2018 +0100
Committer: Alexander Rukletsov <al...@apache.org>
Committed: Mon Feb 5 16:55:04 2018 +0100

----------------------------------------------------------------------
 .../containerizer/mesos/io/switchboard.cpp      | 47 +++++++++++++-------
 src/slave/containerizer/mesos/paths.cpp         | 16 +++++++
 src/slave/containerizer/mesos/paths.hpp         | 13 ++++++
 3 files changed, 61 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/474411fa/src/slave/containerizer/mesos/io/switchboard.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/io/switchboard.cpp b/src/slave/containerizer/mesos/io/switchboard.cpp
index 6422fee..dc48a2b 100644
--- a/src/slave/containerizer/mesos/io/switchboard.cpp
+++ b/src/slave/containerizer/mesos/io/switchboard.cpp
@@ -686,10 +686,6 @@ Future<http::Connection> IOSwitchboard::_connect(
         return Failure("I/O switchboard has shutdown");
       }
 
-      // TODO(jieyu): We might still get a connection refused error
-      // here because the server might not have started listening on
-      // the socket yet. Consider retrying if 'http::connect' failed
-      // with ECONNREFUSED.
       return http::connect(address.get(), http::Scheme::HTTP);
     }));
 #endif // __WINDOWS__
@@ -828,16 +824,24 @@ Future<Nothing> IOSwitchboard::cleanup(
         // this container's `IOSwitchboardServer`. If it hasn't been
         // checkpointed yet, or the socket file itself hasn't been created,
         // we simply continue without error.
+        //
+        // NOTE: As the I/O switchboard creates a unix domain socket using
+        // a provisional address before initialiazing and renaming it, we assume
+        // that the absence of the unix socket at the original address means
+        // that the the I/O switchboard has been terminated before renaming.
         Result<unix::Address> address = getContainerIOSwitchboardAddress(
             flags.runtime_dir, containerId);
 
-        if (address.isSome()) {
-          Try<Nothing> rm = os::rm(address->path());
-          if (rm.isError()) {
-            LOG(ERROR) << "Failed to remove unix domain socket file"
-                       << " '" << address->path() << "' for container"
-                       << " '" << containerId << "': " << rm.error();
-          }
+        const string socketPath = address.isSome()
+          ? address->path()
+          : getContainerIOSwitchboardSocketProvisionalPath(
+                flags.runtime_dir, containerId);
+
+        Try<Nothing> rm = os::rm(socketPath);
+        if (rm.isError()) {
+          LOG(ERROR) << "Failed to remove unix domain socket file"
+                     << " '" << socketPath << "' for container"
+                     << " '" << containerId << "': " << rm.error();
         }
 
         return Nothing();
@@ -1034,22 +1038,35 @@ Try<Owned<IOSwitchboardServer>> IOSwitchboardServer::create(
     return Error("Failed to create socket: " + socket.error());
   }
 
-  Try<unix::Address> address = unix::Address::create(socketPath);
+  // Agent connects to the switchboard once it sees a unix socket. However,
+  // the unix socket is not ready to accept connections until `listen()` has
+  // been called. Therefore we initialize a unix socket using a provisional path
+  // and rename it after `listen()` has been called.
+  const string socketProvisionalPath =
+      getContainerIOSwitchboardSocketProvisionalPath(socketPath);
+
+  Try<unix::Address> address = unix::Address::create(socketProvisionalPath);
   if (address.isError()) {
-    return Error("Failed to build address from '" + socketPath + "':"
+    return Error("Failed to build address from '" + socketProvisionalPath + "':"
                  " " + address.error());
   }
 
   Try<unix::Address> bind = socket->bind(address.get());
   if (bind.isError()) {
-    return Error("Failed to bind to address '" + socketPath + "':"
+    return Error("Failed to bind to address '" + socketProvisionalPath + "':"
                  " " + bind.error());
   }
 
   Try<Nothing> listen = socket->listen(64);
   if (listen.isError()) {
     return Error("Failed to listen on socket at address"
-                 " '" + socketPath + "': " + listen.error());
+                 " '" + socketProvisionalPath + "': " + listen.error());
+  }
+
+  Try<Nothing> renameSocket = os::rename(socketProvisionalPath, socketPath);
+  if (renameSocket.isError()) {
+    return Error("Failed to rename socket from '" + socketProvisionalPath + "'"
+                 " to '" + socketPath + "': " + renameSocket.error());
   }
 
   return new IOSwitchboardServer(

http://git-wip-us.apache.org/repos/asf/mesos/blob/474411fa/src/slave/containerizer/mesos/paths.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/paths.cpp b/src/slave/containerizer/mesos/paths.cpp
index bb5570f..7e2779e 100644
--- a/src/slave/containerizer/mesos/paths.cpp
+++ b/src/slave/containerizer/mesos/paths.cpp
@@ -206,6 +206,22 @@ string getContainerIOSwitchboardSocketPath(
 }
 
 
+string getContainerIOSwitchboardSocketProvisionalPath(
+    const std::string& socketPath)
+{
+  return socketPath + "_provisional";
+}
+
+
+string getContainerIOSwitchboardSocketProvisionalPath(
+    const std::string& runtimeDir,
+    const ContainerID& containerId)
+{
+  return getContainerIOSwitchboardSocketProvisionalPath(
+      getContainerIOSwitchboardSocketPath(runtimeDir, containerId));
+}
+
+
 Result<unix::Address> getContainerIOSwitchboardAddress(
     const string& runtimeDir,
     const ContainerID& containerId)

http://git-wip-us.apache.org/repos/asf/mesos/blob/474411fa/src/slave/containerizer/mesos/paths.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/paths.hpp b/src/slave/containerizer/mesos/paths.hpp
index 65830a1..b9f0f45 100644
--- a/src/slave/containerizer/mesos/paths.hpp
+++ b/src/slave/containerizer/mesos/paths.hpp
@@ -144,6 +144,19 @@ std::string getContainerIOSwitchboardSocketPath(
     const ContainerID& containerId);
 
 
+// The helper method to get the io switchboard provisional socket path,
+// see comment in IOSwitchboardServer::create().
+std::string getContainerIOSwitchboardSocketProvisionalPath(
+    const std::string& socketPath);
+
+
+// The helper method to get the io switchboard provisional socket path,
+// see comment in IOSwitchboardServer::create().
+std::string getContainerIOSwitchboardSocketProvisionalPath(
+    const std::string& runtimeDir,
+    const ContainerID& containerId);
+
+
 // The helper method to read the io switchboard socket file.
 Result<process::network::unix::Address> getContainerIOSwitchboardAddress(
     const std::string& runtimeDir,