You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafficserver.apache.org by bc...@apache.org on 2018/03/09 00:56:50 UTC

[trafficserver] branch master updated: Fixing #2906: improve draining

This is an automated email from the ASF dual-hosted git repository.

bcall pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/trafficserver.git


The following commit(s) were added to refs/heads/master by this push:
     new 2107f0f  Fixing #2906: improve draining
2107f0f is described below

commit 2107f0f675dc084f5a829cb6268eee5a19c1e12c
Author: Zizhong Zhang <zi...@linkedin.com>
AuthorDate: Thu Mar 8 11:36:25 2018 -0800

    Fixing #2906: improve draining
    
    Add maximum time limit for draining
    (cherry picked from commit a9fe3b0ce0975bc99d44f1803c6abc2d91ca82cb)
    
    Add traffic_ctl server stop --drain
    (cherry picked from commit 14ba9d616e18cbd5906ce02ab691e5057861d631)
    
    Conflicts:
    	proxy/Main.cc
    	proxy/ProxyClientSession.cc
    	proxy/ProxyClientSession.h
    
    1. Adding `traffic_ctl server drain` subcommand
    2. Adding default option, --no-new-connection option and --undo option
    (cherry picked from commit 139fe5fb99831060fad8ea5b6c6455bd1993da00)
---
 cmd/traffic_ctl/server.cc              | 68 +++++++++++++++++++++++-------
 cmd/traffic_manager/traffic_manager.cc | 76 +++++++++++++++++++++++++++++++++-
 mgmt/BaseManager.h                     |  1 +
 mgmt/LocalManager.cc                   |  8 ++++
 mgmt/LocalManager.h                    | 12 +++++-
 mgmt/ProcessManager.cc                 |  3 ++
 mgmt/api/CoreAPI.cc                    | 44 +++++++++++++++++++-
 mgmt/api/CoreAPI.h                     |  2 +
 mgmt/api/CoreAPIRemote.cc              | 34 +++++++++++++++
 mgmt/api/INKMgmtAPI.cc                 | 12 ++++++
 mgmt/api/NetworkMessage.cc             |  6 +++
 mgmt/api/NetworkMessage.h              |  2 +
 mgmt/api/TSControlMain.cc              | 46 ++++++++++++++++++++
 mgmt/api/include/mgmtapi.h             | 23 ++++++++++
 proxy/Main.cc                          | 15 +++++--
 proxy/ProxyClientSession.cc            |  2 -
 proxy/ProxyClientSession.h             |  8 ++--
 proxy/http2/Http2ClientSession.cc      |  4 ++
 18 files changed, 338 insertions(+), 28 deletions(-)

diff --git a/cmd/traffic_ctl/server.cc b/cmd/traffic_ctl/server.cc
index 356af7f..d41064f 100644
--- a/cmd/traffic_ctl/server.cc
+++ b/cmd/traffic_ctl/server.cc
@@ -115,16 +115,23 @@ static int
 server_stop(unsigned argc, const char **argv)
 {
   TSMgmtError error;
+  const char *usage = "server stop [OPTIONS]";
+  unsigned flags    = TS_RESTART_OPT_NONE;
 
-  // I am not sure whether it really makes sense to add the --drain option here.
-  // TSProxyStateSet() is a synchronous API, returning only after the proxy has
-  // been shut down. However, draining can take a long time and we don't want
-  // to wait for it. Maybe the right approach is to make the stop async.
-  if (!CtrlProcessArguments(argc, argv, nullptr, 0) || n_file_arguments != 0) {
-    return CtrlCommandUsage("server stop");
+  const ArgumentDescription opts[] = {
+    {"drain", '-', "Wait for client connections to drain before stopping", "F", &drain, nullptr, nullptr},
+  };
+
+  if (!CtrlProcessArguments(argc, argv, opts, countof(opts)) || n_file_arguments != 0) {
+    return CtrlCommandUsage(usage, opts, countof(opts));
   }
 
-  error = TSProxyStateSet(TS_PROXY_OFF, TS_CACHE_CLEAR_NONE);
+  if (drain) {
+    flags |= TS_STOP_OPT_DRAIN;
+  }
+
+  error = TSStop(flags);
+
   if (error != TS_ERR_OKAY) {
     CtrlMgmtError(error, "server stop failed");
     return CTRL_EX_ERROR;
@@ -162,16 +169,49 @@ server_start(unsigned argc, const char **argv)
   return CTRL_EX_OK;
 }
 
+static int
+server_drain(unsigned argc, const char **argv)
+{
+  TSMgmtError error;
+  const char *usage = "server drain [OPTIONS]";
+
+  int no_new_connection            = 0;
+  int undo                         = 0;
+  const ArgumentDescription opts[] = {
+    {"no-new-connection", 'N', "Wait for new connections down to threshold before starting draining", "F", &no_new_connection,
+     nullptr, nullptr},
+    {"undo", 'U', "Recover server from the drain mode", "F", &undo, nullptr, nullptr},
+  };
+
+  if (!CtrlProcessArguments(argc, argv, opts, countof(opts)) || n_file_arguments != 0) {
+    return CtrlCommandUsage(usage, opts, countof(opts));
+  }
+
+  if (undo) {
+    error = TSDrain(TS_DRAIN_OPT_UNDO);
+  } else if (no_new_connection) {
+    error = TSDrain(TS_DRAIN_OPT_IDLE);
+  } else {
+    error = TSDrain(TS_DRAIN_OPT_NONE);
+  }
+
+  if (error != TS_ERR_OKAY) {
+    CtrlMgmtError(error, "server drain failed");
+    return CTRL_EX_ERROR;
+  }
+
+  return CTRL_EX_OK;
+}
+
 int
 subcommand_server(unsigned argc, const char **argv)
 {
-  const subcommand commands[] = {
-    {server_backtrace, "backtrace", "Show a full stack trace of the traffic_server process"},
-    {server_restart, "restart", "Restart Traffic Server"},
-    {server_start, "start", "Start the proxy"},
-    {server_status, "status", "Show the proxy status"},
-    {server_stop, "stop", "Stop the proxy"},
-  };
+  const subcommand commands[] = {{server_backtrace, "backtrace", "Show a full stack trace of the traffic_server process"},
+                                 {server_restart, "restart", "Restart Traffic Server"},
+                                 {server_start, "start", "Start the proxy"},
+                                 {server_status, "status", "Show the proxy status"},
+                                 {server_stop, "stop", "Stop the proxy"},
+                                 {server_drain, "drain", "Drain the requests"}};
 
   return CtrlGenericSubcommand("server", commands, countof(commands), argc, argv);
 }
diff --git a/cmd/traffic_manager/traffic_manager.cc b/cmd/traffic_manager/traffic_manager.cc
index 5af53f3..7a79c08 100644
--- a/cmd/traffic_manager/traffic_manager.cc
+++ b/cmd/traffic_manager/traffic_manager.cc
@@ -166,6 +166,39 @@ is_server_idle()
   return active <= threshold;
 }
 
+static bool
+is_server_idle_from_new_connection()
+{
+  RecInt active    = 0;
+  RecInt threshold = 0;
+  // TODO implement with the right metric
+
+  Debug("lm", "%" PRId64 " active clients, threshold is %" PRId64, active, threshold);
+
+  return active <= threshold;
+}
+
+static bool
+is_server_draining()
+{
+  RecInt draining = 0;
+  if (RecGetRecordInt("proxy.node.config.draining", &draining) != REC_ERR_OKAY) {
+    return false;
+  }
+  return draining != 0;
+}
+
+static bool
+waited_enough()
+{
+  RecInt timeout = 0; // drain time limit, in time() units; non-positive means no limit
+  if (RecGetRecordInt("proxy.config.stop.shutdown_timeout", &timeout) != REC_ERR_OKAY) {
+    return false;
+  }
+  // True only once the limit has elapsed since the shutdown was requested.
+  return timeout > 0 && lmgmt->mgmt_shutdown_triggered_at + timeout <= time(nullptr);
+}
+
 static void
 check_lockfile()
 {
@@ -682,6 +715,8 @@ main(int argc, const char **argv)
   RecRegisterStatInt(RECT_NODE, "proxy.node.config.restart_required.manager", 0, RECP_NON_PERSISTENT);
   RecRegisterStatInt(RECT_NODE, "proxy.node.config.restart_required.cop", 0, RECP_NON_PERSISTENT);
 
+  RecRegisterStatInt(RECT_NODE, "proxy.node.config.draining", 0, RECP_NON_PERSISTENT);
+
   binding = new BindingInstance;
   metrics_binding_initialize(*binding);
   metrics_binding_configure(*binding);
@@ -727,7 +762,10 @@ main(int argc, const char **argv)
       ::exit(0);
       break;
     case MGMT_PENDING_IDLE_RESTART:
-      if (is_server_idle()) {
+      if (!is_server_draining()) {
+        lmgmt->processDrain();
+      }
+      if (is_server_idle() || waited_enough()) {
         lmgmt->mgmtShutdown();
         ::exit(0);
       }
@@ -737,11 +775,45 @@ main(int argc, const char **argv)
       lmgmt->mgmt_shutdown_outstanding = MGMT_PENDING_NONE;
       break;
     case MGMT_PENDING_IDLE_BOUNCE:
-      if (is_server_idle()) {
+      if (!is_server_draining()) {
+        lmgmt->processDrain();
+      }
+      if (is_server_idle() || waited_enough()) {
         lmgmt->processBounce();
         lmgmt->mgmt_shutdown_outstanding = MGMT_PENDING_NONE;
       }
       break;
+    case MGMT_PENDING_STOP:
+      lmgmt->processShutdown();
+      lmgmt->mgmt_shutdown_outstanding = MGMT_PENDING_NONE;
+      break;
+    case MGMT_PENDING_IDLE_STOP:
+      if (!is_server_draining()) {
+        lmgmt->processDrain();
+      }
+      if (is_server_idle() || waited_enough()) {
+        lmgmt->processShutdown();
+        lmgmt->mgmt_shutdown_outstanding = MGMT_PENDING_NONE;
+      }
+      break;
+    case MGMT_PENDING_DRAIN:
+      if (!is_server_draining()) {
+        lmgmt->processDrain();
+      }
+      lmgmt->mgmt_shutdown_outstanding = MGMT_PENDING_NONE;
+      break;
+    case MGMT_PENDING_IDLE_DRAIN:
+      if (is_server_idle_from_new_connection()) {
+        lmgmt->processDrain();
+        lmgmt->mgmt_shutdown_outstanding = MGMT_PENDING_NONE;
+      }
+      break;
+    case MGMT_PENDING_UNDO_DRAIN:
+      if (is_server_draining()) {
+        lmgmt->processDrain(0);
+        lmgmt->mgmt_shutdown_outstanding = MGMT_PENDING_NONE;
+      }
+      break;
     default:
       break;
     }
diff --git a/mgmt/BaseManager.h b/mgmt/BaseManager.h
index 3d7471c..f57df76 100644
--- a/mgmt/BaseManager.h
+++ b/mgmt/BaseManager.h
@@ -70,6 +70,7 @@
 // case statement.
 #define MGMT_EVENT_STORAGE_DEVICE_CMD_OFFLINE 10011
 #define MGMT_EVENT_LIFECYCLE_MESSAGE 10012
+#define MGMT_EVENT_DRAIN 10013
 
 /***********************************************************************
  *
diff --git a/mgmt/LocalManager.cc b/mgmt/LocalManager.cc
index e64a6b6..05abb18 100644
--- a/mgmt/LocalManager.cc
+++ b/mgmt/LocalManager.cc
@@ -99,6 +99,14 @@ LocalManager::processBounce()
 }
 
 void
+LocalManager::processDrain(int to_drain)
+{
+  mgmt_log("[LocalManager::processDrain] Executing process drain request.\n");
+  signalEvent(MGMT_EVENT_DRAIN, to_drain ? "1" : "0");
+  return;
+}
+
+void
 LocalManager::rollLogFiles()
 {
   mgmt_log("[LocalManager::rollLogFiles] Log files are being rolled.\n");
diff --git a/mgmt/LocalManager.h b/mgmt/LocalManager.h
index b3e9ec3..524b086 100644
--- a/mgmt/LocalManager.h
+++ b/mgmt/LocalManager.h
@@ -48,8 +48,13 @@ enum ManagementPendingOperation {
   MGMT_PENDING_NONE,         // Do nothing
   MGMT_PENDING_RESTART,      // Restart TS and TM
   MGMT_PENDING_BOUNCE,       // Restart TS
+  MGMT_PENDING_STOP,         // Stop TS
+  MGMT_PENDING_DRAIN,        // Drain TS
   MGMT_PENDING_IDLE_RESTART, // Restart TS and TM when TS is idle
-  MGMT_PENDING_IDLE_BOUNCE   // Restart TS when TS is idle
+  MGMT_PENDING_IDLE_BOUNCE,  // Restart TS when TS is idle
+  MGMT_PENDING_IDLE_STOP,    // Stop TS when TS is idle
+  MGMT_PENDING_IDLE_DRAIN,   // Drain TS when TS is idle from new connections
+  MGMT_PENDING_UNDO_DRAIN,   // Recover TS from drain
 };
 
 class LocalManager : public BaseManager
@@ -83,6 +88,7 @@ public:
   void processShutdown(bool mainThread = false);
   void processRestart();
   void processBounce();
+  void processDrain(int to_drain = 1);
   void rollLogFiles();
   void clearStats(const char *name = NULL);
 
@@ -95,7 +101,9 @@ public:
   int proxy_launch_count                               = 0;
   bool proxy_launch_outstanding                        = false;
   ManagementPendingOperation mgmt_shutdown_outstanding = MGMT_PENDING_NONE;
-  int proxy_running                                    = 0;
+  time_t mgmt_shutdown_triggered_at;
+  time_t mgmt_drain_triggered_at;
+  int proxy_running = 0;
   HttpProxyPort::Group m_proxy_ports;
   // Local inbound addresses to bind, if set.
   IpAddr m_inbound_ip4;
diff --git a/mgmt/ProcessManager.cc b/mgmt/ProcessManager.cc
index e382316..7122838 100644
--- a/mgmt/ProcessManager.cc
+++ b/mgmt/ProcessManager.cc
@@ -381,6 +381,9 @@ ProcessManager::handleMgmtMsgFromLM(MgmtMessageHdr *mh)
   case MGMT_EVENT_RESTART:
     signalMgmtEntity(MGMT_EVENT_RESTART);
     break;
+  case MGMT_EVENT_DRAIN:
+    signalMgmtEntity(MGMT_EVENT_DRAIN, data_raw, mh->data_len);
+    break;
   case MGMT_EVENT_CLEAR_STATS:
     signalMgmtEntity(MGMT_EVENT_CLEAR_STATS);
     break;
diff --git a/mgmt/api/CoreAPI.cc b/mgmt/api/CoreAPI.cc
index 868898c..154636f 100644
--- a/mgmt/api/CoreAPI.cc
+++ b/mgmt/api/CoreAPI.cc
@@ -403,7 +403,8 @@ Reconfigure()
 TSMgmtError
 Restart(unsigned options)
 {
-  lmgmt->mgmt_shutdown_outstanding = (options & TS_RESTART_OPT_DRAIN) ? MGMT_PENDING_IDLE_RESTART : MGMT_PENDING_RESTART;
+  lmgmt->mgmt_shutdown_triggered_at = time(nullptr);
+  lmgmt->mgmt_shutdown_outstanding  = (options & TS_RESTART_OPT_DRAIN) ? MGMT_PENDING_IDLE_RESTART : MGMT_PENDING_RESTART;
 
   return TS_ERR_OKAY;
 }
@@ -416,12 +417,51 @@ Restart(unsigned options)
 TSMgmtError
 Bounce(unsigned options)
 {
-  lmgmt->mgmt_shutdown_outstanding = (options & TS_RESTART_OPT_DRAIN) ? MGMT_PENDING_IDLE_BOUNCE : MGMT_PENDING_BOUNCE;
+  lmgmt->mgmt_shutdown_triggered_at = time(nullptr);
+  lmgmt->mgmt_shutdown_outstanding  = (options & TS_RESTART_OPT_DRAIN) ? MGMT_PENDING_IDLE_BOUNCE : MGMT_PENDING_BOUNCE;
 
   return TS_ERR_OKAY;
 }
 
 /*-------------------------------------------------------------------------
+ * Stop
+ *-------------------------------------------------------------------------
+ * Stops traffic_server process(es).
+ */
+TSMgmtError
+Stop(unsigned options)
+{
+  lmgmt->mgmt_shutdown_triggered_at = time(nullptr);
+  lmgmt->mgmt_shutdown_outstanding  = (options & TS_STOP_OPT_DRAIN) ? MGMT_PENDING_IDLE_STOP : MGMT_PENDING_STOP;
+
+  return TS_ERR_OKAY;
+}
+
+/*-------------------------------------------------------------------------
+ * Drain
+ *-------------------------------------------------------------------------
+ * Queue a drain request (TSDrainOptionT) for the manager's main loop.
+ */
+TSMgmtError
+Drain(unsigned options)
+{
+  switch (options) {
+  case TS_DRAIN_OPT_NONE:
+    lmgmt->mgmt_shutdown_outstanding = MGMT_PENDING_DRAIN;
+    break;
+  case TS_DRAIN_OPT_IDLE:
+    lmgmt->mgmt_shutdown_outstanding = MGMT_PENDING_IDLE_DRAIN;
+    break;
+  case TS_DRAIN_OPT_UNDO:
+    lmgmt->mgmt_shutdown_outstanding = MGMT_PENDING_UNDO_DRAIN;
+    break;
+  default:
+    return TS_ERR_PARAMS; // unknown option from a client: reject, don't abort the manager
+  }
+  return TS_ERR_OKAY;
+}
+
+/*-------------------------------------------------------------------------
  * StorageDeviceCmdOffline
  *-------------------------------------------------------------------------
  * Disable a storage device.
diff --git a/mgmt/api/CoreAPI.h b/mgmt/api/CoreAPI.h
index 7a20b10..84639b5 100644
--- a/mgmt/api/CoreAPI.h
+++ b/mgmt/api/CoreAPI.h
@@ -50,6 +50,8 @@ TSMgmtError ServerBacktrace(unsigned options, char **trace);
 TSMgmtError Reconfigure();                                                         // TS reread config files
 TSMgmtError Restart(unsigned options);                                             // restart TM
 TSMgmtError Bounce(unsigned options);                                              // restart traffic_server
+TSMgmtError Stop(unsigned options);                                                // stop traffic_server
+TSMgmtError Drain(unsigned options);                                               // drain requests of traffic_server
 TSMgmtError StorageDeviceCmdOffline(const char *dev);                              // Storage device operation.
 TSMgmtError LifecycleMessage(const char *tag, void const *data, size_t data_size); // Lifecycle alert to plugins.
 
diff --git a/mgmt/api/CoreAPIRemote.cc b/mgmt/api/CoreAPIRemote.cc
index 59e82e2..cdab53f 100644
--- a/mgmt/api/CoreAPIRemote.cc
+++ b/mgmt/api/CoreAPIRemote.cc
@@ -434,6 +434,40 @@ Bounce(unsigned options)
 }
 
 /*-------------------------------------------------------------------------
+ * Stop
+ *-------------------------------------------------------------------------
+ * Stop the traffic_server process(es) only.
+ */
+TSMgmtError
+Stop(unsigned options)
+{
+  TSMgmtError ret;
+  OpType optype        = OpType::STOP;
+  MgmtMarshallInt oval = options;
+
+  ret = MGMTAPI_SEND_MESSAGE(main_socket_fd, OpType::STOP, &optype, &oval);
+
+  return (ret == TS_ERR_OKAY) ? parse_generic_response(OpType::STOP, main_socket_fd) : ret;
+}
+
+/*-------------------------------------------------------------------------
+ * Drain
+ *-------------------------------------------------------------------------
+ * Drain requests of the traffic_server process(es) only.
+ */
+TSMgmtError
+Drain(unsigned options)
+{
+  TSMgmtError ret;
+  OpType optype        = OpType::DRAIN;
+  MgmtMarshallInt oval = options;
+
+  ret = MGMTAPI_SEND_MESSAGE(main_socket_fd, OpType::DRAIN, &optype, &oval);
+
+  return (ret == TS_ERR_OKAY) ? parse_generic_response(OpType::DRAIN, main_socket_fd) : ret;
+}
+
+/*-------------------------------------------------------------------------
  * StorageDeviceCmdOffline
  *-------------------------------------------------------------------------
  * Disable a storage device.
diff --git a/mgmt/api/INKMgmtAPI.cc b/mgmt/api/INKMgmtAPI.cc
index 25bcb85..b0b8189 100644
--- a/mgmt/api/INKMgmtAPI.cc
+++ b/mgmt/api/INKMgmtAPI.cc
@@ -813,6 +813,18 @@ TSBounce(unsigned options)
 }
 
 tsapi TSMgmtError
+TSStop(unsigned options)
+{
+  return Stop(options);
+}
+
+tsapi TSMgmtError
+TSDrain(unsigned options)
+{
+  return Drain(options);
+}
+
+tsapi TSMgmtError
 TSStorageDeviceCmdOffline(const char *dev)
 {
   return StorageDeviceCmdOffline(dev);
diff --git a/mgmt/api/NetworkMessage.cc b/mgmt/api/NetworkMessage.cc
index c7b244b..46261ff 100644
--- a/mgmt/api/NetworkMessage.cc
+++ b/mgmt/api/NetworkMessage.cc
@@ -45,6 +45,8 @@ static const struct NetCmdOperation requests[] = {
   /* RECONFIGURE                */ {1, {MGMT_MARSHALL_INT}},
   /* RESTART                    */ {2, {MGMT_MARSHALL_INT, MGMT_MARSHALL_INT}},
   /* BOUNCE                     */ {2, {MGMT_MARSHALL_INT, MGMT_MARSHALL_INT}},
+  /* STOP                       */ {2, {MGMT_MARSHALL_INT, MGMT_MARSHALL_INT}},
+  /* DRAIN                      */ {2, {MGMT_MARSHALL_INT, MGMT_MARSHALL_INT}},
   /* EVENT_RESOLVE              */ {2, {MGMT_MARSHALL_INT, MGMT_MARSHALL_STRING}},
   /* EVENT_GET_MLT              */ {1, {MGMT_MARSHALL_INT}},
   /* EVENT_ACTIVE               */ {2, {MGMT_MARSHALL_INT, MGMT_MARSHALL_STRING}},
@@ -71,6 +73,8 @@ static const struct NetCmdOperation responses[] = {
   /* RECONFIGURE                */ {1, {MGMT_MARSHALL_INT}},
   /* RESTART                    */ {1, {MGMT_MARSHALL_INT}},
   /* BOUNCE                     */ {1, {MGMT_MARSHALL_INT}},
+  /* STOP                       */ {1, {MGMT_MARSHALL_INT}},
+  /* DRAIN                      */ {1, {MGMT_MARSHALL_INT}},
   /* EVENT_RESOLVE              */ {1, {MGMT_MARSHALL_INT}},
   /* EVENT_GET_MLT              */ {2, {MGMT_MARSHALL_INT, MGMT_MARSHALL_STRING}},
   /* EVENT_ACTIVE               */ {2, {MGMT_MARSHALL_INT, MGMT_MARSHALL_INT}},
@@ -190,6 +194,8 @@ send_mgmt_error(int fd, OpType optype, TSMgmtError error)
   // Switch on operations, grouped by response format.
   switch (optype) {
   case OpType::BOUNCE:
+  case OpType::STOP:
+  case OpType::DRAIN:
   case OpType::EVENT_RESOLVE:
   case OpType::LIFECYCLE_MESSAGE:
   case OpType::PROXY_STATE_SET:
diff --git a/mgmt/api/NetworkMessage.h b/mgmt/api/NetworkMessage.h
index 45b5213..d529661 100644
--- a/mgmt/api/NetworkMessage.h
+++ b/mgmt/api/NetworkMessage.h
@@ -40,6 +40,8 @@ enum class OpType : MgmtMarshallInt {
   RECONFIGURE,
   RESTART,
   BOUNCE,
+  STOP,
+  DRAIN,
   EVENT_RESOLVE,
   EVENT_GET_MLT,
   EVENT_ACTIVE,
diff --git a/mgmt/api/TSControlMain.cc b/mgmt/api/TSControlMain.cc
index 841947d..58dddbf 100644
--- a/mgmt/api/TSControlMain.cc
+++ b/mgmt/api/TSControlMain.cc
@@ -600,6 +600,50 @@ handle_restart(int fd, void *req, size_t reqlen)
 }
 
 /**************************************************************************
+ * handle_stop
+ *
+ * purpose: handles request to stop TS
+ * output: TS_ERR_xx
+ * note: None
+ *************************************************************************/
+static TSMgmtError
+handle_stop(int fd, void *req, size_t reqlen)
+{
+  OpType optype;
+  MgmtMarshallInt options;
+  MgmtMarshallInt err;
+
+  err = recv_mgmt_request(req, reqlen, OpType::STOP, &optype, &options);
+  if (err == TS_ERR_OKAY) {
+    err = Stop(options);
+  }
+
+  return send_mgmt_response(fd, OpType::STOP, &err);
+}
+
+/**************************************************************************
+ * handle_drain
+ *
+ * purpose: handles request to drain TS
+ * output: TS_ERR_xx
+ * note: None
+ *************************************************************************/
+static TSMgmtError
+handle_drain(int fd, void *req, size_t reqlen)
+{
+  OpType optype;
+  MgmtMarshallInt options;
+  MgmtMarshallInt err;
+
+  err = recv_mgmt_request(req, reqlen, OpType::DRAIN, &optype, &options);
+  if (err == TS_ERR_OKAY) {
+    err = Drain(options);
+  }
+
+  return send_mgmt_response(fd, OpType::DRAIN, &err);
+}
+
+/**************************************************************************
  * handle_storage_device_cmd_offline
  *
  * purpose: handle storage offline command.
@@ -956,6 +1000,8 @@ static const control_message_handler handlers[] = {
   /* RECONFIGURE                */ {MGMT_API_PRIVILEGED, handle_reconfigure},
   /* RESTART                    */ {MGMT_API_PRIVILEGED, handle_restart},
   /* BOUNCE                     */ {MGMT_API_PRIVILEGED, handle_restart},
+  /* STOP                       */ {MGMT_API_PRIVILEGED, handle_stop},
+  /* DRAIN                      */ {MGMT_API_PRIVILEGED, handle_drain},
   /* EVENT_RESOLVE              */ {MGMT_API_PRIVILEGED, handle_event_resolve},
   /* EVENT_GET_MLT              */ {0, handle_event_get_mlt},
   /* EVENT_ACTIVE               */ {0, handle_event_active},
diff --git a/mgmt/api/include/mgmtapi.h b/mgmt/api/include/mgmtapi.h
index 7f1be29..c3567e9 100644
--- a/mgmt/api/include/mgmtapi.h
+++ b/mgmt/api/include/mgmtapi.h
@@ -201,6 +201,17 @@ typedef enum {
   TS_RESTART_OPT_DRAIN = 0x02, /* Wait for traffic to drain before restarting. */
 } TSRestartOptionT;
 
+typedef enum {
+  TS_STOP_OPT_NONE = 0x0,
+  TS_STOP_OPT_DRAIN, /* Wait for traffic to drain before stopping. */
+} TSStopOptionT;
+
+typedef enum {
+  TS_DRAIN_OPT_NONE = 0x0,
+  TS_DRAIN_OPT_IDLE, /* Wait for idle from new connections before draining. */
+  TS_DRAIN_OPT_UNDO, /* Recover TS from drain mode */
+} TSDrainOptionT;
+
 /***************************************************************************
  * Structures
  ***************************************************************************/
@@ -412,6 +423,18 @@ tsapi TSMgmtError TSActionDo(TSActionNeedT action);
  */
 tsapi TSMgmtError TSBounce(unsigned options);
 
+/* TSStop: stop the traffic_server process(es).
+ * Input: options - bitmask of TSStopOptionT
+ * Output TSMgmtError
+ */
+tsapi TSMgmtError TSStop(unsigned options);
+
+/* TSDrain: drain requests of the traffic_server process.
+ * Input: options - TSDrainOptionT
+ * Output TSMgmtError
+ */
+tsapi TSMgmtError TSDrain(unsigned options);
+
 /* TSStorageDeviceCmdOffline: Request to make a cache storage device offline.
  * @arg dev Target device, specified by path to device.
  * @return Success.
diff --git a/proxy/Main.cc b/proxy/Main.cc
index eb6e878..1d07788 100644
--- a/proxy/Main.cc
+++ b/proxy/Main.cc
@@ -123,6 +123,7 @@ extern "C" int plock(int);
 static const long MAX_LOGIN = ink_login_name_max();
 
 static void *mgmt_restart_shutdown_callback(void *, char *, int data_len);
+static void *mgmt_drain_callback(void *, char *, int data_len);
 static void *mgmt_storage_device_cmd_callback(void *x, char *data, int len);
 static void *mgmt_lifecycle_msg_callback(void *x, char *data, int len);
 static void init_ssl_ctx_callback(void *ctx, bool server);
@@ -280,9 +281,8 @@ public:
       signal_received[SIGINT]  = false;
 
       RecInt timeout = 0;
-      if (RecGetRecordInt("proxy.config.stop.shutdown_timeout", &timeout) == REC_ERR_OKAY && timeout &&
-          !http_client_session_draining) {
-        http_client_session_draining = true;
+      if (RecGetRecordInt("proxy.config.stop.shutdown_timeout", &timeout) == REC_ERR_OKAY && timeout) {
+        RecSetRecordInt("proxy.node.config.draining", 1, REC_SOURCE_DEFAULT);
         if (!remote_management_flag) {
           // Close listening sockets here only if TS is running standalone
           RecInt close_sockets = 0;
@@ -1970,6 +1970,7 @@ main(int /* argc ATS_UNUSED */, const char **argv)
 
     pmgmt->registerMgmtCallback(MGMT_EVENT_SHUTDOWN, mgmt_restart_shutdown_callback, nullptr);
     pmgmt->registerMgmtCallback(MGMT_EVENT_RESTART, mgmt_restart_shutdown_callback, nullptr);
+    pmgmt->registerMgmtCallback(MGMT_EVENT_DRAIN, mgmt_drain_callback, nullptr);
 
     // Callback for various storage commands. These all go to the same function so we
     // pass the event code along so it can do the right thing. We cast that to <int> first
@@ -2029,6 +2030,14 @@ mgmt_restart_shutdown_callback(void *, char *, int /* data_len ATS_UNUSED */)
 }
 
 static void *
+mgmt_drain_callback(void *, char *arg, int len)
+{
+  ink_assert(len > 1 && (arg[0] == '0' || arg[0] == '1'));
+  RecSetRecordInt("proxy.node.config.draining", arg[0] == '1', REC_SOURCE_DEFAULT);
+  return nullptr;
+}
+
+static void *
 mgmt_storage_device_cmd_callback(void *data, char *arg, int len)
 {
   // data is the device name to control
diff --git a/proxy/ProxyClientSession.cc b/proxy/ProxyClientSession.cc
index 348ca22..8cb61eb 100644
--- a/proxy/ProxyClientSession.cc
+++ b/proxy/ProxyClientSession.cc
@@ -25,8 +25,6 @@
 #include "HttpDebugNames.h"
 #include "ProxyClientSession.h"
 
-bool http_client_session_draining = false;
-
 static int64_t next_cs_id = 0;
 
 ProxyClientSession::ProxyClientSession() : VConnection(nullptr)
diff --git a/proxy/ProxyClientSession.h b/proxy/ProxyClientSession.h
index 9eaaf63..c797e9d 100644
--- a/proxy/ProxyClientSession.h
+++ b/proxy/ProxyClientSession.h
@@ -31,8 +31,6 @@
 #include "InkAPIInternal.h"
 #include "http/HttpServerSession.h"
 
-extern bool http_client_session_draining;
-
 // Emit a debug message conditional on whether this particular client session
 // has debugging enabled. This should only be called from within a client session
 // member function.
@@ -125,7 +123,11 @@ public:
   bool
   is_draining() const
   {
-    return http_client_session_draining;
+    RecInt draining;
+    if (RecGetRecordInt("proxy.node.config.draining", &draining) != REC_ERR_OKAY) {
+      return false;
+    }
+    return draining != 0;
   }
 
   // Initiate an API hook invocation.
diff --git a/proxy/http2/Http2ClientSession.cc b/proxy/http2/Http2ClientSession.cc
index 1ef2690..ca7c6e9 100644
--- a/proxy/http2/Http2ClientSession.cc
+++ b/proxy/http2/Http2ClientSession.cc
@@ -347,6 +347,10 @@ Http2ClientSession::main_event_handler(int event, void *edata)
     break;
   }
 
+  if (!this->is_draining()) {
+    this->connection_state.set_shutdown_state(HTTP2_SHUTDOWN_NONE);
+  }
+
   // For a case we already checked Connection header and it didn't exist
   if (this->is_draining() && this->connection_state.get_shutdown_state() == HTTP2_SHUTDOWN_NONE) {
     this->connection_state.set_shutdown_state(HTTP2_SHUTDOWN_NOT_INITIATED);

-- 
To stop receiving notification emails like this one, please contact
bcall@apache.org.