You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafficserver.apache.org by ig...@apache.org on 2012/12/04 00:10:24 UTC

[4/4] git commit: backport dd9d68b

backport dd9d68b


Project: http://git-wip-us.apache.org/repos/asf/trafficserver/repo
Commit: http://git-wip-us.apache.org/repos/asf/trafficserver/commit/e01ad3ee
Tree: http://git-wip-us.apache.org/repos/asf/trafficserver/tree/e01ad3ee
Diff: http://git-wip-us.apache.org/repos/asf/trafficserver/diff/e01ad3ee

Branch: refs/heads/3.2.x
Commit: e01ad3ee1a810c07784e2472e94b26b7cf0c5461
Parents: a7c0a42
Author: Igor Galić <i....@brainsware.org>
Authored: Tue Oct 30 21:52:42 2012 +0100
Committer: Igor Galić <i....@brainsware.org>
Committed: Mon Dec 3 23:35:53 2012 +0100

----------------------------------------------------------------------
 cop/TrafficCop.cc  |   74 +++++++++++++++++++++++++++++++----------------
 lib/ts/lockfile.cc |   55 ++++++----------------------------
 2 files changed, 59 insertions(+), 70 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/trafficserver/blob/e01ad3ee/cop/TrafficCop.cc
----------------------------------------------------------------------
diff --git a/cop/TrafficCop.cc b/cop/TrafficCop.cc
index c867952..860825f 100644
--- a/cop/TrafficCop.cc
+++ b/cop/TrafficCop.cc
@@ -44,7 +44,7 @@ union semun
 #endif  // linux check
 
 // For debugging, turn this on.
-// #define TRACE_LOG_COP 1
+#define TRACE_LOG_COP 1
 
 #define OPTIONS_MAX     32
 #define OPTIONS_LEN_MAX 1024
@@ -211,6 +211,7 @@ chown_file_to_admin_user(const char *file) {
     }
   }
 }
+
 static void
 sig_child(int signum)
 {
@@ -238,6 +239,43 @@ sig_child(int signum)
 }
 
 static void
+sig_term(int signum)
+{
+  pid_t pid = 0;
+  int status = 0;
+  int err;
+  pid_t holding_pid;
+
+  //killsig = SIGTERM;
+
+  cop_log_trace("Entering sig_term(%d)\n", signum);
+
+  // safely^W commit suicide.
+  cop_log_trace("Sending signal %d to entire group\n", signum);
+  killpg(0, signum);
+  
+  cop_log_trace("Waiting for children to exit.");
+
+  for (;;) {
+    pid = waitpid(WAIT_ANY, &status, WNOHANG);
+
+    if (pid <= 0) {
+      break;
+    }
+    // TSqa03086 - We can not log the child status signal from
+    //   the signal handler since syslog can deadlock.  Record
+    //   the pid and the status in a global for logging
+    //   next time through the event loop.  We will occasionally
+    //   lose some information if we get two sig childs in rapid
+    //   succession
+    child_pid = pid;
+    child_status = status;
+  }
+  cop_log_trace("Leaving sig_term(%d), exiting traffic_cop\n", signum);
+  exit(0);
+}
+
+static void
 #if defined(solaris)
 sig_fatal(int signum, siginfo_t * t, void *c)
 #else
@@ -1380,18 +1418,7 @@ check_programs()
   Lockfile manager_lf(manager_lockfile);
   err = manager_lf.Open(&holding_pid);
   chown_file_to_admin_user(manager_lockfile);
-#if defined(linux)
-  // if lockfile held, but process doesn't exist, killall and try again
-  if (err == 0) {
-    if (kill(holding_pid, 0) == -1) {
-      cop_log(COP_WARNING, "%s's lockfile is held, but its pid (%d) is missing;"
-              " killing all processes named '%s' and retrying\n", manager_binary, holding_pid, manager_binary);
-      ink_killall(manager_binary, killsig);
-      sleep(1);                 // give signals a chance to be received
-      err = manager_lf.Open(&holding_pid);
-    }
-  }
-#endif
+
   if (err > 0) {
     // 'lockfile_open' returns the file descriptor of the opened
     // lockfile.  We need to close this before spawning the
@@ -1472,18 +1499,7 @@ check_programs()
 
     Lockfile server_lf(server_lockfile);
     err = server_lf.Open(&holding_pid);
-#if defined(linux)
-    // if lockfile held, but process doesn't exist, killall and try again
-    if (err == 0) {
-      if (kill(holding_pid, 0) == -1) {
-        cop_log(COP_WARNING, "%s's lockfile is held, but its pid (%d) is missing;"
-                " killing all processes named '%s' and retrying\n", server_binary, holding_pid, server_binary);
-        ink_killall(server_binary, killsig);
-        sleep(1);               // give signals a chance to be received
-        err = server_lf.Open(&holding_pid);
-      }
-    }
-#endif
+
     if (err > 0) {
       server_lf.Close();
 
@@ -1672,6 +1688,14 @@ init_signals()
   struct sigaction action;
 
   cop_log_trace("Entering init_signals()\n");
+  // Handle the SIGTERM signal: We simply do the same as
+  // in sig_child..
+  action.sa_handler = sig_term;
+  sigemptyset(&action.sa_mask);
+  action.sa_flags = 0;
+
+  sigaction(SIGTERM, &action, NULL);
+
   // Handle the SIGCHLD signal. We simply reap all children that
   // die (which should only be spawned traffic_manager's).
   action.sa_handler = sig_child;

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/e01ad3ee/lib/ts/lockfile.cc
----------------------------------------------------------------------
diff --git a/lib/ts/lockfile.cc b/lib/ts/lockfile.cc
index 5cde39a..c2d34c4 100644
--- a/lib/ts/lockfile.cc
+++ b/lib/ts/lockfile.cc
@@ -205,54 +205,21 @@ static void
 lockfile_kill_internal(pid_t init_pid, int init_sig, pid_t pid, const char *pname, int sig)
 {
   int err;
-
-#if defined(linux)
-
-  pid_t *pidv;
-  int pidvcnt;
-
-  // Need to grab pname's pid vector before we issue any kill signals.
-  // Specifically, this prevents the race-condition in which
-  // traffic_manager spawns a new traffic_server while we still think
-  // we're killall'ing the old traffic_server.
-  if (pname) {
-    ink_killall_get_pidv_xmalloc(pname, &pidv, &pidvcnt);
-  }
-
-  if (init_sig > 0) {
-    kill(init_pid, init_sig);
-    // sleep for a bit and give time for the first signal to be
-    // delivered
-    sleep(1);
-  }
-
-  do {
-    if ((err = kill(pid, sig)) == 0) {
-      sleep(1);
-    }
-    if (pname && (pidvcnt > 0)) {
-      ink_killall_kill_pidv(pidv, pidvcnt, sig);
-      sleep(1);
-    }
-  } while ((err == 0) || ((err < 0) && (errno == EINTR)));
-
-  ats_free(pidv);
-
-#else
+  int status;
 
   if (init_sig > 0) {
     kill(init_pid, init_sig);
-    // sleep for a bit and give time for the first signal to be
-    // delivered
-    sleep(1);
+    // Wait for children to exit
+    do {
+      err = waitpid(-1, &status, WNOHANG);
+      if (err == -1) break;
+    } while(!WIFEXITED(status) && !WIFSIGNALED(status));
   }
 
   do {
     err = kill(pid, sig);
   } while ((err == 0) || ((err < 0) && (errno == EINTR)));
 
-#endif  // linux check
-
 }
 
 void
@@ -294,14 +261,12 @@ Lockfile::KillGroup(int sig, int initial_sig, const char *pname)
 
     if ((pid < 0) || (pid == getpid()))
       pid = holding_pid;
-    else
-      pid = -pid;
 
     if (pid != 0) {
-      // We kill the holding_pid instead of the process_group
-      // initially since there is no point trying to get core files
-      // from a group since the core file of one overwrites the core
-      // file of another one
+      // This way, we kill the process_group:
+      pid = -pid;
+      // In order to get core files from each process, please
+      // set your core_pattern appropriately.
       lockfile_kill_internal(holding_pid, initial_sig, pid, pname, sig);
     }
   }