You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafficserver.apache.org by ig...@apache.org on 2012/12/04 00:10:24 UTC
[4/4] git commit: backport dd9d68b
backport dd9d68b
Project: http://git-wip-us.apache.org/repos/asf/trafficserver/repo
Commit: http://git-wip-us.apache.org/repos/asf/trafficserver/commit/e01ad3ee
Tree: http://git-wip-us.apache.org/repos/asf/trafficserver/tree/e01ad3ee
Diff: http://git-wip-us.apache.org/repos/asf/trafficserver/diff/e01ad3ee
Branch: refs/heads/3.2.x
Commit: e01ad3ee1a810c07784e2472e94b26b7cf0c5461
Parents: a7c0a42
Author: Igor Galić <i....@brainsware.org>
Authored: Tue Oct 30 21:52:42 2012 +0100
Committer: Igor Galić <i....@brainsware.org>
Committed: Mon Dec 3 23:35:53 2012 +0100
----------------------------------------------------------------------
cop/TrafficCop.cc | 74 +++++++++++++++++++++++++++++++----------------
lib/ts/lockfile.cc | 55 ++++++----------------------------
2 files changed, 59 insertions(+), 70 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/e01ad3ee/cop/TrafficCop.cc
----------------------------------------------------------------------
diff --git a/cop/TrafficCop.cc b/cop/TrafficCop.cc
index c867952..860825f 100644
--- a/cop/TrafficCop.cc
+++ b/cop/TrafficCop.cc
@@ -44,7 +44,7 @@ union semun
#endif // linux check
// For debugging, turn this on.
-// #define TRACE_LOG_COP 1
+#define TRACE_LOG_COP 1
#define OPTIONS_MAX 32
#define OPTIONS_LEN_MAX 1024
@@ -211,6 +211,7 @@ chown_file_to_admin_user(const char *file) {
}
}
}
+
static void
sig_child(int signum)
{
@@ -238,6 +239,43 @@ sig_child(int signum)
}
static void
+sig_term(int signum)
+{
+ pid_t pid = 0;
+ int status = 0;
+ int err;
+ pid_t holding_pid;
+
+ //killsig = SIGTERM;
+
+ cop_log_trace("Entering sig_term(%d)\n", signum);
+
+ // safely^W commit suicide.
+ cop_log_trace("Sending signal %d to entire group\n", signum);
+ killpg(0, signum);
+
+ cop_log_trace("Waiting for children to exit.");
+
+ for (;;) {
+ pid = waitpid(WAIT_ANY, &status, WNOHANG);
+
+ if (pid <= 0) {
+ break;
+ }
+ // TSqa03086 - We can not log the child status signal from
+ // the signal handler since syslog can deadlock. Record
+ // the pid and the status in a global for logging
+ // next time through the event loop. We will occasionally
+ // lose some information if we get two sig childs in rapid
+ // succession
+ child_pid = pid;
+ child_status = status;
+ }
+ cop_log_trace("Leaving sig_term(%d), exiting traffic_cop\n", signum);
+ exit(0);
+}
+
+static void
#if defined(solaris)
sig_fatal(int signum, siginfo_t * t, void *c)
#else
@@ -1380,18 +1418,7 @@ check_programs()
Lockfile manager_lf(manager_lockfile);
err = manager_lf.Open(&holding_pid);
chown_file_to_admin_user(manager_lockfile);
-#if defined(linux)
- // if lockfile held, but process doesn't exist, killall and try again
- if (err == 0) {
- if (kill(holding_pid, 0) == -1) {
- cop_log(COP_WARNING, "%s's lockfile is held, but its pid (%d) is missing;"
- " killing all processes named '%s' and retrying\n", manager_binary, holding_pid, manager_binary);
- ink_killall(manager_binary, killsig);
- sleep(1); // give signals a chance to be received
- err = manager_lf.Open(&holding_pid);
- }
- }
-#endif
+
if (err > 0) {
// 'lockfile_open' returns the file descriptor of the opened
// lockfile. We need to close this before spawning the
@@ -1472,18 +1499,7 @@ check_programs()
Lockfile server_lf(server_lockfile);
err = server_lf.Open(&holding_pid);
-#if defined(linux)
- // if lockfile held, but process doesn't exist, killall and try again
- if (err == 0) {
- if (kill(holding_pid, 0) == -1) {
- cop_log(COP_WARNING, "%s's lockfile is held, but its pid (%d) is missing;"
- " killing all processes named '%s' and retrying\n", server_binary, holding_pid, server_binary);
- ink_killall(server_binary, killsig);
- sleep(1); // give signals a chance to be received
- err = server_lf.Open(&holding_pid);
- }
- }
-#endif
+
if (err > 0) {
server_lf.Close();
@@ -1672,6 +1688,14 @@ init_signals()
struct sigaction action;
cop_log_trace("Entering init_signals()\n");
+ // Handle the SIGTERM signal: We simply do the same as
+ // in sig_child..
+ action.sa_handler = sig_term;
+ sigemptyset(&action.sa_mask);
+ action.sa_flags = 0;
+
+ sigaction(SIGTERM, &action, NULL);
+
// Handle the SIGCHLD signal. We simply reap all children that
// die (which should only be spawned traffic_manager's).
action.sa_handler = sig_child;
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/e01ad3ee/lib/ts/lockfile.cc
----------------------------------------------------------------------
diff --git a/lib/ts/lockfile.cc b/lib/ts/lockfile.cc
index 5cde39a..c2d34c4 100644
--- a/lib/ts/lockfile.cc
+++ b/lib/ts/lockfile.cc
@@ -205,54 +205,21 @@ static void
lockfile_kill_internal(pid_t init_pid, int init_sig, pid_t pid, const char *pname, int sig)
{
int err;
-
-#if defined(linux)
-
- pid_t *pidv;
- int pidvcnt;
-
- // Need to grab pname's pid vector before we issue any kill signals.
- // Specifically, this prevents the race-condition in which
- // traffic_manager spawns a new traffic_server while we still think
- // we're killall'ing the old traffic_server.
- if (pname) {
- ink_killall_get_pidv_xmalloc(pname, &pidv, &pidvcnt);
- }
-
- if (init_sig > 0) {
- kill(init_pid, init_sig);
- // sleep for a bit and give time for the first signal to be
- // delivered
- sleep(1);
- }
-
- do {
- if ((err = kill(pid, sig)) == 0) {
- sleep(1);
- }
- if (pname && (pidvcnt > 0)) {
- ink_killall_kill_pidv(pidv, pidvcnt, sig);
- sleep(1);
- }
- } while ((err == 0) || ((err < 0) && (errno == EINTR)));
-
- ats_free(pidv);
-
-#else
+ int status;
if (init_sig > 0) {
kill(init_pid, init_sig);
- // sleep for a bit and give time for the first signal to be
- // delivered
- sleep(1);
+ // Wait for children to exit
+ do {
+ err = waitpid(-1, &status, WNOHANG);
+ if (err == -1) break;
+ } while(!WIFEXITED(status) && !WIFSIGNALED(status));
}
do {
err = kill(pid, sig);
} while ((err == 0) || ((err < 0) && (errno == EINTR)));
-#endif // linux check
-
}
void
@@ -294,14 +261,12 @@ Lockfile::KillGroup(int sig, int initial_sig, const char *pname)
if ((pid < 0) || (pid == getpid()))
pid = holding_pid;
- else
- pid = -pid;
if (pid != 0) {
- // We kill the holding_pid instead of the process_group
- // initially since there is no point trying to get core files
- // from a group since the core file of one overwrites the core
- // file of another one
+ // This way, we kill the process_group:
+ pid = -pid;
+ // In order to get core files from each process, please
+ // set your core_pattern appropriately.
lockfile_kill_internal(holding_pid, initial_sig, pid, pname, sig);
}
}