You are viewing a plain text version of this content. The canonical link for it is here.
Posted to cvs@httpd.apache.org by tr...@apache.org on 2002/03/20 04:54:25 UTC
cvs commit: httpd-2.0/server/mpm/worker pod.c pod.h worker.c
trawick 02/03/19 19:54:25
Modified: server/mpm/worker pod.c pod.h worker.c
Log:
Make the listener thread stop accepting new connections and exit
cleanly at graceful restart time. This is a basic requirement of
reliable graceful restarts (the kind that won't drop connections).
This allows a future fix to make worker threads hang around until
they service all connections previously accepted by the listener
thread.
The old mechanism of making a dummy connection to wake up the
listener thread in each old child process didn't work. It didn't
guarantee that the main thread had read the byte from the pod and
set the global variables before the listener thread grabbed the
connection. Nor did it guarantee that a child process in the new
generation wouldn't get some of the dummy connections.
Rather than burn extra syscalls adding a unique socket or pipe
to the poll set (and breaking single listen unserialized accept
in the same change), this change uses a signal sent from the main
thread to the listener thread to break it out of the poll or accept.
(We don't worry about breaking a listener out of the optional accept
mutex because the child process holding the mutex will break out of
poll/accept and release the mutex, allowing a child blocked in the
mutex to get it. Eventually all children blocked in the mutex will
come out.)
Since the listener thread now exits reliably, the main thread
joins it.
Revision Changes Path
1.4 +1 -75 httpd-2.0/server/mpm/worker/pod.c
Index: pod.c
===================================================================
RCS file: /home/cvs/httpd-2.0/server/mpm/worker/pod.c,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- pod.c 13 Mar 2002 20:48:05 -0000 1.3
+++ pod.c 20 Mar 2002 03:54:25 -0000 1.4
@@ -87,9 +87,6 @@
*/
(*pod)->p = p;
- apr_sockaddr_info_get(&(*pod)->sa, ap_listeners->bind_addr->hostname,
- APR_UNSPEC, ap_listeners->bind_addr->port, 0, p);
-
return APR_SUCCESS;
}
@@ -147,75 +144,9 @@
return rv;
}
-/* This function connects to the server, then immediately closes the connection.
- * This permits the MPM to skip the poll when there is only one listening
- * socket, because it provides a alternate way to unblock an accept() when
- * the pod is used.
- */
-
-static apr_status_t dummy_connection(ap_pod_t *pod)
-{
- apr_status_t rv;
- apr_socket_t *sock;
- apr_pool_t *p;
-
- /* create a temporary pool for the socket. pconf stays around too long */
- rv = apr_pool_create(&p, pod->p);
- if (rv != APR_SUCCESS) {
- return rv;
- }
-
- rv = apr_socket_create(&sock, pod->sa->family, SOCK_STREAM, p);
- if (rv != APR_SUCCESS) {
- ap_log_error(APLOG_MARK, APLOG_WARNING, rv, ap_server_conf,
- "get socket to connect to listener");
- return rv;
- }
- /* on some platforms (e.g., FreeBSD), the kernel won't accept many
- * queued connections before it starts blocking local connects...
- * we need to keep from blocking too long and instead return an error,
- * because the MPM won't want to hold up a graceful restart for a
- * long time
- */
- rv = apr_setsocketopt(sock, APR_SO_TIMEOUT, 3 * APR_USEC_PER_SEC);
- if (rv != APR_SUCCESS) {
- ap_log_error(APLOG_MARK, APLOG_WARNING, rv, ap_server_conf,
- "set timeout on socket to connect to listener");
- apr_socket_close(sock);
- return rv;
- }
-
- rv = apr_connect(sock, pod->sa);
- if (rv != APR_SUCCESS) {
- int log_level = APLOG_WARNING;
-
- if (APR_STATUS_IS_TIMEUP(rv)) {
- /* probably some server processes bailed out already and there
- * is nobody around to call accept and clear out the kernel
- * connection queue; usually this is not worth logging
- */
- log_level = APLOG_DEBUG;
- }
-
- ap_log_error(APLOG_MARK, log_level, rv, ap_server_conf,
- "connect to listener");
- }
-
- apr_socket_close(sock);
- apr_pool_destroy(p);
-
- return rv;
-}
-
AP_DECLARE(apr_status_t) ap_mpm_pod_signal(ap_pod_t *pod, int graceful)
{
- apr_status_t rv;
-
- rv = pod_signal_internal(pod, graceful);
- if (rv != APR_SUCCESS) {
- return rv;
- }
- return dummy_connection(pod);
+ return pod_signal_internal(pod, graceful);
}
AP_DECLARE(void) ap_mpm_pod_killpg(ap_pod_t *pod, int num, int graceful)
@@ -225,11 +156,6 @@
for (i = 0; i < num && rv == APR_SUCCESS; i++) {
rv = pod_signal_internal(pod, graceful);
- }
- if (rv == APR_SUCCESS) {
- for (i = 0; i < num && rv == APR_SUCCESS; i++) {
- rv = dummy_connection(pod);
- }
}
}
1.3 +0 -1 httpd-2.0/server/mpm/worker/pod.h
Index: pod.h
===================================================================
RCS file: /home/cvs/httpd-2.0/server/mpm/worker/pod.h,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- pod.h 13 Mar 2002 20:48:05 -0000 1.2
+++ pod.h 20 Mar 2002 03:54:25 -0000 1.3
@@ -84,7 +84,6 @@
apr_file_t *pod_in;
apr_file_t *pod_out;
apr_pool_t *p;
- apr_sockaddr_t *sa;
};
AP_DECLARE(apr_status_t) ap_mpm_pod_open(apr_pool_t *p, ap_pod_t **pod);
1.93 +95 -7 httpd-2.0/server/mpm/worker/worker.c
Index: worker.c
===================================================================
RCS file: /home/cvs/httpd-2.0/server/mpm/worker/worker.c,v
retrieving revision 1.92
retrieving revision 1.93
diff -u -r1.92 -r1.93
--- worker.c 20 Mar 2002 03:38:07 -0000 1.92
+++ worker.c 20 Mar 2002 03:54:25 -0000 1.93
@@ -184,6 +184,7 @@
*/
typedef struct {
apr_thread_t **threads;
+ apr_thread_t *listener;
int child_num_arg;
apr_threadattr_t *threadattr;
} thread_starter;
@@ -229,6 +230,7 @@
static pid_t ap_my_pid; /* Linux getpid() doesn't work except in main
thread. Use this instead */
static pid_t parent_pid;
+static apr_os_thread_t *listener_os_thread;
/* Locks for accept serialization */
static apr_proc_mutex_t *accept_mutex;
@@ -239,9 +241,33 @@
#define SAFE_ACCEPT(stmt) (stmt)
#endif
+/* The LISTENER_SIGNAL signal will be sent from the main thread to the
+ * listener thread to wake it up for graceful termination (what a child
+ * process from an old generation does when the admin does "apachectl
+ * graceful"). This signal will be blocked in all threads of a child
+ * process except for the listener thread.
+ */
+#define LISTENER_SIGNAL SIGHUP
+
+static void wakeup_listener(void)
+{
+ /*
+ * we should just be able to "kill(ap_my_pid, LISTENER_SIGNAL)" and wake
+ * up the listener thread since it is the only thread with SIGHUP
+ * unblocked, but that doesn't work on Linux
+ */
+ pthread_kill(*listener_os_thread, LISTENER_SIGNAL);
+}
+
static void signal_workers(void)
{
workers_may_exit = 1;
+
+ /* in case we weren't called from the listener thread, wake up the
+ * listener thread
+ */
+ wakeup_listener();
+
/* XXX: This will happen naturally on a graceful, and we don't care
* otherwise.
ap_queue_signal_all_wakeup(worker_queue); */
@@ -584,6 +610,13 @@
}
}
+static void unblock_the_listener(int sig)
+{
+ /* XXX If specifying SIG_IGN is guaranteed to unblock a syscall,
+ * then we don't need this goofy function.
+ */
+}
+
static void *listener_thread(apr_thread_t *thd, void * dummy)
{
proc_info * ti = dummy;
@@ -597,6 +630,8 @@
apr_pollfd_t *pollset;
apr_status_t rv;
ap_listen_rec *lr, *last_lr = ap_listeners;
+ struct sigaction sa;
+ sigset_t sig_mask;
free(ti);
@@ -604,6 +639,21 @@
for(lr = ap_listeners ; lr != NULL ; lr = lr->next)
apr_poll_socket_add(pollset, lr->sd, APR_POLLIN);
+ sigemptyset(&sig_mask);
+ /* Unblock the signal used to wake this thread up, and set a handler for
+ * it.
+ */
+ sigaddset(&sig_mask, LISTENER_SIGNAL);
+#if defined(SIGPROCMASK_SETS_THREAD_MASK)
+ sigprocmask(SIG_UNBLOCK, &sig_mask, NULL);
+#else
+ pthread_sigmask(SIG_UNBLOCK, &sig_mask, NULL);
+#endif
+ sigemptyset(&sa.sa_mask);
+ sa.sa_flags = 0;
+ sa.sa_handler = unblock_the_listener;
+ sigaction(LISTENER_SIGNAL, &sa, NULL);
+
/* TODO: Switch to a system where threads reuse the results from earlier
poll calls - manoj */
while (1) {
@@ -617,6 +667,9 @@
!= APR_SUCCESS) {
int level = APLOG_EMERG;
+ if (workers_may_exit) {
+ break;
+ }
if (ap_scoreboard_image->parent[process_slot].generation !=
ap_scoreboard_image->global->running_generation) {
level = APLOG_DEBUG; /* common to get these at restart time */
@@ -685,8 +738,8 @@
rv = lr->accept_func(&csd, lr, ptrans);
/* If we were interrupted for whatever reason, just start
- * the main loop over again. (The worker MPM still uses
- * signals in the one_process case.) */
+ * the main loop over again.
+ */
if (APR_STATUS_IS_EINTR(rv)) {
continue;
}
@@ -699,6 +752,9 @@
!= APR_SUCCESS) {
int level = APLOG_EMERG;
+ if (workers_may_exit) {
+ break;
+ }
if (ap_scoreboard_image->parent[process_slot].generation !=
ap_scoreboard_image->global->running_generation) {
level = APLOG_DEBUG; /* common to get these at restart time */
@@ -812,7 +868,6 @@
apr_status_t rv;
int i = 0;
int threads_created = 0;
- apr_thread_t *listener;
/* We must create the fd queues before we start up the listener
* and worker threads. */
@@ -828,7 +883,7 @@
my_info->pid = my_child_num;
my_info->tid = i;
my_info->sd = 0;
- rv = apr_thread_create(&listener, thread_attr, listener_thread,
+ rv = apr_thread_create(&ts->listener, thread_attr, listener_thread,
my_info, pchild);
if (rv != APR_SUCCESS) {
ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf,
@@ -842,6 +897,7 @@
apr_sleep(10 * APR_USEC_PER_SEC);
clean_child_exit(APEXIT_CHILDFATAL);
}
+ apr_os_thread_get(&listener_os_thread, ts->listener);
while (1) {
/* ap_threads_per_child does not include the listener thread */
for (i = 0; i < ap_threads_per_child; i++) {
@@ -901,11 +957,42 @@
return NULL;
}
-static void join_workers(apr_thread_t **threads)
+static void join_workers(apr_thread_t *listener, apr_thread_t **threads)
{
int i;
apr_status_t rv, thread_rv;
+ if (listener) {
+ int iter;
+
+ /* deal with a rare timing window which affects waking up the
+ * listener thread... if the signal sent to the listener thread
+ * is delivered between the time it verifies that the
+ * workers_may_exit flag is clear and the time it enters a
+ * blocking syscall, the signal didn't do any good... work around
+ * that by sleeping briefly and sending it again
+ */
+
+ iter = 0;
+ while (iter < 10 && pthread_kill(*listener_os_thread, 0) == 0) {
+ /* listener not dead yet */
+ apr_sleep(APR_USEC_PER_SEC / 2);
+ wakeup_listener();
+ ++iter;
+ }
+ if (iter >= 10) {
+ ap_log_error(APLOG_MARK, APLOG_CRIT, 0, ap_server_conf,
+ "the listener thread didn't exit");
+ }
+ else {
+ rv = apr_thread_join(&thread_rv, listener);
+ if (rv != APR_SUCCESS) {
+ ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ap_server_conf,
+ "apr_thread_join: unable to join listener thread");
+ }
+ }
+ }
+
for (i = 0; i < ap_threads_per_child; i++) {
if (threads[i]) { /* if we ever created this thread */
rv = apr_thread_join(&thread_rv, threads[i]);
@@ -1003,6 +1090,7 @@
apr_threadattr_detach_set(thread_attr, 0);
ts->threads = threads;
+ ts->listener = NULL;
ts->child_num_arg = child_num_arg;
ts->threadattr = thread_attr;
@@ -1038,7 +1126,7 @@
* If the worker hasn't exited, then this blocks until
* they have (then cleans up).
*/
- join_workers(threads);
+ join_workers(ts->listener, threads);
}
else { /* !one_process */
/* Watch for any messages from the parent over the POD */
@@ -1062,7 +1150,7 @@
* If the worker hasn't exited, then this blocks until
* they have (then cleans up).
*/
- join_workers(threads);
+ join_workers(ts->listener, threads);
}
}