You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@httpd.apache.org by Dean Gaudet <dg...@arctic.org> on 1997/09/11 22:03:33 UTC
Re: [PATCH] scoreboard fixes (take 2)
Alexei just committed a win32 fix that conflicts with this patch.
Here's an update.
Dean
Index: CHANGES
===================================================================
RCS file: /export/home/cvs/apachen/src/CHANGES,v
retrieving revision 1.432
diff -u -r1.432 CHANGES
--- CHANGES 1997/09/10 17:43:17 1.432
+++ CHANGES 1997/09/11 20:00:39
@@ -1,5 +1,12 @@
Changes with Apache 1.3b1
+ *) API: the short_score record has been split into two pieces, one which
+ the parent writes on, and one which the child writes on. As part of
+ this change the get_scoreboard_info() function was removed, and
+ scoreboard_image was exported. This change fixes a race condition
+ in filebased scoreboard systems, and speeds up changes involving the
+ scoreboard in earlier 1.3 development. [Dean Gaudet]
+
*) API: New function child_terminate() triggers the child process to
exit, while allowing the child finish what it needs to for the
current request first.
Index: main/http_main.c
===================================================================
RCS file: /export/home/cvs/apachen/src/main/http_main.c,v
retrieving revision 1.217
diff -u -r1.217 http_main.c
--- http_main.c 1997/09/11 19:32:43 1.217
+++ http_main.c 1997/09/11 20:00:43
@@ -242,7 +242,7 @@
static int my_child_num;
#endif
-static scoreboard *scoreboard_image = NULL;
+scoreboard *scoreboard_image = NULL;
/* small utility macros to make things easier to read */
@@ -1424,7 +1424,7 @@
* anyway.
*/
-API_EXPORT(void) sync_scoreboard_image (void)
+inline void sync_scoreboard_image (void)
{
#ifdef SCOREBOARD_FILE
lseek (scoreboard_fd, 0L, 0);
@@ -1434,7 +1434,7 @@
#endif /* MULTITHREAD */
-API_EXPORT(int) exists_scoreboard_image (void)
+int exists_scoreboard_image (void)
{
return (scoreboard_image ? 1 : 0);
}
@@ -1459,17 +1459,14 @@
sync_scoreboard_image();
ss = &scoreboard_image->servers[child_num];
old_status = ss->status;
- ss->x.pid = my_pid;
ss->status = status;
#ifdef OPTIMIZE_TIMEOUTS
++ss->cur_vtime;
#endif
#if defined(STATUS)
-#ifdef OPTIMIZE_TIMEOUTS
- ss->last_used = ss->last_rtime; /* close enough */
-#else
- ss->last_used=time(NULL);
+#ifndef OPTIMIZE_TIMEOUTS
+ ss->last_used = time(NULL);
#endif
if (status == SERVER_READY || status == SERVER_DEAD) {
/*
@@ -1514,11 +1511,6 @@
#endif
}
-API_EXPORT(short_score) get_scoreboard_info(int i)
-{
- return (scoreboard_image->servers[i]);
-}
-
#if defined(STATUS)
void time_process_request (int child_num, int status)
{
@@ -1595,7 +1587,7 @@
int i;
for (i = 0; i < max_daemons_limit; ++i)
- if (scoreboard_image->servers[i].x.pid == pid)
+ if (scoreboard_image->parent[i].pid == pid)
return i;
return -1;
@@ -1608,7 +1600,7 @@
sync_scoreboard_image();
for (i = 0; i < max_daemons_limit; ++i) {
- int pid = scoreboard_image->servers[i].x.pid;
+ int pid = scoreboard_image->parent[i].pid;
if (pid != my_pid && pid != 0) {
int waitret = 0,
@@ -1687,7 +1679,7 @@
for (n = 0; n < max_daemons_limit; ++n) {
if (scoreboard_image->servers[n].status != SERVER_DEAD
- && waitpid (scoreboard_image->servers[n].x.pid, &status, WNOHANG)
+ && waitpid (scoreboard_image->parent[n].pid, &status, WNOHANG)
== -1
&& errno == ECHILD) {
sync_scoreboard_image ();
@@ -1720,7 +1712,7 @@
if(scoreboard_image->servers[pi].status != SERVER_DEAD)
{
e[hi] = pi;
- h[hi++] = (HANDLE)scoreboard_image->servers[pi].x.pid;
+ h[hi++] = (HANDLE)scoreboard_image->parent[pi].pid;
}
}
@@ -1730,9 +1722,9 @@
if(rv == -1)
err = GetLastError();
if((WAIT_OBJECT_0 <= (unsigned int)rv) && ((unsigned int)rv < (WAIT_OBJECT_0 + hi)))
- return(scoreboard_image->servers[e[rv - WAIT_OBJECT_0]].x.pid);
+ return(scoreboard_image->parent[e[rv - WAIT_OBJECT_0]].pid);
else if((WAIT_ABANDONED_0 <= (unsigned int)rv) && ((unsigned int)rv < (WAIT_ABANDONED_0 + hi)))
- return(scoreboard_image->servers[e[rv - WAIT_ABANDONED_0]].x.pid);
+ return(scoreboard_image->parent[e[rv - WAIT_ABANDONED_0]].pid);
}
}
@@ -2897,7 +2889,7 @@
}
}
-static int make_child(server_rec *s, int slot)
+static int make_child(server_rec *s, int slot, time_t now)
{
int pid;
@@ -2944,14 +2936,15 @@
child_main (slot);
}
- /* If the parent proceeds with a restart before the child has written
- * their pid into the scoreboard we'll end up "forgetting" about the
- * child. So we write the child pid into the scoreboard now. (This
- * is safe, because the child is going to be writing the same value
- * to the same word.)
- * XXX: this needs to be sync'd to disk in the non shared memory stuff
- */
- scoreboard_image->servers[slot].x.pid = pid;
+#ifdef OPTIMIZE_TIMEOUTS
+ scoreboard_image->parent[slot].last_rtime = now;
+#endif
+ scoreboard_image->parent[slot].pid = pid;
+#ifdef SCOREBOARD_FILE
+ lseek(scoreboard_fd, XtOffsetOf(scoreboard, parent[slot]), 0);
+ force_write(scoreboard_fd, &scoreboard_image->parent[slot],
+ sizeof(parent_score));
+#endif
return 0;
}
@@ -2961,12 +2954,13 @@
static void startup_children (int number_to_start)
{
int i;
+ time_t now = time(0);
for (i = 0; number_to_start && i < daemons_limit; ++i ) {
if (scoreboard_image->servers[i].status != SERVER_DEAD) {
continue;
}
- if (make_child (server_conf, i) < 0) {
+ if (make_child (server_conf, i, now) < 0) {
break;
}
--number_to_start;
@@ -2991,26 +2985,30 @@
int i;
int to_kill;
int idle_count;
- int free_head;
- int *free_ptr;
- int free_length;
short_score *ss;
-#ifdef OPTIMIZE_TIMEOUTS
time_t now = time(0);
-#endif
+ int free_length;
+ int free_slots[MAX_SPAWN_RATE];
+ int last_non_dead;
/* initialize the free_list */
- free_head = -1;
- free_ptr = &free_head;
free_length = 0;
to_kill = -1;
idle_count = 0;
+ last_non_dead = -1;
sync_scoreboard_image ();
for (i = 0; i < daemons_limit; ++i) {
+ if (i >= max_daemons_limit && free_length == idle_spawn_rate) break;
ss = &scoreboard_image->servers[i];
switch (ss->status) {
+ /* We consider a starting server as idle because we started it
+ * at least a cycle ago, and if it still hasn't finished starting
+ * then we're just going to swamp things worse by forking more.
+ * So we hopefully won't need to fork more if we count it.
+ */
+ case SERVER_STARTING:
case SERVER_READY:
++idle_count;
/* always kill the highest numbered child if we have to...
@@ -3024,39 +3022,43 @@
case SERVER_DEAD:
/* try to keep children numbers as low as possible */
if (free_length < idle_spawn_rate) {
- *free_ptr = i;
- free_ptr = &scoreboard_image->servers[i].x.free_list;
+ free_slots[free_length] = i;
++free_length;
}
break;
}
+ if (ss->status != SERVER_DEAD) {
+ last_non_dead = i;
#ifdef OPTIMIZE_TIMEOUTS
- if (ss->status != SERVER_DEAD && ss->timeout_len) {
- /* if it's a live server, with a live timeout then start checking
- * its timeout */
- if (ss->cur_vtime != ss->last_vtime) {
- /* it has made progress, so update its last_rtime, last_vtime */
- ss->last_rtime = now;
- ss->last_vtime = ss->cur_vtime;
- } else if (ss->last_rtime + ss->timeout_len < now) {
- /* no progress, and the timeout length has been exceeded */
- ss->timeout_len = 0;
- kill (ss->x.pid, SIGALRM);
+ if (ss->timeout_len) {
+ /* if it's a live server, with a live timeout then
+ * start checking its timeout */
+ parent_score *ps = &scoreboard_image->parent[i];
+ if (ss->cur_vtime != ps->last_vtime) {
+ /* it has made progress, so update its last_rtime,
+ * last_vtime */
+ ps->last_rtime = now;
+ ps->last_vtime = ss->cur_vtime;
+ } else if (ps->last_rtime + ss->timeout_len < now) {
+ /* no progress, and the timeout length has been exceeded */
+ ss->timeout_len = 0;
+ kill (ps->pid, SIGALRM);
+ }
}
- }
#endif
+ }
}
+ max_daemons_limit = last_non_dead + 1;
if (idle_count > daemons_max_free) {
/* kill off one child... we use SIGUSR1 because that'll cause it to
* shut down gracefully, in case it happened to pick up a request
* while we were counting
*/
- kill (scoreboard_image->servers[to_kill].x.pid, SIGUSR1);
+ kill (scoreboard_image->parent[to_kill].pid, SIGUSR1);
idle_spawn_rate = 1;
} else if (idle_count < daemons_min_free) {
/* terminate the free list */
- *free_ptr = -1;
- if (free_head == -1) {
+ if (free_length == 0) {
/* only report this condition once */
static int reported = 0;
@@ -3066,6 +3068,7 @@
" raising the MaxClients setting");
reported = 1;
}
+ idle_spawn_rate = 1;
} else {
if (idle_spawn_rate >= 4) {
aplog_error(APLOG_MARK, APLOG_ERR, server_conf,
@@ -3073,12 +3076,8 @@
"to increase StartServers, or Min/MaxSpareServers)",
idle_spawn_rate);
}
- i = 0;
- while (i < idle_spawn_rate && free_head != -1) {
- int slot = free_head;
- free_head = scoreboard_image->servers[free_head].x.free_list;
- make_child (server_conf, slot);
- ++i;
+ for (i = 0; i < free_length; ++i) {
+ make_child (server_conf, free_slots[i], now);
}
/* the next time around we want to spawn twice as many if this
* wasn't good enough, but not if we've just done a graceful
@@ -3202,10 +3201,8 @@
/* we're still doing a 1-for-1 replacement of dead
* children with new children
*/
- make_child (server_conf, child_slot);
+ make_child (server_conf, child_slot, time(0));
--remaining_children_to_start;
- /* don't perform idle maintenance yet */
- continue;
}
} else if (is_graceful) {
/* Great, we've probably just lost a slot in the
@@ -3215,6 +3212,12 @@
aplog_error(APLOG_MARK, APLOG_WARNING, server_conf,
"long lost child came home! (pid %d)", pid );
}
+ /* Don't perform idle maintenance when a child dies,
+ * only do it when there's a timeout. Remember only a
+ * finite number of children can die, and it's pretty
+ * pathological for a lot to die suddenly.
+ */
+ continue;
} else if (remaining_children_to_start) {
/* we hit a 1 second timeout in which none of the previous
* generation of children needed to be reaped... so assume
Index: main/scoreboard.h
===================================================================
RCS file: /export/home/cvs/apachen/src/main/scoreboard.h,v
retrieving revision 1.29
diff -u -r1.29 scoreboard.h
--- scoreboard.h 1997/09/11 19:32:44 1.29
+++ scoreboard.h 1997/09/11 20:00:43
@@ -74,6 +74,7 @@
#define SERVER_BUSY_LOG 6 /* Logging the request */
#define SERVER_BUSY_DNS 7 /* Looking up a hostname */
#define SERVER_GRACEFUL 8 /* server is gracefully finishing request */
+#define SERVER_NUM_STATUS 9 /* number of status settings */
/* A "virtual time" is simply a counter that indicates that a child is
* making progress. The parent checks up on each child, and when they have
@@ -90,14 +91,9 @@
*/
typedef unsigned vtime_t;
+/* stuff which the children generally write, and the parent mainly reads */
typedef struct {
- union {
- pid_t pid; /* if it's not DEAD then this is the pid */
- int free_list; /* otherwise this is scratch space */
- } x;
#ifdef OPTIMIZE_TIMEOUTS
- vtime_t last_vtime; /* the last vtime the parent has seen */
- time_t last_rtime; /* time(0) of the last change */
vtime_t cur_vtime; /* the child's current vtime */
unsigned short timeout_len; /* length of the timeout */
#endif
@@ -119,7 +115,9 @@
#ifndef NO_TIMES
struct tms times;
#endif
+#ifndef OPTIMIZE_TIMEOUTS
time_t last_used;
+#endif
char client[32]; /* Keep 'em small... */
char request[64]; /* We just want an idea... */
char vhost[32]; /* What virtual host is being accessed? */
@@ -132,17 +130,28 @@
restart is required */
} global_score;
+/* stuff which the parent generally writes and the children rarely read */
+typedef struct {
+ pid_t pid;
+#ifdef OPTIMIZE_TIMEOUTS
+ time_t last_rtime; /* time(0) of the last change */
+ vtime_t last_vtime; /* the last vtime the parent has seen */
+#endif
+} parent_score;
+
typedef struct
{
short_score servers[HARD_SERVER_LIMIT];
+ parent_score parent[HARD_SERVER_LIMIT];
global_score global;
} scoreboard;
#define SCOREBOARD_SIZE sizeof(scoreboard)
-API_EXPORT(void) sync_scoreboard_image(void);
-API_EXPORT(short_score) get_scoreboard_info(int x);
-API_EXPORT(int) exists_scoreboard_image (void);
+void sync_scoreboard_image(void);
+int exists_scoreboard_image (void);
+
+extern scoreboard *scoreboard_image;
/* for time_process_request() in http_main.c */
#define START_PREQUEST 1
Index: modules/standard/mod_log_config.c
===================================================================
RCS file: /export/home/cvs/apachen/src/modules/standard/mod_log_config.c,v
retrieving revision 1.36
diff -u -r1.36 mod_log_config.c
--- mod_log_config.c 1997/08/23 02:59:45 1.36
+++ mod_log_config.c 1997/09/11 20:00:44
@@ -164,6 +164,7 @@
#include "httpd.h"
#include "http_config.h"
#include "http_core.h" /* For REMOTE_NAME */
+#include <limits.h>
module MODULE_VAR_EXPORT config_log_module;
@@ -175,6 +176,21 @@
static mode_t xfer_mode = ( S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH );
#endif
+/* POSIX.1 defines PIPE_BUF as the maximum number of bytes that is
+ * guaranteed to be atomic when writing a pipe. And PIPE_BUF >= 512
+ * is guaranteed. So we'll just guess 512 in the event the system
+ * doesn't have this. Now, for file writes there is actually no limit,
+ * the entire write is atomic. Whether all systems implement this
+ * correctly is another question entirely ... so we'll just use PIPE_BUF
+ * because it's probably a good guess as to what is implemented correctly
+ * everywhere.
+ */
+#ifdef PIPE_BUF
+#define LOG_BUFSIZE PIPE_BUF
+#else
+#define LOG_BUFSIZE (512)
+#endif
+
/*
* multi_log_state is our per-(virtual)-server configuration. We store
* an array of the logs we are going to use, each of type config_log_state.
@@ -209,6 +225,10 @@
char *fname;
array_header *format;
int log_fd;
+#ifdef BUFFERED_LOGS
+ int outcnt;
+ char outbuf[LOG_BUFSIZE];
+#endif
} config_log_state;
/*
@@ -525,6 +545,16 @@
return cp ? cp : "-";
}
+#ifdef BUFFERED_LOGS
+static void flush_log (config_log_state *cls)
+{
+ if (cls->outcnt && cls->log_fd != -1) {
+ write (cls->log_fd, cls->outbuf, cls->outcnt);
+ cls->outcnt = 0;
+ }
+}
+#endif
+
static int config_log_transaction(request_rec *r, config_log_state *cls,
array_header *default_format) {
array_header *strsa;
@@ -562,8 +592,20 @@
strcpy (s, strs[i]);
s += strlen (strs[i]);
}
-
- write(cls->log_fd, str, strlen(str));
+
+#ifdef BUFFERED_LOGS
+ if (len + cls->outcnt > LOG_BUFSIZE) {
+ flush_log (cls);
+ }
+ if (len >= LOG_BUFSIZE) {
+ write (cls->log_fd, str, len);
+ } else {
+ memcpy (&cls->outbuf[cls->outcnt], str, len);
+ cls->outcnt += len;
+ }
+#else
+ write (cls->log_fd, str, len);
+#endif
return OK;
}
@@ -606,7 +648,7 @@
(multi_log_state *)palloc(p, sizeof (multi_log_state));
mls->config_logs =
- make_array(p, 5, sizeof (config_log_state));
+ make_array(p, 1, sizeof (config_log_state));
mls->default_format = NULL;
mls->server_config_logs = NULL;
@@ -736,6 +778,9 @@
exit(1);
}
}
+#ifdef BUFFERED_LOGS
+ cls->outcnt = 0;
+#endif
return cls;
}
@@ -784,6 +829,32 @@
for (s = s->next; s; s = s->next) open_multi_logs (s, p);
}
+#ifdef BUFFERED_LOGS
+static void flush_all_logs (server_rec *s, pool *p)
+{
+ multi_log_state *mls;
+ array_header *log_list;
+ config_log_state *clsarray;
+ int i;
+
+ for (; s; s = s->next) {
+ mls = get_module_config(s->module_config, &config_log_module);
+ log_list = NULL;
+ if (mls->config_logs->nelts) {
+ log_list = mls->config_logs;
+ } else if (mls->server_config_logs) {
+ log_list = mls->server_config_logs;
+ }
+ if (log_list) {
+ clsarray = (config_log_state *)log_list->elts;
+ for (i = 0; i < log_list->nelts; ++i) {
+ flush_log (&clsarray[i]);
+ }
+ }
+ }
+}
+#endif
+
module MODULE_VAR_EXPORT config_log_module = {
STANDARD_MODULE_STUFF,
init_config_log, /* initializer */
@@ -802,6 +873,10 @@
multi_log_transaction, /* logger */
NULL, /* header parser */
NULL, /* child_init */
- NULL, /* child_exit */
+#ifdef BUFFERED_LOGS
+ flush_all_logs, /* child_exit */
+#else
+ NULL,
+#endif
NULL /* post read-request */
};
Index: modules/standard/mod_status.c
===================================================================
RCS file: /export/home/cvs/apachen/src/modules/standard/mod_status.c,v
retrieving revision 1.62
diff -u -r1.62 mod_status.c
--- mod_status.c 1997/09/11 19:32:49 1.62
+++ mod_status.c 1997/09/11 20:00:45
@@ -126,6 +126,7 @@
module MODULE_VAR_EXPORT status_module;
+#ifdef STATUS
/* Format the number of bytes nicely */
static void format_byte_out(request_rec *r,unsigned long bytes)
@@ -149,6 +150,7 @@
else
rprintf(r,"%.1f GB",(float)kbytes/MBYTE);
}
+#endif
static void show_time(request_rec *r,time_t tsecs)
{
@@ -223,7 +225,8 @@
int no_table_report=0;
server_rec *server = r->server;
short_score score_record;
- char status[]="??????????";
+ parent_score ps_record;
+ char status[SERVER_NUM_STATUS];
char stat_buffer[HARD_SERVER_LIMIT];
clock_t tu,ts,tcu,tcs;
@@ -290,9 +293,10 @@
sync_scoreboard_image();
for (i = 0; i<HARD_SERVER_LIMIT; ++i)
{
- score_record = get_scoreboard_info(i);
+ score_record = scoreboard_image->servers[i];
+ ps_record = scoreboard_image->parent[i];
res = score_record.status;
- stat_buffer[i] = status[res];
+ stat_buffer[i] = (res == SERVER_UNKNOWN) ? '?' : status[res];
if (res == SERVER_READY)
ready++;
else if (res != SERVER_DEAD && res != SERVER_UNKNOWN)
@@ -300,8 +304,7 @@
#if defined(STATUS)
lres = score_record.access_count;
bytes= score_record.bytes_served;
- if (lres!=0 || (score_record.status != SERVER_READY
- && score_record.status != SERVER_DEAD))
+ if (lres!=0 || (res != SERVER_READY && res != SERVER_DEAD))
{
#ifndef NO_TIMES
tu+=score_record.times.tms_utime;
@@ -444,7 +447,8 @@
for (i = 0; i<HARD_SERVER_LIMIT; ++i)
{
- score_record=get_scoreboard_info(i);
+ score_record = scoreboard_image->servers[i];
+ ps_record = scoreboard_image->parent[i];
#if defined(NO_GETTIMEOFDAY)
#ifndef NO_TIMES
@@ -487,7 +491,7 @@
i,(int)conn_lres,my_lres,lres);
else
rprintf(r,"<b>Server %d</b> (%d): %d|%lu|%lu [",
- i,(int)score_record.x.pid,(int)conn_lres,my_lres,lres);
+ i,(int)ps_record.pid,(int)conn_lres,my_lres,lres);
switch (score_record.status)
{
@@ -533,7 +537,11 @@
score_record.times.tms_cutime/tick,
score_record.times.tms_cstime/tick,
#endif
+#ifdef OPTIMIZE_TIMEOUTS
+ difftime(nowtime, ps_record.last_rtime),
+#else
difftime(nowtime, score_record.last_used),
+#endif
(long)req_time);
format_byte_out(r,conn_bytes);
rputs("|",r);
@@ -552,7 +560,7 @@
i,(int)conn_lres,my_lres,lres);
else
rprintf(r,"<tr><td><b>%d</b><td>%d<td>%d/%lu/%lu",
- i,(int)score_record.x.pid,(int)conn_lres,my_lres,lres);
+ i,(int)ps_record.pid,(int)conn_lres,my_lres,lres);
switch (score_record.status)
{
@@ -597,7 +605,11 @@
score_record.times.tms_cutime +
score_record.times.tms_cstime)/tick,
#endif
+#ifdef OPTIMIZE_TIMEOUTS
+ difftime(nowtime, ps_record.last_rtime),
+#else
difftime(nowtime, score_record.last_used),
+#endif
(long)req_time);
rprintf(r,"<td>%-1.1f<td>%-2.2f<td>%-2.2f\n",
(float)conn_bytes/KBYTE, (float)my_bytes/MBYTE,