You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@httpd.apache.org by Dean Gaudet <dg...@arctic.org> on 1998/02/09 06:36:40 UTC

proper logging on pipelined connections

Woah this is more ugly than I thought it would be.  I think I uncovered
some race conditions in the 1.3b4 code, I'll nail them in 1.3b5 regardless
what we do with this patch. 

Ok the bug I'm trying to fix:  pipelined connections can result in
log_transaction() before the last write() for the request.

My fix is to create a queue of pending requests.  When their last bytes
are written the log is done.  In order to do this I need to keep around
the request_rec... which means a bunch of resources aren't freed
immediately.  The maximum number of pending requests is
DEFAULT_BUFFER_SIZE / minimum size of a response.  Given that there's a
bunch of headers in a response, the minimum size is somewhere at least 128
bytes.  So that means the max pending is something like 4096 / 128 = 32. 
(This is quite easy to run into if you do a whole bunch of HEAD requests.)

For an example of the increased mem usage I made 75 pipelined GET requests
for the contents of the /icons directory.  The server increased from 996K
to 1056K.  Then I made the same requests as HEADs and it went up to 1140K.

Take a look at the patch... it won't work on win32 without help because I
have to play in child_main.  I probably didn't get the time status stuff
correct... it's hard to do when you've got overlapping requests.

Personally I don't really care about my logs being this accurate, I can't
justify the memory usage for it.  The current code logs everything that
this new code would, but just doesn't get the time right.

This new code doesn't even do the extra work required to log when
a request wasn't really served to the child... that's getting really
ugly.

A more memory efficient solution would split up log_transaction() into
two functions:

    log_transaction_pre_commit(request_rec *r, logging_rec *lr)

    log_transaction_commit(logging_rec *lr)

The logging_rec would have a fresh pool (so that r->pool could
be destroyed), and contain essentially just a request_config vector.
The logging module would be expected to build a logging record the way
it wants from request_rec and stuff it into its request_config vector.
The record wouldn't be logged until log_transaction_commit was performed.

Dean

Index: include/buff.h
===================================================================
RCS file: /export/home/cvs/apache-1.3/src/include/buff.h,v
retrieving revision 1.34
diff -u -r1.34 buff.h
--- buff.h	1998/02/07 21:54:13	1.34
+++ buff.h	1998/02/09 05:19:11
@@ -90,6 +90,8 @@
 
 typedef struct buff_struct BUFF;
 
+struct buff_outhook;
+
 struct buff_struct {
     int flags;			/* flags */
     unsigned char *inptr;	/* pointer to next location to read */
@@ -117,6 +119,11 @@
     Sfio_t *sf_in;
     Sfio_t *sf_out;
 #endif
+
+    /* used internally to implement bouthook() */
+    struct buff_outhook *outgoing_head;
+    struct buff_outhook **outgoing_tail;
+    struct buff_outhook *outgoing_free;
 };
 
 #ifdef B_SFIO
@@ -193,6 +200,13 @@
 API_EXPORT(int) bfileno(BUFF *fb, int direction);
 
 /* bflush() if a read now would block, but don't actually read anything */
-API_EXPORT(void) bhalfduplex(BUFF *fb);
+API_EXPORT(int) bhalfduplex(BUFF *fb);
+
+/* bouthook() tells BUFF to call func(data) when the most recently bwritten
+ * byte has been passed to the kernel.  func(data) is guaranteed to be called
+ * in the same order as the bouthook() calls.  0 returned on success, -1
+ * returned on error.
+ */
+API_EXPORT(int) bouthook(BUFF *fb, void (*func)(void *data), void *data);
 
 #endif	/* !APACHE_BUFF_H */
Index: main/buff.c
===================================================================
RCS file: /export/home/cvs/apache-1.3/src/main/buff.c,v
retrieving revision 1.63
diff -u -r1.63 buff.c
--- buff.c	1998/02/07 10:34:43	1.63
+++ buff.c	1998/02/09 05:19:12
@@ -93,6 +93,13 @@
 #define LARGE_WRITE_THRESHOLD 31
 #endif
 
+/* structure for managing bouthook() */
+struct buff_outhook {
+    struct buff_outhook *next;
+    void (*func)(void *);
+    void *data;
+    int outcnt;
+};
 
 /*
  * Buffered I/O routines.
@@ -349,6 +356,10 @@
 		       (size_t) SF_UNBOUND, 1, SF_WRITE);
 #endif
 
+    fb->outgoing_head = NULL;
+    fb->outgoing_tail = &fb->outgoing_head;
+    fb->outgoing_free = NULL;
+
     return fb;
 }
 
@@ -388,6 +399,70 @@
     }
 }
 
+API_EXPORT(int) bouthook(BUFF *fb, void (*func)(void *), void *data)
+{
+    struct buff_outhook *hook;
+
+    /* is this even writable? */
+    if (!(fb->flags & B_WR)) {
+	return -1;
+    }
+    /* is the buffer empty? if so do it right away */
+    if ((fb->outchunk == -1 && fb->outcnt == 0)
+	|| fb->outcnt == fb->outchunk + CHUNK_HEADER_SIZE) {
+	func(data);
+	return 0;
+    }
+    /* otherwise we need to queue it up */
+    hook = fb->outgoing_free;
+    if (!hook) {
+	hook = palloc(fb->pool, sizeof(*hook));
+    }
+    else {
+	fb->outgoing_free = hook->next;
+    }
+    hook->func = func;
+    hook->data = data;
+    hook->outcnt = fb->outcnt;
+    hook->next = NULL;
+    *fb->outgoing_tail = hook;
+    fb->outgoing_tail = &hook->next;
+    return 0;
+}
+
+/* process all the outhooks up to and including outcnt */
+static void process_outhook(BUFF *fb, int outcnt)
+{
+    struct buff_outhook *hook;
+    struct buff_outhook *tmp;
+
+    hook = fb->outgoing_head;
+    while (hook && hook->outcnt <= outcnt) {
+	hook->func(hook->data);
+	tmp = hook->next;
+	hook->next = fb->outgoing_free;
+	hook = tmp;
+    }
+    fb->outgoing_head = hook;
+    if (!hook) {
+	fb->outgoing_tail = &fb->outgoing_head;
+    }
+}
+
+/* shift any remaining outhooks by i bytes since the buffer
+ * has been contracted
+ */
+static void subtract_outhook(BUFF *fb, int i)
+{
+    struct buff_outhook *hook;
+
+    hook = fb->outgoing_head;
+    while (hook) {
+	hook->outcnt -= i;
+	hook = hook->next;
+    }
+}
+
 static int bflush_core(BUFF *fb);
 
 /*
@@ -532,14 +607,18 @@
 #endif
 
 
-API_EXPORT(void) bhalfduplex(BUFF *fb)
+/* If a read would block then bflush now.  Returns 0 if a read wouldn't
+ * block or there's no output.  Returns 1 if a read would block or there's
+ * some error (in any event it flushed if it returns 1).
+ */
+API_EXPORT(int) bhalfduplex(BUFF *fb)
 {
     int rv;
     fd_set fds;
     struct timeval tv;
 
     if (fb->incnt > 0 || fb->outcnt == 0) {
-	return;
+	return 0;
     }
     /* test for a block */
     do {
@@ -552,7 +631,9 @@
     /* treat any error as if it would block as well */
     if (rv != 1) {
 	bflush(fb);
+	return 1;
     }
+    return 0;
 }
 
 static ap_inline int saferead_guts(BUFF *fb, void *buf, int nbyte)
@@ -1129,11 +1210,12 @@
 	++nvec;
     }
 
-    fb->outcnt = 0;
     if (writev_it_all(fb, vec, nvec)) {
 	return -1;
     }
-    else if (fb->flags & B_CHUNK) {
+    process_outhook(fb, fb->outcnt);
+    fb->outcnt = 0;
+    if (fb->flags & B_CHUNK) {
 	start_chunk(fb);
     }
     return nbyte;
@@ -1223,6 +1305,7 @@
 		/* we cannot continue after a chunked error */
 		return -1;
 	    }
+	    process_outhook(fb, fb->outcnt);
 	    fb->outcnt = 0;
 	    break;
 	}
@@ -1231,6 +1314,8 @@
 	    return nwr ? nwr : -1;
 	}
 
+	process_outhook(fb, i);
+
 	/* deal with a partial write */
 	if (i < fb->outcnt) {
 	    int j, n = fb->outcnt;
@@ -1238,6 +1323,7 @@
 	    for (j = i; j < n; j++)
 		x[j - i] = x[j];
 	    fb->outcnt -= i;
+	    subtract_outhook(fb, i);
 	}
 	else
 	    fb->outcnt = 0;
@@ -1293,6 +1379,8 @@
 	if (i <= 0)
 	    return -1;
 
+	process_outhook(fb, i);
+
 	/*
 	 * We should have written all the data, but if the fd was in a
 	 * strange (non-blocking) mode, then we might not have done so.
@@ -1302,6 +1390,7 @@
 	    unsigned char *x = fb->outbase;
 	    for (j = i; j < n; j++)
 		x[j - i] = x[j];
+	    subtract_outhook(fb, i);
 	}
 	fb->outcnt -= i;
 
Index: main/http_main.c
===================================================================
RCS file: /export/home/cvs/apache-1.3/src/main/http_main.c,v
retrieving revision 1.286
diff -u -r1.286 http_main.c
--- http_main.c	1998/02/09 01:09:39	1.286
+++ http_main.c	1998/02/09 05:19:19
@@ -1130,7 +1130,7 @@
  * distinguish between a dropped connection and something that might be
  * worth logging.
  */
-static void lingering_close(request_rec *r)
+static void lingering_close(conn_rec *conn)
 {
     char dummybuf[2048];
     struct timeval tv;
@@ -1144,20 +1144,20 @@
 
     /* Send any leftover data to the client, but never try to again */
 
-    if (bflush(r->connection->client) == -1) {
-	kill_timeout(r);
-	bclose(r->connection->client);
+    if (bflush(conn->client) == -1) {
+	kill_timeout(0);
+	bclose(conn->client);
 	return;
     }
-    bsetflag(r->connection->client, B_EOUT, 1);
+    bsetflag(conn->client, B_EOUT, 1);
 
     /* Close our half of the connection --- send the client a FIN */
 
-    lsd = r->connection->client->fd;
+    lsd = conn->client->fd;
 
-    if ((shutdown(lsd, 1) != 0) || r->connection->aborted) {
-	kill_timeout(r);
-	bclose(r->connection->client);
+    if ((shutdown(lsd, 1) != 0) || conn->aborted) {
+	kill_timeout(0);
+	bclose(conn->client);
 	return;
     }
 
@@ -1197,9 +1197,9 @@
 
     /* Should now have seen final ack.  Safe to finally kill socket */
 
-    bclose(r->connection->client);
+    bclose(conn->client);
 
-    kill_timeout(r);
+    kill_timeout(0);
 }
 #endif /* ndef NO_LINGCLOSE */
 
@@ -2910,6 +2910,34 @@
     requests_this_child = max_requests_per_child = 1;
 }
 
+static void log_and_cleanup_request(void *vr)
+{
+    request_rec *r = vr;
+#ifdef STATUS
+    int old_stat;
+
+    old_stat = update_child_status(r->connection->child_num,
+                                   SERVER_BUSY_LOG, r);
+#endif
+    log_transaction(r);
+
+#ifdef STATUS
+    (void) update_child_status(r->connection->child_num, old_stat, r);
+    time_process_request(r->connection->child_num, STOP_PREQUEST);
+    increment_counts(my_child_num, r);
+#endif
+
+    /* note that this is atomic -- timeout_req will only ever be NULL,
+     * r, or some subsequent request.  After r has been handled it will
+     * never again be equal to r until we do the destroy_pool(r->pool).
+     */
+    if (timeout_req == r) {
+	kill_timeout(0);	/* Cancel any outstanding alarms. */
+	timeout_req = NULL;	/* No request in progress */
+    }
+    destroy_pool(r->pool);
+}
+
 void child_main(int child_num_arg)
 {
     NET_SIZE_T clen;
@@ -3163,14 +3191,23 @@
 
 	    process_request(r);
 
-#if defined(STATUS)
-	    increment_counts(my_child_num, r);
-#endif
+	    /*
+	    * We want to flush the last packet if this isn't a pipelining connection
+	    * *before* we start into logging.  Suppose that the logging causes a DNS
+	    * lookup to occur, which may have a high latency.  If we hold off on
+	    * this packet, then it'll appear like the link is stalled when really
+	    * it's the application that's stalled.
+	    */
+	    if (bhalfduplex(r->connection->client)) {
+		log_and_cleanup_request(r);
+	    }
+	    else {
+		bouthook(r->connection->client, log_and_cleanup_request, r);
+	    }
 
 	    if (!current_conn->keepalive || current_conn->aborted)
 		break;
 
-	    destroy_pool(r->pool);
 	    (void) update_child_status(my_child_num, SERVER_BUSY_KEEPALIVE,
 				       (request_rec *) NULL);
 
@@ -3202,16 +3239,22 @@
 	 * in our buffers.  If possible, try to avoid a hard close until the
 	 * client has ACKed our FIN and/or has stopped sending us data.
 	 */
+	
+	/* Note that r == NULL means that no request was read.  r != NULL
+	 * means that a request was completed and we decided to not do
+	 * keepalive.  In this case r points to possibly freed memory so
+	 * we can't use r at all... but we really only need current_conn
+	 */
 
 #ifdef NO_LINGCLOSE
 	bclose(conn_io);	/* just close it */
 #else
-	if (r && r->connection
-	    && !r->connection->aborted
-	    && r->connection->client
-	    && (r->connection->client->fd >= 0)) {
+	if (r && current_conn
+	    && !current_conn->aborted
+	    && current_conn->client
+	    && (current_conn->client->fd >= 0)) {
 
-	    lingering_close(r);
+	    lingering_close(current_conn);
 	}
 	else {
 	    bsetflag(conn_io, B_EOUT, 1);
Index: main/http_request.c
===================================================================
RCS file: /export/home/cvs/apache-1.3/src/main/http_request.c,v
retrieving revision 1.106
diff -u -r1.106 http_request.c
--- http_request.c	1998/02/08 18:16:03	1.106
+++ http_request.c	1998/02/09 05:19:20
@@ -1135,32 +1135,10 @@
 void process_request(request_rec *r)
 {
 #ifdef STATUS
-    int old_stat;
-
     time_process_request(r->connection->child_num, START_PREQUEST);
 #endif
 
     process_request_internal(r);
-
-#ifdef STATUS
-    old_stat = update_child_status(r->connection->child_num,
-                                   SERVER_BUSY_LOG, r);
-#endif
-
-    /*
-     * We want to flush the last packet if this isn't a pipelining connection
-     * *before* we start into logging.  Suppose that the logging causes a DNS
-     * lookup to occur, which may have a high latency.  If we hold off on
-     * this packet, then it'll appear like the link is stalled when really
-     * it's the application that's stalled.
-     */
-    bhalfduplex(r->connection->client);
-    log_transaction(r);
-
-#ifdef STATUS
-    (void) update_child_status(r->connection->child_num, old_stat, r);
-    time_process_request(r->connection->child_num, STOP_PREQUEST);
-#endif
 }
 
 table *rename_original_env(pool *p, table *t)