You are viewing a plain text version of this content. The canonical link for it is here.

Posted to dev@httpd.apache.org by Justin Erenkrantz <je...@ebuilt.com> on 2001/09/24 23:03:11 UTC

[PATCH] Take 3 of httpd filter rewrite...

It seems that this patch is growing by leaps and bounds each time
I post.  =-)

For those that haven't been tracking the threads, major points:
- Removes the dechunk filter and merges it with ap_http_filter (aka
  HTTP_IN).  The dechunk filter was incorrectly handling certain
  cases (trailers).
- Moves ap_http_filter from a connection filter to a request filter
  to support the consolidation above (it needs header info).
- Change support code to allow the http_filter to be a
  request filter (how the request is setup initially).
- Move most of the logic from the HTTP_IN to CORE_IN (core_input_filter)
  HTTP_IN is now only concerned about HTTP things.  The core filter
  is now responsible for returning data.  It is impossible to
  consolidate dechunk and http without this because HTTP_IN previously
  buffered data.  As Greg has suggested, it may make sense to write 
  some brigade functions that handle input (getline).  It should be 
  fairly trivial to add these.  Some of the calls in ap_http_filter
  could be switched as well.
- Removes the EOF return code in the bucket code (may be omitted?)
- Patch input_body_filter in httpd-test to use buckets/brigades
  properly.  It was assuming that it would return more bytes than
  asked for.
  
Passes httpd-test (with patch below for input_body_filter) and
looks good with flood too.

This is fairly large, but I think we're moving in the right
direction here.  http_protocol.c is just a gnarly diff.
-- justin

Index: server/core.c
===================================================================
RCS file: /home/cvs/httpd-2.0/server/core.c,v
retrieving revision 1.61
diff -u -r1.61 core.c
--- server/core.c	2001/09/19 05:52:42	1.61
+++ server/core.c	2001/09/24 20:46:51
@@ -2753,23 +2753,145 @@
     return ap_pass_brigade(r->output_filters, bb);
 }
 
+typedef struct core_filter_ctx {
+    apr_bucket_brigade *b;
+} core_ctx_t;
+
 static int core_input_filter(ap_filter_t *f, apr_bucket_brigade *b, ap_input_mode_t mode, apr_off_t *readbytes)
 {
     apr_bucket *e;
+    apr_status_t rv;
+    core_ctx_t *ctx = f->ctx;
+    char *buff, *pos;
+    apr_size_t len;
+
+    if (!ctx)
+    {
+        f->ctx = ctx = apr_pcalloc(f->c->pool, sizeof(*ctx));
+        ctx->b = apr_brigade_create(f->c->pool);
 
-    if (!f->ctx) {    /* If we haven't passed up the socket yet... */
-        f->ctx = (void *)1;
+        /* seed the brigade with the client socket. */
         e = apr_bucket_socket_create(f->c->client_socket);
+        APR_BRIGADE_INSERT_TAIL(ctx->b, e);
+    }
+
+    if (mode == AP_MODE_PEEK) {
+        apr_bucket *e;
+        const char *str, *c;
+        apr_size_t length;
+
+        /* The purpose of this loop is to ignore any CRLF (or LF) at the end
+         * of a request.  Many browsers send extra lines at the end of POST
+         * requests.  We use the PEEK method to determine if there is more
+         * data on the socket, so that we know if we should delay sending the
+         * end of one request until we have served the second request in a
+         * pipelined situation.  We don't want to actually delay sending a
+         * response if the server finds a CRLF (or LF), becuause that doesn't
+         * mean that there is another request, just a blank line.
+         */
+        while (1) {
+
+            if (APR_BRIGADE_EMPTY(ctx->b))
+                return APR_EOF;
+
+            e = APR_BRIGADE_FIRST(ctx->b);
+
+            rv = apr_bucket_read(e, &str, &length, APR_NONBLOCK_READ);
+
+            if (rv != APR_SUCCESS)
+                return rv;
+
+            c = str;
+            while (c < str + length) {
+                if (*c == APR_ASCII_LF)
+                    c++;
+                else if (*c == APR_ASCII_CR && *(c + 1) == APR_ASCII_LF)
+                    c += 2;
+                else 
+                    return APR_SUCCESS;
+            }
+            /* If we reach here, we were a bucket just full of CRLFs, so
+             * just toss the bucket. */
+            /* FIXME: Is this the right thing to do in the core? */
+            apr_bucket_delete(e);
+        }
+    }
+
+    /* If readbytes is -1, we want to just read everything until the end
+     * of the brigade, which in this case means the end of the socket.  To
+     * do this, we loop through the entire brigade, until the socket is
+     * exhausted, at which point, it will automagically remove itself from
+     * the brigade.
+     */
+    if (*readbytes == -1) {
+        apr_bucket *e;
+        apr_off_t total;
+        const char *str;
+        apr_size_t len;
+        APR_BRIGADE_FOREACH(e, ctx->b) {
+            /* We don't care about these values.  We just want to force the
+             * lower level to just read it. */
+            apr_bucket_read(e, &str, &len, APR_BLOCK_READ);
+        }
+        APR_BRIGADE_CONCAT(b, ctx->b);
+
+        /* Force a recompute of the length */
+        apr_brigade_length(b, 1, &total);
+        *readbytes = total;
+        /* We have read until the brigade was empty, so we know that we 
+         * must be EOS. */
+        e = apr_bucket_eos_create();
         APR_BRIGADE_INSERT_TAIL(b, e);
         return APR_SUCCESS;
     }
-    else {            
-        /* Either some code lost track of the socket
-         * bucket or we already found out that the
-         * client closed.
-         */
-        return APR_EOF;
+    /* readbytes == 0 is "read a single line". otherwise, read a block. */
+    if (*readbytes) {
+        apr_off_t total;
+        apr_bucket *e;
+        apr_bucket_brigade *newbb;
+
+        newbb = NULL;
+
+        apr_brigade_partition(ctx->b, *readbytes, &e);
+        /* Must do split before CONCAT */
+        if (e != APR_BRIGADE_SENTINEL(ctx->b)) {
+            newbb = apr_brigade_split(ctx->b, e);
+        }
+        APR_BRIGADE_CONCAT(b, ctx->b);
+
+        /* FIXME: Is this really needed?  Due to pointer use in sentinels,
+         * I think so. */
+        if (newbb)
+            APR_BRIGADE_CONCAT(ctx->b, newbb);
+
+        apr_brigade_length(b, 1, &total);
+        *readbytes = total;
+
+        return APR_SUCCESS;
+    }
+
+    /* we are reading a single LF line, e.g. the HTTP headers */
+    while (!APR_BRIGADE_EMPTY(ctx->b)) {
+        e = APR_BRIGADE_FIRST(ctx->b);
+        if ((rv = apr_bucket_read(e, (const char **)&buff, &len, 
+                                  mode)) != APR_SUCCESS) {
+            return rv;
+        }
+
+        pos = memchr(buff, APR_ASCII_LF, len);
+        /* We found a match. */
+        if (pos != NULL) {
+            apr_bucket_split(e, pos - buff + 1);
+            APR_BUCKET_REMOVE(e);
+            APR_BRIGADE_INSERT_TAIL(b, e);
+            return APR_SUCCESS;
+        }
+        APR_BUCKET_REMOVE(e);
+        APR_BRIGADE_INSERT_TAIL(b, e);
+        *readbytes += len;
     }
+
+    return APR_SUCCESS;
 }
 
 /* Default filter.  This filter should almost always be used.  Its only job
Index: modules/http/http_core.c
===================================================================
RCS file: /home/cvs/httpd-2.0/modules/http/http_core.c,v
retrieving revision 1.282
diff -u -r1.282 http_core.c
--- modules/http/http_core.c	2001/08/25 23:43:18	1.282
+++ modules/http/http_core.c	2001/09/24 20:46:51
@@ -262,7 +262,6 @@
 
 static int ap_pre_http_connection(conn_rec *c)
 {
-    ap_add_input_filter("HTTP_IN", NULL, NULL, c);
     ap_add_input_filter("CORE_IN", NULL, NULL, c);
     ap_add_output_filter("CORE", NULL, NULL, c);
     return OK;
@@ -302,6 +301,15 @@
     return OK;
 }
 
+static int ap_http_create_req(request_rec *r)
+{
+    if (!r->main)
+    {
+        ap_add_input_filter("HTTP_IN", NULL, r, r->connection);
+    }
+    return OK;
+}
+
 static void ap_http_insert_filter(request_rec *r)
 {
     if (!r->main) {
@@ -320,10 +328,10 @@
     ap_hook_map_to_storage(ap_send_http_trace,NULL,NULL,APR_HOOK_MIDDLE);
     ap_hook_http_method(http_method,NULL,NULL,APR_HOOK_REALLY_LAST);
     ap_hook_default_port(http_port,NULL,NULL,APR_HOOK_REALLY_LAST);
+    ap_hook_create_request(ap_http_create_req, NULL, NULL, APR_HOOK_MIDDLE);
 
     ap_hook_insert_filter(ap_http_insert_filter, NULL, NULL, APR_HOOK_REALLY_LAST);
     ap_register_input_filter("HTTP_IN", ap_http_filter, AP_FTYPE_CONNECTION);
-    ap_register_input_filter("DECHUNK", ap_dechunk_filter, AP_FTYPE_TRANSCODE);
     ap_register_output_filter("HTTP_HEADER", ap_http_header_filter, 
                               AP_FTYPE_HTTP_HEADER);
     ap_register_output_filter("CHUNK", chunk_filter, AP_FTYPE_TRANSCODE);
Index: modules/http/http_protocol.c
===================================================================
RCS file: /home/cvs/httpd-2.0/modules/http/http_protocol.c,v
retrieving revision 1.362
diff -u -r1.362 http_protocol.c
--- modules/http/http_protocol.c	2001/09/17 13:12:37	1.362
+++ modules/http/http_protocol.c	2001/09/24 20:46:51
@@ -481,265 +481,117 @@
     return AP_HTTP_METHODS[methnum];
 }
 
-struct dechunk_ctx {
-    apr_size_t chunk_size;
-    apr_size_t bytes_delivered;
+static long get_chunk_size(char *);
+
+typedef struct http_filter_ctx {
+    int status;
+    apr_size_t remaining;
     enum {
-        WANT_HDR /* must have value zero */,
-        WANT_BODY,
-        WANT_TRL
+        BODY_NONE   /* must have value of zero */,
+        BODY_LENGTH,
+        BODY_CHUNK
     } state;
-};
-
-static long get_chunk_size(char *);
+} http_ctx_t;
 
-apr_status_t ap_dechunk_filter(ap_filter_t *f, apr_bucket_brigade *bb,
-                               ap_input_mode_t mode, apr_off_t *readbytes)
+/* Hi, I'm the main input filter for HTTP requests. 
+ * I handle chunked and content-length bodies. */
+apr_status_t ap_http_filter(ap_filter_t *f, apr_bucket_brigade *b, ap_input_mode_t mode, apr_off_t *readbytes)
 {
+    apr_bucket *e;
+    http_ctx_t *ctx = f->ctx;
     apr_status_t rv;
-    struct dechunk_ctx *ctx = f->ctx;
-    apr_bucket *b;
-    const char *buf;
-    apr_size_t len;
 
     if (!ctx) {
-        f->ctx = ctx = apr_pcalloc(f->r->pool, sizeof(struct dechunk_ctx));
+        f->ctx = ctx = apr_pcalloc(f->r->pool, sizeof(*ctx));
+        ctx->status = f->r->status;
     }
 
-    do {
-        if (ctx->chunk_size == ctx->bytes_delivered) {
-            /* Time to read another chunk header or trailer...  ap_http_filter() is 
-             * the next filter in line and it knows how to return a brigade with 
-             * one line.
-             */
-            char line[30];
+    /* Basically, we have to stay out of the way until server/protocol.c
+     * says it is okay - which it does by setting r->status to OK. */
+    if (f->r->status != ctx->status)
+    {
+        int old_status;
+        /* Allow us to be reentrant! */
+        old_status = ctx->status;
+        ctx->status = f->r->status;
+
+        if (old_status == HTTP_REQUEST_TIME_OUT && f->r->status == HTTP_OK)
+        {
+            const char *tenc, *lenp;
+            tenc = apr_table_get(f->r->headers_in, "Transfer-Encoding");
+            lenp = apr_table_get(f->r->headers_in, "Content-Length");
+
+            if (tenc) {
+                if (!strcasecmp(tenc, "chunked")) {
+                    char line[30];
             
-            if ((rv = ap_getline(line, sizeof(line), f->r,
-                                 0 /* readline */)) < 0) {
-                return rv;
-            }
-            switch(ctx->state) {
-            case WANT_HDR:
-                ctx->chunk_size = get_chunk_size(line);
-                ctx->bytes_delivered = 0;
-                if (ctx->chunk_size == 0) {
-                    ctx->state = WANT_TRL;
+                    if ((rv = ap_getline(line, sizeof(line), f->r, 0)) < 0) {
+                        return rv;
+                    }
+                    ctx->state = BODY_CHUNK;
+                    ctx->remaining = get_chunk_size(line);
                 }
-                else {
-                    ctx->state = WANT_BODY;
-                }
-                break;
-            case WANT_TRL:
-                /* XXX sanity check end chunk here */
-                if (strlen(line)) {
-                    /* bad trailer */
-                }
-                if (ctx->chunk_size == 0) { /* we just finished the last chunk? */
-                    /* ### woah... ap_http_filter() is doing this, too */
-                    /* append eos bucket and get out */
-                    b = apr_bucket_eos_create();
-                    APR_BRIGADE_INSERT_TAIL(bb, b);
-                    return APR_SUCCESS;
-                }
-                ctx->state = WANT_HDR;
-                break;
-            default:
-                ap_assert(ctx->state == WANT_HDR || ctx->state == WANT_TRL);
             }
-        }
-    } while (ctx->state != WANT_BODY);
+            else if (lenp) {
+                const char *pos = lenp;
 
-    if (ctx->state == WANT_BODY) {
-        /* Tell ap_http_filter() how many bytes to deliver. */
-        apr_off_t chunk_bytes = ctx->chunk_size - ctx->bytes_delivered;
-
-        if ((rv = ap_get_brigade(f->next, bb, mode,
-                                 &chunk_bytes)) != APR_SUCCESS) {
-            return rv;
-        }
-
-        /* Walk through the body, accounting for bytes, and removing an eos
-         * bucket if ap_http_filter() delivered the entire chunk.
-         *
-         * ### this shouldn't be necessary. 1) ap_http_filter shouldn't be
-         * ### adding EOS buckets. 2) it shouldn't return more bytes than
-         * ### we requested, therefore the total len can be found with a
-         * ### simple call to apr_brigade_length(). no further munging
-         * ### would be needed.
-         */
-        b = APR_BRIGADE_FIRST(bb);
-        while (b != APR_BRIGADE_SENTINEL(bb) && !APR_BUCKET_IS_EOS(b)) {
-            apr_bucket_read(b, &buf, &len, mode);
-            AP_DEBUG_ASSERT(len <= ctx->chunk_size - ctx->bytes_delivered);
-            ctx->bytes_delivered += len;
-            b = APR_BUCKET_NEXT(b);
-        }
-        if (ctx->bytes_delivered == ctx->chunk_size) {
-            AP_DEBUG_ASSERT(APR_BUCKET_IS_EOS(b));
-            apr_bucket_delete(b);
-            ctx->state = WANT_TRL;
+                while (apr_isdigit(*pos) || apr_isspace(*pos))
+                    ++pos;
+                if (*pos == '\0') {
+                    ctx->state = BODY_LENGTH;
+                    ctx->remaining = atol(lenp);
+                }
+            }
         }
     }
-
-    return APR_SUCCESS;
-}
-
-typedef struct http_filter_ctx {
-    apr_bucket_brigade *b;
-} http_ctx_t;
 
-apr_status_t ap_http_filter(ap_filter_t *f, apr_bucket_brigade *b, ap_input_mode_t mode, apr_off_t *readbytes)
-{
-    apr_bucket *e;
-    char *buff;
-    apr_size_t len;
-    char *pos;
-    http_ctx_t *ctx = f->ctx;
-    apr_status_t rv;
+    if (!ctx->remaining)
+    {
+        switch (ctx->state)
+        {
+        case BODY_NONE:
+            break;
+        case BODY_LENGTH:
+            e = apr_bucket_eos_create();
+            APR_BRIGADE_INSERT_TAIL(b, e);
+            return APR_SUCCESS;
+        case BODY_CHUNK:
+            {
+                char line[30];
+        
+                ctx->state = BODY_NONE;
+
+                /* We need to read the CRLF after the chunk.  */
+                if ((rv = ap_getline(line, sizeof(line), f->r, 0)) < 0) {
+                    return rv;
+                }
 
-    if (!ctx) {
-        f->ctx = ctx = apr_pcalloc(f->c->pool, sizeof(*ctx));
-        ctx->b = apr_brigade_create(f->c->pool);
-    }
+                /* Read the real chunk line. */
+                if ((rv = ap_getline(line, sizeof(line), f->r, 0)) < 0) {
+                    return rv;
+                }
+                ctx->state = BODY_CHUNK;
+                ctx->remaining = get_chunk_size(line);
 
-    if (mode == AP_MODE_PEEK) {
-        apr_bucket *e;
-        const char *str;
-        apr_size_t length;
-
-        /* The purpose of this loop is to ignore any CRLF (or LF) at the end
-         * of a request.  Many browsers send extra lines at the end of POST
-         * requests.  We use the PEEK method to determine if there is more
-         * data on the socket, so that we know if we should delay sending the
-         * end of one request until we have served the second request in a
-         * pipelined situation.  We don't want to actually delay sending a
-         * response if the server finds a CRLF (or LF), becuause that doesn't
-         * mean that there is another request, just a blank line.
-         */
-        while (1) {
-            if (APR_BRIGADE_EMPTY(ctx->b)) {
-                e = NULL;
-            }
-            else {
-                e = APR_BRIGADE_FIRST(ctx->b);
-            }
-            if (!e || apr_bucket_read(e, &str, &length, APR_NONBLOCK_READ) != APR_SUCCESS) {
-                return APR_EOF;
-            }
-            else {
-                const char *c = str;
-                while (c < str + length) {
-                    if (*c == APR_ASCII_LF)
-                        c++;
-                    else if (*c == APR_ASCII_CR && *(c + 1) == APR_ASCII_LF)
-                        c += 2;
-                    else return APR_SUCCESS;
+                if (!ctx->remaining)
+                {
+                    e = apr_bucket_eos_create();
+                    APR_BRIGADE_INSERT_TAIL(b, e);
+                    return APR_SUCCESS;
                 }
-                apr_bucket_delete(e);
             }
+            break;
         }
     }
 
-    if (APR_BRIGADE_EMPTY(ctx->b)) {
-        if ((rv = ap_get_brigade(f->next, ctx->b, mode, readbytes)) != APR_SUCCESS) {
-            return rv;
-        }
-    }
+    rv = ap_get_brigade(f->next, b, mode, readbytes);
 
-    /* If readbytes is -1, we want to just read everything until the end
-     * of the brigade, which in this case means the end of the socket.  To
-     * do this, we loop through the entire brigade, until the socket is
-     * exhausted, at which point, it will automagically remove itself from
-     * the brigade.
-     */
-    if (*readbytes == -1) {
-        apr_bucket *e;
-        apr_off_t total;
-        APR_BRIGADE_FOREACH(e, ctx->b) {
-            const char *str;
-            apr_size_t len;
-            apr_bucket_read(e, &str, &len, APR_BLOCK_READ);
-        }
-        APR_BRIGADE_CONCAT(b, ctx->b);
-        apr_brigade_length(b, 1, &total);
-        *readbytes = total;
-        e = apr_bucket_eos_create();
-        APR_BRIGADE_INSERT_TAIL(b, e);
-        return APR_SUCCESS;
-    }
-    /* readbytes == 0 is "read a single line". otherwise, read a block. */
-    if (*readbytes) {
-        apr_off_t total;
-
-        /* ### the code below, which moves bytes from one brigade to the
-           ### other is probably bogus. presuming the next filter down was
-           ### working properly, it should not have returned more than
-           ### READBYTES bytes, and we wouldn't have to do any work.
-        */
-
-        APR_BRIGADE_NORMALIZE(ctx->b);
-        if (APR_BRIGADE_EMPTY(ctx->b)) {
-            if ((rv = ap_get_brigade(f->next, ctx->b, mode, readbytes)) != APR_SUCCESS) {
-                return rv;
-            }
-        }
-            
-        apr_brigade_partition(ctx->b, *readbytes, &e);
-        APR_BRIGADE_CONCAT(b, ctx->b);
-        if (e != APR_BRIGADE_SENTINEL(ctx->b)) {
-            apr_bucket_brigade *temp;
-
-            temp = apr_brigade_split(b, e);
-
-            /* ### darn. gotta ensure the split brigade is in the proper pool.
-               ### this is a band-aid solution; we shouldn't even be doing
-               ### all of this brigade munging (per the comment above).
-               ### until then, this will get the right lifetimes. */
-            APR_BRIGADE_CONCAT(ctx->b, temp);
-        }
-        else {
-            if (!APR_BRIGADE_EMPTY(ctx->b)) {
-                ctx->b = NULL; /*XXX*/
-            }
-        }
-        apr_brigade_length(b, 1, &total);
-        *readbytes -= total;
+    if (rv != APR_SUCCESS)
+        return rv;
 
-        /* ### this is a hack. it is saying, "if we have read everything
-           ### that was requested, then we are at the end of the request."
-           ### it presumes that the next filter up will *only* call us
-           ### with readbytes set to the Content-Length of the request.
-           ### that may not always be true, and this code is *definitely*
-           ### too presumptive of the caller's intent. the point is: this
-           ### filter is not the guy that is parsing the headers or the
-           ### chunks to determine where the end of the request is, so we
-           ### shouldn't be monkeying with EOS buckets.
-        */
-        if (*readbytes == 0) {
-            apr_bucket *eos = apr_bucket_eos_create();
-                
-            APR_BRIGADE_INSERT_TAIL(b, eos);
-        }
-        return APR_SUCCESS;
-    }
-
-    /* we are reading a single line, e.g. the HTTP headers */
-    while (!APR_BRIGADE_EMPTY(ctx->b)) {
-        e = APR_BRIGADE_FIRST(ctx->b);
-        if ((rv = apr_bucket_read(e, (const char **)&buff, &len, mode)) != APR_SUCCESS) {
-            return rv;
-        }
-
-        pos = memchr(buff, APR_ASCII_LF, len);
-        if (pos != NULL) {
-            apr_bucket_split(e, pos - buff + 1);
-            APR_BUCKET_REMOVE(e);
-            APR_BRIGADE_INSERT_TAIL(b, e);
-            return APR_SUCCESS;
-        }
-        APR_BUCKET_REMOVE(e);
-        APR_BRIGADE_INSERT_TAIL(b, e);
-    }
+    if (ctx->state != BODY_NONE)
+        ctx->remaining -= *readbytes;
+
     return APR_SUCCESS;
 }
 
@@ -1406,7 +1258,6 @@
         }
 
         r->read_chunked = 1;
-        ap_add_input_filter("DECHUNK", NULL, r, r->connection);
     }
     else if (lenp) {
         const char *pos = lenp;

Index: buckets/apr_buckets_pipe.c
===================================================================
RCS file: /home/cvs/apr-util/buckets/apr_buckets_pipe.c,v
retrieving revision 1.41
diff -u -r1.41 apr_buckets_pipe.c
--- buckets/apr_buckets_pipe.c	2001/09/22 22:36:07	1.41
+++ buckets/apr_buckets_pipe.c	2001/09/24 20:45:44
@@ -110,10 +110,6 @@
         *str = a->data;
         if (rv == APR_EOF) {
             apr_file_close(p);
-            if (block == APR_NONBLOCK_READ) {
-                /* XXX: this is bogus, should return APR_SUCCESS */
-                return APR_EOF;
-            }
         }
     }
     return APR_SUCCESS;
Index: buckets/apr_buckets_socket.c
===================================================================
RCS file: /home/cvs/apr-util/buckets/apr_buckets_socket.c,v
retrieving revision 1.30
diff -u -r1.30 apr_buckets_socket.c
--- buckets/apr_buckets_socket.c	2001/09/22 22:36:07	1.30
+++ buckets/apr_buckets_socket.c	2001/09/24 20:45:44
@@ -111,10 +111,6 @@
         free(buf);
         a = apr_bucket_immortal_make(a, "", 0);
         *str = a->data;
-        if (rv == APR_EOF && block == APR_NONBLOCK_READ) {
-            /* XXX: this is bogus... should return APR_SUCCESS */
-            return APR_EOF;
-        }
     }
     return APR_SUCCESS;
 }

Index: perl-framework/c-modules/input_body_filter/mod_input_body_filter.c
===================================================================
RCS file: /home/cvs/httpd-test/perl-framework/c-modules/input_body_filter/mod_input_body_filter.c,v
retrieving revision 1.4
diff -u -r1.4 mod_input_body_filter.c
--- perl-framework/c-modules/input_body_filter/mod_input_body_filter.c	2001/09/08 22:07:14	1.4
+++ perl-framework/c-modules/input_body_filter/mod_input_body_filter.c	2001/09/24 20:58:16
@@ -76,25 +76,43 @@
     }
 }
 
+typedef struct input_body_ctx_t {
+    apr_bucket_brigade *b;
+} input_body_ctx_t;
+
 static int input_body_filter_handler(ap_filter_t *f, apr_bucket_brigade *bb, 
                                      ap_input_mode_t mode, apr_off_t *readbytes)
 {
     apr_status_t rv;
     apr_pool_t *p = f->r->pool;
+    input_body_ctx_t *ctx = f->ctx;
 
-    if (APR_BRIGADE_EMPTY(bb)) {
-        rv = ap_get_brigade(f->next, bb, mode, readbytes);
-        if (rv != APR_SUCCESS) {
+    if (!ctx) {
+        f->ctx = ctx = apr_pcalloc(f->r->pool, sizeof(*ctx));
+        ctx->b = apr_brigade_create(f->r->pool);
+    }
+
+    if (APR_BRIGADE_EMPTY(ctx->b))
+    {
+        if ((rv = ap_get_brigade(f->next, ctx->b, mode,
+                                 readbytes)) != APR_SUCCESS) {
             return rv;
         }
     }
 
-    while (!APR_BRIGADE_EMPTY(bb)) {
+    while (!APR_BRIGADE_EMPTY(ctx->b)) {
         const char *data;
         apr_size_t len;
         apr_bucket *bucket;
 
-        bucket = APR_BRIGADE_FIRST(bb);
+        bucket = APR_BRIGADE_FIRST(ctx->b);
+
+        if (APR_BUCKET_IS_EOS(bucket)) {
+            APR_BUCKET_REMOVE(bucket);
+            APR_BRIGADE_INSERT_TAIL(bb, bucket);
+            break;
+        }
+
         rv = apr_bucket_read(bucket, &data, &len, mode);
 
         if (rv != APR_SUCCESS) {
@@ -110,10 +128,6 @@
         }
 
         APR_BRIGADE_INSERT_TAIL(bb, bucket);
-
-        if (APR_BUCKET_IS_EOS(bucket)) {
-            break;
-        }
     }
 
     return OK;

Re: [PATCH] Take 3 of httpd filter rewrite...

Posted by Cliff Woolley <cl...@yahoo.com>.

On Wed, 26 Sep 2001, Greg Stein wrote:

> Um. The above loop sucks. ctx->b is *only* a SOCKET bucket. We know that as
> a given since that is a private brigade that we populated. Further, the
> above loop will read *everything* on that socket into memory. Man... talk
> about Denial of Service. "Hey! Apache! Take this gigabyte of data!"
>
> Okay... so we drop yet another comment about a hack. But while we're leaving
> hacks around here, you may as well toss the loop. apr_brigade_length() is
> going to do exactly the same thing -- the loop is redundant. Just do the
> CONCAT after calling length()

Yep.  (:-/)

> > +        apr_brigade_partition(ctx->b, *readbytes, &e);
> > +        /* Must do split before CONCAT */
> > +        if (e != APR_BRIGADE_SENTINEL(ctx->b)) {
> > +            newbb = apr_brigade_split(ctx->b, e);
> > +        }
>
> This feels like our APIs are wrong. We really ought to be able to partition
> then split. Checking for the sentinel seems like too much "inside
> knowledge." The split() ought to be able to assume a sentinel means "no
> splitting".

It already does.


APU_DECLARE(apr_bucket_brigade *) apr_brigade_split(apr_bucket_brigade *b,
						 apr_bucket *e)
{
    apr_bucket_brigade *a;
    apr_bucket *f;

    a = apr_brigade_create(b->p);
    /* Return an empty brigade if there is nothing left in
     * the first brigade to split off
     */
    if (e != APR_BRIGADE_SENTINEL(b)) {
        f = APR_RING_LAST(&b->list);
        APR_RING_UNSPLICE(e, f, link);
        APR_RING_SPLICE_HEAD(&a->list, e, f, apr_bucket, link);
    }
    return a;
}

--Cliff


--------------------------------------------------------------
   Cliff Woolley
   cliffwoolley@yahoo.com
   Charlottesville, VA

Re: [PATCH] Take 3 of httpd filter rewrite...

Posted by Greg Stein <gs...@lyra.org>.

Hmm. This doesn't seem to address some of the concerns that I detailed in
Msg-ID: <20...@lyra.org>

On Mon, Sep 24, 2001 at 02:03:11PM -0700, Justin Erenkrantz wrote:
> It seems that this patch is growing by leaps and bounds each time
> I post.  =-)

:-)

Actually, http_protocol.c shrank dramatically.

>...
> - Change support code to allow the http_filter to be a
>   request filter (how the request is setup initially).

I think the request filter should be inserted during ap_read_request, after
the headers are read. There is no reason to insert it before then. It only
means you have to write nasty code to get out of the way of reading the
request line and the headers.

>...
> --- server/core.c	2001/09/19 05:52:42	1.61
> +++ server/core.c	2001/09/24 20:46:51
>...
> +        /* seed the brigade with the client socket. */
>          e = apr_bucket_socket_create(f->c->client_socket);
> +        APR_BRIGADE_INSERT_TAIL(ctx->b, e);

Per another email, we might want to consider *not* creating a SOCKET bucket.
It just means that we have to transfer buckets from ctx->b to (param) b. We
may as well read from the socket directly into a new bucket on b.

>...
> +    if (mode == AP_MODE_PEEK) {
> +        apr_bucket *e;
> +        const char *str, *c;
> +        apr_size_t length;
> +
> +        /* The purpose of this loop is to ignore any CRLF (or LF) at the end

Note that we should insert a big ass ### right here. AP_MODE_PEEK should be
just that: peek at the data. Not a magic incantation to delete any extra
CRLF sequences in the input. If we *are* intended to do that, then the
symbol should be AP_MODE_TOSS_CRLF.

That said: keep this code in the patch -- I'm not suggesting it should not
be here (it was a problem before, and can continue to be a problem). It is
just that we should note how bogus this is :-)

>...
> +    /* If readbytes is -1, we want to just read everything until the end

Who does this? And why is it done, rather than just shutting down the
connection? Further, I think we should have a symbolic constant for this.

Again: not a problem with the patch, but with the original code. Please drop
a comment somewhere to fix this.

>...
> +    if (*readbytes == -1) {
> +        apr_bucket *e;
> +        apr_off_t total;
> +        const char *str;
> +        apr_size_t len;

This "len" overrides a declaration further out. In fact, there are several
declarations of "len" and "length" in this function. Just do one, and reuse
it for each of the reads. Same for "str".

> +        APR_BRIGADE_FOREACH(e, ctx->b) {
> +            /* We don't care about these values.  We just want to force the
> +             * lower level to just read it. */
> +            apr_bucket_read(e, &str, &len, APR_BLOCK_READ);
> +        }
> +        APR_BRIGADE_CONCAT(b, ctx->b);
> +
> +        /* Force a recompute of the length */
> +        apr_brigade_length(b, 1, &total);

Um. The above loop sucks. ctx->b is *only* a SOCKET bucket. We know that as
a given since that is a private brigade that we populated. Further, the
above loop will read *everything* on that socket into memory. Man... talk
about Denial of Service. "Hey! Apache! Take this gigabyte of data!"

Whoever is issuing that -1 read should rethink things. We cannot simply read
the entire contents of a socket into memory.

Okay... so we drop yet another comment about a hack. But while we're leaving
hacks around here, you may as well toss the loop. apr_brigade_length() is
going to do exactly the same thing -- the loop is redundant. Just do the
CONCAT after calling length()

>...
> +    /* readbytes == 0 is "read a single line". otherwise, read a block. */
> +    if (*readbytes) {
> +        apr_off_t total;
> +        apr_bucket *e;
> +        apr_bucket_brigade *newbb;
> +
> +        newbb = NULL;
> +
> +        apr_brigade_partition(ctx->b, *readbytes, &e);
> +        /* Must do split before CONCAT */
> +        if (e != APR_BRIGADE_SENTINEL(ctx->b)) {
> +            newbb = apr_brigade_split(ctx->b, e);
> +        }

This feels like our APIs are wrong. We really ought to be able to partition
then split. Checking for the sentinel seems like too much "inside
knowledge." The split() ought to be able to assume a sentinel means "no
splitting".

>...
> +    /* we are reading a single LF line, e.g. the HTTP headers */
> +    while (!APR_BRIGADE_EMPTY(ctx->b)) {
> +        e = APR_BRIGADE_FIRST(ctx->b);
> +        if ((rv = apr_bucket_read(e, (const char **)&buff, &len,

Change the declaration of buff to be const char * and you won't need this
dumb cast. No need for it to be char* in the first place.

> +                                  mode)) != APR_SUCCESS) {
> +            return rv;
> +        }
> +
> +        pos = memchr(buff, APR_ASCII_LF, len);

Please move the declaration for pos into this block. I had to search a long
ways back to find that one. And it should probably be const char * also.

>...
> --- modules/http/http_core.c	2001/08/25 23:43:18	1.282
> +++ modules/http/http_core.c	2001/09/24 20:46:51
>...
> +static int ap_http_create_req(request_rec *r)
> +{
> +    if (!r->main)
> +    {
> +        ap_add_input_filter("HTTP_IN", NULL, r, r->connection);
> +    }
> +    return OK;
> +}

Per my previous email, I think this should be inserted at a different point
in the code flow. It will also remove the need to hook create_request.

>...
> --- modules/http/http_protocol.c	2001/09/17 13:12:37	1.362
> +++ modules/http/http_protocol.c	2001/09/24 20:46:51
> @@ -481,265 +481,117 @@
>      return AP_HTTP_METHODS[methnum];
>  }
>  
> -struct dechunk_ctx {
> -    apr_size_t chunk_size;
> -    apr_size_t bytes_delivered;
> +static long get_chunk_size(char *);
> +
> +typedef struct http_filter_ctx {
> +    int status;

See previous comments about "status". Basically: this is a nasty hack and
the comments (or lack thereof!) do nothing to note that, nor to elucidate
what is going on.

>...
> +        BODY_NONE   /* must have value of zero */,

Doesn't need to be zero...  (see comments before)

>...
> +apr_status_t ap_http_filter(ap_filter_t *f, apr_bucket_brigade *b, ap_input_mode_t mode, apr_off_t *readbytes)

Wow. This function seems like it is going to be dramatically simpler. Hard
to see through all of this mess, though :-)

I'm +1 on the patch, but with the caveat that *many* more comments need to
be added. There are still hacks all over the place (from before Justin's
patch, and at least one new one). I tried to comment the hacks before;
thankfully, Justin's patch removes most of those. But there are still more,
and we can't just let those slide...

Cheers,
-g

-- 
Greg Stein, http://www.lyra.org/

Re: [PATCH] Take 3 of httpd filter rewrite...

Posted by Justin Erenkrantz <je...@ebuilt.com>.

On Fri, Sep 28, 2001 at 09:29:32PM -0700, Ryan Bloom wrote:
> I agree with Greg that creating this socket bucket isn't necessary.  This filter
> should just read from the network.

That's a bit more than I'm comfortable doing right now.  We can certainly
add this once it gets committed.  It shouldn't change the logic
substantially.

> > +        apr_brigade_partition(ctx->b, *readbytes, &e);
> > +        /* Must do split before CONCAT */
> > +        if (e != APR_BRIGADE_SENTINEL(ctx->b)) {
> > +            newbb = apr_brigade_split(ctx->b, e);
> > +        }
> > +        APR_BRIGADE_CONCAT(b, ctx->b);
> 
> I'm assuming this has been removed since this patch was posted.

Yes, it has.  =-)

> Looks good to me.  I would prefer it if this wasn't committed until the server
> was working correctly again, because that would make debugging any bugs
> that are introduced much easier.  But, I am NOT even asking for that delay.

Thanks - I know you are very busy.  =)

Yeah, I know.  I'm going to start looking at what the heck happened
with the current code and see if I can track this down.  I know that 
I'm not crazy now - jsachs said he sees it too.

Once the server is stable, I'll commit this patch as posted and
then follow up shortly thereafter with the commits for comments and 
then another one for the ap_getline changes.  I also have an idea
on how to delay the insertion of the HTTP_IN filter until after
we know we have a valid HTTP request.  These are tested, but I'd
rather make them distinct commits than doing it all at once.
-- justin

Re: [PATCH] Take 3 of httpd filter rewrite...

Posted by Ryan Bloom <rb...@covalent.net>.

> Index: server/core.c
> ===================================================================
> RCS file: /home/cvs/httpd-2.0/server/core.c,v
> retrieving revision 1.61
> diff -u -r1.61 core.c
> --- server/core.c	2001/09/19 05:52:42	1.61
> +++ server/core.c	2001/09/24 20:46:51
> @@ -2753,23 +2753,145 @@
>      return ap_pass_brigade(r->output_filters, bb);
>  }
>
> +typedef struct core_filter_ctx {
> +    apr_bucket_brigade *b;
> +} core_ctx_t;
> +
>  static int core_input_filter(ap_filter_t *f, apr_bucket_brigade *b,
> ap_input_mode_t mode, apr_off_t *readbytes) {
>      apr_bucket *e;
> +    apr_status_t rv;
> +    core_ctx_t *ctx = f->ctx;
> +    char *buff, *pos;
> +    apr_size_t len;
> +
> +    if (!ctx)
> +    {
> +        f->ctx = ctx = apr_pcalloc(f->c->pool, sizeof(*ctx));
> +        ctx->b = apr_brigade_create(f->c->pool);
>
> -    if (!f->ctx) {    /* If we haven't passed up the socket yet... */
> -        f->ctx = (void *)1;
> +        /* seed the brigade with the client socket. */
>          e = apr_bucket_socket_create(f->c->client_socket);

I agree with Greg that creating this socket bucket isn't necessary.  This filter
should just read from the network.

> +        apr_brigade_partition(ctx->b, *readbytes, &e);
> +        /* Must do split before CONCAT */
> +        if (e != APR_BRIGADE_SENTINEL(ctx->b)) {
> +            newbb = apr_brigade_split(ctx->b, e);
> +        }
> +        APR_BRIGADE_CONCAT(b, ctx->b);

I'm assuming this has been removed since this patch was posted.

Looks good to me.  I would prefer it if this wasn't committed until the server
was working correctly again, because that would make debugging any bugs
that are introduced much easier.  But, I am NOT even asking for that delay.

Ryan
______________________________________________________________
Ryan Bloom				rbb@apache.org
Covalent Technologies			rbb@covalent.net
--------------------------------------------------------------