You are viewing a plain text version of this content. The canonical link for it is here.
Posted to cvs@httpd.apache.org by po...@apache.org on 2009/09/24 16:25:19 UTC

svn commit: r818492 - in /httpd/httpd/trunk: CHANGES modules/cache/mod_cache.c modules/cache/mod_cache.h

Author: poirier
Date: Thu Sep 24 14:25:19 2009
New Revision: 818492

URL: http://svn.apache.org/viewvc?rev=818492&view=rev
Log:
mod_cache: don't cache incomplete responses, per RFC 2616, 13.8.

PR: 15866

Modified:
    httpd/httpd/trunk/CHANGES
    httpd/httpd/trunk/modules/cache/mod_cache.c
    httpd/httpd/trunk/modules/cache/mod_cache.h

Modified: httpd/httpd/trunk/CHANGES
URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/CHANGES?rev=818492&r1=818491&r2=818492&view=diff
==============================================================================
--- httpd/httpd/trunk/CHANGES [utf-8] (original)
+++ httpd/httpd/trunk/CHANGES [utf-8] Thu Sep 24 14:25:19 2009
@@ -10,6 +10,9 @@
      mod_proxy_ftp: NULL pointer dereference on error paths.
      [Stefan Fritsch <sf fritsch.de>, Joe Orton]
 
+  *) mod_cache: don't cache incomplete responses, per RFC 2616, 13.8.
+     PR15866.  [Dan Poirier]
+
   *) ab: ab segfaults in verbose mode on https sites
      PR46393.  [Ryan Niebur]
 

Modified: httpd/httpd/trunk/modules/cache/mod_cache.c
URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/modules/cache/mod_cache.c?rev=818492&r1=818491&r2=818492&view=diff
==============================================================================
--- httpd/httpd/trunk/modules/cache/mod_cache.c (original)
+++ httpd/httpd/trunk/modules/cache/mod_cache.c Thu Sep 24 14:25:19 2009
@@ -77,6 +77,7 @@
                                                        &cache_module);
     if (!cache) {
         cache = apr_pcalloc(r->pool, sizeof(cache_request_rec));
+        cache->size = -1;
         ap_set_module_config(r->request_config, &cache_module, cache);
     }
 
@@ -312,6 +313,100 @@
     return ap_pass_brigade(f->next, bb);
 }
 
+/* Find out the size of the cached response body.
+   Returns -1 if unable to do so.
+ */
+static apr_off_t get_cached_response_body_size(cache_request_rec *cache, request_rec *r)
+{
+    cache_handle_t *h = cache->handle;
+    apr_off_t size = -1;
+    apr_bucket_brigade *bb;
+    apr_status_t rv;
+
+    /* There's not an API to ask the cache provider for the size
+       directly, so retrieve it and count the bytes.
+
+       But it's not as inefficient as it might look.  mod_disk_cache
+       will just create a file bucket and set its length to the file
+       size, and we'll access that length here without ever having to
+       read the cached file.
+
+       If there's some other cache provider that has to read the whole
+       cached body to fill in the brigade, though, that would make
+       this rather expensive.
+
+       XXX Add a standard cache provider API to get the size of the
+       cached data.
+    */
+
+    bb = apr_brigade_create(r->pool, r->connection->bucket_alloc);
+    rv = cache->provider->recall_body(h, r->pool, bb);
+    if (rv != APR_SUCCESS) {
+        ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, r,
+                      "cache: error recalling body");
+        /* try to remove cache entry, it's probably messed up */
+        cache->provider->remove_url(h, r->pool);
+    }
+    else {
+        int all_buckets_here = 0;
+        apr_bucket *e;
+
+        size=0;
+        for (e = APR_BRIGADE_FIRST(bb);
+             e != APR_BRIGADE_SENTINEL(bb);
+             e = APR_BUCKET_NEXT(e)) {
+            if (APR_BUCKET_IS_EOS(e)) {
+                all_buckets_here=1;
+                break;
+            }
+            if (APR_BUCKET_IS_FLUSH(e)) {
+                continue;
+            }
+            if (e->length == (apr_size_t)-1) {
+                break;
+            }
+            size += e->length;
+        }
+        if (!all_buckets_here) {
+            size = -1;
+        }
+    }
+    apr_brigade_destroy(bb);
+    return size;
+}
+
+/* Check that the response body we cached has the same length 
+ * as the Content-Length header, if available.  If not, cancel
+ * caching this response.
+ */
+static int validate_content_length(ap_filter_t *f, cache_request_rec *cache, request_rec *r)
+{
+    int rv = OK;
+
+    if (cache->size != -1) { /* We have a content-length to check */
+        apr_off_t actual_len = get_cached_response_body_size(cache, r);
+        if ((actual_len != -1) && (cache->size != actual_len)) {
+            ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, r,
+                          "cache: Content-Length header was %"APR_OFF_T_FMT" "
+                          "but response body size was %"APR_OFF_T_FMT
+                          ", removing url from cache: %s",
+                          cache->size, actual_len,
+                          r->unparsed_uri
+                          );
+            ap_remove_output_filter(f);
+            rv = cache->provider->remove_url(cache->handle, r->pool);
+            if (rv != OK) {
+                /* Probably a mod_disk_cache cache area has been (re)mounted
+                 * read-only, or that there is a permissions problem.
+                 */
+                ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, r->server,
+                             "cache: attempt to remove url from cache unsuccessful.");
+            }
+        }
+    }
+    return rv;
+}
+
 
 /*
  * CACHE_SAVE filter
@@ -342,7 +437,7 @@
     const char *cc_out, *cl;
     const char *exps, *lastmods, *dates, *etag;
     apr_time_t exp, date, lastmod, now;
-    apr_off_t size;
+    apr_off_t size = -1;
     cache_info *info = NULL;
     char *reason;
     apr_pool_t *p;
@@ -360,6 +455,7 @@
          */
         cache = apr_pcalloc(r->pool, sizeof(cache_request_rec));
         ap_set_module_config(r->request_config, &cache_module, cache);
+        cache->size = -1;
     }
 
     reason = NULL;
@@ -402,6 +498,11 @@
 
         }
 
+        /* Was this the final bucket? If yes, perform sanity checks. */
+        if ((rv == APR_SUCCESS) && APR_BUCKET_IS_EOS(APR_BRIGADE_LAST(in))) {
+            rv = validate_content_length(f, cache, r);
+        }
+
         return ap_pass_brigade(f->next, in);
     }
 
@@ -640,6 +741,9 @@
         }
     }
 
+    /* remember content length to check response size against later */
+    cache->size = size;
+
     /* It's safe to cache the response.
      *
      * There are two possiblities at this point:
@@ -917,6 +1021,11 @@
     ap_cache_remove_lock(conf, r, cache->handle ?
             (char *)cache->handle->cache_obj->key : NULL, in);
 
+    /* Was this the final bucket? If yes, perform sanity checks. */
+    if ((rv == APR_SUCCESS) && APR_BUCKET_IS_EOS(APR_BRIGADE_LAST(in))) {
+        rv = validate_content_length(f, cache, r);
+    }
+
     return ap_pass_brigade(f->next, in);
 }
 

Modified: httpd/httpd/trunk/modules/cache/mod_cache.h
URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/modules/cache/mod_cache.h?rev=818492&r1=818491&r2=818492&view=diff
==============================================================================
--- httpd/httpd/trunk/modules/cache/mod_cache.h (original)
+++ httpd/httpd/trunk/modules/cache/mod_cache.h Thu Sep 24 14:25:19 2009
@@ -267,6 +267,7 @@
     char *key;                          /* The cache key created for this
                                          * request
                                          */
+    apr_off_t size;                     /* the content length from the headers, or -1 */
 } cache_request_rec;
 
 



Re: svn commit: r818492 - in /httpd/httpd/trunk: CHANGES modules/cache/mod_cache.c modules/cache/mod_cache.h

Posted by Graham Leggett <mi...@sharp.fm>.
Dan Poirier wrote:

> What I'd been thinking about, but haven't implemented yet, was extending
> the cache provider API (in trunk only) so we can ask the cache provider
> for the size.
> 
> Your idea of a wrapper function sounds good for 2.2 though, since it
> doesn't require an API change.  How about if I give that a try in trunk
> so we can see how it works before considering a backport?

Thinking further on what Ruediger said, I agree with him - we cannot
assume on behalf of an underlying module what it might cache, or how it
might cache it.

It is possible that a cache might choose to cache incomplete files[1],
and this feature would prevent it from doing so.

I think this feature makes far more sense living in mod_disk_cache for
now, rather than mod_cache, as mod_disk_cache's current behaviour is to
cache complete responses.

[1] Think of a YouTube type scenario, where a user might download part
of a video, then stop it. In this case a cache might benefit by passing
a range request to the backend to complete a partially cached file,
rather than attempting to cache the file again from scratch.

Regards,
Graham
--

Re: svn commit: r818492 - in /httpd/httpd/trunk: CHANGES modules/cache/mod_cache.c modules/cache/mod_cache.h

Posted by Ruediger Pluem <rp...@apache.org>.

On 09/24/2009 10:18 PM, Dan Poirier wrote:
> Ruediger Pluem <rp...@apache.org> writes:
> 
>> On 09/24/2009 04:25 PM, poirier@apache.org wrote:
>>> +       If there's some other cache provider that has to read the whole
>>> +       cached body to fill in the brigade, though, that would make
>>> +       this rather expensive.
>> Exactly for this reason I don't like this approach. Currently we have only
>> mod_disk_cache but this might change in the future and the provider API is
>> open. So there might be third party modules affected by this.
>>
>> Why don't we just count the bytes as we store them? My first thought would be
>> to write a wrapper function for cache->provider->store_body() that could look
>> like the following and would be called instead of cache->provider->store_body():
> 
> What I'd been thinking about, but haven't implemented yet, was extending
> the cache provider API (in trunk only) so we can ask the cache provider
> for the size.

+1 for trunk.

> 
> Your idea of a wrapper function sounds good for 2.2 though, since it
> doesn't require an API change.  How about if I give that a try in trunk
> so we can see how it works before considering a backport?

+1

Regards

RĂ¼diger


Re: svn commit: r818492 - in /httpd/httpd/trunk: CHANGES modules/cache/mod_cache.c modules/cache/mod_cache.h

Posted by Dan Poirier <po...@pobox.com>.
Ruediger Pluem <rp...@apache.org> writes:

> On 09/24/2009 04:25 PM, poirier@apache.org wrote:
>> +       If there's some other cache provider that has to read the whole
>> +       cached body to fill in the brigade, though, that would make
>> +       this rather expensive.
>
> Exactly for this reason I don't like this approach. Currently we have only
> mod_disk_cache but this might change in the future and the provider API is
> open. So there might be third party modules affected by this.
>
> Why don't we just count the bytes as we store them? My first thought would be
> to write a wrapper function for cache->provider->store_body() that could look
> like the following and would be called instead of cache->provider->store_body():

What I'd been thinking about, but haven't implemented yet, was extending
the cache provider API (in trunk only) so we can ask the cache provider
for the size.

Your idea of a wrapper function sounds good for 2.2 though, since it
doesn't require an API change.  How about if I give that a try in trunk
so we can see how it works before considering a backport?

-- 
Dan Poirier <po...@pobox.com>


Re: svn commit: r818492 - in /httpd/httpd/trunk: CHANGES modules/cache/mod_cache.c modules/cache/mod_cache.h

Posted by Ruediger Pluem <rp...@apache.org>.

On 09/24/2009 04:25 PM, poirier@apache.org wrote:
> Author: poirier
> Date: Thu Sep 24 14:25:19 2009
> New Revision: 818492
> 
> URL: http://svn.apache.org/viewvc?rev=818492&view=rev
> Log:
> mod_cache: don't cache incomplete responses, per RFC 2616, 13.8.
> 
> PR: 15866
> 
> Modified:
>     httpd/httpd/trunk/CHANGES
>     httpd/httpd/trunk/modules/cache/mod_cache.c
>     httpd/httpd/trunk/modules/cache/mod_cache.h
> 

> Modified: httpd/httpd/trunk/modules/cache/mod_cache.c
> URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/modules/cache/mod_cache.c?rev=818492&r1=818491&r2=818492&view=diff
> ==============================================================================
> --- httpd/httpd/trunk/modules/cache/mod_cache.c (original)
> +++ httpd/httpd/trunk/modules/cache/mod_cache.c Thu Sep 24 14:25:19 2009

> @@ -312,6 +313,100 @@
>      return ap_pass_brigade(f->next, bb);
>  }
>  
> +/* Find out the size of the cached response body.
> +   Returns -1 if unable to do so.
> + */
> +static apr_off_t get_cached_response_body_size(cache_request_rec *cache, request_rec *r)
> +{
> +    cache_handle_t *h = cache->handle;
> +    apr_off_t size = -1;
> +    apr_bucket_brigade *bb;
> +    apr_status_t rv;
> +
> +    /* There's not an API to ask the cache provider for the size
> +       directly, so retrieve it and count the bytes.
> +
> +       But it's not as inefficient as it might look.  mod_disk_cache
> +       will just create a file bucket and set its length to the file
> +       size, and we'll access that length here without ever having to
> +       read the cached file.
> +
> +       If there's some other cache provider that has to read the whole
> +       cached body to fill in the brigade, though, that would make
> +       this rather expensive.

Exactly for this reason I don't like this approach. Currently we have only
mod_disk_cache but this might change in the future and the provider API is
open. So there might be third party modules affected by this.

Why don't we just count the bytes as we store them? My first thought would be
to write a wrapper function for cache->provider->store_body() that could look
like the following and would be called instead of cache->provider->store_body():

apr_status_t store_body(cache_request_rec *cache, request_rec *r, apr_bucket_brigade *bb)
{
    apr_status_t rv;

    rv = cache->provider->store_body(cache->handle, r, bb);
    if (rv == APR_SUCCESS) {
        apr_off_t len;

        /*
         * As store_body needs to read the buckets in the brigade anyway
         * it is very unlikely that the brigade still contains buckets of
         * undefined length. But if yes, don't force a read but just accept
         * a failure for our size check.
         */
        apr_brigade_length(bb, 0, &len);
        if ((len != -1) && (cache->actual_size != -1)) {
            cache->actual_size += len;
        }
        else {
            cache->actual_size = -1;
        }
    }
    return rv;
}

actual_size would be another apr_off_t field that needs to be added to cache_request_rec
and would need to be initialized with 0.
And the part from validate_content_length would transfer to

    if (cache->size != -1) { /* We have a content-length to check */
        if ((cache->actual_size != -1) && (cache->size != cache->actual_size)) {


> +
> +       XXX Add a standard cache provider API to get the size of the
> +       cached data.
> +    */
> +
> +    bb = apr_brigade_create(r->pool, r->connection->bucket_alloc);
> +    rv = cache->provider->recall_body(h, r->pool, bb);
> +    if (rv != APR_SUCCESS) {
> +        ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, r,
> +                      "cache: error recalling body");
> +        /* try to remove cache entry, it's probably messed up */
> +        cache->provider->remove_url(h, r->pool);
> +    }
> +    else {
> +        int all_buckets_here = 0;
> +        apr_bucket *e;
> +
> +        size=0;
> +        for (e = APR_BRIGADE_FIRST(bb);
> +             e != APR_BRIGADE_SENTINEL(bb);
> +             e = APR_BUCKET_NEXT(e)) {
> +            if (APR_BUCKET_IS_EOS(e)) {
> +                all_buckets_here=1;
> +                break;
> +            }
> +            if (APR_BUCKET_IS_FLUSH(e)) {
> +                continue;
> +            }
> +            if (e->length == (apr_size_t)-1) {
> +                break;
> +            }
> +            size += e->length;
> +        }
> +        if (!all_buckets_here) {
> +            size = -1;
> +        }
> +    }
> +    apr_brigade_destroy(bb);
> +    return size;
> +}
> +
> +/* Check that the response body we cached has the same length 
> + * as the Content-Length header, if available.  If not, cancel
> + * caching this response.
> + */
> +static int validate_content_length(ap_filter_t *f, cache_request_rec *cache, request_rec *r)
> +{
> +    int rv = OK;
> +
> +    if (cache->size != -1) { /* We have a content-length to check */
> +        apr_off_t actual_len = get_cached_response_body_size(cache, r);
> +        if ((actual_len != -1) && (cache->size != actual_len)) {
> +            ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, r,
> +                          "cache: Content-Length header was %"APR_OFF_T_FMT" "
> +                          "but response body size was %"APR_OFF_T_FMT
> +                          ", removing url from cache: %s",
> +                          cache->size, actual_len,
> +                          r->unparsed_uri
> +                          );
> +            ap_remove_output_filter(f);
> +            rv = cache->provider->remove_url(cache->handle, r->pool);
> +            if (rv != OK) {
> +                /* Probably a mod_disk_cache cache area has been (re)mounted
> +                 * read-only, or that there is a permissions problem.
> +                 */
> +                ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, r->server,
> +                             "cache: attempt to remove url from cache unsuccessful.");
> +            }
> +        }
> +    }
> +    return rv;
> +}
> +
>  

Regards

RĂ¼diger