You are viewing a plain text version of this content. The canonical link for it is here.
Posted to cvs@httpd.apache.org by ia...@apache.org on 2002/02/05 01:15:42 UTC

cvs commit: httpd-2.0/modules/experimental cache_storage.c mod_cache.c mod_cache.h

ianh        02/02/04 16:15:42

  Modified:    modules/experimental cache_storage.c mod_cache.c mod_cache.h
  Log:
  add an optional function, ap_cache_generate_key,
  which allows a 3rd party module to generate the key name based
  on the request_rec.
  
  the idea here is for it to also be able to mess with expiry times
  and cachability
  
  add another option: CacheIgnoreCacheControl. This ignores an incoming request's
  attempts to get a fresh copy. Mainly I see this as being useful in reverse proxies.
  
  Revision  Changes    Path
  1.14      +22 -8     httpd-2.0/modules/experimental/cache_storage.c
  
  Index: cache_storage.c
  ===================================================================
  RCS file: /home/cvs/httpd-2.0/modules/experimental/cache_storage.c,v
  retrieving revision 1.13
  retrieving revision 1.14
  diff -u -r1.13 -r1.14
  --- cache_storage.c	3 Feb 2002 19:04:15 -0000	1.13
  +++ cache_storage.c	5 Feb 2002 00:15:42 -0000	1.14
  @@ -66,6 +66,8 @@
   	APR_HOOK_LINK(open_entity)
   )
   
  +extern APR_OPTIONAL_FN_TYPE(ap_cache_generate_key) *cache_generate_key;
  +
   extern module AP_MODULE_DECLARE_DATA cache_module;
   
   /* -------------------------------------------------------------- */
  @@ -78,9 +80,13 @@
   {
       const char *next = types;
       const char *type;
  -    const char *key;
  +    apr_status_t rv;
  +    char *key;
   
  -    key = cache_create_key(r);
  +    rv = cache_generate_key(r,r->pool,&key);
  +    if (rv != APR_SUCCESS) {
  +        return rv;
  +    }
   
       /* for each specified cache type, delete the URL */
       while(next) {
  @@ -107,13 +113,16 @@
       cache_handle_t *h = apr_pcalloc(r->pool, sizeof(h));
       const char *next = types;
       const char *type;
  -    const char *key;
  +    char *key;
       apr_status_t rv;
       cache_request_rec *cache = (cache_request_rec *) 
                            ap_get_module_config(r->request_config, &cache_module);
   
  +    rv =  cache_generate_key(r,r->pool,&key);
  +    if (rv != APR_SUCCESS) {
  +        return rv;
  +    }
       /* for each specified cache type, delete the URL */
  -    key = cache_create_key(r);
       while (next) {
           type = ap_cache_tokstr(r->pool, next, &next);
           switch (rv = cache_run_create_entity(h, r, type, key, size)) {
  @@ -162,10 +171,14 @@
       const char *type;
       apr_status_t rv;
       cache_info *info;
  -    const char *key;
  +    char *key;
       cache_request_rec *cache = (cache_request_rec *) 
                            ap_get_module_config(r->request_config, &cache_module);
  -    key = cache_create_key(r);
  +
  +    rv =  cache_generate_key(r,r->pool,&key);
  +    if (rv != APR_SUCCESS) {
  +        return rv;
  +    }
       /* go through the cache types till we get a match */
       cache->handle = apr_palloc(r->pool, sizeof(cache_handle_t));
   
  @@ -235,9 +248,10 @@
       return APR_SUCCESS;
   }
   
  -const char* cache_create_key( request_rec *r ) 
  +apr_status_t cache_generate_key_default( request_rec *r, apr_pool_t*p, char**key ) 
   {
  -    return r->uri;
  +   *key = apr_pstrdup(p,r->uri);
  +   return APR_SUCCESS;
   }
   APR_IMPLEMENT_EXTERNAL_HOOK_RUN_FIRST(cache, CACHE, int, create_entity, 
                                         (cache_handle_t *h, request_rec *r, const char *type, 
  
  
  
  1.25      +50 -9     httpd-2.0/modules/experimental/mod_cache.c
  
  Index: mod_cache.c
  ===================================================================
  RCS file: /home/cvs/httpd-2.0/modules/experimental/mod_cache.c,v
  retrieving revision 1.24
  retrieving revision 1.25
  diff -u -r1.24 -r1.25
  --- mod_cache.c	3 Feb 2002 06:24:55 -0000	1.24
  +++ mod_cache.c	5 Feb 2002 00:15:42 -0000	1.25
  @@ -61,6 +61,7 @@
   #include "mod_cache.h"
   
   module AP_MODULE_DECLARE_DATA cache_module;
  +APR_OPTIONAL_FN_TYPE(ap_cache_generate_key) *cache_generate_key;
   
   /* -------------------------------------------------------------- */
   
  @@ -147,14 +148,20 @@
                        "cache: URL exceeds length threshold: %s", url);
           return DECLINED;
       }
  -    if (ap_cache_liststr(cc_in, "no-store", NULL) ||
  -        ap_cache_liststr(pragma, "no-cache", NULL) || (auth != NULL)) {
  -        /* delete the previously cached file */
  -        cache_remove_url(r, cache->types, url);
  -
  +    if (conf->ignorecachecontrol_set == 1 && conf->ignorecachecontrol == 1 && auth == NULL) {
           ap_log_error(APLOG_MARK, APLOG_DEBUG | APLOG_NOERRNO, 0, r->server,
  -                     "cache: no-store forbids caching of %s", url);
  -        return DECLINED;
  +            "incoming request is asking for a uncached version of %s, but we know better and are ignoring it", url);
  +    }
  +    else {
  +        if (ap_cache_liststr(cc_in, "no-store", NULL) ||
  +            ap_cache_liststr(pragma, "no-cache", NULL) || (auth != NULL)) {
  +            /* delete the previously cached file */
  +            cache_remove_url(r, cache->types, url);
  +
  +            ap_log_error(APLOG_MARK, APLOG_DEBUG | APLOG_NOERRNO, 0, r->server,
  +                        "cache: no-store forbids caching of %s", url);
  +            return DECLINED;
  +        }
       }
   
       /*
  @@ -396,7 +403,7 @@
   
       /* check first whether running this filter has any point or not */
       if(r->no_cache) {
  -	ap_remove_output_filter(f);
  +	    ap_remove_output_filter(f);
           return ap_pass_brigade(f->next, in);
       }
   
  @@ -738,6 +745,8 @@
       ps->complete_set = 0;
       ps->no_last_mod_ignore_set = 0;
       ps->no_last_mod_ignore = 0;
  +    ps->ignorecachecontrol = 0;
  +    ps->ignorecachecontrol_set = 0 ;
       return ps;
   }
   
  @@ -772,6 +781,11 @@
           (overrides->no_last_mod_ignore_set) ? 
                       base->no_last_mod_ignore : 
                       overrides->no_last_mod_ignore;
  +    ps->ignorecachecontrol  =
  +        (overrides->ignorecachecontrol_set) ? 
  +                    base->ignorecachecontrol : 
  +                    overrides->ignorecachecontrol;
  +
       return ps;
   }
   static const char
  @@ -785,6 +799,7 @@
       return NULL;
   
   }
  +
   static const char
   *set_cache_on(cmd_parms *parms, void *dummy, int flag)
   {
  @@ -795,6 +810,17 @@
       conf->cacheon_set = 1;
       return NULL;
   }
  +static const char
  +*set_cache_ignore_cachecontrol( cmd_parms *parms, void *dummy, int flag)
  +{
  +    cache_server_conf *conf = ap_get_module_config(parms->server->module_config, 
  +                                                   &cache_module);
  +
  +    conf->ignorecachecontrol = 1;
  +    conf->ignorecachecontrol_set = 1;
  +    return NULL;
  +
  +}
   
   static const char
   *add_cache_enable(cmd_parms *parms, 
  @@ -905,6 +931,18 @@
       conf->complete_set = 1;
       return NULL;
   }
  +static cache_post_config(apr_pool_t *p, apr_pool_t *plog,
  +                                apr_pool_t *ptemp, server_rec *s)
  +{
  +     /* Use the key generator another module registered as the optional
  +     * function ap_cache_generate_key, if any; otherwise use the default.
  +     */
  +    cache_generate_key    = APR_RETRIEVE_OPTIONAL_FN(ap_cache_generate_key);
  +    if (!cache_generate_key) {
  +        cache_generate_key = cache_generate_key_default;
  +    }
  +    return OK;
  +}
   
   static const command_rec cache_cmds[] =
   {
  @@ -935,7 +973,9 @@
        AP_INIT_FLAG("CacheIgnoreNoLastMod", set_cache_ignore_no_last_mod, NULL, 
                RSRC_CONF, 
                "Ignore Responses where there is no Last Modified Header"),
  -
  +     AP_INIT_FLAG("CacheIgnoreCacheControl", set_cache_ignore_cachecontrol, NULL, 
  +            RSRC_CONF, 
  +            "Ignore requests from the client for uncached content"),
       AP_INIT_TAKE1("CacheLastModifiedFactor", set_cache_factor, NULL, RSRC_CONF,
        "The factor used to estimate Expires date from LastModified date"),
       AP_INIT_TAKE1("CacheForceCompletion", set_cache_complete, NULL, RSRC_CONF,
  @@ -964,6 +1004,7 @@
       ap_register_output_filter("CACHE_CONDITIONAL", 
                                 cache_conditional_filter, 
                                 AP_FTYPE_CONTENT+1);
  +    ap_hook_post_config(cache_post_config, NULL, NULL, APR_HOOK_REALLY_FIRST);
   }
   
   module AP_MODULE_DECLARE_DATA cache_module =
  
  
  
  1.18      +13 -1     httpd-2.0/modules/experimental/mod_cache.h
  
  Index: mod_cache.h
  ===================================================================
  RCS file: /home/cvs/httpd-2.0/modules/experimental/mod_cache.h,v
  retrieving revision 1.17
  retrieving revision 1.18
  diff -u -r1.17 -r1.18
  --- mod_cache.h	4 Feb 2002 04:43:34 -0000	1.17
  +++ mod_cache.h	5 Feb 2002 00:15:42 -0000	1.18
  @@ -74,6 +74,7 @@
   #include "apr_md5.h"
   #include "apr_pools.h"
   #include "apr_strings.h"
  +#include "apr_optional.h"
   #define APR_WANT_STRFUNC
   #include "apr_want.h"
   
  @@ -168,9 +169,12 @@
       int factor_set;
       int complete;               /* Force cache completion after this point */
       int complete_set;
  -    /* ignore the last-modified header when deciding to cache this request */
  +    /** ignore the last-modified header when deciding to cache this request */
       int no_last_mod_ignore_set;
       int no_last_mod_ignore; 
  +    /** ignore client's requests for uncached responses */
  +    int ignorecachecontrol;
  +    int ignorecachecontrol_set;
   } cache_server_conf;
   
   /* cache info information */
  @@ -250,6 +254,7 @@
   int cache_create_entity(request_rec *r, const char *types, char *url, apr_size_t size);
   int cache_remove_entity(request_rec *r, const char *types, cache_handle_t *h);
   int cache_select_url(request_rec *r, const char *types, char *url);
  +apr_status_t cache_generate_key_default( request_rec *r, apr_pool_t*p, char**key );
   /**
    * create a key for the cache based on the request record
    * this is the 'default' version, which can be overridden by a default function
  @@ -294,5 +299,12 @@
                              const char *urlkey))
   APR_DECLARE_EXTERNAL_HOOK(cache, CACHE, int, remove_url, 
                             (const char *type, const char *urlkey))
  +
  +
  +
  +APR_DECLARE_OPTIONAL_FN(apr_status_t, 
  +                        ap_cache_generate_key, 
  +                        (request_rec *r, apr_pool_t*p, char**key ));
  +
   
   #endif /*MOD_CACHE_H*/
  
  
  

Re: cvs commit: httpd-2.0/modules/experimental cache_storage.c

Posted by Ian Holsman <ia...@apache.org>.
Graham Leggett wrote:
> This is a cryptographically signed message in MIME format.
> 
> --------------ms4949D2B6D144F9280EAD0AAA
> Content-Type: text/plain; charset=us-ascii
> Content-Transfer-Encoding: 7bit
> 
> Ian Holsman wrote:
> 
> 
>>This directive is >only< on the incoming request, If I'm caching results
>>of a expensive call to a backend server, why should some piddly client
>>tell me to re-get the info, forcing a subsecond response to become a 10
>>second one ?
>>
> 
> Because the HTTP protocol says it can.
> 
> Part of the reason for the existence of mod_cache is to solve this
> problem. The client comes in and says GET /expensive HTTP/1.1. Mod_cache
> uses its freshness calculations to determine whether a cheap previously
> cached copy gets returned. But: if the client says GET /expensive
> HTTP/1.1; Cache-Control: max-age=0; then the client gets the expensive
> up-to-date version - why? Because the user specifically wanted it.

That's why it's not enabled by default.
I'm trying to cater for the case when the mean-spirited service provider
doesn't care what the user 'asks' for; they're going to get what they're
given. ;-)
> 
> If your app takes 10 seconds to generate a page on each hit, then the
> app is broken and should be replaced by something which updates a flat
> file every so often. This will ensure that no client will ever have to
> sit out the 10 second regeneration time should the cache need to be
> refreshed for whatever reason.
> 

Actually, my plan is to cache flat files. Believe it or not, one of our
major bottlenecks is now retrieving the file off NFS after something
else has written it to a flat file.
If you want, I can give you a more detailed explanation of how our
website is set up and how I see mod_cache helping (offline).


> In addition the v1.3 cache contains a design flaw, where until the
> expensive 10 second request is complete, all further requests will also
> generate a 10 second query. This can cause some nasty load spikes on
> servers until at least one query is complete, and a cache file becomes
> valid.
> 
> 
>>I'll make the directive name a bit clearer, maybe
>>'CacheIgnoreIncomingCacheControl',
>>and add a message about how this breaks RFC compliance.
>>
> 
> I think a better idea is to create a mechanism to override the freshness
> calculations. For example, you might say "all files under this regex
> will have a freshness lifetime of 3600 seconds, regardless of
> Cache-Control from the backend server". Or even better - just make sure
> your backend server generates Cache-Control headers correctly.

I agree here.
That's what the optional function is meant for: someone else can
implement the freshness calculations and URI -> cache-key conversions.

BTW, the backend server is something we can control and fix, which is why the
directive doesn't do anything on the backend response.

..Ian

> 
> Regards,
> Graham
> 




Re: cvs commit: httpd-2.0/modules/experimental cache_storage.c

Posted by Graham Leggett <mi...@sharp.fm>.
Ian Holsman wrote:

> This directive is >only< on the incoming request, If I'm caching results
> of a expensive call to a backend server, why should some piddly client
> tell me to re-get the info, forcing a subsecond response to become a 10
> second one ?

Because the HTTP protocol says it can.

Part of the reason for the existence of mod_cache is to solve this
problem. The client comes in and says GET /expensive HTTP/1.1. Mod_cache
uses its freshness calculations to determine whether a cheap previously
cached copy gets returned. But: if the client says GET /expensive
HTTP/1.1; Cache-Control: max-age=0; then the client gets the expensive
up-to-date version - why? Because the user specifically wanted it.

If your app takes 10 seconds to generate a page on each hit, then the
app is broken and should be replaced by something which updates a flat
file every so often. This will ensure that no client will ever have to
sit out the 10 second regeneration time should the cache need to be
refreshed for whatever reason.

In addition the v1.3 cache contains a design flaw, where until the
expensive 10 second request is complete, all further requests will also
generate a 10 second query. This can cause some nasty load spikes on
servers until at least one query is complete, and a cache file becomes
valid.

> I'll make the directive name a bit clearer, maybe
> 'CacheIgnoreIncomingCacheControl',
> and add a message about how this breaks RFC compliance.

I think a better idea is to create a mechanism to override the freshness
calculations. For example, you might say "all files under this regex
will have a freshness lifetime of 3600 seconds, regardless of
Cache-Control from the backend server". Or even better - just make sure
your backend server generates Cache-Control headers correctly.

Regards,
Graham
-- 
-----------------------------------------
minfrin@sharp.fm		"There's a moon
					over Bourbon Street
						tonight..."

Re: cvs commit: httpd-2.0/modules/experimental cache_storage.c

Posted by Ian Holsman <ia...@apache.org>.
Graham Leggett wrote:
> This is a cryptographically signed message in MIME format.
> 
> --------------msE98EC4A31A500D9720859F64
> Content-Type: text/plain; charset=us-ascii
> Content-Transfer-Encoding: 7bit
> 
> ianh@apache.org wrote:
> 
> 
>>  add another option.. CacheIgnoreCacheControl. this ignores a 'incoming request's
>>  attempts to get a fresh copy. Mainly I see this as being usefull in r-proxy's
>>
> 
> Ignoring the cache control headers effectively breaks the HTTP protocol
> - is there a specific application for this option?

This directive applies >only< to the incoming request. If I'm caching results
of an expensive call to a backend server, why should some piddly client
tell me to re-get the info, forcing a subsecond response to become a
10-second one? Run a couple of these requests and bang... your server dies
as all the threads are busy servicing these 10-second requests.

The directive has no effect on what happens with the response back from
the handler (be it proxy or whatever): if IT sets Cache-Control, the cache
still honors it.

> 
> We need to be careful with some of the ability to override these headers
> - it must be made clear in the docs or the config options that these are
> non standard functions that should not normally be used unless the admin
> knows what they are doing.
> 
> For example, ignoring Cache-Control might cause the server to ignore
> Cache-Control: no-store, which could be viewed as a security issue (even
> though a minor one).

I'll make the directive name a bit clearer, maybe
'CacheIgnoreIncomingCacheControl',
and add a message about how this breaks RFC compliance.

> 
> Regards,
> Graham
> 




Re: cvs commit: httpd-2.0/modules/experimental cache_storage.c mod_cache.c mod_cache.h

Posted by Graham Leggett <mi...@sharp.fm>.
ianh@apache.org wrote:

>   add another option.. CacheIgnoreCacheControl. this ignores a 'incoming request's
>   attempts to get a fresh copy. Mainly I see this as being usefull in r-proxy's

Ignoring the cache control headers effectively breaks the HTTP protocol
- is there a specific application for this option?

We need to be careful with some of the ability to override these headers
- it must be made clear in the docs or the config options that these are
non standard functions that should not normally be used unless the admin
knows what they are doing.

For example, ignoring Cache-Control might cause the server to ignore
Cache-Control: no-store, which could be viewed as a security issue (even
though a minor one).

Regards,
Graham
-- 
-----------------------------------------
minfrin@sharp.fm		"There's a moon
					over Bourbon Street
						tonight..."