You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@httpd.apache.org by Davi Arnaut <da...@haxent.com.br> on 2006/07/24 02:50:30 UTC

[patch 00/10] SoC 2006 -- extending the cache architecture

Hi,

Here is the first code drop regarding my SoC proposal "Redesigning and extending
the Apache cache architecture" (http://verdesmares.com/Apache/proposal.txt).

The patches are mainly clenaups and bug-fixes, except a new mod_disk_cache
directory structure. Further clenaups still required.

Other than that, I modified a bit the way and which storege callback
are called.

Also, I would like to thank the patience of my mentor, Paul Querna.

--
Davi Arnaut

[patch 05/10] pass uri_meets_conditions values by reference

Posted by Davi Arnaut <da...@haxent.com.br>.
Don't pass 'large' objects by value when not needed, it wastes time and space.

Index: trunk/modules/cache/cache_util.c
===================================================================
--- trunk.orig/modules/cache/cache_util.c
+++ trunk/modules/cache/cache_util.c
@@ -28,40 +28,41 @@ extern module AP_MODULE_DECLARE_DATA cac
 /* Determine if "url" matches the hostname, scheme and port and path
  * in "filter". All but the path comparisons are case-insensitive.
  */
-static int uri_meets_conditions(apr_uri_t filter, int pathlen, apr_uri_t url)
+static int uri_meets_conditions(const apr_uri_t *filter, apr_size_t pathlen,
+                                const apr_uri_t *url)
 {
     /* Compare the hostnames */
-    if(filter.hostname) {
-        if (!url.hostname) {
+    if(filter->hostname) {
+        if (!url->hostname) {
             return 0;
         }
-        else if (strcasecmp(filter.hostname, url.hostname)) {
+        else if (strcasecmp(filter->hostname, url->hostname)) {
             return 0;
         }
     }
 
     /* Compare the schemes */
-    if(filter.scheme) {
-        if (!url.scheme) {
+    if(filter->scheme) {
+        if (!url->scheme) {
             return 0;
         }
-        else if (strcasecmp(filter.scheme, url.scheme)) {
+        else if (strcasecmp(filter->scheme, url->scheme)) {
             return 0;
         }
     }
 
     /* Compare the ports */
-    if(filter.port_str) {
-        if (url.port_str && filter.port != url.port) {
+    if(filter->port_str) {
+        if (url->port_str && filter->port != url->port) {
             return 0;
         }
         /* NOTE:  ap_port_of_scheme will return 0 if given NULL input */
-        else if (filter.port != apr_uri_port_of_scheme(url.scheme)) {
+        else if (filter->port != apr_uri_port_of_scheme(url->scheme)) {
             return 0;
         }
     }
-    else if(url.port_str && filter.scheme) {
-        if (apr_uri_port_of_scheme(filter.scheme) == url.port) {
+    else if(url->port_str && filter->scheme) {
+        if (apr_uri_port_of_scheme(filter->scheme) == url->port) {
             return 0;
         }
     }
@@ -69,12 +70,11 @@ static int uri_meets_conditions(apr_uri_
     /* Url has met all of the filter conditions so far, determine
      * if the paths match.
      */
-    return !strncmp(filter.path, url.path, pathlen);
+    return !strncmp(filter->path, url->path, pathlen);
 }
 
 CACHE_DECLARE(cache_provider_list *)ap_cache_get_providers(request_rec *r,
-                                                  cache_server_conf *conf,
-                                                  apr_uri_t uri)
+                                                  cache_server_conf *conf)
 {
     cache_provider_list *providers = NULL;
     int i;
@@ -83,7 +83,7 @@ CACHE_DECLARE(cache_provider_list *)ap_c
     for (i = 0; i < conf->cacheenable->nelts; i++) {
         struct cache_enable *ent =
                                 (struct cache_enable *)conf->cacheenable->elts;
-        if (uri_meets_conditions(ent[i].url, ent[i].pathlen, uri)) {
+        if (uri_meets_conditions(&ent[i].url, ent[i].pathlen, &r->parsed_uri)) {
             /* Fetch from global config and add to the list. */
             cache_provider *provider;
             provider = ap_lookup_provider(CACHE_PROVIDER_GROUP, ent[i].type,
@@ -120,7 +120,7 @@ CACHE_DECLARE(cache_provider_list *)ap_c
     for (i = 0; i < conf->cachedisable->nelts; i++) {
         struct cache_disable *ent =
                                (struct cache_disable *)conf->cachedisable->elts;
-        if (uri_meets_conditions(ent[i].url, ent[i].pathlen, uri)) {
+        if (uri_meets_conditions(&ent[i].url, ent[i].pathlen, &r->parsed_uri)) {
             /* Stop searching now. */
             return NULL;
         }
Index: trunk/modules/cache/mod_cache.c
===================================================================
--- trunk.orig/modules/cache/mod_cache.c
+++ trunk/modules/cache/mod_cache.c
@@ -106,7 +106,7 @@ static int cache_url_handler(request_rec
     /*
      * Which cache module (if any) should handle this request?
      */
-    if (!(providers = ap_cache_get_providers(r, conf, r->parsed_uri))) {
+    if (!(providers = ap_cache_get_providers(r, conf))) {
         return DECLINED;
     }
 
Index: trunk/modules/cache/mod_cache.h
===================================================================
--- trunk.orig/modules/cache/mod_cache.h
+++ trunk/modules/cache/mod_cache.h
@@ -280,7 +280,7 @@ CACHE_DECLARE(char *) ap_cache_generate_
                                              unsigned int *dirlevels,
                                              unsigned int *divisors,
                                              const char *name);
-CACHE_DECLARE(cache_provider_list *)ap_cache_get_providers(request_rec *r, cache_server_conf *conf, apr_uri_t uri);
+CACHE_DECLARE(cache_provider_list *)ap_cache_get_providers(request_rec *r, cache_server_conf *conf);
 CACHE_DECLARE(int) ap_cache_liststr(apr_pool_t *p, const char *list,
                                     const char *key, char **val);
 CACHE_DECLARE(const char *)ap_cache_tokstr(apr_pool_t *p, const char *list, const char **str);

--

[patch 03/10] shrink cache_save_filter

Posted by Davi Arnaut <da...@haxent.com.br>.
Move parts of cache_save_filter() to a smaller check_cacheable_request()
function that is easier to read and understand. Also, a useless assignment
is removed.


Index: trunk/modules/cache/mod_cache.c
===================================================================
--- trunk.orig/modules/cache/mod_cache.c
+++ trunk/modules/cache/mod_cache.c
@@ -294,65 +294,11 @@ static int cache_out_filter(ap_filter_t 
  *   Finally, pass the data to the next filter (the network or whatever)
  */
 
-static int cache_save_filter(ap_filter_t *f, apr_bucket_brigade *in)
+static const char* check_cacheable_request(request_rec *r,
+    cache_request_rec *cache, cache_server_conf *conf)
 {
-    int rv = !OK;
-    int date_in_errhdr = 0;
-    request_rec *r = f->r;
-    cache_request_rec *cache;
-    cache_server_conf *conf;
-    const char *cc_out, *cl;
-    const char *exps, *lastmods, *dates, *etag;
-    apr_time_t exp, date, lastmod, now;
-    apr_off_t size;
-    cache_info *info = NULL;
-    char *reason;
-    apr_pool_t *p;
-
-    conf = (cache_server_conf *) ap_get_module_config(r->server->module_config,
-                                                      &cache_module);
-
-    /* Setup cache_request_rec */
-    cache = (cache_request_rec *) ap_get_module_config(r->request_config,
-                                                       &cache_module);
-    if (!cache) {
-        /* user likely configured CACHE_SAVE manually; they should really use
-         * mod_cache configuration to do that
-         */
-        cache = apr_pcalloc(r->pool, sizeof(cache_request_rec));
-        ap_set_module_config(r->request_config, &cache_module, cache);
-    }
-
-    reason = NULL;
-    p = r->pool;
-    /*
-     * Pass Data to Cache
-     * ------------------
-     * This section passes the brigades into the cache modules, but only
-     * if the setup section (see below) is complete.
-     */
-    if (cache->block_response) {
-        /* We've already sent down the response and EOS.  So, ignore
-         * whatever comes now.
-         */
-        return APR_SUCCESS;
-    }
-
-    /* have we already run the cachability check and set up the
-     * cached file handle?
-     */
-    if (cache->in_checked) {
-        /* pass the brigades into the cache, then pass them
-         * up the filter stack
-         */
-        rv = cache->provider->store_body(cache->handle, r, in);
-        if (rv != APR_SUCCESS) {
-            ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, r->server,
-                         "cache: Cache provider's store_body failed!");
-            ap_remove_output_filter(f);
-        }
-        return ap_pass_brigade(f->next, in);
-    }
+    apr_time_t exp, lastmod;
+    const char *exps, *lastmods, *etag, *cc_out, *reason = NULL;
 
     /*
      * Setup Data in Cache
@@ -420,11 +366,11 @@ static int cache_save_filter(ap_filter_t
          * We include 304 Not Modified here too as this is the origin server
          * telling us to serve the cached copy.
          */
-        reason = apr_psprintf(p, "Response status %d", r->status);
+        reason = apr_psprintf(r->pool, "Response status %d", r->status);
     }
     else if (exps != NULL && exp == APR_DATE_BAD) {
         /* if a broken Expires header is present, don't cache it */
-        reason = apr_pstrcat(p, "Broken expires header: ", exps, NULL);
+        reason = apr_pstrcat(r->pool, "Broken expires header: ", exps, NULL);
     }
     else if (exps != NULL && exp != APR_DATE_BAD && exp < r->request_time)
     {
@@ -503,20 +449,16 @@ static int cache_save_filter(ap_filter_t
         reason = "r->no_cache present";
     }
 
-    if (reason) {
-        ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
-                     "cache: %s not cached. Reason: %s", r->unparsed_uri,
-                     reason);
+    cache->exp = exp;
+    cache->lastmod = lastmod;
 
-        /* remove this filter from the chain */
-        ap_remove_output_filter(f);
-
-        /* ship the data up the stack */
-        return ap_pass_brigade(f->next, in);
-    }
+    return reason;
+}
 
-    /* Make it so that we don't execute this path again. */
-    cache->in_checked = 1;
+static apr_off_t request_cl(request_rec *r, apr_bucket_brigade *in)
+{
+    const char *cl;
+    apr_off_t size;
 
     /* Set the content length if known.
      */
@@ -561,48 +503,18 @@ static int cache_save_filter(ap_filter_t
         }
     }
 
-    /* It's safe to cache the response.
-     *
-     * There are two possiblities at this point:
-     * - cache->handle == NULL. In this case there is no previously
-     * cached entity anywhere on the system. We must create a brand
-     * new entity and store the response in it.
-     * - cache->stale_handle != NULL. In this case there is a stale
-     * entity in the system which needs to be replaced by new
-     * content (unless the result was 304 Not Modified, which means
-     * the cached entity is actually fresh, and we should update
-     * the headers).
-     */
-
-    /* Did we have a stale cache entry that really is stale? */
-    if (cache->stale_handle) {
-        if (r->status == HTTP_NOT_MODIFIED) {
-            /* Oh, hey.  It isn't that stale!  Yay! */
-            cache->handle = cache->stale_handle;
-            info = &cache->handle->cache_obj->info;
-            rv = OK;
-        }
-        else {
-            /* Oh, well.  Toss it. */
-            cache->provider->remove_entity(cache->stale_handle);
-            /* Treat the request as if it wasn't conditional. */
-            cache->stale_handle = NULL;
-        }
-    }
+    return size;
+}
 
-    /* no cache handle, create a new entity */
-    if (!cache->handle) {
-        rv = cache_create_entity(r, size);
-        info = apr_pcalloc(r->pool, sizeof(cache_info));
-        /* We only set info->status upon the initial creation. */
-        info->status = r->status;
-    }
+static void prepare_cacheable_request(request_rec *r, cache_request_rec *cache,
+    cache_info *info, cache_server_conf *conf)
+{
+    const char *dates;
+    int date_in_errhdr = 0;
+    apr_time_t exp, lastmod, now, date;
 
-    if (rv != OK) {
-        /* Caching layer declined the opportunity to cache the response */
-        ap_remove_output_filter(f);
-        return ap_pass_brigade(f->next, in);
-    }
+    exp = cache->exp;
+    lastmod = cache->lastmod;
 
     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
                  "cache: Caching url: %s", r->unparsed_uri);
@@ -671,7 +583,6 @@ static int cache_save_filter(ap_filter_t
     if (lastmod != APR_DATE_BAD && lastmod > date) {
         /* if it's in the future, then replace by date */
         lastmod = date;
-        lastmods = dates;
         ap_log_error(APLOG_MARK, APLOG_DEBUG, 0,
                      r->server,
                      "cache: Last modified is in the future, "
@@ -710,7 +621,129 @@ static int cache_save_filter(ap_filter_t
             apr_table_set(r->headers_out, "Expires", expire_hdr);
         }
     }
+
     info->expire = exp;
+}
+
+static int cache_save_filter(ap_filter_t *f, apr_bucket_brigade *in)
+{
+    int rv = !OK;
+    request_rec *r = f->r;
+    cache_request_rec *cache;
+    cache_server_conf *conf;
+    apr_off_t size;
+    cache_info *info = NULL;
+    const char *reason;
+    apr_pool_t *p;
+
+    conf = (cache_server_conf *) ap_get_module_config(r->server->module_config,
+                                                      &cache_module);
+
+    /* Setup cache_request_rec */
+    cache = (cache_request_rec *) ap_get_module_config(r->request_config,
+                                                       &cache_module);
+    if (!cache) {
+        /* user likely configured CACHE_SAVE manually; they should really use
+         * mod_cache configuration to do that
+         */
+        cache = apr_pcalloc(r->pool, sizeof(cache_request_rec));
+        ap_set_module_config(r->request_config, &cache_module, cache);
+    }
+
+    p = r->pool;
+
+    /*
+     * Pass Data to Cache
+     * ------------------
+     * This section passes the brigades into the cache modules, but only
+     * if the setup section (see below) is complete.
+     */
+    if (cache->block_response) {
+        /* We've already sent down the response and EOS.  So, ignore
+         * whatever comes now.
+         */
+        return APR_SUCCESS;
+    }
+
+    /* have we already run the cachability check and set up the
+     * cached file handle?
+     */
+    if (cache->in_checked) {
+        /* pass the brigades into the cache, then pass them
+         * up the filter stack
+         */
+        rv = cache->provider->store_body(cache->handle, r, in);
+        if (rv != APR_SUCCESS) {
+            ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, r->server,
+                         "cache: Cache provider's store_body failed!");
+            ap_remove_output_filter(f);
+        }
+        return ap_pass_brigade(f->next, in);
+    }
+
+    reason = check_cacheable_request(r, cache, conf);
+
+    if (reason) {
+        ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
+                     "cache: %s not cached. Reason: %s", r->unparsed_uri,
+                     reason);
+
+        /* remove this filter from the chain */
+        ap_remove_output_filter(f);
+
+        /* ship the data up the stack */
+        return ap_pass_brigade(f->next, in);
+    }
+
+    /* Make it so that we don't execute this path again. */
+    cache->in_checked = 1;
+
+    size = request_cl(r, in);
+
+    /* It's safe to cache the response.
+     *
+     * There are two possiblities at this point:
+     * - cache->handle == NULL. In this case there is no previously
+     * cached entity anywhere on the system. We must create a brand
+     * new entity and store the response in it.
+     * - cache->stale_handle != NULL. In this case there is a stale
+     * entity in the system which needs to be replaced by new
+     * content (unless the result was 304 Not Modified, which means
+     * the cached entity is actually fresh, and we should update
+     * the headers).
+     */
+
+    /* Did we have a stale cache entry that really is stale? */
+    if (cache->stale_handle) {
+        if (r->status == HTTP_NOT_MODIFIED) {
+            /* Oh, hey.  It isn't that stale!  Yay! */
+            cache->handle = cache->stale_handle;
+            info = &cache->handle->cache_obj->info;
+            rv = OK;
+        }
+        else {
+            /* Oh, well.  Toss it. */
+            cache->provider->remove_entity(cache->stale_handle);
+            /* Treat the request as if it wasn't conditional. */
+            cache->stale_handle = NULL;
+        }
+    }
+
+    /* no cache handle, create a new entity */
+    if (!cache->handle) {
+        rv = cache_create_entity(r, size);
+        info = apr_pcalloc(r->pool, sizeof(cache_info));
+        /* We only set info->status upon the initial creation. */
+        info->status = r->status;
+    }
+
+    if (rv != OK) {
+        /* Caching layer declined the opportunity to cache the response */
+        ap_remove_output_filter(f);
+        return ap_pass_brigade(f->next, in);
+    }
+
+    prepare_cacheable_request(r, cache, info, conf);
 
     /* We found a stale entry which wasn't really stale. */
     if (cache->stale_handle) {

--

[patch 06/10] dont choke on empty URI path

Posted by Davi Arnaut <da...@haxent.com.br>.
According to my reading of RFC3986 (section 6.2.3.) if a URI contains an
authority component and an empty path, the empty path is to be equivalent
to "/".

It explicitly cites the following four URIs as equivalents:

    http://example.com
    http://example.com/
    http://example.com:/
    http://example.com:80/

Index: trunk/modules/cache/cache_util.c
===================================================================
--- trunk.orig/modules/cache/cache_util.c
+++ trunk/modules/cache/cache_util.c
@@ -67,9 +67,23 @@ static int uri_meets_conditions(const ap
         }
     }
 
+    /* For HTTP caching purposes, an empty (NULL) path is equivalent to
+     * a single "/" path. RFCs 3986/2396
+     */
+
+    if (!url->path) {
+        if (*filter->path == '/' && pathlen == 1) {
+            return 1;
+        }
+        else {
+            return 0;
+        }
+    }
+
     /* Url has met all of the filter conditions so far, determine
      * if the paths match.
      */
+
     return !strncmp(filter->path, url->path, pathlen);
 }
 

--

[patch 08/10] unify storage providers entry calls

Posted by Davi Arnaut <da...@haxent.com.br>.
Later on those will handle all HTTP specific caching nuances

Index: trunk/modules/cache/cache_storage.c
===================================================================
--- trunk.orig/modules/cache/cache_storage.c
+++ trunk/modules/cache/cache_storage.c
@@ -24,6 +24,30 @@ extern module AP_MODULE_DECLARE_DATA cac
 
 /* -------------------------------------------------------------- */
 
+apr_status_t cache_store_entity_headers(cache_request_rec *cache,
+                                        request_rec *r, cache_info *info)
+{
+    return cache->provider->store_headers(cache->handle, r, info);
+}
+
+apr_status_t cache_store_entity_body(cache_request_rec *cache, request_rec *r,
+                                     apr_bucket_brigade *bb)
+{
+    return cache->provider->store_body(cache->handle, r, bb);
+}
+
+apr_status_t cache_recall_entity_headers(const cache_provider *provider,
+                                         cache_handle_t *h, request_rec *r)
+{
+    return provider->recall_headers(h, r);
+}
+
+apr_status_t cache_recall_entity_body(cache_request_rec *cache,
+                                      apr_pool_t *pool, apr_bucket_brigade *bb)
+{
+    return cache->provider->recall_body(cache->handle, pool, bb);
+}
+
 /*
  * delete all URL entities from the cache
  *
@@ -311,7 +335,7 @@ int cache_select(request_rec *r)
         switch ((rv = list->provider->open_entity(h, r, key))) {
         case OK: {
 
-            if (list->provider->recall_headers(h, r) != APR_SUCCESS) {
+            if (cache_recall_entity_headers(list->provider, h, r) != APR_SUCCESS) {
                 /* TODO: Handle this error */
                 return DECLINED;
             }
Index: trunk/modules/cache/mod_cache.c
===================================================================
--- trunk.orig/modules/cache/mod_cache.c
+++ trunk/modules/cache/mod_cache.c
@@ -271,7 +271,7 @@ static int cache_out_filter(ap_filter_t 
     r->status = cache->handle->cache_obj->info.status;
 
     /* recall_headers() was called in cache_select() */
-    cache->provider->recall_body(cache->handle, r->pool, bb);
+    cache_recall_entity_body(cache, r->pool, bb);
 
     /* This filter is done once it has served up its content */
     ap_remove_output_filter(f);
@@ -676,7 +676,7 @@ static int cache_save_filter(ap_filter_t
         /* pass the brigades into the cache, then pass them
          * up the filter stack
          */
-        rv = cache->provider->store_body(cache->handle, r, in);
+        rv = cache_store_entity_body(cache, r, in);
         if (rv != APR_SUCCESS) {
             ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, r->server,
                          "cache: Cache provider's store_body failed!");
@@ -781,7 +781,7 @@ static int cache_save_filter(ap_filter_t
      * permissions problems or a read-only (re)mount. This must be handled
      * later.
      */
-    rv = cache->provider->store_headers(cache->handle, r, info);
+    rv = cache_store_entity_headers(cache, r, info);
 
     /* Did we just update the cached headers on a revalidated response?
      *
@@ -809,7 +809,7 @@ static int cache_save_filter(ap_filter_t
             APR_BRIGADE_INSERT_TAIL(bb, bkt);
         }
         else {
-            cache->provider->recall_body(cache->handle, r->pool, bb);
+            cache_recall_entity_body(cache, r->pool, bb);
         }
 
         cache->block_response = 1;
@@ -845,7 +845,7 @@ static int cache_save_filter(ap_filter_t
         return ap_pass_brigade(f->next, in);
     }
 
-    rv = cache->provider->store_body(cache->handle, r, in);
+    rv = cache_store_entity_body(cache, r, in);
     if (rv != APR_SUCCESS) {
         ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, r->server,
                      "cache: store_body failed");
Index: trunk/modules/cache/mod_cache.h
===================================================================
--- trunk.orig/modules/cache/mod_cache.h
+++ trunk/modules/cache/mod_cache.h
@@ -305,13 +305,11 @@ apr_status_t cache_generate_key_default(
  */
 const char* cache_create_key( request_rec*r );
 
-/*
-apr_status_t cache_store_entity_headers(cache_handle_t *h, request_rec *r, cache_info *info);
-apr_status_t cache_store_entity_body(cache_handle_t *h, request_rec *r, apr_bucket_brigade *bb);
-
-apr_status_t cache_recall_entity_headers(cache_handle_t *h, request_rec *r);
-apr_status_t cache_recall_entity_body(cache_handle_t *h, apr_pool_t *p, apr_bucket_brigade *bb);
-*/
+apr_status_t cache_store_entity_headers(cache_request_rec *cache, request_rec *r, cache_info *info);
+apr_status_t cache_store_entity_body(cache_request_rec *cache, request_rec *r, apr_bucket_brigade *bb);
+
+apr_status_t cache_recall_entity_headers(const cache_provider *provider, cache_handle_t *h, request_rec *r);
+apr_status_t cache_recall_entity_body(cache_request_rec *cache, apr_pool_t *pool, apr_bucket_brigade *bb);
 
 /* hooks */
 

--

[patch 02/10] revamped mod_disk_cache directory structure

Posted by Davi Arnaut <da...@haxent.com.br>.
This patch converts the mod_disk_cache cache directory structure to a
uniformly distributed N level hierarchy. The admin specifies the number
of levels (or directories) and the files are scattered across the
directories. Example:

CacheRoot /cache/
# CacheDirLevels n l1 l2 ln
CacheDirLevels 2 4 256 16

In this example, the directory tree will be three levels deep. The first
level will have 4 directories, each of the 4 first level directories will
have 256 directories (second level) and so on to the last level.

Directory tree layout for the example:

/cache/
/cache/0d
/cache/0d/36
/cache/0d/36/06
/cache/0d/36/06/7a50a5c38a73abdb6db28a2b0f6881e5.data
/cache/0d/36/06/7a50a5c38a73abdb6db28a2b0f6881e5.header

This patch adds support for symlinking the directories to separate disk
or partitions by creating the cache files on their destination directory.
The symlinks can also be used to load balance the cache files between
disks because each last level directory gets the same (on average) number
of files -- on a cache setup with 4 first level directories each one
receives 25%, linking the three of them to disk A and the last one to
disk B yields a 75/25 distribution between disks A (75) and B (25).

Index: trunk/modules/cache/cache_util.c
===================================================================
--- trunk.orig/modules/cache/cache_util.c
+++ trunk/modules/cache/cache_util.c
@@ -19,6 +19,7 @@
 #include "mod_cache.h"
 
 #include <ap_provider.h>
+#include <util_md5.h>
 
 /* -------------------------------------------------------------- */
 
@@ -489,54 +490,49 @@ CACHE_DECLARE(void) ap_cache_usec2hex(ap
     y[sizeof(j) * 2] = '\0';
 }
 
-static void cache_hash(const char *it, char *val, int ndepth, int nlength)
+static unsigned int cdb_string_hash(const char *str)
 {
-    apr_md5_ctx_t context;
-    unsigned char digest[16];
-    char tmp[22];
-    int i, k, d;
-    unsigned int x;
-    static const char enc_table[64] =
-    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_@";
-
-    apr_md5_init(&context);
-    apr_md5_update(&context, (const unsigned char *) it, strlen(it));
-    apr_md5_final(digest, &context);
-
-    /* encode 128 bits as 22 characters, using a modified uuencoding
-     * the encoding is 3 bytes -> 4 characters* i.e. 128 bits is
-     * 5 x 3 bytes + 1 byte -> 5 * 4 characters + 2 characters
-     */
-    for (i = 0, k = 0; i < 15; i += 3) {
-        x = (digest[i] << 16) | (digest[i + 1] << 8) | digest[i + 2];
-        tmp[k++] = enc_table[x >> 18];
-        tmp[k++] = enc_table[(x >> 12) & 0x3f];
-        tmp[k++] = enc_table[(x >> 6) & 0x3f];
-        tmp[k++] = enc_table[x & 0x3f];
-    }
-
-    /* one byte left */
-    x = digest[15];
-    tmp[k++] = enc_table[x >> 2];    /* use up 6 bits */
-    tmp[k++] = enc_table[(x << 4) & 0x3f];
-
-    /* now split into directory levels */
-    for (i = k = d = 0; d < ndepth; ++d) {
-        memcpy(&val[i], &tmp[k], nlength);
-        k += nlength;
-        val[i + nlength] = '/';
-        i += nlength + 1;
-    }
-    memcpy(&val[i], &tmp[k], 22 - k);
-    val[i + 22 - k] = '\0';
-}
-
-CACHE_DECLARE(char *)ap_cache_generate_name(apr_pool_t *p, int dirlevels,
-                                            int dirlength, const char *name)
-{
-    char hashfile[66];
-    cache_hash(name, hashfile, dirlevels, dirlength);
-    return apr_pstrdup(p, hashfile);
+    unsigned int hash = 5381;
+
+    while (*str) {
+        hash += (hash << 5);
+        hash ^= *str++;
+    }
+
+    return hash;
+}
+
+#define MD5_HEXDIGESTSIZE   (APR_MD5_DIGESTSIZE * 2 + 1)
+
+CACHE_DECLARE(char *)ap_cache_generate_name(apr_pool_t *p, unsigned int nlevels,
+                                            unsigned int *dirlevels,
+                                            unsigned int *divisors,
+                                            const char *name)
+{
+    char *md5_hash;
+    unsigned int i;
+    char *ptr, *key;
+    unsigned char h;
+    unsigned int cdb_hash;
+    static const char hex[] = "0123456789abcdef";
+
+    md5_hash = ap_md5_binary(p, (unsigned char *) name, (int) strlen(name));
+
+    cdb_hash = cdb_string_hash(name);
+
+    key = ptr = apr_palloc(p, nlevels * LEVEL_DIR_LENGTH +
+                           MD5_HEXDIGESTSIZE);
+
+    for (i = 0; i < nlevels; i++) {
+        h = (cdb_hash / divisors[i]) % dirlevels[i];
+        *ptr++ = hex[h >> 4];
+        *ptr++ = hex[h & 0xf];
+        *ptr++ = '/';
+    }
+
+    memcpy(ptr, md5_hash, MD5_HEXDIGESTSIZE);
+
+    return key;
 }
 
 /* Create a new table consisting of those elements from an input
Index: trunk/modules/cache/mod_cache.h
===================================================================
--- trunk.orig/modules/cache/mod_cache.h
+++ trunk/modules/cache/mod_cache.h
@@ -72,6 +72,8 @@
 
 #include "apr_atomic.h"
 
+#define LEVEL_DIR_LENGTH    3
+
 #ifndef MAX
 #define MAX(a,b)                ((a) > (b) ? (a) : (b))
 #endif
@@ -274,8 +276,9 @@ CACHE_DECLARE(void) ap_cache_accept_head
 
 CACHE_DECLARE(apr_time_t) ap_cache_hex2usec(const char *x);
 CACHE_DECLARE(void) ap_cache_usec2hex(apr_time_t j, char *y);
-CACHE_DECLARE(char *) ap_cache_generate_name(apr_pool_t *p, int dirlevels, 
-                                             int dirlength, 
+CACHE_DECLARE(char *) ap_cache_generate_name(apr_pool_t *p, unsigned int nlevels,
+                                             unsigned int *dirlevels,
+                                             unsigned int *divisors,
                                              const char *name);
 CACHE_DECLARE(cache_provider_list *)ap_cache_get_providers(request_rec *r, cache_server_conf *conf, apr_uri_t uri);
 CACHE_DECLARE(int) ap_cache_liststr(apr_pool_t *p, const char *list,
Index: trunk/modules/cache/mod_disk_cache.c
===================================================================
--- trunk.orig/modules/cache/mod_disk_cache.c
+++ trunk/modules/cache/mod_disk_cache.c
@@ -70,13 +70,14 @@ static char *header_file(apr_pool_t *p, 
                          disk_cache_object_t *dobj, const char *name)
 {
     if (!dobj->hashfile) {
-        dobj->hashfile = ap_cache_generate_name(p, conf->dirlevels,
-                                                conf->dirlength, name);
+        dobj->hashfile = ap_cache_generate_name(p, conf->nlevels, conf->dirlevels,
+                                                conf->divisors, name);
     }
 
     if (dobj->prefix) {
         return apr_pstrcat(p, dobj->prefix, CACHE_VDIR_SUFFIX, "/",
-                           dobj->hashfile, CACHE_HEADER_SUFFIX, NULL);
+                           dobj->hashfile + (conf->nlevels + LEVEL_DIR_LENGTH),
+                           CACHE_HEADER_SUFFIX, NULL);
      }
      else {
         return apr_pstrcat(p, conf->cache_root, "/", dobj->hashfile,
@@ -88,13 +89,14 @@ static char *data_file(apr_pool_t *p, di
                        disk_cache_object_t *dobj, const char *name)
 {
     if (!dobj->hashfile) {
-        dobj->hashfile = ap_cache_generate_name(p, conf->dirlevels,
-                                                conf->dirlength, name);
+        dobj->hashfile = ap_cache_generate_name(p, conf->nlevels, conf->dirlevels,
+                                                conf->divisors, name);
     }
 
     if (dobj->prefix) {
         return apr_pstrcat(p, dobj->prefix, CACHE_VDIR_SUFFIX, "/",
-                           dobj->hashfile, CACHE_DATA_SUFFIX, NULL);
+                           dobj->hashfile + (conf->nlevels + LEVEL_DIR_LENGTH),
+                           CACHE_DATA_SUFFIX, NULL);
      }
      else {
         return apr_pstrcat(p, conf->cache_root, "/", dobj->hashfile,
@@ -102,7 +104,7 @@ static char *data_file(apr_pool_t *p, di
      }
 }
 
-static void mkdir_structure(disk_cache_conf *conf, const char *file, apr_pool_t *pool)
+static void mkdir_structure(disk_cache_conf *conf, char *file, apr_pool_t *pool)
 {
     apr_status_t rv;
     char *p;
@@ -123,11 +125,41 @@ static void mkdir_structure(disk_cache_c
     }
 }
 
+static apr_status_t disk_mktemp(apr_file_t **fp, const char *dest, char **tempfile,
+                                apr_int32_t flags, disk_cache_conf *conf,
+                                apr_pool_t *p)
+{
+    char *temp;
+    apr_status_t rv;
+    struct iovec iov[2];
+
+    iov[0].iov_base = (char *) dest;
+    iov[0].iov_len  = conf->nlevels * LEVEL_DIR_LENGTH +
+                      conf->cache_root_len;
+
+    iov[1].iov_base = AP_TEMPFILE;
+    iov[1].iov_len  = sizeof AP_TEMPFILE;
+
+    temp = apr_pstrcatv(p, iov, 2, NULL);
+
+    rv = apr_file_mktemp(fp, temp, flags, p);
+
+    if (APR_STATUS_IS_ENOENT(rv)) {
+        mkdir_structure(conf, temp, p);
+        memcpy(temp + iov[0].iov_len, AP_TEMPFILE, sizeof AP_TEMPFILE);
+        rv = apr_file_mktemp(fp, temp, flags, p);
+    }
+
+    *tempfile = temp;
+
+    return rv;
+}
+
 /* htcacheclean may remove directories underneath us.
  * So, we'll try renaming three times at a cost of 0.002 seconds.
  */
 static apr_status_t safe_file_rename(disk_cache_conf *conf,
-                                     const char *src, const char *dest,
+                                     const char *src, char *dest,
                                      apr_pool_t *pool)
 {
     apr_status_t rv;
@@ -359,7 +391,6 @@ static int create_entity(cache_handle_t 
     dobj->root_len = conf->cache_root_len;
     dobj->datafile = data_file(r->pool, conf, dobj, key);
     dobj->hdrsfile = header_file(r->pool, conf, dobj, key);
-    dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE, NULL);
 
     return OK;
 }
@@ -467,7 +498,6 @@ static int open_entity(cache_handle_t *h
     dobj->key = nkey;
     dobj->name = key;
     dobj->datafile = data_file(r->pool, conf, dobj, nkey);
-    dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE, NULL);
 
     /* Open the data file */
     flags = APR_READ|APR_BINARY;
@@ -841,11 +871,9 @@ static apr_status_t store_headers(cache_
             apr_array_header_t* varray;
             apr_uint32_t format = VARY_FORMAT_VERSION;
 
-            mkdir_structure(conf, dobj->hdrsfile, r->pool);
-
-            rv = apr_file_mktemp(&dobj->tfd, dobj->tempfile,
-                                 APR_CREATE | APR_WRITE | APR_BINARY | APR_EXCL,
-                                 r->pool);
+            rv = disk_mktemp(&dobj->tfd, dobj->hdrsfile, &dobj->tempfile,
+                             APR_CREATE | APR_WRITE | APR_BINARY | APR_EXCL,
+                             conf, r->pool);
 
             if (rv != APR_SUCCESS) {
                 return rv;
@@ -876,7 +904,6 @@ static apr_status_t store_headers(cache_
                 return rv;
             }
 
-            dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE, NULL);
             tmp = regen_key(r->pool, r->headers_in, varray, dobj->name);
             dobj->prefix = dobj->hdrsfile;
             dobj->hashfile = NULL;
@@ -885,10 +912,9 @@ static apr_status_t store_headers(cache_
         }
     }
 
-
-    rv = apr_file_mktemp(&dobj->hfd, dobj->tempfile,
-                         APR_CREATE | APR_WRITE | APR_BINARY |
-                         APR_BUFFERED | APR_EXCL, r->pool);
+    rv = disk_mktemp(&dobj->hfd, dobj->hdrsfile, &dobj->tempfile,
+                     APR_CREATE | APR_WRITE | APR_BINARY | APR_BUFFERED |
+                     APR_EXCL, conf, r->pool);
 
     if (rv != APR_SUCCESS) {
         return rv;
@@ -956,9 +982,6 @@ static apr_status_t store_headers(cache_
      * about to write the new headers file.
      */
     rv = apr_file_remove(dobj->hdrsfile, r->pool);
-    if (rv != APR_SUCCESS) {
-        mkdir_structure(conf, dobj->hdrsfile, r->pool);
-    }
 
     rv = safe_file_rename(conf, dobj->tempfile, dobj->hdrsfile, r->pool);
     if (rv != APR_SUCCESS) {
@@ -969,8 +992,6 @@ static apr_status_t store_headers(cache_
         return rv;
     }
 
-    dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE, NULL);
-
     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
                  "disk_cache: Stored headers for URL %s",  dobj->name);
     return APR_SUCCESS;
@@ -989,9 +1010,9 @@ static apr_status_t store_body(cache_han
      * in file_cache_el_final().
      */
     if (!dobj->tfd) {
-        rv = apr_file_mktemp(&dobj->tfd, dobj->tempfile,
-                             APR_CREATE | APR_WRITE | APR_BINARY |
-                             APR_BUFFERED | APR_EXCL, r->pool);
+        rv = disk_mktemp(&dobj->tfd, dobj->datafile, &dobj->tempfile,
+                         APR_CREATE | APR_WRITE | APR_BINARY |
+                         APR_BUFFERED | APR_EXCL, conf, r->pool);
         if (rv != APR_SUCCESS) {
             return rv;
         }
@@ -1067,19 +1088,35 @@ static apr_status_t store_body(cache_han
     return APR_SUCCESS;
 }
 
+static void calculate_divisors(unsigned int nlevels, unsigned int *dirlevels,
+                               unsigned int *divisors)
+{
+    unsigned int i;
+
+    divisors[0] = 1;
+
+    for (i = 1; i < nlevels; i++) {
+        divisors[i] = divisors[i-1] * dirlevels[i-1];
+    }
+}
+
 static void *create_config(apr_pool_t *p, server_rec *s)
 {
+    unsigned int divisors[DEFAULT_NLEVELS];
     disk_cache_conf *conf = apr_pcalloc(p, sizeof(disk_cache_conf));
+    unsigned int dirlevels[] = { DEFAULT_DIRLEVEL1, DEFAULT_DIRLEVEL2 };
 
-    /* XXX: Set default values */
-    conf->dirlevels = DEFAULT_DIRLEVELS;
-    conf->dirlength = DEFAULT_DIRLENGTH;
+    conf->nlevels = sizeof dirlevels/sizeof *dirlevels;
+    conf->dirlevels = dirlevels;
+    conf->divisors = divisors;
     conf->maxfs = DEFAULT_MAX_FILE_SIZE;
     conf->minfs = DEFAULT_MIN_FILE_SIZE;
 
     conf->cache_root = NULL;
     conf->cache_root_len = 0;
 
+    calculate_divisors(conf->nlevels, conf->dirlevels, conf->divisors);
+
     return conf;
 }
 
@@ -1098,40 +1135,36 @@ static const char
     return NULL;
 }
 
-/*
- * Consider eliminating the next two directives in favor of
- * Ian's prime number hash...
- * key = hash_fn( r->uri)
- * filename = "/key % prime1 /key %prime2/key %prime3"
- */
 static const char
-*set_cache_dirlevels(cmd_parms *parms, void *in_struct_ptr, const char *arg)
+*set_cache_dirlevels(cmd_parms *parms, void *in_struct_ptr, int argc,
+                     char *const argv[])
 {
+    unsigned int len;
     disk_cache_conf *conf = ap_get_module_config(parms->server->module_config,
                                                  &disk_cache_module);
-    int val = atoi(arg);
-    if (val < 1)
-        return "CacheDirLevels value must be an integer greater than 0";
-    if (val * conf->dirlength > CACHEFILE_LEN)
-        return "CacheDirLevels*CacheDirLength value must not be higher than 20";
-    conf->dirlevels = val;
-    return NULL;
-}
-static const char
-*set_cache_dirlength(cmd_parms *parms, void *in_struct_ptr, const char *arg)
-{
-    disk_cache_conf *conf = ap_get_module_config(parms->server->module_config,
-                                                 &disk_cache_module);
-    int val = atoi(arg);
-    if (val < 1)
-        return "CacheDirLength value must be an integer greater than 0";
-    if (val * conf->dirlevels > CACHEFILE_LEN)
-        return "CacheDirLevels*CacheDirLength value must not be higher than 20";
 
-    conf->dirlength = val;
+    if (!argc)
+        return "The number of levels was not specified.";
+
+    conf->nlevels = atoi(*argv);
+
+    if (conf->nlevels != --argc)
+        return apr_psprintf(parms->pool, "%u levels, but only %u were given.",
+                            conf->nlevels, argc);
+
+    len = conf->nlevels * sizeof(*conf->dirlevels);
+
+    conf->dirlevels = apr_palloc(parms->pool, len);
+    conf->divisors = apr_palloc(parms->pool, len);
+
+    while (argc--) {
+        conf->dirlevels[argc] = atoi(argv[argc+1]);
+    }
+
+    calculate_divisors(conf->nlevels, conf->dirlevels, conf->divisors);
+
     return NULL;
 }
-
 static const char
 *set_cache_minfs(cmd_parms *parms, void *in_struct_ptr, const char *arg)
 {
@@ -1153,14 +1186,12 @@ static const command_rec disk_cache_cmds
 {
     AP_INIT_TAKE1("CacheRoot", set_cache_root, NULL, RSRC_CONF,
                  "The directory to store cache files"),
-    AP_INIT_TAKE1("CacheDirLevels", set_cache_dirlevels, NULL, RSRC_CONF,
-                  "The number of levels of subdirectories in the cache"),
-    AP_INIT_TAKE1("CacheDirLength", set_cache_dirlength, NULL, RSRC_CONF,
-                  "The number of characters in subdirectory names"),
     AP_INIT_TAKE1("CacheMinFileSize", set_cache_minfs, NULL, RSRC_CONF,
                   "The minimum file size to cache a document"),
     AP_INIT_TAKE1("CacheMaxFileSize", set_cache_maxfs, NULL, RSRC_CONF,
                   "The maximum file size to cache a document"),
+    AP_INIT_TAKE_ARGV("CacheDirLevels", set_cache_dirlevels, NULL, RSRC_CONF,
+                      "The number of levels of subdirectories in the cache"),
     {NULL}
 };
 
Index: trunk/modules/cache/mod_disk_cache.h
===================================================================
--- trunk.orig/modules/cache/mod_disk_cache.h
+++ trunk/modules/cache/mod_disk_cache.h
@@ -61,10 +61,10 @@ typedef struct disk_cache_object {
     char *tempfile;    /* temp file tohold the content */
     const char *prefix;
     const char *datafile;    /* name of file where the data will go */
-    const char *hdrsfile;    /* name of file where the hdrs will go */
+    char *hdrsfile;          /* name of file where the hdrs will go */
     const char *hashfile;    /* Computed hash key for this URI */
-    const char *name;   /* Requested URI without vary bits - suitable for mortals. */
-    const char *key;    /* On-disk prefix; URI with Vary bits (if present) */
+    const char *name;        /* Requested URI without vary bits - suitable for mortals. */
+    const char *key;         /* On-disk prefix; URI with Vary bits (if present) */
     apr_file_t *fd;          /* data file */
     apr_file_t *hfd;         /* headers file */
     apr_file_t *tfd;         /* temporary file for data */
@@ -78,16 +78,19 @@ typedef struct disk_cache_object {
  */
 /* TODO: Make defaults OS specific */
 #define CACHEFILE_LEN 20        /* must be less than HASH_LEN/2 */
-#define DEFAULT_DIRLEVELS 3
-#define DEFAULT_DIRLENGTH 2
+#define DEFAULT_NLEVELS 2
+#define DEFAULT_DIRLEVEL1 16
+#define DEFAULT_DIRLEVEL2 256
+#define DEFAULT_DIRLEVEL_MAX 256
 #define DEFAULT_MIN_FILE_SIZE 1
 #define DEFAULT_MAX_FILE_SIZE 1000000
 
 typedef struct {
     const char* cache_root;
     apr_size_t cache_root_len;
-    int dirlevels;               /* Number of levels of subdirectories */
-    int dirlength;               /* Length of subdirectory names */
+    unsigned int nlevels;        /* number of directories levels       */
+    unsigned int *dirlevels;     /* number of subdirs for each level   */
+    unsigned int *divisors;      /* divisor for each level             */
     apr_size_t minfs;            /* minumum file size for cached files */
     apr_size_t maxfs;            /* maximum file size for cached files */
 } disk_cache_conf;

--

[patch 01/10] dont cache expired objects

Posted by Davi Arnaut <da...@haxent.com.br>.
Don't cache requests with a expires date in the past; otherwise the cache code
will _always_ try to cache the URL, leading to numerous rename() errors on win32
since the URL is already cached.

Index: trunk/modules/cache/mod_cache.c
===================================================================
--- trunk.orig/modules/cache/mod_cache.c
+++ trunk/modules/cache/mod_cache.c
@@ -426,6 +426,11 @@ static int cache_save_filter(ap_filter_t
         /* if a broken Expires header is present, don't cache it */
         reason = apr_pstrcat(p, "Broken expires header: ", exps, NULL);
     }
+    else if (exps != NULL && exp != APR_DATE_BAD && exp < r->request_time)
+    {
+        /* if a Expires header is in the past, don't cache it */
+        reason = "Expires header already expired, not cacheable";
+    }
     else if (r->args && exps == NULL) {
         /* if query string present but no expiration time, don't cache it
          * (RFC 2616/13.9)

--

[patch 09/10] move open_entity

Posted by Davi Arnaut <da...@haxent.com.br>.
Move the open_entity call to cache_recall_entity_headers, adding a
new urlkey parameter.

Index: trunk/modules/cache/cache_storage.c
===================================================================
--- trunk.orig/modules/cache/cache_storage.c
+++ trunk/modules/cache/cache_storage.c
@@ -37,8 +37,16 @@ apr_status_t cache_store_entity_body(cac
 }
 
 apr_status_t cache_recall_entity_headers(const cache_provider *provider,
-                                         cache_handle_t *h, request_rec *r)
+                                         cache_handle_t *h, request_rec *r,
+                                         const char *urlkey)
 {
+    apr_status_t rv;
+
+    rv = provider->open_entity(h, r, urlkey);
+
+    if (rv != OK)
+        return rv;
+
     return provider->recall_headers(h, r);
 }
 
@@ -332,14 +340,9 @@ int cache_select(request_rec *r)
     list = cache->providers;
 
     while (list) {
-        switch ((rv = list->provider->open_entity(h, r, key))) {
-        case OK: {
-
-            if (cache_recall_entity_headers(list->provider, h, r) != APR_SUCCESS) {
-                /* TODO: Handle this error */
-                return DECLINED;
-            }
+        rv = cache_recall_entity_headers(list->provider, h, r, key);
 
+        if (rv == APR_SUCCESS) {
             if (cache_check_request(h, r, cache, list) != OK) {
                 return DECLINED;
             }
@@ -350,16 +353,11 @@ int cache_select(request_rec *r)
             cache->handle = h;
             return OK;
         }
-        case DECLINED: {
+        else {
             /* try again with next cache type */
             list = list->next;
             continue;
         }
-        default: {
-            /* oo-er! an error */
-            return rv;
-        }
-        }
     }
     return DECLINED;
 }
Index: trunk/modules/cache/mod_cache.h
===================================================================
--- trunk.orig/modules/cache/mod_cache.h
+++ trunk/modules/cache/mod_cache.h
@@ -308,7 +308,8 @@ const char* cache_create_key( request_re
 apr_status_t cache_store_entity_headers(cache_request_rec *cache, request_rec *r, cache_info *info);
 apr_status_t cache_store_entity_body(cache_request_rec *cache, request_rec *r, apr_bucket_brigade *bb);
 
-apr_status_t cache_recall_entity_headers(const cache_provider *provider, cache_handle_t *h, request_rec *r);
+apr_status_t cache_recall_entity_headers(const cache_provider *provider, cache_handle_t *h, request_rec *r,
+                                         const char *urlkey);
 apr_status_t cache_recall_entity_body(cache_request_rec *cache, apr_pool_t *pool, apr_bucket_brigade *bb);
 
 /* hooks */

--

[patch 04/10] shrink cache_url_handler

Posted by Davi Arnaut <da...@haxent.com.br>.
Move parts of cache_url_handler() to a smaller add_cache_filters()
function that is easier to read and understand.

Index: trunk/modules/cache/mod_cache.c
===================================================================
--- trunk.orig/modules/cache/mod_cache.c
+++ trunk/modules/cache/mod_cache.c
@@ -48,6 +48,44 @@ static ap_filter_rec_t *cache_remove_url
  *     oh well.
  */
 
+static void add_cache_filters(request_rec *r, cache_request_rec *cache)
+{
+    /*
+     * Add cache_save filter to cache this request. Choose
+     * the correct filter by checking if we are a subrequest
+     * or not.
+     */
+    if (r->main) {
+        ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS,
+                        r->server,
+                        "Adding CACHE_SAVE_SUBREQ filter for %s",
+                        r->uri);
+        ap_add_output_filter_handle(cache_save_subreq_filter_handle,
+                                    NULL, r, r->connection);
+    }
+    else {
+        ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS,
+                        r->server, "Adding CACHE_SAVE filter for %s",
+                        r->uri);
+        ap_add_output_filter_handle(cache_save_filter_handle,
+                                    NULL, r, r->connection);
+    }
+
+    ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
+                    "Adding CACHE_REMOVE_URL filter for %s",
+                    r->uri);
+
+    /* Add cache_remove_url filter to this request to remove a
+     * stale cache entry if needed. Also put the current cache
+     * request rec in the filter context, as the request that
+     * is available later during running the filter maybe
+     * different due to an internal redirect.
+     */
+    cache->remove_url_filter =
+        ap_add_output_filter_handle(cache_remove_url_filter_handle,
+                                    cache, r, r->connection);
+}
+
 static int cache_url_handler(request_rec *r, int lookup)
 {
     apr_status_t rv;
@@ -111,41 +149,7 @@ static int cache_url_handler(request_rec
     if (rv != OK) {
         if (rv == DECLINED) {
             if (!lookup) {
-
-                /*
-                 * Add cache_save filter to cache this request. Choose
-                 * the correct filter by checking if we are a subrequest
-                 * or not.
-                 */
-                if (r->main) {
-                    ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS,
-                                 r->server,
-                                 "Adding CACHE_SAVE_SUBREQ filter for %s",
-                                 r->uri);
-                    ap_add_output_filter_handle(cache_save_subreq_filter_handle,
-                                                NULL, r, r->connection);
-                }
-                else {
-                    ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS,
-                                 r->server, "Adding CACHE_SAVE filter for %s",
-                                 r->uri);
-                    ap_add_output_filter_handle(cache_save_filter_handle,
-                                                NULL, r, r->connection);
-                }
-
-                ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
-                             "Adding CACHE_REMOVE_URL filter for %s",
-                             r->uri);
-
-                /* Add cache_remove_url filter to this request to remove a
-                 * stale cache entry if needed. Also put the current cache
-                 * request rec in the filter context, as the request that
-                 * is available later during running the filter maybe
-                 * different due to an internal redirect.
-                 */
-                cache->remove_url_filter =
-                    ap_add_output_filter_handle(cache_remove_url_filter_handle,
-                                                cache, r, r->connection);
+                add_cache_filters(r, cache);
             }
             else {
                 if (cache->stale_headers) {

--

[patch 10/10] combine open_entity and recall_headers callbacks

Posted by Davi Arnaut <da...@haxent.com.br>.
An open_entity without a close_entity call is pointless, those can
be resumed to a single call.

Index: trunk/modules/cache/cache_storage.c
===================================================================
--- trunk.orig/modules/cache/cache_storage.c
+++ trunk/modules/cache/cache_storage.c
@@ -40,14 +40,7 @@ apr_status_t cache_recall_entity_headers
                                          cache_handle_t *h, request_rec *r,
                                          const char *urlkey)
 {
-    apr_status_t rv;
-
-    rv = provider->open_entity(h, r, urlkey);
-
-    if (rv != OK)
-        return rv;
-
-    return provider->recall_headers(h, r);
+    return provider->recall_headers(h, r, urlkey);
 }
 
 apr_status_t cache_recall_entity_body(cache_request_rec *cache,
Index: trunk/modules/cache/mod_cache.h
===================================================================
--- trunk.orig/modules/cache/mod_cache.h
+++ trunk/modules/cache/mod_cache.h
@@ -213,12 +213,10 @@ typedef struct {
     int (*remove_entity) (cache_handle_t *h);
     apr_status_t (*store_headers)(cache_handle_t *h, request_rec *r, cache_info *i);
     apr_status_t (*store_body)(cache_handle_t *h, request_rec *r, apr_bucket_brigade *b);
-    apr_status_t (*recall_headers) (cache_handle_t *h, request_rec *r);
+    apr_status_t (*recall_headers) (cache_handle_t *h, request_rec *r, const char *urlkey);
     apr_status_t (*recall_body) (cache_handle_t *h, apr_pool_t *p, apr_bucket_brigade *bb); 
     int (*create_entity) (cache_handle_t *h, request_rec *r,
                            const char *urlkey, apr_off_t len);
-    int (*open_entity) (cache_handle_t *h, request_rec *r,
-                           const char *urlkey);
     int (*remove_url) (cache_handle_t *h, apr_pool_t *p);
 } cache_provider;
 
Index: trunk/modules/cache/mod_disk_cache.c
===================================================================
--- trunk.orig/modules/cache/mod_disk_cache.c
+++ trunk/modules/cache/mod_disk_cache.c
@@ -57,7 +57,7 @@ module AP_MODULE_DECLARE_DATA disk_cache
 static int remove_entity(cache_handle_t *h);
 static apr_status_t store_headers(cache_handle_t *h, request_rec *r, cache_info *i);
 static apr_status_t store_body(cache_handle_t *h, request_rec *r, apr_bucket_brigade *b);
-static apr_status_t recall_headers(cache_handle_t *h, request_rec *r);
+static apr_status_t recall_headers(cache_handle_t *h, request_rec *r, const char *urlkey);
 static apr_status_t recall_body(cache_handle_t *h, apr_pool_t *p, apr_bucket_brigade *bb);
 static apr_status_t read_array(request_rec *r, apr_array_header_t* arr,
                                apr_file_t *file);
@@ -777,9 +777,15 @@ static apr_status_t read_table(cache_han
  * @@@: XXX: FIXME: currently the headers are passed thru un-merged.
  * Is that okay, or should they be collapsed where possible?
  */
-static apr_status_t recall_headers(cache_handle_t *h, request_rec *r)
+static apr_status_t recall_headers(cache_handle_t *h, request_rec *r,
+                                   const char *urlkey)
 {
-    disk_cache_object_t *dobj = (disk_cache_object_t *) h->cache_obj->vobj;
+    disk_cache_object_t *dobj;
+
+    if (open_entity(h, r, urlkey) != OK)
+        return DECLINED;
+
+    dobj = (disk_cache_object_t *) h->cache_obj->vobj;
 
     /* This case should not happen... */
     if (!dobj->hfd) {
@@ -1203,7 +1209,6 @@ static const cache_provider cache_disk_p
     &recall_headers,
     &recall_body,
     &create_entity,
-    &open_entity,
     &remove_url,
 };
 
Index: trunk/modules/cache/mod_mem_cache.c
===================================================================
--- trunk.orig/modules/cache/mod_mem_cache.c
+++ trunk/modules/cache/mod_mem_cache.c
@@ -108,7 +108,7 @@ static mem_cache_conf *sconf;
 static int remove_entity(cache_handle_t *h);
 static apr_status_t store_headers(cache_handle_t *h, request_rec *r, cache_info *i);
 static apr_status_t store_body(cache_handle_t *h, request_rec *r, apr_bucket_brigade *b);
-static apr_status_t recall_headers(cache_handle_t *h, request_rec *r);
+static apr_status_t recall_headers(cache_handle_t *h, request_rec *r, const char *urlkey);
 static apr_status_t recall_body(cache_handle_t *h, apr_pool_t *p, apr_bucket_brigade *bb);
 
 static void cleanup_cache_object(cache_object_t *obj);
@@ -628,10 +628,16 @@ static int remove_url(cache_handle_t *h,
     return OK;
 }
 
-static apr_status_t recall_headers(cache_handle_t *h, request_rec *r)
+static apr_status_t recall_headers(cache_handle_t *h, request_rec *r,
+                                   const char *urlkey)
 {
     int rc;
-    mem_cache_object_t *mobj = (mem_cache_object_t*) h->cache_obj->vobj;
+    mem_cache_object_t *mobj;
+
+    if (open_entity(h, r, urlkey) != OK)
+        return DECLINED;
+
+    mobj = (mem_cache_object_t*) h->cache_obj->vobj;
 
     h->req_hdrs = apr_table_make(r->pool, mobj->num_req_hdrs);
     h->resp_hdrs = apr_table_make(r->pool, mobj->num_header_out);
@@ -1053,7 +1059,6 @@ static const cache_provider cache_mem_pr
     &recall_headers,
     &recall_body,
     &create_mem_entity,
-    &open_entity,
     &remove_url,
 };
 
@@ -1065,7 +1070,6 @@ static const cache_provider cache_fd_pro
     &recall_headers,
     &recall_body,
     &create_fd_entity,
-    &open_entity,
     &remove_url,
 };
 

--

[patch 07/10] shrink cache_select

Posted by Davi Arnaut <da...@haxent.com.br>.
Move parts of cache_select() to a smaller cache_check_request()
function that is easier to read and understand.

Index: trunk/modules/cache/cache_storage.c
===================================================================
--- trunk.orig/modules/cache/cache_storage.c
+++ trunk/modules/cache/cache_storage.c
@@ -169,6 +169,115 @@ CACHE_DECLARE(void) ap_cache_accept_head
     }
 }
 
+static int cache_check_request(cache_handle_t *h, request_rec *r,
+                               cache_request_rec *cache,
+                               cache_provider_list *list)
+{
+    int fresh;
+    char *vary = NULL;
+
+    /*
+     * Check Content-Negotiation - Vary
+     *
+     * At this point we need to make sure that the object we found in
+     * the cache is the same object that would be delivered to the
+     * client, when the effects of content negotiation are taken into
+     * effect.
+     *
+     * In plain english, we want to make sure that a language-negotiated
+     * document in one language is not given to a client asking for a
+     * language negotiated document in a different language by mistake.
+     *
+     * This code makes the assumption that the storage manager will
+     * cache the req_hdrs if the response contains a Vary
+     * header.
+     *
+     * RFC2616 13.6 and 14.44 describe the Vary mechanism.
+     */
+    vary = apr_pstrdup(r->pool, apr_table_get(h->resp_hdrs, "Vary"));
+    while (vary && *vary) {
+        char *name = vary;
+        const char *h1, *h2;
+
+        /* isolate header name */
+        while (*vary && !apr_isspace(*vary) && (*vary != ','))
+            ++vary;
+        while (*vary && (apr_isspace(*vary) || (*vary == ','))) {
+            *vary = '\0';
+            ++vary;
+        }
+
+        /*
+         * is this header in the request and the header in the cached
+         * request identical? If not, we give up and do a straight get
+         */
+        h1 = apr_table_get(r->headers_in, name);
+        h2 = apr_table_get(h->req_hdrs, name);
+        if (h1 == h2) {
+            /* both headers NULL, so a match - do nothing */
+        }
+        else if (h1 && h2 && !strcmp(h1, h2)) {
+            /* both headers exist and are equal - do nothing */
+        }
+        else {
+            /* headers do not match, so Vary failed */
+            ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS,
+                        r->server,
+                        "cache_select_url(): Vary header mismatch.");
+            return DECLINED;
+        }
+    }
+
+    cache->provider = list->provider;
+    cache->provider_name = list->provider_name;
+
+    /* Is our cached response fresh enough? */
+    fresh = ap_cache_check_freshness(h, r);
+    if (!fresh) {
+        const char *etag, *lastmod;
+
+        ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
+            "Cached response for %s isn't fresh.  Adding/replacing "
+            "conditional request headers.", r->uri);
+
+        /* Make response into a conditional */
+        cache->stale_headers = apr_table_copy(r->pool,
+                                                r->headers_in);
+
+        /* We can only revalidate with our own conditionals: remove the
+         * conditions from the original request.
+         */
+        apr_table_unset(r->headers_in, "If-Match");
+        apr_table_unset(r->headers_in, "If-Modified-Since");
+        apr_table_unset(r->headers_in, "If-None-Match");
+        apr_table_unset(r->headers_in, "If-Range");
+        apr_table_unset(r->headers_in, "If-Unmodified-Since");
+
+        etag = apr_table_get(h->resp_hdrs, "ETag");
+        lastmod = apr_table_get(h->resp_hdrs, "Last-Modified");
+
+        if (etag || lastmod) {
+            /* If we have a cached etag and/or Last-Modified add in
+             * our own conditionals.
+             */
+
+            if (etag) {
+                apr_table_set(r->headers_in, "If-None-Match", etag);
+            }
+
+            if (lastmod) {
+                apr_table_set(r->headers_in, "If-Modified-Since",
+                                lastmod);
+            }
+            cache->stale_handle = h;
+        }
+
+        return DECLINED;
+    }
+
+    return OK;
+}
+
 /*
  * select a specific URL entity in the cache
  *
@@ -201,110 +310,13 @@ int cache_select(request_rec *r)
     while (list) {
         switch ((rv = list->provider->open_entity(h, r, key))) {
         case OK: {
-            char *vary = NULL;
-            int fresh;
 
             if (list->provider->recall_headers(h, r) != APR_SUCCESS) {
                 /* TODO: Handle this error */
                 return DECLINED;
             }
 
-            /*
-             * Check Content-Negotiation - Vary
-             *
-             * At this point we need to make sure that the object we found in
-             * the cache is the same object that would be delivered to the
-             * client, when the effects of content negotiation are taken into
-             * effect.
-             *
-             * In plain english, we want to make sure that a language-negotiated
-             * document in one language is not given to a client asking for a
-             * language negotiated document in a different language by mistake.
-             *
-             * This code makes the assumption that the storage manager will
-             * cache the req_hdrs if the response contains a Vary
-             * header.
-             *
-             * RFC2616 13.6 and 14.44 describe the Vary mechanism.
-             */
-            vary = apr_pstrdup(r->pool, apr_table_get(h->resp_hdrs, "Vary"));
-            while (vary && *vary) {
-                char *name = vary;
-                const char *h1, *h2;
-
-                /* isolate header name */
-                while (*vary && !apr_isspace(*vary) && (*vary != ','))
-                    ++vary;
-                while (*vary && (apr_isspace(*vary) || (*vary == ','))) {
-                    *vary = '\0';
-                    ++vary;
-                }
-
-                /*
-                 * is this header in the request and the header in the cached
-                 * request identical? If not, we give up and do a straight get
-                 */
-                h1 = apr_table_get(r->headers_in, name);
-                h2 = apr_table_get(h->req_hdrs, name);
-                if (h1 == h2) {
-                    /* both headers NULL, so a match - do nothing */
-                }
-                else if (h1 && h2 && !strcmp(h1, h2)) {
-                    /* both headers exist and are equal - do nothing */
-                }
-                else {
-                    /* headers do not match, so Vary failed */
-                    ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS,
-                                r->server,
-                                "cache_select_url(): Vary header mismatch.");
-                    return DECLINED;
-                }
-            }
-
-            cache->provider = list->provider;
-            cache->provider_name = list->provider_name;
-
-            /* Is our cached response fresh enough? */
-            fresh = ap_cache_check_freshness(h, r);
-            if (!fresh) {
-                const char *etag, *lastmod;
-
-                ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
-                  "Cached response for %s isn't fresh.  Adding/replacing "
-                  "conditional request headers.", r->uri);
-
-                /* Make response into a conditional */
-                cache->stale_headers = apr_table_copy(r->pool,
-                                                      r->headers_in);
-
-                /* We can only revalidate with our own conditionals: remove the
-                 * conditions from the original request.
-                 */
-                apr_table_unset(r->headers_in, "If-Match");
-                apr_table_unset(r->headers_in, "If-Modified-Since");
-                apr_table_unset(r->headers_in, "If-None-Match");
-                apr_table_unset(r->headers_in, "If-Range");
-                apr_table_unset(r->headers_in, "If-Unmodified-Since");
-
-                etag = apr_table_get(h->resp_hdrs, "ETag");
-                lastmod = apr_table_get(h->resp_hdrs, "Last-Modified");
-
-                if (etag || lastmod) {
-                    /* If we have a cached etag and/or Last-Modified add in
-                     * our own conditionals.
-                     */
-
-                    if (etag) {
-                        apr_table_set(r->headers_in, "If-None-Match", etag);
-                    }
-
-                    if (lastmod) {
-                        apr_table_set(r->headers_in, "If-Modified-Since",
-                                      lastmod);
-                    }
-                    cache->stale_handle = h;
-                }
-
+            if (cache_check_request(h, r, cache, list) != OK) {
                 return DECLINED;
             }
 

--

Re: [patch 00/10] SoC 2006 -- extending the cache architecture

Posted by Graham Leggett <mi...@sharp.fm>.
On Mon, July 24, 2006 2:50 am, Davi Arnaut wrote:

> Here is the first code drop regarding my SoC proposal "Redesigning and
> extending
> the Apache cache architecture"
> (http://verdesmares.com/Apache/proposal.txt).
>
> The patches are mainly clenaups and bug-fixes, except a new mod_disk_cache
> directory structure. Further clenaups still required.

Can you add these patches to a bugzilla entry so they don't fall through
the cracks?

Regards,
Graham
--