You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@httpd.apache.org by Davi Arnaut <da...@haxent.com.br> on 2006/07/20 16:58:01 UTC

[PATCH] revamped mod_disk_cache directory structure

Hi,

This patch converts the mod_disk_cache cache directory structure to a
uniformly distributed two-level hierarchy. The admin specifies the number
of level-1 and level-2 directories, and the files are scattered across
the level-2 directories.

Also, with this patch it is possible to assign directories to separate
partitions, because the temporary files are created in the destination
directory.

For example, running Apache/proxy+cache for a small network:

[root@cache1 cache]# sh files-per-directory.sh 
dir: 00/ subs: 139 files: 632 size: 4.8M
dir: 01/ subs: 156 files: 765 size: 5.7M
dir: 02/ subs: 144 files: 626 size: 4.8M
dir: 03/ subs: 160 files: 714 size: 6.1M
dir: 04/ subs: 169 files: 820 size: 5.9M
dir: 05/ subs: 131 files: 590 size: 4.1M
dir: 06/ subs: 148 files: 677 size: 5.3M
dir: 07/ subs: 142 files: 644 size: 5.8M
dir: 08/ subs: 148 files: 749 size: 5.8M
dir: 09/ subs: 158 files: 711 size: 6.3M
dir: 0A/ subs: 146 files: 666 size: 5.1M
dir: 0B/ subs: 157 files: 701 size: 5.1M
dir: 0C/ subs: 157 files: 671 size: 5.2M
dir: 0D/ subs: 157 files: 711 size: 5.7M
dir: 0E/ subs: 149 files: 704 size: 5.6M
dir: 0F/ subs: 158 files: 742 size: 5.8M

--
Davi Arnaut

Index: modules/cache/cache_util.c
===================================================================
--- modules/cache/cache_util.c	(revision 423984)
+++ modules/cache/cache_util.c	(working copy)
@@ -19,6 +19,7 @@
 #include "mod_cache.h"
 
 #include <ap_provider.h>
+#include <util_md5.h>
 
 /* -------------------------------------------------------------- */
 
@@ -489,54 +490,31 @@
     y[sizeof(j) * 2] = '\0';
 }
 
-static void cache_hash(const char *it, char *val, int ndepth, int nlength)
+static unsigned int cdb_string_hash(const char *str)
 {
-    apr_md5_ctx_t context;
-    unsigned char digest[16];
-    char tmp[22];
-    int i, k, d;
-    unsigned int x;
-    static const char enc_table[64] =
-    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_@";
+    unsigned int hash = 5381;
 
-    apr_md5_init(&context);
-    apr_md5_update(&context, (const unsigned char *) it, strlen(it));
-    apr_md5_final(digest, &context);
+    while (*str)
+        hash = 33 * hash + *str++;
 
-    /* encode 128 bits as 22 characters, using a modified uuencoding
-     * the encoding is 3 bytes -> 4 characters* i.e. 128 bits is
-     * 5 x 3 bytes + 1 byte -> 5 * 4 characters + 2 characters
-     */
-    for (i = 0, k = 0; i < 15; i += 3) {
-        x = (digest[i] << 16) | (digest[i + 1] << 8) | digest[i + 2];
-        tmp[k++] = enc_table[x >> 18];
-        tmp[k++] = enc_table[(x >> 12) & 0x3f];
-        tmp[k++] = enc_table[(x >> 6) & 0x3f];
-        tmp[k++] = enc_table[x & 0x3f];
-    }
-
-    /* one byte left */
-    x = digest[15];
-    tmp[k++] = enc_table[x >> 2];    /* use up 6 bits */
-    tmp[k++] = enc_table[(x << 4) & 0x3f];
-
-    /* now split into directory levels */
-    for (i = k = d = 0; d < ndepth; ++d) {
-        memcpy(&val[i], &tmp[k], nlength);
-        k += nlength;
-        val[i + nlength] = '/';
-        i += nlength + 1;
-    }
-    memcpy(&val[i], &tmp[k], 22 - k);
-    val[i + 22 - k] = '\0';
+    return hash;
 }
 
-CACHE_DECLARE(char *)ap_cache_generate_name(apr_pool_t *p, int dirlevels,
-                                            int dirlength, const char *name)
+CACHE_DECLARE(char *)ap_cache_generate_name(apr_pool_t *p, unsigned int L1,
+                                            unsigned int L2, const char *name)
 {
-    char hashfile[66];
-    cache_hash(name, hashfile, dirlevels, dirlength);
-    return apr_pstrdup(p, hashfile);
+    char *key;
+    char *md5_hash;
+    unsigned int cdb_hash;
+
+    md5_hash = ap_md5_binary(p, (unsigned char *) name, (int) strlen(name));
+
+    cdb_hash = cdb_string_hash(md5_hash) / L2;
+
+    key = apr_psprintf(p, "%02X/%02X/%s", (cdb_hash / L2) % L1,
+                       cdb_hash % L2, md5_hash);
+
+    return key;
 }
 
 /* Create a new table consisting of those elements from an input
Index: modules/cache/mod_cache.h
===================================================================
--- modules/cache/mod_cache.h	(revision 423984)
+++ modules/cache/mod_cache.h	(working copy)
@@ -274,8 +274,8 @@
 
 CACHE_DECLARE(apr_time_t) ap_cache_hex2usec(const char *x);
 CACHE_DECLARE(void) ap_cache_usec2hex(apr_time_t j, char *y);
-CACHE_DECLARE(char *) ap_cache_generate_name(apr_pool_t *p, int dirlevels, 
-                                             int dirlength, 
+CACHE_DECLARE(char *) ap_cache_generate_name(apr_pool_t *p, unsigned int L1,
+                                             unsigned int L2,
                                              const char *name);
 CACHE_DECLARE(cache_provider_list *)ap_cache_get_providers(request_rec *r, cache_server_conf *conf, apr_uri_t uri);
 CACHE_DECLARE(int) ap_cache_liststr(apr_pool_t *p, const char *list,
Index: modules/cache/mod_disk_cache.c
===================================================================
--- modules/cache/mod_disk_cache.c	(revision 423984)
+++ modules/cache/mod_disk_cache.c	(working copy)
@@ -66,17 +66,38 @@
  * Local static functions
  */
 
+static apr_status_t disk_mktemp(apr_file_t **fp, const char *dest, char **tempfile,
+                                apr_int32_t flags, apr_size_t cache_root_len,
+                                apr_pool_t *p)
+{
+    apr_status_t rv;
+    struct iovec iov[2];
+
+    iov[0].iov_base = (char *) dest;
+    iov[0].iov_len  = cache_root_len + DIR_LEVELS_LEN;
+
+    iov[1].iov_base = AP_TEMPFILE;
+    iov[1].iov_len  = sizeof AP_TEMPFILE;
+
+    *tempfile = apr_pstrcatv(p, iov, 2, NULL);
+
+    rv = apr_file_mktemp(fp, *tempfile, flags, p);
+
+    return rv;
+}
+
 static char *header_file(apr_pool_t *p, disk_cache_conf *conf,
                          disk_cache_object_t *dobj, const char *name)
 {
     if (!dobj->hashfile) {
-        dobj->hashfile = ap_cache_generate_name(p, conf->dirlevels,
-                                                conf->dirlength, name);
+        dobj->hashfile = ap_cache_generate_name(p, conf->dirlevel1,
+                                                conf->dirlevel2, name);
     }
 
     if (dobj->prefix) {
         return apr_pstrcat(p, dobj->prefix, CACHE_VDIR_SUFFIX, "/",
-                           dobj->hashfile, CACHE_HEADER_SUFFIX, NULL);
+                           dobj->hashfile + DIR_LEVELS_LEN,
+                           CACHE_HEADER_SUFFIX, NULL);
      }
      else {
         return apr_pstrcat(p, conf->cache_root, "/", dobj->hashfile,
@@ -88,13 +109,14 @@
                        disk_cache_object_t *dobj, const char *name)
 {
     if (!dobj->hashfile) {
-        dobj->hashfile = ap_cache_generate_name(p, conf->dirlevels,
-                                                conf->dirlength, name);
+        dobj->hashfile = ap_cache_generate_name(p, conf->dirlevel1,
+                                                conf->dirlevel2, name);
     }
 
     if (dobj->prefix) {
         return apr_pstrcat(p, dobj->prefix, CACHE_VDIR_SUFFIX, "/",
-                           dobj->hashfile, CACHE_DATA_SUFFIX, NULL);
+                           dobj->hashfile + DIR_LEVELS_LEN,
+                           CACHE_DATA_SUFFIX, NULL);
      }
      else {
         return apr_pstrcat(p, conf->cache_root, "/", dobj->hashfile,
@@ -359,7 +381,6 @@
     dobj->root_len = conf->cache_root_len;
     dobj->datafile = data_file(r->pool, conf, dobj, key);
     dobj->hdrsfile = header_file(r->pool, conf, dobj, key);
-    dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE, NULL);
 
     return OK;
 }
@@ -467,7 +488,6 @@
     dobj->key = nkey;
     dobj->name = key;
     dobj->datafile = data_file(r->pool, conf, dobj, nkey);
-    dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE, NULL);
 
     /* Open the data file */
     flags = APR_READ|APR_BINARY;
@@ -843,9 +863,9 @@
 
             mkdir_structure(conf, dobj->hdrsfile, r->pool);
 
-            rv = apr_file_mktemp(&dobj->tfd, dobj->tempfile,
-                                 APR_CREATE | APR_WRITE | APR_BINARY | APR_EXCL,
-                                 r->pool);
+            rv = disk_mktemp(&dobj->tfd, dobj->hdrsfile, &dobj->tempfile,
+                             APR_CREATE | APR_WRITE | APR_BINARY | APR_EXCL,
+                             conf->cache_root_len, r->pool);
 
             if (rv != APR_SUCCESS) {
                 return rv;
@@ -876,7 +896,6 @@
                 return rv;
             }
 
-            dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE, NULL);
             tmp = regen_key(r->pool, r->headers_in, varray, dobj->name);
             dobj->prefix = dobj->hdrsfile;
             dobj->hashfile = NULL;
@@ -885,11 +904,10 @@
         }
     }
 
+    rv = disk_mktemp(&dobj->hfd, dobj->hdrsfile, &dobj->tempfile,
+                     APR_CREATE | APR_WRITE | APR_BINARY | APR_BUFFERED |
+                     APR_EXCL, conf->cache_root_len, r->pool);
 
-    rv = apr_file_mktemp(&dobj->hfd, dobj->tempfile,
-                         APR_CREATE | APR_WRITE | APR_BINARY |
-                         APR_BUFFERED | APR_EXCL, r->pool);
-
     if (rv != APR_SUCCESS) {
         return rv;
     }
@@ -969,8 +987,6 @@
         return rv;
     }
 
-    dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE, NULL);
-
     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
                  "disk_cache: Stored headers for URL %s",  dobj->name);
     return APR_SUCCESS;
@@ -989,9 +1005,10 @@
      * in file_cache_el_final().
      */
     if (!dobj->tfd) {
-        rv = apr_file_mktemp(&dobj->tfd, dobj->tempfile,
-                             APR_CREATE | APR_WRITE | APR_BINARY |
-                             APR_BUFFERED | APR_EXCL, r->pool);
+        rv = disk_mktemp(&dobj->tfd, dobj->datafile, &dobj->tempfile,
+                         APR_CREATE | APR_WRITE | APR_BINARY |
+                         APR_BUFFERED | APR_EXCL, conf->cache_root_len,
+                         r->pool);
         if (rv != APR_SUCCESS) {
             return rv;
         }
@@ -1072,8 +1089,8 @@
     disk_cache_conf *conf = apr_pcalloc(p, sizeof(disk_cache_conf));
 
     /* XXX: Set default values */
-    conf->dirlevels = DEFAULT_DIRLEVELS;
-    conf->dirlength = DEFAULT_DIRLENGTH;
+    conf->dirlevel1 = DEFAULT_DIRLEVEL1;
+    conf->dirlevel2 = DEFAULT_DIRLEVEL2;
     conf->maxfs = DEFAULT_MAX_FILE_SIZE;
     conf->minfs = DEFAULT_MIN_FILE_SIZE;
 
@@ -1105,33 +1122,22 @@
  * filename = "/key % prime1 /key %prime2/key %prime3"
  */
 static const char
-*set_cache_dirlevels(cmd_parms *parms, void *in_struct_ptr, const char *arg)
+*set_cache_dirlevels(cmd_parms *parms, void *in_struct_ptr, const char *arg1,
+                     const char *arg2)
 {
     disk_cache_conf *conf = ap_get_module_config(parms->server->module_config,
                                                  &disk_cache_module);
-    int val = atoi(arg);
-    if (val < 1)
+    int val1 = atoi(arg1);
+    int val2 = atoi(arg2);
+
+    if (val1 < 1 || val2 < 1)
         return "CacheDirLevels value must be an integer greater than 0";
-    if (val * conf->dirlength > CACHEFILE_LEN)
-        return "CacheDirLevels*CacheDirLength value must not be higher than 20";
-    conf->dirlevels = val;
-    return NULL;
-}
-static const char
-*set_cache_dirlength(cmd_parms *parms, void *in_struct_ptr, const char *arg)
-{
-    disk_cache_conf *conf = ap_get_module_config(parms->server->module_config,
-                                                 &disk_cache_module);
-    int val = atoi(arg);
-    if (val < 1)
-        return "CacheDirLength value must be an integer greater than 0";
-    if (val * conf->dirlevels > CACHEFILE_LEN)
-        return "CacheDirLevels*CacheDirLength value must not be higher than 20";
 
-    conf->dirlength = val;
+    conf->dirlevel1 = val1;
+    conf->dirlevel2 = val2;
+
     return NULL;
 }
-
 static const char
 *set_cache_minfs(cmd_parms *parms, void *in_struct_ptr, const char *arg)
 {
@@ -1153,10 +1159,8 @@
 {
     AP_INIT_TAKE1("CacheRoot", set_cache_root, NULL, RSRC_CONF,
                  "The directory to store cache files"),
-    AP_INIT_TAKE1("CacheDirLevels", set_cache_dirlevels, NULL, RSRC_CONF,
+    AP_INIT_TAKE2("CacheDirLevels", set_cache_dirlevels, NULL, RSRC_CONF,
                   "The number of levels of subdirectories in the cache"),
-    AP_INIT_TAKE1("CacheDirLength", set_cache_dirlength, NULL, RSRC_CONF,
-                  "The number of characters in subdirectory names"),
     AP_INIT_TAKE1("CacheMinFileSize", set_cache_minfs, NULL, RSRC_CONF,
                   "The minimum file size to cache a document"),
     AP_INIT_TAKE1("CacheMaxFileSize", set_cache_maxfs, NULL, RSRC_CONF,
Index: modules/cache/mod_disk_cache.h
===================================================================
--- modules/cache/mod_disk_cache.h	(revision 423984)
+++ modules/cache/mod_disk_cache.h	(working copy)
@@ -24,6 +24,8 @@
 #define VARY_FORMAT_VERSION 3
 #define DISK_FORMAT_VERSION 4
 
+#define DIR_LEVELS_LEN      6
+
 #define CACHE_HEADER_SUFFIX ".header"
 #define CACHE_DATA_SUFFIX   ".data"
 #define CACHE_VDIR_SUFFIX   ".vary"
@@ -78,16 +80,16 @@
  */
 /* TODO: Make defaults OS specific */
 #define CACHEFILE_LEN 20        /* must be less than HASH_LEN/2 */
-#define DEFAULT_DIRLEVELS 3
-#define DEFAULT_DIRLENGTH 2
+#define DEFAULT_DIRLEVEL1 16
+#define DEFAULT_DIRLEVEL2 256
 #define DEFAULT_MIN_FILE_SIZE 1
 #define DEFAULT_MAX_FILE_SIZE 1000000
 
 typedef struct {
     const char* cache_root;
     apr_size_t cache_root_len;
-    int dirlevels;               /* Number of levels of subdirectories */
-    int dirlength;               /* Length of subdirectory names */
+    unsigned int dirlevel1;      /* Number of level 1 directories      */
+    unsigned int dirlevel2;      /* Number of level 2 subdirectories   */
     apr_size_t minfs;            /* minumum file size for cached files */
     apr_size_t maxfs;            /* maximum file size for cached files */
 } disk_cache_conf;

Re: [PATCH] revamped mod_disk_cache directory structure

Posted by Colm MacCarthaigh <co...@stdlib.net>.
On Thu, Jul 20, 2006 at 06:16:26PM -0300, Davi Arnaut wrote:
> >I'm not sure it goes far enough though. What if an admin has two
> >filesystems/disks they can to store the cache on, or what if it's 7?
> 
> "CacheDirLevels n 256" for n = 1,2,...,7,...

Ahh, now I get it, cool.

> >What if one is a 160GB filesystem and the other only 10GB?
> 
> This was not the scope of the patch but it's one step towards. I
> think we can add load balancing later.

I think we'll need to change our syntax too, to "mount" cache areas
individually. Can be messy.

> >If we're going to tackle these kind problems, we need to look at  how
> >things like diablo (which gets it right IMO) do it.
> >
> 
> diablo ? the game ? :-)

Diablo the news server :-) http://www.openusenet.org/diablo/

-- 
Colm MacCárthaigh                        Public Key: colm+pgp@stdlib.net

Re: [PATCH] revamped mod_disk_cache directory structure

Posted by Davi Arnaut <da...@haxent.com.br>.
Em 20/07/2006, às 17:06, Colm MacCarthaigh escreveu:

> On Thu, Jul 20, 2006 at 11:58:01AM -0300, Davi Arnaut wrote:
>> Also, with this patch it is possible to designate directories to  
>> separate
>> partitions because the temporary files are created on the destination
>> directory.
>
> I'm not sure it goes far enough though. What if an admin has two
> filesystems/disks they can to store the cache on, or what if it's 7?

"CacheDirLevels n 256" for n = 1,2,...,7,...

> What if one is a 160GB filesystem and the other only 10GB?

This was not in the scope of the patch, but it's one step towards it. I
think we can add load balancing later.

> If we're going to tackle these kind problems, we need to look at  
> how things like
> diablo (which gets it right IMO) do it.
>

diablo ? the game ? :-)

--
Davi Arnaut


Re: [PATCH] revamped mod_disk_cache directory structure

Posted by Colm MacCarthaigh <co...@stdlib.net>.
On Thu, Jul 20, 2006 at 11:58:01AM -0300, Davi Arnaut wrote:
> Also, with this patch it is possible to designate directories to separate
> partitions because the temporary files are created on the destination
> directory.

I'm not sure it goes far enough though. What if an admin has two
filesystems/disks they can store the cache on, or what if it's 7?
What if one is a 160GB filesystem and the other only 10GB?  If we're
going to tackle these kinds of problems, we need to look at how things like
diablo (which gets it right IMO) do it.

-- 
Colm MacCárthaigh                        Public Key: colm+pgp@stdlib.net