You are viewing a plain text version of this content. The canonical link for it is here.
Posted to cvs@httpd.apache.org by ma...@hyperreal.org on 1998/06/02 14:50:47 UTC

cvs commit: apache-1.3/src/modules/proxy mod_proxy.h proxy_cache.c

martin      98/06/02 05:50:47

  Modified:    src      CHANGES
               src/modules/proxy mod_proxy.h proxy_cache.c
  Log:
  Do Proxy Garbage Collection in background & other proxy garbage fixes
  
  Revision  Changes    Path
  1.881     +5 -0      apache-1.3/src/CHANGES
  
  Index: CHANGES
  ===================================================================
  RCS file: /home/cvs/apache-1.3/src/CHANGES,v
  retrieving revision 1.880
  retrieving revision 1.881
  diff -u -u -r1.880 -r1.881
  --- CHANGES	1998/06/02 08:01:49	1.880
  +++ CHANGES	1998/06/02 12:50:44	1.881
  @@ -1,5 +1,10 @@
   Changes with Apache 1.3.1
   
  +  *) Proxy Cache Fixes: account for directory sizes, fork off garbage collection
  +     to continue in background, use predefined types (off_t, size_t, time_t),
  +     log the current cache usage percentage at LogLevel debug
  +     [Martin Kraemer, based on discussion between Dean Gaudet & Dirk vanGulik]
  +
   Changes with Apache 1.3.0
   
     *) Using a type map file as a custom error document was not possible.
  
  
  
  1.35      +6 -6      apache-1.3/src/modules/proxy/mod_proxy.h
  
  Index: mod_proxy.h
  ===================================================================
  RCS file: /home/cvs/apache-1.3/src/modules/proxy/mod_proxy.h,v
  retrieving revision 1.34
  retrieving revision 1.35
  diff -u -u -r1.34 -r1.35
  --- mod_proxy.h	1998/05/27 22:56:04	1.34
  +++ mod_proxy.h	1998/06/02 12:50:46	1.35
  @@ -188,11 +188,11 @@
   /* static information about the local cache */
   struct cache_conf {
       const char *root;		/* the location of the cache directory */
  -    int space;			/* Maximum cache size (in 1024 bytes) */
  -    int maxexpire;		/* Maximum time to keep cached files in secs */
  -    int defaultexpire;		/* default time to keep cached file in secs */
  +    off_t space;			/* Maximum cache size (in 1024 bytes) */
  +    time_t maxexpire;		/* Maximum time to keep cached files in secs */
  +    time_t defaultexpire;	/* default time to keep cached file in secs */
       double lmfactor;		/* factor for estimating expires date */
  -    int gcinterval;		/* garbage collection interval, in seconds */
  +    time_t gcinterval;		/* garbage collection interval, in seconds */
       int dirlevels;		/* Number of levels of subdirectories */
       int dirlength;		/* Length of subdirectory names */
   };
  @@ -207,7 +207,7 @@
       array_header *nocaches;
       char *domain;		/* domain name to use in absence of a domain name in the request */
       int req;			/* true if proxy requests are enabled */
  -    int recv_buffer_size;
  +    size_t recv_buffer_size;
   } proxy_server_conf;
   
   struct hdr_entry {
  @@ -229,7 +229,7 @@
       time_t lmod;		/* last-modified date of cached entity */
       time_t date;		/* the date the cached file was last touched */
       int version;		/* update count of the file */
  -    unsigned int len;		/* content length */
  +    off_t len;			/* content length */
       char *protocol;		/* Protocol, and major/minor number, e.g. HTTP/1.1 */
       int status;			/* the status of the cached file */
       char *resp_line;		/* the whole status like (protocol, code + message) */
  
  
  
  1.42      +182 -55   apache-1.3/src/modules/proxy/proxy_cache.c
  
  Index: proxy_cache.c
  ===================================================================
  RCS file: /home/cvs/apache-1.3/src/modules/proxy/proxy_cache.c,v
  retrieving revision 1.41
  retrieving revision 1.42
  diff -u -u -r1.41 -r1.42
  --- proxy_cache.c	1998/05/29 18:20:58	1.41
  +++ proxy_cache.c	1998/06/02 12:50:46	1.42
  @@ -71,33 +71,37 @@
   
   DEF_Explain
   
  -#ifndef abs
  -#define	abs(c)	((c) >= 0 ? (c) : -(c))
  -#endif
  -
   struct gc_ent {
       unsigned long int len;
       time_t expire;
       char file[HASH_LEN + 1];
  -
   };
  -
  -static int gcdiff(const void *ap, const void *bp)
  -{
  -    const struct gc_ent *a = *(const struct gc_ent * const *) ap;
  -    const struct gc_ent *b = *(const struct gc_ent * const *) bp;
  -
  -    if (a->expire > b->expire)
  -	return 1;
  -    else if (a->expire < b->expire)
  -	return -1;
  -    else
  -	return 0;
  -}
   
  -static int curbytes, cachesize, every;
  -static unsigned long int curblocks;
  -static time_t garbage_now, garbage_expire;
  +/* Poor man's 61 bit arithmetic */
  +typedef struct {
  +    long lower;	/* lower 30 bits of result */
  +    long upper; /* upper 31 bits of result */
  +} long61_t;
  +
  +/* FIXME: The block size can be different on a `per file system' basis.
  + * This would make automatic detection highly OS specific.
  + * In the GNU fileutils code for du(1), you can see how complicated it can
  + * become to detect the block size. And, with BSD-4.x fragments, it
  + * is even more difficult to get precise results.
  + * As a compromise (and to improve on the incorrect counting of cache
  + * size on byte level, omitting directory sizes entirely, which was
  + * used up to apache-1.3b7) we're rounding to multiples of 512 here.
  + * Your file system may be using larger blocks (I certainly hope so!)
  + * but it will hardly use smaller blocks.
  + * (So this approximation is still closer to reality than the old behavior).
  + * The best solution would be automatic detection, the next best solution
  + * IMHO is a sensible default and the possibility to override it.
  + */
  +
  +#define ROUNDUP2BLOCKS(_bytes) (((_bytes)+block_size-1) & ~(block_size-1))
  +static long block_size = 512;	/* this must be a power of 2 */
  +static long61_t curbytes, cachesize;
  +static time_t every, garbage_now, garbage_expire;
   static char *filename;
   static mutex *garbage_mutex = NULL;
   
  @@ -114,6 +118,10 @@
   static int sub_garbage_coll(request_rec *r, array_header *files,
   			    const char *cachedir, const char *cachesubdir);
   static void help_proxy_garbage_coll(request_rec *r);
  +#if !defined(WIN32) && !defined(MPE) && !defined(__EMX__)
  +static void detached_proxy_garbage_coll(request_rec *r);
  +#endif
  +
   
   void ap_proxy_garbage_coll(request_rec *r)
   {
  @@ -128,7 +136,13 @@
   	inside = 1;
       (void) ap_release_mutex(garbage_mutex);
   
  +    ap_block_alarms();		/* avoid SIGALRM on big cache cleanup */
  +#if !defined(WIN32) && !defined(MPE) && !defined(__EMX__)
  +    detached_proxy_garbage_coll(r);
  +#else
       help_proxy_garbage_coll(r);
  +#endif
  +    ap_unblock_alarms();
   
       (void) ap_acquire_mutex(garbage_mutex);
       inside = 0;
  @@ -136,6 +150,116 @@
   }
   
   
  +static void
  +add_long61 (long61_t *accu, long val)
  +{
  +    /* Add in lower 30 bits */
  +    accu->lower += (val & 0x3FFFFFFFL);
  +    /* add in upper bits, and carry */
  +    accu->upper += (val >> 30) + ((accu->lower & ~0x3FFFFFFFL) != 0L);
  +    /* Clear carry */
  +    accu->lower &= 0x3FFFFFFFL;
  +}
  +
  +static void
  +sub_long61 (long61_t *accu, long val)
  +{
  +    int carry = (val & 0x3FFFFFFFL) > accu->lower;
  +    /* Subtract lower 30 bits */
  +    accu->lower = accu->lower - (val & 0x3FFFFFFFL) + ((carry) ? 0x40000000 : 0);
  +    /* subtract upper bits, and the borrow */
  +    accu->upper -= (val >> 30) + carry;
  +}
  +
  +/* Compare two long61's:
  + * return <0 when left < right
  + * return  0 when left == right
  + * return >0 when left > right
  + */
  +static long
  +cmp_long61 (long61_t *left, long61_t *right)
  +{
  +    return (left->upper == right->upper) ? (left->lower - right->lower)
  +					 : (left->upper - right->upper);
  +}
  +
  +/* Compare two gc_ent's, sort them by expiration date */
  +static int gcdiff(const void *ap, const void *bp)
  +{
  +    const struct gc_ent *a = (const struct gc_ent * const) ap;
  +    const struct gc_ent *b = (const struct gc_ent * const) bp;
  +
  +    if (a->expire > b->expire)
  +	return 1;
  +    else if (a->expire < b->expire)
  +	return -1;
  +    else
  +	return 0;
  +}
  +
  +#if !defined(WIN32) && !defined(MPE) && !defined(__EMX__)
  +static void detached_proxy_garbage_coll(request_rec *r)
  +{
  +    pid_t pid;
  +    int status;
  +    pid_t pgrp;
  +
  +    switch (pid = fork()) {
  +	case -1:
  +	    ap_log_error(APLOG_MARK, APLOG_ERR, r->server,
  +			 "proxy: fork() for cache cleanup failed");
  +	    return;
  +
  +	case 0:	/* Child */
  +
  +	    /* close all sorts of things, including the socket fd */
  +	    ap_cleanup_for_exec();
  +
  +	    /* Fork twice to disassociate from the child */
  +	    switch (pid = fork()) {
  +		case -1:
  +		    ap_log_error(APLOG_MARK, APLOG_ERR, r->server,
  +			 "proxy: fork(2nd) for cache cleanup failed");
  +		    exit(1);
  +
  +		case 0:	/* Child */
  +		    /* The setpgrp() stuff was snarfed from http_main.c */
  +#ifndef NO_SETSID
  +		    if ((pgrp = setsid()) == -1) {
  +			perror("setsid");
  +			fprintf(stderr, "httpd: setsid failed\n");
  +			exit(1);
  +		    }
  +#elif defined(NEXT) || defined(NEWSOS)
  +		    if (setpgrp(0, getpid()) == -1 || (pgrp = getpgrp(0)) == -1) {
  +			perror("setpgrp");
  +			fprintf(stderr, "httpd: setpgrp or getpgrp failed\n");
  +			exit(1);
  +		    }
  +#else
  +		    if ((pgrp = setpgrp(getpid(), 0)) == -1) {
  +			perror("setpgrp");
  +			fprintf(stderr, "httpd: setpgrp failed\n");
  +			exit(1);
  +		    }
  +#endif
  +		    help_proxy_garbage_coll(r);
  +		    exit(0);
  +
  +		default:    /* Father */
  +		    /* After grandson has been forked off, */
  +		    /* there's nothing else to do. */
  +		    exit(0);		    
  +	    }
  +	default:
  +	    /* Wait until grandson has been forked off */
  +	    /* (without wait we'd leave a zombie) */
  +	    waitpid(pid, &status, 0);
  +	    return;
  +    }
  +}
  +#endif /* ndef WIN32 */
  +
   static void help_proxy_garbage_coll(request_rec *r)
   {
       const char *cachedir;
  @@ -145,17 +269,24 @@
       const struct cache_conf *conf = &pconf->cache;
       array_header *files;
       struct stat buf;
  -    struct gc_ent *fent, **elts;
  +    struct gc_ent *fent;
       int i, timefd;
  -    static time_t lastcheck = BAD_DATE;		/* static data!!! */
  +    static time_t lastcheck = BAD_DATE;		/* static (per-process) data!!! */
   
       cachedir = conf->root;
  -    cachesize = conf->space;
  +    /* configured size is given in kB. Make it bytes, convert to long61_t: */
  +    cachesize.lower = cachesize.upper = 0;
  +    add_long61(&cachesize, conf->space << 10);
       every = conf->gcinterval;
   
       if (cachedir == NULL || every == -1)
   	return;
       garbage_now = time(NULL);
  +    /* Usually, the modification time of <cachedir>/.time can only increase.
  +     * Thus, even with several child processes having their own copy of
  +     * lastcheck, if time(NULL) still < lastcheck then it's not time
  +     * for GC yet.
  +     */
       if (garbage_now != -1 && lastcheck != BAD_DATE && garbage_now < lastcheck + every)
   	return;
   
  @@ -176,7 +307,7 @@
   		ap_log_error(APLOG_MARK, APLOG_ERR, r->server,
   			     "proxy: creat(%s)", filename);
   	    else
  -		lastcheck = abs(garbage_now);	/* someone else got in there */
  +		lastcheck = garbage_now;	/* someone else got in there */
   	    ap_unblock_alarms();
   	    return;
   	}
  @@ -192,22 +323,24 @@
   	    ap_log_error(APLOG_MARK, APLOG_ERR, r->server,
   			 "proxy: utimes(%s)", filename);
       }
  -    files = ap_make_array(r->pool, 100, sizeof(struct gc_ent *));
  -    curblocks = 0;
  -    curbytes = 0;
  +    files = ap_make_array(r->pool, 100, sizeof(struct gc_ent));
  +    curbytes.upper = curbytes.lower = 0L;
   
       sub_garbage_coll(r, files, cachedir, "/");
   
  -    if (curblocks < cachesize || curblocks + curbytes <= cachesize) {
  +    if (cmp_long61(&curbytes, &cachesize) < 0L) {
  +	ap_log_error(APLOG_MARK, APLOG_DEBUG|APLOG_NOERRNO, r->server,
  +			 "proxy GC: Cache is %ld%% full (nothing deleted)",
  +			 ((curbytes.upper<<20)|(curbytes.lower>>10))*100/conf->space);
   	ap_unblock_alarms();
   	return;
       }
   
  -    qsort(files->elts, files->nelts, sizeof(struct gc_ent *), gcdiff);
  +    /* sort the files we found by expiration date */
  +    qsort(files->elts, files->nelts, sizeof(struct gc_ent), gcdiff);
   
  -    elts = (struct gc_ent **) files->elts;
       for (i = 0; i < files->nelts; i++) {
  -	fent = elts[i];
  +	fent = &((struct gc_ent *) files->elts)[i];
   	sprintf(filename, "%s%s", cachedir, fent->file);
   	Explain3("GC Unlinking %s (expiry %ld, garbage_now %ld)", filename, fent->expire, garbage_now);
   #if TESTING
  @@ -221,16 +354,15 @@
   	else
   #endif
   	{
  -	    curblocks -= fent->len >> 10;
  -	    curbytes -= fent->len & 0x3FF;
  -	    if (curbytes < 0) {
  -		curbytes += 1024;
  -		curblocks--;
  -	    }
  -	    if (curblocks < cachesize || curblocks + curbytes <= cachesize)
  +	    sub_long61(&curbytes, ROUNDUP2BLOCKS(fent->len));
  +	    if (cmp_long61(&curbytes, &cachesize) < 0)
   		break;
   	}
       }
  +
  +    ap_log_error(APLOG_MARK, APLOG_DEBUG|APLOG_NOERRNO, r->server,
  +			 "proxy GC: Cache is %ld%% full (%d deleted)",
  +			 ((curbytes.upper<<20)|(curbytes.lower>>10))*100/conf->space, i);
       ap_unblock_alarms();
   }
   
  @@ -342,6 +474,9 @@
   		rmdir(newcachedir);
   #endif
   		--nfiles;
  +	    } else {
  +		/* Directory is not empty. Account for its size: */
  +		add_long61(&curbytes, ROUNDUP2BLOCKS(buf.st_size));
   	    }
   	    continue;
   	}
  @@ -378,22 +513,14 @@
    * file.
    *
    */
  -	/* FIXME: We should make the array an array of gc_ents, not gc_ent *s
  -	 */
  -	fent = ap_palloc(r->pool, sizeof(struct gc_ent));
  +	fent = (struct gc_ent *) ap_push_array(files);
   	fent->len = buf.st_size;
   	fent->expire = garbage_expire;
   	strcpy(fent->file, cachesubdir);
   	strcat(fent->file, ent->d_name);
  -	*(struct gc_ent **) ap_push_array(files) = fent;
   
   /* accumulate in blocks, to cope with directories > 4Gb */
  -	curblocks += buf.st_size >> 10;		/* Kbytes */
  -	curbytes += buf.st_size & 0x3FF;
  -	if (curbytes >= 1024) {
  -	    curbytes -= 1024;
  -	    curblocks++;
  -	}
  +	add_long61(&curbytes, ROUNDUP2BLOCKS(buf.st_size));
       }
   
       closedir(dir);
  @@ -467,7 +594,7 @@
   	q = ap_proxy_get_header(c->hdrs, "Content-Length");
   	if (q == NULL) {
   	    strp = ap_palloc(p, 15);
  -	    ap_snprintf(strp, 15, "%u", c->len);
  +	    ap_snprintf(strp, 15, "%lu", c->len);
   	    ap_proxy_add_header(c->hdrs, "Content-Length", strp, HDR_REP);
   	}
       }
  @@ -590,7 +717,7 @@
   	    }
   	    ap_pclosef(r->pool, cachefp->fd);
   	    Explain0("Use local copy, cached file hasn't changed");
  -	    return USE_LOCAL_COPY;
  +	    return HTTP_NOT_MODIFIED;
   	}
   
   /* Ok, has been modified */
  @@ -735,7 +862,7 @@
   /* no date header! */
   /* add one; N.B. use the time _now_ rather than when we were checking the cache
    */
  -	date = abs(now);
  +	date = now;
   	p = ap_gm_timestr_822(r->pool, now);
   	dates = ap_proxy_add_header(resp_hdrs, "Date", p, HDR_REP);
   	Explain0("Added date header");
  @@ -775,10 +902,10 @@
   	    double maxex = conf->cache.maxexpire;
   	    if (x > maxex)
   		x = maxex;
  -	    expc = abs(now) + (int) x;
  +	    expc = now + (int) x;
   	}
   	else
  -	    expc = abs(now) + conf->cache.defaultexpire;
  +	    expc = now + conf->cache.defaultexpire;
   	Explain1("Expiry date calculated %ld", expc);
       }
   
  @@ -820,7 +947,7 @@
   	    ap_pclosef(r->pool, c->fp->fd);
   	    Explain0("Remote document not modified, use local copy");
   	    /* CHECKME: Is this right? Shouldn't we check IMS again here? */
  -	    return USE_LOCAL_COPY;
  +	    return HTTP_NOT_MODIFIED;
   	}
   	else {
   /* return the whole document */