You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@httpd.apache.org by Bill Stoddard <bi...@wstoddard.com> on 2002/06/03 18:48:21 UTC

[PATCH] Add content negotiation and expiration model to mod_cache

Most of this code was lifted from 1.3 proxy_cache.c. There are two problems with this code
that I am aware of, and the first must be fixed before the patch is committed. First,
cache_read_entity_headers() is being called twice: once from mod_cache.c and now also from
cache_storage.c.  Perhaps removing the call from mod_cache.c will be sufficient; I don't
know the right answer yet. Second, mod_disk_cache is broken because it does not store
response_time/request_time, which breaks the age calculation algorithm. This is easy to
fix.

Bill

Index: cache_storage.c
===================================================================
RCS file: /home/cvs/httpd-2.0/modules/experimental/cache_storage.c,v
retrieving revision 1.21
diff -u -r1.21 cache_storage.c
--- cache_storage.c 28 May 2002 18:04:43 -0000 1.21
+++ cache_storage.c 3 Jun 2002 16:38:35 -0000
@@ -174,6 +174,10 @@
     char *key;
     cache_request_rec *cache = (cache_request_rec *)
                          ap_get_module_config(r->request_config, &cache_module);
+    const char *cc_cresp, *cc_req, *pragma_cresp;
+    const char *agestr = NULL;
+    char *val;
+    apr_time_t age_c = 0;

     rv =  cache_generate_key(r,r->pool,&key);
     if (rv != APR_SUCCESS) {
@@ -186,18 +190,171 @@
         type = ap_cache_tokstr(r->pool, next, &next);
         switch ((rv = cache_run_open_entity(cache->handle, r, type, key))) {
         case OK: {
+            apr_time_t age, maxage_req, maxage_cresp, maxage, smaxage, maxstale,
minfresh;
+            char *vary;
+
             info = &(cache->handle->cache_obj->info);
-            /* XXX:
-             * Handle being returned a collection of entities.
+            if (cache_read_entity_headers(cache->handle, r) != APR_SUCCESS) {
+                /* TODO: Handle this error */
+                return DECLINED;
+            }
+
+            /*
+             * Check Content-Negotiation - Vary
+             *
+             * At this point we need to make sure that the object we found in the cache
+             * is the same object that would be delivered to the client, when the
+             * effects of content negotiation are taken into account.
+             *
+             * In plain english, we want to make sure that a language-negotiated
+             * document in one language is not given to a client asking for a
+             * language negotiated document in a different language by mistake.
+             *
+             * RFC2616 13.6 and 14.44 describe the Vary mechanism.
+             */
+            vary = ap_pstrdup(r->pool, ap_table_get(r->headers_out, "Vary"));
+            while (vary && *vary) {
+                char *name = vary;
+                const char *h1, *h2;
+
+                /* isolate header name */
+                while (*vary && !ap_isspace(*vary) && (*vary != ','))
+                    ++vary;
+                while (*vary && (ap_isspace(*vary) || (*vary == ','))) {
+                    *vary = '\0';
+                    ++vary;
+                }
+
+                /*
+                 * is this header in the request and the header in the cached
+                 * request identical? If not, we give up and do a straight get
+                 */
+                h1 = ap_table_get(r->headers_in, name);
+                h2 = ap_table_get(info->req_hdrs, name);
+                if (h1 == h2) {
+                    /* both headers NULL, so a match - do nothing */
+                }
+                else if (h1 && h2 && !strcmp(h1, h2)) {
+                    /* both headers exist and are equal - do nothing */
+                }
+                else {
+                    /* headers do not match, so Vary failed */
+                    ap_log_error(APLOG_MARK, APLOG_INFO, APR_SUCCESS, r->server,
+                                 "cache_select_url(): Vary header mismatch - Cached
document cannot be used. \n");
+                    apr_table_clear(r->headers_out);
+                    r->status_line = NULL;
+                    cache->handle = NULL;
+                    return DECLINED;
+                }
+            }
+
+            cache->fresh = 0;
+            /*
+             * We now want to check if our cached data is still fresh. This depends
+             * on a few things, in this order:
+             *
+             * - RFC2616 14.9.4 End to end reload, Cache-Control: no-cache no-cache in
+             * either the request or the cached response means that we must
+             * revalidate the request unconditionally, overriding any expiration
+             * mechanism. It's equivalent to max-age=0,must-revalidate.
+             *
+             * - RFC2616 14.32 Pragma: no-cache This is treated the same as
+             * Cache-Control: no-cache.
+             *
+             * - RFC2616 14.9.3 Cache-Control: max-stale, must-revalidate,
+             * proxy-revalidate if the max-stale request header exists, modify the
+             * stale calculations below so that an object can be at most <max-stale>
+             * seconds stale before we request a revalidation, _UNLESS_ a
+             * must-revalidate or proxy-revalidate cached response header exists to
+             * stop us doing this.
+             *
+             * - RFC2616 14.9.3 Cache-Control: s-maxage the origin server specifies the
+             * maximum age an object can be before it is considered stale. This
+             * directive has the effect of proxy|must revalidate, which in turn means
+             * simply ignore any max-stale setting.
+             *
+             * - RFC2616 14.9.4 Cache-Control: max-age this header can appear in both
+             * requests and responses. If both are specified, the smaller of the two
+             * takes priority.
+             *
+             * - RFC2616 14.21 Expires: if this response header exists in the cached
+             * entity, and its value is in the past, it has expired.
+             *
              */
+            cc_cresp = ap_table_get(r->headers_out, "Cache-Control");
+            cc_req = ap_table_get(r->headers_in, "Cache-Control");
+            pragma_cresp = ap_table_get(r->headers_out, "Pragma");  /* TODO: pragma_cresp
not being used? */
+            if ((agestr = ap_table_get(r->headers_out, "Age"))) {
+                age_c = atoi(agestr);
+            }

-            /* Has the cache entry expired? */
-            if (r->request_time > info->expire)
-                cache->fresh = 0;
+            /* calculate age of object */
+            age = ap_cache_current_age(info, age_c);
+
+            /* extract s-maxage */
+            if (cc_cresp && ap_cache_liststr(cc_cresp, "s-maxage", &val))
+                smaxage = atoi(val);
             else
-                cache->fresh = 1;
+                smaxage = -1;

-            /*** do content negotiation here */
+            /* extract max-age from request */
+            if (cc_req && ap_cache_liststr(cc_req, "max-age", &val))
+                maxage_req = atoi(val);
+            else
+                maxage_req = -1;
+
+            /* extract max-age from response */
+            if (cc_cresp && ap_cache_liststr(cc_cresp, "max-age", &val))
+                maxage_cresp = atoi(val);
+            else
+                maxage_cresp = -1;
+
+            /*
+             * if both maxage request and response, the smaller one takes priority
+             */
+            if (-1 == maxage_req)
+                maxage = maxage_cresp;
+            else if (-1 == maxage_cresp)
+                maxage = maxage_req;
+            else
+                maxage = MIN(maxage_req, maxage_cresp);
+
+            /* extract max-stale */
+            if (cc_req && ap_cache_liststr(cc_req, "max-stale", &val))
+                maxstale = atoi(val);
+            else
+                maxstale = 0;
+
+            /* extract min-fresh */
+            if (cc_req && ap_cache_liststr(cc_req, "min-fresh", &val))
+                minfresh = atoi(val);
+            else
+                minfresh = 0;
+
+            /* override maxstale if must-revalidate or proxy-revalidate */
+            if (maxstale && ((cc_cresp &&
+                              ap_cache_liststr(cc_cresp, "must-revalidate", NULL))
+                             || (cc_cresp && ap_cache_liststr(cc_cresp,
+                                                              "proxy-revalidate",
NULL))))
+                maxstale = 0;
+            /* handle expiration */
+            if ((-1 < smaxage && age < (smaxage - minfresh)) ||
+                (-1 < maxage && age < (maxage + maxstale - minfresh)) ||
+                (info->expire != APR_DATE_BAD && age < (info->expire - info->date +
maxstale - minfresh))) {
+                /* it's fresh darlings... */
+                /* set age header on response */
+                ap_table_set(r->headers_out, "Age",
+                             ap_psprintf(r->pool, "%lu", (unsigned long)age));
+
+                /* add warning if maxstale overrode freshness calculation */
+                if (!((-1 < smaxage && age < smaxage) ||
+                      (-1 < maxage && age < maxage) ||
+                      (info->expire != APR_DATE_BAD && (info->expire - info->date) >
age))) {
+                    /* make sure we don't stomp on a previous warning */
+                    ap_table_merge(r->headers_out, "Warning", "110 Response is stale");
+                }
+                cache->fresh = 1;
+            }
             return OK;
         }
         case DECLINED: {
Index: cache_util.c
===================================================================
RCS file: /home/cvs/httpd-2.0/modules/experimental/cache_util.c,v
retrieving revision 1.15
diff -u -r1.15 cache_util.c
--- cache_util.c 3 Apr 2002 17:34:01 -0000 1.15
+++ cache_util.c 3 Jun 2002 16:38:35 -0000
@@ -133,7 +133,26 @@
     return type;
 }

-/*
+
+/* Do an HTTP/1.1 age calculation (RFC2616 13.2.3): returns the current age of the cached entity described by info, combining info->date, info->request_time, info->response_time, the supplied age_value and apr_time_now(). */
+CACHE_DECLARE(apr_time_t) ap_cache_current_age(cache_info *info, const apr_time_t
age_value)
+{
+    apr_time_t apparent_age, corrected_received_age, response_delay,
corrected_initial_age,
+           resident_time, current_age;
+
+    /* Perform an HTTP/1.1 age calculation. (RFC2616 13.2.3) The variable names below follow the terms used in that section. */
+
+    apparent_age = MAX(0, info->response_time - info->date);  /* clamped to 0 when info->date is later than info->response_time */
+    corrected_received_age = MAX(apparent_age, age_value);  /* NOTE(review): the caller in this patch passes atoi() of the Age header (seconds), while the info times appear to be apr_time_t microseconds - confirm units */
+    response_delay = info->response_time - info->request_time;  /* interval between info->request_time and info->response_time */
+    corrected_initial_age = corrected_received_age + response_delay;
+    resident_time = apr_time_now() - info->response_time;  /* interval between info->response_time and the moment of this call */
+    current_age = corrected_initial_age + resident_time;
+
+    return (current_age);  /* NOTE(review): same units as the info times; the caller compares this against atoi()'d max-age values - confirm */
+}
+
+/*
  * list is a comma-separated list of case-insensitive tokens, with
  * optional whitespace around the tokens.
  * The return returns 1 if the token val is found in the list, or 0
Index: mod_cache.c
===================================================================
RCS file: /home/cvs/httpd-2.0/modules/experimental/mod_cache.c,v
retrieving revision 1.42
diff -u -r1.42 mod_cache.c
--- mod_cache.c 17 May 2002 11:33:09 -0000 1.42
+++ mod_cache.c 3 Jun 2002 16:38:35 -0000
@@ -152,8 +152,6 @@
      * - RFC2616 14.9.2 Cache-Control: no-store
      * - Pragma: no-cache
      * - Any requests requiring authorization.
-     * - Any URLs whose length exceeds MAX_URL_LENGTH
-     * - TODO: Make MAX_URL_LENGTH a config directive?
      */
     if (conf->ignorecachecontrol_set == 1 && conf->ignorecachecontrol == 1 && auth ==
NULL) {
         ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
@@ -252,7 +250,7 @@
                              r->server,
                              "cache: conditional - add cache_in filter and "
                              "DECLINE");
-
+                /* Why not add CACHE_CONDITIONAL? */
                 ap_add_output_filter("CACHE_IN", NULL, r, r->connection);

                 return DECLINED;
Index: mod_cache.h
===================================================================
RCS file: /home/cvs/httpd-2.0/modules/experimental/mod_cache.h,v
retrieving revision 1.26
diff -u -r1.26 mod_cache.h
--- mod_cache.h 3 Apr 2002 17:34:01 -0000 1.26
+++ mod_cache.h 3 Jun 2002 16:38:35 -0000
@@ -198,6 +198,11 @@
     apr_time_t request_time;
     apr_time_t response_time;
     apr_size_t len;
+    apr_time_t ims;    /*  If-Modified_Since header value    */
+    apr_time_t ius;    /*  If-UnModified_Since header value    */
+    const char *im;         /* If-Match header value */
+    const char *inm;         /* If-None-Match header value */
+    apr_table_t *req_hdrs;   /* These are the original request headers.   */
 };

 /* cache handle information */
@@ -238,9 +243,9 @@


 /* cache_util.c */
-/**
- *
- */
+/* do a HTTP/1.1 age calculation */
+CACHE_DECLARE(apr_time_t) ap_cache_current_age(cache_info *info, const apr_time_t
age_value);
+CACHE_DECLARE(const char *) ap_cache_date_canon(apr_pool_t *p, const char *x1);
 CACHE_DECLARE(apr_time_t) ap_cache_hex2usec(const char *x);
 CACHE_DECLARE(void) ap_cache_usec2hex(apr_time_t j, char *y);
 CACHE_DECLARE(char *) generate_name(apr_pool_t *p, int dirlevels,
Index: mod_disk_cache.c
===================================================================
RCS file: /home/cvs/httpd-2.0/modules/experimental/mod_disk_cache.c,v
retrieving revision 1.35
diff -u -r1.35 mod_disk_cache.c
--- mod_disk_cache.c 30 May 2002 16:34:38 -0000 1.35
+++ mod_disk_cache.c 3 Jun 2002 16:38:36 -0000
@@ -228,6 +228,7 @@
      * date SP expire SP count CRLF
      * dates are stored as a hex representation of apr_time_t (number of
      * microseconds since 00:00:00 january 1, 1970 UTC)
+     * TODO: Add request_time, response_time et al.
      */
     rv = apr_file_gets(&urlbuff[0], urllen, fd);
     if (rv != APR_SUCCESS) {
Index: mod_mem_cache.c
===================================================================
RCS file: /home/cvs/httpd-2.0/modules/experimental/mod_mem_cache.c,v
retrieving revision 1.61
diff -u -r1.61 mod_mem_cache.c
--- mod_mem_cache.c 28 May 2002 18:04:43 -0000 1.61
+++ mod_mem_cache.c 3 Jun 2002 16:38:36 -0000
@@ -715,6 +715,12 @@
     if (info->expire) {
         obj->info.expire = info->expire;
     }
+    if (info->response_time) {
+        obj->info.response_time = info->response_time;
+    }
+    if (info->request_time) {
+        obj->info.request_time = info->request_time;
+    }
     if (info->content_type) {
         obj->info.content_type = (char*) calloc(1, strlen(info->content_type) + 1);
         if (!obj->info.content_type) {