You are viewing a plain text version of this content. The canonical link for it is here.
Posted to modproxy-dev@apache.org by Christian von Roques <ro...@mti.ag> on 2001/02/22 18:11:22 UTC

Re: IE 5, mod_proxy, and ProxyPass (1/3)

I planned to do that for a long time too and finally found the time to
do it.  Below you'll find my updated version of Graham's HTTP/1.1
patch, which I extended last August to use persistent connections to
its clients.  This patch uses ap_send_http_header() instead of writing
the headers to the clients itself.  Persistent connections from the
proxy to content-providers are still unsupported.

The version below is not yet well tested; I'll test and probably
deploy it in the next few days.  As of now I haven't tested its
caching behaviour at all.  I plan to use it as part of an HTTPS/1.1
capable reverse proxy.

I'll come back to you if the testing results in any bugfixes.
The patch is against today's CVS of apache-1.3.x.

        Christian.

diff -X nodiff.pats -urd apache/src/main/alloc.c apache+http1.1_cache/src/main/alloc.c
--- apache/src/main/alloc.c	Wed Jan 31 17:52:45 2001
+++ apache+http1.1_cache/src/main/alloc.c	Mon Feb 12 17:20:50 2001
@@ -1458,6 +1458,24 @@
     return res;
 }
 
+/* apply each overlay entry to base with ap_table_set(); returns 1 if any entry was absent from base or differed, else 0 */
+API_EXPORT(int) ap_replace_tables(table *base, table *overlay)
+{
+    table_entry *entries = (table_entry *) overlay->a.elts;
+    int changed = 0;
+    int idx;
+
+    for (idx = 0; idx < overlay->a.nelts; idx++) {
+	const char *old = ap_table_get(base, entries[idx].key);
+	if (old == NULL || strcmp(old, entries[idx].val) != 0)
+	    changed = 1;
+	ap_table_set(base, entries[idx].key, entries[idx].val);
+    }
+
+    return changed;
+}
+
+
 /* And now for something completely abstract ...
 
  * For each key value given as a vararg:
diff -X nodiff.pats -urd apache/src/main/http_protocol.c apache+http1.1_cache/src/main/http_protocol.c
--- apache/src/main/http_protocol.c	Sun Feb 18 12:51:07 2001
+++ apache+http1.1_cache/src/main/http_protocol.c	Sun Feb 18 12:55:13 2001
@@ -1467,12 +1467,10 @@
     if (!r->status_line)
         r->status_line = status_lines[ap_index_of_response(r->status)];
 
-    /* mod_proxy is only HTTP/1.0, so avoid sending HTTP/1.1 error response;
-     * kluge around broken browsers when indicated by force-response-1.0
+    /* kluge around broken browsers when indicated by force-response-1.0
      */
-    if (r->proxyreq != NOT_PROXY
-        || (r->proto_num == HTTP_VERSION(1,0)
-            && ap_table_get(r->subprocess_env, "force-response-1.0"))) {
+    if (r->proto_num == HTTP_VERSION(1,0)
+	&& ap_table_get(r->subprocess_env, "force-response-1.0")) {
 
         protocol = "HTTP/1.0";
         r->connection->keepalive = -1;
diff -X nodiff.pats -urd apache/src/modules/proxy/mod_proxy.h apache+http1.1_cache/src/modules/proxy/mod_proxy.h
--- apache/src/modules/proxy/mod_proxy.h	Wed Jan 31 17:53:02 2001
+++ apache+http1.1_cache/src/modules/proxy/mod_proxy.h	Mon Feb 12 17:20:50 2001
@@ -184,6 +184,13 @@
 #define DEFAULT_CACHE_COMPLETION (0.9)
 #define DEFAULT_CACHE_GCINTERVAL SEC_ONE_HR
 
+#ifndef MAX
+#define MAX(a,b)		((a) > (b) ? (a) : (b))
+#endif
+#ifndef MIN
+#define MIN(a,b)		((a) < (b) ? (a) : (b))
+#endif
+
 /* static information about the local cache */
 struct cache_conf {
     const char *root;		/* the location of the cache directory */
@@ -240,12 +247,20 @@
     char *filename;		/* name of the cache file, or NULL if no cache */
     char *tempfile;		/* name of the temporary file, of NULL if not caching */
     time_t ims;			/* if-modified-since date of request; -1 if no header */
+    time_t ius;			/* If-Unmodified-Since date of request; -1 if no header */
+    const char *im;		/* If-Match etag of request; NULL if no header */
+    const char *inm;		/* If-None-Match etag of request; NULL if no header */
     BUFF *fp;			/* the cache file descriptor if the file is cached
 				   and may be returned, or NULL if the file is
 				   not cached (or must be reloaded) */
+    BUFF *origfp;		/* the old cache file descriptor if the file has
+				   been revalidated and is being rewritten to
+				   disk */
     time_t expire;		/* calculated expire date of cached entity */
     time_t lmod;		/* last-modified date of cached entity */
     time_t date;		/* the date the cached file was last touched */
+    time_t req_time;		/* the time the request started */
+    time_t resp_time;		/* the time the response was received */
     int version;		/* update count of the file */
     off_t len;			/* content length */
     char *protocol;		/* Protocol, and major/minor number, e.g. HTTP/1.1 */
@@ -253,15 +268,11 @@
     unsigned int written;	/* total *content* bytes written to cache */
     float cache_completion;	/* specific to this request */
     char *resp_line;		/* the whole status like (protocol, code + message) */
-    table *hdrs;		/* the HTTP headers of the file */
+    table *req_hdrs;		/* the original request headers when it was made */
+    table *hdrs;		/* the original HTTP response headers of the file */
+    char *xcache;		/* the X-Cache header value to be sent to client */
 } cache_req;
 
-/* Additional information passed to the function called by ap_table_do() */
-struct tbl_do_args {
-    request_rec *req;
-    cache_req *cache;
-};
-
 struct per_thread_data {
     struct hostent hpbuf;
     u_long ipaddr;
@@ -305,9 +316,9 @@
 			 char **passwordp, char **hostp, int *port);
 const char *ap_proxy_date_canon(pool *p, const char *x);
 table *ap_proxy_read_headers(request_rec *r, char *buffer, int size, BUFF *f);
-long int ap_proxy_send_fb(BUFF *f, request_rec *r, cache_req *c);
-void ap_proxy_send_headers(request_rec *r, const char *respline, table *hdrs);
-int ap_proxy_liststr(const char *list, const char *val);
+long int ap_proxy_send_fb(BUFF *f, request_rec *r, cache_req *c, off_t len, int nowrite);
+void ap_proxy_write_headers(cache_req *c, const char *respline, table *t);
+int ap_proxy_liststr(const char *list, const char *key, char **val);
 void ap_proxy_hash(const char *it, char *val, int ndepth, int nlength);
 int ap_proxy_hex2sec(const char *x);
 void ap_proxy_sec2hex(int t, char *y);
@@ -323,5 +334,9 @@
 /* This function is called by ap_table_do() for all header lines */
 int ap_proxy_send_hdr_line(void *p, const char *key, const char *value);
 unsigned ap_proxy_bputs2(const char *data, BUFF *client, cache_req *cache);
+time_t ap_proxy_current_age(cache_req *c, const time_t age_value);
+BUFF *ap_proxy_open_cachefile(request_rec *r, char *filename);
+BUFF *ap_proxy_create_cachefile(request_rec *r, char *filename);
+void ap_proxy_clear_connection(pool *p, table *headers);
 
 #endif /*MOD_PROXY_H*/
diff -X nodiff.pats -urd apache/src/modules/proxy/proxy_cache.c apache+http1.1_cache/src/modules/proxy/proxy_cache.c
--- apache/src/modules/proxy/proxy_cache.c	Wed Jan 31 17:53:02 2001
+++ apache+http1.1_cache/src/modules/proxy/proxy_cache.c	Mon Feb 12 17:25:17 2001
@@ -62,6 +62,7 @@
 #include "http_conf_globals.h"
 #include "http_log.h"
 #include "http_main.h"
+#include "http_core.h"
 #include "util_date.h"
 #ifdef WIN32
 #include <sys/utime.h>
@@ -413,7 +414,7 @@
 static int sub_garbage_coll(request_rec *r, array_header *files,
 			  const char *cachebasedir, const char *cachesubdir)
 {
-    char line[27];
+    char line[17*(3)];
     char cachedir[HUGE_STRING_LEN];
     struct stat buf;
     int fd, i;
@@ -567,7 +568,7 @@
         }
 #endif
  
-	i = read(fd, line, 26);
+	i = read(fd, line, 17*(3)-1);
 	close(fd);
 	if (i == -1) {
 	    ap_log_error(APLOG_MARK, APLOG_ERR, r->server,
@@ -575,8 +576,8 @@
 	    continue;
 	}
 	line[i] = '\0';
-	garbage_expire = ap_proxy_hex2sec(line + 18);
-	if (!ap_checkmask(line, "&&&&&&&& &&&&&&&& &&&&&&&&") ||
+	garbage_expire = ap_proxy_hex2sec(line + 17*(2));
+	if (!ap_checkmask(line, "&&&&&&&&&&&&&&&& &&&&&&&&&&&&&&&& &&&&&&&&&&&&&&&&") ||
 	    garbage_expire == BAD_DATE) {
 	    /* bad file */
 	    if (garbage_now != -1 && buf.st_atime > garbage_now + SEC_ONE_DAY &&
@@ -619,16 +620,38 @@
  * returns 1 on success,
  *         0 on failure (bad file or wrong URL)
  *        -1 on UNIX error
+ *
+ * We read the cache hex header, then the message response line and
+ * response headers, and finally we return with the filepointer
+ * pointing at the start of the message body itself, ready to be
+ * shipped to the client later on, if appropriate.
  */
 static int rdcache(request_rec *r, BUFF *cachefp, cache_req *c)
 {
-    char urlbuff[1034], *strp;
+    char urlbuff[HUGE_STRING_LEN], *strp;
     int len;
+
 /* read the data from the cache file */
-/* format
- * date SP lastmod SP expire SP count SP content-length CRLF
- * dates are stored as hex seconds since 1970
+    
+    /* Format:
+     *
+     * The cache needs to keep track of the following information:
+     * - Date, LastMod, Version, ReqTime, RespTime, ContentLength
+     * - The original request headers (for Vary)
+     * - The original response headers (for returning with a cached response)
+     * - The body of the message
+     *
+     * date SP lastmod SP expire SP count SP request-time SP response-time SP content-lengthCRLF
+     * (dates are stored as hex seconds since 1970)
+     * Original URLCRLF
+     * Original Request Headers
+     * CRLF
+     * Original Response Headers
+     * CRLF
+     * Body
  */
+
+    /* retrieve cachefile information values */
     len = ap_bgets(urlbuff, sizeof urlbuff, cachefp);
     if (len == -1)
 	return -1;
@@ -637,14 +660,16 @@
     urlbuff[len - 1] = '\0';
 
     if (!ap_checkmask(urlbuff,
-		   "&&&&&&&& &&&&&&&& &&&&&&&& &&&&&&&& &&&&&&&&"))
+		      "&&&&&&&&&&&&&&&& &&&&&&&&&&&&&&&& &&&&&&&&&&&&&&&& &&&&&&&&&&&&&&&& &&&&&&&&&&&&&&&& &&&&&&&&&&&&&&&& &&&&&&&&&&&&&&&&"))
 	return 0;
 
-    c->date = ap_proxy_hex2sec(urlbuff);
-    c->lmod = ap_proxy_hex2sec(urlbuff + 9);
-    c->expire = ap_proxy_hex2sec(urlbuff + 18);
-    c->version = ap_proxy_hex2sec(urlbuff + 27);
-    c->len = ap_proxy_hex2sec(urlbuff + 36);
+    c->date = ap_proxy_hex2sec(urlbuff + 17*(0));
+    c->lmod = ap_proxy_hex2sec(urlbuff + 17*(1));
+    c->expire = ap_proxy_hex2sec(urlbuff + 17*(2));
+    c->version = ap_proxy_hex2sec(urlbuff + 17*(3));
+    c->req_time = ap_proxy_hex2sec(urlbuff + 17*(4));
+    c->resp_time = ap_proxy_hex2sec(urlbuff + 17*(5));
+    c->len = ap_proxy_hex2sec(urlbuff + 17*(6));
 
 /* check that we have the same URL */
     len = ap_bgets(urlbuff, sizeof urlbuff, cachefp);
@@ -657,7 +682,12 @@
     if (strcmp(urlbuff + 7, c->url) != 0)
 	return 0;
 
-/* What follows is the message */
+/* then the original request headers */
+    c->req_hdrs = ap_proxy_read_headers(r, urlbuff, sizeof urlbuff, cachefp);
+    if (c->req_hdrs == NULL)
+        return -1;
+
+/* then the original response headers */
     len = ap_bgets(urlbuff, sizeof urlbuff, cachefp);
     if (len == -1)
 	return -1;
@@ -685,6 +715,194 @@
 
 
 /*
+ * Call this to check the possible conditional status of
+ * the client request, and return the response from the cache
+ *
+ * Conditionals include If-Modified-Since, If-Match, If-Unmodified-Since
+ * and If-None-Match.
+ *
+ * We don't yet understand If-Range, but we will...
+ */
+int ap_proxy_cache_conditional(request_rec *r, cache_req *c, BUFF *cachefp)
+{
+    const char *etag, *wetag = NULL;
+
+    /* get etag and build its weak ("W/") form; wetag stays NULL if there is no Etag */
+    if ((etag = ap_table_get(c->hdrs, "Etag"))) {
+        wetag = ap_pstrcat(r->pool, "W/", etag, NULL);
+    }
+
+    /* check for If-Match, If-Unmodified-Since */
+    while (1) {
+	
+        /* check If-Match and If-Unmodified-Since exist
+         *
+         * If neither of these exist, the request is not conditional, and
+         * we serve it normally
+         */
+        if (!c->im && BAD_DATE == c->ius) {
+            break;
+        }
+
+        /* check If-Match
+         *
+         * we check if the Etag on the cached file is in the list of Etags
+         * in the If-Match field. The comparison must be a strong comparison,
+         * so the Etag cannot be marked as weak. If the comparison fails
+         * we return 412 Precondition Failed (unless If-Unmodified-Since passes).
+         *
+         * if If-Match is specified AND
+         * If-Match is not a "*" AND
+         * Etag is missing or weak or not in the list THEN
+         * return 412 Precondition Failed
+         */
+
+        if (c->im) {
+            if (strcmp(c->im, "*") &&
+            (!etag || (strlen(etag) > 1 && 'W' == etag[0] && '/' == etag[1]) || !ap_proxy_liststr(c->im, etag, NULL))) {
+                Explain0("If-Match specified, and it didn't - return 412");
+            }
+            else {
+                Explain0("If-Match specified, and it matched");
+                break;
+            }
+        }
+
+        /* check If-Unmodified-Since
+         *
+         * if If-Unmodified-Since is specified AND
+         * Last-Modified is specified somewhere AND
+         * If-Unmodified-Since is in the past compared to Last-Modified THEN
+         * return 412 Precondition Failed
+         */
+        if (BAD_DATE != c->ius && BAD_DATE != c->lmod) {
+            if (c->ius < c->lmod) {
+                Explain0("If-Unmodified-Since specified, but it wasn't - return 412");
+            }
+            else {
+                Explain0("If-Unmodified-Since specified, and it was unmodified");
+                break;
+            }
+        }
+
+        /* if cache file is being updated */
+        if (c->origfp) {
+            ap_proxy_write_headers(c, c->resp_line, c->hdrs);
+            ap_proxy_send_fb(c->origfp, r, c, c->len, 1);
+            ap_pclosef(r->pool, ap_bfileno(c->origfp, B_WR));
+            ap_proxy_cache_tidy(c);
+        }
+        else
+            ap_pclosef(r->pool, ap_bfileno(cachefp, B_WR));
+
+        Explain0("Use your cached copy, conditional precondition failed.");
+        return HTTP_PRECONDITION_FAILED;
+    }
+
+
+    /* check for If-None-Match, If-Modified-Since
+     *
+     */
+    while (1) {
+
+        /* check for existence of If-None-Match and If-Modified-Since
+         *
+         * if neither of these headers have been set, then the request
+         * is not conditional, and we just send the cached response and
+         * be done with it.
+         */
+        if (!c->inm && BAD_DATE == c->ims) {
+            break;
+        }
+
+        /* check If-None-Match
+         *
+         * we check if the Etag on the cached file is in the list of Etags
+         * in the If-None-Match field. Both the stored Etag and its weak
+         * ("W/"-prefixed) form are tried; on a match we carry on towards
+         * 304 Not Modified (the If-Modified-Since check below still runs).
+         *
+         * if If-None-Match is specified:
+         * if If-None-Match is a "*" THEN 304
+         * else if Etag is specified AND we get a match THEN 304
+         * else if Weak Etag is specified AND we get a match THEN 304
+         * else send the original object
+         */
+        if (c->inm) {
+            if (!strcmp(c->inm, "*")) {
+                Explain0("If-None-Match: * specified, return 304");
+            }
+            else if (etag && ap_proxy_liststr(c->inm, etag, NULL)) {
+                Explain0("If-None-Match: specified and we got a strong match - return 304");
+            }
+            else if (wetag && ap_proxy_liststr(c->inm, wetag, NULL)) {
+                Explain0("If-None-Match specified, and we got a weak match - return 304");
+            }
+            else
+                break;
+        }
+
+        /* check If-Modified-Since
+         *
+         * if If-Modified-Since is specified AND
+         * Last-Modified is specified somewhere:
+         * if last modification date is not later than If-Modified-Since THEN 304
+         * else send the original object
+         */
+        if (BAD_DATE != c->ims && BAD_DATE != c->lmod) {
+            if (c->ims >= c->lmod) {
+                Explain0("If-Modified-Since specified and not modified, try return 304");
+            }
+            else
+                break;
+        }
+
+
+        /* are we updating the cache file? */
+        if (c->origfp) {
+            ap_proxy_write_headers(c, c->resp_line, c->hdrs);
+            ap_proxy_send_fb(c->origfp, r, c, c->len, 1);
+            ap_pclosef(r->pool, ap_bfileno(c->origfp, B_WR));
+            ap_proxy_cache_tidy(c);
+        }
+        else
+            ap_pclosef(r->pool, ap_bfileno(cachefp, B_WR));
+
+        Explain0("Use local copy, cached file hasn't changed");
+        return HTTP_NOT_MODIFIED;
+    }
+
+
+    /* No conditional - just send it cousin! */
+    Explain0("Local copy modified, send it");
+    r->status_line = strchr(c->resp_line, ' ') + 1;
+    r->status = c->status;
+
+    /* Prepare and send headers to client */
+    ap_overlap_tables(r->headers_out, c->hdrs, AP_OVERLAP_TABLES_SET);
+    ap_table_setn(r->headers_out, "X-Cache", c->xcache);
+    r->content_type = ap_table_get(r->headers_out, "Content-Type");
+    ap_send_http_header(r);
+
+    /* are we rewriting the cache file? */
+    if (c->origfp) {
+        ap_proxy_write_headers(c, c->resp_line, c->hdrs);
+        ap_proxy_send_fb(c->origfp, r, c, c->len, r->header_only);
+        ap_pclosef(r->pool, ap_bfileno(c->origfp, B_WR));
+        ap_proxy_cache_tidy(c);
+        return OK;
+    }
+
+    /* no, we are not */
+    if (!r->header_only)
+        ap_proxy_send_fb(cachefp, r, NULL, c->len, 0);
+
+    ap_pclosef(r->pool, ap_bfileno(cachefp, B_WR));
+    return OK;
+}
+
+
+/*
  * Call this to test for a resource in the cache
  * Returns DECLINED if we need to check the remote host
  * or an HTTP status code if successful
@@ -694,73 +912,122 @@
  *      if cached file is not expired then
  *         if last modified after if-modified-since then send body
  *         else send 304 Not modified
- *      else
+ *      else if cached file is expired then
  *         if last modified after if-modified-since then add
  *            last modified date to request
  */
 int ap_proxy_cache_check(request_rec *r, char *url, struct cache_conf *conf,
 		      cache_req **cr)
 {
-    char hashfile[66];
-    const char *imstr, *pragma, *auth;
+    const char *datestr, *pragma_req = NULL, *pragma_cresp = NULL, *cc_req = NULL, *cc_cresp = NULL, *vary = NULL;
     cache_req *c;
     time_t now;
     BUFF *cachefp;
-    int cfd, i;
-    const long int zero = 0L;
+    int i;
     void *sconf = r->server->module_config;
     proxy_server_conf *pconf =
     (proxy_server_conf *) ap_get_module_config(sconf, &proxy_module);
+    const char *agestr = NULL;
+    char *val;
+    time_t age_c = 0;
+    time_t age, maxage_req, maxage_cresp, maxage, smaxage, maxstale, minfresh;
 
     c = ap_pcalloc(r->pool, sizeof(cache_req));
     *cr = c;
     c->req = r;
     c->url = ap_pstrdup(r->pool, url);
+    c->filename = NULL;
+    c->tempfile = NULL;
+    c->fp = NULL;
+    c->origfp = NULL;
+    c->version = 0;
+    c->len = -1;
+    c->req_hdrs = NULL;
+    c->hdrs = NULL;
+    c->xcache = NULL;
 
-/* get the If-Modified-Since date of the request */
+/* get the If-Modified-Since date of the request, if it exists */
     c->ims = BAD_DATE;
-    imstr = ap_table_get(r->headers_in, "If-Modified-Since");
-    if (imstr != NULL) {
+    datestr = ap_table_get(r->headers_in, "If-Modified-Since");
+    if (datestr != NULL) {
 /* this may modify the value in the original table */
-	imstr = ap_proxy_date_canon(r->pool, imstr);
-	c->ims = ap_parseHTTPdate(imstr);
+	datestr = ap_proxy_date_canon(r->pool, datestr);
+	c->ims = ap_parseHTTPdate(datestr);
 	if (c->ims == BAD_DATE)	/* bad or out of range date; remove it */
 	    ap_table_unset(r->headers_in, "If-Modified-Since");
     }
 
+/* get the If-Unmodified-Since date of the request, if it exists */
+    c->ius = BAD_DATE;
+    datestr = ap_table_get(r->headers_in, "If-Unmodified-Since");
+    if (datestr != NULL) {
+	/* this may modify the value in the original table */
+	datestr = ap_proxy_date_canon(r->pool, datestr);
+	c->ius = ap_parseHTTPdate(datestr);
+	if (c->ius == BAD_DATE) /* bad or out of range date; remove it */
+	    ap_table_unset(r->headers_in, "If-Unmodified-Since");
+    }
+
+    /* get the If-Match of the request, if it exists */
+    c->im = ap_table_get(r->headers_in, "If-Match");
+
+    /* get the If-None-Match of the request, if it exists */
+    c->inm = ap_table_get(r->headers_in, "If-None-Match");
+
 /* find the filename for this cache entry */
-    ap_proxy_hash(url, hashfile, pconf->cache.dirlevels, pconf->cache.dirlength);
-    if (conf->root != NULL)
+    if (conf->root != NULL) {
+	char hashfile[66];
+	ap_proxy_hash(url, hashfile, pconf->cache.dirlevels, pconf->cache.dirlength);
 	c->filename = ap_pstrcat(r->pool, conf->root, "/", hashfile, NULL);
-    else
+    } else {
+	c->filename = NULL;
+	c->fp = NULL;
+	Explain0("No CacheRoot, so no caching. Declining.");
+	return DECLINED;
+    }
+
+    /* find certain cache controlling headers */
+    pragma_req = ap_table_get(r->headers_in, "Pragma");
+    cc_req = ap_table_get(r->headers_in, "Cache-Control");
+
+
+    /* first things first - does the request allow us to return
+     * cached information at all? If not, just decline the request.
+     *
+     * Note that there is a big difference between not being allowed
+     * to cache a request (no-store) and not being allowed to return
+     * a cached request without revalidation (max-age=0).
+     *
+     * Caching is forbidden under the following circumstances:
+     *
+     * - RFC2616 14.9.2 Cache-Control: no-store
+     * we are not supposed to store this request at all. Behave as a tunnel.
+     * 
+     */
+    if (ap_proxy_liststr(cc_req, "no-store", NULL)) {
+
+	/* delete the previously cached file */
+	if (c->filename)
+	    unlink(c->filename);
+	c->fp = NULL;
 	c->filename = NULL;
+	Explain0("no-store forbids caching. Declining.");
+	return DECLINED;
+    }
+
 
+    /* if the cache file exists, open it */
     cachefp = NULL;
-/* find out about whether the request can access the cache */
-    pragma = ap_table_get(r->headers_in, "Pragma");
-    auth = ap_table_get(r->headers_in, "Authorization");
-    Explain5("Request for %s, pragma=%s, auth=%s, ims=%ld, imstr=%s", url,
-	     pragma, auth, (long)c->ims, imstr);
+    Explain3("Request for %s, pragma_req=%s, ims=%ld", url,
+	     pragma_req, c->ims);
     if (c->filename != NULL && r->method_number == M_GET &&
-	strlen(url) < 1024 && !ap_proxy_liststr(pragma, "no-cache") &&
-	auth == NULL) {
-	Explain1("Check file %s", c->filename);
-	cfd = open(c->filename, O_RDWR | O_BINARY);
-	if (cfd != -1) {
-	    ap_note_cleanups_for_fd(r->pool, cfd);
-	    cachefp = ap_bcreate(r->pool, B_RD | B_WR);
-	    ap_bpushfd(cachefp, cfd, cfd);
-	}
-	else if (errno != ENOENT)
-	    ap_log_rerror(APLOG_MARK, APLOG_ERR, r,
-			 "proxy: error opening cache file %s",
-			 c->filename);
-#ifdef EXPLAIN
-	else
-	    Explain1("File %s not found", c->filename);
-#endif
+	strlen(url) < 1024 ) {
+
+	cachefp = ap_proxy_open_cachefile(r, c->filename);
     }
 
+
+    /* if a cache file exists, try read body and headers from cache file */
     if (cachefp != NULL) {
 	i = rdcache(r, cachefp, c);
 	if (i == -1)
@@ -774,68 +1041,234 @@
 	    ap_pclosef(r->pool, ap_bfileno(cachefp, B_WR));
 	    cachefp = NULL;
 	}
+	if (c->hdrs) {
+	    cc_cresp = ap_table_get(c->hdrs, "Cache-Control");
+	    pragma_cresp = ap_table_get(c->hdrs, "Pragma");
+	    vary = ap_table_get(c->hdrs, "Vary");
+	    if ((agestr = ap_table_get(c->hdrs, "Age"))) {
+		age_c = atoi(agestr);
+	    }
+    }
     }
+    
+    /* if a cache file does not exist, create empty header array */
 /* fixed?  in this case, we want to get the headers from the remote server
    it will be handled later if we don't do this (I hope ;-)
+       
     if (cachefp == NULL)
 	c->hdrs = ap_make_table(r->pool, 20);
 */
     /* FIXME: Shouldn't we check the URL somewhere? */
-    now = time(NULL);
-/* Ok, have we got some un-expired data? */
-    if (cachefp != NULL && c->expire != BAD_DATE && now < c->expire) {
-	Explain0("Unexpired data available");
-/* check IMS */
-	if (c->lmod != BAD_DATE && c->ims != BAD_DATE && c->ims >= c->lmod) {
-/* has the cached file changed since this request? */
-	    if (c->date == BAD_DATE || c->date > c->ims) {
-/* No, but these header values may have changed, so we send them with the
- * 304 HTTP_NOT_MODIFIED response
+
+
+    /* Check Content-Negotiation - Vary
+     *
+     * At this point we need to make sure that the object we found in the cache
+     * is the same object that would be delivered to the client, when the
+     * effects of content negotiation are taken into account.
+     *
+     * In plain English, we want to make sure that a language-negotiated
+     * document in one language is not given to a client asking for a
+     * language negotiated document in a different language by mistake.
+     *
+     * RFC2616 13.6 and 14.44 describe the Vary mechanism.
  */
-		const char *q;
+    if (c->hdrs && c->req_hdrs) {
+	char *vary = ap_pstrdup(r->pool, ap_table_get(c->hdrs, "Vary"));
 
-		if ((q = ap_table_get(c->hdrs, "Expires")) != NULL)
-		    ap_table_set(r->headers_out, "Expires", q);
+	while (vary && *vary) {
+	    char *name = vary;
+	    const char *h1, *h2;
+
+	    /* isolate header name */
+	    while (*vary && !ap_isspace(*vary) && (*vary != ','))
+		++vary;
+	    while (*vary && (ap_isspace(*vary) || (*vary == ','))) {
+		*vary = '\0';
+		++vary;
 	    }
-	    ap_pclosef(r->pool, ap_bfileno(cachefp, B_WR));
-	    Explain0("Use local copy, cached file hasn't changed");
-	    return HTTP_NOT_MODIFIED;
+
+	    /* is this header in the request and the header in the cached
+	     * request identical? If not, we give up and do a straight get */
+	    h1 = ap_table_get(r->headers_in, name);
+	    h2 = ap_table_get(c->req_hdrs, name);
+	    if (h1 == h2) {
+		/* both headers NULL, so a match - do nothing */
 	}
+	    else if (h1 && h2 && !strcmp(h1, h2)) {
+		/* both headers exist and are equal - do nothing */
+	    }
+	    else {
 
-/* Ok, has been modified */
-	Explain0("Local copy modified, send it");
-	r->status_line = strchr(c->resp_line, ' ') + 1;
-	r->status = c->status;
-	if (!r->assbackwards) {
-	    ap_soft_timeout("proxy send headers", r);
-	    ap_proxy_send_headers(r, c->resp_line, c->hdrs);
-	    ap_kill_timeout(r);
+		/* headers do not match, so Vary failed */
+		c->fp = cachefp;
+		Explain0("Vary header mismatch - object must be fetched from scratch. Declining.");
+		return DECLINED;
+	    }
 	}
-	ap_bsetopt(r->connection->client, BO_BYTECT, &zero);
-	r->sent_bodyct = 1;
-	if (!r->header_only)
-	    ap_proxy_send_fb(cachefp, r, NULL);
-	ap_pclosef(r->pool, ap_bfileno(cachefp, B_WR));
-	return OK;
     }
 
-/* if we already have data and a last-modified date, and it is not a head
- * request, then add an If-Modified-Since
+
+    /* We now want to check if our cached data is still fresh. This depends
+     * on a few things, in this order:
+     *
+     * - RFC2616 14.9.4 End to end reload, Cache-Control: no-cache
+     * no-cache in either the request or the cached response means that
+     * we must revalidate the request unconditionally, overriding any
+     * expiration mechanism. It's equivalent to max-age=0,must-revalidate.
+     *
+     * - RFC2616 14.32 Pragma: no-cache
+     * This is treated the same as Cache-Control: no-cache.
+     *
+     * - RFC2616 14.9.3 Cache-Control: max-stale, must-revalidate, proxy-revalidate
+     * if the max-stale request header exists, modify the stale calculations
+     * below so that an object can be at most <max-stale> seconds stale before
+     * we request a revalidation, _UNLESS_ a must-revalidate or
+     * proxy-revalidate cached response header exists to stop us doing this.
+     *
+     * - RFC2616 14.9.3 Cache-Control: s-maxage
+     * the origin server specifies the maximum age an object can be before
+     * it is considered stale. This directive has the effect of proxy|must
+     * revalidate, which in turn means simply ignore any max-stale setting.
+     *
+     * - RFC2616 14.9.3 Cache-Control: max-age
+     * this header can appear in both requests and responses. If both are
+     * specified, the smaller of the two takes priority.
+     *
+     * - RFC2616 14.21 Expires:
+     * if this response header exists in the cached entity, and its value is
+     * in the past, it has expired.
  */
 
-    if (cachefp != NULL && c->lmod != BAD_DATE && !r->header_only) {
-/*
- * use the later of the one from the request and the last-modified date
- * from the cache
+    /* calculate age of object */
+    age = ap_proxy_current_age(c, age_c);
+
+    /* extract s-maxage */
+    if (cc_cresp && ap_proxy_liststr(cc_cresp, "s-maxage", &val))
+	smaxage = atoi(val);
+    else
+	smaxage = -1;
+
+    /* extract max-age from request -- NOTE(review): guard below tests cc_cresp; cc_req looks intended */
+    if (cc_cresp && ap_proxy_liststr(cc_req, "max-age", &val))
+	maxage_req =  atoi(val);
+    else
+	maxage_req = -1;
+
+    /* extract max-age from response */
+    if (cc_cresp && ap_proxy_liststr(cc_cresp, "max-age", &val))
+	maxage_cresp =	atoi(val);
+    else
+	maxage_cresp = -1;
+
+    /* if both maxage request and response, the smaller one takes priority */
+    if (-1 == maxage_req)
+	maxage = maxage_cresp;
+    else if (-1 == maxage_cresp)
+	maxage = maxage_req;
+    else
+	maxage = MIN(maxage_req, maxage_cresp);
+
+    /* extract max-stale */
+    if (cc_req && ap_proxy_liststr(cc_req, "max-stale", &val))
+	maxstale =  atoi(val);
+    else
+	maxstale = 0;
+
+    /* extract min-fresh */
+    if (cc_req && ap_proxy_liststr(cc_req, "min-fresh", &val))
+	minfresh =  atoi(val);
+    else
+	minfresh = 0;
+
+    /* override maxstale if must-revalidate or proxy-revalidate */
+    if (maxstale && ( (cc_cresp && ap_proxy_liststr(cc_cresp, "must-revalidate", NULL)) || (cc_cresp && ap_proxy_liststr(cc_cresp, "proxy-revalidate", NULL)) ))
+	maxstale = 0;
+
+    now = time(NULL);
+    if (cachefp != NULL &&
+
+	/* handle no-cache */
+	!( (cc_req && ap_proxy_liststr(cc_req, "no-cache", NULL)) ||
+	  (pragma_req && ap_proxy_liststr(pragma_req, "no-cache", NULL)) ||
+	  (cc_cresp && ap_proxy_liststr(cc_cresp, "no-cache", NULL)) ||
+	  (pragma_cresp && ap_proxy_liststr(pragma_cresp, "no-cache", NULL)) ) &&
+
+	/* handle expiration */
+	( (-1 < smaxage && age < (smaxage - minfresh)) ||
+	  (-1 < maxage && age < (maxage + maxstale - minfresh)) ||
+	  (c->expire != BAD_DATE && age < (c->expire - c->date + maxstale - minfresh)) )
+
+	) {
+
+	/* it's fresh darlings... */
+
+	Explain0("Unexpired data available");
+
+	/* set age header on response */
+	ap_table_set(c->hdrs, "Age",
+			ap_psprintf(r->pool, "%lu", (unsigned long)age));
+
+	/* add warning if maxstale overrode freshness calculation */
+	if (!( (-1 < smaxage && age < smaxage) ||
+	     (-1 < maxage && age < maxage) ||
+	     (c->expire != BAD_DATE && (c->expire - c->date) > age) )) {
+	    ap_table_set(c->hdrs, "Warning", "110 Response is stale");
+	}
+
+	/* check conditionals (If-Modified-Since, etc) */
+	c->xcache = ap_pstrcat(r->pool, "HIT from ", ap_get_server_name(r), NULL);
+	return ap_proxy_cache_conditional(r, c, cachefp);
+
+
+    }
+
+    /* at this point we have determined our cached data needs revalidation
+     * but first - we check 1 thing:
+     *
+     * RFC2616 14.9.4 - if "only-if-cached" specified, send a
+     * 504 Gateway Timeout - we're not allowed to revalidate the object
  */
+    if (ap_proxy_liststr(cc_req, "only-if-cached", NULL)) {
+	if (cachefp)
+	    ap_pclosef(r->pool, ap_bfileno(cachefp, B_WR));
+	return HTTP_GATEWAY_TIME_OUT;
+    }
+
+
+    /* If we already have cached data and a last-modified date, and it is
+     * not a head request, then add an If-Modified-Since.
+     *
+     * If we also have an Etag, then the object must have come from
+     * an HTTP/1.1 server. Add an If-None-Match as well.
+     *
+     * See RFC2616 13.3.4
+     */
+
+    if (cachefp != NULL && !r->header_only) {
+
+	const char *etag = ap_table_get(c->hdrs, "Etag");
+
+	/* If-Modified-Since */
+	if (c->lmod != BAD_DATE) {
+	    /* use the later of the one from the request and the last-modified date
+	     * from the cache */
 	if (c->ims == BAD_DATE || c->ims < c->lmod) {
 	    const char *q;
 
 	    if ((q = ap_table_get(c->hdrs, "Last-Modified")) != NULL)
-		ap_table_set(r->headers_in, "If-Modified-Since",
-			  (char *) q);
+		    ap_table_set(r->headers_in, "If-Modified-Since", (char *) q);
+	    }
+	}
+	
+	/* If-None-Match */
+	if (etag) {
+	    ap_table_set(r->headers_in, "If-None-Match", etag);
 	}
+
     }
+
+
     c->fp = cachefp;
 
     Explain0("Local copy not present or expired. Declining.");
@@ -863,14 +1296,16 @@
 #endif 
     request_rec *r = c->req;
     char *p;
-    int i;
     const char *expire, *lmods, *dates, *clen;
     time_t expc, date, lmod, now;
-    char buff[46];
+    char buff[17*7+1];
     void *sconf = r->server->module_config;
     proxy_server_conf *conf =
     (proxy_server_conf *) ap_get_module_config(sconf, &proxy_module);
-    const long int zero = 0L;
+    const char *cc_resp;
+    table *req_hdrs;
+
+    cc_resp = ap_table_get(resp_hdrs, "Cache-Control");
 
     c->tempfile = NULL;
 
@@ -898,37 +1333,84 @@
     else
 	lmod = BAD_DATE;
 
+
 /*
  * what responses should we not cache?
- * Unknown status responses and those known to be uncacheable
- * 304 HTTP_NOT_MODIFIED response when we have no valid cache file, or
- * 200 HTTP_OK response from HTTP/1.0 and up without a Last-Modified header, or
- * HEAD requests, or
- * requests with an Authorization header, or
- * protocol requests nocache (e.g. ftp with user/password)
+ *
+ * At this point we decide based on the response headers whether it
+ * is appropriate _NOT_ to cache the data from the server. There are
+ * a whole lot of conditions that prevent us from caching this data.
+ * They are tested here one by one to be clear and unambiguous.
+ *
+ * RFC2616 13.4: we are allowed to cache 200, 203, 206, 300, 301 or 410.
+ * We don't cache 206 (no partial-response caching yet); 410 is not in the status test below either.
+ * We include 304 Not Modified here too as this is the origin server
+ * telling us to serve the cached copy.
  */
-/* @@@ XXX FIXME: is the test "r->status != HTTP_MOVED_PERMANENTLY" correct?
- * or shouldn't it be "ap_is_HTTP_REDIRECT(r->status)" ? -MnKr */
-    if ((r->status != HTTP_OK && r->status != HTTP_MOVED_PERMANENTLY && r->status != HTTP_NOT_MODIFIED) ||
+    if ((r->status != HTTP_OK && r->status != HTTP_NON_AUTHORITATIVE && r->status != HTTP_MULTIPLE_CHOICES && r->status != HTTP_MOVED_PERMANENTLY && r->status != HTTP_NOT_MODIFIED) ||
+	
+	/* if a broken Expires header is present, don't cache it */
 	(expire != NULL && expc == BAD_DATE) ||
+
+	/* if the server said 304 Not Modified but we have no cache file - pass
+	 * this untouched to the user agent, it's not for us. */
 	(r->status == HTTP_NOT_MODIFIED && (c == NULL || c->fp == NULL)) ||
+	
+	/* 200 OK response from HTTP/1.0 and up without a Last-Modified header */
 	(r->status == HTTP_OK && lmods == NULL && is_HTTP1) ||
+	
+	/* HEAD requests */
 	r->header_only ||
-	ap_table_get(r->headers_in, "Authorization") != NULL ||
+	
+	/* RFC2616 14.9.2 Cache-Control: no-store response indicating do not
+	 * cache, or stop now if you are trying to cache it */
+        ap_proxy_liststr(cc_resp, "no-store", NULL) ||
+	
+	/* RFC2616 14.9.1 Cache-Control: private
+	 * this object is marked for this user's eyes only. Behave as a tunnel. */
+        ap_proxy_liststr(cc_resp, "private", NULL) ||
+
+	/* RFC2616 14.8 Authorization:
+	 * if authorisation is included in the request, we don't cache, but we
+	 * can cache if the following exceptions are true:
+	 * 1) If Cache-Control: s-maxage is included
+	 * 2) If Cache-Control: must-revalidate is included
+	 * 3) If Cache-Control: public is included
+	 */
+        (ap_table_get(r->headers_in, "Authorization") != NULL
+	 
+	 && !(ap_proxy_liststr(cc_resp, "s-maxage", NULL) || ap_proxy_liststr(cc_resp, "must-revalidate", NULL) || ap_proxy_liststr(cc_resp, "public", NULL))
+	 ) ||
+	
+	/* or we've been asked not to cache it above */
 	nocache) {
+	
 	Explain1("Response is not cacheable, unlinking %s", c->filename);
+	
 /* close the file */
 	if (c->fp != NULL) {
 	    ap_pclosef(r->pool, ap_bfileno(c->fp, B_WR));
 	    c->fp = NULL;
 	}
+	
 /* delete the previously cached file */
         if (c->filename)
             unlink(c->filename);
 	return DECLINED;	/* send data to client but not cache */
     }
 
-/* otherwise, we are going to cache the response */

Re: IE 5, mod_proxy, and ProxyPass (1/3)

Posted by Christian von Roques <ro...@mti.ag>.
I'm sorry for the Message/partial; I upgraded my MUA a few days ago and
it tried to outsmart me :-(

If you have problems reading my previous email, I can resend it in one
part now that I have tamed my MUA, or just send the patch as a private
email to anybody who asks.

Sorry for the inconvenience,

        Christian.