You are viewing a plain text version of this content. The canonical link for it is here.
Posted to cvs@httpd.apache.org by mi...@apache.org on 2010/03/30 00:30:46 UTC

svn commit: r928921 - in /httpd/httpd/trunk: CHANGES docs/manual/programs/htcacheclean.xml support/htcacheclean.c

Author: minfrin
Date: Mon Mar 29 22:30:46 2010
New Revision: 928921

URL: http://svn.apache.org/viewvc?rev=928921&view=rev
Log:
htcacheclean: Introduce the ability to clean specific URLs from the
cache, if provided as an optional parameter on the command line.

Modified:
    httpd/httpd/trunk/CHANGES
    httpd/httpd/trunk/docs/manual/programs/htcacheclean.xml
    httpd/httpd/trunk/support/htcacheclean.c

Modified: httpd/httpd/trunk/CHANGES
URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/CHANGES?rev=928921&r1=928920&r2=928921&view=diff
==============================================================================
--- httpd/httpd/trunk/CHANGES [utf-8] (original)
+++ httpd/httpd/trunk/CHANGES [utf-8] Mon Mar 29 22:30:46 2010
@@ -28,6 +28,10 @@ Changes with Apache 2.3.7
      processing is completed, avoiding orphaned callback pointers.
      [Brett Gervasoni <brettg senseofsecurity.com>, Jeff Trawick]
 
+  *) htcacheclean: Introduce the ability to clean specific URLs from the
+     cache, if provided as an optional parameter on the command line.
+     [Graham Leggett]
+
   *) core: Introduce the IncludeStrict directive, which explicitly fails
      server startup if no files or directories match a wildcard path.
      [Graham Leggett]

Modified: httpd/httpd/trunk/docs/manual/programs/htcacheclean.xml
URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/docs/manual/programs/htcacheclean.xml?rev=928921&r1=928920&r2=928921&view=diff
==============================================================================
--- httpd/httpd/trunk/docs/manual/programs/htcacheclean.xml (original)
+++ httpd/httpd/trunk/docs/manual/programs/htcacheclean.xml Mon Mar 29 22:30:46 2010
@@ -29,9 +29,12 @@
     <p><code>htcacheclean</code> is used to keep the size of
     <module>mod_disk_cache</module>'s storage within a certain limit. This
     tool can run either manually or in daemon mode. When running in
-    daemon mode, it sleeps in the background and checks the cache directories
+    daemon mode, it sleeps in the background and checks the cache directory
     at regular intervals for cached content to be removed. You can stop the daemon
-    cleanly by sending it a TERM or INT signal.</p>
+    cleanly by sending it a TERM or INT signal. When run manually, a once off
+    check of the cache directory is made for cached content to be removed. If
+    one or more URLs are specified, each URL will be deleted from the cache, if
+    present.</p>
 </summary>
 <seealso><module>mod_disk_cache</module></seealso>
 
@@ -53,6 +56,13 @@
     -<strong>d</strong><var>interval</var>
     -<strong>p</strong><var>path</var>
     -<strong>l</strong><var>limit</var></code></p>
+
+    <p><code><strong>htcacheclean</strong>
+    [ -<strong>D</strong> ]
+    [ -<strong>v</strong> ]
+    [ -<strong>t</strong> ]
+    -<strong>p</strong><var>path</var>
+    <var>url</var></code></p>
 </section>
 
 <section id="options"><title>Options</title>
@@ -113,9 +123,28 @@
     </dl>
 </section>
 
+<section id="delete"><title>Deleting a specific URL</title>
+    <p>If <code>htcacheclean</code> is passed one or more URLs, each URL will
+    be deleted from the cache. If multiple variants of an URL exist, all
+    variants will be deleted.</p>
+
+    <p>When a reverse proxied URL is to be deleted, the effective URL is
+    constructed from the <strong>Host</strong> header, the
+    <strong>port</strong>, the <strong>path</strong> and the
+    <strong>query</strong>. Note the '?' in the URL must always be specified
+    explicitly, whether a query string is present or not. For example, in an
+    attempt to delete the path <strong>/</strong> from the server
+    <strong>localhost</strong>, the URL to delete would be
+    <strong>http://localhost:80/?</strong>.</p>
+
+</section>
+    
 <section id="exit"><title>Exit Status</title>
     <p><code>htcacheclean</code> returns a zero status ("true") if all
-    operations were successful, <code>1</code> otherwise.</p>
+    operations were successful, <code>1</code> otherwise. If an URL is
+    specified, and the URL was cached and successfully removed,
+    <code>0</code> is returned, <code>2</code> otherwise. If an error
+    occurred during URL removal, <code>1</code> is returned.</p>
 </section>
 
 </manualpage>

Modified: httpd/httpd/trunk/support/htcacheclean.c
URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/support/htcacheclean.c?rev=928921&r1=928920&r2=928921&view=diff
==============================================================================
--- httpd/httpd/trunk/support/htcacheclean.c (original)
+++ httpd/httpd/trunk/support/htcacheclean.c Mon Mar 29 22:30:46 2010
@@ -32,6 +32,7 @@
 #include "apr_thread_proc.h"
 #include "apr_signal.h"
 #include "apr_getopt.h"
+#include "apr_md5.h"
 #include "apr_ring.h"
 #include "apr_date.h"
 #include "apr_buckets.h"
@@ -722,6 +723,199 @@ static void purge(char *path, apr_pool_t
     }
 }
 
+/*
+ * Recursively remove the directory dir and everything below it.
+ * Returns APR_ENOENT, without a message, when a directory to be opened does
+ * not exist. Other failures to open a directory or to remove an entry are
+ * reported on errfile, stop the walk and are returned, leaving dir in place.
+ * APR_ENOTEMPTY is tolerated (dir then yields APR_SUCCESS), as the cache may
+ * have attempted to recreate a directory in the mean time.
+ */
+static apr_status_t remove_directory(apr_pool_t *pool, const char *dir)
+{
+    char errmsg[120];
+    apr_status_t rv;
+    apr_dir_t *handle;
+    apr_finfo_t entry;
+
+    rv = apr_dir_open(&handle, dir, pool);
+    if (rv != APR_SUCCESS) {
+        if (rv != APR_ENOENT) {
+            apr_file_printf(errfile, "Could not open directory %s: %s" APR_EOL_STR,
+                    dir, apr_strerror(rv, errmsg, sizeof errmsg));
+        }
+        return rv;
+    }
+
+    while (APR_SUCCESS == apr_dir_read(&entry,
+            APR_FINFO_DIRENT | APR_FINFO_TYPE, handle)) {
+        const char *path;
+        if (entry.filetype == APR_DIR
+                && (!strcmp(entry.name, ".") || !strcmp(entry.name, ".."))) {
+            continue;
+        }
+        path = apr_pstrcat(pool, dir, "/", entry.name, NULL);
+        if (entry.filetype == APR_DIR) {
+            rv = remove_directory(pool, path);
+            if (rv != APR_SUCCESS && rv != APR_ENOTEMPTY) {
+                break;
+            }
+            continue;
+        }
+        rv = apr_file_remove(path, pool);
+        if (rv != APR_SUCCESS) {
+            apr_file_printf(errfile, "Could not remove file '%s': %s" APR_EOL_STR,
+                    path, apr_strerror(rv, errmsg, sizeof errmsg));
+            break;
+        }
+    }
+
+    apr_dir_close(handle);
+    if (rv != APR_SUCCESS) {
+        return rv;
+    }
+    rv = apr_dir_remove(dir, pool);
+    if (rv != APR_SUCCESS && rv != APR_ENOTEMPTY) {
+        apr_file_printf(errfile, "Could not remove directory %s: %s" APR_EOL_STR,
+                dir, apr_strerror(rv, errmsg, sizeof errmsg));
+        return rv;
+    }
+    return APR_SUCCESS;
+}
+
+/*
+ * Look for the cache entry named by rest below base and, unless dryrun is
+ * set, remove it. Each subdirectory of base whose name is a prefix of rest
+ * is searched recursively with that prefix stripped. An entry is present in
+ * base if base holds a regular file named rest plus CACHE_HEADER_SUFFIX or
+ * CACHE_DATA_SUFFIX, or a directory named rest plus CACHE_HEADER_SUFFIX
+ * CACHE_VDIR_SUFFIX.
+ * Returns APR_SUCCESS if a recursive search succeeded. Otherwise returns
+ * APR_SUCCESS or the last removal failure if an entry was present in base,
+ * else the status of the last recursive search, or APR_ENOENT if none ran.
+ * A base that cannot be opened is reported on errfile and its status returned.
+ */
+static apr_status_t find_directory(apr_pool_t *pool, const char *base,
+        const char *rest)
+{
+    const char *header = apr_pstrcat(pool, rest, CACHE_HEADER_SUFFIX, NULL);
+    const char *data = apr_pstrcat(pool, rest, CACHE_DATA_SUFFIX, NULL);
+    const char *vdir = apr_pstrcat(pool, rest, CACHE_HEADER_SUFFIX,
+            CACHE_VDIR_SUFFIX, NULL);
+    const char *subdir = NULL;
+    int matched_below = 0, present = 0;
+    char errmsg[120];
+    apr_status_t rv;
+    apr_dir_t *handle;
+    apr_finfo_t entry;
+
+    rv = apr_dir_open(&handle, base, pool);
+    if (rv != APR_SUCCESS) {
+        apr_file_printf(errfile, "Could not open directory %s: %s" APR_EOL_STR,
+                base, apr_strerror(rv, errmsg, sizeof errmsg));
+        return rv;
+    }
+
+    rv = APR_ENOENT;
+    while (APR_SUCCESS == apr_dir_read(&entry,
+            APR_FINFO_DIRENT | APR_FINFO_TYPE, handle)) {
+        if (entry.filetype == APR_REG) {
+            if (!strcmp(entry.name, header) || !strcmp(entry.name, data)) {
+                present = 1;
+            }
+        }
+        else if (entry.filetype == APR_DIR) {
+            int len = strlen(entry.name);
+            int restlen = strlen(rest);
+            if (!strncmp(rest, entry.name, len)) {
+                subdir = apr_pstrcat(pool, base, "/", entry.name, NULL);
+                rv = find_directory(pool, subdir,
+                        rest + (len < restlen ? len : restlen));
+                if (rv == APR_SUCCESS) {
+                    matched_below = 1;
+                }
+            }
+            if (!strcmp(entry.name, vdir)) {
+                present = 1;
+            }
+        }
+    }
+    apr_dir_close(handle);
+
+    if (present) {
+        rv = APR_SUCCESS;
+    }
+    if (present && !dryrun) {
+        const char *names[2];
+        apr_status_t status;
+        int i;
+
+        names[0] = header;
+        names[1] = data;
+        for (i = 0; i < 2; i++) {
+            const char *path = apr_pstrcat(pool, base, "/", names[i], NULL);
+            status = apr_file_remove(path, pool);
+            if (status != APR_SUCCESS && status != APR_ENOENT) {
+                apr_file_printf(errfile, "Could not remove file %s: %s" APR_EOL_STR,
+                        path, apr_strerror(status, errmsg, sizeof errmsg));
+                rv = status;
+            }
+        }
+        status = remove_directory(pool, apr_pstrcat(pool, base, "/", vdir, NULL));
+        if (status != APR_SUCCESS && status != APR_ENOENT) {
+            rv = status;
+        }
+    }
+
+    /* With deldirs set, also try to remove the last subdirectory searched.
+     * A failure is ignored, it likely means there was something else there.
+     */
+    if (subdir && deldirs && !dryrun) {
+        apr_dir_remove(subdir, pool);
+    }
+
+    return matched_below ? APR_SUCCESS : rv;
+}
+
+/**
+ * Delete a specific URL from the cache.
+ *
+ * The URL is hashed with MD5 and the 128 bit digest is rendered as a 22
+ * character name using a 64 character alphabet (a modified uuencoding);
+ * find_directory() then looks for that name below proxypath.
+ * @param pool pool handed on to find_directory()
+ * @param proxypath directory the search starts in
+ * @param url URL whose hash names the entry to look for
+ * @return the status returned by find_directory()
+ */
+static apr_status_t delete_url(apr_pool_t *pool, const char *proxypath, const char *url)
+{
+    static const char alphabet[64] =
+            "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_@";
+    unsigned char md5[16];
+    char name[23];
+    char *out = name;
+    unsigned int bits;
+    int group;
+
+    apr_md5(md5, url, strlen(url));
+
+    /* 15 bytes as five groups of 3 bytes -> 4 characters each */
+    for (group = 0; group < 5; group++) {
+        const unsigned char *in = md5 + 3 * group;
+        bits = (in[0] << 16) | (in[1] << 8) | in[2];
+        *out++ = alphabet[bits >> 18];
+        *out++ = alphabet[(bits >> 12) & 0x3f];
+        *out++ = alphabet[(bits >> 6) & 0x3f];
+        *out++ = alphabet[bits & 0x3f];
+    }
+    bits = md5[15];                          /* one byte left -> 2 characters */
+    *out++ = alphabet[bits >> 2];            /* upper 6 bits */
+    *out++ = alphabet[(bits << 4) & 0x3f];   /* lower 2 bits, zero padded */
+    *out = 0;
+    return find_directory(pool, proxypath, name); /* levels found automatically */
+}
+
 /*
  * usage info
  */
@@ -735,6 +929,7 @@ static void usage(const char *error)
     "%s -- program for cleaning the disk cache."                             NL
     "Usage: %s [-Dvtrn] -pPATH -lLIMIT [-PPIDFILE]"                          NL
     "       %s [-nti] -dINTERVAL -pPATH -lLIMIT [-PPIDFILE]"                 NL
+    "       %s [-Dvt] -pPATH URL ..."                                        NL
                                                                              NL
     "Options:"                                                               NL
     "  -d   Daemonize and repeat cache cleaning every INTERVAL minutes."     NL
@@ -767,7 +962,15 @@ static void usage(const char *error)
                                                                              NL
     "  -i   Be intelligent and run only when there was a modification of"    NL
     "       the disk cache. This option is only possible together with the"  NL
-    "       -d option."                                                      NL,
+    "       -d option."                                                      NL
+                                                                             NL
+    "Should an URL be provided on the command line, the URL will be"         NL
+    "deleted from the cache. A reverse proxied URL is made up as follows:"   NL
+    "http://<hostname>:<port><path>?[query]. So, for the path \"/\" on the"  NL
+    "host \"localhost\" and port 80, the URL to delete becomes"              NL
+    "\"http://localhost:80/?\". Note the '?' in the URL must always be"      NL
+    "specified explicitly, whether a query string is present or not."        NL,
+    shortname,
     shortname,
     shortname,
     shortname
@@ -980,8 +1183,43 @@ int main(int argc, const char * const ar
         usage(NULL);
     }
 
-    if (o->ind != argc) {
-         usage("Additional parameters specified on the command line, aborting");
+    if (o->ind < argc) {
+        int deleted = 0;
+        int error = 0;
+        if (isdaemon) {
+            usage("Option -d cannot be used with URL arguments, aborting");
+        }
+        if (intelligent) {
+            usage("Option -i cannot be used with URL arguments, aborting");
+        }
+        if (limit_found) {
+            usage("Option -l cannot be used with URL arguments, aborting");
+        }
+        while (o->ind < argc) {
+            status = delete_url(pool, proxypath, argv[o->ind]);
+            if (APR_SUCCESS == status) {
+                if (verbose) {
+                    apr_file_printf(errfile, "Removed: %s" APR_EOL_STR,
+                            argv[o->ind]);
+                }
+                deleted = 1;
+            }
+            else if (APR_ENOENT == status) {
+                if (verbose) {
+                    apr_file_printf(errfile, "Not cached: %s" APR_EOL_STR,
+                            argv[o->ind]);
+                }
+            }
+            else {
+                if (verbose) {
+                    apr_file_printf(errfile, "Error while removed: %s" APR_EOL_STR,
+                            argv[o->ind]);
+                }
+                error = 1;
+            }
+            o->ind++;
+        }
+        return error ? 1 : deleted ? 0 : 2;
     }
 
     if (isdaemon && repeat <= 0) {