You are viewing a plain text version of this content. The canonical link for it is here.
Posted to cvs@httpd.apache.org by mi...@apache.org on 2020/07/03 19:48:54 UTC

svn commit: r1879488 - /httpd/httpd/trunk/server/util_etag.c

Author: minfrin
Date: Fri Jul  3 19:48:53 2020
New Revision: 1879488

URL: http://svn.apache.org/viewvc?rev=1879488&view=rev
Log:
Add MMAP support to ETag generation.

Modified:
    httpd/httpd/trunk/server/util_etag.c

Modified: httpd/httpd/trunk/server/util_etag.c
URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/server/util_etag.c?rev=1879488&r1=1879487&r2=1879488&view=diff
==============================================================================
--- httpd/httpd/trunk/server/util_etag.c (original)
+++ httpd/httpd/trunk/server/util_etag.c Fri Jul  3 19:48:53 2020
@@ -30,6 +30,10 @@
 #include "http_protocol.h"   /* For index_of_response().  Grump. */
 #include "http_request.h"
 
+#if APR_HAS_MMAP
+#include "apr_mmap.h"
+#endif /* APR_HAS_MMAP */
+
 #define SHA1_DIGEST_BASE64_LEN 4*(APR_SHA1_DIGESTSIZE/3)
 
 /* Generate the human-readable hex representation of an apr_uint64_t
@@ -84,6 +88,113 @@ static void etag_end(char *next, const c
 }
 
 /*
+ * Construct a strong ETag by creating a SHA1 hash across the file content.
+ */
+static char *make_digest_etag(request_rec *r, etag_rec *er, char *vlv,
+		apr_size_t vlv_len, char *weak, apr_size_t weak_len)
+{
+    apr_sha1_ctx_t context;
+    unsigned char buf[4096]; /* keep this a multiple of 64 */
+    unsigned char digest[APR_SHA1_DIGESTSIZE];
+    apr_file_t *fd = NULL;
+    core_dir_config *cfg;
+    char *etag, *next;
+
+    apr_size_t nbytes;
+    apr_off_t offset = 0, zero = 0;
+    apr_status_t status;
+
+    cfg = (core_dir_config *)ap_get_core_module_config(r->per_dir_config);
+
+    if (er->fd) {
+        fd = er->fd;
+    }
+    else if (er->pathname) {
+        if ((status = apr_file_open(&fd, er->pathname, APR_READ | APR_BINARY,
+                0, r->pool)) != APR_SUCCESS) {
+            ap_log_rerror(APLOG_MARK, APLOG_ERR, status, r, APLOGNO(10251)
+                          "Make etag: could not open %s", er->pathname);
+            return "";
+        }
+    }
+    if (!fd) {
+        return "";
+    }
+
+    if ((status = apr_file_seek(fd, APR_CUR, &offset)) != APR_SUCCESS) {
+        ap_log_rerror(APLOG_MARK, APLOG_ERR, status, r, APLOGNO(10252)
+                      "Make etag: could not seek");
+        if (er->pathname) {
+            apr_file_close(fd);
+        }
+        return "";
+    }
+
+    if ((status = apr_file_seek(fd, APR_SET, &zero)) != APR_SUCCESS) {
+        ap_log_rerror(APLOG_MARK, APLOG_ERR, status, r, APLOGNO(10253)
+                      "Make etag: could not seek");
+        if (er->pathname) {
+            apr_file_close(fd);
+        }
+        return "";
+    }
+
+#if APR_HAS_MMAP
+    if (cfg->enable_mmap != ENABLE_MMAP_OFF && er->fd &&
+            er->finfo->size >= APR_MMAP_THRESHOLD &&
+            er->finfo->size < APR_MMAP_LIMIT) {
+        apr_mmap_t *mm;
+
+        if ((status = apr_mmap_create(&mm, er->fd, 0, er->finfo->size,
+                            APR_MMAP_READ, r->pool) != APR_SUCCESS)) {
+            ap_log_rerror(APLOG_MARK, APLOG_ERR, status, r, APLOGNO(10253)
+                          "Make etag: could not mmap");
+            return "";
+        }
+
+    }
+#endif
+
+    apr_sha1_init(&context);
+    nbytes = sizeof(buf);
+    while ((status = apr_file_read(fd, buf, &nbytes)) == APR_SUCCESS) {
+        apr_sha1_update_binary(&context, buf, nbytes);
+        nbytes = sizeof(buf);
+    }
+    if (status != APR_EOF) {
+        ap_log_rerror(APLOG_MARK, APLOG_ERR, status, r, APLOGNO(10254)
+                      "Make etag: could not read");
+        if (er->pathname) {
+            apr_file_close(fd);
+        }
+        return "";
+    }
+
+    if ((status = apr_file_seek(fd, APR_SET, &offset)) != APR_SUCCESS) {
+        ap_log_rerror(APLOG_MARK, APLOG_ERR, status, r, APLOGNO(10255)
+                      "Make etag: could not seek");
+        if (er->pathname) {
+            apr_file_close(fd);
+        }
+        return "";
+    }
+    apr_sha1_final(digest, &context);
+
+    etag = apr_palloc(r->pool, weak_len + sizeof("\"\"") +
+            SHA1_DIGEST_BASE64_LEN + vlv_len + 4);
+
+    etag_start(etag, weak, &next);
+    next += apr_base64_encode_binary(next, digest, APR_SHA1_DIGESTSIZE) - 1;
+    etag_end(next, vlv, vlv_len);
+
+    if (er->pathname) {
+        apr_file_close(fd);
+    }
+
+    return etag;
+}
+
+/*
  * Construct an entity tag (ETag) from resource information.  If it's a real
  * file, build in some of the file characteristics.  If the modification time
  * is newer than (request-time minus 1 second), mark the ETag as weak - it
@@ -93,8 +204,7 @@ AP_DECLARE(char *) ap_make_etag_ex(reque
 {
     char *weak = NULL;
     apr_size_t weak_len = 0, vlv_len = 0;
-    char *etag, *vlv;
-    char *next;
+    char *etag, *next, *vlv;
     core_dir_config *cfg;
     etag_components_t etag_bits;
     etag_components_t bits_added;
@@ -149,85 +259,7 @@ AP_DECLARE(char *) ap_make_etag_ex(reque
     if (er->finfo->filetype == APR_REG &&
             (AP_REQUEST_IS_STRONG_ETAG(r) || (etag_bits & ETAG_DIGEST))) {
 
-        apr_sha1_ctx_t context;
-        unsigned char buf[4096]; /* keep this a multiple of 64 */
-        unsigned char digest[APR_SHA1_DIGESTSIZE];
-        apr_file_t *fd = NULL;
-
-        apr_size_t nbytes;
-        apr_off_t offset = 0, zero = 0;
-        apr_status_t status;
-
-        if (er->fd) {
-            fd = er->fd;
-        }
-        else if (er->pathname) {
-            if ((status = apr_file_open(&fd, er->pathname, APR_READ | APR_BINARY,
-                    0, r->pool)) != APR_SUCCESS) {
-                ap_log_rerror(APLOG_MARK, APLOG_ERR, status, r, APLOGNO(10251)
-                              "Make etag: could not open %s", er->pathname);
-                return "";
-            }
-        }
-        if (!fd) {
-            return "";
-        }
-
-        etag = apr_palloc(r->pool, weak_len + sizeof("\"\"") +
-                SHA1_DIGEST_BASE64_LEN + vlv_len + 4);
-
-        if ((status = apr_file_seek(fd, APR_CUR, &offset)) != APR_SUCCESS) {
-            ap_log_rerror(APLOG_MARK, APLOG_ERR, status, r, APLOGNO(10252)
-                          "Make etag: could not seek");
-            if (er->pathname) {
-                apr_file_close(fd);
-            }
-            return "";
-        }
-
-        if ((status = apr_file_seek(fd, APR_SET, &zero)) != APR_SUCCESS) {
-            ap_log_rerror(APLOG_MARK, APLOG_ERR, status, r, APLOGNO(10253)
-                          "Make etag: could not seek");
-            if (er->pathname) {
-                apr_file_close(fd);
-            }
-            return "";
-        }
-
-        apr_sha1_init(&context);
-        nbytes = sizeof(buf);
-        while ((status = apr_file_read(fd, buf, &nbytes)) == APR_SUCCESS) {
-            apr_sha1_update_binary(&context, buf, nbytes);
-            nbytes = sizeof(buf);
-        }
-        if (status != APR_EOF) {
-            ap_log_rerror(APLOG_MARK, APLOG_ERR, status, r, APLOGNO(10254)
-                          "Make etag: could not read");
-            if (er->pathname) {
-                apr_file_close(fd);
-            }
-            return "";
-        }
-
-        if ((status = apr_file_seek(fd, APR_SET, &offset)) != APR_SUCCESS) {
-            ap_log_rerror(APLOG_MARK, APLOG_ERR, status, r, APLOGNO(10255)
-                          "Make etag: could not seek");
-            if (er->pathname) {
-                apr_file_close(fd);
-            }
-            return "";
-        }
-        apr_sha1_final(digest, &context);
-
-        etag_start(etag, weak, &next);
-        next += apr_base64_encode_binary(next, digest, APR_SHA1_DIGESTSIZE) - 1;
-        etag_end(next, vlv, vlv_len);
-
-        if (er->pathname) {
-            apr_file_close(fd);
-        }
-
-        return etag;
+        return make_digest_etag(r, er, vlv, vlv_len, weak, weak_len);
     }
 
     /*



Re: svn commit: r1879488 - /httpd/httpd/trunk/server/util_etag.c

Posted by Yann Ylavic <yl...@gmail.com>.
On Mon, Jul 6, 2020 at 12:37 PM Graham Leggett <mi...@sharp.fm> wrote:
>
> On 06 Jul 2020, at 09:28, Ruediger Pluem <rp...@apache.org> wrote:
>
> +    apr_sha1_init(&context);
> +    nbytes = sizeof(buf);
> +    while ((status = apr_file_read(fd, buf, &nbytes)) == APR_SUCCESS) {
>
>
> Why do we still use apr_file_read in case we use MMAP? IMHO we should use apr_mmap_offset.
> But we would need to consider the amount of memory we map at the same time for large files
> in order to avoid exhausting the memory. So I would propose that we create a brigade with one
> filebucket and do bucket reads. So like the following code from the default handler (excluding the reading):
>
>        bb = apr_brigade_create(r->pool, c->bucket_alloc);
>
>        e = apr_brigade_insert_file(bb, fd, 0, r->finfo.size, r->pool);
>
> #if APR_HAS_MMAP
>            if (d->enable_mmap == ENABLE_MMAP_OFF) {
>                (void)apr_bucket_file_enable_mmap(e, 0);
>            }
> #endif
>
> Of course that would require to get the finfo in case we open the file descriptor on our own.
>
> The reading would then be something like (without error handling):
>
> while (!APR_BRIGADE_EMPTY(bb))
> {
>      e = APR_BUCKET_FIRST(bb)
>      apr_bucket_read(e, &str, &len);
>      apr_sha1_update_binary(&context, str, len);
>      apr_bucket_delete(e);
> }
>
>
> Makes much more sense - r1879541.

FWIW, I don't think MMAP-ing sequentially is better/faster than just
read()ing, it could be double work with the (file)system cache.
But code is simpler and EnableMMap off does it well, so +1.


Regards;
Yann.

Re: svn commit: r1879488 - /httpd/httpd/trunk/server/util_etag.c

Posted by Graham Leggett <mi...@sharp.fm>.
On 06 Jul 2020, at 09:28, Ruediger Pluem <rp...@apache.org> wrote:

>> +    apr_sha1_init(&context);
>> +    nbytes = sizeof(buf);
>> +    while ((status = apr_file_read(fd, buf, &nbytes)) == APR_SUCCESS) {
> 
> Why do we still use apr_file_read in case we use MMAP? IMHO we should use apr_mmap_offset.
> But we would need to consider the amount of memory we map at the same time for large files
> in order to avoid exhausting the memory. So I would propose that we create a brigade with one
> filebucket and do bucket reads. So like the following code from the default handler (excluding the reading):
> 
>        bb = apr_brigade_create(r->pool, c->bucket_alloc);
> 
>        e = apr_brigade_insert_file(bb, fd, 0, r->finfo.size, r->pool);
> 
> #if APR_HAS_MMAP
>            if (d->enable_mmap == ENABLE_MMAP_OFF) {
>                (void)apr_bucket_file_enable_mmap(e, 0);
>            }
> #endif
> 
> Of course that would require to get the finfo in case we open the file descriptor on our own.
> 
> The reading would then be something like (without error handling):
> 
> while (!APR_BRIGADE_EMPTY(bb))
> {
>      e = APR_BUCKET_FIRST(bb)
>      apr_bucket_read(e, &str, &len);
>      apr_sha1_update_binary(&context, str, len);
>      apr_bucket_delete(e);
> }

Makes much more sense - r1879541.

Regards,
Graham
—


Re: svn commit: r1879488 - /httpd/httpd/trunk/server/util_etag.c

Posted by Ruediger Pluem <rp...@apache.org>.

On 7/3/20 9:48 PM, minfrin@apache.org wrote:
> Author: minfrin
> Date: Fri Jul  3 19:48:53 2020
> New Revision: 1879488
> 
> URL: http://svn.apache.org/viewvc?rev=1879488&view=rev
> Log:
> Add MMAP support to ETag generation.
> 
> Modified:
>     httpd/httpd/trunk/server/util_etag.c
> 
> Modified: httpd/httpd/trunk/server/util_etag.c
> URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/server/util_etag.c?rev=1879488&r1=1879487&r2=1879488&view=diff
> ==============================================================================
> --- httpd/httpd/trunk/server/util_etag.c (original)
> +++ httpd/httpd/trunk/server/util_etag.c Fri Jul  3 19:48:53 2020

> @@ -84,6 +88,113 @@ static void etag_end(char *next, const c
>  }
>  
>  /*
> + * Construct a strong ETag by creating a SHA1 hash across the file content.
> + */
> +static char *make_digest_etag(request_rec *r, etag_rec *er, char *vlv,
> +		apr_size_t vlv_len, char *weak, apr_size_t weak_len)
> +{
> +    apr_sha1_ctx_t context;
> +    unsigned char buf[4096]; /* keep this a multiple of 64 */
> +    unsigned char digest[APR_SHA1_DIGESTSIZE];
> +    apr_file_t *fd = NULL;
> +    core_dir_config *cfg;
> +    char *etag, *next;
> +
> +    apr_size_t nbytes;
> +    apr_off_t offset = 0, zero = 0;
> +    apr_status_t status;
> +
> +    cfg = (core_dir_config *)ap_get_core_module_config(r->per_dir_config);
> +
> +    if (er->fd) {
> +        fd = er->fd;
> +    }
> +    else if (er->pathname) {
> +        if ((status = apr_file_open(&fd, er->pathname, APR_READ | APR_BINARY,
> +                0, r->pool)) != APR_SUCCESS) {
> +            ap_log_rerror(APLOG_MARK, APLOG_ERR, status, r, APLOGNO(10251)
> +                          "Make etag: could not open %s", er->pathname);
> +            return "";
> +        }
> +    }
> +    if (!fd) {
> +        return "";
> +    }
> +
> +    if ((status = apr_file_seek(fd, APR_CUR, &offset)) != APR_SUCCESS) {
> +        ap_log_rerror(APLOG_MARK, APLOG_ERR, status, r, APLOGNO(10252)
> +                      "Make etag: could not seek");
> +        if (er->pathname) {
> +            apr_file_close(fd);
> +        }
> +        return "";
> +    }
> +
> +    if ((status = apr_file_seek(fd, APR_SET, &zero)) != APR_SUCCESS) {
> +        ap_log_rerror(APLOG_MARK, APLOG_ERR, status, r, APLOGNO(10253)
> +                      "Make etag: could not seek");
> +        if (er->pathname) {
> +            apr_file_close(fd);
> +        }
> +        return "";
> +    }
> +
> +#if APR_HAS_MMAP
> +    if (cfg->enable_mmap != ENABLE_MMAP_OFF && er->fd &&
> +            er->finfo->size >= APR_MMAP_THRESHOLD &&
> +            er->finfo->size < APR_MMAP_LIMIT) {
> +        apr_mmap_t *mm;
> +
> +        if ((status = apr_mmap_create(&mm, er->fd, 0, er->finfo->size,
> +                            APR_MMAP_READ, r->pool) != APR_SUCCESS)) {
> +            ap_log_rerror(APLOG_MARK, APLOG_ERR, status, r, APLOGNO(10253)
> +                          "Make etag: could not mmap");
> +            return "";
> +        }
> +
> +    }
> +#endif
> +
> +    apr_sha1_init(&context);
> +    nbytes = sizeof(buf);
> +    while ((status = apr_file_read(fd, buf, &nbytes)) == APR_SUCCESS) {

Why do we still use apr_file_read in case we use MMAP? IMHO we should use apr_mmap_offset.
But we would need to consider the amount of memory we map at the same time for large files
in order to avoid exhausting the memory. So I would propose that we create a brigade with one
filebucket and do bucket reads. So like the following code from the default handler (excluding the reading):

        bb = apr_brigade_create(r->pool, c->bucket_alloc);

        e = apr_brigade_insert_file(bb, fd, 0, r->finfo.size, r->pool);

#if APR_HAS_MMAP
            if (d->enable_mmap == ENABLE_MMAP_OFF) {
                (void)apr_bucket_file_enable_mmap(e, 0);
            }
#endif

Of course that would require to get the finfo in case we open the file descriptor on our own.

The reading would then be something like (without error handling):

while (!APR_BRIGADE_EMPTY(bb))
{
      e = APR_BUCKET_FIRST(bb)
      apr_bucket_read(e, &str, &len);
      apr_sha1_update_binary(&context, str, len);
      apr_bucket_delete(e);
}


Regards

Rüdiger