You are viewing a plain text version of this content. The canonical link for it is here.
Posted to cvs@httpd.apache.org by yl...@apache.org on 2022/01/20 11:09:34 UTC

svn commit: r1897240 - in /httpd/httpd/trunk/server: main.c util_pcre.c

Author: ylavic
Date: Thu Jan 20 11:09:34 2022
New Revision: 1897240

URL: http://svn.apache.org/viewvc?rev=1897240&view=rev
Log:
ap_regex: Use Thread Local Storage (if efficient) to avoid allocations.

PCRE2 wants an opaque context by providing the API to allocate and free it, so
to minimize these calls we maintain one opaque context per thread (in Thread
Local Storage, TLS) grown as needed, and while at it we do the same for PCRE1
ints vectors. Note that this requires a fast TLS mechanism to be worth it,
which is the case of apr_thread_data_get/set() from/to apr_thread_current()
when APR_HAS_THREAD_LOCAL; otherwise we'll do the allocation and freeing for
each ap_regexec().

The small stack vector is used for PCRE1 && !APR_HAS_THREAD_LOCAL only now.


Modified:
    httpd/httpd/trunk/server/main.c
    httpd/httpd/trunk/server/util_pcre.c

Modified: httpd/httpd/trunk/server/main.c
URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/server/main.c?rev=1897240&r1=1897239&r2=1897240&view=diff
==============================================================================
--- httpd/httpd/trunk/server/main.c (original)
+++ httpd/httpd/trunk/server/main.c Thu Jan 20 11:09:34 2022
@@ -338,6 +338,15 @@ static void reset_process_pconf(process_
     apr_pool_pre_cleanup_register(process->pconf, NULL, deregister_all_hooks);
 }
 
+#if APR_HAS_THREAD_LOCAL
+static apr_status_t main_thread_exit_cleanup(void *arg)
+{
+    apr_thread_t *thd = arg;
+    apr_pool_destroy(apr_thread_pool_get(thd));
+    return APR_SUCCESS;
+}
+#endif
+
 static process_rec *init_process(int *argc, const char * const * *argv)
 {
     process_rec *process;
@@ -388,6 +397,34 @@ static process_rec *init_process(int *ar
     process->argc = *argc;
     process->argv = *argv;
     process->short_name = apr_filepath_name_get((*argv)[0]);
+
+#if APR_HAS_THREAD_LOCAL
+    /* Create an apr_thread_t for the main thread to set up its
+     * Thread Local Storage. Since it's detached and it won't
+     * apr_thread_exit(), destroy its pool before exiting via
+     * a process->pool cleanup
+     */
+    {
+        apr_thread_t *main_thd;
+        apr_threadattr_t *main_thd_attr = NULL;
+        if (apr_threadattr_create(&main_thd_attr, process->pool)
+                || apr_threadattr_detach_set(main_thd_attr, 1)
+                || apr_thread_current_create(&main_thd, main_thd_attr,
+                                             process->pool)) {
+            char ctimebuff[APR_CTIME_LEN];
+            apr_ctime(ctimebuff, apr_time_now());
+            fprintf(stderr, "[%s] [crit] (%d) %s: %s failed "
+                            "to initialize thread context, exiting\n",
+                            ctimebuff, stat, (*argv)[0], failed);
+            apr_terminate();
+            exit(1);
+        }
+        apr_pool_cleanup_register(process->pool, main_thd,
+                                  main_thread_exit_cleanup,
+                                  apr_pool_cleanup_null);
+    }
+#endif
+
     return process;
 }
 

Modified: httpd/httpd/trunk/server/util_pcre.c
URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/server/util_pcre.c?rev=1897240&r1=1897239&r2=1897240&view=diff
==============================================================================
--- httpd/httpd/trunk/server/util_pcre.c (original)
+++ httpd/httpd/trunk/server/util_pcre.c Thu Jan 20 11:09:34 2022
@@ -53,6 +53,8 @@ POSSIBILITY OF SUCH DAMAGE.
 */
 
 #include "httpd.h"
+#include "apr_version.h"
+#include "apr_portable.h"
 #include "apr_strings.h"
 #include "apr_tables.h"
 
@@ -263,7 +265,122 @@ AP_DECLARE(int) ap_regcomp(ap_regex_t *
  * ints. However, if the number of possible capturing brackets is small, use a
  * block of store on the stack, to reduce the use of malloc/free. The threshold
  * is in a macro that can be changed at configure time.
+ * Yet more unfortunately, PCRE2 wants an opaque context by providing the API
+ * to allocate and free it, so to minimize these calls we maintain one opaque
+ * context per thread (in Thread Local Storage, TLS) grown as needed, and while
+ * at it we do the same for PCRE1 ints vectors. Note that this requires a fast
+ * TLS mechanism to be worth it, which is the case of apr_thread_data_get/set()
+ * from/to apr_thread_current() when APR_HAS_THREAD_LOCAL; otherwise we'll do
+ * the allocation and freeing for each ap_regexec().
  */
+
+#ifdef HAVE_PCRE2
+typedef pcre2_match_data* match_data_pt;
+typedef size_t*           match_vector_pt;
+#else
+typedef int*              match_data_pt;
+typedef int*              match_vector_pt;
+#endif
+
+#if APR_HAS_THREAD_LOCAL
+
+static match_data_pt get_match_data(apr_size_t size,
+                                    match_vector_pt *ovector,
+                                    match_vector_pt small_vector)
+{
+    apr_thread_t *current;
+    struct {
+        match_data_pt data;
+        apr_size_t size;
+    } *tls = NULL;
+
+    /* APR_HAS_THREAD_LOCAL garantees this works */
+    current = apr_thread_current();
+    ap_assert(current != NULL);
+
+    apr_thread_data_get((void **)&tls, "apreg", current);
+    if (!tls || tls->size < size) {
+        apr_pool_t *tp = apr_thread_pool_get(current);
+        if (tls) {
+#ifdef HAVE_PCRE2
+            pcre2_match_data_free(tls->data); /* NULL safe */
+#endif
+        }
+        else {
+            tls = apr_pcalloc(tp, sizeof(*tls));
+            apr_thread_data_set(tls, "apreg", NULL, current);
+        }
+        tls->size *= 2;
+        if (tls->size < size) {
+            tls->size = size;
+            if (tls->size < POSIX_MALLOC_THRESHOLD) {
+                tls->size = POSIX_MALLOC_THRESHOLD;
+            }
+        }
+#ifdef HAVE_PCRE2
+        tls->data = pcre2_match_data_create(tls->size, NULL);
+#else
+        tls->data = apr_palloc(tp, tls->size * sizeof(int) * 3);
+#endif
+        if (!tls->data) {
+            tls->size = 0;
+            return NULL;
+        }
+    }
+
+#ifdef HAVE_PCRE2
+    *ovector = pcre2_get_ovector_pointer(tls->data);
+#else
+    *vector = tls->data;
+#endif
+    return tls->data;
+}
+
+/* Nothing to put back with thread local */
+static APR_INLINE void put_match_data(match_data_pt data,
+                                      apr_size_t size)
+{ }
+
+#else /* !APR_HAS_THREAD_LOCAL */
+
+/* Always allocate/free without thread local */
+
+static match_data_pt get_match_data(apr_size_t size,
+                                    match_vector_pt *ovector,
+                                    match_vector_pt small_vector)
+{
+    match_data_pt data;
+
+#ifdef HAVE_PCRE2
+    data = pcre2_match_data_create(size, NULL);
+    *ovector = pcre2_get_ovector_pointer(data);
+#else
+    if (size > POSIX_MALLOC_THRESHOLD) {
+        data = malloc(size * sizeof(int) * 3);
+    }
+    else {
+        data = small_vector;
+    }
+    *ovector = data;
+#endif
+
+    return data;
+}
+
+static APR_INLINE void put_match_data(match_data_pt data,
+                                      apr_size_t size)
+{
+#ifdef HAVE_PCRE2
+    pcre2_match_data_free(data);
+#else
+    if (size > POSIX_MALLOC_THRESHOLD) {
+        free(data);
+    }
+#endif
+}
+
+#endif /* !APR_HAS_THREAD_LOCAL */
+
 AP_DECLARE(int) ap_regexec(const ap_regex_t *preg, const char *string,
                            apr_size_t nmatch, ap_regmatch_t *pmatch,
                            int eflags)
@@ -278,16 +395,20 @@ AP_DECLARE(int) ap_regexec_len(const ap_
 {
     int rc;
     int options = 0;
-    apr_size_t nlim;
-#ifdef HAVE_PCRE2
-    pcre2_match_data *matchdata;
-    size_t *ovector;
+    match_vector_pt ovector = NULL;
+    apr_size_t nlim = ((apr_size_t)preg->re_nsub + 1) > nmatch
+                    ? ((apr_size_t)preg->re_nsub + 1) : nmatch;
+#if defined(HAVE_PCRE2) || APR_HAS_THREAD_LOCAL
+    match_data_pt data = get_match_data(nlim, &ovector, NULL);
 #else
-    int small_ovector[POSIX_MALLOC_THRESHOLD * 3];
-    int allocated_ovector = 0;
-    int *ovector = NULL;
+    int small_vector[POSIX_MALLOC_THRESHOLD * 3];
+    match_data_pt data = get_match_data(nlim, &ovector, small_vector);
 #endif
 
+    if (!data) {
+        return AP_REG_ESPACE;
+    }
+
     if ((eflags & AP_REG_NOTBOL) != 0)
         options |= PCREn(NOTBOL);
     if ((eflags & AP_REG_NOTEOL) != 0)
@@ -298,61 +419,30 @@ AP_DECLARE(int) ap_regexec_len(const ap_
         options |= PCREn(ANCHORED);
 
 #ifdef HAVE_PCRE2
-    /* TODO: create a generic TLS matchdata buffer of some nmatch limit,
-     * e.g. 10 matches, to avoid a malloc-per-call. If it must be allocated,
-     * implement a general context using palloc and no free implementation.
-     */
-    nlim = ((apr_size_t)preg->re_nsub + 1) > nmatch
-         ? ((apr_size_t)preg->re_nsub + 1) : nmatch;
-    matchdata = pcre2_match_data_create(nlim, NULL);
-    if (matchdata == NULL)
-        return AP_REG_ESPACE;
-    ovector = pcre2_get_ovector_pointer(matchdata);
     rc = pcre2_match((const pcre2_code *)preg->re_pcre,
                      (const unsigned char *)buff, len,
-                     0, options, matchdata, NULL);
-    if (rc == 0)
-        rc = nlim;            /* All captured slots were filled in */
-#else
-    if (nmatch > 0) {
-        if (nmatch <= POSIX_MALLOC_THRESHOLD) {
-            ovector = &(small_ovector[0]);
-        }
-        else {
-            ovector = (int *)malloc(sizeof(int) * nmatch * 3);
-            if (ovector == NULL)
-                return AP_REG_ESPACE;
-            allocated_ovector = 1;
-        }
-    }
+                     0, options, data, NULL);
+#else
     rc = pcre_exec((const pcre *)preg->re_pcre, NULL, buff, (int)len,
-                   0, options, ovector, nmatch * 3);
-    if (rc == 0)
-        rc = nmatch;            /* All captured slots were filled in */
+                   0, options, ovector, nlim * 3);
 #endif
 
     if (rc >= 0) {
-        apr_size_t i;
-        nlim = (apr_size_t)rc < nmatch ? (apr_size_t)rc : nmatch;
-        for (i = 0; i < nlim; i++) {
+        apr_size_t n, i;
+        if (rc == 0)
+            rc = nlim; /* All captured slots were filled in */
+        n = (apr_size_t)rc < nmatch ? (apr_size_t)rc : nmatch;
+        for (i = 0; i < n; i++) {
             pmatch[i].rm_so = ovector[i * 2];
             pmatch[i].rm_eo = ovector[i * 2 + 1];
         }
         for (; i < nmatch; i++)
             pmatch[i].rm_so = pmatch[i].rm_eo = -1;
-    }
-
-#ifdef HAVE_PCRE2
-    pcre2_match_data_free(matchdata);
-#else
-    if (allocated_ovector)
-        free(ovector);
-#endif
-
-    if (rc >= 0) {
+        put_match_data(data, nlim);
         return 0;
     }
     else {
+        put_match_data(data, nlim);
 #ifdef HAVE_PCRE2
         if (rc <= PCRE2_ERROR_UTF8_ERR1 && rc >= PCRE2_ERROR_UTF8_ERR21)
             return AP_REG_INVARG;



Re: svn commit: r1897240 - in /httpd/httpd/trunk/server: main.c util_pcre.c

Posted by William A Rowe Jr <wr...@rowe-clan.net>.
On Thu, Jan 20, 2022 at 10:33 AM Yann Ylavic <yl...@gmail.com> wrote:
>
> On Thu, Jan 20, 2022 at 3:53 PM William A Rowe Jr <wr...@rowe-clan.net> wrote:
> >
> > pcre1 is very dangerous, on stack. pcre2 is highly cautioned against
> > using stack for
> > its arrays, by its author. We should heed the advice.
> Not sure if I can do that with PCRE2 if it always
> --no-recurse-on-stack, probably if pcre2_malloc/free are still
> override-able but I didn't look into it yet (until then I'll keep
> using PCRE1..).

To reiterate; PCRE1 is EOL and irreparable from the security
reports which the author has received and cannot share, so it
is very unwise, end of statement.

That's why PCRE2 got written.

Cheers,

Bill

Re: svn commit: r1897240 - in /httpd/httpd/trunk/server: main.c util_pcre.c

Posted by Yann Ylavic <yl...@gmail.com>.
On Thu, Jan 20, 2022 at 3:53 PM William A Rowe Jr <wr...@rowe-clan.net> wrote:
>
> pcre1 is very dangerous, on stack. pcre2 is highly cautioned against
> using stack for
> its arrays, by its author. We should heed the advice.

My iterative changes make it possible to use PCRE1 on heap (at least
for the vector we pass to pcre_exec) when the compiler supports TLS
natively.
For the software I'm in charge of I also usually compile PCRE1 with
--no-recurse-on-stack and override pcre[_stack]_{malloc,free}() to use
TLS (and apr_pools), but it's not something applicable to upstream
httpd because third-party modules possibly use PCRE each their own way
(like me..).
Not sure if I can do that with PCRE2 if it always
--no-recurse-on-stack, probably if pcre2_malloc/free are still
override-able but I didn't look into it yet (until then I'll keep
using PCRE1..).

Cheers;
Yann.

Re: svn commit: r1897240 - in /httpd/httpd/trunk/server: main.c util_pcre.c

Posted by Ruediger Pluem <rp...@apache.org>.

On 1/20/22 3:52 PM, William A Rowe Jr wrote:
> On Thu, Jan 20, 2022 at 5:09 AM <yl...@apache.org> wrote:
>>
>> Author: ylavic
>> Date: Thu Jan 20 11:09:34 2022
>> New Revision: 1897240
>>
>> URL: http://svn.apache.org/viewvc?rev=1897240&view=rev
>> Log:
>> ap_regex: Use Thread Local Storage (if efficient) to avoid allocations.
>>
>> PCRE2 wants an opaque context by providing the API to allocate and free it, so
>> to minimize these calls we maintain one opaque context per thread (in Thread
>> Local Storage, TLS) grown as needed, and while at it we do the same for PCRE1
>> ints vectors. Note that this requires a fast TLS mechanism to be worth it,
>> which is the case of apr_thread_data_get/set() from/to apr_thread_current()
>> when APR_HAS_THREAD_LOCAL; otherwise we'll do the allocation and freeing for
>> each ap_regexec().
>>
>> The small stack vector is used for PCRE1 && !APR_HAS_THREAD_LOCAL only now.
> 
> It's good to keep iterating on this, for now, but when I wrote the
> patch both pcre1 & pcre2
> were supported by an author, if not a whole community.
> 
> I don't believe the project can or should ship support for httpd
> 2.6/3.0/next with support
> for a dead library. But it's better not to rip it out just yet.

I guess we can rip it out of trunk once we backported the PCRE2 support to 2.4.x
and some month without issues in 2.4.x have passed.

Regards

Rüdiger


Re: svn commit: r1897240 - in /httpd/httpd/trunk/server: main.c util_pcre.c

Posted by William A Rowe Jr <wr...@rowe-clan.net>.
On Thu, Jan 20, 2022 at 5:09 AM <yl...@apache.org> wrote:
>
> Author: ylavic
> Date: Thu Jan 20 11:09:34 2022
> New Revision: 1897240
>
> URL: http://svn.apache.org/viewvc?rev=1897240&view=rev
> Log:
> ap_regex: Use Thread Local Storage (if efficient) to avoid allocations.
>
> PCRE2 wants an opaque context by providing the API to allocate and free it, so
> to minimize these calls we maintain one opaque context per thread (in Thread
> Local Storage, TLS) grown as needed, and while at it we do the same for PCRE1
> ints vectors. Note that this requires a fast TLS mechanism to be worth it,
> which is the case of apr_thread_data_get/set() from/to apr_thread_current()
> when APR_HAS_THREAD_LOCAL; otherwise we'll do the allocation and freeing for
> each ap_regexec().
>
> The small stack vector is used for PCRE1 && !APR_HAS_THREAD_LOCAL only now.

It's good to keep iterating on this, for now, but when I wrote the
patch both pcre1 & pcre2
were supported by an author, if not a whole community.

I don't believe the project can or should ship support for httpd
2.6/3.0/next with support
for a dead library. But it's better not to rip it out just yet.

pcre1 is very dangerous, on stack. pcre2 is highly cautioned against
using stack for
its arrays, by its author. We should heed the advice.

And I'm not a voting participant so take my observations, with both
grains of salt.

Cheers,

Bill

Re: svn commit: r1897240 - in /httpd/httpd/trunk/server: main.c util_pcre.c

Posted by Ruediger Pluem <rp...@apache.org>.
On 1/24/22 9:42 AM, Ruediger Pluem wrote:
> 
> 
> On 1/20/22 5:14 PM, Yann Ylavic wrote:
>> Index: server/util.c
>> ===================================================================
>> --- server/util.c	(revision 1897156)
>> +++ server/util.c	(working copy)
>> @@ -3261,6 +3261,56 @@ AP_DECLARE(void *) ap_realloc(void *ptr, size_t si
>>      return p;
>>  }
>>  
>> +#if defined(AP_THREAD_LOCAL) && !APR_VERSION_AT_LEAST(1,8,0)
>> +struct thread_ctx {
>> +    apr_thread_start_t func;
>> +    void *data;
>> +};
>> +
>> +static AP_THREAD_LOCAL apr_thread_t *current_thread;
> 
> Shouldn't we do
> 
> static AP_THREAD_LOCAL apr_thread_t *current_thread = NULL;
> 
> for the sake of clarity?

Otherwise looks good.

Regards

Rüdiger


Re: svn commit: r1897240 - in /httpd/httpd/trunk/server: main.c util_pcre.c

Posted by Ruediger Pluem <rp...@apache.org>.

On 1/20/22 5:14 PM, Yann Ylavic wrote:
> Index: server/util.c
> ===================================================================
> --- server/util.c	(revision 1897156)
> +++ server/util.c	(working copy)
> @@ -3261,6 +3261,56 @@ AP_DECLARE(void *) ap_realloc(void *ptr, size_t si
>      return p;
>  }
>  
> +#if defined(AP_THREAD_LOCAL) && !APR_VERSION_AT_LEAST(1,8,0)
> +struct thread_ctx {
> +    apr_thread_start_t func;
> +    void *data;
> +};
> +
> +static AP_THREAD_LOCAL apr_thread_t *current_thread;

Shouldn't we do

static AP_THREAD_LOCAL apr_thread_t *current_thread = NULL;

for the sake of clarity?

Regards

Rüdiger


Re: svn commit: r1897240 - in /httpd/httpd/trunk/server: main.c util_pcre.c

Posted by Ruediger Pluem <rp...@apache.org>.

On 1/20/22 5:14 PM, Yann Ylavic wrote:
> On Thu, Jan 20, 2022 at 2:41 PM Ruediger Pluem <rp...@apache.org> wrote:
>>
>> On 1/20/22 2:24 PM, Yann Ylavic wrote:
>>
>>>
>>> All good points,  thanks Rüdiger, should be fixed in r1897250.
>>
>> Great. I guess next we need to think what we do for 2.4.x.
>> Even when 1.8.x is released, we cannot demand it for 2.4.x (for trunk we could).
>> I guess we have two general choices:
>>
>> 1. We implement it differently on 2.4.x also using TLS when available, but not requiring APR 1.8.x.
>> 2. We recommend that people who switch over to PCRE2 to use APR 1.8.x for performance reasons.
>>
>> As using PCRE2 make some sense and should be encouraged 2. would make it difficult for people tied to older APR versions like some
>> distributions.
>>
>> For 1. my rough idea would be that in case of a threaded MPM we could store the apr_thread_t pointer of a worker_thread in a TLS.
>> That would solve the performance issue in most cases.
> 
> How about the attached patch?
> For APR < 1.8 it defines ap_thread_create()/ap_thread_current() as
> wrappers around the APR ones plus storing/loading the current
> apr_thread_t* from the TLS as APR 1.8 does (if implemented by the
> compiler still). Then it applies a simple
> s/apr_thread_create/ap_thread_create/g in the code base..
> Finally ap_regex is adapted to use that if available at compile time
> and runtime, otherwise it falls back to allocation/free.
> 
>>
>> BTW: I think the current code does not work well in the case of !APR_HAS_THREADS, but in this case we could just a static variable
>> to store the pointer, correct?
> 
> AP[R]_HAS_THREAD_LOCAL are defined on if APR_HAS_THREADS, so it should
> be fine for the compilation.
> I'm not sure we should optimize for the !APR_HAS_THREADS case with a
> static though, but I could be convinced..

Rethinking as well and I guess even users of prefork will likely compile on systems with APR_HAS_THREADS and probably
AP[R]_HAS_THREAD_LOCAL. Hence it might be a very small group of users that would benefit from this. Hence lets park this
for a while and resurrect it if my assumption on the amount of users benefiting from this was wrong.

Regards

Rüdiger


Re: svn commit: r1897240 - in /httpd/httpd/trunk/server: main.c util_pcre.c

Posted by Yann Ylavic <yl...@gmail.com>.
On Thu, Jan 20, 2022 at 2:41 PM Ruediger Pluem <rp...@apache.org> wrote:
>
> On 1/20/22 2:24 PM, Yann Ylavic wrote:
>
> >
> > All good points,  thanks Rüdiger, should be fixed in r1897250.
>
> Great. I guess next we need to think what we do for 2.4.x.
> Even when 1.8.x is released, we cannot demand it for 2.4.x (for trunk we could).
> I guess we have two general choices:
>
> 1. We implement it differently on 2.4.x also using TLS when available, but not requiring APR 1.8.x.
> 2. We recommend that people who switch over to PCRE2 to use APR 1.8.x for performance reasons.
>
> As using PCRE2 make some sense and should be encouraged 2. would make it difficult for people tied to older APR versions like some
> distributions.
>
> For 1. my rough idea would be that in case of a threaded MPM we could store the apr_thread_t pointer of a worker_thread in a TLS.
> That would solve the performance issue in most cases.

How about the attached patch?
For APR < 1.8 it defines ap_thread_create()/ap_thread_current() as
wrappers around the APR ones plus storing/loading the current
apr_thread_t* from the TLS as APR 1.8 does (if implemented by the
compiler still). Then it applies a simple
s/apr_thread_create/ap_thread_create/g in the code base..
Finally ap_regex is adapted to use that if available at compile time
and runtime, otherwise it falls back to allocation/free.

>
> BTW: I think the current code does not work well in the case of !APR_HAS_THREADS, but in this case we could just a static variable
> to store the pointer, correct?

AP[R]_HAS_THREAD_LOCAL are defined on if APR_HAS_THREADS, so it should
be fine for the compilation.
I'm not sure we should optimize for the !APR_HAS_THREADS case with a
static though, but I could be convinced..


Regards;
Yann.

Re: svn commit: r1897240 - in /httpd/httpd/trunk/server: main.c util_pcre.c

Posted by Ruediger Pluem <rp...@apache.org>.

On 1/20/22 2:24 PM, Yann Ylavic wrote:

> 
> All good points,  thanks Rüdiger, should be fixed in r1897250.

Great. I guess next we need to think what we do for 2.4.x.
Even when 1.8.x is released, we cannot demand it for 2.4.x (for trunk we could).
I guess we have two general choices:

1. We implement it differently on 2.4.x also using TLS when available, but not requiring APR 1.8.x.
2. We recommend that people who switch over to PCRE2 to use APR 1.8.x for performance reasons.

As using PCRE2 make some sense and should be encouraged 2. would make it difficult for people tied to older APR versions like some
distributions.

For 1. my rough idea would be that in case of a threaded MPM we could store the apr_thread_t pointer of a worker_thread in a TLS.
That would solve the performance issue in most cases.

BTW: I think the current code does not work well in the case of !APR_HAS_THREADS, but in this case we could just a static variable
to store the pointer, correct?

Regards

Rüdiger

Re: svn commit: r1897240 - in /httpd/httpd/trunk/server: main.c util_pcre.c

Posted by Yann Ylavic <yl...@gmail.com>.
On Thu, Jan 20, 2022 at 1:53 PM Ruediger Pluem <rp...@apache.org> wrote:
>
> On 1/20/22 12:09 PM, ylavic@apache.org wrote:
> >
> >  #include "httpd.h"
> > +#include "apr_version.h"
>
> Why is this needed?

It's not (anymore), I tested for APR_VERSION_AT_LEAST(1,8,0) previously.

>
> > +#include "apr_portable.h"
>
> apr_thread_proc.h
>
> is not sufficent?

It is, same remnant from an older version using the apr_threadkey API still.

>
> > +
> > +#ifdef HAVE_PCRE2
> > +    *ovector = pcre2_get_ovector_pointer(tls->data);
> > +#else
> > +    *vector = tls->data;
>
> *ovector instead of *vector?

Clearly, I thought I had compile tested first with PCRE1 and
!APR_HAS_THREAD_LOCAL but made changes afterward..

>
> > +
> > +#ifdef HAVE_PCRE2
> > +    data = pcre2_match_data_create(size, NULL);
> > +    *ovector = pcre2_get_ovector_pointer(data);
>
> Should we do an if (data) here? Not sure if pcre2_get_ovector_pointer can work with NULL

PCRE2 assumes !NULL so clearly!

All good points,  thanks Rüdiger, should be fixed in r1897250.


Regards;
Yann.

Re: svn commit: r1897240 - in /httpd/httpd/trunk/server: main.c util_pcre.c

Posted by Ruediger Pluem <rp...@apache.org>.

On 1/20/22 12:09 PM, ylavic@apache.org wrote:
> Author: ylavic
> Date: Thu Jan 20 11:09:34 2022
> New Revision: 1897240
> 
> URL: http://svn.apache.org/viewvc?rev=1897240&view=rev
> Log:
> ap_regex: Use Thread Local Storage (if efficient) to avoid allocations.
> 
> PCRE2 wants an opaque context by providing the API to allocate and free it, so
> to minimize these calls we maintain one opaque context per thread (in Thread
> Local Storage, TLS) grown as needed, and while at it we do the same for PCRE1
> ints vectors. Note that this requires a fast TLS mechanism to be worth it,
> which is the case of apr_thread_data_get/set() from/to apr_thread_current()
> when APR_HAS_THREAD_LOCAL; otherwise we'll do the allocation and freeing for
> each ap_regexec().
> 
> The small stack vector is used for PCRE1 && !APR_HAS_THREAD_LOCAL only now.
> 
> 
> Modified:
>     httpd/httpd/trunk/server/main.c
>     httpd/httpd/trunk/server/util_pcre.c
> 

> Modified: httpd/httpd/trunk/server/util_pcre.c
> URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/server/util_pcre.c?rev=1897240&r1=1897239&r2=1897240&view=diff
> ==============================================================================
> --- httpd/httpd/trunk/server/util_pcre.c (original)
> +++ httpd/httpd/trunk/server/util_pcre.c Thu Jan 20 11:09:34 2022
> @@ -53,6 +53,8 @@ POSSIBILITY OF SUCH DAMAGE.
>  */
>  
>  #include "httpd.h"
> +#include "apr_version.h"

Why is this needed?

> +#include "apr_portable.h"

apr_thread_proc.h

is not sufficent?

>  #include "apr_strings.h"
>  #include "apr_tables.h"
>  
> @@ -263,7 +265,122 @@ AP_DECLARE(int) ap_regcomp(ap_regex_t *
>   * ints. However, if the number of possible capturing brackets is small, use a
>   * block of store on the stack, to reduce the use of malloc/free. The threshold
>   * is in a macro that can be changed at configure time.
> + * Yet more unfortunately, PCRE2 wants an opaque context by providing the API
> + * to allocate and free it, so to minimize these calls we maintain one opaque
> + * context per thread (in Thread Local Storage, TLS) grown as needed, and while
> + * at it we do the same for PCRE1 ints vectors. Note that this requires a fast
> + * TLS mechanism to be worth it, which is the case of apr_thread_data_get/set()
> + * from/to apr_thread_current() when APR_HAS_THREAD_LOCAL; otherwise we'll do
> + * the allocation and freeing for each ap_regexec().
>   */
> +
> +#ifdef HAVE_PCRE2
> +typedef pcre2_match_data* match_data_pt;
> +typedef size_t*           match_vector_pt;
> +#else
> +typedef int*              match_data_pt;
> +typedef int*              match_vector_pt;
> +#endif
> +
> +#if APR_HAS_THREAD_LOCAL
> +
> +static match_data_pt get_match_data(apr_size_t size,
> +                                    match_vector_pt *ovector,
> +                                    match_vector_pt small_vector)
> +{
> +    apr_thread_t *current;
> +    struct {
> +        match_data_pt data;
> +        apr_size_t size;
> +    } *tls = NULL;
> +
> +    /* APR_HAS_THREAD_LOCAL garantees this works */
> +    current = apr_thread_current();
> +    ap_assert(current != NULL);
> +
> +    apr_thread_data_get((void **)&tls, "apreg", current);
> +    if (!tls || tls->size < size) {
> +        apr_pool_t *tp = apr_thread_pool_get(current);
> +        if (tls) {
> +#ifdef HAVE_PCRE2
> +            pcre2_match_data_free(tls->data); /* NULL safe */
> +#endif
> +        }
> +        else {
> +            tls = apr_pcalloc(tp, sizeof(*tls));
> +            apr_thread_data_set(tls, "apreg", NULL, current);
> +        }
> +        tls->size *= 2;
> +        if (tls->size < size) {
> +            tls->size = size;
> +            if (tls->size < POSIX_MALLOC_THRESHOLD) {
> +                tls->size = POSIX_MALLOC_THRESHOLD;
> +            }
> +        }
> +#ifdef HAVE_PCRE2
> +        tls->data = pcre2_match_data_create(tls->size, NULL);
> +#else
> +        tls->data = apr_palloc(tp, tls->size * sizeof(int) * 3);
> +#endif
> +        if (!tls->data) {
> +            tls->size = 0;
> +            return NULL;
> +        }
> +    }
> +
> +#ifdef HAVE_PCRE2
> +    *ovector = pcre2_get_ovector_pointer(tls->data);
> +#else
> +    *vector = tls->data;

*ovector instead of *vector?

> +#endif
> +    return tls->data;
> +}
> +
> +/* Nothing to put back with thread local */
> +static APR_INLINE void put_match_data(match_data_pt data,
> +                                      apr_size_t size)
> +{ }
> +
> +#else /* !APR_HAS_THREAD_LOCAL */
> +
> +/* Always allocate/free without thread local */
> +
> +static match_data_pt get_match_data(apr_size_t size,
> +                                    match_vector_pt *ovector,
> +                                    match_vector_pt small_vector)
> +{
> +    match_data_pt data;
> +
> +#ifdef HAVE_PCRE2
> +    data = pcre2_match_data_create(size, NULL);
> +    *ovector = pcre2_get_ovector_pointer(data);

Should we do an if (data) here? Not sure if pcre2_get_ovector_pointer can work with NULL

> +#else
> +    if (size > POSIX_MALLOC_THRESHOLD) {
> +        data = malloc(size * sizeof(int) * 3);
> +    }
> +    else {
> +        data = small_vector;
> +    }
> +    *ovector = data;
> +#endif
> +
> +    return data;
> +}
> +
> +static APR_INLINE void put_match_data(match_data_pt data,
> +                                      apr_size_t size)
> +{
> +#ifdef HAVE_PCRE2
> +    pcre2_match_data_free(data);
> +#else
> +    if (size > POSIX_MALLOC_THRESHOLD) {
> +        free(data);
> +    }
> +#endif
> +}
> +
> +#endif /* !APR_HAS_THREAD_LOCAL */
> +
>  AP_DECLARE(int) ap_regexec(const ap_regex_t *preg, const char *string,
>                             apr_size_t nmatch, ap_regmatch_t *pmatch,
>                             int eflags)
> 

Regards

Rüdiger