You are viewing a plain text version of this content. The canonical link for it is here.
Posted to cvs@httpd.apache.org by yl...@apache.org on 2022/07/15 10:36:24 UTC

svn commit: r1902731 - /httpd/httpd/trunk/server/util_pcre.c

Author: ylavic
Date: Fri Jul 15 10:36:24 2022
New Revision: 1902731

URL: http://svn.apache.org/viewvc?rev=1902731&view=rev
Log:
util_pcre: Add a thread local subpool cache for when stack does not suffice.

When AP_HAS_THREAD_LOCAL is available, use a thread-local match_thread_state to
save per-thread data in a subpool of the thread's pool.

If private_malloc() gets out of the stack buffer and the current thread has a
pool (i.e. ap_thread_current() != NULL), it will apr_palloc()ate and return
memory from the subpool.

When the match is complete and the match_data are freed, the thread subpool is
cleared thus giving back the memory to the allocator, which itself will give
back the memory or recycle it depending on its max_free setting.

* util_pcre.c:
  Restore POSIX_MALLOC_THRESHOLD since this is part of the user API.

* util_pcre.c(match_data_pt):
  Type not used (explicitly) anymore, axe.
  
* util_pcre.c(struct match_data_state):
  Put the stack buffer there to simplify code (the state is allocated on
  stack anyway).
  If APREG_USE_THREAD_LOCAL, add the apr_thread_t* and match_thread_state*
  fields that track the thread local data for the match.

* util_pcre.c(alloc_match_data, free_match):
  Renamed to setup_state() and cleanup_state(), simplified (no stack buffer
  parameters anymore).
  cleanup_state() now clears the thread local subpool if used during the match.
  setup_state() sets state->thd to ap_thread_current(), thus NULL if it's not a
  suitable thread for using thread local data.

* util_pcre.c(private_malloc):
  Fix a possible buf_used overflow (size <= avail < APR_ALIGN_DEFAULT(size)).
  Create the thread local subpool (once per thread) and allocate from there
  when stack space is missing and state->thd != NULL, otherwise fall back to
  malloc() still.

* util_pcre.c(private_free):
  Do nothing for thread local subpool memory, will be freed in cleanup_state
  eventually.


Modified:
    httpd/httpd/trunk/server/util_pcre.c

Modified: httpd/httpd/trunk/server/util_pcre.c
URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/server/util_pcre.c?rev=1902731&r1=1902730&r2=1902731&view=diff
==============================================================================
--- httpd/httpd/trunk/server/util_pcre.c (original)
+++ httpd/httpd/trunk/server/util_pcre.c Fri Jul 15 10:36:24 2022
@@ -73,8 +73,19 @@ POSSIBILITY OF SUCH DAMAGE.
 #define APR_WANT_STRFUNC
 #include "apr_want.h"
 
-#ifndef AP_PCRE_STACKBUF_SIZE
-#define AP_PCRE_STACKBUF_SIZE (256)
+#ifndef POSIX_MALLOC_THRESHOLD
+#define POSIX_MALLOC_THRESHOLD (10)
+#endif
+
+#ifdef HAVE_PCRE2
+/* Reserve 128 bytes for the PCRE2 structs, that is (a bit above):
+ *   sizeof(pcre2_general_context) + offsetof(pcre2_match_data, ovector)
+ */
+#define AP_PCRE_STACKBUF_SIZE \
+    (128 + POSIX_MALLOC_THRESHOLD * sizeof(PCRE2_SIZE) * 2)
+#else
+#define AP_PCRE_STACKBUF_SIZE \
+    (POSIX_MALLOC_THRESHOLD * sizeof(int) * 3)
 #endif
 
 /* Table of error strings corresponding to POSIX error codes; must be
@@ -202,7 +213,7 @@ AP_DECLARE(int) ap_regcomp(ap_regex_t *
 {
 #ifdef HAVE_PCRE2
     uint32_t capcount;
-    size_t erroffset;
+    PCRE2_SIZE erroffset;
 #else
     const char *errorptr;
     int erroffset;
@@ -257,105 +268,201 @@ AP_DECLARE(int) ap_regcomp(ap_regex_t *
  *              Match a regular expression       *
  *************************************************/
 
-/* Unfortunately, PCRE requires 3 ints of working space for each captured
+/* Unfortunately, PCRE1 requires 3 ints of working space for each captured
  * substring, so we have to get and release working store instead of just using
  * the POSIX structures as was done in earlier releases when PCRE needed only 2
  * ints. However, if the number of possible capturing brackets is small, use a
  * block of store on the stack, to reduce the use of malloc/free. The threshold
- * is in a macro that can be changed at configure time.
- * Yet more unfortunately, PCRE2 wants an opaque context by providing the API
- * to allocate and free it, so to minimize these calls we maintain one opaque
- * context per thread (in Thread Local Storage, TLS) grown as needed, and while
- * at it we do the same for PCRE1 ints vectors. Note that this requires a fast
- * TLS mechanism to be worth it, which is the case of apr_thread_data_get/set()
- * from/to ap_thread_current() when AP_HAS_THREAD_LOCAL; otherwise we'll do
- * the allocation and freeing for each ap_regexec().
+ * is in POSIX_MALLOC_THRESHOLD macro that can be changed at configure time.
+ * PCRE2 takes an opaque match context and lets us provide the callbacks to
+ * manage the memory needed during the match, so we can still use a small stack
+ * space that'll suffice for regexes up to POSIX_MALLOC_THRESHOLD captures (and
+ * not too complex), above that either use some thread local subpool cache (if
+ * AP_HAS_THREAD_LOCAL) or fall back to malloc()/free().
  */
 
+#if AP_HAS_THREAD_LOCAL && !defined(APREG_NO_THREAD_LOCAL)
+#define APREG_USE_THREAD_LOCAL 1
+#else
+#define APREG_USE_THREAD_LOCAL 0
+#endif
+
 #ifdef HAVE_PCRE2
-typedef pcre2_match_data* match_data_pt;
-typedef size_t*           match_vector_pt;
+typedef PCRE2_SIZE* match_vector_pt;
 #else
-typedef int*              match_data_pt;
-typedef int*              match_vector_pt;
+typedef int*        match_vector_pt;
 #endif
 
-struct match_data_state
-{
-    match_data_pt data;
-    char *buf;
-    apr_size_t buf_len;
+#if APREG_USE_THREAD_LOCAL
+struct match_thread_state {
+    char *heap;
+    apr_size_t heap_size;
+    apr_size_t heap_used;
+    apr_pool_t *pool;
+};
+
+AP_THREAD_LOCAL static struct match_thread_state *thread_state;
+#endif
+
+struct match_data_state {
+    /* keep first, struct aligned */
+    char buf[AP_PCRE_STACKBUF_SIZE];
     apr_size_t buf_used;
+
+#if APREG_USE_THREAD_LOCAL
+    apr_thread_t *thd;
+    struct match_thread_state *ts;
+#endif
+
 #ifdef HAVE_PCRE2
     pcre2_general_context *pcre2_ctx;
+    pcre2_match_data* match_data;
+#else
+    int *match_data;
 #endif
 };
 
 static void * private_malloc(size_t size, void *ctx)
 {
     struct match_data_state *state = ctx;
+    apr_size_t avail;
 
-    if(size <= (state->buf_len - state->buf_used)) {
+    avail = sizeof(state->buf) - state->buf_used;
+    if (avail >= size) {
         void *p = state->buf + state->buf_used;
+        size = APR_ALIGN_DEFAULT(size);
+        if (size > avail) {
+            size = avail;
+        }
+        state->buf_used += size;
+        return p;
+    }
 
-        state->buf_used += APR_ALIGN_DEFAULT(size);
+#if APREG_USE_THREAD_LOCAL
+    if (state->thd) {
+        struct match_thread_state *ts = thread_state;
+        void *p;
+
+        if (!ts) {
+            apr_pool_t *tp = apr_thread_pool_get(state->thd);
+            ts = apr_pcalloc(tp, sizeof(*ts));
+            apr_pool_create(&ts->pool, tp);
+            thread_state = state->ts = ts;
+        }
+        else if (!state->ts) {
+            ts->heap_used = 0;
+            state->ts = ts;
+        }
 
+        avail = ts->heap_size - ts->heap_used;
+        if (avail >= size) {
+            size = APR_ALIGN_DEFAULT(size);
+            if (size > avail) {
+                size = avail;
+            }
+            p = ts->heap + ts->heap_used;
+        }
+        else {
+            ts->heap_size *= 2;
+            size = APR_ALIGN_DEFAULT(size);
+            if (ts->heap_size < size) {
+                ts->heap_size = size;
+            }
+            if (ts->heap_size < AP_PCRE_STACKBUF_SIZE * 2) {
+                ts->heap_size = AP_PCRE_STACKBUF_SIZE * 2;
+            }
+            ts->heap = apr_palloc(ts->pool, ts->heap_size);
+            ts->heap_used = 0;
+            p = ts->heap;
+        }
+
+        ts->heap_used += size;
         return p;
     }
-    else {
-        return malloc(size);
-    }
+#endif
+
+    return malloc(size);
 }
 
 static void private_free(void *block, void *ctx)
 {
     struct match_data_state *state = ctx;
-    void *buf_start = state->buf;
-    void *buf_end = state->buf + state->buf_len;
+    char *p = block;
 
-    if (block >= buf_start && block <= buf_end) {
+    if (p >= state->buf && p < state->buf + sizeof(state->buf)) {
         /* This block allocated from stack buffer. Do nothing. */
+        return;
     }
-    else {
-        free(block);
+
+#if APREG_USE_THREAD_LOCAL
+    if (state->thd) {
+        /* Freed in cleanup_state() eventually. */
+        return;
     }
+#endif
+
+    free(block);
 } 
 
 static APR_INLINE
-match_data_pt alloc_match_data(apr_size_t size,
-                               struct match_data_state *state,
-                               void *stack_buf,
-                               apr_size_t stack_buf_len)
+int setup_state(struct match_data_state *state, apr_uint32_t ncaps)
 {
-    state->buf = stack_buf;
-    state->buf_len = stack_buf_len;
     state->buf_used = 0;
+#if APREG_USE_THREAD_LOCAL
+    state->thd = ap_thread_current();
+    state->ts = NULL;
+#endif
 
 #ifdef HAVE_PCRE2
-    state->pcre2_ctx = pcre2_general_context_create(private_malloc, private_free, state);
+    state->pcre2_ctx = pcre2_general_context_create(private_malloc,
+                                                    private_free, state);
     if (!state->pcre2_ctx) { 
-        return NULL;
+        return 0;
     }
-    state->data = pcre2_match_data_create((int)size, state->pcre2_ctx);
-    if (!state->data) {
+
+    state->match_data = pcre2_match_data_create(ncaps, state->pcre2_ctx);
+    if (!state->match_data) {
         pcre2_general_context_free(state->pcre2_ctx);
-        return NULL;
+        return 0;
     }
 #else
-    state->data = private_malloc(size * sizeof(int) * 3, state);
+    if (ncaps) {
+        state->match_data = private_malloc(ncaps * sizeof(int) * 3, state);
+        if (!state->match_data) {
+            return 0;
+        }
+    }
+    else {
+        /* Fine with PCRE1 */
+        state->match_data = NULL;
+    }
 #endif
 
-    return state->data;
+    return 1;
 }
 
 static APR_INLINE
-void free_match_data(struct match_data_state *state)
+void cleanup_state(struct match_data_state *state)
 {
 #ifdef HAVE_PCRE2
-    pcre2_match_data_free(state->data);
+    pcre2_match_data_free(state->match_data);
     pcre2_general_context_free(state->pcre2_ctx);
 #else
-    private_free(state->data, state);
+    if (state->match_data) {
+        private_free(state->match_data, state);
+    }
+#endif
+
+#if APREG_USE_THREAD_LOCAL
+    if (state->ts) {
+        /* Let the thread's pool allocator recycle or free according
+         * to its max_free setting.
+         */
+        struct match_thread_state *ts = state->ts;
+        apr_pool_clear(ts->pool);
+        ts->heap_size = 0;
+        ts->heap = NULL;
+    }
 #endif
 }
 
@@ -372,16 +479,12 @@ AP_DECLARE(int) ap_regexec_len(const ap_
                                ap_regmatch_t *pmatch, int eflags)
 {
     int rc;
-    int options = 0, to_free = 0;
-    match_vector_pt ovector = NULL;
-    apr_size_t ncaps = (apr_size_t)preg->re_nsub + 1;
+    int options = 0;
     struct match_data_state state;
-    match_data_pt data;
-    /* Use apr_uint64_t to get proper alignment. */
-    apr_uint64_t stack_buf[(AP_PCRE_STACKBUF_SIZE + sizeof(apr_uint64_t) - 1) / sizeof(apr_uint64_t)];
+    match_vector_pt ovector = NULL;
+    apr_uint32_t ncaps = (apr_uint32_t)preg->re_nsub + 1;
 
-    data = alloc_match_data(ncaps, &state, stack_buf, sizeof(stack_buf));
-    if (!data) {
+    if (!setup_state(&state, ncaps)) {
         return AP_REG_ESPACE;
     }
 
@@ -396,11 +499,11 @@ AP_DECLARE(int) ap_regexec_len(const ap_
 
 #ifdef HAVE_PCRE2
     rc = pcre2_match((const pcre2_code *)preg->re_pcre,
-                     (const unsigned char *)buff, len,
-                     0, options, data, NULL);
-    ovector = pcre2_get_ovector_pointer(data);
+                     (const unsigned char *)buff, len, 0, options,
+                     state.match_data, NULL);
+    ovector = pcre2_get_ovector_pointer(state.match_data);
 #else
-    ovector = data;
+    ovector = state.match_data;
     rc = pcre_exec((const pcre *)preg->re_pcre, NULL, buff, (int)len,
                    0, options, ovector, ncaps * 3);
 #endif
@@ -415,11 +518,11 @@ AP_DECLARE(int) ap_regexec_len(const ap_
         }
         for (; i < nmatch; i++)
             pmatch[i].rm_so = pmatch[i].rm_eo = -1;
-        free_match_data(&state);
+        cleanup_state(&state);
         return 0;
     }
     else {
-        free_match_data(&state);
+        cleanup_state(&state);
 #ifdef HAVE_PCRE2
         if (rc <= PCRE2_ERROR_UTF8_ERR1 && rc >= PCRE2_ERROR_UTF8_ERR21)
             return AP_REG_INVARG;



Re: svn commit: r1902731 - /httpd/httpd/trunk/server/util_pcre.c

Posted by Ruediger Pluem <rp...@apache.org>.

On 7/28/22 9:25 PM, Ruediger Pluem wrote:
> 
> 
> On 7/15/22 12:36 PM, ylavic@apache.org wrote:
>> Author: ylavic
>> Date: Fri Jul 15 10:36:24 2022
>> New Revision: 1902731
>>
>> URL: http://svn.apache.org/viewvc?rev=1902731&view=rev

>>  
>> -        state->buf_used += APR_ALIGN_DEFAULT(size);
>> +#if APREG_USE_THREAD_LOCAL
>> +    if (state->thd) {
>> +        struct match_thread_state *ts = thread_state;
>> +        void *p;
>> +
>> +        if (!ts) {
>> +            apr_pool_t *tp = apr_thread_pool_get(state->thd);
>> +            ts = apr_pcalloc(tp, sizeof(*ts));
>> +            apr_pool_create(&ts->pool, tp);
>> +            thread_state = state->ts = ts;
>> +        }
>> +        else if (!state->ts) {
>> +            ts->heap_used = 0;
>> +            state->ts = ts;
>> +        }
>>  
>> +        avail = ts->heap_size - ts->heap_used;
>> +        if (avail >= size) {
>> +            size = APR_ALIGN_DEFAULT(size);
>> +            if (size > avail) {
>> +                size = avail;
>> +            }
>> +            p = ts->heap + ts->heap_used;
>> +        }
>> +        else {
>> +            ts->heap_size *= 2;
>> +            size = APR_ALIGN_DEFAULT(size);
>> +            if (ts->heap_size < size) {
>> +                ts->heap_size = size;
>> +            }
>> +            if (ts->heap_size < AP_PCRE_STACKBUF_SIZE * 2) {
>> +                ts->heap_size = AP_PCRE_STACKBUF_SIZE * 2;
>> +            }
>> +            ts->heap = apr_palloc(ts->pool, ts->heap_size);
>> +            ts->heap_used = 0;
>> +            p = ts->heap;
> 
> I think that apr_palloc should be efficient enough for allocating memory quickly, and
> that we don't need to reserve bigger memory chunks and manage them locally here; that
> looks like a duplication of what apr_palloc already does.

Already done by r1902858 :-)

Regards

RĂ¼diger


Re: svn commit: r1902731 - /httpd/httpd/trunk/server/util_pcre.c

Posted by Ruediger Pluem <rp...@apache.org>.

On 7/15/22 12:36 PM, ylavic@apache.org wrote:
> Author: ylavic
> Date: Fri Jul 15 10:36:24 2022
> New Revision: 1902731
> 
> URL: http://svn.apache.org/viewvc?rev=1902731&view=rev
> Log:
> util_pcre: Add a thread local subpool cache for when stack does not suffice.
> 
> When AP_HAS_THREAD_LOCAL is available, use a thread-local match_thread_state to
> save per-thread data in a subpool of the thread's pool.
> 
> If private_malloc() gets out of the stack buffer and the current thread has a
> pool (i.e. ap_thread_current() != NULL), it will apr_palloc()ate and return
> memory from the subpool.
> 
> When the match is complete and the match_data are freed, the thread subpool is
> cleared thus giving back the memory to the allocator, which itself will give
> back the memory or recycle it depending on its max_free setting.
> 
> * util_pcre.c:
>   Restore POSIX_MALLOC_THRESHOLD since this is part of the user API.
> 
> * util_pcre.c(match_data_pt):
>   Type not used (explicitly) anymore, axe.
>   
> * util_pcre.c(struct match_data_state):
>   Put the stack buffer there to simplify code (the state is allocated on
>   stack anyway).
>   If APREG_USE_THREAD_LOCAL, add the apr_thread_t* and match_thread_state*
>   fields that track the thread local data for the match.
> 
> * util_pcre.c(alloc_match_data, free_match):
>   Renamed to setup_state() and cleanup_state(), simplified (no stack buffer
>   parameters anymore).
>   cleanup_state() now clears the thread local subpool if used during the match.
>   setup_state() sets state->thd to ap_thread_current(), thus NULL if it's not a
>   suitable thread for using thread local data.
> 
> * util_pcre.c(private_malloc):
>   Fix a possible buf_used overflow (size <= avail < APR_ALIGN_DEFAULT(size)).
>   Create the thread local subpool (once per thread) and allocate from there
>   when stack space is missing and state->thd != NULL, otherwise fall back to
>   malloc() still.
> 
> * util_pcre.c(private_free):
>   Do nothing for thread local subpool memory, will be freed in cleanup_state
>   eventually.
> 
> 
> Modified:
>     httpd/httpd/trunk/server/util_pcre.c
> 
> Modified: httpd/httpd/trunk/server/util_pcre.c
> URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/server/util_pcre.c?rev=1902731&r1=1902730&r2=1902731&view=diff
> ==============================================================================
> --- httpd/httpd/trunk/server/util_pcre.c (original)
> +++ httpd/httpd/trunk/server/util_pcre.c Fri Jul 15 10:36:24 2022

> @@ -257,105 +268,201 @@ AP_DECLARE(int) ap_regcomp(ap_regex_t *
>   *              Match a regular expression       *
>   *************************************************/
>  
> -/* Unfortunately, PCRE requires 3 ints of working space for each captured
> +/* Unfortunately, PCRE1 requires 3 ints of working space for each captured
>   * substring, so we have to get and release working store instead of just using
>   * the POSIX structures as was done in earlier releases when PCRE needed only 2
>   * ints. However, if the number of possible capturing brackets is small, use a
>   * block of store on the stack, to reduce the use of malloc/free. The threshold
> - * is in a macro that can be changed at configure time.
> - * Yet more unfortunately, PCRE2 wants an opaque context by providing the API
> - * to allocate and free it, so to minimize these calls we maintain one opaque
> - * context per thread (in Thread Local Storage, TLS) grown as needed, and while
> - * at it we do the same for PCRE1 ints vectors. Note that this requires a fast
> - * TLS mechanism to be worth it, which is the case of apr_thread_data_get/set()
> - * from/to ap_thread_current() when AP_HAS_THREAD_LOCAL; otherwise we'll do
> - * the allocation and freeing for each ap_regexec().
> + * is in POSIX_MALLOC_THRESHOLD macro that can be changed at configure time.
> + * PCRE2 takes an opaque match context and lets us provide the callbacks to
> + * manage the memory needed during the match, so we can still use a small stack
> + * space that'll suffice for regexes up to POSIX_MALLOC_THRESHOLD captures (and
> + * not too complex), above that either use some thread local subpool cache (if
> + * AP_HAS_THREAD_LOCAL) or fall back to malloc()/free().
>   */
>  
> +#if AP_HAS_THREAD_LOCAL && !defined(APREG_NO_THREAD_LOCAL)
> +#define APREG_USE_THREAD_LOCAL 1
> +#else
> +#define APREG_USE_THREAD_LOCAL 0
> +#endif
> +
>  #ifdef HAVE_PCRE2
> -typedef pcre2_match_data* match_data_pt;
> -typedef size_t*           match_vector_pt;
> +typedef PCRE2_SIZE* match_vector_pt;
>  #else
> -typedef int*              match_data_pt;
> -typedef int*              match_vector_pt;
> +typedef int*        match_vector_pt;
>  #endif
>  
> -struct match_data_state
> -{
> -    match_data_pt data;
> -    char *buf;
> -    apr_size_t buf_len;
> +#if APREG_USE_THREAD_LOCAL
> +struct match_thread_state {
> +    char *heap;
> +    apr_size_t heap_size;
> +    apr_size_t heap_used;
> +    apr_pool_t *pool;
> +};
> +
> +AP_THREAD_LOCAL static struct match_thread_state *thread_state;
> +#endif
> +
> +struct match_data_state {
> +    /* keep first, struct aligned */
> +    char buf[AP_PCRE_STACKBUF_SIZE];
>      apr_size_t buf_used;
> +
> +#if APREG_USE_THREAD_LOCAL
> +    apr_thread_t *thd;
> +    struct match_thread_state *ts;
> +#endif
> +
>  #ifdef HAVE_PCRE2
>      pcre2_general_context *pcre2_ctx;
> +    pcre2_match_data* match_data;
> +#else
> +    int *match_data;
>  #endif
>  };
>  
>  static void * private_malloc(size_t size, void *ctx)
>  {
>      struct match_data_state *state = ctx;
> +    apr_size_t avail;
>  
> -    if(size <= (state->buf_len - state->buf_used)) {
> +    avail = sizeof(state->buf) - state->buf_used;
> +    if (avail >= size) {
>          void *p = state->buf + state->buf_used;
> +        size = APR_ALIGN_DEFAULT(size);
> +        if (size > avail) {
> +            size = avail;
> +        }
> +        state->buf_used += size;
> +        return p;
> +    }
>  
> -        state->buf_used += APR_ALIGN_DEFAULT(size);
> +#if APREG_USE_THREAD_LOCAL
> +    if (state->thd) {
> +        struct match_thread_state *ts = thread_state;
> +        void *p;
> +
> +        if (!ts) {
> +            apr_pool_t *tp = apr_thread_pool_get(state->thd);
> +            ts = apr_pcalloc(tp, sizeof(*ts));
> +            apr_pool_create(&ts->pool, tp);
> +            thread_state = state->ts = ts;
> +        }
> +        else if (!state->ts) {
> +            ts->heap_used = 0;
> +            state->ts = ts;
> +        }
>  
> +        avail = ts->heap_size - ts->heap_used;
> +        if (avail >= size) {
> +            size = APR_ALIGN_DEFAULT(size);
> +            if (size > avail) {
> +                size = avail;
> +            }
> +            p = ts->heap + ts->heap_used;
> +        }
> +        else {
> +            ts->heap_size *= 2;
> +            size = APR_ALIGN_DEFAULT(size);
> +            if (ts->heap_size < size) {
> +                ts->heap_size = size;
> +            }
> +            if (ts->heap_size < AP_PCRE_STACKBUF_SIZE * 2) {
> +                ts->heap_size = AP_PCRE_STACKBUF_SIZE * 2;
> +            }
> +            ts->heap = apr_palloc(ts->pool, ts->heap_size);
> +            ts->heap_used = 0;
> +            p = ts->heap;

I think that apr_palloc should be efficient enough for allocating memory quickly, and
that we don't need to reserve bigger memory chunks and manage them locally here; that
looks like a duplication of what apr_palloc already does.

> +        }
> +
> +        ts->heap_used += size;
>          return p;
>      }
> -    else {
> -        return malloc(size);
> -    }
> +#endif
> +
> +    return malloc(size);
>  }
>  
>  static void private_free(void *block, void *ctx)
>  {
>      struct match_data_state *state = ctx;
> -    void *buf_start = state->buf;
> -    void *buf_end = state->buf + state->buf_len;
> +    char *p = block;
>  
> -    if (block >= buf_start && block <= buf_end) {
> +    if (p >= state->buf && p < state->buf + sizeof(state->buf)) {
>          /* This block allocated from stack buffer. Do nothing. */
> +        return;
>      }
> -    else {
> -        free(block);
> +
> +#if APREG_USE_THREAD_LOCAL
> +    if (state->thd) {
> +        /* Freed in cleanup_state() eventually. */
> +        return;
>      }
> +#endif
> +
> +    free(block);
>  } 
>  
>  static APR_INLINE
> -match_data_pt alloc_match_data(apr_size_t size,
> -                               struct match_data_state *state,
> -                               void *stack_buf,
> -                               apr_size_t stack_buf_len)
> +int setup_state(struct match_data_state *state, apr_uint32_t ncaps)
>  {
> -    state->buf = stack_buf;
> -    state->buf_len = stack_buf_len;
>      state->buf_used = 0;
> +#if APREG_USE_THREAD_LOCAL
> +    state->thd = ap_thread_current();
> +    state->ts = NULL;
> +#endif
>  
>  #ifdef HAVE_PCRE2
> -    state->pcre2_ctx = pcre2_general_context_create(private_malloc, private_free, state);
> +    state->pcre2_ctx = pcre2_general_context_create(private_malloc,
> +                                                    private_free, state);
>      if (!state->pcre2_ctx) { 
> -        return NULL;
> +        return 0;
>      }
> -    state->data = pcre2_match_data_create((int)size, state->pcre2_ctx);
> -    if (!state->data) {
> +
> +    state->match_data = pcre2_match_data_create(ncaps, state->pcre2_ctx);
> +    if (!state->match_data) {
>          pcre2_general_context_free(state->pcre2_ctx);
> -        return NULL;
> +        return 0;
>      }
>  #else
> -    state->data = private_malloc(size * sizeof(int) * 3, state);
> +    if (ncaps) {
> +        state->match_data = private_malloc(ncaps * sizeof(int) * 3, state);
> +        if (!state->match_data) {
> +            return 0;
> +        }
> +    }
> +    else {
> +        /* Fine with PCRE1 */
> +        state->match_data = NULL;
> +    }
>  #endif
>  
> -    return state->data;
> +    return 1;
>  }
>  
>  static APR_INLINE
> -void free_match_data(struct match_data_state *state)
> +void cleanup_state(struct match_data_state *state)
>  {
>  #ifdef HAVE_PCRE2
> -    pcre2_match_data_free(state->data);
> +    pcre2_match_data_free(state->match_data);
>      pcre2_general_context_free(state->pcre2_ctx);
>  #else
> -    private_free(state->data, state);
> +    if (state->match_data) {
> +        private_free(state->match_data, state);
> +    }
> +#endif
> +
> +#if APREG_USE_THREAD_LOCAL
> +    if (state->ts) {
> +        /* Let the thread's pool allocator recycle or free according
> +         * to its max_free setting.
> +         */
> +        struct match_thread_state *ts = state->ts;
> +        apr_pool_clear(ts->pool);
> +        ts->heap_size = 0;
> +        ts->heap = NULL;
> +    }
>  #endif
>  }
>  

Regards

RĂ¼diger