You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@httpd.apache.org by Rüdiger Plüm <ru...@vodafone.com> on 2011/11/04 09:08:27 UTC

Fwd: svn commit: r1197405 - in /httpd/httpd/trunk: CHANGES docs/manual/upgrading.xml modules/filters/mod_substitute.c


-------- Original-Nachricht --------
Betreff: 	svn commit: r1197405 - in /httpd/httpd/trunk: CHANGES docs/manual/upgrading.xml modules/filters/mod_substitute.c
Datum: 	Fri, 04 Nov 2011 05:35:54 GMT
Von: 	sf@apache.org



Author: sf
Date: Fri Nov  4 05:35:53 2011
New Revision: 1197405

URL: http://svn.apache.org/viewvc?rev=1197405&view=rev
Log:
To prevent overboarding memory usage, limit line length to 1MB

Modified:
     httpd/httpd/trunk/CHANGES
     httpd/httpd/trunk/docs/manual/upgrading.xml
     httpd/httpd/trunk/modules/filters/mod_substitute.c

Modified: httpd/httpd/trunk/modules/filters/mod_substitute.c
URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/modules/filters/mod_substitute.c?rev=1197405&r1=1197404&r2=1197405&view=diff
==============================================================================
--- httpd/httpd/trunk/modules/filters/mod_substitute.c (original)
+++ httpd/httpd/trunk/modules/filters/mod_substitute.c Fri Nov  4 05:35:53 2011
@@ -158,6 +164,9 @@ static void do_pattmatch(ap_filter_t *f,
                               * as its own bucket, then isolate the pattern
                               * and delete it.
                               */
+                            if (space_left<  len + repl_len)
+                                return APR_ENOMEM;
+                            space_left -= len + repl_len;


Why is this needed at all? IMHO we only consume memory by creating additional bucket structures,
but this does not depend on the length of the replacement strings.
As we use transient buckets below we use the same data (script->replacement) over and over
again for new buckets.

                              SEDRMPATBCKT(b, len, tmp_b, script->patlen);
                              /*
                               * Finally, we create a bucket that contains the
@@ -188,25 +197,36 @@ static void do_pattmatch(ap_filter_t *f,
                      int left = bytes;
                      const char *pos = buff;
                      char *repl;
+                    apr_size_t space_left = AP_SUBST_MAX_LINE_LENGTH;
                      while (!ap_regexec_len(script->regexp, pos, left,
                                         AP_MAX_REG_MATCH, regm, 0)) {
+                        apr_status_t rv;
                          have_match = 1;
                          if (script->flatten&&  !force_quick) {
                              /* copy bytes before the match */
                              if (regm[0].rm_so>  0)
                                  ap_varbuf_strmemcat(&vb, pos, regm[0].rm_so);
                              /* add replacement string */
-                            ap_varbuf_regsub(&vb, script->replacement, pos,
-                                             AP_MAX_REG_MATCH, regm, 0);
+                            rv = ap_varbuf_regsub(&vb, script->replacement, pos,
+                                                  AP_MAX_REG_MATCH, regm,
+                                                  AP_SUBST_MAX_LINE_LENGTH - vb.strlen);
+                            if (rv != APR_SUCCESS)
+                                return rv;
                          }
                          else {
-                            ap_pregsub_ex(pool,&repl, script->replacement, pos,
-                                              AP_MAX_REG_MATCH, regm, 0);
+                            apr_size_t repl_len;
+                            rv = ap_pregsub_ex(pool,&repl,
+                                               script->replacement, pos,
+                                               AP_MAX_REG_MATCH, regm,
+                                               space_left);
+                            if (rv != APR_SUCCESS)
+                                return rv;
                              len = (apr_size_t) (regm[0].rm_eo - regm[0].rm_so);
+                            repl_len = strlen(repl);
+                            space_left -= len + repl_len;


Isn't it sufficient to reduce space_left only by repl_len?
IMHO we only allocate repl_len new memory in ap_pregsub_ex and not len + repl_len or am I wrong here?



                              SEDRMPATBCKT(b, regm[0].rm_so, tmp_b, len);
-                            tmp_b = apr_bucket_transient_create(repl,
-                                             strlen(repl),
-                                             f->r->connection->bucket_alloc);
+                            tmp_b = apr_bucket_transient_create(repl, repl_len,
+                                                f->r->connection->bucket_alloc);
                              APR_BUCKET_INSERT_BEFORE(b, tmp_b);
                          }
                          /*

@@ -414,9 +450,8 @@ static apr_status_t substitute_filter(ap
                              rv = ap_pass_brigade(f->next, ctx->passbb);
                              apr_brigade_cleanup(ctx->passbb);
                              num = 0;
-                            apr_pool_clear(ctx->tpool);



Why don't we clear this pool any longer? It was cleared in both cases:
The error one and the none error one.
And calling  apr_pool_clear twice does not harm.


                              if (rv != APR_SUCCESS)
-                                return rv;
+                                goto err;
                          }
                          b = tmp_b;
                      }


Regards

Rüdiger


RE: Fwd: svn commit: r1197405 - in /httpd/httpd/trunk: CHANGES docs/manual/upgrading.xml modules/filters/mod_substitute.c

Posted by Stefan Fritsch <sf...@sfritsch.de>.
On Tue, 8 Nov 2011, "Plüm, Rüdiger, VF-Group" wrote:
>> -----Original Message-----
>> From: Stefan Fritsch [mailto:sf@sfritsch.de]
>> Sent: Dienstag, 8. November 2011 03:15
>> To: dev@httpd.apache.org
>> Subject: Re: Fwd: svn commit: r1197405 - in
>> /httpd/httpd/trunk: CHANGES docs/manual/upgrading.xml
>> modules/filters/mod_substitute.c
>>
>> On Sun, 6 Nov 2011, Stefan Fritsch wrote:
>>>>>> Isn't it sufficient to reduce space_left only by repl_len?
>>>>>> IMHO we only allocate repl_len new memory in
>> ap_pregsub_ex and not len
>>>>>> + repl_len or am I wrong here?
>>>>>>
>>>>>>
>>>>
>>>>
>>>> Any comment on that?
>>>
>>> That was also supposed to cause the line length to be
>> limited instead of the
>>> length of the replacement. But you are right, this is not
>> correct. I will
>>> look at it during the hackathon.
>>
>> I have looked more closely, and the code is actually correct.
>> It's purpose
>> is not to limit new memory allocation but the line length. I
>> have added a
>> few comments in r1199060 which hopefully make the intentions clear.
>>
>
>                             len = (apr_size_t) (regm[0].rm_eo - regm[0].rm_so);
> +                            repl_len = strlen(repl);
> +                            space_left -= len + repl_len;
>
>
> But len is the length of the string we remove correct?

Oops. True, I looked at the wrong part of the code where len has a 
different meaning :-(

> So the limit will be imposed on the sum of the length of the strings to replace plus the
> sum of all replacements for this line. So actually the length of the original
> and the resulting line can be much shorter than the allowed limit. Example:
>
> Lets say the limit is 10 chars per line.
> The source line is: 12345
> I do a s/12345/123456/.
> This would fail because the length of the string to replace was 5 and that of the replacement was 6.
> But the resulting line only would have a length of 6.
>
> But s/12345/123456/n would work (so not using regex).
>
> Does this approach really make sense and is comprehensible for users?
> Or do I simply fail to correctly understand the code?
>
> Maybe space_left above is meant to be
>
> space_left -= repl_len + (apr_size_t) (regm[0].rm_so - pos);

regm[0].rm_so is already counted from pos, but that's basically it. I hope 
I got it right in r1199410. Thanks again.

RE: Fwd: svn commit: r1197405 - in /httpd/httpd/trunk: CHANGES docs/manual/upgrading.xml modules/filters/mod_substitute.c

Posted by "Plüm, Rüdiger, VF-Group" <ru...@vodafone.com>.
 

> -----Original Message-----
> From: Stefan Fritsch [mailto:sf@sfritsch.de] 
> Sent: Dienstag, 8. November 2011 03:15
> To: dev@httpd.apache.org
> Subject: Re: Fwd: svn commit: r1197405 - in 
> /httpd/httpd/trunk: CHANGES docs/manual/upgrading.xml 
> modules/filters/mod_substitute.c
> 
> On Sun, 6 Nov 2011, Stefan Fritsch wrote:
> >>>> Isn't it sufficient to reduce space_left only by repl_len?
> >>>> IMHO we only allocate repl_len new memory in 
> ap_pregsub_ex and not len
> >>>> + repl_len or am I wrong here?
> >>>> 
> >>>> 
> >> 
> >> 
> >> Any comment on that?
> >
> > That was also supposed to cause the line length to be 
> limited instead of the 
> > length of the replacement. But you are right, this is not 
> correct. I will 
> > look at it during the hackathon.
> 
> I have looked more closely, and the code is actually correct. 
> It's purpose 
> is not to limit new memory allocation but the line length. I 
> have added a 
> few comments in r1199060 which hopefully make the intentions clear.
> 

                             len = (apr_size_t) (regm[0].rm_eo - regm[0].rm_so);
+                            repl_len = strlen(repl);
+                            space_left -= len + repl_len;


But len is the length of the string we remove correct?
So the limit will be imposed on the sum of the length of the strings to replace plus the
sum of all replacements for this line. So actually the length of the original
and the resulting line can be much shorter than the allowed limit. Example:

Lets say the limit is 10 chars per line.
The source line is: 12345
I do a s/12345/123456/.
This would fail because the length of the string to replace was 5 and that of the replacement was 6.
But the resulting line only would have a length of 6.

But s/12345/123456/n would work (so not using regex).

Does this approach really make sense and is comprehensible for users?
Or do I simply fail to correctly understand the code?

Maybe space_left above is meant to be

space_left -= repl_len + (apr_size_t) (regm[0].rm_so - pos);


Regards

Rüdiger

Re: Fwd: svn commit: r1197405 - in /httpd/httpd/trunk: CHANGES docs/manual/upgrading.xml modules/filters/mod_substitute.c

Posted by Stefan Fritsch <sf...@sfritsch.de>.
On Sun, 6 Nov 2011, Stefan Fritsch wrote:
>>>> Isn't it sufficient to reduce space_left only by repl_len?
>>>> IMHO we only allocate repl_len new memory in ap_pregsub_ex and not len
>>>> + repl_len or am I wrong here?
>>>> 
>>>> 
>> 
>> 
>> Any comment on that?
>
> That was also supposed to cause the line length to be limited instead of the 
> length of the replacement. But you are right, this is not correct. I will 
> look at it during the hackathon.

I have looked more closely, and the code is actually correct. It's purpose 
is not to limit new memory allocation but the line length. I have added a 
few comments in r1199060 which hopefully make the intentions clear.

Re: Fwd: svn commit: r1197405 - in /httpd/httpd/trunk: CHANGES docs/manual/upgrading.xml modules/filters/mod_substitute.c

Posted by Stefan Fritsch <sf...@sfritsch.de>.
On Sun, 6 Nov 2011, Ruediger Pluem wrote:
>>> @@ -188,25 +197,36 @@ static void do_pattmatch(ap_filter_t *f,
>>>                     int left = bytes;
>>>                     const char *pos = buff;
>>>                     char *repl;
>>> +                    apr_size_t space_left = AP_SUBST_MAX_LINE_LENGTH;
>>>                     while (!ap_regexec_len(script->regexp, pos, left,
>>>                                        AP_MAX_REG_MATCH, regm, 0)) {
>>> +                        apr_status_t rv;
>>>                         have_match = 1;
>>>                         if (script->flatten&&  !force_quick) {
>>>                             /* copy bytes before the match */
>>>                             if (regm[0].rm_so>  0)
>>>                                 ap_varbuf_strmemcat(&vb, pos,
>>> regm[0].rm_so);
>>>                             /* add replacement string */
>>> -                            ap_varbuf_regsub(&vb,
>>> script->replacement, pos,
>>> -                                             AP_MAX_REG_MATCH, regm, 0);
>>> +                            rv = ap_varbuf_regsub(&vb,
>>> script->replacement, pos,
>>> +                                                  AP_MAX_REG_MATCH,
>>> regm,
>>> +
>>> AP_SUBST_MAX_LINE_LENGTH - vb.strlen);
>>> +                            if (rv != APR_SUCCESS)
>>> +                                return rv;
>>>                         }
>>>                         else {
>>> -                            ap_pregsub_ex(pool,&repl,
>>> script->replacement, pos,
>>> -                                              AP_MAX_REG_MATCH, regm,
>>> 0);
>>> +                            apr_size_t repl_len;
>>> +                            rv = ap_pregsub_ex(pool,&repl,
>>> +                                               script->replacement, pos,
>>> +                                               AP_MAX_REG_MATCH, regm,
>>> +                                               space_left);
>>> +                            if (rv != APR_SUCCESS)
>>> +                                return rv;
>>>                             len = (apr_size_t) (regm[0].rm_eo -
>>> regm[0].rm_so);
>>> +                            repl_len = strlen(repl);
>>> +                            space_left -= len + repl_len;
>>>
>>>
>>> Isn't it sufficient to reduce space_left only by repl_len?
>>> IMHO we only allocate repl_len new memory in ap_pregsub_ex and not len
>>> + repl_len or am I wrong here?
>>>
>>>
>
>
> Any comment on that?

That was also supposed to cause the line length to be limited instead of 
the length of the replacement. But you are right, this is not correct. I 
will look at it during the hackathon.

Re: Fwd: svn commit: r1197405 - in /httpd/httpd/trunk: CHANGES docs/manual/upgrading.xml modules/filters/mod_substitute.c

Posted by Ruediger Pluem <rp...@apache.org>.

On 11/04/2011 05:04 PM, Stefan Fritsch wrote:
> On Fri, 4 Nov 2011, Rüdiger Plüm wrote:
>> Modified: httpd/httpd/trunk/modules/filters/mod_substitute.c
>> URL:
>> http://svn.apache.org/viewvc/httpd/httpd/trunk/modules/filters/mod_substitute.c?rev=1197405&r1=1197404&r2=1197405&view=diff
>>
>> ==============================================================================
>>
>> --- httpd/httpd/trunk/modules/filters/mod_substitute.c (original)
>> +++ httpd/httpd/trunk/modules/filters/mod_substitute.c Fri Nov  4
>> 05:35:53 2011
>> @@ -158,6 +164,9 @@ static void do_pattmatch(ap_filter_t *f,
>>                              * as its own bucket, then isolate the
>> pattern
>>                              * and delete it.
>>                              */
>> +                            if (space_left<  len + repl_len)
>> +                                return APR_ENOMEM;
>> +                            space_left -= len + repl_len;
>>
>>
>> Why is this needed at all? IMHO we only consume memory by creating
>> additional bucket structures,
>> but this does not depend on the length of the replacement strings.
>> As we use transient buckets below we use the same data
>> (script->replacement) over and over
>> again for new buckets.
> 
> I wanted the behavior to be more predictable for the user. For a single
> rule, we always force quick mode. IMHO, it would be confusing if the
> line-too-long error appears just because the user added a second rule.
> We could make the length limit only apply if quick mode has not been
> explicitly set. But the bucket structures can also use significant
> memory (as demonstrated by the range issue). Having the limit not depend
> on the number of buckets and therefore on the content of the line but
> only on the line length is more comprehensible, IMHO.

Fair enough.

> 
>>                             SEDRMPATBCKT(b, len, tmp_b, script->patlen);
>>                             /*
>>                              * Finally, we create a bucket that
>> contains the
>> @@ -188,25 +197,36 @@ static void do_pattmatch(ap_filter_t *f,
>>                     int left = bytes;
>>                     const char *pos = buff;
>>                     char *repl;
>> +                    apr_size_t space_left = AP_SUBST_MAX_LINE_LENGTH;
>>                     while (!ap_regexec_len(script->regexp, pos, left,
>>                                        AP_MAX_REG_MATCH, regm, 0)) {
>> +                        apr_status_t rv;
>>                         have_match = 1;
>>                         if (script->flatten&&  !force_quick) {
>>                             /* copy bytes before the match */
>>                             if (regm[0].rm_so>  0)
>>                                 ap_varbuf_strmemcat(&vb, pos,
>> regm[0].rm_so);
>>                             /* add replacement string */
>> -                            ap_varbuf_regsub(&vb,
>> script->replacement, pos,
>> -                                             AP_MAX_REG_MATCH, regm, 0);
>> +                            rv = ap_varbuf_regsub(&vb,
>> script->replacement, pos,
>> +                                                  AP_MAX_REG_MATCH,
>> regm,
>> +                                                 
>> AP_SUBST_MAX_LINE_LENGTH - vb.strlen);
>> +                            if (rv != APR_SUCCESS)
>> +                                return rv;
>>                         }
>>                         else {
>> -                            ap_pregsub_ex(pool,&repl,
>> script->replacement, pos,
>> -                                              AP_MAX_REG_MATCH, regm,
>> 0);
>> +                            apr_size_t repl_len;
>> +                            rv = ap_pregsub_ex(pool,&repl,
>> +                                               script->replacement, pos,
>> +                                               AP_MAX_REG_MATCH, regm,
>> +                                               space_left);
>> +                            if (rv != APR_SUCCESS)
>> +                                return rv;
>>                             len = (apr_size_t) (regm[0].rm_eo -
>> regm[0].rm_so);
>> +                            repl_len = strlen(repl);
>> +                            space_left -= len + repl_len;
>>
>>
>> Isn't it sufficient to reduce space_left only by repl_len?
>> IMHO we only allocate repl_len new memory in ap_pregsub_ex and not len
>> + repl_len or am I wrong here?
>>
>>


Any comment on that?

Regards

Rüdiger


Re: Fwd: svn commit: r1197405 - in /httpd/httpd/trunk: CHANGES docs/manual/upgrading.xml modules/filters/mod_substitute.c

Posted by Stefan Fritsch <sf...@sfritsch.de>.
On Fri, 4 Nov 2011, Rüdiger Plüm wrote:
> Modified: httpd/httpd/trunk/modules/filters/mod_substitute.c
> URL: 
> http://svn.apache.org/viewvc/httpd/httpd/trunk/modules/filters/mod_substitute.c?rev=1197405&r1=1197404&r2=1197405&view=diff
> ==============================================================================
> --- httpd/httpd/trunk/modules/filters/mod_substitute.c (original)
> +++ httpd/httpd/trunk/modules/filters/mod_substitute.c Fri Nov  4 05:35:53 
> 2011
> @@ -158,6 +164,9 @@ static void do_pattmatch(ap_filter_t *f,
>                              * as its own bucket, then isolate the pattern
>                              * and delete it.
>                              */
> +                            if (space_left<  len + repl_len)
> +                                return APR_ENOMEM;
> +                            space_left -= len + repl_len;
>
>
> Why is this needed at all? IMHO we only consume memory by creating additional 
> bucket structures,
> but this does not depend on the length of the replacement strings.
> As we use transient buckets below we use the same data (script->replacement) 
> over and over
> again for new buckets.

I wanted the behavior to be more predictable for the user. For a single 
rule, we always force quick mode. IMHO, it would be confusing if the 
line-too-long error appears just because the user added a second rule. We 
could make the length limit only apply if quick mode has not been 
explicitly set. But the bucket structures can also use significant memory 
(as demonstrated by the range issue). Having the limit not depend on the 
number of buckets and therefore on the content of the line but only on the 
line length is more comprehensible, IMHO.

>                             SEDRMPATBCKT(b, len, tmp_b, script->patlen);
>                             /*
>                              * Finally, we create a bucket that contains the
> @@ -188,25 +197,36 @@ static void do_pattmatch(ap_filter_t *f,
>                     int left = bytes;
>                     const char *pos = buff;
>                     char *repl;
> +                    apr_size_t space_left = AP_SUBST_MAX_LINE_LENGTH;
>                     while (!ap_regexec_len(script->regexp, pos, left,
>                                        AP_MAX_REG_MATCH, regm, 0)) {
> +                        apr_status_t rv;
>                         have_match = 1;
>                         if (script->flatten&&  !force_quick) {
>                             /* copy bytes before the match */
>                             if (regm[0].rm_so>  0)
>                                 ap_varbuf_strmemcat(&vb, pos, 
> regm[0].rm_so);
>                             /* add replacement string */
> -                            ap_varbuf_regsub(&vb, script->replacement, pos,
> -                                             AP_MAX_REG_MATCH, regm, 0);
> +                            rv = ap_varbuf_regsub(&vb, script->replacement, 
> pos,
> +                                                  AP_MAX_REG_MATCH, regm,
> +                                                  AP_SUBST_MAX_LINE_LENGTH - 
> vb.strlen);
> +                            if (rv != APR_SUCCESS)
> +                                return rv;
>                         }
>                         else {
> -                            ap_pregsub_ex(pool,&repl, script->replacement, 
> pos,
> -                                              AP_MAX_REG_MATCH, regm, 0);
> +                            apr_size_t repl_len;
> +                            rv = ap_pregsub_ex(pool,&repl,
> +                                               script->replacement, pos,
> +                                               AP_MAX_REG_MATCH, regm,
> +                                               space_left);
> +                            if (rv != APR_SUCCESS)
> +                                return rv;
>                             len = (apr_size_t) (regm[0].rm_eo - 
> regm[0].rm_so);
> +                            repl_len = strlen(repl);
> +                            space_left -= len + repl_len;
>
>
> Isn't it sufficient to reduce space_left only by repl_len?
> IMHO we only allocate repl_len new memory in ap_pregsub_ex and not len + 
> repl_len or am I wrong here?
>
>
>
>                             SEDRMPATBCKT(b, regm[0].rm_so, tmp_b, len);
> -                            tmp_b = apr_bucket_transient_create(repl,
> -                                             strlen(repl),
> - 
> f->r->connection->bucket_alloc);
> +                            tmp_b = apr_bucket_transient_create(repl, 
> repl_len,
> + 
> f->r->connection->bucket_alloc);
>                             APR_BUCKET_INSERT_BEFORE(b, tmp_b);
>                         }
>                         /*
>
> @@ -414,9 +450,8 @@ static apr_status_t substitute_filter(ap
>                             rv = ap_pass_brigade(f->next, ctx->passbb);
>                             apr_brigade_cleanup(ctx->passbb);
>                             num = 0;
> -                            apr_pool_clear(ctx->tpool);
>
>
>
> Why don't we clear this pool any longer? It was cleared in both cases:
> The error one and the none error one.
> And calling  apr_pool_clear twice does not harm.

Good catch. I will fix this shortly. As always, your review is very 
welcome.

Cheers,
Stefan