Posted to dev@httpd.apache.org by Cliff Woolley <cl...@yahoo.com> on 2000/11/12 03:01:21 UTC

Implementing split() on pipe buckets?

Can you guys take a look at this and see what you think?  It's an attempt to get my
brain wrapped around the issues with implementing pipe_split().  Basically, I took
pipe_read() and made it into pipe_readn(), where pipe_readn() DOES obey the *len
input length, but behaves otherwise the same as pipe_read().  pipe_read() is now a
wrapper around pipe_readn() that calls it with *len=IOBUFSIZE.

Also included is a cleanup of the handling of failed malloc()'s, which was
previously just flagged with XXX's.  (Those fixes should go in even if
pipe_split() doesn't... I can separate them out into their own patch if you
want.)  I have a few questions, which I noted inline with new XXX's, and I
removed any XXX's that I felt no longer applied.

A question I didn't note inline: should pipe_split() check the length
returned from pipe_readn() against the requested length, and return
APR_EINVAL if they differ?  (It currently doesn't do this.)
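
Something like the following is what I have in mind, just a rough, untested
sketch layered on top of the pipe_readn() in the patch below (and ignoring the
nonblocking question for the moment):

    static apr_status_t pipe_split(ap_bucket *a, apr_off_t point)
    {
        apr_size_t len;
        const char *str;
        apr_status_t rv;

        if (point < 0) {
            return APR_EINVAL;
        }
        len = (apr_size_t)point;
        rv = pipe_readn(a, &str, &len, AP_NONBLOCK_READ);
        if (rv != APR_SUCCESS) {
            return rv;
        }
        /* pipe_readn() came up short of 'point' bytes: treat it like a
         * split past the end of the bucket, as other bucket types do */
        if (len < (apr_size_t)point) {
            return APR_EINVAL;
        }
        return APR_SUCCESS;
    }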

One thing I did note inline, but which might deserve more explanation, is the
question about the test for (*len > 0).  My understanding is that this inserts
a pipe bucket back into the stream any time the amount of data read is greater
than 0, even if there's nothing left to read.  In that case we don't detect the
exhausted pipe until after we've gone to all the trouble of inserting a new
pipe bucket into the stream, tried to read from it, and gotten 0 bytes back.
Is there any way to detect that the pipe is exhausted the first time around
and go ahead and close it if so?  Also, isn't it possible for *len to be 0
when block==AP_NONBLOCK_READ even if the pipe isn't really exhausted yet?
Should the test be for APR_EOF or something?  Or is it correct as it stands
and I'm missing something?
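
To make that last question concrete, the sort of test I'm wondering about at
the bottom of pipe_readn() would look roughly like this.  This is untested; I'm
assuming AP_BLOCK_READ is the blocking counterpart of AP_NONBLOCK_READ and that
rv still holds apr_read()'s status at that point:

    if (rv == APR_EOF || (*len == 0 && block == AP_BLOCK_READ)) {
        /* the pipe really is exhausted: close it rather than inserting
         * a new pipe bucket that we'd only read 0 bytes from later */
        apr_close(p);
    }
    else {
        /* more data may arrive later (or we just couldn't block for it),
         * so keep the rest of the pipe around */
        b = ap_bucket_create_pipe(p);
        AP_BUCKET_INSERT_AFTER(a, b);
    }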

--Cliff


Index: ap_buckets_pipe.c
===================================================================
RCS file: /home/cvspublic/apache-2.0/src/ap/ap_buckets_pipe.c,v
retrieving revision 1.20
diff -u -r1.20 ap_buckets_pipe.c
--- ap_buckets_pipe.c	2000/11/11 04:41:56	1.20
+++ ap_buckets_pipe.c	2000/11/12 01:31:00
@@ -56,9 +56,8 @@
 #include "ap_buckets.h"
 #include <stdlib.h>
 
-/* XXX: We should obey the block flag */
-static apr_status_t pipe_read(ap_bucket *a, const char **str,
-			      apr_size_t *len, ap_read_type block)
+static apr_status_t pipe_readn(ap_bucket *a, const char **str,
+			       apr_size_t *len, ap_read_type block)
 {
     apr_file_t *p = a->data;
     ap_bucket *b;
@@ -71,9 +70,11 @@
         apr_set_pipe_timeout(p, 0);
     }
 
-    buf = malloc(IOBUFSIZE); /* XXX: check for failure? */
+    buf = malloc(*len);
+    if (buf == NULL) {
+        return APR_ENOMEM;
+    }
     *str = buf;
-    *len = IOBUFSIZE;
     rv = apr_read(p, buf, len);
 
     if (block == AP_NONBLOCK_READ) {
@@ -89,11 +90,17 @@
      * Change the current bucket to refer to what we read,
      * even if we read nothing because we hit EOF.
      */
-    ap_bucket_make_heap(a, buf, *len, 0, NULL);  /* XXX: check for failure? */
+    a = ap_bucket_make_heap(a, buf, *len, 0, NULL);
+    if (a == NULL) {
+        *str = NULL;
+        free(buf);
+        return APR_ENOMEM;
+    }
+
     /*
      * If there's more to read we have to keep the rest of the pipe
      * for later.  Otherwise, we'll close the pipe.
-     * XXX: Note that more complicated bucket types that 
+     * Note that more complicated bucket types that 
      * refer to data not in memory and must therefore have a read()
      * function similar to this one should be wary of copying this
      * code because if they have a destroy function they probably
@@ -102,8 +109,12 @@
      * rather than destroying the old one and creating a completely
      * new bucket.
      */
-    if (*len > 0) {
+    if (*len > 0) {  /* XXX: should this check for AP_NONBLOCK_READ? */
         b = ap_bucket_create_pipe(p);
+        if (b == NULL) {
+            apr_close(p);
+            return APR_ENOMEM;
+        }
 	AP_BUCKET_INSERT_AFTER(a, b);
     }
     else {
@@ -112,6 +123,34 @@
     return APR_SUCCESS;
 }
 
+static apr_status_t pipe_read(ap_bucket *a, const char **str,
+			      apr_size_t *len, ap_read_type block)
+{
+    *len = IOBUFSIZE;
+    return pipe_readn(a, str, len, block);
+}
+
+static apr_status_t pipe_split(ap_bucket *a, apr_off_t point)
+{
+    apr_size_t len;
+    const char *str;
+
+    if (point < 0) {
+        return APR_EINVAL;
+    }
+    len = (apr_size_t)point;
+    /*
+     * pipe_readn() takes care of the split for us.  Its real
+     * purpose (providing str) is just treated as a side-effect
+     * here and str is ignored.  That's fine, since pipe_readn()
+     * splits the pipe into a heap bucket (containing str) and a
+     * pipe bucket (containing the rest of the pipe after len
+     * bytes) anyway.
+     */
+    /* XXX: should this use AP_NONBLOCK_READ or not? */
+    return pipe_readn(a, &str, &len, AP_NONBLOCK_READ);
+}
+
 AP_DECLARE(ap_bucket *) ap_bucket_make_pipe(ap_bucket *b, apr_file_t *p)
 {
     /*
@@ -144,5 +183,5 @@
     ap_bucket_destroy_notimpl,
     pipe_read,
     ap_bucket_setaside_notimpl,
-    ap_bucket_split_notimpl
+    pipe_split
 };



RE: Implementing split() on pipe buckets?

Posted by "William A. Rowe, Jr." <wr...@rowe-clan.net>.
> From: Tony Finch [mailto:dot@dotat.at]
> Sent: Monday, November 13, 2000 3:44 PM
> 
> "William A. Rowe, Jr." <wr...@rowe-clan.net> wrote:
> >
> >I agree with you that duplicate can't work on a pipe or socket, but I really
> >disagree here.  We discussed at the filtering meeting that there are several
> >key operations that must -always- be implemented, and -always- work.  These
> >would be create, destroy, read and split.
> 
> No, we agreed that split couldn't work for pipes.

Catching up on the thread, eh :-?

I didn't catch that part... but the way I finally put it together is that
we didn't really finish some of the edges (such as pipes/sockets and so on).
Doesn't matter - pipes/sockets don't split or dup, and we have accessors
for users who want them.

Re: Implementing split() on pipe buckets?

Posted by Tony Finch <do...@dotat.at>.
"William A. Rowe, Jr." <wr...@rowe-clan.net> wrote:
>
>I agree with you that duplicate can't work on a pipe or socket, but I really
>disagree here.  We discussed at the filtering meeting that there are several
>key operations that must -always- be implemented, and -always- work.  These
>would be create, destroy, read and split.

No, we agreed that split couldn't work for pipes.

Tony.
-- 
en oeccget g mtcaa    f.a.n.finch
v spdlkishrhtewe y    dot@dotat.at
eatp o v eiti i d.    fanf@covalent.net

RE: Implementing split() on pipe buckets?

Posted by "William A. Rowe, Jr." <wr...@rowe-clan.net>.
> From: rbb@covalent.net [mailto:rbb@covalent.net]
> Sent: Saturday, November 11, 2000 11:58 PM
> 
> > The cases we are discussing are all fifo problems.  Can't we have a common
> > wrapper that handles (with the extra error conditions) any fifo bucket, outside
> > of the explicit and atomic calls (split, duplicate) that offer predictable fifo
> > behavior for filters that don't care to work around these issues themselves?
> > e.g. ap_bucket_split_any, ap_bucket_duplicate_any, etc.  They can carry their
> > documented shortcomings, and the author who uses them does so at a cost?
> 
> fine, but those are Apache functions, not bucket functions.  These things
> cannot be implemented in the buckets code.  That is all I have been
> saying.  The bucket implementation does not allow for splitting pipes or
> sockets, so if we want to wrap those, then that is up to Apache.

That's fine... we can add these functions to util.c or somewhere appropriate in
the main or ap directory (which is an Apache lib).  In the long term, I'd argue
they can be considered bucket functions, which would need to grow if a lifo or
other goofy bucket case were added.

> I don't want to pollute the buckets with Apache specific ideas, and how we
> recover from not getting enough data is Apache specific.  If we want to
> implement a pipe/socket split, then it MUST be done at the Apache level.

Agreed, but I don't think these are Apache-specific.  Some bucket users
would like the functions, some (you, perhaps :-) wouldn't care for them,
and that argument would persist outside of Apache.

Speaking of which (uh oh, he's thinking and has coffee in hand :-) what do
folks think of moving the buckets into APR?  I believe we have proved they
aren't Apache specific entities.  If someone wants to make the case that
they are useful to (and would be used by) other projects, I'd be +1 to see
them in APR.

RE: Implementing split() on pipe buckets?

Posted by rb...@covalent.net.
> The cases we are discussing are all fifo problems.  Can't we have a common
> wrapper that handles (with the extra error conditions) any fifo bucket, outside
> of the explicit and atomic calls (split, duplicate) that offer predictable fifo
> behavior for filters that don't care to work around these issues themselves?
> e.g. ap_bucket_split_any, ap_bucket_duplicate_any, etc.  They can carry their
> documented shortcomings, and the author who uses them does so at a cost?

fine, but those are Apache functions, not bucket functions.  These things
cannot be implemented in the buckets code.  That is all I have been
saying.  The bucket implementation does not allow for splitting pipes or
sockets, so if we want to wrap those, then that is up to Apache.

I don't want to pollute the buckets with Apache specific ideas, and how we
recover from not getting enough data is Apache specific.  If we want to
implement a pipe/socket split, then it MUST be done at the Apache level.

Ryan

_______________________________________________________________________________
Ryan Bloom                        	rbb@apache.org
406 29th St.
San Francisco, CA 94131
-------------------------------------------------------------------------------


RE: Implementing split() on pipe buckets?

Posted by "William A. Rowe, Jr." <wr...@rowe-clan.net>.
> From: rbb@covalent.net [mailto:rbb@covalent.net]
> Sent: Saturday, November 11, 2000 10:26 PM
> 
> > I agree with you that duplicate can't work on a pipe or socket, but I really
> > disagree here.  We discussed at the filtering meeting that there are several
> > key operations that must -always- be implemented, and -always- work.  These
> > would be create, destroy, read and split.  Anything else is negotiable.
> 
> No, at the filter meeting, the only functions that we said would always
> work are create and read.  Everything is negotiable.  In this case, a
> split on a pipe or socket just can't be done cleanly.  I explained why in
> another message just now.

Destroy always works (it may simply be a noop if the bucket is immortal, or may
be deferred if the refcount isn't 0).

I'm not going to argue split either way, since the pipe/socket cases weren't
fully considered at that meeting.

The cases we are discussing are all fifo problems.  Can't we have a common
wrapper that handles (with the extra error conditions) any fifo bucket, outside
of the explicit and atomic calls (split, duplicate) that offer predictable fifo
behavior for filters that don't care to work around these issues themselves?
e.g. ap_bucket_split_any, ap_bucket_duplicate_any, etc.  They can carry their
documented shortcomings, and the author who uses them does so at a cost?

These aren't implemented bucket-by-bucket, so they won't carry any cost for the
bucket author.  They are predictable, so the filter author must be prepared to
handle additional error results (potential read+split or read+duplicate errors).

It's granular, saves code, and doesn't pollute buckets.  Is that a good compromise?
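
Roughly the shape I'm picturing, strictly as a hypothetical sketch: the names
ap_bucket_split_any, ap_bucket_read, ap_bucket_split and AP_BLOCK_READ below
just stand in for whatever the real dispatch calls end up being, and none of
it is tested.

    apr_status_t ap_bucket_split_any(ap_bucket *e, apr_off_t point)
    {
        const char *str;
        apr_size_t len;
        apr_status_t rv;

        rv = ap_bucket_split(e, point);
        if (rv != APR_ENOTIMPL) {
            return rv;   /* the bucket type handled it (or really failed) */
        }
        /* fifo bucket: force a read, which morphs it into an in-memory
         * bucket (plus a trailing fifo bucket for the remainder), then
         * retry the split on the in-memory data */
        rv = ap_bucket_read(e, &str, &len, AP_BLOCK_READ);
        if (rv != APR_SUCCESS) {
            return rv;
        }
        return ap_bucket_split(e, point);
    }

The caller has to be ready for the extra read errors, as I said, but the
bucket authors never see any of this.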

RE: Implementing split() on pipe buckets?

Posted by rb...@covalent.net.
> I agree with you that duplicate can't work on a pipe or socket, but I really
> disagree here.  We discussed at the filtering meeting that there are several
> key operations that must -always- be implemented, and -always- work.  These
> would be create, destroy, read and split.  Anything else is negotiable.

No, at the filter meeting, the only functions that we said would always
work are create and read.  Everything is negotiable.  In this case, a
split on a pipe or socket just can't be done cleanly.  I explained why in
another message just now.

> The question on the table should be how to split a pipe or socket, and if it
> must be a read/split, then so be it.  

There is NO way to do a clean split on a pipe or socket.  There are just
too many edge cases that would need to be taken care of.  The only place
that these issues can be cleanly handled is in the program.

> The rule is simple: a filter can never trust it knows every kind of bucket
> that is thrown its way, so it must have an API to retrieve the methods
> available, and the methods -preferred-.  e.g. read is better than split
> from one sort of pipe, duplicate is bad from another.  If the module author
> wishes to optimize, give them the info; don't keep throwing a ton of special
> rules and restrictions that every filter author is required to work around.

What ton of special rules?  There is a small set of rules:

Every bucket MUST implement read and create.

If a bucket has a -1 length, then the length cannot be determined.

Bucket functions should feel free to return APR_ENOTIMPL, which means the
program should fall back to some other method.

Ryan
_______________________________________________________________________________
Ryan Bloom                        	rbb@apache.org
406 29th St.
San Francisco, CA 94131
-------------------------------------------------------------------------------


RE: Implementing split() on pipe buckets?

Posted by "William A. Rowe, Jr." <wr...@rowe-clan.net>.
> From: rbb@covalent.net [mailto:rbb@covalent.net]
> Sent: Saturday, November 11, 2000 8:30 PM
> 
> I am against pipes and sockets having a split function.  They don't work
> the same way that the other buckets work, and they really can't.  This
> patch fixes the problem of a pipe split where there is enough data in the
> pipe to split at the desired length, because it splits the pipe into a
> heap and a pipe bucket at the correct place.  What if there isn't enough
> data to do the split at the correct place?  This patch doesn't handle that
> case.  All the other bucket types return an error if the split is past the
> end of the bucket.

I agree with you that duplicate can't work on a pipe or socket, but I really
disagree here.  We discussed at the filtering meeting that there are several
key operations that must -always- be implemented, and -always- work.  These
would be create, destroy, read and split.  Anything else is negotiable.
The question on the table should be how to split a pipe or socket, and if it
must be a read/split, then so be it.  

The rule is simple: a filter can never trust it knows every kind of bucket
that is thrown its way, so it must have an API to retrieve the methods
available, and the methods -preferred-.  e.g. read is better than split
from one sort of pipe, duplicate is bad from another.  If the module author
wishes to optimize, give them the info; don't keep throwing a ton of special
rules and restrictions that every filter author is required to work around.
Let the few do the working around, let the masses have clean code.  Or, in
other words, the benefit of the many outweighs the benefit to the few, or
the none :-)

Re: Implementing split() on pipe buckets?

Posted by rb...@covalent.net.
I am against pipes and sockets having a split function.  They don't work
the same way that the other buckets work, and they really can't.  This
patch fixes the problem of a pipe split where there is enough data in the
pipe to split at the desired length, because it splits the pipe into a
heap and a pipe bucket at the correct place.  What if there isn't enough
data to do the split at the correct place?  This patch doesn't handle that
case.  All the other bucket types return an error if the split is past the
end of the bucket.

The only logical way to deal with pipe and socket splits is to read the
data and then split it.  The split functions can't do this, because that
changes the definition of split, so it is up to the program itself.  The
resulting code isn't hard to read or understand; if anything, it makes it
a bit clearer what is happening.
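
Concretely, the program-level code amounts to something like this sketch
(hypothetical; ap_bucket_read/ap_bucket_split stand for the read and split
calls on the bucket, AP_BLOCK_READ for the blocking read type, and str, len
and rv are locals of whatever filter needs this):

    /* e starts out as a pipe bucket somewhere in the brigade; reading it
     * morphs it into a heap bucket plus a new pipe bucket for the rest */
    rv = ap_bucket_read(e, &str, &len, AP_BLOCK_READ);
    if (rv == APR_SUCCESS && (apr_off_t)len >= point) {
        /* e now refers to in-memory data, so a normal split works */
        rv = ap_bucket_split(e, point);
    }
    else {
        /* not enough data was available; the program decides whether to
         * read more, pass along what it has, or give up */
    }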

I am also against some of the malloc checks.  In general in Apache, we do
not check for NULL after calling malloc, because if malloc returns NULL
your machine is basically screwed and we can't really fail cleanly anyway.

Ryan


_______________________________________________________________________________
Ryan Bloom                        	rbb@apache.org
406 29th St.
San Francisco, CA 94131
-------------------------------------------------------------------------------