You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@apr.apache.org by Colm MacCárthaigh <co...@Redbrick.DCU.IE> on 2002/12/04 13:25:49 UTC

Re: Linux + TCP_CORK + IPv6 = Broken [PATCH]

On Wed, Dec 04, 2002 at 07:12:04AM -0500, Jeff Trawick wrote:
> Colm MacCárthaigh <co...@Redbrick.DCU.IE> writes:
> 
> > O.k., now that is interesting, another thing I've noticed is that when
> > the encoding is chunked, I can't replicate the problem. Presumably the
> > problem is related to how much data is trying to be sent down the
> > socket at once.
> 
> Well, note that Apache uses different socket APIs for dynamic content
> vs. static content, and chunking is only used for dynamic content.
> That should be related to the breakage.
> 
> Also, the other poster mentioned that SSL works over IPv6 but non-SSL
> doesn't on one box.  That also corresponds to the same difference in
> socket API usage.
> 
> We can't use sendfile for any dynamic content.  We can't use sendfile
> for encrypted static or encrypted dynamic content.  So it really seems
> that sendfile is key to the issue.

Looks very much like that is the case.

> But I'm confused because you already disabled sendfile usage and that
> didn't bring some amount of happiness.  Also, the path where we use
> sendfile is the path where we use TCP_CORK so how can there be a
> problem if we don't use sendfile()?
> 
> double-checking where TCP_CORK is used:
> 
> 1) APR_TCP_NOPUSH_FLAG is TCP_CORK on Linux
> 2) APR_TCP_NOPUSH socket option controls APR_TCP_NOPUSH_FLAG
> 3) apr_sendfile() for Linux is the only code that dorks with
>    APR_TCP_NOPUSH (and thus TCP_CORK)

Here's what I see: when I use --disable-sendfile, this is a successful
request:

poll([{fd=8, events=POLLIN, revents=POLLIN}], 1, 300000) = 1
read(8, "GET / HTTP/1.1\r\n", 8000)     = 16
gettimeofday({1039003895, 580353}, NULL) = 0
poll([{fd=8, events=POLLIN, revents=POLLIN}], 1, 300000) = 1
read(8, "Host: www.heanet.ie\r\n", 8000) = 21
poll([{fd=8, events=POLLIN, revents=POLLIN}], 1, 300000) = 1
read(8, "\r\n", 8000)                   = 2
stat64("/usr/local/apache/htdocs/", {st_mode=S_IFDIR|S_ISGID|0755,
st_size=4096, ...}) = 0
brk(0x814c000)                          = 0x814c000
stat64("/usr/local/apache/htdocs/index.html", {st_mode=S_IFREG|0644,
st_size=256, ...}) = 0
open("/usr/local/apache/htdocs/index.html", O_RDONLY) = 9
brk(0x814f000)                          = 0x814f000
read(8, 0x8146a30, 8000)                = -1 EAGAIN (Resource
temporarily unavailable)
setsockopt(8, SOL_TCP, TCP_NODELAY, [0], 4) = 0
setsockopt(8, SOL_TCP, TCP_CORK, [1], 4) = 0
writev(8, [{"HTTP/1.1 200 OK\r\nDate: Wed, 04 D"..., 248}], 1) = 248
sendfile(8, 9, [0], 256)                = 256
setsockopt(8, SOL_TCP, TCP_CORK, [0], 4) = 0
setsockopt(8, SOL_TCP, TCP_NODELAY, [1], 4) = 0
write(4, "2001:770:18:1:201:2ff:fef0:f281 "..., 90) = 90
poll(^C
 <unfinished ...>

and this is an unsuccessful one:

poll([{fd=8, events=POLLIN, revents=POLLIN}], 1, 300000) = 1
read(8, "GET / HTTP/1.1\r\n", 8000)     = 16
gettimeofday({1039003973, 629255}, NULL) = 0
poll([{fd=8, events=POLLIN, revents=POLLIN}], 1, 300000) = 1
read(8, "Host: www.heanet.ie\r\n", 8000) = 21
poll([{fd=8, events=POLLIN, revents=POLLIN}], 1, 300000) = 1
read(8, "\r\n", 8000)                   = 2
stat64("/usr/local/apache/htdocs/", {st_mode=S_IFDIR|S_ISGID|0755,
st_size=4096, ...}) = 0
brk(0x814c000)                          = 0x814c000
stat64("/usr/local/apache/htdocs/index.html", {st_mode=S_IFREG|0644,
st_size=256, ...}) = 0
open("/usr/local/apache/htdocs/index.html", O_RDONLY) = 9
brk(0x814f000)                          = 0x814f000
read(8, 0x8146a30, 8000)                = -1 EAGAIN (Resource
temporarily unavailable)
setsockopt(8, SOL_TCP, TCP_NODELAY, [0], 4) = 0
setsockopt(8, SOL_TCP, TCP_CORK, [1], 4) = 0
writev(8, [{"HTTP/1.1 200 OK\r\nDate: Wed, 04 D"..., 248}], 1) = 248
sendfile(8, 9, [0], 256)                = 256
setsockopt(8, SOL_TCP, TCP_CORK, [0], 4) = 0
setsockopt(8, SOL_TCP, TCP_NODELAY, [1], 4) = 0
write(4, "2001:770:18:1:201:2ff:fef0:f281 "..., 90) = 90
poll([{fd=8, events=POLLIN, revents=POLLIN}], 1, 15000) = 1
read(8, "", 8000)                       = 0
gettimeofday({1039003977, 946871}, NULL) = 0
shutdown(8, 1 /* send */)               = 0
poll([{fd=8, events=POLLIN, revents=POLLIN|POLLHUP}], 1, 2000) = 1
read(8, "", 512)                        = 0
close(8)                                = 0
read(5, 0xbffffb77, 1)                  = -1 EAGAIN (Resource
temporarily unavailable)
close(9)                                = 0
accept(3,  <unfinished ...>

> So I still don't understand why disabling sendfile usage didn't avoid
> the problem, unless it isn't specific to TCP_CORK after all.

Looks like --disable-sendfile doesn't work; it's still using
sendfile. Reading configure.in tells me you meant
--without-sendfile ;-) That does what would be expected: the
problem goes away.

I've written a patch to confirm that it really is
TCP_CORK, applied it, and tested it, and when I use the new
--disable-ipv6-tcp-cork option, everything is nice and happy :-)
whether I use sendfile or not.

My tests (and patch) were based on apr and apr-util from CVS,
with the 2.0.43 codebase, because CVS seems broken right now.

Index: configure.in
===================================================================
RCS file: /home/cvspublic/apr/configure.in,v
retrieving revision 1.506
diff -u -r1.506 configure.in
--- configure.in	2 Dec 2002 16:07:09 -0000	1.506
+++ configure.in	4 Dec 2002 10:29:45 -0000
@@ -1770,6 +1770,16 @@
 echo "${nl}Checking for IPv6 Networking support..."
 dnl Start of checking for IPv6 support...
 
+ipv6_tcp_cork=1
+AC_ARG_ENABLE(ipv6-tcp-cork, 
+  [  --disable-ipv6-tcp-cork Disable TCP_CORK with IPv6.],
+  [  if test "$enableval" = "no"; then
+        use_ipv6_tcp_cork=0
+     fi ],
+  [ use_ipv6_tcp_cork=1 ] )
+
+AC_SUBST(use_ipv6_tcp_cork)
+
 AC_ARG_ENABLE(ipv6,
   [  --disable-ipv6          Disable IPv6 support in APR.],
   [ if test "$enableval" = "no"; then
Index: include/apr.h.in
===================================================================
RCS file: /home/cvspublic/apr/include/apr.h.in,v
retrieving revision 1.117
diff -u -r1.117 apr.h.in
--- include/apr.h.in	22 Oct 2002 12:37:40 -0000	1.117
+++ include/apr.h.in	4 Dec 2002 10:29:46 -0000
@@ -171,6 +171,11 @@
  */
 #define APR_TCP_NOPUSH_FLAG       @apr_tcp_nopush_flag@
 
+/* Should we use "corked" TCP with IPv6 ? (this seems to be broken on
+ * linux
+ */
+#define APR_USE_IPV6_TCP_CORK     @use_ipv6_tcp_cork@
+
 /* Is the TCP_NODELAY socket option inherited from listening sockets?
 */
 #define APR_TCP_NODELAY_INHERITED @tcp_nodelay_inherited@
Index: network_io/unix/sockopt.c
===================================================================
RCS file: /home/cvspublic/apr/network_io/unix/sockopt.c,v
retrieving revision 1.63
diff -u -r1.63 sockopt.c
--- network_io/unix/sockopt.c	20 Nov 2002 03:50:21 -0000	1.63
+++ network_io/unix/sockopt.c	4 Dec 2002 10:29:46 -0000
@@ -259,9 +259,15 @@
         return APR_ENOTIMPL;
 #endif
     }
-    if (opt & APR_TCP_NOPUSH) {
+    if (opt & APR_TCP_NOPUSH
+#ifndef APR_USE_IPV6_TCP_CORK
+		    sd;fl sd;klfjss ds;lkfjds
+        && sock->remoteaddr->sa.sin.sin_family != APR_INET6
+#endif
+		    ) {
+    
 #if APR_TCP_NOPUSH_FLAG
-        if (apr_is_option_set(sock->netmask, APR_TCP_NOPUSH) != on) {
+       	if (apr_is_option_set(sock->netmask, APR_TCP_NOPUSH) != on) {
             int optlevel = IPPROTO_TCP;
             int optname = TCP_NODELAY;
-- 
colmmacc@redbrick.dcu.ie        PubKey: colmmacc+pgp@redbrick.dcu.ie  
Web:                                 http://devnull.redbrick.dcu.ie/ 

Re: Linux + TCP_CORK + IPv6 = Broken [PATCH]

Posted by "William A. Rowe, Jr." <wr...@apache.org>.
At 07:18 AM 12/4/2002, Colm MacCárthaigh wrote:
>> In the interest of tying up loose ends, I'm still concerned with your
>> observation that --disable-sendfile didn't do the right thing...  did
>> you "make distclean" before re-configuring?
>
>The problem there was that --disable-sendfile isnt an option configure
>knows anything about, the right one is --without-sendfile, which does 
>work, and does fix the problem. :-)

Note that 2.0.44 will introduce the directive "EnableSendfile off" ... which
gives you some run-time control.  Reason #1 for that patch was so that
folks could be guided to cripple sendfile without rebuilding [Reason #2
was that sendfile is broken in some kernels for any network files.]

Bill


Re: Linux + TCP_CORK + IPv6 = Broken [PATCH]

Posted by "William A. Rowe, Jr." <wr...@apache.org>.
At 07:18 AM 12/4/2002, Colm MacCárthaigh wrote:
>> In the interest of tying up loose ends, I'm still concerned with your
>> observation that --disable-sendfile didn't do the right thing...  did
>> you "make distclean" before re-configuring?
>
>The problem there was that --disable-sendfile isnt an option configure
>knows anything about, the right one is --without-sendfile, which does 
>work, and does fix the problem. :-)

Note that 2.0.44 will introduce the directive "EnableSendfile off" ... which
gives you some run-time control.  Reason #1 for that patch was so that
folks could be guided to cripple sendfile without rebuilding [Reason #2
was that sendfile is broken in some kernels for any network files.]

Bill


Re: Linux + TCP_CORK + IPv6 = Broken [PATCH]

Posted by Colm MacCárthaigh <co...@Redbrick.DCU.IE>.
On Thu, Dec 05, 2002 at 04:17:53PM +0000, Joe Orton wrote:
> To summarize some off-list dicussion - the kernel guys have said that
> using sendfile on IPv6 sockets may trigger bugs in cards which do
> hardware TCP checksumming for card/driver/OS combinations which support
> that. (since the cards have to know about IPv6, and that probably
> doesn't get tested much).

Cool :-) Just to point out, IPv6 + sendfile() works fine; IPv6 +
sendfile() + TCP_CORK = badness. For me anyway :-)

-- 
colmmacc@redbrick.dcu.ie        PubKey: colmmacc+pgp@redbrick.dcu.ie  
Web:                                 http://devnull.redbrick.dcu.ie/ 

Re: Linux + TCP_CORK + IPv6 = Broken [PATCH]

Posted by Colm MacCárthaigh <co...@Redbrick.DCU.IE>.
On Thu, Dec 05, 2002 at 04:17:53PM +0000, Joe Orton wrote:
> To summarize some off-list dicussion - the kernel guys have said that
> using sendfile on IPv6 sockets may trigger bugs in cards which do
> hardware TCP checksumming for card/driver/OS combinations which support
> that. (since the cards have to know about IPv6, and that probably
> doesn't get tested much).

Cool :-) Just to point out, IPv6 + sendfile() works fine; IPv6 +
sendfile() + TCP_CORK = badness. For me anyway :-)

-- 
colmmacc@redbrick.dcu.ie        PubKey: colmmacc+pgp@redbrick.dcu.ie  
Web:                                 http://devnull.redbrick.dcu.ie/ 

Re: Linux + TCP_CORK + IPv6 = Broken [PATCH]

Posted by Joe Orton <jo...@redhat.com>.
To summarize some off-list discussion - the kernel guys have said that
using sendfile on IPv6 sockets may trigger bugs in cards which do
hardware TCP checksumming for card/driver/OS combinations which support
that. (since the cards have to know about IPv6, and that probably
doesn't get tested much).

Colm confirmed using tcpdump traces that the "missing" response packets
were indeed getting sent with bad checksums. ("tcpdump -vv -s 1500 ip6"
is the magic invocation for the curious)

joe

Re: Linux + TCP_CORK + IPv6 = Broken [PATCH]

Posted by Joe Orton <jo...@redhat.com>.
To summarize some off-list discussion - the kernel guys have said that
using sendfile on IPv6 sockets may trigger bugs in cards which do
hardware TCP checksumming for card/driver/OS combinations which support
that. (since the cards have to know about IPv6, and that probably
doesn't get tested much).

Colm confirmed using tcpdump traces that the "missing" response packets
were indeed getting sent with bad checksums. ("tcpdump -vv -s 1500 ip6"
is the magic invocation for the curious)

joe

RE: Linux + TCP_CORK + IPv6 = Broken [PATCH]

Posted by Jeroen Massar <je...@unfix.org>.
Colm MacCárthaigh [mailto:colmmacc@Redbrick.DCU.IE] wrote:

> On Thu, Dec 05, 2002 at 12:58:48PM +0000, Colm MacCárthaigh wrote:
> > I'm using vanilla 2.4.18, from Debian kernel-source-2.4.18 
> , one machine
> > has the broadcom bcm5700 module, and the the other has the intel
> > e1000 module. 
> 
> O.k., more testing, I've tried it out on another box, which has
> a 3com network card and IDE disks, and it *doesnt* happen there.
> 
> Things the vulnerable machines have in common:
> 
>  SCSI hard-disks
>  Software RAID 1 set-up
>  GigE cards (bcm5700 , e1000)
>  Dell (2650, and 1650)
> 
> Things unaffected machine has:
> 
>  IDE disks
>  no RAID
>  3c905 network card.

I just recompiled the debian's 2.0.43 with --without-sendfile (push it
in the args list of debian/rules) and now www.sixxs.net works fully on
IPv4/IPv6 and IPv4-SSL and IPv6-SSL.

As for the hardware in that box, Linux 2.4.18, Celeron, IDE, ext3 fs's
and a:

eth0: RealTek RTL8139 Fast Ethernet at 0xd0800000, 00:10:dc:20:7c:7c,
IRQ 11
eth0:  Identified 8139 chip type 'RTL-8139B'

purgatory.unfix.org has 2.4.18, p100, IDE, ext3 and a Intel and DEC, but
IPv6 is tunneled and doesn't have that problem.
games.concepts.nl box has 2.4.19-pre5-ac3, Athlon, SCSI, ext3 and a
3c905C, this one doesn't have any problems.

I'll be running that first box with --without-sendfile :)

If you need any extra information, don't hesitate to speak up.

Greets,
 Jeroen


RE: Linux + TCP_CORK + IPv6 = Broken [PATCH]

Posted by Jeroen Massar <je...@unfix.org>.
Colm MacCárthaigh [mailto:colmmacc@Redbrick.DCU.IE] wrote:

> On Thu, Dec 05, 2002 at 12:58:48PM +0000, Colm MacCárthaigh wrote:
> > I'm using vanilla 2.4.18, from Debian kernel-source-2.4.18 
> , one machine
> > has the broadcom bcm5700 module, and the the other has the intel
> > e1000 module. 
> 
> O.k., more testing, I've tried it out on another box, which has
> a 3com network card and IDE disks, and it *doesnt* happen there.
> 
> Things the vulnerable machines have in common:
> 
>  SCSI hard-disks
>  Software RAID 1 set-up
>  GigE cards (bcm5700 , e1000)
>  Dell (2650, and 1650)
> 
> Things unaffected machine has:
> 
>  IDE disks
>  no RAID
>  3c905 network card.

I just recompiled the debian's 2.0.43 with --without-sendfile (push it
in the args list of debian/rules) and now www.sixxs.net works fully on
IPv4/IPv6 and IPv4-SSL and IPv6-SSL.

As for the hardware in that box, Linux 2.4.18, Celeron, IDE, ext3 fs's
and a:

eth0: RealTek RTL8139 Fast Ethernet at 0xd0800000, 00:10:dc:20:7c:7c,
IRQ 11
eth0:  Identified 8139 chip type 'RTL-8139B'

purgatory.unfix.org has 2.4.18, p100, IDE, ext3 and a Intel and DEC, but
IPv6 is tunneled and doesn't have that problem.
games.concepts.nl box has 2.4.19-pre5-ac3, Athlon, SCSI, ext3 and a
3c905C, this one doesn't have any problems.

I'll be running that first box with --without-sendfile :)

If you need any extra information, don't hesitate to speak up.

Greets,
 Jeroen


Re: Linux + TCP_CORK + IPv6 = Broken [PATCH]

Posted by Colm MacCárthaigh <co...@Redbrick.DCU.IE>.
On Thu, Dec 05, 2002 at 12:58:48PM +0000, Colm MacCárthaigh wrote:
> I'm using vanilla 2.4.18, from Debian kernel-source-2.4.18 , one machine
> has the broadcom bcm5700 module, and the the other has the intel
> e1000 module. 

O.k., more testing: I've tried it out on another box, which has
a 3com network card and IDE disks, and it *doesn't* happen there.

Things the vulnerable machines have in common:

 SCSI hard-disks
 Software RAID 1 set-up
 GigE cards (bcm5700 , e1000)
 Dell (2650, and 1650)

Things unaffected machine has:

 IDE disks
 no RAID
 3c905 network card.

So unless both the bcm5700 and e1000 drivers are affected, it's
probably not that. To test if it was the RAID stuff, I moved
my stuff to a non-RAID SCSI partition, and that didn't fix
anything. So I think that can be ruled out. I haven't got any
idea what's causing it, but bottom line, it looks less and less
likely that this is a bug with Apache, and more like TCP_CORK
+ IPv6 is broken on some Linux setups.

-- 
colmmacc@redbrick.dcu.ie        PubKey: colmmacc+pgp@redbrick.dcu.ie  
Web:                                 http://devnull.redbrick.dcu.ie/ 

Re: Linux + TCP_CORK + IPv6 = Broken [PATCH]

Posted by Colm MacCárthaigh <co...@Redbrick.DCU.IE>.
On Thu, Dec 05, 2002 at 12:58:48PM +0000, Colm MacCárthaigh wrote:
> I'm using vanilla 2.4.18, from Debian kernel-source-2.4.18 , one machine
> has the broadcom bcm5700 module, and the the other has the intel
> e1000 module. 

O.k., more testing: I've tried it out on another box, which has
a 3com network card and IDE disks, and it *doesn't* happen there.

Things the vulnerable machines have in common:

 SCSI hard-disks
 Software RAID 1 set-up
 GigE cards (bcm5700 , e1000)
 Dell (2650, and 1650)

Things unaffected machine has:

 IDE disks
 no RAID
 3c905 network card.

So unless both the bcm5700 and e1000 drivers are affected, it's
probably not that. To test if it was the RAID stuff, I moved
my stuff to a non-RAID SCSI partition, and that didn't fix
anything. So I think that can be ruled out. I haven't got any
idea what's causing it, but bottom line, it looks less and less
likely that this is a bug with Apache, and more like TCP_CORK
+ IPv6 is broken on some Linux setups.

-- 
colmmacc@redbrick.dcu.ie        PubKey: colmmacc+pgp@redbrick.dcu.ie  
Web:                                 http://devnull.redbrick.dcu.ie/ 

Re: Linux + TCP_CORK + IPv6 = Broken [PATCH]

Posted by Colm MacCárthaigh <co...@Redbrick.DCU.IE>.
On Thu, Dec 05, 2002 at 11:30:29AM +0000, Joe Orton wrote:
> On Thu, Dec 05, 2002 at 10:54:53AM +0000, Colm MacCárthaigh wrote:
> ..
> > telnet [v6addr] 80
> > GET / HTTP/1.1
> > Host: madeup.tld
> > 
> > <observe lack of response>
> 
> Have you verified with tcpdump/ethereal etc that this hang is because
> the server is not sending the packets? What does netstat -t show for
> this connection?
> 
> That test works fine for me here between two 2.4.18-based boxes - are
> you using vanilla unpatched 2.4.18?

I'm using vanilla 2.4.18, from Debian kernel-source-2.4.18; one machine
has the broadcom bcm5700 module, and the other has the intel
e1000 module. Just to make doubly sure,

netstat -t shows:

tcp        0    504 2001:770:18:2:206:5b:80 2001:770:18:1:201:33128 ESTABLISHED

ethereal tells me that the first request is happening fine:
it sees the GET and then returns the 200. The second request,
however, doesn't do this; it sends 4 HTTP 200's before issuing
2 FIN/ACK's and an RST. I'm seeing the same thing from the
client side, but no output in telnet. So it is sending *something*,
just a very broken something.

-- 
colmmacc@redbrick.dcu.ie        PubKey: colmmacc+pgp@redbrick.dcu.ie  
Web:                                 http://devnull.redbrick.dcu.ie/ 

Re: Linux + TCP_CORK + IPv6 = Broken [PATCH]

Posted by Colm MacCárthaigh <co...@Redbrick.DCU.IE>.
On Thu, Dec 05, 2002 at 11:30:29AM +0000, Joe Orton wrote:
> On Thu, Dec 05, 2002 at 10:54:53AM +0000, Colm MacCárthaigh wrote:
> ..
> > telnet [v6addr] 80
> > GET / HTTP/1.1
> > Host: madeup.tld
> > 
> > <observe lack of response>
> 
> Have you verified with tcpdump/ethereal etc that this hang is because
> the server is not sending the packets? What does netstat -t show for
> this connection?
> 
> That test works fine for me here between two 2.4.18-based boxes - are
> you using vanilla unpatched 2.4.18?

I'm using vanilla 2.4.18, from Debian kernel-source-2.4.18; one machine
has the broadcom bcm5700 module, and the other has the intel
e1000 module. Just to make doubly sure,

netstat -t shows:

tcp        0    504 2001:770:18:2:206:5b:80 2001:770:18:1:201:33128 ESTABLISHED

ethereal tells me that the first request is happening fine:
it sees the GET and then returns the 200. The second request,
however, doesn't do this; it sends 4 HTTP 200's before issuing
2 FIN/ACK's and an RST. I'm seeing the same thing from the
client side, but no output in telnet. So it is sending *something*,
just a very broken something.

-- 
colmmacc@redbrick.dcu.ie        PubKey: colmmacc+pgp@redbrick.dcu.ie  
Web:                                 http://devnull.redbrick.dcu.ie/ 

Re: Linux + TCP_CORK + IPv6 = Broken [PATCH]

Posted by Joe Orton <jo...@redhat.com>.
On Thu, Dec 05, 2002 at 10:54:53AM +0000, Colm MacCárthaigh wrote:
..
> telnet [v6addr] 80
> GET / HTTP/1.1
> Host: madeup.tld
> 
> <observe lack of response>

Have you verified with tcpdump/ethereal etc that this hang is because
the server is not sending the packets? What does netstat -t show for
this connection?

That test works fine for me here between two 2.4.18-based boxes - are
you using vanilla unpatched 2.4.18?

joe

Re: Linux + TCP_CORK + IPv6 = Broken [PATCH]

Posted by Joe Orton <jo...@redhat.com>.
On Thu, Dec 05, 2002 at 10:54:53AM +0000, Colm MacCárthaigh wrote:
..
> telnet [v6addr] 80
> GET / HTTP/1.1
> Host: madeup.tld
> 
> <observe lack of response>

Have you verified with tcpdump/ethereal etc that this hang is because
the server is not sending the packets? What does netstat -t show for
this connection?

That test works fine for me here between two 2.4.18-based boxes - are
you using vanilla unpatched 2.4.18?

joe

Re: Linux + TCP_CORK + IPv6 = Broken [PATCH]

Posted by Colm MacCárthaigh <co...@Redbrick.DCU.IE>.
On Thu, Dec 05, 2002 at 07:16:02AM +0000, Joe Orton wrote:
> On Wed, Dec 04, 2002 at 11:38:06PM +0000, Colm MacCárthaigh wrote:
> > On Wed, Dec 04, 2002 at 11:05:21PM +0000, Joe Orton wrote:
> > > Colm, can you try running the apr/test/sendfile binary with your
> > > machines?
> > 
> > It was one of the first things I tried when I was debugging,
> > unfortunately, it doesnt seem to support IPv6 from the client side
> > at all, I get:
> >
> > colmmacc@byron:~/apr/test$ ./sendfile client blocking orion.ipv6.heanet.ie
> > Creating a test file...
> > apr_sockaddr_info_get()->670005/No address associated with hostname
> 
> Oh, sorry, I forgot to mention, you have to change the "family =
> APR_INET" line to "family = APR_INET6" to make it work over IPv6.

O.k., I've done that, and yes, it works for me. sendfile.c
creates and deletes the file it sends, though; it also only
serves it once, and the bug only appears when you do it twice.

In order to see the bug, sendfile.c would have to try to
send the same file down two different sockets, one after
another, and monitor the second one. I patched it to fix
this (patch is below), but it isn't displaying the problem.

There are a few slight differences in what it's doing compared
to httpd though. Firstly, it's the client end that's sending
the file here, which might be relevant. 

The way I can reliably reproduce the bug is:

./configure
make
make install
dd bs=1 count=256 if=/dev/zero of=/usr/local/apache2/htdocs/index.html
/usr/local/apache2/bin/apachectl start

[ go to other machine ]

telnet [v6addr] 80
GET / HTTP/1.1
Host: madeup.tld

<observe response>

telnet [v6addr] 80
GET / HTTP/1.1
Host: madeup.tld

<observe lack of response>

Index: sendfile.c
===================================================================
RCS file: /home/cvspublic/apr/test/sendfile.c,v
retrieving revision 1.23
diff -u -r1.23 sendfile.c
--- sendfile.c	20 Nov 2002 03:50:22 -0000	1.23
+++ sendfile.c	5 Dec 2002 10:40:39 -0000
@@ -222,9 +222,9 @@
     int family;
     apr_sockaddr_t *destsa;
 
-    family = APR_INET;
+    family = APR_INET6;
     apr_setup(&p, &sock, &family);
-    create_testfile(p, TESTFILE);
+    /* create_testfile(p, TESTFILE); */
 
     rv = apr_file_open(&f, TESTFILE, APR_READ, 0, p);
     if (rv != APR_SUCCESS) {
@@ -492,13 +492,13 @@
 
     printf("client: apr_socket_sendfile() worked as expected!\n");
 
-    rv = apr_file_remove(TESTFILE, p);
+/*    rv = apr_file_remove(TESTFILE, p);
     if (rv != APR_SUCCESS) {
         fprintf(stderr, "apr_file_remove()->%d/%s\n",
                 rv,
 		apr_strerror(rv, buf, sizeof buf));
         exit(1);
-    }
+    } */
 
     return 0;
 }
@@ -752,6 +752,8 @@
             host = argv[3];
         }	
         if (!strcmp(argv[2], "blocking")) {
+	    client(BLK, host);
+	    sleep(10);
             return client(BLK, host);
         }
         else if (!strcmp(argv[2], "timeout")) {
-- 
colmmacc@redbrick.dcu.ie        PubKey: colmmacc+pgp@redbrick.dcu.ie  
Web:                                 http://devnull.redbrick.dcu.ie/ 

Re: Linux + TCP_CORK + IPv6 = Broken [PATCH]

Posted by Colm MacCárthaigh <co...@Redbrick.DCU.IE>.
On Thu, Dec 05, 2002 at 07:16:02AM +0000, Joe Orton wrote:
> On Wed, Dec 04, 2002 at 11:38:06PM +0000, Colm MacCárthaigh wrote:
> > On Wed, Dec 04, 2002 at 11:05:21PM +0000, Joe Orton wrote:
> > > Colm, can you try running the apr/test/sendfile binary with your
> > > machines?
> > 
> > It was one of the first things I tried when I was debugging,
> > unfortunately, it doesnt seem to support IPv6 from the client side
> > at all, I get:
> >
> > colmmacc@byron:~/apr/test$ ./sendfile client blocking orion.ipv6.heanet.ie
> > Creating a test file...
> > apr_sockaddr_info_get()->670005/No address associated with hostname
> 
> Oh, sorry, I forgot to mention, you have to change the "family =
> APR_INET" line to "family = APR_INET6" to make it work over IPv6.

O.k., I've done that, and yes, it works for me. sendfile.c
creates and deletes the file it sends, though; it also only
serves it once, and the bug only appears when you do it twice.

In order to see the bug, sendfile.c would have to try to
send the same file down two different sockets, one after
another, and monitor the second one. I patched it to fix
this (patch is below), but it isn't displaying the problem.

There are a few slight differences in what it's doing compared
to httpd though. Firstly, it's the client end that's sending
the file here, which might be relevant. 

The way I can reliably reproduce the bug is:

./configure
make
make install
dd bs=1 count=256 if=/dev/zero of=/usr/local/apache2/htdocs/index.html
/usr/local/apache2/bin/apachectl start

[ go to other machine ]

telnet [v6addr] 80
GET / HTTP/1.1
Host: madeup.tld

<observe response>

telnet [v6addr] 80
GET / HTTP/1.1
Host: madeup.tld

<observe lack of response>

Index: sendfile.c
===================================================================
RCS file: /home/cvspublic/apr/test/sendfile.c,v
retrieving revision 1.23
diff -u -r1.23 sendfile.c
--- sendfile.c	20 Nov 2002 03:50:22 -0000	1.23
+++ sendfile.c	5 Dec 2002 10:40:39 -0000
@@ -222,9 +222,9 @@
     int family;
     apr_sockaddr_t *destsa;
 
-    family = APR_INET;
+    family = APR_INET6;
     apr_setup(&p, &sock, &family);
-    create_testfile(p, TESTFILE);
+    /* create_testfile(p, TESTFILE); */
 
     rv = apr_file_open(&f, TESTFILE, APR_READ, 0, p);
     if (rv != APR_SUCCESS) {
@@ -492,13 +492,13 @@
 
     printf("client: apr_socket_sendfile() worked as expected!\n");
 
-    rv = apr_file_remove(TESTFILE, p);
+/*    rv = apr_file_remove(TESTFILE, p);
     if (rv != APR_SUCCESS) {
         fprintf(stderr, "apr_file_remove()->%d/%s\n",
                 rv,
 		apr_strerror(rv, buf, sizeof buf));
         exit(1);
-    }
+    } */
 
     return 0;
 }
@@ -752,6 +752,8 @@
             host = argv[3];
         }	
         if (!strcmp(argv[2], "blocking")) {
+	    client(BLK, host);
+	    sleep(10);
             return client(BLK, host);
         }
         else if (!strcmp(argv[2], "timeout")) {
-- 
colmmacc@redbrick.dcu.ie        PubKey: colmmacc+pgp@redbrick.dcu.ie  
Web:                                 http://devnull.redbrick.dcu.ie/ 

Re: Linux + TCP_CORK + IPv6 = Broken [PATCH]

Posted by Joe Orton <jo...@redhat.com>.
On Wed, Dec 04, 2002 at 11:38:06PM +0000, Colm MacCárthaigh wrote:
> On Wed, Dec 04, 2002 at 11:05:21PM +0000, Joe Orton wrote:
> > Colm, can you try running the apr/test/sendfile binary with your
> > machines?
> 
> It was one of the first things I tried when I was debugging,
> unfortunately, it doesnt seem to support IPv6 from the client side
> at all, I get:
>
> colmmacc@byron:~/apr/test$ ./sendfile client blocking orion.ipv6.heanet.ie
> Creating a test file...
> apr_sockaddr_info_get()->670005/No address associated with hostname

Oh, sorry, I forgot to mention, you have to change the "family =
APR_INET" line to "family = APR_INET6" to make it work over IPv6.

Regards,

joe

Re: Linux + TCP_CORK + IPv6 = Broken [PATCH]

Posted by Joe Orton <jo...@redhat.com>.
On Wed, Dec 04, 2002 at 11:38:06PM +0000, Colm MacCárthaigh wrote:
> On Wed, Dec 04, 2002 at 11:05:21PM +0000, Joe Orton wrote:
> > Colm, can you try running the apr/test/sendfile binary with your
> > machines?
> 
> It was one of the first things I tried when I was debugging,
> unfortunately, it doesnt seem to support IPv6 from the client side
> at all, I get:
>
> colmmacc@byron:~/apr/test$ ./sendfile client blocking orion.ipv6.heanet.ie
> Creating a test file...
> apr_sockaddr_info_get()->670005/No address associated with hostname

Oh, sorry, I forgot to mention, you have to change the "family =
APR_INET" line to "family = APR_INET6" to make it work over IPv6.

Regards,

joe

Re: Linux + TCP_CORK + IPv6 = Broken [PATCH]

Posted by Colm MacCárthaigh <co...@Redbrick.DCU.IE>.
On Wed, Dec 04, 2002 at 11:05:21PM +0000, Joe Orton wrote:
> On Wed, Dec 04, 2002 at 08:21:36AM -0500, Jeff Trawick wrote:
> > Colm MacCárthaigh <co...@Redbrick.DCU.IE> writes:
> > 
> > > On Wed, Dec 04, 2002 at 12:25:49PM +0000, Colm MacCárthaigh wrote:
> > > > My tests (and patch) were based on apr and apr-util from CVS , 
> > > > with the 2.0.43 codebase, because CVS seems broken right now.
> > > 
> > > stupid pre-test patch, here's the real one:
> > 
> > patch looks basically right to me...  probably is exactly right...
> > unless somebody beats me to it I'll play with it and commit it... nag
> > me directly if it isn't committed in 36 hours or so :)
> 
> It would seem prudent to confirm this is a kernel bug before adding a
> workaround to APR.  APR's test/sendfile works fine here over IPv6
> between two 2.4.18-based kernels (RHL 8.0), and is doing basically the
> same thing as httpd, albeit without the TCP_NODELAY - maybe that makes a
> difference.

TCP_CORK and TCP_NODELAY are mutually exclusive anyway, so
it wouldn't make any difference :) As for its status as
a kernel bug, I'm more and more certain that it is; I
don't think the kernel is recycling a file descriptor
properly (this is purely based on kdb, I haven't started
trawling source yet). I'm more familiar with the apache
source (grep -c colmmacc CHANGES ;), it's a while
since I played with the Linux kernel internals.

> Colm, can you try running the apr/test/sendfile binary with your
> machines?

It was one of the first things I tried when I was debugging;
unfortunately, it doesn't seem to support IPv6 from the client side
at all, I get:

colmmacc@byron:~/apr/test$ ./sendfile client blocking orion.ipv6.heanet.ie
Creating a test file...
apr_sockaddr_info_get()->670005/No address associated with hostname

colmmacc@byron:~/apr/test$ host -t AAAA orion.ipv6.heanet.ie
orion.ipv6.heanet.ie    AAAA    2001:770:18:2:206:5BFF:FE8D:2402
colmmacc@byron:~/apr/test$ ./sendfile client blocking 2001:770:18:2:206:5BFF:FE8D:2402
Creating a test file...
apr_sockaddr_info_get()->670009/Address family for hostname not
supported

Although, the server process does seem to be listening on IPv6. 

tcp        0      0 :::8021                 :::*	LISTEN

-- 
colmmacc@redbrick.dcu.ie        PubKey: colmmacc+pgp@redbrick.dcu.ie  
Web:                                 http://devnull.redbrick.dcu.ie/ 

Re: Linux + TCP_CORK + IPv6 = Broken [PATCH]

Posted by Colm MacCárthaigh <co...@Redbrick.DCU.IE>.
On Wed, Dec 04, 2002 at 11:05:21PM +0000, Joe Orton wrote:
> On Wed, Dec 04, 2002 at 08:21:36AM -0500, Jeff Trawick wrote:
> > Colm MacCárthaigh <co...@Redbrick.DCU.IE> writes:
> > 
> > > On Wed, Dec 04, 2002 at 12:25:49PM +0000, Colm MacCárthaigh wrote:
> > > > My tests (and patch) were based on apr and apr-util from CVS , 
> > > > with the 2.0.43 codebase, because CVS seems broken right now.
> > > 
> > > stupid pre-test patch, here's the real one:
> > 
> > patch looks basically right to me...  probably is exactly right...
> > unless somebody beats me to it I'll play with it and commit it... nag
> > me directly if it isn't committed in 36 hours or so :)
> 
> It would seem prudent to confirm this is a kernel bug before adding a
> workaround to APR.  APR's test/sendfile works fine here over IPv6
> between two 2.4.18-based kernels (RHL 8.0), and is doing basically the
> same thing as httpd, albeit without the TCP_NODELAY - maybe that makes a
> difference.

TCP_CORK and TCP_NODELAY are mutually exclusive anyway, so
it wouldn't make any difference :) As for its status as
a kernel bug, I'm more and more certain that it is; I
don't think the kernel is recycling a file descriptor
properly (this is purely based on kdb, I haven't started
trawling source yet). I'm more familiar with the apache
source (grep -c colmmacc CHANGES ;), it's a while
since I played with the Linux kernel internals.

> Colm, can you try running the apr/test/sendfile binary with your
> machines?

It was one of the first things I tried when I was debugging;
unfortunately, it doesn't seem to support IPv6 from the client side
at all, I get:

colmmacc@byron:~/apr/test$ ./sendfile client blocking orion.ipv6.heanet.ie
Creating a test file...
apr_sockaddr_info_get()->670005/No address associated with hostname

colmmacc@byron:~/apr/test$ host -t AAAA orion.ipv6.heanet.ie
orion.ipv6.heanet.ie    AAAA    2001:770:18:2:206:5BFF:FE8D:2402
colmmacc@byron:~/apr/test$ ./sendfile client blocking 2001:770:18:2:206:5BFF:FE8D:2402
Creating a test file...
apr_sockaddr_info_get()->670009/Address family for hostname not
supported

Although, the server process does seem to be listening on IPv6. 

tcp        0      0 :::8021                 :::*	LISTEN

-- 
colmmacc@redbrick.dcu.ie        PubKey: colmmacc+pgp@redbrick.dcu.ie  
Web:                                 http://devnull.redbrick.dcu.ie/ 

Re: Linux + TCP_CORK + IPv6 = Broken [PATCH]

Posted by Joe Orton <jo...@redhat.com>.
On Wed, Dec 04, 2002 at 08:21:36AM -0500, Jeff Trawick wrote:
> Colm MacCárthaigh <co...@Redbrick.DCU.IE> writes:
> 
> > On Wed, Dec 04, 2002 at 12:25:49PM +0000, Colm MacCárthaigh wrote:
> > > My tests (and patch) were based on apr and apr-util from CVS , 
> > > with the 2.0.43 codebase, because CVS seems broken right now.
> > 
> > stupid pre-test patch, here's the real one:
> 
> patch looks basically right to me...  probably is exactly right...
> unless somebody beats me to it I'll play with it and commit it... nag
> me directly if it isn't committed in 36 hours or so :)

It would seem prudent to confirm this is a kernel bug before adding a
workaround to APR.  APR's test/sendfile works fine here over IPv6
between two 2.4.18-based kernels (RHL 8.0), and is doing basically the
same thing as httpd, albeit without the TCP_NODELAY - maybe that makes a
difference.

Colm, can you try running the apr/test/sendfile binary with your
machines? I used it like:

one$ ./sendfile server 
two$ ./sendfile client blocking <address-of-one>

Regards,

joe


Re: Linux + TCP_CORK + IPv6 = Broken [PATCH]

Posted by Joe Orton <jo...@redhat.com>.
On Wed, Dec 04, 2002 at 08:21:36AM -0500, Jeff Trawick wrote:
> Colm MacCárthaigh <co...@Redbrick.DCU.IE> writes:
> 
> > On Wed, Dec 04, 2002 at 12:25:49PM +0000, Colm MacCárthaigh wrote:
> > > My tests (and patch) were based on apr and apr-util from CVS , 
> > > with the 2.0.43 codebase, because CVS seems broken right now.
> > 
> > stupid pre-test patch, here's the real one:
> 
> patch looks basically right to me...  probably is exactly right...
> unless somebody beats me to it I'll play with it and commit it... nag
> me directly if it isn't committed in 36 hours or so :)

It would seem prudent to confirm this is a kernel bug before adding a
workaround to APR.  APR's test/sendfile works fine here over IPv6
between two 2.4.18-based kernels (RHL 8.0), and is doing basically the
same thing as httpd, albeit without the TCP_NODELAY - maybe that makes a
difference.

Colm, can you try running the apr/test/sendfile binary with your
machines? I used it like:

one$ ./sendfile server 
two$ ./sendfile client blocking <address-of-one>

Regards,

joe


Re: Linux + TCP_CORK + IPv6 = Broken [PATCH]

Posted by David Reid <dr...@jetnet.co.uk>.
> On Wed, Dec 04, 2002 at 02:29:29PM -0000, David Reid wrote:
> > > > In the interest of tying up loose ends, I'm still concerned with
your
> > > > observation that --disable-sendfile didn't do the right thing...
did
> > > > you "make distclean" before re-configuring?
> > >
> > > The problem there was that --disable-sendfile isnt an option configure
> > > knows anything about, the right one is --without-sendfile, which does
> > > work, and does fix the problem. :-)
> >
> > However, that will disable sendfile on IPv4 as well won't it? Can you
just
> > confirm that it does for both v4 and v6? Thanks.
>
> yep, --without-sendfile completely disables sendfile , so IPv4 won't
> try to use it, or TCP_CORK, which is a bit of a loss (for IPv4).
> The patch will just disable TCP_CORK for IPv6, but continue to use
> sendfile in both cases.

Thanks, in that case +1 for the patch :)

david



Re: Linux + TCP_CORK + IPv6 = Broken [PATCH]

Posted by Colm MacCárthaigh <co...@Redbrick.DCU.IE>.
On Wed, Dec 04, 2002 at 02:29:29PM -0000, David Reid wrote:
> > > In the interest of tying up loose ends, I'm still concerned with your
> > > observation that --disable-sendfile didn't do the right thing...  did
> > > you "make distclean" before re-configuring?
> >
> > The problem there was that --disable-sendfile isnt an option configure
> > knows anything about, the right one is --without-sendfile, which does
> > work, and does fix the problem. :-)
> 
> However, that will disable sendfile on IPv4 as well won't it? Can you just
> confirm that it does for both v4 and v6? Thanks.

yep, --without-sendfile completely disables sendfile, so IPv4 won't
try to use it, or TCP_CORK, which is a bit of a loss (for IPv4).
The patch will just disable TCP_CORK for IPv6, but continue to use
sendfile in both cases.

-- 
colmmacc@redbrick.dcu.ie        PubKey: colmmacc+pgp@redbrick.dcu.ie  
Web:                                 http://devnull.redbrick.dcu.ie/ 

Re: Linux + TCP_CORK + IPv6 = Broken [PATCH]

Posted by David Reid <dr...@jetnet.co.uk>.
> > In the interest of tying up loose ends, I'm still concerned with your
> > observation that --disable-sendfile didn't do the right thing...  did
> > you "make distclean" before re-configuring?
>
> The problem there was that --disable-sendfile isnt an option configure
> knows anything about, the right one is --without-sendfile, which does
> work, and does fix the problem. :-)

However, that will disable sendfile on IPv4 as well won't it? Can you just
confirm that it does for both v4 and v6? Thanks.

david



Re: Linux + TCP_CORK + IPv6 = Broken [PATCH]

Posted by Jeff Trawick <tr...@attglobal.net>.
Colm MacCárthaigh <co...@Redbrick.DCU.IE> writes:

> > as David Reid reminded, the flag needs to default to 0 on the right
> > Linux boxen...  unless/until we get specific info, I plan to tweak
> > your patch to default it to --disable when building on Linux 2.4.x...
> > relatively few users who would encounter the problem are going to be
> > as able as you to dig to the bottom of it, and the symptoms mentioned
> > in subsequent bug reports may not trigger the right questions on the
> > part of the developers...
> 
> Well, all I can say for certain is 2.4.18 is broken.

actually, I heard from a user off-line that it works fine for him with
2.4.18 on a couple of boxes

until we hear more we won't automatically assume the system is busted

> > In the interest of tying up loose ends, I'm still concerned with your
> > observation that --disable-sendfile didn't do the right thing...  did
> > you "make distclean" before re-configuring?
> 
> The problem there was that --disable-sendfile isnt an option configure
> knows anything about, the right one is --without-sendfile, which does 
> work, and does fix the problem. :-)

oops, sorry about the wild goose chase!

-- 
Jeff Trawick | trawick@attglobal.net
Born in Roswell... married an alien...

Re: Linux + TCP_CORK + IPv6 = Broken [PATCH]

Posted by Jeff Trawick <tr...@attglobal.net>.
Colm MacCárthaigh <co...@Redbrick.DCU.IE> writes:

> > as David Reid reminded, the flag needs to default to 0 on the right
> > Linux boxen...  unless/until we get specific info, I plan to tweak
> > your patch to default it to --disable when building on Linux 2.4.x...
> > relatively few users who would encounter the problem are going to be
> > as able as you to dig to the bottom of it, and the symptoms mentioned
> > in subsequent bug reports may not trigger the right questions on the
> > part of the developers...
> 
> Well, all I can say for certain is 2.4.18 is broken.

actually, I heard from a user off-line that it works fine for him with
2.4.18 on a couple of boxes

until we hear more we won't automatically assume the system is busted

> > In the interest of tying up loose ends, I'm still concerned with your
> > observation that --disable-sendfile didn't do the right thing...  did
> > you "make distclean" before re-configuring?
> 
> The problem there was that --disable-sendfile isnt an option configure
> knows anything about, the right one is --without-sendfile, which does 
> work, and does fix the problem. :-)

oops, sorry about the wild goose chase!

-- 
Jeff Trawick | trawick@attglobal.net
Born in Roswell... married an alien...

Re: Linux + TCP_CORK + IPv6 = Broken [PATCH]

Posted by Colm MacCárthaigh <co...@Redbrick.DCU.IE>.
On Wed, Dec 04, 2002 at 08:21:36AM -0500, Jeff Trawick wrote:
> Colm MacCárthaigh <co...@Redbrick.DCU.IE> writes:
> 
> > On Wed, Dec 04, 2002 at 12:25:49PM +0000, Colm MacCárthaigh wrote:
> > > My tests (and patch) were based on apr and apr-util from CVS , 
> > > with the 2.0.43 codebase, because CVS seems broken right now.
> > 
> > stupid pre-test patch, here's the real one:
> 
> patch looks basically right to me...  probably is exactly right...
> unless somebody beats me to it I'll play with it and commit it... nag
> me directly if it isn't committed in 36 hours or so :)

Cool :-)

> as David Reid reminded, the flag needs to default to 0 on the right
> Linux boxen...  unless/until we get specific info, I plan to tweak
> your patch to default it to --disable when building on Linux 2.4.x...
> relatively few users who would encounter the problem are going to be
> as able as you to dig to the bottom of it, and the symptoms mentioned
> in subsequent bug reports may not trigger the right questions on the
> part of the developers...

Well, all I can say for certain is 2.4.18 is broken.

> In the interest of tying up loose ends, I'm still concerned with your
> observation that --disable-sendfile didn't do the right thing...  did
> you "make distclean" before re-configuring?

The problem there was that --disable-sendfile isn't an option configure
knows anything about; the right one is --without-sendfile, which does
work, and does fix the problem. :-)

-- 
colmmacc@redbrick.dcu.ie        PubKey: colmmacc+pgp@redbrick.dcu.ie  
Web:                                 http://devnull.redbrick.dcu.ie/ 

Re: Linux + TCP_CORK + IPv6 = Broken [PATCH]

Posted by Colm MacCárthaigh <co...@Redbrick.DCU.IE>.
On Wed, Dec 04, 2002 at 08:21:36AM -0500, Jeff Trawick wrote:
> Colm MacCárthaigh <co...@Redbrick.DCU.IE> writes:
> 
> > On Wed, Dec 04, 2002 at 12:25:49PM +0000, Colm MacCárthaigh wrote:
> > > My tests (and patch) were based on apr and apr-util from CVS , 
> > > with the 2.0.43 codebase, because CVS seems broken right now.
> > 
> > stupid pre-test patch, here's the real one:
> 
> patch looks basically right to me...  probably is exactly right...
> unless somebody beats me to it I'll play with it and commit it... nag
> me directly if it isn't committed in 36 hours or so :)

Cool :-)

> as David Reid reminded, the flag needs to default to 0 on the right
> Linux boxen...  unless/until we get specific info, I plan to tweak
> your patch to default it to --disable when building on Linux 2.4.x...
> relatively few users who would encounter the problem are going to be
> as able as you to dig to the bottom of it, and the symptoms mentioned
> in subsequent bug reports may not trigger the right questions on the
> part of the developers...

Well, all I can say for certain is 2.4.18 is broken.

> In the interest of tying up loose ends, I'm still concerned with your
> observation that --disable-sendfile didn't do the right thing...  did
> you "make distclean" before re-configuring?

The problem there was that --disable-sendfile isn't an option configure
knows anything about; the right one is --without-sendfile, which does
work, and does fix the problem. :-)

-- 
colmmacc@redbrick.dcu.ie        PubKey: colmmacc+pgp@redbrick.dcu.ie  
Web:                                 http://devnull.redbrick.dcu.ie/ 

Re: Linux + TCP_CORK + IPv6 = Broken [PATCH]

Posted by Jeff Trawick <tr...@attglobal.net>.
Colm MacCárthaigh <co...@Redbrick.DCU.IE> writes:

> On Wed, Dec 04, 2002 at 12:25:49PM +0000, Colm MacCárthaigh wrote:
> > My tests (and patch) were based on apr and apr-util from CVS , 
> > with the 2.0.43 codebase, because CVS seems broken right now.
> 
> stupid pre-test patch, here's the real one:

patch looks basically right to me...  probably is exactly right...
unless somebody beats me to it I'll play with it and commit it... nag
me directly if it isn't committed in 36 hours or so :)

as David Reid reminded, the flag needs to default to 0 on the right
Linux boxen...  unless/until we get specific info, I plan to tweak
your patch to default it to --disable when building on Linux 2.4.x...
relatively few users who would encounter the problem are going to be
as able as you to dig to the bottom of it, and the symptoms mentioned
in subsequent bug reports may not trigger the right questions on the
part of the developers...

In the interest of tying up loose ends, I'm still concerned with your
observation that --disable-sendfile didn't do the right thing...  did
you "make distclean" before re-configuring?

Thanks!

-- 
Jeff Trawick | trawick@attglobal.net
Born in Roswell... married an alien...

Re: Linux + TCP_CORK + IPv6 = Broken [PATCH]

Posted by David Reid <dr...@jetnet.co.uk>.
As Jeff suggested a while back, do we know which versions of the Linux kernel
are affected by this problem? If so we can probably have the flag
automagically set.

Otherwise this looks OK to me.

david

----- Original Message -----
From: "Colm MacCárthaigh" <co...@Redbrick.DCU.IE>
To: <de...@httpd.apache.org>
Cc: <tr...@attglobal.net>; <de...@apr.apache.org>
Sent: Wednesday, December 04, 2002 12:30 PM
Subject: Re: Linux + TCP_CORK + IPv6 = Broken [PATCH]


> On Wed, Dec 04, 2002 at 12:25:49PM +0000, Colm MacCárthaigh wrote:
> > My tests (and patch) were based on apr and apr-util from CVS ,
> > with the 2.0.43 codebase, because CVS seems broken right now.
>
> stupid pre-test patch, here's the real one:
>
> Index: configure.in
> ===================================================================
> RCS file: /home/cvspublic/apr/configure.in,v
> retrieving revision 1.506
> diff -u -r1.506 configure.in
> --- configure.in 2 Dec 2002 16:07:09 -0000 1.506
> +++ configure.in 4 Dec 2002 12:29:41 -0000
> @@ -1770,6 +1770,16 @@
>  echo "${nl}Checking for IPv6 Networking support..."
>  dnl Start of checking for IPv6 support...
>
> +use_ipv6_tcp_cork=1
> +AC_ARG_ENABLE(ipv6-tcp-cork,
> +  [  --disable-ipv6-tcp-cork Disable TCP_CORK with IPv6.],
> +  [  if test "$enableval" = "no"; then
> +        use_ipv6_tcp_cork=0
> +     fi ],
> +  [ use_ipv6_tcp_cork=1 ] )
> +
> +AC_SUBST(use_ipv6_tcp_cork)
> +
>  AC_ARG_ENABLE(ipv6,
>    [  --disable-ipv6          Disable IPv6 support in APR.],
>    [ if test "$enableval" = "no"; then
> Index: include/apr.h.in
> ===================================================================
> RCS file: /home/cvspublic/apr/include/apr.h.in,v
> retrieving revision 1.117
> diff -u -r1.117 apr.h.in
> --- include/apr.h.in 22 Oct 2002 12:37:40 -0000 1.117
> +++ include/apr.h.in 4 Dec 2002 12:29:41 -0000
> @@ -171,6 +171,11 @@
>   */
>  #define APR_TCP_NOPUSH_FLAG       @apr_tcp_nopush_flag@
>
> +/* Should we use "corked" TCP with IPv6 ? (this seems to be broken on
> + * linux
> + */
> +#define APR_USE_IPV6_TCP_CORK     @use_ipv6_tcp_cork@
> +
>  /* Is the TCP_NODELAY socket option inherited from listening sockets?
>  */
>  #define APR_TCP_NODELAY_INHERITED @tcp_nodelay_inherited@
> Index: network_io/unix/sockopt.c
> ===================================================================
> RCS file: /home/cvspublic/apr/network_io/unix/sockopt.c,v
> retrieving revision 1.63
> diff -u -r1.63 sockopt.c
> --- network_io/unix/sockopt.c 20 Nov 2002 03:50:21 -0000 1.63
> +++ network_io/unix/sockopt.c 4 Dec 2002 12:29:42 -0000
> @@ -259,7 +259,12 @@
>          return APR_ENOTIMPL;
>  #endif
>      }
> +#if APR_USE_IPV6_TCP_CORK
>      if (opt & APR_TCP_NOPUSH) {
> +#else
> +    if (opt & APR_TCP_NOPUSH && sock->remote_addr->sa.sin.sin_family !=
APR_INET6) {
> +#endif
> +
>  #if APR_TCP_NOPUSH_FLAG
>          if (apr_is_option_set(sock->netmask, APR_TCP_NOPUSH) != on) {
>              int optlevel = IPPROTO_TCP;
>
> --
> colmmacc@redbrick.dcu.ie        PubKey: colmmacc+pgp@redbrick.dcu.ie
> Web:                                 http://devnull.redbrick.dcu.ie/
>


Re: Linux + TCP_CORK + IPv6 = Broken [PATCH]

Posted by Jeff Trawick <tr...@attglobal.net>.
Colm MacCárthaigh <co...@Redbrick.DCU.IE> writes:

> On Wed, Dec 04, 2002 at 12:25:49PM +0000, Colm MacCárthaigh wrote:
> > My tests (and patch) were based on apr and apr-util from CVS , 
> > with the 2.0.43 codebase, because CVS seems broken right now.
> 
> stupid pre-test patch, here's the real one:

patch looks basically right to me...  probably is exactly right...
unless somebody beats me to it I'll play with it and commit it... nag
me directly if it isn't committed in 36 hours or so :)

as David Reid reminded, the flag needs to default to 0 on the right
Linux boxen...  unless/until we get specific info, I plan to tweak
your patch to default it to --disable when building on Linux 2.4.x...
relatively few users who would encounter the problem are going to be
as able as you to dig to the bottom of it, and the symptoms mentioned
in subsequent bug reports may not trigger the right questions on the
part of the developers...

In the interest of tying up loose ends, I'm still concerned with your
observation that --disable-sendfile didn't do the right thing...  did
you "make distclean" before re-configuring?

Thanks!

-- 
Jeff Trawick | trawick@attglobal.net
Born in Roswell... married an alien...

Re: Linux + TCP_CORK + IPv6 = Broken [PATCH]

Posted by Colm MacCárthaigh <co...@Redbrick.DCU.IE>.
On Wed, Dec 04, 2002 at 12:25:49PM +0000, Colm MacCárthaigh wrote:
> My tests (and patch) were based on apr and apr-util from CVS , 
> with the 2.0.43 codebase, because CVS seems broken right now.

stupid pre-test patch, here's the real one:

Index: configure.in
===================================================================
RCS file: /home/cvspublic/apr/configure.in,v
retrieving revision 1.506
diff -u -r1.506 configure.in
--- configure.in	2 Dec 2002 16:07:09 -0000	1.506
+++ configure.in	4 Dec 2002 12:29:41 -0000
@@ -1770,6 +1770,16 @@
 echo "${nl}Checking for IPv6 Networking support..."
 dnl Start of checking for IPv6 support...
 
+use_ipv6_tcp_cork=1
+AC_ARG_ENABLE(ipv6-tcp-cork, 
+  [  --disable-ipv6-tcp-cork Disable TCP_CORK with IPv6.],
+  [  if test "$enableval" = "no"; then
+        use_ipv6_tcp_cork=0
+     fi ],
+  [ use_ipv6_tcp_cork=1 ] )
+
+AC_SUBST(use_ipv6_tcp_cork)
+
 AC_ARG_ENABLE(ipv6,
   [  --disable-ipv6          Disable IPv6 support in APR.],
   [ if test "$enableval" = "no"; then
Index: include/apr.h.in
===================================================================
RCS file: /home/cvspublic/apr/include/apr.h.in,v
retrieving revision 1.117
diff -u -r1.117 apr.h.in
--- include/apr.h.in	22 Oct 2002 12:37:40 -0000	1.117
+++ include/apr.h.in	4 Dec 2002 12:29:41 -0000
@@ -171,6 +171,11 @@
  */
 #define APR_TCP_NOPUSH_FLAG       @apr_tcp_nopush_flag@
 
+/* Should we use "corked" TCP with IPv6 ? (this seems to be broken on
+ * linux
+ */
+#define APR_USE_IPV6_TCP_CORK     @use_ipv6_tcp_cork@ 
+
 /* Is the TCP_NODELAY socket option inherited from listening sockets?
 */
 #define APR_TCP_NODELAY_INHERITED @tcp_nodelay_inherited@
Index: network_io/unix/sockopt.c
===================================================================
RCS file: /home/cvspublic/apr/network_io/unix/sockopt.c,v
retrieving revision 1.63
diff -u -r1.63 sockopt.c
--- network_io/unix/sockopt.c	20 Nov 2002 03:50:21 -0000	1.63
+++ network_io/unix/sockopt.c	4 Dec 2002 12:29:42 -0000
@@ -259,7 +259,12 @@
         return APR_ENOTIMPL;
 #endif
     }
+#if APR_USE_IPV6_TCP_CORK
     if (opt & APR_TCP_NOPUSH) {
+#else
+    if (opt & APR_TCP_NOPUSH && sock->remote_addr->sa.sin.sin_family != APR_INET6) {
+#endif
+    
 #if APR_TCP_NOPUSH_FLAG
         if (apr_is_option_set(sock->netmask, APR_TCP_NOPUSH) != on) {
             int optlevel = IPPROTO_TCP;

-- 
colmmacc@redbrick.dcu.ie        PubKey: colmmacc+pgp@redbrick.dcu.ie  
Web:                                 http://devnull.redbrick.dcu.ie/ 

Re: Linux + TCP_CORK + IPv6 = Broken [PATCH]

Posted by Colm MacCárthaigh <co...@Redbrick.DCU.IE>.
On Wed, Dec 04, 2002 at 12:25:49PM +0000, Colm MacCárthaigh wrote:
> My tests (and patch) were based on apr and apr-util from CVS , 
> with the 2.0.43 codebase, because CVS seems broken right now.

stupid pre-test patch, here's the real one:

Index: configure.in
===================================================================
RCS file: /home/cvspublic/apr/configure.in,v
retrieving revision 1.506
diff -u -r1.506 configure.in
--- configure.in	2 Dec 2002 16:07:09 -0000	1.506
+++ configure.in	4 Dec 2002 12:29:41 -0000
@@ -1770,6 +1770,16 @@
 echo "${nl}Checking for IPv6 Networking support..."
 dnl Start of checking for IPv6 support...
 
+use_ipv6_tcp_cork=1
+AC_ARG_ENABLE(ipv6-tcp-cork, 
+  [  --disable-ipv6-tcp-cork Disable TCP_CORK with IPv6.],
+  [  if test "$enableval" = "no"; then
+        use_ipv6_tcp_cork=0
+     fi ],
+  [ use_ipv6_tcp_cork=1 ] )
+
+AC_SUBST(use_ipv6_tcp_cork)
+
 AC_ARG_ENABLE(ipv6,
   [  --disable-ipv6          Disable IPv6 support in APR.],
   [ if test "$enableval" = "no"; then
Index: include/apr.h.in
===================================================================
RCS file: /home/cvspublic/apr/include/apr.h.in,v
retrieving revision 1.117
diff -u -r1.117 apr.h.in
--- include/apr.h.in	22 Oct 2002 12:37:40 -0000	1.117
+++ include/apr.h.in	4 Dec 2002 12:29:41 -0000
@@ -171,6 +171,11 @@
  */
 #define APR_TCP_NOPUSH_FLAG       @apr_tcp_nopush_flag@
 
+/* Should we use "corked" TCP with IPv6 ? (this seems to be broken on
+ * linux
+ */
+#define APR_USE_IPV6_TCP_CORK     @use_ipv6_tcp_cork@ 
+
 /* Is the TCP_NODELAY socket option inherited from listening sockets?
 */
 #define APR_TCP_NODELAY_INHERITED @tcp_nodelay_inherited@
Index: network_io/unix/sockopt.c
===================================================================
RCS file: /home/cvspublic/apr/network_io/unix/sockopt.c,v
retrieving revision 1.63
diff -u -r1.63 sockopt.c
--- network_io/unix/sockopt.c	20 Nov 2002 03:50:21 -0000	1.63
+++ network_io/unix/sockopt.c	4 Dec 2002 12:29:42 -0000
@@ -259,7 +259,12 @@
         return APR_ENOTIMPL;
 #endif
     }
+#if APR_USE_IPV6_TCP_CORK
     if (opt & APR_TCP_NOPUSH) {
+#else
+    if (opt & APR_TCP_NOPUSH && sock->remote_addr->sa.sin.sin_family != APR_INET6) {
+#endif
+    
 #if APR_TCP_NOPUSH_FLAG
         if (apr_is_option_set(sock->netmask, APR_TCP_NOPUSH) != on) {
             int optlevel = IPPROTO_TCP;

-- 
colmmacc@redbrick.dcu.ie        PubKey: colmmacc+pgp@redbrick.dcu.ie  
Web:                                 http://devnull.redbrick.dcu.ie/