You are viewing a plain text version of this content. The canonical link for it is here.
Posted to users@tomcat.apache.org by Joseph Lam <jl...@ust.hk> on 2005/07/15 05:25:19 UTC

Re: Production cluster crashed with 'SEVERE: TCP Worker thread...' error

Several minutes before the crash, I found some session replication errors.
They seemed to cause all my JK connector threads to be held up and to stop
responding to further requests.

catalina.out:

Jul 15, 2005 10:29:01 AM org.apache.catalina.cluster.tcp.DataSender
pushMessage
INFO: resending 504 bytes to 203.194.228.76:4001 from 52990
java.net.SocketTimeoutException: Read timed out
        at java.net.SocketInputStream.socketRead0(Native Method)
        at java.net.SocketInputStream.read(SocketInputStream.java:129)
        at java.net.SocketInputStream.read(SocketInputStream.java:182)
        at
org.apache.catalina.cluster.tcp.DataSender.waitForAck(DataSender.java:542)
        at
org.apache.catalina.cluster.tcp.DataSender.pushMessage(DataSender.java:504)
        at
org.apache.catalina.cluster.tcp.DataSender.sendMessage(DataSender.java:378)
        at
org.apache.catalina.cluster.tcp.PooledSocketSender.sendMessage(PooledSocketSender.java:124)
        at
org.apache.catalina.cluster.tcp.ReplicationTransmitter.sendMessageData(ReplicationTransmitter.java:651)
        at
org.apache.catalina.cluster.tcp.ReplicationTransmitter.sendMessage(ReplicationTransmitter.java:348)
        at
org.apache.catalina.cluster.tcp.SimpleTcpCluster.send(SimpleTcpCluster.java:466)
        at
org.apache.catalina.cluster.tcp.SimpleTcpCluster.send(SimpleTcpCluster.java:495)
        at
org.apache.catalina.cluster.tcp.ReplicationValve.invoke(ReplicationValve.java:210)
        at
org.apache.catalina.valves.ErrorReportValve.invoke(ErrorReportValve.java:105)
        at
org.apache.catalina.core.StandardEngineValve.invoke(StandardEngineValve.java:107)
        at
org.apache.catalina.connector.CoyoteAdapter.service(CoyoteAdapter.java:148)
        at
org.apache.jk.server.JkCoyoteHandler.invoke(JkCoyoteHandler.java:307)
        at
org.apache.jk.common.HandlerRequest.invoke(HandlerRequest.java:385)
        at org.apache.jk.common.ChannelSocket.invoke(ChannelSocket.java:748)
        at
org.apache.jk.common.ChannelSocket.processConnection(ChannelSocket.java:678)
        at
org.apache.jk.common.SocketConnection.runIt(ChannelSocket.java:871)
        at
org.apache.tomcat.util.threads.ThreadPool$ControlRunnable.run(ThreadPool.java:684)
        at java.lang.Thread.run(Thread.java:595)
Jul 15, 2005 10:29:01 AM org.apache.catalina.cluster.tcp.DataSender
pushMessage
INFO: resending 487 bytes to 203.194.228.77:4001 from 52737
java.net.SocketException: Socket closed
        at java.net.SocketInputStream.read(SocketInputStream.java:162)
        at java.net.SocketInputStream.read(SocketInputStream.java:182)
        at
org.apache.catalina.cluster.tcp.DataSender.waitForAck(DataSender.java:542)
        at
org.apache.catalina.cluster.tcp.DataSender.pushMessage(DataSender.java:504)
        at
org.apache.catalina.cluster.tcp.DataSender.sendMessage(DataSender.java:378)
        at
org.apache.catalina.cluster.tcp.PooledSocketSender.sendMessage(PooledSocketSender.java:124)
        at
org.apache.catalina.cluster.tcp.ReplicationTransmitter.sendMessageData(ReplicationTransmitter.java:651)
        at
org.apache.catalina.cluster.tcp.ReplicationTransmitter.sendMessage(ReplicationTransmitter.java:348)
        at
org.apache.catalina.cluster.tcp.SimpleTcpCluster.send(SimpleTcpCluster.java:466)
        at
org.apache.catalina.cluster.tcp.SimpleTcpCluster.send(SimpleTcpCluster.java:495)
        at
org.apache.catalina.cluster.tcp.ReplicationValve.invoke(ReplicationValve.java:210)
        at
org.apache.catalina.valves.ErrorReportValve.invoke(ErrorReportValve.java:105)
        at
org.apache.catalina.core.StandardEngineValve.invoke(StandardEngineValve.java:107)
        at
org.apache.catalina.connector.CoyoteAdapter.service(CoyoteAdapter.java:148)
        at
org.apache.jk.server.JkCoyoteHandler.invoke(JkCoyoteHandler.java:307)
        at
org.apache.jk.common.HandlerRequest.invoke(HandlerRequest.java:385)
        at org.apache.jk.common.ChannelSocket.invoke(ChannelSocket.java:748)
        at
org.apache.jk.common.ChannelSocket.processConnection(ChannelSocket.java:678)
        at
org.apache.jk.common.SocketConnection.runIt(ChannelSocket.java:871)
        at
org.apache.tomcat.util.threads.ThreadPool$ControlRunnable.run(ThreadPool.java:684)
        at java.lang.Thread.run(Thread.java:595)
Jul 15, 2005 10:29:02 AM org.apache.catalina.cluster.tcp.DataSender
waitForAck
WARNING: Wasnt able to read acknowledgement from
server[{0}:{1,number,integer}] in {2,number,integer} ms. Disconnecting
socket, and trying again.

Jul 15, 2005 10:29:04 AM
org.apache.catalina.cluster.tcp.ReplicationTransmitter sendMessageData
WARNING: Unable to send replicated message, is server down?
java.net.ConnectException: Connection timed out
        at java.net.PlainSocketImpl.socketConnect(Native Method)
        at java.net.PlainSocketImpl.doConnect(PlainSocketImpl.java:333)
        at
java.net.PlainSocketImpl.connectToAddress(PlainSocketImpl.java:195)
        at java.net.PlainSocketImpl.connect(PlainSocketImpl.java:182)
        at java.net.SocksSocketImpl.connect(SocksSocketImpl.java:364)
        at java.net.Socket.connect(Socket.java:507)
        at java.net.Socket.connect(Socket.java:457)
        at java.net.Socket.<init>(Socket.java:365)
        at java.net.Socket.<init>(Socket.java:207)
        at
org.apache.catalina.cluster.tcp.DataSender.openSocket(DataSender.java:418)
        at
org.apache.catalina.cluster.tcp.DataSender.pushMessage(DataSender.java:496)
        at
org.apache.catalina.cluster.tcp.DataSender.sendMessage(DataSender.java:378)
        at
org.apache.catalina.cluster.tcp.PooledSocketSender.sendMessage(PooledSocketSender.java:124)
        at
org.apache.catalina.cluster.tcp.ReplicationTransmitter.sendMessageData(ReplicationTransmitter.java:651)
        at
org.apache.catalina.cluster.tcp.ReplicationTransmitter.sendMessage(ReplicationTransmitter.java:348)
        at
org.apache.catalina.cluster.tcp.SimpleTcpCluster.send(SimpleTcpCluster.java:466)
        at
org.apache.catalina.cluster.tcp.SimpleTcpCluster.send(SimpleTcpCluster.java:495)
        at
org.apache.catalina.cluster.tcp.ReplicationValve.invoke(ReplicationValve.java:210)
        at
org.apache.catalina.valves.ErrorReportValve.invoke(ErrorReportValve.java:105)
        at
org.apache.catalina.core.StandardEngineValve.invoke(StandardEngineValve.java:107)
        at
org.apache.catalina.connector.CoyoteAdapter.service(CoyoteAdapter.java:148)
        at
org.apache.jk.server.JkCoyoteHandler.invoke(JkCoyoteHandler.java:307)
        at
org.apache.jk.common.HandlerRequest.invoke(HandlerRequest.java:385)
        at org.apache.jk.common.ChannelSocket.invoke(ChannelSocket.java:748)
        at
org.apache.jk.common.ChannelSocket.processConnection(ChannelSocket.java:678)
        at
org.apache.jk.common.SocketConnection.runIt(ChannelSocket.java:871)
        at
org.apache.tomcat.util.threads.ThreadPool$ControlRunnable.run(ThreadPool.java:684)
        at java.lang.Thread.run(Thread.java:595)


> Hi
>
> In my 4-node TC5.5.9 cluster, the catalina.out suddenly got flooded by
> the error below and the whole cluster simply stopped responding to web
> clients. I had to restart all nodes in order to get it back to normal.
> It seemed to be triggered by a surge in load, but the cluster was not able
> to recover even after the load was reduced later on. The cluster had been
> working fine previously.
>
> catalina.out:
>
> SEVERE: TCP Worker thread in cluster caught 'java.io.IOException:
> Connection reset by peer' closing channel
> java.io.IOException: Connection reset by peer
>         at sun.nio.ch.FileDispatcher.read0(Native Method)
>         at sun.nio.ch.SocketDispatcher.read(SocketDispatcher.java:21)
>         at sun.nio.ch.IOUtil.readIntoNativeBuffer(IOUtil.java:233)
>         at sun.nio.ch.IOUtil.read(IOUtil.java:206)
>         at sun.nio.ch.SocketChannelImpl.read(SocketChannelImpl.java:207)
>         at
> org.apache.catalina.cluster.tcp.TcpReplicationThread.drainChannel(TcpReplicationThread.java:123)
>         at
> org.apache.catalina.cluster.tcp.TcpReplicationThread.run(TcpReplicationThread.java:67)
> ...
>
> Here is my server.xml <Cluster> config:
>         <Cluster
> className="org.apache.catalina.cluster.tcp.SimpleTcpCluster"
>                  managerClassName="qpi.bochk.web.ClusterSessionManager"
> expireSessionsOnShutdown="false"
>                  useDirtyFlag="true"
>                  notifyListenersOnReplication="true">
>
>             <Membership
>                 className="org.apache.catalina.cluster.mcast.McastService"
> mcastAddr="228.0.0.6"
>                 mcastPort="45564"
>                 mcastFrequency="500"
>                 mcastDropTime="3000"/>
>
>             <Receiver
>                 className="org.apache.catalina.cluster.tcp.ReplicationListener"
> tcpListenAddress="auto"
>                 tcpListenPort="4001"
>                 tcpSelectorTimeout="100"
>                 tcpThreadCount="10"/>
>
>             <Sender
>                 className="org.apache.catalina.cluster.tcp.ReplicationTransmitter"
> replicationMode="pooled"
>                 ackTimeout="15000"/>
>
>             <Valve
> className="org.apache.catalina.cluster.tcp.ReplicationValve"
>                    filter=".*\.gif;.*\.js;.*\.jpg;.*\.htm;.*\.html;.*\.css;.*\.CSS;.*\.GIF;.*\.JPG;.*\.txt;"/>
>
>             <Deployer
> className="org.apache.catalina.cluster.deploy.FarmWarDeployer"
>                       tempDir="/tmp/war-temp/"
>                       deployDir="/tmp/war-deploy/"
>                       watchDir="/tmp/war-listen/"
>                       watchEnabled="false"/>
>         </Cluster>
>
> Regards,
> Joseph Lam
>
>
> --------------------------------------------------------------------- To
> unsubscribe, e-mail: tomcat-user-unsubscribe@jakarta.apache.org For
> additional commands, e-mail: tomcat-user-help@jakarta.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: tomcat-user-unsubscribe@jakarta.apache.org
For additional commands, e-mail: tomcat-user-help@jakarta.apache.org


Re: Production cluster crashed with 'SEVERE: TCP Worker thread...' error

Posted by Peter Rossbach <pr...@objektpark.de>.
Why do you use your own SessionManager?
The log says that your backup node is down.
Why don't you use the cluster domain feature?
    T1 and T3   Domain 20 at port 10020
    T2 and  T4 Domain 21 at port 10021
 
    T1 and T2 hosted at host 1
    T3 and T4 hosted at host 2

    The Apaches at host 1 and host 2 can access all Tomcats.
    An IP load balancer balances the load between A1 and A2.

Have you installed the 5.5.9 ClusterFix pack?

http://issues.apache.org/bugzilla/show_bug.cgi?id=34389


Peter

Joseph Lam schrieb:

>Several minutes before the crash, I found some session replication error.
>That seemed to cause all my JK connector threads being hold up and no
>responding to further requests.
>
>catalina.out:
>
>Jul 15, 2005 10:29:01 AM org.apache.catalina.cluster.tcp.DataSender
>pushMessage
>INFO: resending 504 bytes to 203.194.228.76:4001 from 52990
>java.net.SocketTimeoutException: Read timed out
>        at java.net.SocketInputStream.socketRead0(Native Method)
>        at java.net.SocketInputStream.read(SocketInputStream.java:129)
>        at java.net.SocketInputStream.read(SocketInputStream.java:182)
>        at
>org.apache.catalina.cluster.tcp.DataSender.waitForAck(DataSender.java:542)
>        at
>org.apache.catalina.cluster.tcp.DataSender.pushMessage(DataSender.java:504)
>        at
>org.apache.catalina.cluster.tcp.DataSender.sendMessage(DataSender.java:378)
>        at
>org.apache.catalina.cluster.tcp.PooledSocketSender.sendMessage(PooledSocketSender.java:124)
>        at
>org.apache.catalina.cluster.tcp.ReplicationTransmitter.sendMessageData(ReplicationTransmitter.java:651)
>        at
>org.apache.catalina.cluster.tcp.ReplicationTransmitter.sendMessage(ReplicationTransmitter.java:348)
>        at
>org.apache.catalina.cluster.tcp.SimpleTcpCluster.send(SimpleTcpCluster.java:466)
>        at
>org.apache.catalina.cluster.tcp.SimpleTcpCluster.send(SimpleTcpCluster.java:495)
>        at
>org.apache.catalina.cluster.tcp.ReplicationValve.invoke(ReplicationValve.java:210)
>        at
>org.apache.catalina.valves.ErrorReportValve.invoke(ErrorReportValve.java:105)
>        at
>org.apache.catalina.core.StandardEngineValve.invoke(StandardEngineValve.java:107)
>        at
>org.apache.catalina.connector.CoyoteAdapter.service(CoyoteAdapter.java:148)
>        at
>org.apache.jk.server.JkCoyoteHandler.invoke(JkCoyoteHandler.java:307)
>        at
>org.apache.jk.common.HandlerRequest.invoke(HandlerRequest.java:385)
>        at org.apache.jk.common.ChannelSocket.invoke(ChannelSocket.java:748)
>        at
>org.apache.jk.common.ChannelSocket.processConnection(ChannelSocket.java:678)
>        at
>org.apache.jk.common.SocketConnection.runIt(ChannelSocket.java:871)
>        at
>org.apache.tomcat.util.threads.ThreadPool$ControlRunnable.run(ThreadPool.java:684)
>        at java.lang.Thread.run(Thread.java:595)
>Jul 15, 2005 10:29:01 AM org.apache.catalina.cluster.tcp.DataSender
>pushMessage
>INFO: resending 487 bytes to 203.194.228.77:4001 from 52737
>java.net.SocketException: Socket closed
>        at java.net.SocketInputStream.read(SocketInputStream.java:162)
>        at java.net.SocketInputStream.read(SocketInputStream.java:182)
>        at
>org.apache.catalina.cluster.tcp.DataSender.waitForAck(DataSender.java:542)
>        at
>org.apache.catalina.cluster.tcp.DataSender.pushMessage(DataSender.java:504)
>        at
>org.apache.catalina.cluster.tcp.DataSender.sendMessage(DataSender.java:378)
>        at
>org.apache.catalina.cluster.tcp.PooledSocketSender.sendMessage(PooledSocketSender.java:124)
>        at
>org.apache.catalina.cluster.tcp.ReplicationTransmitter.sendMessageData(ReplicationTransmitter.java:651)
>        at
>org.apache.catalina.cluster.tcp.ReplicationTransmitter.sendMessage(ReplicationTransmitter.java:348)
>        at
>org.apache.catalina.cluster.tcp.SimpleTcpCluster.send(SimpleTcpCluster.java:466)
>        at
>org.apache.catalina.cluster.tcp.SimpleTcpCluster.send(SimpleTcpCluster.java:495)
>        at
>org.apache.catalina.cluster.tcp.ReplicationValve.invoke(ReplicationValve.java:210)
>        at
>org.apache.catalina.valves.ErrorReportValve.invoke(ErrorReportValve.java:105)
>        at
>org.apache.catalina.core.StandardEngineValve.invoke(StandardEngineValve.java:107)
>        at
>org.apache.catalina.connector.CoyoteAdapter.service(CoyoteAdapter.java:148)
>        at
>org.apache.jk.server.JkCoyoteHandler.invoke(JkCoyoteHandler.java:307)
>        at
>org.apache.jk.common.HandlerRequest.invoke(HandlerRequest.java:385)
>        at org.apache.jk.common.ChannelSocket.invoke(ChannelSocket.java:748)
>        at
>org.apache.jk.common.ChannelSocket.processConnection(ChannelSocket.java:678)
>        at
>org.apache.jk.common.SocketConnection.runIt(ChannelSocket.java:871)
>        at
>org.apache.tomcat.util.threads.ThreadPool$ControlRunnable.run(ThreadPool.java:684)
>        at java.lang.Thread.run(Thread.java:595)
>Jul 15, 2005 10:29:02 AM org.apache.catalina.cluster.tcp.DataSender
>waitForAck
>WARNING: Wasnt able to read acknowledgement from
>server[{0}:{1,number,integer}] in {2,number,integer} ms. Disconnecting
>socket, and trying again.
>
>Jul 15, 2005 10:29:04 AM
>org.apache.catalina.cluster.tcp.ReplicationTransmitter sendMessageData
>WARNING: Unable to send replicated message, is server down?
>java.net.ConnectException: Connection timed out
>        at java.net.PlainSocketImpl.socketConnect(Native Method)
>        at java.net.PlainSocketImpl.doConnect(PlainSocketImpl.java:333)
>        at
>java.net.PlainSocketImpl.connectToAddress(PlainSocketImpl.java:195)
>        at java.net.PlainSocketImpl.connect(PlainSocketImpl.java:182)
>        at java.net.SocksSocketImpl.connect(SocksSocketImpl.java:364)
>        at java.net.Socket.connect(Socket.java:507)
>        at java.net.Socket.connect(Socket.java:457)
>        at java.net.Socket.<init>(Socket.java:365)
>        at java.net.Socket.<init>(Socket.java:207)
>        at
>org.apache.catalina.cluster.tcp.DataSender.openSocket(DataSender.java:418)
>        at
>org.apache.catalina.cluster.tcp.DataSender.pushMessage(DataSender.java:496)
>        at
>org.apache.catalina.cluster.tcp.DataSender.sendMessage(DataSender.java:378)
>        at
>org.apache.catalina.cluster.tcp.PooledSocketSender.sendMessage(PooledSocketSender.java:124)
>        at
>org.apache.catalina.cluster.tcp.ReplicationTransmitter.sendMessageData(ReplicationTransmitter.java:651)
>        at
>org.apache.catalina.cluster.tcp.ReplicationTransmitter.sendMessage(ReplicationTransmitter.java:348)
>        at
>org.apache.catalina.cluster.tcp.SimpleTcpCluster.send(SimpleTcpCluster.java:466)
>        at
>org.apache.catalina.cluster.tcp.SimpleTcpCluster.send(SimpleTcpCluster.java:495)
>        at
>org.apache.catalina.cluster.tcp.ReplicationValve.invoke(ReplicationValve.java:210)
>        at
>org.apache.catalina.valves.ErrorReportValve.invoke(ErrorReportValve.java:105)
>        at
>org.apache.catalina.core.StandardEngineValve.invoke(StandardEngineValve.java:107)
>        at
>org.apache.catalina.connector.CoyoteAdapter.service(CoyoteAdapter.java:148)
>        at
>org.apache.jk.server.JkCoyoteHandler.invoke(JkCoyoteHandler.java:307)
>        at
>org.apache.jk.common.HandlerRequest.invoke(HandlerRequest.java:385)
>        at org.apache.jk.common.ChannelSocket.invoke(ChannelSocket.java:748)
>        at
>org.apache.jk.common.ChannelSocket.processConnection(ChannelSocket.java:678)
>        at
>org.apache.jk.common.SocketConnection.runIt(ChannelSocket.java:871)
>        at
>org.apache.tomcat.util.threads.ThreadPool$ControlRunnable.run(ThreadPool.java:684)
>        at java.lang.Thread.run(Thread.java:595)
>
>
>  
>
>>Hi
>>
>>In my 4-node TC5.5.9 cluster, the catalina.out suddenly got flooded by
>>the error below and the whole cluster simply stopped responding to web
>>clients. I have to restart all nodes in order to get it back to normal.
>>It seemed to be triggerred by a surge in loading but not able to resume
>>even after the load was reduced later on. The cluster has been working
>>fine previously.
>>
>>catalina.out:
>>
>>SEVERE: TCP Worker thread in cluster caught 'java.io.IOException:
>>Connection reset by peer' closing channel
>>java.io.IOException: Connection reset by peer
>>        at sun.nio.ch.FileDispatcher.read0(Native Method)
>>        at sun.nio.ch.SocketDispatcher.read(SocketDispatcher.java:21) at
>>sun.nio.ch.IOUtil.readIntoNativeBuffer(IOUtil.java:233) at
>>sun.nio.ch.IOUtil.read(IOUtil.java:206)
>>        at sun.nio.ch.SocketChannelImpl.read(SocketChannelImpl.java:207)
>>at
>>org.apache.catalina.cluster.tcp.TcpReplicationThread.drainChannel(TcpReplicationThread.java:123)
>>        at
>>org.apache.catalina.cluster.tcp.TcpReplicationThread.run(TcpReplicationThread.java:67)
>>...
>>
>>Here is my server.xml <Cluster> config:
>>        <Cluster
>>className="org.apache.catalina.cluster.tcp.SimpleTcpCluster"
>>                 managerClassName="qpi.bochk.web.ClusterSessionManager"
>>expireSessionsOnShutdown="false"
>>                 useDirtyFlag="true"
>>                 notifyListenersOnReplication="true">
>>
>>            <Membership
>>                className="org.apache.catalina.cluster.mcast.McastService"
>>mcastAddr="228.0.0.6"
>>                mcastPort="45564"
>>                mcastFrequency="500"
>>                mcastDropTime="3000"/>
>>
>>            <Receiver
>>                className="org.apache.catalina.cluster.tcp.ReplicationListener"
>>tcpListenAddress="auto"
>>                tcpListenPort="4001"
>>                tcpSelectorTimeout="100"
>>                tcpThreadCount="10"/>
>>
>>            <Sender
>>                className="org.apache.catalina.cluster.tcp.ReplicationTransmitter"
>>replicationMode="pooled"
>>                ackTimeout="15000"/>
>>
>>            <Valve
>>className="org.apache.catalina.cluster.tcp.ReplicationValve"
>>                   filter=".*\.gif;.*\.js;.*\.jpg;.*\.htm;.*\.html;.*\.css;.*\.CSS;.*\.GIF;.*\.JPG;.*\.txt;"/>
>>
>>            <Deployer
>>className="org.apache.catalina.cluster.deploy.FarmWarDeployer"
>>                      tempDir="/tmp/war-temp/"
>>                      deployDir="/tmp/war-deploy/"
>>                      watchDir="/tmp/war-listen/"
>>                      watchEnabled="false"/>
>>        </Cluster>
>>
>>Regards,
>>Joseph Lam
>>
>>
>>--------------------------------------------------------------------- To
>>unsubscribe, e-mail: tomcat-user-unsubscribe@jakarta.apache.org For
>>additional commands, e-mail: tomcat-user-help@jakarta.apache.org
>>    
>>
>
>
>
>---------------------------------------------------------------------
>To unsubscribe, e-mail: tomcat-user-unsubscribe@jakarta.apache.org
>For additional commands, e-mail: tomcat-user-help@jakarta.apache.org
>
>
>
>
>  
>




---------------------------------------------------------------------
To unsubscribe, e-mail: tomcat-user-unsubscribe@jakarta.apache.org
For additional commands, e-mail: tomcat-user-help@jakarta.apache.org