You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@tomcat.apache.org by ma...@apache.org on 2020/11/11 20:44:03 UTC

[tomcat] branch master updated: Fix NIO concurrency issue that removes connections from the poller.

This is an automated email from the ASF dual-hosted git repository.

markt pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tomcat.git


The following commit(s) were added to refs/heads/master by this push:
     new 45aeed6  Fix NIO concurrency issue that removes connections from the poller.
45aeed6 is described below

commit 45aeed655771308d5185d9dbab8e29a73d87509b
Author: Mark Thomas <ma...@apache.org>
AuthorDate: Wed Nov 11 20:43:04 2020 +0000

    Fix NIO concurrency issue that removes connections from the poller.
    
    This is the source of the intermittent WebSocket test failure so this
    commit also removes the associated debug code for that issue.
---
 java/org/apache/tomcat/util/net/NioEndpoint.java   | 40 ++++++++++++++++------
 .../tomcat/websocket/TestWebSocketFrameClient.java | 24 +++----------
 webapps/docs/changelog.xml                         |  4 +++
 3 files changed, 38 insertions(+), 30 deletions(-)

diff --git a/java/org/apache/tomcat/util/net/NioEndpoint.java b/java/org/apache/tomcat/util/net/NioEndpoint.java
index ac4959e..070a78a 100644
--- a/java/org/apache/tomcat/util/net/NioEndpoint.java
+++ b/java/org/apache/tomcat/util/net/NioEndpoint.java
@@ -1522,7 +1522,14 @@ public class NioEndpoint extends AbstractJsseEndpoint<NioChannel,SocketChannel>
 
         @Override
         protected void doRun() {
-            NioChannel socket = socketWrapper.getSocket();
+            /*
+             * Do not cache and re-use the value of socketWrapper.getSocket() in
+             * this method. If the socket closes the value will be updated to
+             * CLOSED_NIO_CHANNEL and the previous value potentially re-used for
+             * a new connection. That can result in a stale cached value which
+             * in turn can result in unintentionally closing currently active
+             * connections.
+             */
             Poller poller = NioEndpoint.this.poller;
             if (poller == null) {
                 socketWrapper.close();
@@ -1532,7 +1539,7 @@ public class NioEndpoint extends AbstractJsseEndpoint<NioChannel,SocketChannel>
             try {
                 int handshake = -1;
                 try {
-                    if (socket.isHandshakeComplete()) {
+                    if (socketWrapper.getSocket().isHandshakeComplete()) {
                         // No TLS handshaking required. Let the handler
                         // process this socket / event combination.
                         handshake = 0;
@@ -1542,7 +1549,7 @@ public class NioEndpoint extends AbstractJsseEndpoint<NioChannel,SocketChannel>
                         // if the handshake failed.
                         handshake = -1;
                     } else {
-                        handshake = socket.handshake(event == SocketEvent.OPEN_READ, event == SocketEvent.OPEN_WRITE);
+                        handshake = socketWrapper.getSocket().handshake(event == SocketEvent.OPEN_READ, event == SocketEvent.OPEN_WRITE);
                         // The handshake process reads/writes from/to the
                         // socket. status may therefore be OPEN_WRITE once
                         // the handshake completes. However, the handshake
@@ -1567,27 +1574,23 @@ public class NioEndpoint extends AbstractJsseEndpoint<NioChannel,SocketChannel>
                         state = getHandler().process(socketWrapper, event);
                     }
                     if (state == SocketState.CLOSED) {
-                        SelectionKey key = JreCompat.isJre11Available() ? null : socket.getIOChannel().keyFor(poller.getSelector());
-                        poller.cancelledKey(key, socketWrapper);
+                        poller.cancelledKey(getSelectionKey(), socketWrapper);
                     }
                 } else if (handshake == -1 ) {
                     getHandler().process(socketWrapper, SocketEvent.CONNECT_FAIL);
-                    SelectionKey key = JreCompat.isJre11Available() ? null : socket.getIOChannel().keyFor(poller.getSelector());
-                    poller.cancelledKey(key, socketWrapper);
+                    poller.cancelledKey(getSelectionKey(), socketWrapper);
                 } else if (handshake == SelectionKey.OP_READ){
                     socketWrapper.registerReadInterest();
                 } else if (handshake == SelectionKey.OP_WRITE){
                     socketWrapper.registerWriteInterest();
                 }
             } catch (CancelledKeyException cx) {
-                SelectionKey key = JreCompat.isJre11Available() ? null : socket.getIOChannel().keyFor(poller.getSelector());
-                poller.cancelledKey(key, socketWrapper);
+                poller.cancelledKey(getSelectionKey(), socketWrapper);
             } catch (VirtualMachineError vme) {
                 ExceptionUtils.handleThrowable(vme);
             } catch (Throwable t) {
                 log.error(sm.getString("endpoint.processing.fail"), t);
-                SelectionKey key = JreCompat.isJre11Available() ? null : socket.getIOChannel().keyFor(poller.getSelector());
-                poller.cancelledKey(key, socketWrapper);
+                poller.cancelledKey(getSelectionKey(), socketWrapper);
             } finally {
                 socketWrapper = null;
                 event = null;
@@ -1597,8 +1600,23 @@ public class NioEndpoint extends AbstractJsseEndpoint<NioChannel,SocketChannel>
                 }
             }
         }
+
+        private SelectionKey getSelectionKey() {
+            // Shortcut for Java 11 onwards
+            if (JreCompat.isJre11Available()) {
+                return null;
+            }
+
+            SocketChannel socketChannel = socketWrapper.getSocket().getIOChannel();
+            if (socketChannel == null) {
+                return null;
+            }
+
+            return socketChannel.keyFor(NioEndpoint.this.poller.getSelector());
+        }
     }
 
+
     // ----------------------------------------------- SendfileData Inner Class
 
     /**
diff --git a/test/org/apache/tomcat/websocket/TestWebSocketFrameClient.java b/test/org/apache/tomcat/websocket/TestWebSocketFrameClient.java
index f9ad656..fd222dd 100644
--- a/test/org/apache/tomcat/websocket/TestWebSocketFrameClient.java
+++ b/test/org/apache/tomcat/websocket/TestWebSocketFrameClient.java
@@ -23,8 +23,6 @@ import java.util.Map;
 import java.util.Queue;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.TimeUnit;
-import java.util.logging.Level;
-import java.util.logging.LogManager;
 
 import jakarta.websocket.ClientEndpointConfig;
 import jakarta.websocket.ClientEndpointConfig.Configurator;
@@ -117,20 +115,11 @@ public class TestWebSocketFrameClient extends WebSocketBaseTest {
 
         tomcat.start();
 
-        LogManager.getLogManager().getLogger("org.apache.coyote").setLevel(Level.ALL);
-        LogManager.getLogManager().getLogger("org.apache.tomcat.websocket").setLevel(Level.ALL);
-        LogManager.getLogManager().getLogger("org.apache.tomcat.util.net").setLevel(Level.ALL);
-        try {
-            echoTester("",null);
-            echoTester("/",null);
-            // This will trigger a redirect so there will be 5 requests logged
-            echoTester("/foo",null);
-            echoTester("/foo/",null);
-        } finally {
-            LogManager.getLogManager().getLogger("org.apache.coyote").setLevel(Level.INFO);
-            LogManager.getLogManager().getLogger("org.apache.tomcat.websocket.WsWebSocketContainer").setLevel(Level.INFO);
-            LogManager.getLogManager().getLogger("org.apache.tomcat.util.net").setLevel(Level.INFO);
-        }
+        echoTester("",null);
+        echoTester("/",null);
+        // This will trigger a redirect so there will be 5 requests logged
+        echoTester("/foo",null);
+        echoTester("/foo/",null);
     }
 
     public void echoTester(String path, ClientEndpointConfig clientEndpointConfig)
@@ -198,7 +187,6 @@ public class TestWebSocketFrameClient extends WebSocketBaseTest {
         clientEndpointConfig.getUserProperties().put(Constants.WS_AUTHENTICATION_PASSWORD, utf8Pass);
 
         echoTester(URI_PROTECTED, clientEndpointConfig);
-
     }
 
     @Test
@@ -235,7 +223,5 @@ public class TestWebSocketFrameClient extends WebSocketBaseTest {
         clientEndpointConfig.getUserProperties().put(Constants.WS_AUTHENTICATION_PASSWORD,PWD);
 
         echoTester(URI_PROTECTED, clientEndpointConfig);
-
     }
-
 }
diff --git a/webapps/docs/changelog.xml b/webapps/docs/changelog.xml
index a1b224c..f5e8b66 100644
--- a/webapps/docs/changelog.xml
+++ b/webapps/docs/changelog.xml
@@ -144,6 +144,10 @@
       <fix>
         <bug>64830</bug>: Fix concurrency issue in HPACK decoder. (markt)
       </fix>
+      <fix>
+        Fix a concurrency issue in the NIO connector that could cause newly
+        created connections to be removed from the poller. (markt)
+      </fix>
     </changelog>
   </subsection>
   <subsection name="Jasper">


---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@tomcat.apache.org
For additional commands, e-mail: dev-help@tomcat.apache.org


Re: [tomcat] branch master updated: Fix NIO concurrency issue that removes connections from the poller.

Posted by Rémy Maucherat <re...@apache.org>.
On Thu, Nov 12, 2020 at 10:32 AM Mark Thomas <ma...@apache.org> wrote:

> On 11/11/2020 22:37, Rémy Maucherat wrote:
> > On Wed, Nov 11, 2020 at 9:44 PM <ma...@apache.org> wrote:
> >
> >> This is an automated email from the ASF dual-hosted git repository.
> >>
> >> markt pushed a commit to branch master
> >> in repository https://gitbox.apache.org/repos/asf/tomcat.git
> >>
> >>
> >> The following commit(s) were added to refs/heads/master by this push:
> >>      new 45aeed6  Fix NIO concurrency issue that removes connections
> from
> >> the poller.
> >> 45aeed6 is described below
> >>
> >> commit 45aeed655771308d5185d9dbab8e29a73d87509b
> >> Author: Mark Thomas <ma...@apache.org>
> >> AuthorDate: Wed Nov 11 20:43:04 2020 +0000
> >>
> >>     Fix NIO concurrency issue that removes connections from the poller.
> >>
> >>     This is the source of the intermittent WebSocket test failure so
> this
> >>     commit also removes the associated debug code for that issue.
> >>
> >
> > Great fix. I never expected this one ...
>
> Thanks. It took me long enough to find it.
>
> It only occurred every 1 in ~15 test runs. When a full test run takes
> ~8.5 mins it is a slow process. I was trying to narrow down the set of
> tests that triggered it but it was hard to determine if the failure was
> still being triggered. After about a day of getting nowhere I decided to
> start from the other end and ran the single test in a loop until it
> failed. That meant I could reproduce the failure in less than a minute.
> Things moved a lot faster from that point.
>
> Once I could reproduce the issue, it was just a case of adding debug
> statements to track down the root cause. Some of those statements
> altered the timing enough to prevent the failure but even that helped as
> it meant the issue was occurring after that point.
>
> The root cause surprised me as well. I'd suspected some sort of issue
> along these lines and had been looking at the source code during the
> longer test runs. Knowing NioChannel instances were being re-used I'd
> explicitly looked for places were this sort of mix-up could happen and
> completely failed to find this one.
>
> It will be interesting to see if any other intermittent issues disappear
> suggesting they had the same root cause.
>

That's interesting, I'll review some more with that closed channel behavior
in mind.

Rémy

Re: [tomcat] branch master updated: Fix NIO concurrency issue that removes connections from the poller.

Posted by Mark Thomas <ma...@apache.org>.
On 11/11/2020 22:37, Rémy Maucherat wrote:
> On Wed, Nov 11, 2020 at 9:44 PM <ma...@apache.org> wrote:
> 
>> This is an automated email from the ASF dual-hosted git repository.
>>
>> markt pushed a commit to branch master
>> in repository https://gitbox.apache.org/repos/asf/tomcat.git
>>
>>
>> The following commit(s) were added to refs/heads/master by this push:
>>      new 45aeed6  Fix NIO concurrency issue that removes connections from
>> the poller.
>> 45aeed6 is described below
>>
>> commit 45aeed655771308d5185d9dbab8e29a73d87509b
>> Author: Mark Thomas <ma...@apache.org>
>> AuthorDate: Wed Nov 11 20:43:04 2020 +0000
>>
>>     Fix NIO concurrency issue that removes connections from the poller.
>>
>>     This is the source of the intermittent WebSocket test failure so this
>>     commit also removes the associated debug code for that issue.
>>
> 
> Great fix. I never expected this one ...

Thanks. It took me long enough to find it.

It only occurred every 1 in ~15 test runs. When a full test run takes
~8.5 mins it is a slow process. I was trying to narrow down the set of
tests that triggered it but it was hard to determine if the failure was
still being triggered. After about a day of getting nowhere I decided to
start from the other end and ran the single test in a loop until it
failed. That meant I could reproduce the failure in less than a minute.
Things moved a lot faster from that point.

Once I could reproduce the issue, it was just a case of adding debug
statements to track down the root cause. Some of those statements
altered the timing enough to prevent the failure but even that helped as
it meant the issue was occurring after that point.

The root cause surprised me as well. I'd suspected some sort of issue
along these lines and had been looking at the source code during the
longer test runs. Knowing NioChannel instances were being re-used I'd
explicitly looked for places were this sort of mix-up could happen and
completely failed to find this one.

It will be interesting to see if any other intermittent issues disappear
suggesting they had the same root cause.

Mark

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@tomcat.apache.org
For additional commands, e-mail: dev-help@tomcat.apache.org


Re: [tomcat] branch master updated: Fix NIO concurrency issue that removes connections from the poller.

Posted by Rémy Maucherat <re...@apache.org>.
On Wed, Nov 11, 2020 at 9:44 PM <ma...@apache.org> wrote:

> This is an automated email from the ASF dual-hosted git repository.
>
> markt pushed a commit to branch master
> in repository https://gitbox.apache.org/repos/asf/tomcat.git
>
>
> The following commit(s) were added to refs/heads/master by this push:
>      new 45aeed6  Fix NIO concurrency issue that removes connections from
> the poller.
> 45aeed6 is described below
>
> commit 45aeed655771308d5185d9dbab8e29a73d87509b
> Author: Mark Thomas <ma...@apache.org>
> AuthorDate: Wed Nov 11 20:43:04 2020 +0000
>
>     Fix NIO concurrency issue that removes connections from the poller.
>
>     This is the source of the intermittent WebSocket test failure so this
>     commit also removes the associated debug code for that issue.
>

Great fix. I never expected this one ...

Rémy


> ---
>  java/org/apache/tomcat/util/net/NioEndpoint.java   | 40
> ++++++++++++++++------
>  .../tomcat/websocket/TestWebSocketFrameClient.java | 24 +++----------
>  webapps/docs/changelog.xml                         |  4 +++
>  3 files changed, 38 insertions(+), 30 deletions(-)
>
> diff --git a/java/org/apache/tomcat/util/net/NioEndpoint.java
> b/java/org/apache/tomcat/util/net/NioEndpoint.java
> index ac4959e..070a78a 100644
> --- a/java/org/apache/tomcat/util/net/NioEndpoint.java
> +++ b/java/org/apache/tomcat/util/net/NioEndpoint.java
> @@ -1522,7 +1522,14 @@ public class NioEndpoint extends
> AbstractJsseEndpoint<NioChannel,SocketChannel>
>
>          @Override
>          protected void doRun() {
> -            NioChannel socket = socketWrapper.getSocket();
> +            /*
> +             * Do not cache and re-use the value of
> socketWrapper.getSocket() in
> +             * this method. If the socket closes the value will be
> updated to
> +             * CLOSED_NIO_CHANNEL and the previous value potentially
> re-used for
> +             * a new connection. That can result in a stale cached value
> which
> +             * in turn can result in unintentionally closing currently
> active
> +             * connections.
> +             */
>              Poller poller = NioEndpoint.this.poller;
>              if (poller == null) {
>                  socketWrapper.close();
> @@ -1532,7 +1539,7 @@ public class NioEndpoint extends
> AbstractJsseEndpoint<NioChannel,SocketChannel>
>              try {
>                  int handshake = -1;
>                  try {
> -                    if (socket.isHandshakeComplete()) {
> +                    if (socketWrapper.getSocket().isHandshakeComplete()) {
>                          // No TLS handshaking required. Let the handler
>                          // process this socket / event combination.
>                          handshake = 0;
> @@ -1542,7 +1549,7 @@ public class NioEndpoint extends
> AbstractJsseEndpoint<NioChannel,SocketChannel>
>                          // if the handshake failed.
>                          handshake = -1;
>                      } else {
> -                        handshake = socket.handshake(event ==
> SocketEvent.OPEN_READ, event == SocketEvent.OPEN_WRITE);
> +                        handshake =
> socketWrapper.getSocket().handshake(event == SocketEvent.OPEN_READ, event
> == SocketEvent.OPEN_WRITE);
>                          // The handshake process reads/writes from/to the
>                          // socket. status may therefore be OPEN_WRITE once
>                          // the handshake completes. However, the handshake
> @@ -1567,27 +1574,23 @@ public class NioEndpoint extends
> AbstractJsseEndpoint<NioChannel,SocketChannel>
>                          state = getHandler().process(socketWrapper,
> event);
>                      }
>                      if (state == SocketState.CLOSED) {
> -                        SelectionKey key = JreCompat.isJre11Available() ?
> null : socket.getIOChannel().keyFor(poller.getSelector());
> -                        poller.cancelledKey(key, socketWrapper);
> +                        poller.cancelledKey(getSelectionKey(),
> socketWrapper);
>                      }
>                  } else if (handshake == -1 ) {
>                      getHandler().process(socketWrapper,
> SocketEvent.CONNECT_FAIL);
> -                    SelectionKey key = JreCompat.isJre11Available() ?
> null : socket.getIOChannel().keyFor(poller.getSelector());
> -                    poller.cancelledKey(key, socketWrapper);
> +                    poller.cancelledKey(getSelectionKey(), socketWrapper);
>                  } else if (handshake == SelectionKey.OP_READ){
>                      socketWrapper.registerReadInterest();
>                  } else if (handshake == SelectionKey.OP_WRITE){
>                      socketWrapper.registerWriteInterest();
>                  }
>              } catch (CancelledKeyException cx) {
> -                SelectionKey key = JreCompat.isJre11Available() ? null :
> socket.getIOChannel().keyFor(poller.getSelector());
> -                poller.cancelledKey(key, socketWrapper);
> +                poller.cancelledKey(getSelectionKey(), socketWrapper);
>              } catch (VirtualMachineError vme) {
>                  ExceptionUtils.handleThrowable(vme);
>              } catch (Throwable t) {
>                  log.error(sm.getString("endpoint.processing.fail"), t);
> -                SelectionKey key = JreCompat.isJre11Available() ? null :
> socket.getIOChannel().keyFor(poller.getSelector());
> -                poller.cancelledKey(key, socketWrapper);
> +                poller.cancelledKey(getSelectionKey(), socketWrapper);
>              } finally {
>                  socketWrapper = null;
>                  event = null;
> @@ -1597,8 +1600,23 @@ public class NioEndpoint extends
> AbstractJsseEndpoint<NioChannel,SocketChannel>
>                  }
>              }
>          }
> +
> +        private SelectionKey getSelectionKey() {
> +            // Shortcut for Java 11 onwards
> +            if (JreCompat.isJre11Available()) {
> +                return null;
> +            }
> +
> +            SocketChannel socketChannel =
> socketWrapper.getSocket().getIOChannel();
> +            if (socketChannel == null) {
> +                return null;
> +            }
> +
> +            return
> socketChannel.keyFor(NioEndpoint.this.poller.getSelector());
> +        }
>      }
>
> +
>      // ----------------------------------------------- SendfileData Inner
> Class
>
>      /**
> diff --git
> a/test/org/apache/tomcat/websocket/TestWebSocketFrameClient.java
> b/test/org/apache/tomcat/websocket/TestWebSocketFrameClient.java
> index f9ad656..fd222dd 100644
> --- a/test/org/apache/tomcat/websocket/TestWebSocketFrameClient.java
> +++ b/test/org/apache/tomcat/websocket/TestWebSocketFrameClient.java
> @@ -23,8 +23,6 @@ import java.util.Map;
>  import java.util.Queue;
>  import java.util.concurrent.CountDownLatch;
>  import java.util.concurrent.TimeUnit;
> -import java.util.logging.Level;
> -import java.util.logging.LogManager;
>
>  import jakarta.websocket.ClientEndpointConfig;
>  import jakarta.websocket.ClientEndpointConfig.Configurator;
> @@ -117,20 +115,11 @@ public class TestWebSocketFrameClient extends
> WebSocketBaseTest {
>
>          tomcat.start();
>
> -
> LogManager.getLogManager().getLogger("org.apache.coyote").setLevel(Level.ALL);
> -
> LogManager.getLogManager().getLogger("org.apache.tomcat.websocket").setLevel(Level.ALL);
> -        LogManager.getLogManager().getLogger("org.apache.tomcat.util.net
> ").setLevel(Level.ALL);
> -        try {
> -            echoTester("",null);
> -            echoTester("/",null);
> -            // This will trigger a redirect so there will be 5 requests
> logged
> -            echoTester("/foo",null);
> -            echoTester("/foo/",null);
> -        } finally {
> -
> LogManager.getLogManager().getLogger("org.apache.coyote").setLevel(Level.INFO);
> -
> LogManager.getLogManager().getLogger("org.apache.tomcat.websocket.WsWebSocketContainer").setLevel(Level.INFO);
> -            LogManager.getLogManager().getLogger("
> org.apache.tomcat.util.net").setLevel(Level.INFO);
> -        }
> +        echoTester("",null);
> +        echoTester("/",null);
> +        // This will trigger a redirect so there will be 5 requests logged
> +        echoTester("/foo",null);
> +        echoTester("/foo/",null);
>      }
>
>      public void echoTester(String path, ClientEndpointConfig
> clientEndpointConfig)
> @@ -198,7 +187,6 @@ public class TestWebSocketFrameClient extends
> WebSocketBaseTest {
>
>  clientEndpointConfig.getUserProperties().put(Constants.WS_AUTHENTICATION_PASSWORD,
> utf8Pass);
>
>          echoTester(URI_PROTECTED, clientEndpointConfig);
> -
>      }
>
>      @Test
> @@ -235,7 +223,5 @@ public class TestWebSocketFrameClient extends
> WebSocketBaseTest {
>
>  clientEndpointConfig.getUserProperties().put(Constants.WS_AUTHENTICATION_PASSWORD,PWD);
>
>          echoTester(URI_PROTECTED, clientEndpointConfig);
> -
>      }
> -
>  }
> diff --git a/webapps/docs/changelog.xml b/webapps/docs/changelog.xml
> index a1b224c..f5e8b66 100644
> --- a/webapps/docs/changelog.xml
> +++ b/webapps/docs/changelog.xml
> @@ -144,6 +144,10 @@
>        <fix>
>          <bug>64830</bug>: Fix concurrency issue in HPACK decoder. (markt)
>        </fix>
> +      <fix>
> +        Fix a concurrency issue in the NIO connector that could cause
> newly
> +        created connections to be removed from the poller. (markt)
> +      </fix>
>      </changelog>
>    </subsection>
>    <subsection name="Jasper">
>
>
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: dev-unsubscribe@tomcat.apache.org
> For additional commands, e-mail: dev-help@tomcat.apache.org
>
>