You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nuttx.apache.org by xi...@apache.org on 2021/03/22 08:13:24 UTC

[incubator-nuttx] branch master updated: tcp_send_buffered.c: improve tcp write buffering

This is an automated email from the ASF dual-hosted git repository.

xiaoxiang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-nuttx.git


The following commit(s) were added to refs/heads/master by this push:
     new 837e1a7  tcp_send_buffered.c: improve tcp write buffering
837e1a7 is described below

commit 837e1a72a47b4e5a874124e316192b2172301d30
Author: YAMAMOTO Takashi <ya...@midokura.com>
AuthorDate: Mon Mar 15 16:19:42 2021 +0900

    tcp_send_buffered.c: improve tcp write buffering
    
    * Send data chunk-by-chunk
      Note: A stream socket doesn't have atomicity requirement.
    
    * Increase the chance to use full-sized segments
    
    Benchmark numbers in my environment:
    
    * Over ESP32 wifi
    * The peer is NetBSD, which has traditional delayed ack TCP
    * iperf uses 16384 bytes buffer
    
    ---
    
    without this patch,
    CONFIG_IOB_NBUFFERS=36
    CONFIG_IOB_BUFSIZE=196
    
    does not work.
    see https://github.com/apache/incubator-nuttx/pull/2772#discussion_r592820639
    
    ---
    
    without this patch,
    CONFIG_IOB_NBUFFERS=128
    CONFIG_IOB_BUFSIZE=196
    ```
    nsh> iperf -c 192.168.8.1
           IP: 192.168.8.103
    
     mode=tcp-client sip=192.168.8.103:5001,dip=192.168.8.1:5001, interval=3, time=30
    
            Interval Bandwidth
    
       0-   3 sec,  4.11 Mbits/sec
       3-   6 sec,  4.63 Mbits/sec
       6-   9 sec,  4.89 Mbits/sec
       9-  12 sec,  4.63 Mbits/sec
      12-  15 sec,  4.85 Mbits/sec
      15-  18 sec,  4.85 Mbits/sec
      18-  21 sec,  5.02 Mbits/sec
      21-  24 sec,  3.67 Mbits/sec
      24-  27 sec,  4.94 Mbits/sec
      27-  30 sec,  4.81 Mbits/sec
       0-  30 sec,  4.64 Mbits/sec
    nsh>
    ```
    
    ---
    
    with this patch,
    CONFIG_IOB_NBUFFERS=36
    CONFIG_IOB_BUFSIZE=196
    ```
    nsh> iperf -c 192.168.8.1
           IP: 192.168.8.103
    
     mode=tcp-client sip=192.168.8.103:5001,dip=192.168.8.1:5001, interval=3, time=30
    
            Interval Bandwidth
    
       0-   3 sec,  5.33 Mbits/sec
       3-   6 sec,  5.59 Mbits/sec
       6-   9 sec,  5.55 Mbits/sec
       9-  12 sec,  5.59 Mbits/sec
      12-  15 sec,  5.59 Mbits/sec
      15-  18 sec,  5.72 Mbits/sec
      18-  21 sec,  5.68 Mbits/sec
      21-  24 sec,  5.29 Mbits/sec
      24-  27 sec,  4.67 Mbits/sec
      27-  30 sec,  4.50 Mbits/sec
       0-  30 sec,  5.35 Mbits/sec
    nsh>
    ```
    
    ---
    
    with this patch,
    CONFIG_IOB_NBUFFERS=128
    CONFIG_IOB_BUFSIZE=196
    ```
    nsh> iperf -c 192.168.8.1
           IP: 192.168.8.103
    
     mode=tcp-client sip=192.168.8.103:5001,dip=192.168.8.1:5001, interval=3, time=30
    
            Interval Bandwidth
    
       0-   3 sec,  5.51 Mbits/sec
       3-   6 sec,  4.67 Mbits/sec
       6-   9 sec,  4.54 Mbits/sec
       9-  12 sec,  5.42 Mbits/sec
      12-  15 sec,  5.37 Mbits/sec
      15-  18 sec,  5.11 Mbits/sec
      18-  21 sec,  5.07 Mbits/sec
      21-  24 sec,  5.29 Mbits/sec
      24-  27 sec,  5.77 Mbits/sec
      27-  30 sec,  4.63 Mbits/sec
       0-  30 sec,  5.14 Mbits/sec
    nsh>
    ```
---
 net/tcp/tcp.h               |   8 +--
 net/tcp/tcp_send_buffered.c | 167 +++++++++++++++++++++++++++++++++++++-------
 2 files changed, 144 insertions(+), 31 deletions(-)

diff --git a/net/tcp/tcp.h b/net/tcp/tcp.h
index c7c8fdb..c9aa4fd 100644
--- a/net/tcp/tcp.h
+++ b/net/tcp/tcp.h
@@ -67,11 +67,11 @@
 #  define TCP_WBNACK(wrb)            ((wrb)->wb_nack)
 #  define TCP_WBIOB(wrb)             ((wrb)->wb_iob)
 #  define TCP_WBCOPYOUT(wrb,dest,n)  (iob_copyout(dest,(wrb)->wb_iob,(n),0))
-#  define TCP_WBCOPYIN(wrb,src,n) \
-     (iob_copyin((wrb)->wb_iob,src,(n),0,false,\
+#  define TCP_WBCOPYIN(wrb,src,n,off) \
+     (iob_copyin((wrb)->wb_iob,src,(n),(off),false,\
                  IOBUSER_NET_TCP_WRITEBUFFER))
-#  define TCP_WBTRYCOPYIN(wrb,src,n) \
-     (iob_trycopyin((wrb)->wb_iob,src,(n),0,false,\
+#  define TCP_WBTRYCOPYIN(wrb,src,n,off) \
+     (iob_trycopyin((wrb)->wb_iob,src,(n),(off),false,\
                     IOBUSER_NET_TCP_WRITEBUFFER))
 
 #  define TCP_WBTRIM(wrb,n) \
diff --git a/net/tcp/tcp_send_buffered.c b/net/tcp/tcp_send_buffered.c
index d695705..c73a4a1 100644
--- a/net/tcp/tcp_send_buffered.c
+++ b/net/tcp/tcp_send_buffered.c
@@ -918,6 +918,44 @@ static inline void send_txnotify(FAR struct socket *psock,
 }
 
 /****************************************************************************
+ * Name: tcp_max_wrb_size
+ *
+ * Description:
+ *   Calculate the desired amount of data for a single
+ *   struct tcp_wrbuffer_s.
+ *
+ ****************************************************************************/
+
+static uint32_t tcp_max_wrb_size(FAR struct tcp_conn_s *conn)
+{
+  const uint32_t mss = conn->mss;
+  uint32_t size;
+
+  /* a few segments should be fine */
+
+  size = 4 * mss;
+
+  /* but it should not hog too many IOB buffers */
+
+  if (size > CONFIG_IOB_NBUFFERS * CONFIG_IOB_BUFSIZE / 2)
+    {
+      size = CONFIG_IOB_NBUFFERS * CONFIG_IOB_BUFSIZE / 2;
+    }
+
+  /* also, we prefer a multiple of mss */
+
+  if (size > mss)
+    {
+      const uint32_t odd = size % mss;
+      size -= odd;
+    }
+
+  DEBUGASSERT(size > 0);
+  ninfo("tcp_max_wrb_size = %" PRIu32 " for conn %p\n", size, conn);
+  return size;
+}
+
+/****************************************************************************
  * Public Functions
  ****************************************************************************/
 
@@ -982,6 +1020,7 @@ ssize_t psock_tcp_send(FAR struct socket *psock, FAR const void *buf,
 {
   FAR struct tcp_conn_s *conn;
   FAR struct tcp_wrbuffer_s *wrb;
+  FAR const uint8_t *cp;
   ssize_t    result = 0;
   bool       nonblock;
   int        ret = OK;
@@ -1043,30 +1082,15 @@ ssize_t psock_tcp_send(FAR struct socket *psock, FAR const void *buf,
 
   BUF_DUMP("psock_tcp_send", buf, len);
 
-  if (len > 0)
+  cp = buf;
+  while (len > 0)
     {
-      /* Allocate a write buffer.  Careful, the network will be momentarily
-       * unlocked here.
-       */
+      uint32_t max_wrb_size;
+      unsigned int off;
+      size_t chunk_len = len;
+      ssize_t chunk_result;
 
       net_lock();
-      if (nonblock)
-        {
-          wrb = tcp_wrbuffer_tryalloc();
-        }
-      else
-        {
-          wrb = tcp_wrbuffer_alloc();
-        }
-
-      if (wrb == NULL)
-        {
-          /* A buffer allocation error occurred */
-
-          nerr("ERROR: Failed to allocate write buffer\n");
-          ret = nonblock ? -EAGAIN : -ENOMEM;
-          goto errout_with_lock;
-        }
 
       /* Allocate resources to receive a callback */
 
@@ -1083,7 +1107,7 @@ ssize_t psock_tcp_send(FAR struct socket *psock, FAR const void *buf,
 
           nerr("ERROR: Failed to allocate callback\n");
           ret = nonblock ? -EAGAIN : -ENOMEM;
-          goto errout_with_wrb;
+          goto errout_with_lock;
         }
 
       /* Set up the callback in the connection */
@@ -1093,11 +1117,63 @@ ssize_t psock_tcp_send(FAR struct socket *psock, FAR const void *buf,
       psock->s_sndcb->priv  = (FAR void *)psock;
       psock->s_sndcb->event = psock_send_eventhandler;
 
+      /* Allocate a write buffer.  Careful, the network will be momentarily
+       * unlocked here.
+       */
+
+      /* Try to coalesce into the last wrb.
+       *
+       * But only when it might yield larger segments.
+       * (REVISIT: It might make sense to lift this condition.
+       * IOB boundaries and segment boundaries usually do not match.
+       * It makes sense to save the number of IOBs.)
+       *
+       * Also, for simplicity, do it only when we haven't sent anything
+       * from the the wrb yet.
+       */
+
+      max_wrb_size = tcp_max_wrb_size(conn);
+      wrb = (FAR struct tcp_wrbuffer_s *)sq_tail(&conn->write_q);
+      if (wrb != NULL && TCP_WBSENT(wrb) == 0 && TCP_WBNRTX(wrb) == 0 &&
+          TCP_WBPKTLEN(wrb) < max_wrb_size &&
+          (TCP_WBPKTLEN(wrb) % conn->mss) != 0)
+        {
+          wrb = (FAR struct tcp_wrbuffer_s *)sq_remlast(&conn->write_q);
+          ninfo("coalesce %zu bytes to wrb %p (%" PRIu16 ")\n", len, wrb,
+                TCP_WBPKTLEN(wrb));
+          DEBUGASSERT(TCP_WBPKTLEN(wrb) > 0);
+        }
+      else if (nonblock)
+        {
+          wrb = tcp_wrbuffer_tryalloc();
+          ninfo("new wrb %p (non blocking)\n", wrb);
+        }
+      else
+        {
+          wrb = tcp_wrbuffer_alloc();
+          ninfo("new wrb %p\n", wrb);
+        }
+
+      if (wrb == NULL)
+        {
+          /* A buffer allocation error occurred */
+
+          nerr("ERROR: Failed to allocate write buffer\n");
+          ret = nonblock ? -EAGAIN : -ENOMEM;
+          goto errout_with_lock;
+        }
+
       /* Initialize the write buffer */
 
       TCP_WBSEQNO(wrb) = (unsigned)-1;
       TCP_WBNRTX(wrb)  = 0;
 
+      off = TCP_WBPKTLEN(wrb);
+      if (off + chunk_len > max_wrb_size)
+        {
+          chunk_len = max_wrb_size - off;
+        }
+
       /* Copy the user data into the write buffer.  We cannot wait for
        * buffer space if the socket was opened non-blocking.
        */
@@ -1112,13 +1188,21 @@ ssize_t psock_tcp_send(FAR struct socket *psock, FAR const void *buf,
            * remaining data.
            */
 
-          result = TCP_WBTRYCOPYIN(wrb, (FAR uint8_t *)buf, len);
-          if (result == -ENOMEM)
+          chunk_result = TCP_WBTRYCOPYIN(wrb, cp, chunk_len, off);
+          if (chunk_result == -ENOMEM)
             {
               if (TCP_WBPKTLEN(wrb) > 0)
                 {
                   ninfo("INFO: Allocated part of the requested data\n");
-                  result = TCP_WBPKTLEN(wrb);
+                  DEBUGASSERT(TCP_WBPKTLEN(wrb) >= off);
+                  chunk_result = TCP_WBPKTLEN(wrb) - off;
+
+                  /* Note: chunk_result here can be 0 if we are trying
+                   * to coalesce into the existing buffer and we failed
+                   * to add anything.
+                   */
+
+                  DEBUGASSERT(chunk_result >= 0);
                 }
               else
                 {
@@ -1129,7 +1213,7 @@ ssize_t psock_tcp_send(FAR struct socket *psock, FAR const void *buf,
             }
           else
             {
-              result = len;
+              DEBUGASSERT(chunk_result == chunk_len);
             }
         }
       else
@@ -1143,7 +1227,9 @@ ssize_t psock_tcp_send(FAR struct socket *psock, FAR const void *buf,
            */
 
           blresult = net_breaklock(&count);
-          result = TCP_WBCOPYIN(wrb, (FAR uint8_t *)buf, len);
+          ninfo("starting copyin to wrb %p\n", wrb);
+          chunk_result = TCP_WBCOPYIN(wrb, cp, chunk_len, off);
+          ninfo("finished copyin to wrb %p\n", wrb);
           if (blresult >= 0)
             {
               net_restorelock(count);
@@ -1167,6 +1253,28 @@ ssize_t psock_tcp_send(FAR struct socket *psock, FAR const void *buf,
 
       send_txnotify(psock, conn);
       net_unlock();
+
+      if (chunk_result == 0)
+        {
+          DEBUGASSERT(nonblock);
+          break;
+        }
+
+      if (chunk_result < 0)
+        {
+          if (result == 0)
+            {
+              result = chunk_result;
+            }
+
+          break;
+        }
+
+      DEBUGASSERT(chunk_result <= len);
+      DEBUGASSERT(result >= 0);
+      cp += chunk_result;
+      len -= chunk_result;
+      result += chunk_result;
     }
 
   /* Check for errors.  Errors are signaled by negative errno values
@@ -1195,6 +1303,11 @@ errout_with_lock:
   net_unlock();
 
 errout:
+  if (result > 0)
+    {
+      return result;
+    }
+
   return ret;
 }