You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nuttx.apache.org by xi...@apache.org on 2023/01/18 08:24:15 UTC

[nuttx] branch master updated (b362f18d6a -> 64dd7e6376)

This is an automated email from the ASF dual-hosted git repository.

xiaoxiang pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/nuttx.git


    from b362f18d6a mempool:Calibration total memory statistics
     new 0e7d397553 net/tcp: new api tcp_dataconcat() to concatenate/pack iob chain
     new d175f50f01 net/tcp: add out-of-order segment support
     new c581cc5f9b net/tcp: parse tcp options in common function
     new 64dd7e6376 net/tcp: add Selective-ACK support

The 4 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 include/nuttx/net/tcp.h     |   9 +-
 net/tcp/Kconfig             |  28 +++
 net/tcp/tcp.h               |  87 ++++++++
 net/tcp/tcp_callback.c      | 210 ++++++++++++++++--
 net/tcp/tcp_conn.c          |  16 ++
 net/tcp/tcp_input.c         | 531 ++++++++++++++++++++++++++++++++++----------
 net/tcp/tcp_recvwindow.c    |  30 +++
 net/tcp/tcp_send.c          |  53 ++++-
 net/tcp/tcp_send_buffered.c | 318 +++++++++++++++++++-------
 9 files changed, 1067 insertions(+), 215 deletions(-)


[nuttx] 02/04: net/tcp: add out-of-order segment support

Posted by xi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

xiaoxiang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nuttx.git

commit d175f50f017d3e239177347a9856d42b1fc02ae3
Author: chao an <an...@xiaomi.com>
AuthorDate: Fri Jan 6 15:30:58 2023 +0800

    net/tcp: add out-of-order segment support
    
    Signed-off-by: chao an <an...@xiaomi.com>
---
 net/tcp/Kconfig          |  16 +++
 net/tcp/tcp.h            |  43 +++++++
 net/tcp/tcp_callback.c   | 156 +++++++++++++++++++++++-
 net/tcp/tcp_conn.c       |  16 +++
 net/tcp/tcp_input.c      | 312 ++++++++++++++++++++++++++++++++++++++++++++++-
 net/tcp/tcp_recvwindow.c |  30 +++++
 6 files changed, 571 insertions(+), 2 deletions(-)

diff --git a/net/tcp/Kconfig b/net/tcp/Kconfig
index a9958d935c..64208e76b2 100644
--- a/net/tcp/Kconfig
+++ b/net/tcp/Kconfig
@@ -135,6 +135,22 @@ config NET_TCP_WINDOW_SCALE_FACTOR
 
 endif # NET_TCP_WINDOW_SCALE
 
+config NET_TCP_OUT_OF_ORDER
+	bool "Enable TCP/IP Out Of Order segments"
+	default n
+	---help---
+		TCP will queue segments that arrive out of order.
+
+if NET_TCP_OUT_OF_ORDER
+
+config NET_TCP_OUT_OF_ORDER_BUFSIZE
+	int "TCP/IP Out Of Order buffer size"
+	default 16384
+	---help---
+		This is the default value for out-of-order buffer size.
+
+endif # NET_TCP_OUT_OF_ORDER
+
 config NET_TCP_NOTIFIER
 	bool "Support TCP notifications"
 	default n
diff --git a/net/tcp/tcp.h b/net/tcp/tcp.h
index 5637106499..bac6bc94de 100644
--- a/net/tcp/tcp.h
+++ b/net/tcp/tcp.h
@@ -106,6 +106,10 @@
 
 #define TCP_WSCALE            0x01U /* Window Scale option enabled */
 
+/* The Max Range count of TCP Selective ACKs */
+
+#define TCP_SACK_RANGES_MAX   4
+
 /* After receiving 3 duplicate ACKs, TCP performs a retransmission
  * (RFC 5681 (3.2))
  */
@@ -144,6 +148,15 @@ struct tcp_poll_s
   FAR struct devif_callback_s *cb; /* Needed to teardown the poll */
 };
 
+/* Out-of-order segments */
+
+struct tcp_ofoseg_s
+{
+  uint32_t         left;  /* Left edge of segment */
+  uint32_t         right; /* Right edge of segment */
+  FAR struct iob_s *data; /* Out-of-order buffering */
+};
+
 struct tcp_conn_s
 {
   /* Common prologue of all connection structures. */
@@ -251,6 +264,17 @@ struct tcp_conn_s
 
   struct iob_s *readahead;   /* Read-ahead buffering */
 
+#ifdef CONFIG_NET_TCP_OUT_OF_ORDER
+
+  /* Number of out-of-order segments */
+
+  uint8_t nofosegs;
+
+  /* This defines an out-of-order segment block. */
+
+  struct tcp_ofoseg_s ofosegs[TCP_SACK_RANGES_MAX];
+#endif
+
 #ifdef CONFIG_NET_TCP_WRITE_BUFFERS
   /* Write buffering
    *
@@ -2100,6 +2124,25 @@ void tcp_sendbuffer_notify(FAR struct tcp_conn_s *conn);
 
 uint16_t tcpip_hdrsize(FAR struct tcp_conn_s *conn);
 
+/****************************************************************************
+ * Name: tcp_ofoseg_bufsize
+ *
+ * Description:
+ *   Calculate the pending size of out-of-order buffer
+ *
+ * Input Parameters:
+ *   conn   - The TCP connection of interest
+ *
+ * Returned Value:
+ *   Total size of out-of-order buffer
+ *
+ * Assumptions:
+ *   This function must be called with the network locked.
+ *
+ ****************************************************************************/
+
+int tcp_ofoseg_bufsize(FAR struct tcp_conn_s *conn);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/net/tcp/tcp_callback.c b/net/tcp/tcp_callback.c
index 3d00cdcc1f..d825ba07eb 100644
--- a/net/tcp/tcp_callback.c
+++ b/net/tcp/tcp_callback.c
@@ -94,10 +94,155 @@ tcp_data_event(FAR struct net_driver_s *dev, FAR struct tcp_conn_s *conn,
   return flags;
 }
 
+/****************************************************************************
+ * Name: tcp_ofoseg_data_event
+ *
+ * Description:
+ *   Move out-of-order segments that are now in sequence to readahead.
+ *
+ * Assumptions:
+ * - This function must be called with the network locked.
+ *
+ ****************************************************************************/
+
+#ifdef CONFIG_NET_TCP_OUT_OF_ORDER
+static uint16_t tcp_ofoseg_data_event(FAR struct net_driver_s *dev,
+                                      FAR struct tcp_conn_s *conn,
+                                      uint16_t flags)
+{
+  FAR struct tcp_ofoseg_s *seg;
+  uint32_t rcvseq;
+  int i = 0;
+
+  /* Assume that we will ACK the data.  The data will be ACKed if it is
+   * placed in the read-ahead buffer -OR- if it is zero length
+   */
+
+  flags |= TCP_SNDACK;
+
+  /* Get the receive sequence number */
+
+  rcvseq = tcp_getsequence(conn->rcvseq);
+
+  ninfo("TCP OFOSEG rcvseq [%" PRIu32 "]\n", rcvseq);
+
+  /* For each out-of-order segment */
+
+  while (i < conn->nofosegs)
+    {
+      seg = &conn->ofosegs[i];
+
+      /* rcvseq -->|
+       * ofoseg    |------|
+       */
+
+      if (rcvseq == seg->left)
+        {
+          ninfo("TCP OFOSEG input [%" PRIu32 " : %" PRIu32 " : %u]\n",
+                 seg->left, seg->right, seg->data->io_pktlen);
+          rcvseq = TCP_SEQ_ADD(rcvseq,
+                               seg->data->io_pktlen);
+          net_incr32(conn->rcvseq, seg->data->io_pktlen);
+          tcp_dataconcat(&conn->readahead, &seg->data);
+        }
+      else if (TCP_SEQ_GT(rcvseq, seg->left))
+        {
+          /* rcvseq       -->|
+           * ofoseg  |------|
+           */
+
+          if (TCP_SEQ_GTE(rcvseq, seg->right))
+            {
+              /* Remove stale segments */
+
+              iob_free_chain(seg->data);
+              seg->data = NULL;
+            }
+
+          /* rcvseq  -->|
+           * ofoseg   |------|
+           */
+
+          else
+            {
+              seg->data =
+                iob_trimhead(seg->data,
+                             TCP_SEQ_SUB(rcvseq, seg->left));
+              seg->left = rcvseq;
+              if (seg->data != NULL)
+                {
+                  ninfo("TCP OFOSEG input "
+                        "[%" PRIu32 " : %" PRIu32 " : %u]\n",
+                        seg->left, seg->right, seg->data->io_pktlen);
+                  rcvseq = TCP_SEQ_ADD(rcvseq,
+                                       seg->data->io_pktlen);
+                  net_incr32(conn->rcvseq, seg->data->io_pktlen);
+                  tcp_dataconcat(&conn->readahead, &seg->data);
+                }
+            }
+        }
+
+      /* Rebuild out-of-order pool if segment is consumed */
+
+      if (seg->data == NULL)
+        {
+          for (; i < conn->nofosegs - 1; i++)
+            {
+              conn->ofosegs[i] = conn->ofosegs[i + 1];
+            }
+
+          conn->nofosegs--;
+
+          /* Try segments again */
+
+          i = 0;
+        }
+      else
+        {
+          i++;
+        }
+    }
+
+  return flags;
+}
+#endif /* CONFIG_NET_TCP_OUT_OF_ORDER */
+
 /****************************************************************************
  * Public Functions
  ****************************************************************************/
 
+/****************************************************************************
+ * Name: tcp_ofoseg_bufsize
+ *
+ * Description:
+ *   Calculate the pending size of out-of-order buffer
+ *
+ * Input Parameters:
+ *   conn   - The TCP connection of interest
+ *
+ * Returned Value:
+ *   Total size of out-of-order buffer
+ *
+ * Assumptions:
+ *   This function must be called with the network locked.
+ *
+ ****************************************************************************/
+
+#ifdef CONFIG_NET_TCP_OUT_OF_ORDER
+int tcp_ofoseg_bufsize(FAR struct tcp_conn_s *conn)
+{
+  int total = 0;
+  int i;
+
+  for (i = 0; i < conn->nofosegs; i++)
+    {
+      total += conn->ofosegs[i].data->io_pktlen;
+    }
+
+  return total;
+}
+#endif /* CONFIG_NET_TCP_OUT_OF_ORDER */
+
 /****************************************************************************
  * Name: tcp_callback
  *
@@ -112,7 +257,7 @@ tcp_data_event(FAR struct net_driver_s *dev, FAR struct tcp_conn_s *conn,
 uint16_t tcp_callback(FAR struct net_driver_s *dev,
                       FAR struct tcp_conn_s *conn, uint16_t flags)
 {
-#ifdef CONFIG_NET_TCP_NOTIFIER
+#if defined(CONFIG_NET_TCP_NOTIFIER) || defined(CONFIG_NET_TCP_OUT_OF_ORDER)
   uint16_t orig = flags;
 #endif
 
@@ -166,6 +311,15 @@ uint16_t tcp_callback(FAR struct net_driver_s *dev,
       flags = tcp_data_event(dev, conn, flags);
     }
 
+#ifdef CONFIG_NET_TCP_OUT_OF_ORDER
+  if ((orig & TCP_NEWDATA) != 0 && conn->nofosegs > 0)
+    {
+      /* Try out-of-order pool if new data is coming */
+
+      flags = tcp_ofoseg_data_event(dev, conn, flags);
+    }
+#endif
+
   /* Check if there is a connection-related event and a connection
    * callback.
    */
diff --git a/net/tcp/tcp_conn.c b/net/tcp/tcp_conn.c
index de09d72c13..1c2b359478 100644
--- a/net/tcp/tcp_conn.c
+++ b/net/tcp/tcp_conn.c
@@ -806,6 +806,22 @@ void tcp_free(FAR struct tcp_conn_s *conn)
   iob_free_chain(conn->readahead);
   conn->readahead = NULL;
 
+#ifdef CONFIG_NET_TCP_OUT_OF_ORDER
+  /* Release any out-of-order buffers */
+
+  if (conn->nofosegs > 0)
+    {
+      int i;
+
+      for (i = 0; i < conn->nofosegs; i++)
+        {
+          iob_free_chain(conn->ofosegs[i].data);
+        }
+
+      conn->nofosegs = 0;
+    }
+#endif /* CONFIG_NET_TCP_OUT_OF_ORDER */
+
 #ifdef CONFIG_NET_TCP_WRITE_BUFFERS
   /* Release any write buffers attached to the connection */
 
diff --git a/net/tcp/tcp_input.c b/net/tcp/tcp_input.c
index 4ed4edc025..552f6b1d37 100644
--- a/net/tcp/tcp_input.c
+++ b/net/tcp/tcp_input.c
@@ -257,6 +257,313 @@ static void tcp_snd_wnd_update(FAR struct tcp_conn_s *conn,
     }
 }
 
+#ifdef CONFIG_NET_TCP_OUT_OF_ORDER
+
+/****************************************************************************
+ * Name: tcp_rebuild_ofosegs
+ *
+ * Description:
+ *   Re-build out-of-order pool from incoming segment
+ *
+ * Input Parameters:
+ *   conn   - The TCP connection of interest
+ *   ofoseg - Pointer to incoming out-of-order segment
+ *   start  - Index of start position of segment pool
+ *
+ * Returned Value:
+ *   True if incoming data has been consumed
+ *
+ * Assumptions:
+ *   The network is locked.
+ *
+ ****************************************************************************/
+
+static bool tcp_rebuild_ofosegs(FAR struct tcp_conn_s *conn,
+                                FAR struct tcp_ofoseg_s *ofoseg,
+                                int start)
+{
+  struct tcp_ofoseg_s *seg;
+  int i;
+
+  for (i = start; i < conn->nofosegs && ofoseg->data != NULL; i++)
+    {
+      seg = &conn->ofosegs[i];
+
+      /* ofoseg    |~~~
+       * segpool |---|
+       */
+
+      if (TCP_SEQ_GTE(ofoseg->left, seg->left))
+        {
+          /* ofoseg        |---|
+           * segpool |---|
+           */
+
+          if (TCP_SEQ_GT(ofoseg->left, seg->right))
+            {
+              continue;
+            }
+
+          /* ofoseg      |---|
+           * segpool |---|
+           */
+
+          else if (ofoseg->left == seg->right)
+            {
+              tcp_dataconcat(&seg->data, &ofoseg->data);
+              seg->right = ofoseg->right;
+            }
+
+          /* ofoseg   |--|
+           * segpool |---|
+           */
+
+          else if (TCP_SEQ_LTE(ofoseg->right, seg->right))
+            {
+              iob_free_chain(ofoseg->data);
+              ofoseg->data = NULL;
+            }
+
+          /* ofoseg    |---|
+           * segpool |---|
+           */
+
+          else if (TCP_SEQ_GT(ofoseg->right, seg->right))
+            {
+              ofoseg->data =
+                iob_trimhead(ofoseg->data,
+                             TCP_SEQ_SUB(seg->right, ofoseg->left));
+              tcp_dataconcat(&seg->data, &ofoseg->data);
+              seg->right = ofoseg->right;
+            }
+        }
+
+      /* ofoseg  |~~~
+       * segpool   |---|
+       */
+
+      else
+        {
+          /* ofoseg  |---|
+           * segpool     |---|
+           */
+
+          if (ofoseg->right == seg->left)
+            {
+              tcp_dataconcat(&ofoseg->data, &seg->data);
+              seg->data = ofoseg->data;
+              seg->left = ofoseg->left;
+              ofoseg->data = NULL;
+            }
+
+          /* ofoseg  |---|
+           * segpool       |---|
+           */
+
+          else if (TCP_SEQ_LT(ofoseg->right, seg->left))
+            {
+              continue;
+            }
+
+          /* ofoseg  |---|~|
+           * segpool  |--|
+           */
+
+          else if (TCP_SEQ_GTE(ofoseg->right, seg->right))
+            {
+              iob_free_chain(seg->data);
+              *seg = *ofoseg;
+              ofoseg->data = NULL;
+            }
+
+          /* ofoseg  |---|
+           * segpool   |---|
+           */
+
+          else if (TCP_SEQ_GT(ofoseg->right, seg->left))
+            {
+              ofoseg->data =
+                iob_trimtail(ofoseg->data,
+                             ofoseg->right - seg->left);
+              tcp_dataconcat(&ofoseg->data, &seg->data);
+              seg->data = ofoseg->data;
+              seg->left = ofoseg->left;
+              ofoseg->data = NULL;
+            }
+        }
+    }
+
+  return (ofoseg->data == NULL);
+}
+
+/****************************************************************************
+ * Name: tcp_reorder_ofosegs
+ *
+ * Description:
+ *   Sort out-of-order segments by left edge
+ *
+ * Input Parameters:
+ *   nofosegs - Number of out-of-order segments
+ *   ofosegs  - Pointer to out-of-order segments
+ *
+ * Returned Value:
+ *   True if re-order occurs
+ *
+ * Assumptions:
+ *   The network is locked.
+ *
+ ****************************************************************************/
+
+static bool tcp_reorder_ofosegs(int nofosegs,
+                                FAR struct tcp_ofoseg_s *ofosegs)
+{
+  struct tcp_ofoseg_s segs;
+  bool reordered = false;
+  int i;
+  int j;
+
+  /* Sort out-of-order segments by left edge */
+
+  for (i = 0; i < nofosegs - 1; i++)
+    {
+      for (j = 0; j < nofosegs - 1 - i; j++)
+        {
+          if (TCP_SEQ_GT(ofosegs[j].left,
+                         ofosegs[j + 1].left))
+            {
+              segs = ofosegs[j];
+              ofosegs[j] = ofosegs[j + 1];
+              ofosegs[j + 1] = segs;
+              reordered = true;
+            }
+        }
+    }
+
+  return reordered;
+}
+
+/****************************************************************************
+ * Name: tcp_input_ofosegs
+ *
+ * Description:
+ *   Handle incoming TCP data to out-of-order pool
+ *
+ * Input Parameters:
+ *   dev    - The device driver structure containing the received TCP packet.
+ *   conn   - The TCP connection of interest
+ *   iplen  - Length of the IP header (IPv4_HDRLEN or IPv6_HDRLEN).
+ *
+ * Returned Value:
+ *   None
+ *
+ * Assumptions:
+ *   The network is locked.
+ *
+ ****************************************************************************/
+
+static void tcp_input_ofosegs(FAR struct net_driver_s *dev,
+                              FAR struct tcp_conn_s *conn,
+                              unsigned int iplen)
+{
+  struct tcp_ofoseg_s ofoseg;
+  bool rebuild;
+  int i = 0;
+  int len;
+
+  ofoseg.left =
+    tcp_getsequence(((FAR struct tcp_hdr_s *)IPBUF(iplen))->seqno);
+
+  /* If the out-of-order cache is already over its size limit, drop the
+   * incoming segment unless it starts before the first queued segment.
+   */
+
+  if (tcp_ofoseg_bufsize(conn) > CONFIG_NET_TCP_OUT_OF_ORDER_BUFSIZE &&
+      ofoseg.left >= conn->ofosegs[0].left)
+    {
+      return;
+    }
+
+  /* Get left/right edge from incoming data */
+
+  len = (dev->d_appdata - dev->d_iob->io_data) - dev->d_iob->io_offset;
+  ofoseg.right = TCP_SEQ_ADD(ofoseg.left, dev->d_iob->io_pktlen - len);
+
+  ninfo("TCP OFOSEG out-of-order "
+        "[%" PRIu32 " : %" PRIu32 " : %" PRIu32 "]\n",
+        ofoseg.left, ofoseg.right, TCP_SEQ_SUB(ofoseg.right, ofoseg.left));
+
+  /* Trim l3/l4 header to reserve appdata */
+
+  dev->d_iob = iob_trimhead(dev->d_iob, len);
+  if (dev->d_iob == NULL)
+    {
+      /* No available data, clear device buffer */
+
+      goto clear;
+    }
+
+  ofoseg.data = dev->d_iob;
+
+  /* Build out-of-order pool */
+
+  rebuild = tcp_rebuild_ofosegs(conn, &ofoseg, 0);
+
+  /* Segment was not merged into the existing pool, add it as a new entry */
+
+  if (!rebuild && conn->nofosegs != TCP_SACK_RANGES_MAX)
+    {
+      conn->ofosegs[conn->nofosegs] = ofoseg;
+      conn->nofosegs++;
+      rebuild = true;
+    }
+
+  /* Try Re-order ofosegs */
+
+  if (rebuild &&
+      tcp_reorder_ofosegs(conn->nofosegs, (FAR void *)conn->ofosegs))
+    {
+      /* Re-build out-of-order pool after re-order */
+
+      while (i < conn->nofosegs - 1)
+        {
+          if (tcp_rebuild_ofosegs(conn, &conn->ofosegs[i], i + 1))
+            {
+              for (; i < conn->nofosegs - 1; i++)
+                {
+                  conn->ofosegs[i] = conn->ofosegs[i + 1];
+                }
+
+              conn->nofosegs--;
+
+              i = 0;
+            }
+          else
+            {
+              i++;
+            }
+        }
+    }
+
+  for (i = 0; i < conn->nofosegs; i++)
+    {
+      ninfo("TCP OFOSEG [%d][%" PRIu32 " : %" PRIu32 " : %" PRIu32 "]\n", i,
+            conn->ofosegs[i].left, conn->ofosegs[i].right,
+            TCP_SEQ_SUB(conn->ofosegs[i].right, conn->ofosegs[i].left));
+    }
+
+  /* Incoming data has been consumed, re-prepare device buffer to send
+   * response.
+   */
+
+  if (rebuild)
+    {
+clear:
+      netdev_iob_clear(dev);
+      netdev_iob_prepare(dev, false, 0);
+    }
+}
+#endif /* CONFIG_NET_TCP_OUT_OF_ORDER */
+
 /****************************************************************************
  * Name: tcp_input
  *
@@ -697,8 +1004,11 @@ found:
             }
           else
             {
-              /* We never queue out-of-order segments. */
+#ifdef CONFIG_NET_TCP_OUT_OF_ORDER
+              /* Queue out-of-order segments. */
 
+              tcp_input_ofosegs(dev, conn, iplen);
+#endif
               tcp_send(dev, conn, TCP_ACK, tcpiplen);
               return;
             }
diff --git a/net/tcp/tcp_recvwindow.c b/net/tcp/tcp_recvwindow.c
index 7be6fd1682..2777c86230 100644
--- a/net/tcp/tcp_recvwindow.c
+++ b/net/tcp/tcp_recvwindow.c
@@ -219,6 +219,36 @@ uint32_t tcp_get_recvwindow(FAR struct net_driver_s *dev,
 
   recvwndo = tcp_calc_rcvsize(conn, recvwndo);
 
+#ifdef CONFIG_NET_TCP_OUT_OF_ORDER
+  /* Calculate the minimum desired size */
+
+  if (conn->nofosegs > 0)
+    {
+      uint32_t desire = conn->ofosegs[0].left -
+                        tcp_getsequence(conn->rcvseq);
+      int bufsize = tcp_ofoseg_bufsize(conn);
+
+      if (desire < tcp_rx_mss(dev))
+        {
+          desire = tcp_rx_mss(dev);
+        }
+
+      if (TCP_SEQ_LT(recvwndo, bufsize))
+        {
+          recvwndo = 0;
+        }
+      else
+        {
+          recvwndo -= bufsize;
+        }
+
+      if (recvwndo < desire)
+        {
+          recvwndo = desire;
+        }
+    }
+#endif /* CONFIG_NET_TCP_OUT_OF_ORDER */
+
 #ifdef CONFIG_NET_TCP_WINDOW_SCALE
   recvwndo >>= conn->rcv_scale;
 #endif


[nuttx] 03/04: net/tcp: parse tcp options in common function

Posted by xi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

xiaoxiang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nuttx.git

commit c581cc5f9b8c3bbd622895f3eff4e6a7f1342774
Author: chao an <an...@xiaomi.com>
AuthorDate: Mon Jan 9 19:44:23 2023 +0800

    net/tcp: parse tcp options in common function
    
    Signed-off-by: chao an <an...@xiaomi.com>
---
 net/tcp/tcp_input.c | 212 ++++++++++++++++++++++++----------------------------
 1 file changed, 96 insertions(+), 116 deletions(-)

diff --git a/net/tcp/tcp_input.c b/net/tcp/tcp_input.c
index 552f6b1d37..b4b7b05292 100644
--- a/net/tcp/tcp_input.c
+++ b/net/tcp/tcp_input.c
@@ -564,6 +564,100 @@ clear:
 }
 #endif /* CONFIG_NET_TCP_OUT_OF_ORDER */
 
+/****************************************************************************
+ * Name: tcp_parse_option
+ *
+ * Description:
+ *   Parse incoming TCP options
+ *
+ * Input Parameters:
+ *   dev    - The device driver structure containing the received TCP packet.
+ *   conn   - The TCP connection of interest
+ *   iplen  - Length of the IP header (IPv4_HDRLEN or IPv6_HDRLEN).
+ *
+ * Returned Value:
+ *   None
+ *
+ * Assumptions:
+ *   The network is locked.
+ *
+ ****************************************************************************/
+
+static void tcp_parse_option(FAR struct net_driver_s *dev,
+                             FAR struct tcp_conn_s *conn,
+                             unsigned int iplen)
+{
+  FAR struct tcp_hdr_s *tcp;
+  unsigned int tcpiplen;
+  uint16_t tmp16;
+  uint8_t  opt;
+  int i;
+
+  tcp = IPBUF(iplen);
+
+  if ((tcp->tcpoffset & 0xf0) <= 0x50)
+    {
+      return;
+    }
+
+  tcpiplen = iplen + TCP_HDRLEN;
+
+  for (i = 0; i < ((tcp->tcpoffset >> 4) - 5) << 2 ; )
+    {
+      opt = IPDATA(tcpiplen + i);
+      if (opt == TCP_OPT_END)
+        {
+          /* End of options. */
+
+          break;
+        }
+      else if (opt == TCP_OPT_NOOP)
+        {
+          /* NOP option. */
+
+          ++i;
+          continue;
+        }
+      else if (opt == TCP_OPT_MSS &&
+               IPDATA(tcpiplen + 1 + i) == TCP_OPT_MSS_LEN)
+        {
+          uint16_t tcp_mss = TCP_MSS(dev, iplen);
+
+          /* An MSS option with the right option length. */
+
+          tmp16 = ((uint16_t)IPDATA(tcpiplen + 2 + i) << 8) |
+                   (uint16_t)IPDATA(tcpiplen + 3 + i);
+          conn->mss = tmp16 > tcp_mss ? tcp_mss : tmp16;
+        }
+#ifdef CONFIG_NET_TCP_WINDOW_SCALE
+      else if (opt == TCP_OPT_WS &&
+               IPDATA(tcpiplen + 1 + i) == TCP_OPT_WS_LEN)
+        {
+          conn->snd_scale = IPDATA(tcpiplen + 2 + i);
+          conn->rcv_scale = CONFIG_NET_TCP_WINDOW_SCALE_FACTOR;
+          conn->flags    |= TCP_WSCALE;
+        }
+#endif
+      else
+        {
+          /* All other options have a length field, so that we
+           * easily can skip past them.
+           */
+
+          if (IPDATA(tcpiplen + 1 + i) == 0)
+            {
+              /* If the length field is zero, the options are
+               * malformed and we don't process them further.
+               */
+
+              break;
+            }
+        }
+
+      i += IPDATA(tcpiplen + 1 + i);
+    }
+}
+
 /****************************************************************************
  * Name: tcp_input
  *
@@ -593,9 +687,7 @@ static void tcp_input(FAR struct net_driver_s *dev, uint8_t domain,
   uint16_t tmp16;
   uint16_t flags;
   uint16_t result;
-  uint8_t  opt;
   int      len;
-  int      i;
 
 #ifdef CONFIG_NET_STATISTICS
   /* Bump up the count of TCP packets received */
@@ -748,63 +840,7 @@ static void tcp_input(FAR struct net_driver_s *dev, uint8_t domain,
 
           /* Parse the TCP MSS option, if present. */
 
-          if ((tcp->tcpoffset & 0xf0) > 0x50)
-            {
-              for (i = 0; i < ((tcp->tcpoffset >> 4) - 5) << 2 ; )
-                {
-                  opt = IPDATA(tcpiplen + i);
-                  if (opt == TCP_OPT_END)
-                    {
-                      /* End of options. */
-
-                      break;
-                    }
-                  else if (opt == TCP_OPT_NOOP)
-                    {
-                      /* NOP option. */
-
-                      ++i;
-                      continue;
-                    }
-                  else if (opt == TCP_OPT_MSS &&
-                           IPDATA(tcpiplen + 1 + i) == TCP_OPT_MSS_LEN)
-                    {
-                      uint16_t tcp_mss = TCP_MSS(dev, iplen);
-
-                      /* An MSS option with the right option length. */
-
-                      tmp16 = ((uint16_t)IPDATA(tcpiplen + 2 + i) << 8) |
-                               (uint16_t)IPDATA(tcpiplen + 3 + i);
-                      conn->mss = tmp16 > tcp_mss ? tcp_mss : tmp16;
-                    }
-#ifdef CONFIG_NET_TCP_WINDOW_SCALE
-                  else if (opt == TCP_OPT_WS &&
-                           IPDATA(tcpiplen + 1 + i) == TCP_OPT_WS_LEN)
-                    {
-                      conn->snd_scale = IPDATA(tcpiplen + 2 + i);
-                      conn->rcv_scale = CONFIG_NET_TCP_WINDOW_SCALE_FACTOR;
-                      conn->flags    |= TCP_WSCALE;
-                    }
-#endif
-                  else
-                    {
-                      /* All other options have a length field, so that we
-                       * easily can skip past them.
-                       */
-
-                      if (IPDATA(tcpiplen + 1 + i) == 0)
-                        {
-                          /* If the length field is zero, the options are
-                           * malformed and we don't process them further.
-                           */
-
-                          break;
-                        }
-                    }
-
-                  i += IPDATA(tcpiplen + 1 + i);
-                }
-            }
+          tcp_parse_option(dev, conn, iplen);
 
           /* Our response will be a SYNACK. */
 
@@ -1245,63 +1281,7 @@ found:
           {
             /* Parse the TCP MSS option, if present. */
 
-            if ((tcp->tcpoffset & 0xf0) > 0x50)
-              {
-                for (i = 0; i < ((tcp->tcpoffset >> 4) - 5) << 2 ; )
-                  {
-                    opt = IPDATA(tcpiplen + i);
-                    if (opt == TCP_OPT_END)
-                      {
-                        /* End of options. */
-
-                        break;
-                      }
-                    else if (opt == TCP_OPT_NOOP)
-                      {
-                        /* NOP option. */
-
-                        ++i;
-                        continue;
-                      }
-                    else if (opt == TCP_OPT_MSS &&
-                             IPDATA(tcpiplen + 1 + i) == TCP_OPT_MSS_LEN)
-                      {
-                        uint16_t tcp_mss = TCP_MSS(dev, iplen);
-
-                        /* An MSS option with the right option length. */
-
-                        tmp16 = (IPDATA(tcpiplen + 2 + i) << 8) |
-                                 IPDATA(tcpiplen + 3 + i);
-                        conn->mss = tmp16 > tcp_mss ? tcp_mss : tmp16;
-                      }
-#ifdef CONFIG_NET_TCP_WINDOW_SCALE
-                    else if (opt == TCP_OPT_WS &&
-                             IPDATA(tcpiplen + 1 + i) == TCP_OPT_WS_LEN)
-                      {
-                        conn->snd_scale = IPDATA(tcpiplen + 2 + i);
-                        conn->rcv_scale = CONFIG_NET_TCP_WINDOW_SCALE_FACTOR;
-                        conn->flags    |= TCP_WSCALE;
-                      }
-#endif
-                    else
-                      {
-                        /* All other options have a length field, so that we
-                         * easily can skip past them.
-                         */
-
-                        if (IPDATA(tcpiplen + 1 + i) == 0)
-                          {
-                            /* If the length field is zero, the options are
-                             * malformed and we don't process them further.
-                             */
-
-                            break;
-                          }
-                      }
-
-                    i += IPDATA(tcpiplen + 1 + i);
-                  }
-              }
+            tcp_parse_option(dev, conn, iplen);
 
             conn->tcpstateflags = TCP_ESTABLISHED;
             memcpy(conn->rcvseq, tcp->seqno, 4);


[nuttx] 04/04: net/tcp: add Selective-ACK support

Posted by xi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

xiaoxiang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nuttx.git

commit 64dd7e6376f01a50a9ce9aec1a0e11f5e69cf814
Author: chao an <an...@xiaomi.com>
AuthorDate: Tue Jan 10 13:41:02 2023 +0800

    net/tcp: add Selective-ACK support
    
    Reference:
    https://datatracker.ietf.org/doc/html/rfc2018
    
    Iperf2 client/server test on esp32c3:
    
    Drop(1/50):
    CONFIG_NET_TCP_DEBUG_DROP_SEND=y
    CONFIG_NET_TCP_DEBUG_DROP_SEND_PROBABILITY=50  // Drop probability: 1/50
    CONFIG_NET_TCP_DEBUG_DROP_RECV=y
    CONFIG_NET_TCP_DEBUG_DROP_RECV_PROBABILITY=50  // Drop probability: 1/50
    
    Drop(1/50) + OFO/SACK:
    CONFIG_NET_TCP_DEBUG_DROP_SEND=y
    CONFIG_NET_TCP_DEBUG_DROP_SEND_PROBABILITY=50  // Drop probability: 1/50
    CONFIG_NET_TCP_DEBUG_DROP_RECV=y
    CONFIG_NET_TCP_DEBUG_DROP_RECV_PROBABILITY=50  // Drop probability: 1/50
    
    CONFIG_NET_TCP_OUT_OF_ORDER=y
    CONFIG_NET_TCP_SELECTIVE_ACK=y
    
    ---------------------------------------------------------
    |  TCP Config            | Server | Client |            |
    |-------------------------------------------------------|
    |  Original              |   12   |     9  |  Mbits/sec |
    |  Drop(1/50)            |  0.6   |   0.3  |  Mbits/sec |
    |  Drop(1/50) + OFO/SACK |    8   |     8  |  Mbits/sec |
    ---------------------------------------------------------
    
    Signed-off-by: chao an <an...@xiaomi.com>
---
 include/nuttx/net/tcp.h     |   9 +-
 net/tcp/Kconfig             |  12 ++
 net/tcp/tcp.h               |  29 ++++
 net/tcp/tcp_input.c         |  99 +++++++-------
 net/tcp/tcp_send.c          |  53 +++++++-
 net/tcp/tcp_send_buffered.c | 318 +++++++++++++++++++++++++++++++++-----------
 6 files changed, 392 insertions(+), 128 deletions(-)

diff --git a/include/nuttx/net/tcp.h b/include/nuttx/net/tcp.h
index 1a983f3473..83e4fa1b05 100644
--- a/include/nuttx/net/tcp.h
+++ b/include/nuttx/net/tcp.h
@@ -77,10 +77,13 @@
 #define TCP_OPT_NOOP      1   /* "No-operation" TCP option */
 #define TCP_OPT_MSS       2   /* Maximum segment size TCP option */
 #define TCP_OPT_WS        3   /* Window size scaling factor */
+#define TCP_OPT_SACK_PERM 4   /* Selective-ACK Permitted option */
+#define TCP_OPT_SACK      5   /* Selective-ACK Block option */
 
-#define TCP_OPT_NOOP_LEN  1   /* Length of TCP NOOP option. */
-#define TCP_OPT_MSS_LEN   4   /* Length of TCP MSS option. */
-#define TCP_OPT_WS_LEN    3   /* Length of TCP WS option. */
+#define TCP_OPT_NOOP_LEN       1   /* Length of TCP NOOP option. */
+#define TCP_OPT_MSS_LEN        4   /* Length of TCP MSS option. */
+#define TCP_OPT_WS_LEN         3   /* Length of TCP WS option. */
+#define TCP_OPT_SACK_PERM_LEN  2   /* Length of TCP SACK Permitted option. */
 
 /* The TCP states used in the struct tcp_conn_s tcpstateflags field */
 
diff --git a/net/tcp/Kconfig b/net/tcp/Kconfig
index 64208e76b2..b53c20da74 100644
--- a/net/tcp/Kconfig
+++ b/net/tcp/Kconfig
@@ -151,6 +151,18 @@ config NET_TCP_OUT_OF_ORDER_BUFSIZE
 
 endif # NET_TCP_OUT_OF_ORDER
 
+config NET_TCP_SELECTIVE_ACK
+	bool "Enable TCP/IP Selective Acknowledgment Options"
+	default n
+	select NET_TCP_OUT_OF_ORDER
+	---help---
+		Enable RFC2018(TCP Selective Acknowledgment Options):
+			Selective Acknowledgment (SACK) is a strategy which corrects the
+			poor performance of cumulative acknowledgments when multiple
+			segments are dropped.  With selective acknowledgments, the data
+			receiver can inform the sender about all segments that have arrived
+			successfully, so the sender need retransmit only the lost segments.
+
 config NET_TCP_NOTIFIER
 	bool "Support TCP notifications"
 	default n
diff --git a/net/tcp/tcp.h b/net/tcp/tcp.h
index bac6bc94de..999b001ea2 100644
--- a/net/tcp/tcp.h
+++ b/net/tcp/tcp.h
@@ -105,6 +105,7 @@
 /* The TCP options flags */
 
 #define TCP_WSCALE            0x01U /* Window Scale option enabled */
+#define TCP_SACK              0x02U /* Selective ACKs enabled */
 
 /* The Max Range count of TCP Selective ACKs */
 
@@ -157,6 +158,14 @@ struct tcp_ofoseg_s
   FAR struct iob_s *data; /* Out-of-order buffering */
 };
 
+/* SACK ranges to include in ACK packets. */
+
+struct tcp_sack_s
+{
+  uint32_t left;    /* Left edge of the SACK */
+  uint32_t right;   /* Right edge of the SACK */
+};
+
 struct tcp_conn_s
 {
   /* Common prologue of all connection structures. */
@@ -2143,6 +2152,26 @@ uint16_t tcpip_hdrsize(FAR struct tcp_conn_s *conn);
 
 int tcp_ofoseg_bufsize(FAR struct tcp_conn_s *conn);
 
+/****************************************************************************
+ * Name: tcp_reorder_ofosegs
+ *
+ * Description:
+ *   Sort out-of-order segments by left edge
+ *
+ * Input Parameters:
+ *   nofosegs - Number of out-of-order segments
+ *   ofosegs  - Pointer to out-of-order segments
+ *
+ * Returned Value:
+ *   True if re-order occurs
+ *
+ * Assumptions:
+ *   The network is locked.
+ *
+ ****************************************************************************/
+
+bool tcp_reorder_ofosegs(int nofosegs, FAR struct tcp_ofoseg_s *ofosegs);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/net/tcp/tcp_input.c b/net/tcp/tcp_input.c
index b4b7b05292..001dea54d6 100644
--- a/net/tcp/tcp_input.c
+++ b/net/tcp/tcp_input.c
@@ -396,52 +396,6 @@ static bool tcp_rebuild_ofosegs(FAR struct tcp_conn_s *conn,
   return (ofoseg->data == NULL);
 }
 
-/****************************************************************************
- * Name: tcp_reorder_ofosegs
- *
- * Description:
- *   Sort out-of-order segments by left edge
- *
- * Input Parameters:
- *   nofosegs - Number of out-of-order semgnets
- *   ofosegs  - Pointer to out-of-order segments
- *
- * Returned Value:
- *   True if re-order occurs
- *
- * Assumptions:
- *   The network is locked.
- *
- ****************************************************************************/
-
-static bool tcp_reorder_ofosegs(int nofosegs,
-                                FAR struct tcp_ofoseg_s *ofosegs)
-{
-  struct tcp_ofoseg_s segs;
-  bool reordered = false;
-  int i;
-  int j;
-
-  /* Sort out-of-order segments by left edge */
-
-  for (i = 0; i < nofosegs - 1; i++)
-    {
-      for (j = 0; j < nofosegs - 1 - i; j++)
-        {
-          if (TCP_SEQ_GT(ofosegs[j].left,
-                         ofosegs[j + 1].left))
-            {
-              segs = ofosegs[j];
-              ofosegs[j] = ofosegs[j + 1];
-              ofosegs[j + 1] = segs;
-              reordered = true;
-            }
-        }
-    }
-
-  return reordered;
-}
-
 /****************************************************************************
  * Name: tcp_input_ofosegs
  *
@@ -637,6 +591,14 @@ static void tcp_parse_option(FAR struct net_driver_s *dev,
           conn->rcv_scale = CONFIG_NET_TCP_WINDOW_SCALE_FACTOR;
           conn->flags    |= TCP_WSCALE;
         }
+#endif
+#ifdef CONFIG_NET_TCP_SELECTIVE_ACK
+      else if (opt == TCP_OPT_SACK_PERM &&
+               IPDATA(tcpiplen + 1 + i) ==
+               TCP_OPT_SACK_PERM_LEN)
+        {
+          conn->flags    |= TCP_SACK;
+        }
 #endif
       else
         {
@@ -1627,6 +1589,51 @@ drop:
  * Public Functions
  ****************************************************************************/
 
+/****************************************************************************
+ * Name: tcp_reorder_ofosegs
+ *
+ * Description:
+ *   Sort out-of-order segments by left edge
+ *
+ * Input Parameters:
+ *   nofosegs - Number of out-of-order segments
+ *   ofosegs  - Pointer to out-of-order segments
+ *
+ * Returned Value:
+ *   True if re-order occurs
+ *
+ * Assumptions:
+ *   The network is locked.
+ *
+ ****************************************************************************/
+
+bool tcp_reorder_ofosegs(int nofosegs, FAR struct tcp_ofoseg_s *ofosegs)
+{
+  struct tcp_ofoseg_s segs;
+  bool reordered = false;
+  int i;
+  int j;
+
+  /* Sort out-of-order segments by left edge */
+
+  for (i = 0; i < nofosegs - 1; i++)
+    {
+      for (j = 0; j < nofosegs - 1 - i; j++)
+        {
+          if (TCP_SEQ_GT(ofosegs[j].left,
+                         ofosegs[j + 1].left))
+            {
+              segs = ofosegs[j];
+              ofosegs[j] = ofosegs[j + 1];
+              ofosegs[j + 1] = segs;
+              reordered = true;
+            }
+        }
+    }
+
+  return reordered;
+}
+
 /****************************************************************************
  * Name: tcp_ipv4_input
  *
diff --git a/net/tcp/tcp_send.c b/net/tcp/tcp_send.c
index db3d38835e..0c57c32e8e 100644
--- a/net/tcp/tcp_send.c
+++ b/net/tcp/tcp_send.c
@@ -274,10 +274,44 @@ void tcp_send(FAR struct net_driver_s *dev, FAR struct tcp_conn_s *conn,
       return;
     }
 
-  tcp            = tcp_header(dev);
-  tcp->flags     = flags;
-  dev->d_len     = len;
-  tcp->tcpoffset = (TCP_HDRLEN / 4) << 4;
+  tcp        = tcp_header(dev);
+  tcp->flags = flags;
+  dev->d_len = len;
+
+#ifdef CONFIG_NET_TCP_SELECTIVE_ACK
+  if ((conn->flags & TCP_SACK) && (flags == TCP_ACK) && conn->nofosegs > 0)
+    {
+      int optlen = conn->nofosegs * sizeof(struct tcp_sack_s);
+      int i;
+
+      tcp->optdata[0] = TCP_OPT_NOOP;
+      tcp->optdata[1] = TCP_OPT_NOOP;
+      tcp->optdata[2] = TCP_OPT_SACK;
+      tcp->optdata[3] = TCP_OPT_SACK_PERM_LEN + optlen;
+
+      optlen += 4;
+
+      for (i = 0; i < conn->nofosegs; i++)
+        {
+          ninfo("TCP SACK [%d]"
+                "[%" PRIu32 " : %" PRIu32 " : %" PRIu32 "]\n", i,
+                conn->ofosegs[i].left, conn->ofosegs[i].right,
+                TCP_SEQ_SUB(conn->ofosegs[i].right, conn->ofosegs[i].left));
+          tcp_setsequence(&tcp->optdata[4 + i * 2 * sizeof(uint32_t)],
+                          conn->ofosegs[i].left);
+          tcp_setsequence(&tcp->optdata[4 + (i * 2 + 1) * sizeof(uint32_t)],
+                          conn->ofosegs[i].right);
+        }
+
+      dev->d_len += optlen;
+      tcp->tcpoffset = ((TCP_HDRLEN + optlen) / 4) << 4;
+    }
+  else
+#endif /* CONFIG_NET_TCP_SELECTIVE_ACK */
+    {
+      tcp->tcpoffset = (TCP_HDRLEN / 4) << 4;
+    }
+
   tcp_sendcommon(dev, conn, tcp);
 
 #if defined(CONFIG_NET_STATISTICS) && \
@@ -597,6 +631,17 @@ void tcp_synack(FAR struct net_driver_s *dev, FAR struct tcp_conn_s *conn,
     }
 #endif
 
+#ifdef CONFIG_NET_TCP_SELECTIVE_ACK
+  if (tcp->flags == TCP_SYN ||
+      ((tcp->flags == (TCP_ACK | TCP_SYN)) && (conn->flags & TCP_SACK)))
+    {
+      tcp->optdata[optlen++] = TCP_OPT_NOOP;
+      tcp->optdata[optlen++] = TCP_OPT_NOOP;
+      tcp->optdata[optlen++] = TCP_OPT_SACK_PERM;
+      tcp->optdata[optlen++] = TCP_OPT_SACK_PERM_LEN;
+    }
+#endif
+
   tcp->tcpoffset         = ((TCP_HDRLEN + optlen) / 4) << 4;
   dev->d_len            += optlen;
 
diff --git a/net/tcp/tcp_send_buffered.c b/net/tcp/tcp_send_buffered.c
index 925d08bb12..62651169c5 100644
--- a/net/tcp/tcp_send_buffered.c
+++ b/net/tcp/tcp_send_buffered.c
@@ -170,6 +170,80 @@ static void psock_writebuffer_notify(FAR struct tcp_conn_s *conn)
 #  define psock_writebuffer_notify(conn)
 #endif
 
+static void retransmit_segment(FAR struct tcp_conn_s *conn,
+                               FAR struct tcp_wrbuffer_s *wrb)
+{
+  uint16_t sent;
+
+  /* Reset the number of bytes sent from the write buffer */
+
+  sent = TCP_WBSENT(wrb);
+  if (conn->tx_unacked > sent)
+    {
+      conn->tx_unacked -= sent;
+    }
+  else
+    {
+      conn->tx_unacked = 0;
+    }
+
+  if (conn->sent > sent)
+    {
+      conn->sent -= sent;
+    }
+  else
+    {
+      conn->sent = 0;
+    }
+
+  TCP_WBSENT(wrb) = 0;
+  ninfo("REXMIT: wrb=%p sent=%u, "
+        "conn tx_unacked=%" PRId32 " sent=%" PRId32 "\n",
+        wrb, TCP_WBSENT(wrb), conn->tx_unacked, conn->sent);
+
+  /* Free any write buffers that have exceeded the retry count */
+
+  if (++TCP_WBNRTX(wrb) >= TCP_MAXRTX)
+    {
+      nwarn("WARNING: Expiring wrb=%p nrtx=%u\n",
+            wrb, TCP_WBNRTX(wrb));
+
+      /* Return the write buffer to the free list */
+
+      tcp_wrbuffer_release(wrb);
+
+      /* Notify any waiters if the write buffers have been
+       * drained.
+       */
+
+      psock_writebuffer_notify(conn);
+
+      /* NOTE expired is different from un-ACKed, it is designed
+       * to represent the number of segments that have been sent,
+       * retransmitted, and un-ACKed, if expired is not zero, the
+       * connection will be closed.
+       *
+       * field expired can only be updated at TCP_ESTABLISHED
+       * state
+       */
+
+      conn->expired++;
+    }
+  else
+    {
+      /* Insert the write buffer into the write_q (in sequence
+       * number order).  The retransmission will occur below
+       * when the write buffer with the lowest sequence number
+       * is pulled from the write_q again.
+       */
+
+      ninfo("REXMIT: Moving wrb=%p nrtx=%u\n",
+            wrb, TCP_WBNRTX(wrb));
+
+      psock_insert_segment(wrb, &conn->write_q);
+    }
+}
+
 /****************************************************************************
  * Name: psock_lost_connection
  *
@@ -285,6 +359,97 @@ static inline void send_ipselect(FAR struct net_driver_s *dev,
 }
 #endif
 
+/****************************************************************************
+ * Name: parse_sack
+ *
+ * Description:
+ *   Parse sack from incoming TCP options
+ *
+ * Input Parameters:
+ *   conn   - The TCP connection of interest
+ *   tcp    - Header of tcp structure
+ *   segs   - Segments edge of sacks
+ *
+ * Returned Value:
+ *   Number of sacks
+ *
+ * Assumptions:
+ *   The network is locked.
+ *
+ ****************************************************************************/
+
+#ifdef CONFIG_NET_TCP_SELECTIVE_ACK
+static int parse_sack(FAR struct tcp_conn_s *conn, FAR struct tcp_hdr_s *tcp,
+                      FAR struct tcp_ofoseg_s *segs)
+{
+  FAR struct tcp_sack_s *sacks;
+  int nsack = 0;
+  uint8_t opt;
+  int i;
+
+  /* Walk the TCP option bytes that follow the fixed TCP header,
+   * looking for a Selective-ACK option
+   */
+
+  for (i = 0; i < ((tcp->tcpoffset >> 4) - 5) << 2 ; )
+    {
+      opt = *(tcp->optdata + i);
+      if (opt == TCP_OPT_END)
+        {
+          /* End of options. */
+
+          break;
+        }
+      else if (opt == TCP_OPT_NOOP)
+        {
+          /* NOP option. */
+
+          ++i;
+          continue;
+        }
+      else if (opt == TCP_OPT_SACK)
+        {
+          nsack = (*(tcp->optdata + 1 + i) -
+                   TCP_OPT_SACK_PERM_LEN) /
+                   (sizeof(uint32_t) * 2);
+          sacks = (FAR struct tcp_sack_s *)
+                  (tcp->optdata + i +
+                   TCP_OPT_SACK_PERM_LEN);
+
+          for (i = 0; i < nsack; i++)
+            {
+              segs[i].left = tcp_getsequence((uint8_t *)&sacks[i].left);
+              segs[i].right = tcp_getsequence((uint8_t *)&sacks[i].right);
+            }
+
+          tcp_reorder_ofosegs(nsack, segs);
+
+          break;
+        }
+      else
+        {
+          /* All other options have a length field,
+           * so that we easily can skip past them.
+           */
+
+          if (*(tcp->optdata + 1 + i) == 0)
+            {
+              /* If the length field is zero,
+               * the options are malformed and
+               * we don't process them further.
+               */
+
+              break;
+            }
+        }
+
+      i += *(tcp->optdata + 1 + i);
+    }
+
+  return nsack;
+}
+#endif /* CONFIG_NET_TCP_SELECTIVE_ACK */
+
 /****************************************************************************
  * Name: psock_send_eventhandler
  *
@@ -309,6 +474,10 @@ static uint16_t psock_send_eventhandler(FAR struct net_driver_s *dev,
                                         FAR void *pvpriv, uint16_t flags)
 {
   FAR struct tcp_conn_s *conn = pvpriv;
+#ifdef CONFIG_NET_TCP_SELECTIVE_ACK
+  struct tcp_ofoseg_s ofosegs[TCP_SACK_RANGES_MAX];
+  uint8_t nsacks = 0;
+#endif
 #ifdef CONFIG_NET_TCP_FAST_RETRANSMIT
   uint32_t rexmitno = 0;
 #endif
@@ -458,7 +627,6 @@ static uint16_t psock_send_eventhandler(FAR struct net_driver_s *dev,
                         wrb, TCP_WBSEQNO(wrb), TCP_WBPKTLEN(wrb));
                 }
             }
-#ifdef CONFIG_NET_TCP_FAST_RETRANSMIT
           else if (ackno == TCP_WBSEQNO(wrb))
             {
               /* Reset the duplicate ack counter */
@@ -472,16 +640,33 @@ static uint16_t psock_send_eventhandler(FAR struct net_driver_s *dev,
 
               if (++TCP_WBNACK(wrb) == TCP_FAST_RETRANSMISSION_THRESH)
                 {
-                  /* Do fast retransmit */
+#ifdef CONFIG_NET_TCP_SELECTIVE_ACK
+                  if ((conn->flags & TCP_SACK) &&
+                      (tcp->tcpoffset & 0xf0) > 0x50)
+                    {
+                      /* Parse s-ack from tcp options */
 
-                  rexmitno = ackno;
+                      nsacks = parse_sack(conn, tcp, ofosegs);
 
-                  /* Reset counter */
+                      flags |= TCP_REXMIT;
+                    }
+#ifdef CONFIG_NET_TCP_FAST_RETRANSMIT
+                  else
+#endif
+#endif
+                    {
+#ifdef CONFIG_NET_TCP_FAST_RETRANSMIT
+                      /* Do fast retransmit */
 
-                  TCP_WBNACK(wrb) = 0;
+                      rexmitno = ackno;
+#endif
+
+                      /* Reset counter */
+
+                      TCP_WBNACK(wrb) = 0;
+                    }
                 }
             }
-#endif
         }
 
       /* A special case is the head of the write_q which may be partially
@@ -613,6 +798,57 @@ static uint16_t psock_send_eventhandler(FAR struct net_driver_s *dev,
     }
 #endif
 
+#ifdef CONFIG_NET_TCP_SELECTIVE_ACK
+
+  /* Check if we are being asked to retransmit s-ack data */
+
+  if (nsacks > 0)
+    {
+      FAR struct tcp_wrbuffer_s *wrb;
+      FAR sq_entry_t *entry;
+      FAR sq_entry_t *next;
+      uint32_t right;
+      int i;
+
+      /* Dump s-ack edge */
+
+      for (i = 0, right = 0; i < nsacks; i++)
+        {
+          ninfo("TCP SACK [%d]"
+                "[%" PRIu32 " : %" PRIu32 " : %" PRIu32 "]\n",
+                i, ofosegs[i].left, ofosegs[i].right,
+                TCP_SEQ_SUB(ofosegs[i].right, ofosegs[i].left));
+        }
+
+      for (entry = sq_peek(&conn->unacked_q); entry; entry = next)
+        {
+          wrb  = (FAR struct tcp_wrbuffer_s *)entry;
+          next = sq_next(entry);
+
+          for (i = 0, right = 0; i < nsacks; i++)
+            {
+              /* Wrb seqno out of s-ack edge ? do retransmit ! */
+
+              if (TCP_SEQ_LT(TCP_WBSEQNO(wrb), ofosegs[i].left) &&
+                  TCP_SEQ_GTE(TCP_WBSEQNO(wrb), right))
+                {
+                  ninfo("TCP REXMIT "
+                        "[%" PRIu32 " : %" PRIu32 " : %d]\n",
+                        TCP_WBSEQNO(wrb),
+                        TCP_SEQ_ADD(TCP_WBSEQNO(wrb), TCP_WBPKTLEN(wrb)),
+                        TCP_WBPKTLEN(wrb));
+                  sq_rem(entry, &conn->unacked_q);
+                  retransmit_segment(conn, (FAR void *)entry);
+                  break;
+                }
+
+              right = ofosegs[i].right;
+            }
+        }
+    }
+  else
+#endif
+
   /* Check if we are being asked to retransmit data */
 
   if ((flags & TCP_REXMIT) != 0)
@@ -706,75 +942,7 @@ static uint16_t psock_send_eventhandler(FAR struct net_driver_s *dev,
 
       while ((entry = sq_remlast(&conn->unacked_q)) != NULL)
         {
-          wrb = (FAR struct tcp_wrbuffer_s *)entry;
-          uint16_t sent;
-
-          /* Reset the number of bytes sent sent from the write buffer */
-
-          sent = TCP_WBSENT(wrb);
-          if (conn->tx_unacked > sent)
-            {
-              conn->tx_unacked -= sent;
-            }
-          else
-            {
-              conn->tx_unacked = 0;
-            }
-
-          if (conn->sent > sent)
-            {
-              conn->sent -= sent;
-            }
-          else
-            {
-              conn->sent = 0;
-            }
-
-          TCP_WBSENT(wrb) = 0;
-          ninfo("REXMIT: wrb=%p sent=%u, "
-                "conn tx_unacked=%" PRId32 " sent=%" PRId32 "\n",
-                wrb, TCP_WBSENT(wrb), conn->tx_unacked, conn->sent);
-
-          /* Free any write buffers that have exceed the retry count */
-
-          if (++TCP_WBNRTX(wrb) >= TCP_MAXRTX)
-            {
-              nwarn("WARNING: Expiring wrb=%p nrtx=%u\n",
-                    wrb, TCP_WBNRTX(wrb));
-
-              /* Return the write buffer to the free list */
-
-              tcp_wrbuffer_release(wrb);
-
-              /* Notify any waiters if the write buffers have been
-               * drained.
-               */
-
-              psock_writebuffer_notify(conn);
-
-              /* NOTE expired is different from un-ACKed, it is designed to
-               * represent the number of segments that have been sent,
-               * retransmitted, and un-ACKed, if expired is not zero, the
-               * connection will be closed.
-               *
-               * field expired can only be updated at TCP_ESTABLISHED state
-               */
-
-              conn->expired++;
-              continue;
-            }
-          else
-            {
-              /* Insert the write buffer into the write_q (in sequence
-               * number order).  The retransmission will occur below
-               * when the write buffer with the lowest sequence number
-               * is pulled from the write_q again.
-               */
-
-              ninfo("REXMIT: Moving wrb=%p nrtx=%u\n", wrb, TCP_WBNRTX(wrb));
-
-              psock_insert_segment(wrb, &conn->write_q);
-            }
+          retransmit_segment(conn, (FAR void *)entry);
         }
     }
 


[nuttx] 01/04: net/tcp: new api tcp_dataconcat() to concatenate/pack iob chain

Posted by xi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

xiaoxiang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nuttx.git

commit 0e7d397553d51c70c625843bcd891efb054c887f
Author: chao an <an...@xiaomi.com>
AuthorDate: Fri Jan 6 13:50:43 2023 +0800

    net/tcp: new api tcp_dataconcat() to concatenate/pack iob chain
    
    Signed-off-by: chao an <an...@xiaomi.com>
---
 net/tcp/tcp.h          | 15 ++++++++++++++
 net/tcp/tcp_callback.c | 54 ++++++++++++++++++++++++++++++++++++--------------
 2 files changed, 54 insertions(+), 15 deletions(-)

diff --git a/net/tcp/tcp.h b/net/tcp/tcp.h
index f28dd9758a..5637106499 100644
--- a/net/tcp/tcp.h
+++ b/net/tcp/tcp.h
@@ -1278,6 +1278,21 @@ uint16_t tcp_datahandler(FAR struct net_driver_s *dev,
                          FAR struct tcp_conn_s *conn,
                          uint16_t offset);
 
+/****************************************************************************
+ * Name: tcp_dataconcat
+ *
+ * Description:
+ *   Concatenate iob_s chain iob2 to iob1, if CONFIG_NET_TCP_RECV_PACK is
+ *   enabled, pack all data in the I/O buffer chain.
+ *
+ * Returned Value:
+ *   The number of bytes actually buffered is returned.  This will be either
+ *   zero or equal to iob1->io_pktlen.
+ *
+ ****************************************************************************/
+
+uint16_t tcp_dataconcat(FAR struct iob_s **iob1, FAR struct iob_s **iob2);
+
 /****************************************************************************
  * Name: tcp_backlogcreate
  *
diff --git a/net/tcp/tcp_callback.c b/net/tcp/tcp_callback.c
index a5f22c4556..3d00cdcc1f 100644
--- a/net/tcp/tcp_callback.c
+++ b/net/tcp/tcp_callback.c
@@ -196,6 +196,43 @@ uint16_t tcp_callback(FAR struct net_driver_s *dev,
   return flags;
 }
 
+/****************************************************************************
+ * Name: tcp_dataconcat
+ *
+ * Description:
+ *   Concatenate iob_s chain iob2 to iob1, if CONFIG_NET_TCP_RECV_PACK is
+ *   enabled, pack all data in the I/O buffer chain.
+ *
+ * Returned Value:
+ *   The number of bytes actually buffered is returned.  This will be either
+ *   zero or equal to iob1->io_pktlen.
+ *
+ ****************************************************************************/
+
+uint16_t tcp_dataconcat(FAR struct iob_s **iob1, FAR struct iob_s **iob2)
+{
+  if (*iob1 == NULL)
+    {
+      *iob1 = *iob2;
+    }
+  else
+    {
+      iob_concat(*iob1, *iob2);
+    }
+
+  *iob2 = NULL;
+
+#ifdef CONFIG_NET_TCP_RECV_PACK
+  /* Merge an iob chain into a continuous space, thereby reducing iob
+   * consumption.
+   */
+
+  *iob1 = iob_pack(*iob1);
+#endif
+
+  return (*iob1)->io_pktlen;
+}
+
 /****************************************************************************
  * Name: tcp_datahandler
  *
@@ -247,22 +284,9 @@ uint16_t tcp_datahandler(FAR struct net_driver_s *dev,
 
   /* Concat the iob to readahead */
 
-  if (conn->readahead == NULL)
-    {
-      conn->readahead = iob;
-    }
-  else
-    {
-      iob_concat(conn->readahead, iob);
-    }
-
-#ifdef CONFIG_NET_TCP_RECV_PACK
-  /* Merge an iob chain into a continuous space, thereby reducing iob
-   * consumption.
-   */
+  tcp_dataconcat(&conn->readahead, &iob);
 
-  conn->readahead = iob_pack(conn->readahead);
-#endif
+  /* Clear device buffer */
 
   netdev_iob_clear(dev);