MPTCP Linux Development
 help / color / mirror / Atom feed
From: Geliang Tang <geliang@kernel.org>
To: Paolo Abeni <pabeni@redhat.com>, mptcp@lists.linux.dev
Subject: Re: [PATCH v5 mptcp-next 09/10] mptcp: introduce mptcp-level backlog
Date: Wed, 08 Oct 2025 11:09:29 +0800	[thread overview]
Message-ID: <8d1a5a81a52c26ea19d6e10e9137b22875b58d7b.camel@kernel.org> (raw)
In-Reply-To: <16ead1024784cf5913a9c9ef69d7d15cb73512a3.1759737859.git.pabeni@redhat.com>

Hi Paolo,

On Mon, 2025-10-06 at 10:12 +0200, Paolo Abeni wrote:
> We are soon using it for incoming data processing.
> MPTCP can't leverage the sk_backlog, as the latter is processed
> before the release callback, and such callback for MPTCP releases
> and re-acquire the socket spinlock, breaking the sk_backlog
> processing
> assumption.
> 
> Add a skb backlog list inside the mptcp sock struct, and implement
> basic helper to transfer packet to and purge such list.
> 
> Packets in the backlog are not memory accounted, but still use the
> incoming subflow receive memory, to allow back-pressure.
> 
> No packet is currently added to the backlog, so no functional changes
> intended here.
> 
> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> --
> v4 -> v5:
>   - split out of the next path, to make the latter smaller
>   - set a custom destructor for skbs in the backlog, this avoid
>     duplicate code, and fix a few places where the need ssk cleanup
>     was not performed.
>   - factor out the backlog purge in a new helper,
>     use spinlock protection, clear the backlog list and zero the
>     backlog len
>   - explicitly init the backlog_len at mptcp_init_sock() time
> ---
>  net/mptcp/protocol.c | 70 +++++++++++++++++++++++++++++++++++++++++-
> --
>  net/mptcp/protocol.h |  4 +++
>  2 files changed, 70 insertions(+), 4 deletions(-)
> 
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index 05ee6bd26b7fa..2d5d3da67d1ac 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -337,6 +337,11 @@ static void mptcp_data_queue_ofo(struct
> mptcp_sock *msk, struct sk_buff *skb)
>   mptcp_rcvbuf_grow(sk);
>  }
>  
> +static void mptcp_bl_free(struct sk_buff *skb)
> +{
> + atomic_sub(skb->truesize, &skb->sk->sk_rmem_alloc);
> +}
> +
>  static int mptcp_init_skb(struct sock *ssk,
>     struct sk_buff *skb, int offset, int copy_len)
>  {
> @@ -360,7 +365,7 @@ static int mptcp_init_skb(struct sock *ssk,
>   skb_dst_drop(skb);
>  
>   /* "borrow" the fwd memory from the subflow, instead of reclaiming
> it */
> - skb->destructor = NULL;
> + skb->destructor = mptcp_bl_free;
>   borrowed = ssk->sk_forward_alloc - sk_unused_reserved_mem(ssk);
>   borrowed &= ~(PAGE_SIZE - 1);
>   sk_forward_alloc_add(ssk, skb->truesize - borrowed);
> @@ -373,6 +378,13 @@ static bool __mptcp_move_skb(struct sock *sk,
> struct sk_buff *skb)
>   struct mptcp_sock *msk = mptcp_sk(sk);
>   struct sk_buff *tail;
>  
> + /* Avoid the indirect call overhead, we know destructor is
> + * mptcp_bl_free at this point.
> + */
> + atomic_sub(skb->truesize, &skb->sk->sk_rmem_alloc);
> + skb->sk = NULL;
> + skb->destructor = NULL;
> +
>   /* try to fetch required memory from subflow */
>   if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
>   MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RCVPRUNED);
> @@ -654,6 +666,35 @@ static void mptcp_dss_corruption(struct
> mptcp_sock *msk, struct sock *ssk)
>   }
>  }
>  
> +static void __mptcp_add_backlog(struct sock *sk, struct sk_buff
> *skb)
> +{
> + struct mptcp_sock *msk = mptcp_sk(sk);
> + struct sk_buff *tail = NULL;
> + bool fragstolen;
> + int delta;
> +
> + if (unlikely(sk->sk_state == TCP_CLOSE)) {
> + kfree_skb_reason(skb, SKB_DROP_REASON_SOCKET_CLOSE);
> + return;
> + }
> +
> + /* Try to coalesce with the last skb in our backlog */
> + if (!list_empty(&msk->backlog_list))
> + tail = list_last_entry(&msk->backlog_list, struct sk_buff, list);
> +
> + if (tail && MPTCP_SKB_CB(skb)->map_seq == MPTCP_SKB_CB(tail)-
> >end_seq &&
> +     skb->sk == tail->sk &&
> +     __mptcp_try_coalesce(sk, tail, skb, &fragstolen, &delta)) {
> + skb->truesize -= delta;
> + kfree_skb_partial(skb, fragstolen);
> + WRITE_ONCE(msk->backlog_len, msk->backlog_len + delta);
> + return;
> + }
> +
> + list_add_tail(&skb->list, &msk->backlog_list);
> + WRITE_ONCE(msk->backlog_len, msk->backlog_len + skb->truesize);
> +}
> +
>  static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
>      struct sock *ssk)
>  {
> @@ -701,10 +742,12 @@ static bool
> __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
>   int bmem;
>  
>   bmem = mptcp_init_skb(ssk, skb, offset, len);
> - skb->sk = NULL;
>   sk_forward_alloc_add(sk, bmem);
> - atomic_sub(skb->truesize, &ssk->sk_rmem_alloc);
> - ret = __mptcp_move_skb(sk, skb) || ret;
> +
> + if (true)

nit: How about adding the own_msk parameter to
__mptcp_move_skbs_from_subflow() in this patch? This would allow us to
avoid using 'if (true)' here by using 'if (own_msk)'.

Thanks,
-Geliang

> + ret |= __mptcp_move_skb(sk, skb);
> + else
> + __mptcp_add_backlog(sk, skb);
>   seq += len;
>  
>   if (unlikely(map_remaining < len)) {
> @@ -2753,12 +2796,28 @@ static void mptcp_mp_fail_no_response(struct
> mptcp_sock *msk)
>   unlock_sock_fast(ssk, slow);
>  }
>  
> +static void mptcp_backlog_purge(struct sock *sk)
> +{
> + struct mptcp_sock *msk = mptcp_sk(sk);
> + struct sk_buff *tmp, *skb;
> + LIST_HEAD(backlog);
> +
> + mptcp_data_lock(sk);
> + list_splice_init(&msk->backlog_list, &backlog);
> + msk->backlog_len = 0;
> + mptcp_data_unlock(sk);
> +
> + list_for_each_entry_safe(skb, tmp, &backlog, list)
> + kfree_skb_reason(skb, SKB_DROP_REASON_SOCKET_CLOSE);
> +}
> +
>  static void mptcp_do_fastclose(struct sock *sk)
>  {
>   struct mptcp_subflow_context *subflow, *tmp;
>   struct mptcp_sock *msk = mptcp_sk(sk);
>  
>   mptcp_set_state(sk, TCP_CLOSE);
> + mptcp_backlog_purge(sk);
>   mptcp_for_each_subflow_safe(msk, subflow, tmp)
>   __mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow),
>     subflow, MPTCP_CF_FASTCLOSE);
> @@ -2816,11 +2875,13 @@ static void __mptcp_init_sock(struct sock
> *sk)
>   INIT_LIST_HEAD(&msk->conn_list);
>   INIT_LIST_HEAD(&msk->join_list);
>   INIT_LIST_HEAD(&msk->rtx_queue);
> + INIT_LIST_HEAD(&msk->backlog_list);
>   INIT_WORK(&msk->work, mptcp_worker);
>   msk->out_of_order_queue = RB_ROOT;
>   msk->first_pending = NULL;
>   msk->timer_ival = TCP_RTO_MIN;
>   msk->scaling_ratio = TCP_DEFAULT_SCALING_RATIO;
> + msk->backlog_len = 0;
>  
>   WRITE_ONCE(msk->first, NULL);
>   inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss;
> @@ -3197,6 +3258,7 @@ static void mptcp_destroy_common(struct
> mptcp_sock *msk, unsigned int flags)
>   struct sock *sk = (struct sock *)msk;
>  
>   __mptcp_clear_xmit(sk);
> + mptcp_backlog_purge(sk);
>  
>   /* join list will be eventually flushed (with rst) at sock lock
> release time */
>   mptcp_for_each_subflow_safe(msk, subflow, tmp)
> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> index 46d8432c72ee7..a21c4955f4cfb 100644
> --- a/net/mptcp/protocol.h
> +++ b/net/mptcp/protocol.h
> @@ -358,6 +358,9 @@ struct mptcp_sock {
>   * allow_infinite_fallback and
>   * allow_join
>   */
> +
> + struct list_head backlog_list; /*protected by the data lock */
> + u32 backlog_len;
>  };
>  
>  #define mptcp_data_lock(sk) spin_lock_bh(&(sk)->sk_lock.slock)
> @@ -408,6 +411,7 @@ static inline int mptcp_space_from_win(const
> struct sock *sk, int win)
>  static inline int __mptcp_space(const struct sock *sk)
>  {
>   return mptcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) -
> +     READ_ONCE(mptcp_sk(sk)->backlog_len) -
>       sk_rmem_alloc_get(sk));
>  }
>  


  reply	other threads:[~2025-10-08  3:09 UTC|newest]

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-10-06  8:11 [PATCH v5 mptcp-next 00/10] mptcp: introduce backlog processing Paolo Abeni
2025-10-06  8:12 ` [PATCH v5 mptcp-next 01/10] mptcp: borrow forward memory from subflow Paolo Abeni
2025-10-06  8:12 ` [PATCH v5 mptcp-next 02/10] mptcp: cleanup fallback data fin reception Paolo Abeni
2025-10-06  8:12 ` [PATCH v5 mptcp-next 03/10] mptcp: cleanup fallback dummy mapping generation Paolo Abeni
2025-10-06  8:12 ` [PATCH v5 mptcp-next 04/10] mptcp: fix MSG_PEEK stream corruption Paolo Abeni
2025-10-06  8:12 ` [PATCH v5 mptcp-next 05/10] mptcp: ensure the kernel PM does not take action too late Paolo Abeni
2025-10-06  8:12 ` [PATCH v5 mptcp-next 06/10] mptcp: do not miss early first subflow close event notification Paolo Abeni
2025-10-06  8:12 ` [PATCH v5 mptcp-next 07/10] mptcp: make mptcp_destroy_common() static Paolo Abeni
2025-10-06  8:12 ` [PATCH v5 mptcp-next 08/10] mptcp: drop the __mptcp_data_ready() helper Paolo Abeni
2025-10-06  8:12 ` [PATCH v5 mptcp-next 09/10] mptcp: introduce mptcp-level backlog Paolo Abeni
2025-10-08  3:09   ` Geliang Tang [this message]
2025-10-20 19:45   ` Mat Martineau
2025-10-06  8:12 ` [PATCH v5 mptcp-next 10/10] mptcp: leverage the backlog for RX packet processing Paolo Abeni
2025-10-20 23:32   ` Mat Martineau
2025-10-21 17:21     ` Paolo Abeni
2025-10-21 23:53       ` Mat Martineau
2025-10-06 17:07 ` [PATCH v5 mptcp-next 00/10] mptcp: introduce backlog processing Matthieu Baerts
2025-10-08  3:07   ` Geliang Tang
2025-10-08  7:30     ` Paolo Abeni
2025-10-09  6:54       ` Geliang Tang
2025-10-09  7:52         ` Paolo Abeni
2025-10-09  9:02           ` Geliang Tang
2025-10-09 10:23             ` Paolo Abeni
2025-10-09 13:58               ` Paolo Abeni
2025-10-10  8:21                 ` Paolo Abeni
2025-10-10 12:22                   ` Geliang Tang
2025-10-13  9:07                     ` Geliang Tang
2025-10-13 13:29                       ` Paolo Abeni
2025-10-13 17:07                         ` Paolo Abeni
2025-10-15  9:00                       ` Paolo Abeni
2025-10-17  6:38                         ` Geliang Tang
2025-10-18  0:16                           ` Mat Martineau
2025-10-06 17:43 ` MPTCP CI

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=8d1a5a81a52c26ea19d6e10e9137b22875b58d7b.camel@kernel.org \
    --to=geliang@kernel.org \
    --cc=mptcp@lists.linux.dev \
    --cc=pabeni@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox