public inbox for mptcp@lists.linux.dev
 help / color / mirror / Atom feed
* [PATCH mptcp-net v3 0/3] mptcp: fix stall because of data_ready
@ 2026-03-17  8:36 Gang Yan
  2026-03-17  8:36 ` [PATCH mptcp-net v3 1/3] mptcp: replace backlog_list with backlog_queue Gang Yan
                   ` (4 more replies)
  0 siblings, 5 replies; 6+ messages in thread
From: Gang Yan @ 2026-03-17  8:36 UTC (permalink / raw)
  To: mptcp; +Cc: pabeni, Gang Yan

From: Gang Yan <yangang@kylinos.cn>

Hi Paolo, Matt:

The patches have replaced the original backlog_list with an rb-tree based
'backlog_queue', and resolved the existing transmission stall issues by using
the new backlog_queue infrastructure.

Looking forward to your comments and feedback.

Thanks
Gang

Gang Yan (3):
  mptcp: replace backlog_list with backlog_queue
  mptcp: fix the stall problems using backlog_queue
  mptcp: fix the stall problems with data_ready

 net/mptcp/protocol.c | 93 +++++++++++++++++++++++++++++++++++++-------
 net/mptcp/protocol.h |  2 +-
 2 files changed, 81 insertions(+), 14 deletions(-)

-- 
2.43.0


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH mptcp-net v3 1/3] mptcp: replace backlog_list with backlog_queue
  2026-03-17  8:36 [PATCH mptcp-net v3 0/3] mptcp: fix stall because of data_ready Gang Yan
@ 2026-03-17  8:36 ` Gang Yan
  2026-03-17  8:36 ` [PATCH mptcp-net v3 2/3] mptcp: fix the stall problems using backlog_queue Gang Yan
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Gang Yan @ 2026-03-17  8:36 UTC (permalink / raw)
  To: mptcp; +Cc: pabeni, Gang Yan, Geliang Tang

From: Gang Yan <yangang@kylinos.cn>

This patch replaces the original list-based backlog_list with a
red-black tree (RB-tree) based backlog_queue for MPTCP.

Add key helper functions:
 - mptcp_queue_backlog: Insert skb into backlog_queue in order of
   map_seq via RB-tree
 - mptcp_backlog_queue_to_list: Convert RB-tree based backlog_queue to
   list_head
 - mptcp_backlog_list_to_queue: Convert list_head back to RB-tree based
   backlog_queue

Adapt existing backlog operation logic:
   - Update mptcp_can_spool_backlog to splice RB-tree backlog to list
     via new helper
   - Adjust mptcp_backlog_spooled to restore list skbs back to RB-tree
     backlog_queue
   - Modify mptcp_close_ssk and mptcp_recv_skb to check RB-tree emptiness
     instead of list
   - Update mptcp_backlog_purge to use RB-tree to list conversion for
     backlog cleanup

Furthermore, this patch also initializes the msk->backlog_unaccounted in
'__mptcp_init_sock'.

Co-developed-by: Geliang Tang <geliang@kernel.org>
Signed-off-by: Geliang Tang <geliang@kernel.org>
Signed-off-by: Gang Yan <yangang@kylinos.cn>
---
 net/mptcp/protocol.c | 72 +++++++++++++++++++++++++++++++++++++-------
 net/mptcp/protocol.h |  2 +-
 2 files changed, 62 insertions(+), 12 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index b5676b37f8f4..759f0486c40b 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -653,6 +653,33 @@ static void mptcp_dss_corruption(struct mptcp_sock *msk, struct sock *ssk)
 	}
 }
 
+static int mptcp_queue_backlog(struct mptcp_sock *msk, struct sk_buff *skb)
+{
+	u64 seq = MPTCP_SKB_CB(skb)->map_seq;
+	struct rb_node **p, *parent = NULL;
+
+	p = &msk->backlog_queue.rb_node;
+	if (RB_EMPTY_ROOT(&msk->backlog_queue))
+		goto insert;
+
+	while (*p) {
+		struct sk_buff *s;
+
+		parent = *p;
+		s = rb_to_skb(parent);
+
+		if (before64(seq, MPTCP_SKB_CB(s)->map_seq))
+			p = &parent->rb_left;
+		else
+			p = &parent->rb_right;
+	}
+
+insert:
+	rb_link_node(&skb->rbnode, parent, p);
+	rb_insert_color(&skb->rbnode, &msk->backlog_queue);
+	return 0;
+}
+
 static void __mptcp_add_backlog(struct sock *sk,
 				struct mptcp_subflow_context *subflow,
 				struct sk_buff *skb)
@@ -669,8 +696,8 @@ static void __mptcp_add_backlog(struct sock *sk,
 	}
 
 	/* Try to coalesce with the last skb in our backlog */
-	if (!list_empty(&msk->backlog_list))
-		tail = list_last_entry(&msk->backlog_list, struct sk_buff, list);
+	if (!RB_EMPTY_ROOT(&msk->backlog_queue))
+		tail = skb_rb_last(&msk->backlog_queue);
 
 	if (tail && MPTCP_SKB_CB(skb)->map_seq == MPTCP_SKB_CB(tail)->end_seq &&
 	    ssk == tail->sk &&
@@ -681,7 +708,7 @@ static void __mptcp_add_backlog(struct sock *sk,
 		goto account;
 	}
 
-	list_add_tail(&skb->list, &msk->backlog_list);
+	mptcp_queue_backlog(msk, skb);
 	mptcp_subflow_lend_fwdmem(subflow, skb);
 	delta = skb->truesize;
 
@@ -2197,6 +2224,29 @@ static bool __mptcp_move_skbs(struct sock *sk, struct list_head *skbs, u32 *delt
 	return moved;
 }
 
+static void mptcp_backlog_queue_to_list(struct mptcp_sock *msk,
+					struct list_head *list)
+{
+	struct sk_buff *skb;
+
+	while ((skb = skb_rb_first(&msk->backlog_queue)) != NULL) {
+		rb_erase(&skb->rbnode, &msk->backlog_queue);
+		RB_CLEAR_NODE(&skb->rbnode);
+		list_add_tail(&skb->list, list);
+	}
+}
+
+static void mptcp_backlog_list_to_queue(struct mptcp_sock *msk,
+					struct list_head *list)
+{
+	struct sk_buff *skb, *tmp;
+
+	list_for_each_entry_safe(skb, tmp, list, list) {
+		list_del(&skb->list);
+		mptcp_queue_backlog(msk, skb);
+	}
+}
+
 static bool mptcp_can_spool_backlog(struct sock *sk, struct list_head *skbs)
 {
 	struct mptcp_sock *msk = mptcp_sk(sk);
@@ -2208,12 +2258,12 @@ static bool mptcp_can_spool_backlog(struct sock *sk, struct list_head *skbs)
 			       mem_cgroup_from_sk(sk));
 
 	/* Don't spool the backlog if the rcvbuf is full. */
-	if (list_empty(&msk->backlog_list) ||
+	if (RB_EMPTY_ROOT(&msk->backlog_queue) ||
 	    sk_rmem_alloc_get(sk) > sk->sk_rcvbuf)
 		return false;
 
 	INIT_LIST_HEAD(skbs);
-	list_splice_init(&msk->backlog_list, skbs);
+	mptcp_backlog_queue_to_list(msk, skbs);
 	return true;
 }
 
@@ -2223,7 +2273,7 @@ static void mptcp_backlog_spooled(struct sock *sk, u32 moved,
 	struct mptcp_sock *msk = mptcp_sk(sk);
 
 	WRITE_ONCE(msk->backlog_len, msk->backlog_len - moved);
-	list_splice(skbs, &msk->backlog_list);
+	mptcp_backlog_list_to_queue(msk, skbs);
 }
 
 static bool mptcp_move_skbs(struct sock *sk)
@@ -2307,7 +2357,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 
 		copied += bytes_read;
 
-		if (!list_empty(&msk->backlog_list) && mptcp_move_skbs(sk))
+		if (!RB_EMPTY_ROOT(&msk->backlog_queue) && mptcp_move_skbs(sk))
 			continue;
 
 		/* only the MPTCP socket status is relevant here. The exit
@@ -2636,7 +2686,7 @@ void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
 	/* Remove any reference from the backlog to this ssk; backlog skbs consume
 	 * space in the msk receive queue, no need to touch sk->sk_rmem_alloc
 	 */
-	list_for_each_entry(skb, &msk->backlog_list, list) {
+	skb_rbtree_walk(skb, &msk->backlog_queue) {
 		if (skb->sk != ssk)
 			continue;
 
@@ -2892,7 +2942,7 @@ static void mptcp_backlog_purge(struct sock *sk)
 	LIST_HEAD(backlog);
 
 	mptcp_data_lock(sk);
-	list_splice_init(&msk->backlog_list, &backlog);
+	mptcp_backlog_queue_to_list(msk, &backlog);
 	msk->backlog_len = 0;
 	mptcp_data_unlock(sk);
 
@@ -2995,7 +3045,7 @@ static void __mptcp_init_sock(struct sock *sk)
 	INIT_LIST_HEAD(&msk->conn_list);
 	INIT_LIST_HEAD(&msk->join_list);
 	INIT_LIST_HEAD(&msk->rtx_queue);
-	INIT_LIST_HEAD(&msk->backlog_list);
+	msk->backlog_queue = RB_ROOT;
 	INIT_WORK(&msk->work, mptcp_worker);
 	msk->out_of_order_queue = RB_ROOT;
 	msk->first_pending = NULL;
@@ -4331,7 +4381,7 @@ static struct sk_buff *mptcp_recv_skb(struct sock *sk, u32 *off)
 	struct sk_buff *skb;
 	u32 offset;
 
-	if (!list_empty(&msk->backlog_list))
+	if (!RB_EMPTY_ROOT(&msk->backlog_queue))
 		mptcp_move_skbs(sk);
 
 	while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) {
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index f5d4d7d030f2..f0eaba2c61fa 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -372,7 +372,7 @@ struct mptcp_sock {
 					 * allow_join
 					 */
 
-	struct list_head backlog_list;	/* protected by the data lock */
+	struct rb_root	backlog_queue;	/* protected by the data lock */
 	u32		backlog_len;
 	u32		backlog_unaccounted;
 };
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH mptcp-net v3 2/3] mptcp: fix the stall problems using backlog_queue
  2026-03-17  8:36 [PATCH mptcp-net v3 0/3] mptcp: fix stall because of data_ready Gang Yan
  2026-03-17  8:36 ` [PATCH mptcp-net v3 1/3] mptcp: replace backlog_list with backlog_queue Gang Yan
@ 2026-03-17  8:36 ` Gang Yan
  2026-03-17  8:36 ` [PATCH mptcp-net v3 3/3] mptcp: fix the stall problems with data_ready Gang Yan
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Gang Yan @ 2026-03-17  8:36 UTC (permalink / raw)
  To: mptcp; +Cc: pabeni, Gang Yan, Geliang Tang

From: Gang Yan <yangang@kylinos.cn>

The original condition would stop moving skbs or spooling backlog even
when the receive queue is empty, leading to receive stall.

Modify the condition in __mptcp_move_skbs() and mptcp_can_spool_backlog()
to only treat rcvbuf as full when:
  sk_rmem_alloc_get(sk) > sk->sk_rcvbuf && !skb_queue_empty(&sk->sk_receive_queue)

This ensures the backlog can still be moved to the receive queue when
the queue is empty, avoiding stall problems.

Fixes: 6228efe0cc01 ("mptcp: leverage the backlog for RX packet processing")
Co-developed-by: Geliang Tang <geliang@kernel.org>
Signed-off-by: Geliang Tang <geliang@kernel.org>
Signed-off-by: Gang Yan <yangang@kylinos.cn>
---
 net/mptcp/protocol.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 759f0486c40b..b1915eef4dcf 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2204,7 +2204,8 @@ static bool __mptcp_move_skbs(struct sock *sk, struct list_head *skbs, u32 *delt
 	*delta = 0;
 	while (1) {
 		/* If the msk recvbuf is full stop, don't drop */
-		if (sk_rmem_alloc_get(sk) > sk->sk_rcvbuf)
+		if (sk_rmem_alloc_get(sk) > sk->sk_rcvbuf &&
+		    !skb_queue_empty(&sk->sk_receive_queue))
 			break;
 
 		prefetch(skb->next);
@@ -2259,7 +2260,8 @@ static bool mptcp_can_spool_backlog(struct sock *sk, struct list_head *skbs)
 
 	/* Don't spool the backlog if the rcvbuf is full. */
 	if (RB_EMPTY_ROOT(&msk->backlog_queue) ||
-	    sk_rmem_alloc_get(sk) > sk->sk_rcvbuf)
+	    (sk_rmem_alloc_get(sk) > sk->sk_rcvbuf &&
+	     !skb_queue_empty(&sk->sk_receive_queue)))
 		return false;
 
 	INIT_LIST_HEAD(skbs);
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH mptcp-net v3 3/3] mptcp: fix the stall problems with data_ready
  2026-03-17  8:36 [PATCH mptcp-net v3 0/3] mptcp: fix stall because of data_ready Gang Yan
  2026-03-17  8:36 ` [PATCH mptcp-net v3 1/3] mptcp: replace backlog_list with backlog_queue Gang Yan
  2026-03-17  8:36 ` [PATCH mptcp-net v3 2/3] mptcp: fix the stall problems using backlog_queue Gang Yan
@ 2026-03-17  8:36 ` Gang Yan
  2026-03-17  9:58 ` [PATCH mptcp-net v3 0/3] mptcp: fix stall because of data_ready MPTCP CI
  2026-03-24  6:23 ` Geliang Tang
  4 siblings, 0 replies; 6+ messages in thread
From: Gang Yan @ 2026-03-17  8:36 UTC (permalink / raw)
  To: mptcp; +Cc: pabeni, Gang Yan, Geliang Tang

From: Gang Yan <yangang@kylinos.cn>

There exists a stall caused by unprocessed backlog_queue in
'move_skbs_to_msk'.

This patch adds a check for backlog_queue and moves skbs to the receive
queue when no skbs were moved from the subflow but backlog_queue is not
empty.

Fixes: 6228efe0cc01 ("mptcp: leverage the backlog for RX packet processing")
Co-developed-by: Geliang Tang <geliang@kernel.org>
Signed-off-by: Geliang Tang <geliang@kernel.org>
Signed-off-by: Gang Yan <yangang@kylinos.cn>
---
 net/mptcp/protocol.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index b1915eef4dcf..023a4fb68617 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -29,6 +29,10 @@
 #include "mib.h"
 
 static unsigned int mptcp_inq_hint(const struct sock *sk);
+static bool mptcp_can_spool_backlog(struct sock *sk, struct list_head *skbs);
+static void mptcp_backlog_spooled(struct sock *sk, u32 moved,
+				  struct list_head *skbs);
+static bool __mptcp_move_skbs(struct sock *sk, struct list_head *skbs, u32 *delta);
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/mptcp.h>
@@ -892,6 +896,17 @@ static bool move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk)
 	bool moved;
 
 	moved = __mptcp_move_skbs_from_subflow(msk, ssk, true);
+	if (!moved && skb_queue_empty(&sk->sk_receive_queue) &&
+	    !RB_EMPTY_ROOT(&msk->backlog_queue)) {
+		struct list_head skbs;
+		u32 delta;
+
+		while (mptcp_can_spool_backlog(sk, &skbs)) {
+			moved |= __mptcp_move_skbs(sk, &skbs, &delta);
+			mptcp_backlog_spooled(sk, moved, &skbs);
+		}
+	}
+
 	__mptcp_ofo_queue(msk);
 	if (unlikely(ssk->sk_err))
 		__mptcp_subflow_error_report(sk, ssk);
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH mptcp-net v3 0/3] mptcp: fix stall because of data_ready
  2026-03-17  8:36 [PATCH mptcp-net v3 0/3] mptcp: fix stall because of data_ready Gang Yan
                   ` (2 preceding siblings ...)
  2026-03-17  8:36 ` [PATCH mptcp-net v3 3/3] mptcp: fix the stall problems with data_ready Gang Yan
@ 2026-03-17  9:58 ` MPTCP CI
  2026-03-24  6:23 ` Geliang Tang
  4 siblings, 0 replies; 6+ messages in thread
From: MPTCP CI @ 2026-03-17  9:58 UTC (permalink / raw)
  To: Gang Yan; +Cc: mptcp

Hi Gang,

Thank you for your modifications, that's great!

Our CI did some validations and here is its report:

- KVM Validation: normal (except selftest_mptcp_join): Success! ✅
- KVM Validation: normal (only selftest_mptcp_join): Success! ✅
- KVM Validation: debug (except selftest_mptcp_join): Unstable: 1 failed test(s): packetdrill_mp_capable 🔴
- KVM Validation: debug (only selftest_mptcp_join): Success! ✅
- KVM Validation: btf-normal (only bpftest_all): Success! ✅
- KVM Validation: btf-debug (only bpftest_all): Success! ✅
- Task: https://github.com/multipath-tcp/mptcp_net-next/actions/runs/23186215138

Initiator: Patchew Applier
Commits: https://github.com/multipath-tcp/mptcp_net-next/commits/68246fd6f6f3
Patchwork: https://patchwork.kernel.org/project/mptcp/list/?series=1067816


If there are some issues, you can reproduce them using the same environment as
the one used by the CI thanks to a docker image, e.g.:

    $ cd [kernel source code]
    $ docker run -v "${PWD}:${PWD}:rw" -w "${PWD}" --privileged --rm -it \
        --pull always mptcp/mptcp-upstream-virtme-docker:latest \
        auto-normal

For more details:

    https://github.com/multipath-tcp/mptcp-upstream-virtme-docker


Please note that despite all the efforts that have been already done to have a
stable tests suite when executed on a public CI like here, it is possible some
reported issues are not due to your modifications. Still, do not hesitate to
help us improve that ;-)

Cheers,
MPTCP GH Action bot
Bot operated by Matthieu Baerts (NGI0 Core)

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH mptcp-net v3 0/3] mptcp: fix stall because of data_ready
  2026-03-17  8:36 [PATCH mptcp-net v3 0/3] mptcp: fix stall because of data_ready Gang Yan
                   ` (3 preceding siblings ...)
  2026-03-17  9:58 ` [PATCH mptcp-net v3 0/3] mptcp: fix stall because of data_ready MPTCP CI
@ 2026-03-24  6:23 ` Geliang Tang
  4 siblings, 0 replies; 6+ messages in thread
From: Geliang Tang @ 2026-03-24  6:23 UTC (permalink / raw)
  To: Gang Yan, mptcp; +Cc: pabeni, Gang Yan

Hi Gang,

After testing, I found that with this set applied, the TLS test still
fails occasionally.

On Tue, 2026-03-17 at 16:36 +0800, Gang Yan wrote:
> From: Gang Yan <yangang@kylinos.cn>
> 
> Hi Paolo,Matt:
> 
> The patches have replaced the original backlog_list with an rb-tree
> based
> 'backlog_queue', and resolved the existing transmission stall issues
> by using
> the new backlog_queue infrastructure.
> 
> Looking forward to your comments and feedback.
> 
> Thanks
> Gang
> 
> Gang Yan (3):
>   mptcp: replace backlog_list with backlog_queue
>   mptcp: fix the stall problems using backlog_queue
>   mptcp: fix the stall problems with data_ready

As we discussed, in patch 3 we should pass "delta" to
mptcp_backlog_spooled(), not "moved". Additionally, it would be better
to extract this logic into a separate helper rather than embedding it
directly in move_skbs_to_msk().

For patch 2, I suggest putting

	sk_rmem_alloc_get(sk) > sk->sk_rcvbuf &&
	!skb_queue_empty(&sk->sk_receive_queue)

into a helper, e.g., mptcp_recvbuf_is_full().

I've changed the status of this set to "Changes Requested".

Thanks,
-Geliang

> 
>  net/mptcp/protocol.c | 93 +++++++++++++++++++++++++++++++++++++-----
> --
>  net/mptcp/protocol.h |  2 +-
>  2 files changed, 81 insertions(+), 14 deletions(-)

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2026-03-24  6:23 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-03-17  8:36 [PATCH mptcp-net v3 0/3] mptcp: fix stall because of data_ready Gang Yan
2026-03-17  8:36 ` [PATCH mptcp-net v3 1/3] mptcp: replace backlog_list with backlog_queue Gang Yan
2026-03-17  8:36 ` [PATCH mptcp-net v3 2/3] mptcp: fix the stall problems using backlog_queue Gang Yan
2026-03-17  8:36 ` [PATCH mptcp-net v3 3/3] mptcp: fix the stall problems with data_ready Gang Yan
2026-03-17  9:58 ` [PATCH mptcp-net v3 0/3] mptcp: fix stall because of data_ready MPTCP CI
2026-03-24  6:23 ` Geliang Tang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox