All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH mptcp-next 0/2] mptcp: rx refactor follow-ups
@ 2024-12-12 16:58 Paolo Abeni
  2024-12-12 16:58 ` [PATCH mptcp-next 1/2] mptcp: dismiss __mptcp_rmem() Paolo Abeni
                   ` (4 more replies)
  0 siblings, 5 replies; 6+ messages in thread
From: Paolo Abeni @ 2024-12-12 16:58 UTC (permalink / raw)
  To: mptcp

a couple of incremental improvements on top of the pending rx path
refactor: patch 1 dismiss __mptcp_rmem(), patch 2 optimize a bit 
__mptcp_move_skbs()

Based-on: <cover.1733486870.git.pabeni@redhat.com>

Paolo Abeni (2):
  mptcp: dismiss __mptcp_rmem()
  mptcp: micro-optimize __mptcp_move_skb()

 net/mptcp/protocol.c | 113 +++++++++++++++++++------------------------
 net/mptcp/protocol.h |  13 ++---
 2 files changed, 55 insertions(+), 71 deletions(-)

-- 
2.45.2


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH mptcp-next 1/2] mptcp: dismiss __mptcp_rmem()
  2024-12-12 16:58 [PATCH mptcp-next 0/2] mptcp: rx refactor follow-ups Paolo Abeni
@ 2024-12-12 16:58 ` Paolo Abeni
  2024-12-12 16:58 ` [PATCH mptcp-next 2/2] mptcp: micro-optimize __mptcp_move_skb() Paolo Abeni
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Paolo Abeni @ 2024-12-12 16:58 UTC (permalink / raw)
  To: mptcp

After the RX path refactor, it become a wrapper for sk_rmem_alloc
access, with a slightly misleading name. Just drop it.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/mptcp/protocol.c |  8 ++++----
 net/mptcp/protocol.h | 11 ++---------
 2 files changed, 6 insertions(+), 13 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 4f27b0cafac5..d6e8295b9404 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -494,7 +494,7 @@ static void mptcp_cleanup_rbuf(struct mptcp_sock *msk, int copied)
 	bool cleanup, rx_empty;
 
 	cleanup = (space > 0) && (space >= (old_space << 1)) && copied;
-	rx_empty = !__mptcp_rmem(sk) && copied;
+	rx_empty = !sk_rmem_alloc_get(sk) && copied;
 
 	mptcp_for_each_subflow(msk, subflow) {
 		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
@@ -643,7 +643,7 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
 		WRITE_ONCE(tp->copied_seq, seq);
 		more_data_avail = mptcp_subflow_data_available(ssk);
 
-		if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) {
+		if (sk_rmem_alloc_get(sk) > sk->sk_rcvbuf) {
 			done = true;
 			break;
 		}
@@ -780,7 +780,7 @@ static void __mptcp_data_ready(struct sock *sk, struct sock *ssk)
 	__mptcp_rcvbuf_update(sk, ssk);
 
 	/* over limit? can't append more skbs to msk, Also, no need to wake-up*/
-	if (__mptcp_rmem(sk) > sk->sk_rcvbuf)
+	if (sk_rmem_alloc_get(sk) > sk->sk_rcvbuf)
 		return;
 
 	/* Wake-up the reader only for in-sequence data */
@@ -2045,7 +2045,7 @@ static bool __mptcp_move_skbs(struct sock *sk)
 		mptcp_for_each_subflow(msk, subflow)
 			__mptcp_rcvbuf_update(sk, subflow->tcp_sock);
 
-	if (__mptcp_rmem(sk) > sk->sk_rcvbuf)
+	if (sk_rmem_alloc_get(sk) > sk->sk_rcvbuf)
 		return false;
 
 	do {
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index a0d46b69746d..35a74b59541b 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -377,14 +377,6 @@ static inline void msk_owned_by_me(const struct mptcp_sock *msk)
 #define mptcp_sk(ptr) container_of_const(ptr, struct mptcp_sock, sk.icsk_inet.sk)
 #endif
 
-/* the msk socket don't use the backlog, also account for the bulk
- * free memory
- */
-static inline int __mptcp_rmem(const struct sock *sk)
-{
-	return atomic_read(&sk->sk_rmem_alloc);
-}
-
 static inline int mptcp_win_from_space(const struct sock *sk, int space)
 {
 	return __tcp_win_from_space(mptcp_sk(sk)->scaling_ratio, space);
@@ -397,7 +389,8 @@ static inline int mptcp_space_from_win(const struct sock *sk, int win)
 
 static inline int __mptcp_space(const struct sock *sk)
 {
-	return mptcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) - __mptcp_rmem(sk));
+	return mptcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) -
+				    sk_rmem_alloc_get(sk));
 }
 
 static inline struct mptcp_data_frag *mptcp_send_head(const struct sock *sk)
-- 
2.45.2


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH mptcp-next 2/2] mptcp: micro-optimize __mptcp_move_skb()
  2024-12-12 16:58 [PATCH mptcp-next 0/2] mptcp: rx refactor follow-ups Paolo Abeni
  2024-12-12 16:58 ` [PATCH mptcp-next 1/2] mptcp: dismiss __mptcp_rmem() Paolo Abeni
@ 2024-12-12 16:58 ` Paolo Abeni
  2024-12-12 17:57 ` [PATCH mptcp-next 0/2] mptcp: rx refactor follow-ups MPTCP CI
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Paolo Abeni @ 2024-12-12 16:58 UTC (permalink / raw)
  To: mptcp

After the RX path refactor the mentioned function is expected to run
frequently, let's optimize it a bit.

Scan for ready subflow from the last processed one, and stop after
traversing the list once or reaching the msk memory limit - instead of
looking for dubious per-subflow conditions.
Also re-order the memory limit checks, to avoid duplicate tests.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/mptcp/protocol.c | 111 +++++++++++++++++++------------------------
 net/mptcp/protocol.h |   2 +
 2 files changed, 52 insertions(+), 61 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index d6e8295b9404..398ab465c256 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -567,15 +567,13 @@ static void mptcp_dss_corruption(struct mptcp_sock *msk, struct sock *ssk)
 }
 
 static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
-					   struct sock *ssk,
-					   unsigned int *bytes)
+					   struct sock *ssk)
 {
 	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
 	struct sock *sk = (struct sock *)msk;
-	unsigned int moved = 0;
 	bool more_data_avail;
 	struct tcp_sock *tp;
-	bool done = false;
+	bool ret = false;
 
 	pr_debug("msk=%p ssk=%p\n", msk, ssk);
 	tp = tcp_sk(ssk);
@@ -585,20 +583,16 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
 		struct sk_buff *skb;
 		bool fin;
 
+		if (sk_rmem_alloc_get(sk) > sk->sk_rcvbuf)
+			break;
+
 		/* try to move as much data as available */
 		map_remaining = subflow->map_data_len -
 				mptcp_subflow_get_map_offset(subflow);
 
 		skb = skb_peek(&ssk->sk_receive_queue);
-		if (!skb) {
-			/* With racing move_skbs_to_msk() and __mptcp_move_skbs(),
-			 * a different CPU can have already processed the pending
-			 * data, stop here or we can enter an infinite loop
-			 */
-			if (!moved)
-				done = true;
+		if (unlikely(!skb))
 			break;
-		}
 
 		if (__mptcp_check_fallback(msk)) {
 			/* Under fallback skbs have no MPTCP extension and TCP could
@@ -611,19 +605,13 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
 
 		offset = seq - TCP_SKB_CB(skb)->seq;
 		fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
-		if (fin) {
-			done = true;
+		if (fin)
 			seq++;
-		}
 
 		if (offset < skb->len) {
 			size_t len = skb->len - offset;
 
-			if (tp->urg_data)
-				done = true;
-
-			if (__mptcp_move_skb(msk, ssk, skb, offset, len))
-				moved += len;
+			ret = __mptcp_move_skb(msk, ssk, skb, offset, len) || ret;
 			seq += len;
 
 			if (unlikely(map_remaining < len)) {
@@ -637,22 +625,16 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
 			}
 
 			sk_eat_skb(ssk, skb);
-			done = true;
 		}
 
 		WRITE_ONCE(tp->copied_seq, seq);
 		more_data_avail = mptcp_subflow_data_available(ssk);
 
-		if (sk_rmem_alloc_get(sk) > sk->sk_rcvbuf) {
-			done = true;
-			break;
-		}
 	} while (more_data_avail);
 
-	if (moved > 0)
+	if (ret)
 		msk->last_data_recv = tcp_jiffies32;
-	*bytes += moved;
-	return done;
+	return ret;
 }
 
 static bool __mptcp_ofo_queue(struct mptcp_sock *msk)
@@ -746,9 +728,9 @@ void __mptcp_error_report(struct sock *sk)
 static bool move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk)
 {
 	struct sock *sk = (struct sock *)msk;
-	unsigned int moved = 0;
+	bool moved;
 
-	__mptcp_move_skbs_from_subflow(msk, ssk, &moved);
+	moved = __mptcp_move_skbs_from_subflow(msk, ssk);
 	__mptcp_ofo_queue(msk);
 	if (unlikely(ssk->sk_err)) {
 		if (!sock_owned_by_user(sk))
@@ -764,7 +746,7 @@ static bool move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk)
 	 */
 	if (mptcp_pending_data_fin(sk, NULL))
 		mptcp_schedule_work(sk);
-	return moved > 0;
+	return moved;
 }
 
 static void __mptcp_rcvbuf_update(struct sock *sk, struct sock *ssk)
@@ -779,10 +761,6 @@ static void __mptcp_data_ready(struct sock *sk, struct sock *ssk)
 
 	__mptcp_rcvbuf_update(sk, ssk);
 
-	/* over limit? can't append more skbs to msk, Also, no need to wake-up*/
-	if (sk_rmem_alloc_get(sk) > sk->sk_rcvbuf)
-		return;
-
 	/* Wake-up the reader only for in-sequence data */
 	if (move_skbs_to_msk(msk, ssk) && mptcp_epollin_ready(sk))
 		sk->sk_data_ready(sk);
@@ -882,20 +860,6 @@ bool mptcp_schedule_work(struct sock *sk)
 	return false;
 }
 
-static struct sock *mptcp_subflow_recv_lookup(const struct mptcp_sock *msk)
-{
-	struct mptcp_subflow_context *subflow;
-
-	msk_owned_by_me(msk);
-
-	mptcp_for_each_subflow(msk, subflow) {
-		if (READ_ONCE(subflow->data_avail))
-			return mptcp_subflow_tcp_sock(subflow);
-	}
-
-	return NULL;
-}
-
 static bool mptcp_skb_can_collapse_to(u64 write_seq,
 				      const struct sk_buff *skb,
 				      const struct mptcp_ext *mpext)
@@ -2033,37 +1997,62 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
 	msk->rcvq_space.time = mstamp;
 }
 
+static struct mptcp_subflow_context *
+__mptcp_first_ready_from(struct mptcp_sock *msk,
+			 struct mptcp_subflow_context *subflow)
+{
+	struct mptcp_subflow_context *start_subflow = subflow;
+
+	while (!READ_ONCE(subflow->data_avail)) {
+		subflow = mptcp_next_subflow(msk, subflow);
+		if (subflow == start_subflow)
+			return NULL;
+	}
+	return subflow;
+}
+
 static bool __mptcp_move_skbs(struct sock *sk)
 {
 	struct mptcp_subflow_context *subflow;
 	struct mptcp_sock *msk = mptcp_sk(sk);
-	unsigned int moved = 0;
-	bool ret, done;
+	bool ret = false;
+
+	if (list_empty(&msk->conn_list))
+		return false;
 
 	/* verify we can move any data from the subflow, eventually updating */
 	if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK))
 		mptcp_for_each_subflow(msk, subflow)
 			__mptcp_rcvbuf_update(sk, subflow->tcp_sock);
 
-	if (sk_rmem_alloc_get(sk) > sk->sk_rcvbuf)
-		return false;
-
-	do {
-		struct sock *ssk = mptcp_subflow_recv_lookup(msk);
+	subflow = list_first_entry(&msk->conn_list,
+				   struct mptcp_subflow_context, node);
+	for (;;) {
+		struct sock *ssk;
 		bool slowpath;
 
-		if (unlikely(!ssk))
+		/*
+		 * As an optimization avoid traversing the subflows list
+		 * and ev. acquiring the subflow socket lock before baling out
+		 */
+		if (sk_rmem_alloc_get(sk) > sk->sk_rcvbuf)
 			break;
 
-		slowpath = lock_sock_fast(ssk);
-		done = __mptcp_move_skbs_from_subflow(msk, ssk, &moved);
+		subflow = __mptcp_first_ready_from(msk, subflow);
+		if (!subflow)
+			break;
 
+		ssk = mptcp_subflow_tcp_sock(subflow);
+		slowpath = lock_sock_fast(ssk);
+		ret = __mptcp_move_skbs_from_subflow(msk, ssk) || ret;
 		if (unlikely(ssk->sk_err))
 			__mptcp_error_report(sk);
 		unlock_sock_fast(ssk, slowpath);
-	} while (!done);
 
-	ret = moved > 0 || __mptcp_ofo_queue(msk);
+		subflow = mptcp_next_subflow(msk, subflow);
+	}
+
+	__mptcp_ofo_queue(msk);
 	if (ret)
 		mptcp_check_data_fin((struct sock *)msk);
 	return ret;
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 35a74b59541b..a07fe3e8f337 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -351,6 +351,8 @@ struct mptcp_sock {
 	list_for_each_entry(__subflow, &((__msk)->conn_list), node)
 #define mptcp_for_each_subflow_safe(__msk, __subflow, __tmp)			\
 	list_for_each_entry_safe(__subflow, __tmp, &((__msk)->conn_list), node)
+#define mptcp_next_subflow(__msk, __subflow)				\
+	list_next_entry_circular(__subflow, &__msk->conn_list, node);
 
 extern struct genl_family mptcp_genl_family;
 
-- 
2.45.2


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH mptcp-next 0/2] mptcp: rx refactor follow-ups
  2024-12-12 16:58 [PATCH mptcp-next 0/2] mptcp: rx refactor follow-ups Paolo Abeni
  2024-12-12 16:58 ` [PATCH mptcp-next 1/2] mptcp: dismiss __mptcp_rmem() Paolo Abeni
  2024-12-12 16:58 ` [PATCH mptcp-next 2/2] mptcp: micro-optimize __mptcp_move_skb() Paolo Abeni
@ 2024-12-12 17:57 ` MPTCP CI
  2024-12-21  2:13 ` Mat Martineau
  2024-12-21 11:35 ` Matthieu Baerts
  4 siblings, 0 replies; 6+ messages in thread
From: MPTCP CI @ 2024-12-12 17:57 UTC (permalink / raw)
  To: Paolo Abeni; +Cc: mptcp

Hi Paolo,

Thank you for your modifications, that's great!

Our CI did some validations and here is its report:

- KVM Validation: normal: Success! ✅
- KVM Validation: debug: Success! ✅
- KVM Validation: btf-normal (only bpftest_all): Success! ✅
- KVM Validation: btf-debug (only bpftest_all): Success! ✅
- Task: https://github.com/multipath-tcp/mptcp_net-next/actions/runs/12301307404

Initiator: Patchew Applier
Commits: https://github.com/multipath-tcp/mptcp_net-next/commits/63734435a74d
Patchwork: https://patchwork.kernel.org/project/mptcp/list/?series=917303


If there are some issues, you can reproduce them using the same environment as
the one used by the CI thanks to a docker image, e.g.:

    $ cd [kernel source code]
    $ docker run -v "${PWD}:${PWD}:rw" -w "${PWD}" --privileged --rm -it \
        --pull always mptcp/mptcp-upstream-virtme-docker:latest \
        auto-normal

For more details:

    https://github.com/multipath-tcp/mptcp-upstream-virtme-docker


Please note that despite all the efforts that have been already done to have a
stable tests suite when executed on a public CI like here, it is possible some
reported issues are not due to your modifications. Still, do not hesitate to
help us improve that ;-)

Cheers,
MPTCP GH Action bot
Bot operated by Matthieu Baerts (NGI0 Core)

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH mptcp-next 0/2] mptcp: rx refactor follow-ups
  2024-12-12 16:58 [PATCH mptcp-next 0/2] mptcp: rx refactor follow-ups Paolo Abeni
                   ` (2 preceding siblings ...)
  2024-12-12 17:57 ` [PATCH mptcp-next 0/2] mptcp: rx refactor follow-ups MPTCP CI
@ 2024-12-21  2:13 ` Mat Martineau
  2024-12-21 11:35 ` Matthieu Baerts
  4 siblings, 0 replies; 6+ messages in thread
From: Mat Martineau @ 2024-12-21  2:13 UTC (permalink / raw)
  To: Paolo Abeni; +Cc: mptcp

On Thu, 12 Dec 2024, Paolo Abeni wrote:

> a couple of incremental improvements on top of the pending rx path
> refactor: patch 1 dismiss __mptcp_rmem(), patch 2 optimize a bit
> __mptcp_move_skbs()
>

Both LGTM, thanks Paolo.

Reviewed-by: Mat Martineau <martineau@kernel.org>

> Based-on: <cover.1733486870.git.pabeni@redhat.com>
>
> Paolo Abeni (2):
>  mptcp: dismiss __mptcp_rmem()
>  mptcp: micro-optimize __mptcp_move_skb()
>
> net/mptcp/protocol.c | 113 +++++++++++++++++++------------------------
> net/mptcp/protocol.h |  13 ++---
> 2 files changed, 55 insertions(+), 71 deletions(-)
>
> -- 
> 2.45.2
>
>
>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH mptcp-next 0/2] mptcp: rx refactor follow-ups
  2024-12-12 16:58 [PATCH mptcp-next 0/2] mptcp: rx refactor follow-ups Paolo Abeni
                   ` (3 preceding siblings ...)
  2024-12-21  2:13 ` Mat Martineau
@ 2024-12-21 11:35 ` Matthieu Baerts
  4 siblings, 0 replies; 6+ messages in thread
From: Matthieu Baerts @ 2024-12-21 11:35 UTC (permalink / raw)
  To: Paolo Abeni, mptcp

Hi Paolo, Mat,

On 12/12/2024 17:58, Paolo Abeni wrote:
> a couple of incremental improvements on top of the pending rx path
> refactor: patch 1 dismiss __mptcp_rmem(), patch 2 optimize a bit 
> __mptcp_move_skbs()

Thank you for the patches and the reviews!

Now in our tree (with a small modification of mptcp_next_subflow() as
suggested by CheckPatch [1])

[1] https://github.com/multipath-tcp/mptcp_net-next/actions/runs/12301307406

New patches for t/upstream:
- 94c0aa6980ba: mptcp: dismiss __mptcp_rmem()
- 515e071ef7f6: mptcp: micro-optimize __mptcp_move_skb()
- Results: 9213903d77ba..51bd6a121e25 (export)

Tests are now in progress:

- export:
https://github.com/multipath-tcp/mptcp_net-next/commit/fd12ae115620bb5aed8fa1c1182edb9ab226494a/checks

Cheers,
Matt
-- 
Sponsored by the NGI0 Core fund.


^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2024-12-21 11:35 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-12-12 16:58 [PATCH mptcp-next 0/2] mptcp: rx refactor follow-ups Paolo Abeni
2024-12-12 16:58 ` [PATCH mptcp-next 1/2] mptcp: dismiss __mptcp_rmem() Paolo Abeni
2024-12-12 16:58 ` [PATCH mptcp-next 2/2] mptcp: micro-optimize __mptcp_move_skb() Paolo Abeni
2024-12-12 17:57 ` [PATCH mptcp-next 0/2] mptcp: rx refactor follow-ups MPTCP CI
2024-12-21  2:13 ` Mat Martineau
2024-12-21 11:35 ` Matthieu Baerts

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.