From: Geliang Tang <geliang@kernel.org>
To: Paolo Abeni <pabeni@redhat.com>, mptcp@lists.linux.dev
Subject: Re: [PATCH mptcp-next v2 03/12] mptcp: rcvbuf auto-tuning improvement
Date: Fri, 19 Sep 2025 17:50:21 +0800 [thread overview]
Message-ID: <139eaffc784289774aabd00885ae0dc3f7ade9d1.camel@kernel.org> (raw)
In-Reply-To: <41db4ac9e54972274efd501dc110c5820def3412.1758214563.git.pabeni@redhat.com>
On Thu, 2025-09-18 at 19:14 +0200, Paolo Abeni wrote:
> Apply to the MPTCP auto-tuning the same improvements introduced for
> the
> TCP protocol by the merge commit 2da35e4b4df9 ("Merge branch
> 'tcp-receive-side-improvements'").
>
> The main difference is that TCP subflow and the main MPTCP socket
> need
> to account separately for OoO: MPTCP does not care for TCP-level OoO
> and vice versa; as a consequence, do not reflect MPTCP-level rcvbuf
> increase due to OoO packets at the subflow level.
>
> This refactor additionally allows dropping the msk receive buffer
> update
> at receive time, as the latter was only intended to cope with subflow
> receive
> buffer increase due to OoO packets.
>
> Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/487
> Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/559
> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
LGTM!
Reviewed-by: Geliang Tang <geliang@kernel.org>
Tested-by: Geliang Tang <geliang@kernel.org>
Thanks,
-Geliang
> ---
> v1 -> v2:
> - fix unused variable
> - reword the commit message
> ---
> net/mptcp/protocol.c | 92 ++++++++++++++++++++----------------------
> --
> net/mptcp/protocol.h | 4 +-
> 2 files changed, 44 insertions(+), 52 deletions(-)
>
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index 9d95d24781509..162abafe3f320 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -179,6 +179,30 @@ static bool mptcp_ooo_try_coalesce(struct
> mptcp_sock *msk, struct sk_buff *to,
> return mptcp_try_coalesce((struct sock *)msk, to, from);
> }
>
> +static bool mptcp_rcvbuf_grow(struct sock *sk)
> +{
> + struct mptcp_sock *msk = mptcp_sk(sk);
> + int rcvwin, rcvbuf;
> +
> + if (!READ_ONCE(sock_net(sk)-
> >ipv4.sysctl_tcp_moderate_rcvbuf) ||
> + (sk->sk_userlocks & SOCK_RCVBUF_LOCK))
> + return false;
> +
> + rcvwin = ((u64)msk->rcvq_space.space << 1);
> +
> + if (!RB_EMPTY_ROOT(&msk->out_of_order_queue))
> + rcvwin += MPTCP_SKB_CB(msk->ooo_last_skb)->end_seq -
> msk->ack_seq;
> +
> + rcvbuf = min_t(u64, mptcp_space_from_win(sk, rcvwin),
> + READ_ONCE(sock_net(sk)-
> >ipv4.sysctl_tcp_rmem[2]));
> +
> + if (rcvbuf > sk->sk_rcvbuf) {
> + WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
> + return true;
> + }
> + return false;
> +}
> +
> /* "inspired" by tcp_data_queue_ofo(), main differences:
> * - use mptcp seqs
> * - don't cope with sacks
> @@ -292,6 +316,9 @@ static void mptcp_data_queue_ofo(struct
> mptcp_sock *msk, struct sk_buff *skb)
> end:
> skb_condense(skb);
> skb_set_owner_r(skb, sk);
> + /* do not grow rcvbuf for not-yet-accepted or orphaned
> sockets. */
> + if (sk->sk_socket)
> + mptcp_rcvbuf_grow(sk);
> }
>
> static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock
> *ssk,
> @@ -784,18 +811,10 @@ static bool move_skbs_to_msk(struct mptcp_sock
> *msk, struct sock *ssk)
> return moved;
> }
>
> -static void __mptcp_rcvbuf_update(struct sock *sk, struct sock *ssk)
> -{
> - if (unlikely(ssk->sk_rcvbuf > sk->sk_rcvbuf))
> - WRITE_ONCE(sk->sk_rcvbuf, ssk->sk_rcvbuf);
> -}
> -
> static void __mptcp_data_ready(struct sock *sk, struct sock *ssk)
> {
> struct mptcp_sock *msk = mptcp_sk(sk);
>
> - __mptcp_rcvbuf_update(sk, ssk);
> -
> /* Wake-up the reader only for in-sequence data */
> if (move_skbs_to_msk(msk, ssk) && mptcp_epollin_ready(sk))
> sk->sk_data_ready(sk);
> @@ -2014,48 +2033,26 @@ static void mptcp_rcv_space_adjust(struct
> mptcp_sock *msk, int copied)
> if (msk->rcvq_space.copied <= msk->rcvq_space.space)
> goto new_measure;
>
> - if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf)
> &&
> - !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
> - u64 rcvwin, grow;
> - int rcvbuf;
> -
> - rcvwin = ((u64)msk->rcvq_space.copied << 1) + 16 *
> advmss;
> -
> - grow = rcvwin * (msk->rcvq_space.copied - msk-
> >rcvq_space.space);
> -
> - do_div(grow, msk->rcvq_space.space);
> - rcvwin += (grow << 1);
> -
> - rcvbuf = min_t(u64, mptcp_space_from_win(sk,
> rcvwin),
> - READ_ONCE(sock_net(sk)-
> >ipv4.sysctl_tcp_rmem[2]));
> -
> - if (rcvbuf > sk->sk_rcvbuf) {
> - u32 window_clamp;
> -
> - window_clamp = mptcp_win_from_space(sk,
> rcvbuf);
> - WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
> + msk->rcvq_space.space = msk->rcvq_space.copied;
> + if (mptcp_rcvbuf_grow(sk)) {
>
> - /* Make subflows follow along. If we do not
> do this, we
> - * get drops at subflow level if skbs can't
> be moved to
> - * the mptcp rx queue fast enough (announced
> rcv_win can
> - * exceed ssk->sk_rcvbuf).
> - */
> - mptcp_for_each_subflow(msk, subflow) {
> - struct sock *ssk;
> - bool slow;
> + /* Make subflows follow along. If we do not do
> this, we
> + * get drops at subflow level if skbs can't be moved
> to
> + * the mptcp rx queue fast enough (announced rcv_win
> can
> + * exceed ssk->sk_rcvbuf).
> + */
> + mptcp_for_each_subflow(msk, subflow) {
> + struct sock *ssk;
> + bool slow;
>
> - ssk =
> mptcp_subflow_tcp_sock(subflow);
> - slow = lock_sock_fast(ssk);
> - WRITE_ONCE(ssk->sk_rcvbuf, rcvbuf);
> - WRITE_ONCE(tcp_sk(ssk)-
> >window_clamp, window_clamp);
> - if (tcp_can_send_ack(ssk))
> - tcp_cleanup_rbuf(ssk, 1);
> - unlock_sock_fast(ssk, slow);
> - }
> + ssk = mptcp_subflow_tcp_sock(subflow);
> + slow = lock_sock_fast(ssk);
> + tcp_sk(ssk)->rcvq_space.space = msk-
> >rcvq_space.copied;
> + tcp_rcvbuf_grow(ssk);
> + unlock_sock_fast(ssk, slow);
> }
> }
>
> - msk->rcvq_space.space = msk->rcvq_space.copied;
> new_measure:
> msk->rcvq_space.copied = 0;
> msk->rcvq_space.time = mstamp;
> @@ -2084,11 +2081,6 @@ static bool __mptcp_move_skbs(struct sock *sk)
> if (list_empty(&msk->conn_list))
> return false;
>
> - /* verify we can move any data from the subflow, eventually
> updating */
> - if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK))
> - mptcp_for_each_subflow(msk, subflow)
> - __mptcp_rcvbuf_update(sk, subflow-
> >tcp_sock);
> -
> subflow = list_first_entry(&msk->conn_list,
> struct mptcp_subflow_context,
> node);
> for (;;) {
> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> index 9b5a248bad404..6ac58e92a1aa3 100644
> --- a/net/mptcp/protocol.h
> +++ b/net/mptcp/protocol.h
> @@ -342,8 +342,8 @@ struct mptcp_sock {
> struct mptcp_pm_data pm;
> struct mptcp_sched_ops *sched;
> struct {
> - u32 space; /* bytes copied in last measurement
> window */
> - u32 copied; /* bytes copied in this measurement
> window */
> + int space; /* bytes copied in last measurement
> window */
> + int copied; /* bytes copied in this measurement
> window */
> u64 time; /* start time of measurement window
> */
> u64 rtt_us; /* last maximum rtt of subflows */
> } rcvq_space;
next prev parent reply other threads:[~2025-09-19 9:50 UTC|newest]
Thread overview: 32+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-09-18 17:14 [PATCH mptcp-next v2 00/12] mptcp: receive path improvement Paolo Abeni
2025-09-18 17:14 ` [PATCH mptcp-next v2 01/12] mptcp: leverage skb deferral free Paolo Abeni
2025-09-19 9:49 ` Geliang Tang
2025-09-19 10:21 ` Matthieu Baerts
2025-09-18 17:14 ` [PATCH mptcp-next v2 02/12] tcp: make tcp_rcvbuf_grow() accessible to mptcp code Paolo Abeni
2025-09-19 9:50 ` Geliang Tang
2025-09-19 10:22 ` Matthieu Baerts
2025-09-18 17:14 ` [PATCH mptcp-next v2 03/12] mptcp: rcvbuf auto-tuning improvement Paolo Abeni
2025-09-19 9:50 ` Geliang Tang [this message]
2025-09-19 10:22 ` Matthieu Baerts
2025-09-18 17:14 ` [PATCH mptcp-next v2 04/12] mptcp: introduce the mptcp_init_skb helper Paolo Abeni
2025-09-19 12:36 ` Matthieu Baerts
2025-09-18 17:14 ` [PATCH mptcp-next v2 05/12] mptcp: remove unneeded mptcp_move_skb() Paolo Abeni
2025-09-19 9:50 ` Geliang Tang
2025-09-19 12:36 ` Matthieu Baerts
2025-09-18 17:14 ` [PATCH mptcp-next v2 06/12] mptcp: factor out a basic skb coalesce helper Paolo Abeni
2025-09-19 12:36 ` Matthieu Baerts
2025-09-18 17:14 ` [PATCH mptcp-next v2 07/12] mptcp: minor move_skbs_to_msk() cleanup Paolo Abeni
2025-09-19 9:50 ` Geliang Tang
2025-09-19 12:36 ` Matthieu Baerts
2025-09-19 15:01 ` Paolo Abeni
2025-09-18 17:14 ` [PATCH mptcp-next v2 08/12] mptcp: cleanup fallback data fin reception Paolo Abeni
2025-09-18 17:14 ` [PATCH mptcp-next v2 09/12] mptcp: leverage the sk backlog for RX packet processing Paolo Abeni
2025-09-18 17:14 ` [PATCH mptcp-next v2 10/12] mptcp: prevernt __mptcp_move_skbs() interfering with the fastpath Paolo Abeni
2025-09-18 17:14 ` [PATCH mptcp-next v2 11/12] mptcp: borrow forward memory from subflow Paolo Abeni
2025-09-18 17:14 ` [PATCH mptcp-next v2 12/12] mptcp: make fallback backlog aware Paolo Abeni
2025-09-18 20:33 ` [PATCH mptcp-next v2 00/12] mptcp: receive path improvement MPTCP CI
2025-09-19 2:22 ` Geliang Tang
2025-09-19 6:54 ` Paolo Abeni
2025-09-19 7:30 ` Geliang Tang
2025-09-19 8:14 ` Matthieu Baerts
2025-09-19 13:11 ` Matthieu Baerts
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=139eaffc784289774aabd00885ae0dc3f7ade9d1.camel@kernel.org \
--to=geliang@kernel.org \
--cc=mptcp@lists.linux.dev \
--cc=pabeni@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.