From: Geliang Tang <geliang.tang@suse.com>
To: Mat Martineau <mathew.j.martineau@linux.intel.com>
Cc: mptcp@lists.linux.dev
Subject: Re: [PATCH mptcp-next v4 2/2] mptcp: retrans for redundant sends
Date: Mon, 19 Dec 2022 21:17:31 +0800 [thread overview]
Message-ID: <20221219131731.GA4625@localhost.localdomain> (raw)
In-Reply-To: <86277516-113d-53cf-aded-10e3224f59e9@linux.intel.com>
On Wed, Dec 14, 2022 at 05:47:12PM -0800, Mat Martineau wrote:
> On Sun, 11 Dec 2022, Geliang Tang wrote:
>
> > Redundant sends need to work more like the MPTCP retransmit code path.
> > When the scheduler selects multiple subflows, the first subflow to send
> > is a "normal" transmit, and any other subflows would act like a retransmit
> > when accessing the dfrags.
> >
> > Signed-off-by: Geliang Tang <geliang.tang@suse.com>
> > ---
> > net/mptcp/protocol.c | 45 ++++++++++++++++++++++++++++++++++++++++----
> > 1 file changed, 41 insertions(+), 4 deletions(-)
> >
> > diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> > index 2342b9469181..4c07add44b02 100644
> > --- a/net/mptcp/protocol.c
> > +++ b/net/mptcp/protocol.c
> > @@ -45,6 +45,7 @@ static struct percpu_counter mptcp_sockets_allocated ____cacheline_aligned_in_sm
> >
> > static void __mptcp_destroy_sock(struct sock *sk);
> > static void __mptcp_check_send_data_fin(struct sock *sk);
> > +static void __mptcp_retrans(struct sock *sk);
> >
> > DEFINE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions);
> > static struct net_device mptcp_napi_dev;
> > @@ -997,7 +998,7 @@ static void __mptcp_clean_una(struct sock *sk)
> >
> > if (unlikely(dfrag == msk->first_pending)) {
> > /* in recovery mode can see ack after the current snd head */
> > - if (WARN_ON_ONCE(!msk->recovery))
> > + if (!msk->recovery)
> > break;
> >
> > WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
> > @@ -1012,7 +1013,7 @@ static void __mptcp_clean_una(struct sock *sk)
> >
> > /* prevent wrap around in recovery mode */
> > if (unlikely(delta > dfrag->already_sent)) {
> > - if (WARN_ON_ONCE(!msk->recovery))
> > + if (!msk->recovery)
> > goto out;
> > if (WARN_ON_ONCE(delta > dfrag->data_len))
> > goto out;
> > @@ -1111,6 +1112,7 @@ struct mptcp_sendmsg_info {
> > u16 sent;
> > unsigned int flags;
> > bool data_lock_held;
> > + struct mptcp_data_frag *last;
> > };
> >
> > static int mptcp_check_allowed_size(const struct mptcp_sock *msk, struct sock *ssk,
> > @@ -1526,6 +1528,7 @@ static int __subflow_push_pending(struct sock *sk, struct sock *ssk,
> > info->sent = dfrag->already_sent;
> > info->limit = dfrag->data_len;
> > len = dfrag->data_len - dfrag->already_sent;
> > + info->last = dfrag;
> > while (len > 0) {
> > int ret = 0;
> >
> > @@ -1562,14 +1565,19 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
> > struct sock *prev_ssk = NULL, *ssk = NULL;
> > struct mptcp_sock *msk = mptcp_sk(sk);
> > struct mptcp_subflow_context *subflow;
> > + struct mptcp_data_frag *head, *dfrag;
> > struct mptcp_sendmsg_info info = {
> > .flags = flags,
> > };
> > bool do_check_data_fin = false;
> > int push_count = 1;
> >
> > + head = mptcp_send_head(sk);
> > + if (!head)
> > + goto out;
> > +
> > while (mptcp_send_head(sk) && (push_count > 0)) {
> > - int ret = 0;
> > + int ret = 0, i = 0;
> >
> > if (mptcp_sched_get_send(msk))
> > break;
> > @@ -1578,6 +1586,19 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
> >
> > mptcp_for_each_subflow(msk, subflow) {
> > if (READ_ONCE(subflow->scheduled)) {
> > + if (i > 0) {
> > + WRITE_ONCE(msk->first_pending, head);
> > + mptcp_push_release(ssk, &info, do_check_data_fin);
> > +
> > + while ((dfrag = mptcp_send_head(sk))) {
> > + __mptcp_retrans(sk);
> > + if (dfrag == info.last)
> > + break;
> > + WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
> > + }
> > + goto out;
> > + }
> > +
> > mptcp_subflow_set_scheduled(subflow, false);
> >
> > prev_ssk = ssk;
> > @@ -1605,6 +1626,7 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
> > push_count--;
> > continue;
> > }
> > + i++;
> > do_check_data_fin = true;
> > msk->last_snd = ssk;
> > }
> > @@ -1614,6 +1636,7 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
> > /* at this point we held the socket lock for the last subflow we used */
> > mptcp_push_release(ssk, &info, do_check_data_fin);
> >
> > +out:
> > /* ensure the rtx timer is running */
> > if (!mptcp_timer_pending(sk))
> > mptcp_reset_timer(sk);
> > @@ -1628,13 +1651,18 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk, bool
> > struct mptcp_sendmsg_info info = {
> > .data_lock_held = true,
> > };
> > + struct mptcp_data_frag *head;
> > bool keep_pushing = true;
> > struct sock *xmit_ssk;
> > int copied = 0;
> >
> > + head = mptcp_send_head(sk);
> > + if (!head)
> > + goto out;
> > +
> > info.flags = 0;
> > while (mptcp_send_head(sk) && keep_pushing) {
> > - int ret = 0;
> > + int ret = 0, i = 0;
> >
> > /* check for a different subflow usage only after
> > * spooling the first chunk of data
> > @@ -1659,18 +1687,27 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk, bool
> > ret = __subflow_push_pending(sk, ssk, &info);
> > if (ret <= 0)
> > keep_pushing = false;
> > + i++;
> > copied += ret;
> > msk->last_snd = ssk;
> > }
> >
> > mptcp_for_each_subflow(msk, subflow) {
> > if (READ_ONCE(subflow->scheduled)) {
> > + if (i > 0) {
> > + WRITE_ONCE(msk->first_pending, head);
> > + if (!test_and_set_bit(MPTCP_WORK_RTX, &msk->flags))
> > + mptcp_schedule_work(sk);
>
> Hi Geliang -
>
> It's not going to perform well enough to use the work queue for redundant
> sends.
>
> MPTCP_WORK_RTX works ok for retransmissions because that is expected to be
> infrequent. A redundant scheduler would be sending everything on multiple
> subflows.
>
> The retransmission code has some ideas for making redundant sends work, but
> redundant sending is different:
>
> 1. I think the scheduler will need to store some sequence numbers at the msk
> level so the same data is sent on different subflows, even on the
> __mptcp_subflow_push_pending() code path. This is a change for regular (not
> redundant) schedulers too, and lets the schedulers select which subflows to
> send on AND what data to send.
Sorry, Mat, I didn't get this idea yet. Please give me more details
about it. We should add sched_seq_start and sched_seq_end in struct
mptcp_sock, right? These sequence numbers should be set in the BPF
context by the users, so we need to add sched_seq_start and
sched_seq_end in struct mptcp_sched_data too. Something likes:
for (int i = 0; i < MPTCP_SUBFLOWS_MAX; i++) {
if (!data->contexts[i])
break;
mptcp_subflow_set_scheduled(data->contexts[i], true);
data->sched_seq_start = SN;
data->sched_seq_end = SN + LEN;
}
How can the users know what sequence number (SN) to write from? The
sequence number is generated in the kernel?
We can use (msk->sched_seq_end - msk->sched_seq_start) to replace
"msk->snd_burst", but I still don't know how to check these sequence
numbers in __mptcp_subflow_push_pending().
Thanks,
-Geliang
>
> 2. The redundant send code should not behave exactly like retransmit. For
> example, don't call mptcp_sched_get_retrans() or use the retransmit MIB
> counters. Maybe it's simpler to have a separate function rather than add a
> bunch of conditionals to __mptcp_retrans()?
>
> 3. When using the __mptcp_subflow_push_pending() code path, the
> MPTCP_DELEGATE_SEND technique should be used repeatedly until all
> redundantly scheduled subflows have sent their data.
>
>
> I'll check with other community members at the meeting tomorrow to see if
> some other ideas come up.
>
>
> > + goto out;
> > + }
> > +
> > mptcp_subflow_set_scheduled(subflow, false);
> >
> > xmit_ssk = mptcp_subflow_tcp_sock(subflow);
> > if (xmit_ssk != ssk) {
> > mptcp_subflow_delegate(subflow,
> > MPTCP_DELEGATE_SEND);
> > + i++;
> > msk->last_snd = xmit_ssk;
> > keep_pushing = false;
> > }
> > --
> > 2.35.3
> >
> >
> >
>
> Thanks,
>
> --
> Mat Martineau
> Intel
next prev parent reply other threads:[~2022-12-19 13:17 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-12-11 2:03 [PATCH mptcp-next v4 0/2] BPF redundant scheduler, part 3 Geliang Tang
2022-12-11 2:03 ` [PATCH mptcp-next v4 1/2] mptcp: update mptcp_push_release Geliang Tang
2022-12-15 0:53 ` Mat Martineau
2022-12-11 2:03 ` [PATCH mptcp-next v4 2/2] mptcp: retrans for redundant sends Geliang Tang
2022-12-15 1:47 ` Mat Martineau
2022-12-19 13:17 ` Geliang Tang [this message]
2022-12-20 2:31 ` Mat Martineau
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20221219131731.GA4625@localhost.localdomain \
--to=geliang.tang@suse.com \
--cc=mathew.j.martineau@linux.intel.com \
--cc=mptcp@lists.linux.dev \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.