* [PATCH mptcp-next v9 1/4] Squash to "mptcp: add get_subflow wrappers"
2022-10-18 11:03 [PATCH mptcp-next v9 0/4] refactor push pending Geliang Tang
@ 2022-10-18 11:03 ` Geliang Tang
2022-10-18 11:03 ` [PATCH mptcp-next v9 2/4] mptcp: use msk instead of mptcp_sk Geliang Tang
` (3 subsequent siblings)
4 siblings, 0 replies; 10+ messages in thread
From: Geliang Tang @ 2022-10-18 11:03 UTC (permalink / raw)
To: mptcp; +Cc: Geliang Tang
Please update the commit log:
'''
This patch defines two new wrappers, mptcp_sched_get_send() and
mptcp_sched_get_retrans(), which invoke get_subflow() of msk->sched.
Set the subflow pointer array in struct mptcp_sched_data before invoking
get_subflow(), so that it can be used by get_subflow() in BPF contexts.
Check the subflow scheduled flags to test which subflow or subflows are
picked by the scheduler.
Move sock_owned_by_me() and the fallback check code from
mptcp_subflow_get_send/retrans() into the wrappers.
'''
Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
net/mptcp/protocol.c | 8 +++---
net/mptcp/protocol.h | 4 +--
net/mptcp/sched.c | 61 +++++++++++++++++++++-----------------------
3 files changed, 35 insertions(+), 38 deletions(-)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 3196d2a350f9..1cf8b1c68e09 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1547,7 +1547,7 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
int ret = 0;
prev_ssk = ssk;
- ssk = mptcp_sched_get_send(msk);
+ ssk = mptcp_subflow_get_send(msk);
/* First check. If the ssk has changed since
* the last round, release prev_ssk
@@ -1616,7 +1616,7 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
* check for a different subflow usage only after
* spooling the first chunk of data
*/
- xmit_ssk = first ? ssk : mptcp_sched_get_send(mptcp_sk(sk));
+ xmit_ssk = first ? ssk : mptcp_subflow_get_send(mptcp_sk(sk));
if (!xmit_ssk)
goto out;
if (xmit_ssk != ssk) {
@@ -2500,7 +2500,7 @@ static void __mptcp_retrans(struct sock *sk)
mptcp_clean_una_wakeup(sk);
/* first check ssk: need to kick "stale" logic */
- ssk = mptcp_sched_get_retrans(msk);
+ ssk = mptcp_subflow_get_retrans(msk);
dfrag = mptcp_rtx_head(sk);
if (!dfrag) {
if (mptcp_data_fin_enabled(msk)) {
@@ -3218,7 +3218,7 @@ void __mptcp_check_push(struct sock *sk, struct sock *ssk)
return;
if (!sock_owned_by_user(sk)) {
- struct sock *xmit_ssk = mptcp_sched_get_send(mptcp_sk(sk));
+ struct sock *xmit_ssk = mptcp_subflow_get_send(mptcp_sk(sk));
if (xmit_ssk == ssk)
__mptcp_subflow_push_pending(sk, ssk);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 2358a4083eb3..8f48f881adf8 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -643,8 +643,8 @@ void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow,
bool scheduled);
struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk);
struct sock *mptcp_subflow_get_retrans(struct mptcp_sock *msk);
-struct sock *mptcp_sched_get_send(struct mptcp_sock *msk);
-struct sock *mptcp_sched_get_retrans(struct mptcp_sock *msk);
+int mptcp_sched_get_send(struct mptcp_sock *msk);
+int mptcp_sched_get_retrans(struct mptcp_sock *msk);
static inline bool __tcp_can_send(const struct sock *ssk)
{
diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c
index 044c5ec8bbfb..9b128714055a 100644
--- a/net/mptcp/sched.c
+++ b/net/mptcp/sched.c
@@ -114,67 +114,64 @@ static int mptcp_sched_data_init(struct mptcp_sock *msk, bool reinject,
for (; i < MPTCP_SUBFLOWS_MAX; i++)
data->contexts[i] = NULL;
+ msk->snd_burst = 0;
+
return 0;
}
-struct sock *mptcp_sched_get_send(struct mptcp_sock *msk)
+int mptcp_sched_get_send(struct mptcp_sock *msk)
{
struct mptcp_sched_data data;
struct sock *ssk = NULL;
- int i;
- sock_owned_by_me((struct sock *)msk);
+ sock_owned_by_me((const struct sock *)msk);
/* the following check is moved out of mptcp_subflow_get_send */
if (__mptcp_check_fallback(msk)) {
- if (!msk->first)
- return NULL;
- return __tcp_can_send(msk->first) &&
- sk_stream_memory_free(msk->first) ? msk->first : NULL;
+ if (msk->first &&
+ __tcp_can_send(msk->first) &&
+ sk_stream_memory_free(msk->first)) {
+ mptcp_subflow_set_scheduled(mptcp_subflow_ctx(msk->first), true);
+ return 0;
+ }
+ return -EINVAL;
}
- if (!msk->sched)
- return mptcp_subflow_get_send(msk);
+ if (!msk->sched) {
+ ssk = mptcp_subflow_get_send(msk);
+ if (!ssk)
+ return -EINVAL;
+ mptcp_subflow_set_scheduled(mptcp_subflow_ctx(ssk), true);
+ return 0;
+ }
mptcp_sched_data_init(msk, false, &data);
msk->sched->get_subflow(msk, &data);
- for (i = 0; i < MPTCP_SUBFLOWS_MAX; i++) {
- if (data.contexts[i] && READ_ONCE(data.contexts[i]->scheduled)) {
- ssk = data.contexts[i]->tcp_sock;
- msk->last_snd = ssk;
- break;
- }
- }
-
- return ssk;
+ return 0;
}
-struct sock *mptcp_sched_get_retrans(struct mptcp_sock *msk)
+int mptcp_sched_get_retrans(struct mptcp_sock *msk)
{
struct mptcp_sched_data data;
struct sock *ssk = NULL;
- int i;
sock_owned_by_me((const struct sock *)msk);
/* the following check is moved out of mptcp_subflow_get_retrans */
if (__mptcp_check_fallback(msk))
- return NULL;
+ return -EINVAL;
- if (!msk->sched)
- return mptcp_subflow_get_retrans(msk);
+ if (!msk->sched) {
+ ssk = mptcp_subflow_get_retrans(msk);
+ if (!ssk)
+ return -EINVAL;
+ mptcp_subflow_set_scheduled(mptcp_subflow_ctx(ssk), true);
+ return 0;
+ }
mptcp_sched_data_init(msk, true, &data);
msk->sched->get_subflow(msk, &data);
- for (i = 0; i < MPTCP_SUBFLOWS_MAX; i++) {
- if (data.contexts[i] && READ_ONCE(data.contexts[i]->scheduled)) {
- ssk = data.contexts[i]->tcp_sock;
- msk->last_snd = ssk;
- break;
- }
- }
-
- return ssk;
+ return 0;
}
--
2.35.3
^ permalink raw reply related [flat|nested] 10+ messages in thread* [PATCH mptcp-next v9 2/4] mptcp: use msk instead of mptcp_sk
2022-10-18 11:03 [PATCH mptcp-next v9 0/4] refactor push pending Geliang Tang
2022-10-18 11:03 ` [PATCH mptcp-next v9 1/4] Squash to "mptcp: add get_subflow wrappers" Geliang Tang
@ 2022-10-18 11:03 ` Geliang Tang
2022-10-18 11:03 ` [PATCH mptcp-next v9 3/4] mptcp: change 'first' as a parameter Geliang Tang
` (2 subsequent siblings)
4 siblings, 0 replies; 10+ messages in thread
From: Geliang Tang @ 2022-10-18 11:03 UTC (permalink / raw)
To: mptcp; +Cc: Geliang Tang
Use msk instead of mptcp_sk(sk) in the functions where the variable
"msk = mptcp_sk(sk)" has been defined.
Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
net/mptcp/protocol.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 1cf8b1c68e09..11be5b9c379d 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1616,7 +1616,7 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
* check for a different subflow usage only after
* spooling the first chunk of data
*/
- xmit_ssk = first ? ssk : mptcp_subflow_get_send(mptcp_sk(sk));
+ xmit_ssk = first ? ssk : mptcp_subflow_get_send(msk);
if (!xmit_ssk)
goto out;
if (xmit_ssk != ssk) {
@@ -2261,7 +2261,7 @@ bool __mptcp_retransmit_pending_data(struct sock *sk)
struct mptcp_data_frag *cur, *rtx_head;
struct mptcp_sock *msk = mptcp_sk(sk);
- if (__mptcp_check_fallback(mptcp_sk(sk)))
+ if (__mptcp_check_fallback(msk))
return false;
if (tcp_rtx_and_write_queues_empty(sk))
@@ -2943,7 +2943,7 @@ bool __mptcp_close(struct sock *sk, long timeout)
sock_hold(sk);
pr_debug("msk=%p state=%d", sk, sk->sk_state);
- if (mptcp_sk(sk)->token)
+ if (msk->token)
mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL);
if (sk->sk_state == TCP_CLOSE) {
@@ -3002,8 +3002,8 @@ static int mptcp_disconnect(struct sock *sk, int flags)
mptcp_stop_timer(sk);
sk_stop_timer(sk, &sk->sk_timer);
- if (mptcp_sk(sk)->token)
- mptcp_event(MPTCP_EVENT_CLOSED, mptcp_sk(sk), NULL, GFP_KERNEL);
+ if (msk->token)
+ mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL);
/* msk->subflow is still intact, the following will not free the first
* subflow
--
2.35.3
^ permalink raw reply related [flat|nested] 10+ messages in thread* [PATCH mptcp-next v9 3/4] mptcp: change 'first' as a parameter
2022-10-18 11:03 [PATCH mptcp-next v9 0/4] refactor push pending Geliang Tang
2022-10-18 11:03 ` [PATCH mptcp-next v9 1/4] Squash to "mptcp: add get_subflow wrappers" Geliang Tang
2022-10-18 11:03 ` [PATCH mptcp-next v9 2/4] mptcp: use msk instead of mptcp_sk Geliang Tang
@ 2022-10-18 11:03 ` Geliang Tang
2022-10-18 11:03 ` [PATCH mptcp-next v9 4/4] mptcp: refactor push_pending logic Geliang Tang
2022-10-19 0:16 ` [PATCH mptcp-next v9 0/4] refactor push pending Mat Martineau
4 siblings, 0 replies; 10+ messages in thread
From: Geliang Tang @ 2022-10-18 11:03 UTC (permalink / raw)
To: mptcp; +Cc: Geliang Tang
The function mptcp_subflow_process_delegated() uses the input ssk first,
while __mptcp_check_push() invokes the packet scheduler first.
So this patch adds a new parameter named 'first' for the function
__mptcp_subflow_push_pending() to deal with these two cases separately.
With this change, the code that invokes the packet scheduler in the
function __mptcp_check_push() can be removed, and replaced by invoking
__mptcp_subflow_push_pending() directly.
Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
net/mptcp/protocol.c | 20 ++++++--------------
1 file changed, 6 insertions(+), 14 deletions(-)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 11be5b9c379d..ddeb8b36a677 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1593,7 +1593,7 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
__mptcp_check_send_data_fin(sk);
}
-static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
+static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk, bool first)
{
struct mptcp_sock *msk = mptcp_sk(sk);
struct mptcp_sendmsg_info info = {
@@ -1602,7 +1602,6 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
struct mptcp_data_frag *dfrag;
struct sock *xmit_ssk;
int len, copied = 0;
- bool first = true;
info.flags = 0;
while ((dfrag = mptcp_send_head(sk))) {
@@ -1612,8 +1611,7 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
while (len > 0) {
int ret = 0;
- /* the caller already invoked the packet scheduler,
- * check for a different subflow usage only after
+ /* check for a different subflow usage only after
* spooling the first chunk of data
*/
xmit_ssk = first ? ssk : mptcp_subflow_get_send(msk);
@@ -3217,16 +3215,10 @@ void __mptcp_check_push(struct sock *sk, struct sock *ssk)
if (!mptcp_send_head(sk))
return;
- if (!sock_owned_by_user(sk)) {
- struct sock *xmit_ssk = mptcp_subflow_get_send(mptcp_sk(sk));
-
- if (xmit_ssk == ssk)
- __mptcp_subflow_push_pending(sk, ssk);
- else if (xmit_ssk)
- mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk), MPTCP_DELEGATE_SEND);
- } else {
+ if (!sock_owned_by_user(sk))
+ __mptcp_subflow_push_pending(sk, ssk, false);
+ else
__set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->cb_flags);
- }
}
#define MPTCP_FLAGS_PROCESS_CTX_NEED (BIT(MPTCP_PUSH_PENDING) | \
@@ -3317,7 +3309,7 @@ void mptcp_subflow_process_delegated(struct sock *ssk)
if (test_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status)) {
mptcp_data_lock(sk);
if (!sock_owned_by_user(sk))
- __mptcp_subflow_push_pending(sk, ssk);
+ __mptcp_subflow_push_pending(sk, ssk, true);
else
__set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->cb_flags);
mptcp_data_unlock(sk);
--
2.35.3
^ permalink raw reply related [flat|nested] 10+ messages in thread* [PATCH mptcp-next v9 4/4] mptcp: refactor push_pending logic
2022-10-18 11:03 [PATCH mptcp-next v9 0/4] refactor push pending Geliang Tang
` (2 preceding siblings ...)
2022-10-18 11:03 ` [PATCH mptcp-next v9 3/4] mptcp: change 'first' as a parameter Geliang Tang
@ 2022-10-18 11:03 ` Geliang Tang
2022-10-18 13:16 ` mptcp: refactor push_pending logic: Tests Results MPTCP CI
` (2 more replies)
2022-10-19 0:16 ` [PATCH mptcp-next v9 0/4] refactor push pending Mat Martineau
4 siblings, 3 replies; 10+ messages in thread
From: Geliang Tang @ 2022-10-18 11:03 UTC (permalink / raw)
To: mptcp; +Cc: Geliang Tang
To support redundant packet schedulers more easily, this patch refactors
__mptcp_push_pending() logic from:
For each dfrag:
While sends succeed:
Call the scheduler (selects subflow and msk->snd_burst)
Update subflow locks (push/release/acquire as needed)
Send the dfrag data with mptcp_sendmsg_frag()
Update already_sent, snd_nxt, snd_burst
Update msk->first_pending
Push/release on final subflow
->
While the scheduler selects one subflow:
Lock the subflow
For each pending dfrag:
While sends succeed:
Send the dfrag data with mptcp_sendmsg_frag()
Update already_sent, snd_nxt, snd_burst
Update msk->first_pending
Break if required by msk->snd_burst / etc
Push and release the subflow
Refactors __mptcp_subflow_push_pending logic from:
For each dfrag:
While sends succeed:
Call the scheduler (selects subflow and msk->snd_burst)
Send the dfrag data with mptcp_subflow_delegate(), break
Send the dfrag data with mptcp_sendmsg_frag()
Update dfrag->already_sent, msk->snd_nxt, msk->snd_burst
Update msk->first_pending
->
While first_pending isn't empty:
Call the scheduler (selects subflow and msk->snd_burst)
Send the dfrag data with mptcp_subflow_delegate(), break
Send the dfrag data with mptcp_sendmsg_frag()
For each pending dfrag:
While sends succeed:
Send the dfrag data with mptcp_sendmsg_frag()
Update already_sent, snd_nxt, snd_burst
Update msk->first_pending
Break if required by msk->snd_burst / etc
Move the duplicate code from __mptcp_push_pending() and
__mptcp_subflow_push_pending() into a new helper function, named
__subflow_push_pending(). Simplify __mptcp_push_pending() and
__mptcp_subflow_push_pending() by invoking this helper.
Also move the burst check conditions out of the function
mptcp_subflow_get_send(), and check them in __mptcp_push_pending() and
__mptcp_subflow_push_pending() in the inner "for each pending dfrag"
loop.
Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
net/mptcp/protocol.c | 155 +++++++++++++++++++------------------------
1 file changed, 70 insertions(+), 85 deletions(-)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index ddeb8b36a677..9b73297f6543 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1417,14 +1417,6 @@ struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
u64 linger_time;
long tout = 0;
- /* re-use last subflow, if the burst allow that */
- if (msk->last_snd && msk->snd_burst > 0 &&
- sk_stream_memory_free(msk->last_snd) &&
- mptcp_subflow_active(mptcp_subflow_ctx(msk->last_snd))) {
- mptcp_set_timeout(sk);
- return msk->last_snd;
- }
-
/* pick the subflow with the lower wmem/wspace ratio */
for (i = 0; i < SSK_MODE_MAX; ++i) {
send_info[i].ssk = NULL;
@@ -1491,12 +1483,6 @@ struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
return ssk;
}
-static void mptcp_push_release(struct sock *ssk, struct mptcp_sendmsg_info *info)
-{
- tcp_push(ssk, 0, info->mss_now, tcp_sk(ssk)->nonagle, info->size_goal);
- release_sock(ssk);
-}
-
static void mptcp_update_post_push(struct mptcp_sock *msk,
struct mptcp_data_frag *dfrag,
u32 sent)
@@ -1528,68 +1514,80 @@ void mptcp_check_and_set_pending(struct sock *sk)
mptcp_sk(sk)->push_pending |= BIT(MPTCP_PUSH_PENDING);
}
-void __mptcp_push_pending(struct sock *sk, unsigned int flags)
+static int __subflow_push_pending(struct sock *sk, struct sock *ssk,
+ struct mptcp_sendmsg_info *info)
{
- struct sock *prev_ssk = NULL, *ssk = NULL;
struct mptcp_sock *msk = mptcp_sk(sk);
- struct mptcp_sendmsg_info info = {
- .flags = flags,
- };
- bool do_check_data_fin = false;
struct mptcp_data_frag *dfrag;
- int len;
+ int len, copied = 0, err = 0;
while ((dfrag = mptcp_send_head(sk))) {
- info.sent = dfrag->already_sent;
- info.limit = dfrag->data_len;
+ info->sent = dfrag->already_sent;
+ info->limit = dfrag->data_len;
len = dfrag->data_len - dfrag->already_sent;
while (len > 0) {
int ret = 0;
- prev_ssk = ssk;
- ssk = mptcp_subflow_get_send(msk);
-
- /* First check. If the ssk has changed since
- * the last round, release prev_ssk
- */
- if (ssk != prev_ssk && prev_ssk)
- mptcp_push_release(prev_ssk, &info);
- if (!ssk)
- goto out;
-
- /* Need to lock the new subflow only if different
- * from the previous one, otherwise we are still
- * helding the relevant lock
- */
- if (ssk != prev_ssk)
- lock_sock(ssk);
-
- ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
+ ret = mptcp_sendmsg_frag(sk, ssk, dfrag, info);
if (ret <= 0) {
- if (ret == -EAGAIN)
- continue;
- mptcp_push_release(ssk, &info);
+ err = copied ? : ret;
goto out;
}
- do_check_data_fin = true;
- info.sent += ret;
+ info->sent += ret;
+ copied += ret;
len -= ret;
mptcp_update_post_push(msk, dfrag, ret);
}
WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
+
+ if (msk->snd_burst <= 0 ||
+ !sk_stream_memory_free(ssk) ||
+ !mptcp_subflow_active(mptcp_subflow_ctx(ssk))) {
+ err = copied ? : -EAGAIN;
+ goto out;
+ }
+ mptcp_set_timeout(sk);
+ }
+ err = copied;
+
+out:
+ if (copied) {
+ tcp_push(ssk, 0, info->mss_now, tcp_sk(ssk)->nonagle,
+ info->size_goal);
}
- /* at this point we held the socket lock for the last subflow we used */
- if (ssk)
- mptcp_push_release(ssk, &info);
+ return err;
+}
+
+void __mptcp_push_pending(struct sock *sk, unsigned int flags)
+{
+ struct mptcp_sock *msk = mptcp_sk(sk);
+ struct mptcp_sendmsg_info info = {
+ .flags = flags,
+ };
+ struct sock *ssk;
+ int ret = 0;
+
+again:
+ while (mptcp_send_head(sk) && (ssk = mptcp_subflow_get_send(msk))) {
+ lock_sock(ssk);
+ ret = __subflow_push_pending(sk, ssk, &info);
+ release_sock(ssk);
+
+ if (ret <= 0) {
+ if (ret == -EAGAIN)
+ goto again;
+ goto out;
+ }
+ }
out:
/* ensure the rtx timer is running */
if (!mptcp_timer_pending(sk))
mptcp_reset_timer(sk);
- if (do_check_data_fin)
+ if (ret > 0)
__mptcp_check_send_data_fin(sk);
}
@@ -1599,51 +1597,38 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk, bool
struct mptcp_sendmsg_info info = {
.data_lock_held = true,
};
- struct mptcp_data_frag *dfrag;
struct sock *xmit_ssk;
- int len, copied = 0;
+ int ret = 0;
info.flags = 0;
- while ((dfrag = mptcp_send_head(sk))) {
- info.sent = dfrag->already_sent;
- info.limit = dfrag->data_len;
- len = dfrag->data_len - dfrag->already_sent;
- while (len > 0) {
- int ret = 0;
-
- /* check for a different subflow usage only after
- * spooling the first chunk of data
- */
- xmit_ssk = first ? ssk : mptcp_subflow_get_send(msk);
- if (!xmit_ssk)
- goto out;
- if (xmit_ssk != ssk) {
- mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk),
- MPTCP_DELEGATE_SEND);
- goto out;
- }
-
- ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
- if (ret <= 0)
- goto out;
-
- info.sent += ret;
- copied += ret;
- len -= ret;
- first = false;
+again:
+ while (mptcp_send_head(sk)) {
+ /* check for a different subflow usage only after
+ * spooling the first chunk of data
+ */
+ xmit_ssk = first ? ssk : mptcp_subflow_get_send(msk);
+ if (!xmit_ssk)
+ goto out;
+ if (xmit_ssk != ssk) {
+ mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk),
+ MPTCP_DELEGATE_SEND);
+ goto out;
+ }
- mptcp_update_post_push(msk, dfrag, ret);
+ ret = __subflow_push_pending(sk, ssk, &info);
+ first = false;
+ if (ret <= 0) {
+ if (ret == -EAGAIN)
+ goto again;
+ break;
}
- WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
}
out:
/* __mptcp_alloc_tx_skb could have released some wmem and we are
* not going to flush it via release_sock()
*/
- if (copied) {
- tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
- info.size_goal);
+ if (ret > 0) {
if (!mptcp_timer_pending(sk))
mptcp_reset_timer(sk);
--
2.35.3
^ permalink raw reply related [flat|nested] 10+ messages in thread* Re: mptcp: refactor push_pending logic: Tests Results
2022-10-18 11:03 ` [PATCH mptcp-next v9 4/4] mptcp: refactor push_pending logic Geliang Tang
@ 2022-10-18 13:16 ` MPTCP CI
2022-10-19 0:58 ` [PATCH mptcp-next v9 4/4] mptcp: refactor push_pending logic Mat Martineau
2022-10-19 2:17 ` mptcp: refactor push_pending logic: Tests Results MPTCP CI
2 siblings, 0 replies; 10+ messages in thread
From: MPTCP CI @ 2022-10-18 13:16 UTC (permalink / raw)
To: Geliang Tang; +Cc: mptcp
Hi Geliang,
Thank you for your modifications, that's great!
Our CI did some validations and here is its report:
- KVM Validation: normal:
- Unstable: 1 failed test(s): selftest_simult_flows 🔴:
- Task: https://cirrus-ci.com/task/5202885733515264
- Summary: https://api.cirrus-ci.com/v1/artifact/task/5202885733515264/summary/summary.txt
- {"code":404,"message":
- "Can't find artifacts containing file conclusion.txt"}:
- Task: https://cirrus-ci.com/task/6328785640357888
- Summary: https://api.cirrus-ci.com/v1/artifact/task/6328785640357888/summary/summary.txt
Initiator: Patchew Applier
Commits: https://github.com/multipath-tcp/mptcp_net-next/commits/ddd3c597286e
If there are some issues, you can reproduce them using the same environment as
the one used by the CI thanks to a docker image, e.g.:
$ cd [kernel source code]
$ docker run -v "${PWD}:${PWD}:rw" -w "${PWD}" --privileged --rm -it \
--pull always mptcp/mptcp-upstream-virtme-docker:latest \
auto-debug
For more details:
https://github.com/multipath-tcp/mptcp-upstream-virtme-docker
Please note that despite all the efforts that have already been made to have a
stable test suite when executed on a public CI like here, it is possible some
reported issues are not due to your modifications. Still, do not hesitate to
help us improve that ;-)
Cheers,
MPTCP GH Action bot
Bot operated by Matthieu Baerts (Tessares)
^ permalink raw reply [flat|nested] 10+ messages in thread* Re: [PATCH mptcp-next v9 4/4] mptcp: refactor push_pending logic
2022-10-18 11:03 ` [PATCH mptcp-next v9 4/4] mptcp: refactor push_pending logic Geliang Tang
2022-10-18 13:16 ` mptcp: refactor push_pending logic: Tests Results MPTCP CI
@ 2022-10-19 0:58 ` Mat Martineau
2022-10-19 2:17 ` mptcp: refactor push_pending logic: Tests Results MPTCP CI
2 siblings, 0 replies; 10+ messages in thread
From: Mat Martineau @ 2022-10-19 0:58 UTC (permalink / raw)
To: Geliang Tang; +Cc: mptcp
On Tue, 18 Oct 2022, Geliang Tang wrote:
> To support redundant package schedulers more easily, this patch refactors
> __mptcp_push_pending() logic from:
>
> For each dfrag:
> While sends succeed:
> Call the scheduler (selects subflow and msk->snd_burst)
> Update subflow locks (push/release/acquire as needed)
> Send the dfrag data with mptcp_sendmsg_frag()
> Update already_sent, snd_nxt, snd_burst
> Update msk->first_pending
> Push/release on final subflow
>
> ->
>
> While the scheduler selects one subflow:
> Lock the subflow
> For each pending dfrag:
> While sends succeed:
> Send the dfrag data with mptcp_sendmsg_frag()
> Update already_sent, snd_nxt, snd_burst
> Update msk->first_pending
> Break if required by msk->snd_burst / etc
> Push and release the subflow
>
> Refactors __mptcp_subflow_push_pending logic from:
>
> For each dfrag:
> While sends succeed:
> Call the scheduler (selects subflow and msk->snd_burst)
> Send the dfrag data with mptcp_subflow_delegate(), break
> Send the dfrag data with mptcp_sendmsg_frag()
> Update dfrag->already_sent, msk->snd_nxt, msk->snd_burst
> Update msk->first_pending
>
> ->
>
> While first_pending isn't empty:
> Call the scheduler (selects subflow and msk->snd_burst)
> Send the dfrag data with mptcp_subflow_delegate(), break
> Send the dfrag data with mptcp_sendmsg_frag()
> For each pending dfrag:
> While sends succeed:
> Send the dfrag data with mptcp_sendmsg_frag()
> Update already_sent, snd_nxt, snd_burst
> Update msk->first_pending
> Break if required by msk->snd_burst / etc
>
> Move the duplicate code from __mptcp_push_pending() and
> __mptcp_subflow_push_pending() into a new helper function, named
> __subflow_push_pending(). Simplify __mptcp_push_pending() and
> __mptcp_subflow_push_pending() by invoking this helper.
>
> Also move the burst check conditions out of the function
> mptcp_subflow_get_send(), check them in __mptcp_push_pending() and
> __mptcp_subflow_push_pending() in the inner "for each pending dfrag"
> loop.
>
> Signed-off-by: Geliang Tang <geliang.tang@suse.com>
> ---
> net/mptcp/protocol.c | 155 +++++++++++++++++++------------------------
> 1 file changed, 70 insertions(+), 85 deletions(-)
>
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index ddeb8b36a677..9b73297f6543 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -1417,14 +1417,6 @@ struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
> u64 linger_time;
> long tout = 0;
>
> - /* re-use last subflow, if the burst allow that */
> - if (msk->last_snd && msk->snd_burst > 0 &&
> - sk_stream_memory_free(msk->last_snd) &&
> - mptcp_subflow_active(mptcp_subflow_ctx(msk->last_snd))) {
> - mptcp_set_timeout(sk);
> - return msk->last_snd;
> - }
> -
> /* pick the subflow with the lower wmem/wspace ratio */
> for (i = 0; i < SSK_MODE_MAX; ++i) {
> send_info[i].ssk = NULL;
> @@ -1491,12 +1483,6 @@ struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
> return ssk;
> }
>
> -static void mptcp_push_release(struct sock *ssk, struct mptcp_sendmsg_info *info)
> -{
> - tcp_push(ssk, 0, info->mss_now, tcp_sk(ssk)->nonagle, info->size_goal);
> - release_sock(ssk);
> -}
> -
> static void mptcp_update_post_push(struct mptcp_sock *msk,
> struct mptcp_data_frag *dfrag,
> u32 sent)
> @@ -1528,68 +1514,80 @@ void mptcp_check_and_set_pending(struct sock *sk)
> mptcp_sk(sk)->push_pending |= BIT(MPTCP_PUSH_PENDING);
> }
>
> -void __mptcp_push_pending(struct sock *sk, unsigned int flags)
> +static int __subflow_push_pending(struct sock *sk, struct sock *ssk,
> + struct mptcp_sendmsg_info *info)
> {
> - struct sock *prev_ssk = NULL, *ssk = NULL;
> struct mptcp_sock *msk = mptcp_sk(sk);
> - struct mptcp_sendmsg_info info = {
> - .flags = flags,
> - };
> - bool do_check_data_fin = false;
> struct mptcp_data_frag *dfrag;
> - int len;
> + int len, copied = 0, err = 0;
>
> while ((dfrag = mptcp_send_head(sk))) {
> - info.sent = dfrag->already_sent;
> - info.limit = dfrag->data_len;
> + info->sent = dfrag->already_sent;
> + info->limit = dfrag->data_len;
> len = dfrag->data_len - dfrag->already_sent;
> while (len > 0) {
> int ret = 0;
>
> - prev_ssk = ssk;
> - ssk = mptcp_subflow_get_send(msk);
> -
> - /* First check. If the ssk has changed since
> - * the last round, release prev_ssk
> - */
> - if (ssk != prev_ssk && prev_ssk)
> - mptcp_push_release(prev_ssk, &info);
> - if (!ssk)
> - goto out;
> -
> - /* Need to lock the new subflow only if different
> - * from the previous one, otherwise we are still
> - * helding the relevant lock
> - */
> - if (ssk != prev_ssk)
> - lock_sock(ssk);
> -
> - ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
> + ret = mptcp_sendmsg_frag(sk, ssk, dfrag, info);
> if (ret <= 0) {
> - if (ret == -EAGAIN)
> - continue;
> - mptcp_push_release(ssk, &info);
> + err = copied ? : ret;
> goto out;
> }
>
> - do_check_data_fin = true;
> - info.sent += ret;
> + info->sent += ret;
> + copied += ret;
> len -= ret;
>
> mptcp_update_post_push(msk, dfrag, ret);
> }
> WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
> +
> + if (msk->snd_burst <= 0 ||
> + !sk_stream_memory_free(ssk) ||
> + !mptcp_subflow_active(mptcp_subflow_ctx(ssk))) {
> + err = copied ? : -EAGAIN;
> + goto out;
> + }
> + mptcp_set_timeout(sk);
> + }
> + err = copied;
> +
> +out:
> + if (copied) {
> + tcp_push(ssk, 0, info->mss_now, tcp_sk(ssk)->nonagle,
> + info->size_goal);
> }
>
> - /* at this point we held the socket lock for the last subflow we used */
> - if (ssk)
> - mptcp_push_release(ssk, &info);
> + return err;
> +}
> +
> +void __mptcp_push_pending(struct sock *sk, unsigned int flags)
> +{
> + struct mptcp_sock *msk = mptcp_sk(sk);
> + struct mptcp_sendmsg_info info = {
> + .flags = flags,
> + };
> + struct sock *ssk;
> + int ret = 0;
> +
> +again:
> + while (mptcp_send_head(sk) && (ssk = mptcp_subflow_get_send(msk))) {
> + lock_sock(ssk);
> + ret = __subflow_push_pending(sk, ssk, &info);
> + release_sock(ssk);
Thanks for reworking this patch set, it's easier to see what's going on
now.
With this refactoring a long send will push, release, and re-lock the
subflow for every "burst" (as limited by snd_burst or by the available
buffer space). That seems like it could add noticeable overhead for
high-throughput connections, compared to the old code that would defer the
push & unlock. In earlier reviews I didn't consider this carefully enough:
I thought that __subflow_push_pending() would usually be able to send all
the pending dfrags, but I didn't consider sendmsg calls with lots of data
(more than the burst size).
I think it's possible to keep the push/unlock optimization in
__mptcp_push_pending() if the tcp_push() is handled outside
__subflow_push_pending().
Would likely need more information returned from the helper to give the
caller of __subflow_push_pending() enough information, like:
static int __subflow_push_pending(struct sock *sk, struct sock *ssk,
struct mptcp_sendmsg_info *info,
bool *needs_push)
I'll talk to Paolo about this on Thursday too (and he's welcome to chime
in here :) )
> +
> + if (ret <= 0) {
> + if (ret == -EAGAIN)
> + goto again;
Better to use a 'continue' here.
> + goto out;
> + }
> + }
>
> out:
> /* ensure the rtx timer is running */
> if (!mptcp_timer_pending(sk))
> mptcp_reset_timer(sk);
> - if (do_check_data_fin)
> + if (ret > 0)
> __mptcp_check_send_data_fin(sk);
> }
>
> @@ -1599,51 +1597,38 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk, bool
> struct mptcp_sendmsg_info info = {
> .data_lock_held = true,
> };
> - struct mptcp_data_frag *dfrag;
> struct sock *xmit_ssk;
> - int len, copied = 0;
> + int ret = 0;
>
> info.flags = 0;
> - while ((dfrag = mptcp_send_head(sk))) {
> - info.sent = dfrag->already_sent;
> - info.limit = dfrag->data_len;
> - len = dfrag->data_len - dfrag->already_sent;
> - while (len > 0) {
> - int ret = 0;
> -
> - /* check for a different subflow usage only after
> - * spooling the first chunk of data
> - */
> - xmit_ssk = first ? ssk : mptcp_subflow_get_send(msk);
> - if (!xmit_ssk)
> - goto out;
> - if (xmit_ssk != ssk) {
> - mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk),
> - MPTCP_DELEGATE_SEND);
> - goto out;
> - }
> -
> - ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
> - if (ret <= 0)
> - goto out;
> -
> - info.sent += ret;
> - copied += ret;
> - len -= ret;
> - first = false;
> +again:
> + while (mptcp_send_head(sk)) {
> + /* check for a different subflow usage only after
> + * spooling the first chunk of data
> + */
> + xmit_ssk = first ? ssk : mptcp_subflow_get_send(msk);
> + if (!xmit_ssk)
> + goto out;
> + if (xmit_ssk != ssk) {
> + mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk),
> + MPTCP_DELEGATE_SEND);
> + goto out;
> + }
>
> - mptcp_update_post_push(msk, dfrag, ret);
> + ret = __subflow_push_pending(sk, ssk, &info);
> + first = false;
> + if (ret <= 0) {
> + if (ret == -EAGAIN)
> + goto again;
Another place to use 'continue'.
> + break;
> }
> - WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
> }
>
> out:
> /* __mptcp_alloc_tx_skb could have released some wmem and we are
> * not going to flush it via release_sock()
> */
> - if (copied) {
> - tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
> - info.size_goal);
> + if (ret > 0) {
> if (!mptcp_timer_pending(sk))
> mptcp_reset_timer(sk);
>
> --
> 2.35.3
>
>
>
--
Mat Martineau
Intel
^ permalink raw reply [flat|nested] 10+ messages in thread* Re: mptcp: refactor push_pending logic: Tests Results
2022-10-18 11:03 ` [PATCH mptcp-next v9 4/4] mptcp: refactor push_pending logic Geliang Tang
2022-10-18 13:16 ` mptcp: refactor push_pending logic: Tests Results MPTCP CI
2022-10-19 0:58 ` [PATCH mptcp-next v9 4/4] mptcp: refactor push_pending logic Mat Martineau
@ 2022-10-19 2:17 ` MPTCP CI
2 siblings, 0 replies; 10+ messages in thread
From: MPTCP CI @ 2022-10-19 2:17 UTC (permalink / raw)
To: Geliang Tang; +Cc: mptcp
Hi Geliang,
Thank you for your modifications, that's great!
Our CI did some validations and here is its report:
- KVM Validation: normal:
- Unstable: 1 failed test(s): packetdrill_add_addr 🔴:
- Task: https://cirrus-ci.com/task/4741266072666112
- Summary: https://api.cirrus-ci.com/v1/artifact/task/4741266072666112/summary/summary.txt
- {"code":404,"message":
- "Can't find artifacts containing file conclusion.txt"}:
- Task: https://cirrus-ci.com/task/5867165979508736
- Summary: https://api.cirrus-ci.com/v1/artifact/task/5867165979508736/summary/summary.txt
Initiator: Patchew Applier
Commits: https://github.com/multipath-tcp/mptcp_net-next/commits/12a4c489939c
If there are some issues, you can reproduce them using the same environment as
the one used by the CI thanks to a docker image, e.g.:
$ cd [kernel source code]
$ docker run -v "${PWD}:${PWD}:rw" -w "${PWD}" --privileged --rm -it \
--pull always mptcp/mptcp-upstream-virtme-docker:latest \
auto-debug
For more details:
https://github.com/multipath-tcp/mptcp-upstream-virtme-docker
Please note that despite all the efforts that have been already done to have a
stable tests suite when executed on a public CI like here, it is possible some
reported issues are not due to your modifications. Still, do not hesitate to
help us improve that ;-)
Cheers,
MPTCP GH Action bot
Bot operated by Matthieu Baerts (Tessares)
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH mptcp-next v9 0/4] refactor push pending
2022-10-18 11:03 [PATCH mptcp-next v9 0/4] refactor push pending Geliang Tang
` (3 preceding siblings ...)
2022-10-18 11:03 ` [PATCH mptcp-next v9 4/4] mptcp: refactor push_pending logic Geliang Tang
@ 2022-10-19 0:16 ` Mat Martineau
2022-10-20 14:24 ` Matthieu Baerts
4 siblings, 1 reply; 10+ messages in thread
From: Mat Martineau @ 2022-10-19 0:16 UTC (permalink / raw)
To: Geliang Tang; +Cc: mptcp
On Tue, 18 Oct 2022, Geliang Tang wrote:
> v9:
> - move "register default scheduler" out of this series.
> - add missing "first = false" in patch 4
> - patch 2 could be merged before the commit "mptcp: add
> struct mptcp_sched_ops" as a non-bpf cleanup.
>
The first 3 patches look good to me:
Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
I have some other comments for patch 4.
> v8:
> - merge "register default scheduler" into this series.
> - Move "multi-subflow scheduling" out of this series.
> - This series should be merged between the commit "mptcp: add get_subflow
> wrappers" and "bpf: Add bpf_mptcp_sched_ops", except the squash-to patches.
>
> v7:
> - update delegate sending in patch 10
> - rebase to export/20221011T063543.
>
> v6:
> - drop all msk->last_snd, add last_snd variable instead in patch 13
> - fix lock_sock issue in patch 3
> - merge squash-to patches
>
> v5:
> - address Mat's comments in v4.
>
> v4:
> - update __mptcp_subflow_push_pending as Mat suggested.
> - add more patches from "BPF redundant scheduler" series.
>
> v3:
> - add a cleanup patch.
> - remove msk->last_snd in mptcp_subflow_get_send().
> - add the loop that calls the scheduler again in __mptcp_push_pending().
>
> v2:
> - add snd_burst check in dfrags loop as Mat suggested.
>
> Refactor __mptcp_push_pending() and __mptcp_subflow_push_pending() to
> remove duplicate code and support redundant scheduler more easily in
> __mptcp_subflow_push_pending().
>
> Geliang Tang (4):
> Squash to "mptcp: add get_subflow wrappers"
> mptcp: use msk instead of mptcp_sk
> mptcp: change 'first' as a parameter
> mptcp: refactor push_pending logic
>
> net/mptcp/protocol.c | 183 +++++++++++++++++++------------------------
> net/mptcp/protocol.h | 4 +-
> net/mptcp/sched.c | 61 +++++++--------
> 3 files changed, 111 insertions(+), 137 deletions(-)
>
> --
> 2.35.3
>
>
>
--
Mat Martineau
Intel
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH mptcp-next v9 0/4] refactor push pending
2022-10-19 0:16 ` [PATCH mptcp-next v9 0/4] refactor push pending Mat Martineau
@ 2022-10-20 14:24 ` Matthieu Baerts
0 siblings, 0 replies; 10+ messages in thread
From: Matthieu Baerts @ 2022-10-20 14:24 UTC (permalink / raw)
To: Mat Martineau, Geliang Tang; +Cc: mptcp
Hi Geliang, Mat,
On 19/10/2022 02:16, Mat Martineau wrote:
> On Tue, 18 Oct 2022, Geliang Tang wrote:
>
>> v9:
>> - move "register default scheduler" out of this series.
>> - add missing "first = false" in patch 4
>> - patch 2 could be merged before the commit "mptcp: add
>> struct mptcp_sched_ops" as a non-bpf cleanup.
>>
>
> The first 3 patches look good to me:
>
> Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Thank you for the patches and the reviews!
I just added these patches in our tree (feat. with net-next) with Mat's
RvB tag:
- f7b33377ef44: "squashed" patch 1/4 in "mptcp: add get_subflow wrappers"
- abc20912434f: tg:msg: update commit message
- 810d6dba28d6: mptcp: use msk instead of mptcp_sk
- 3afbefdac9ae: mptcp: change 'first' as a parameter
- Results: e4c052c32f02..1f94545f93dd (export)
Tests are now in progress:
https://cirrus-ci.com/github/multipath-tcp/mptcp_net-next/export/20221020T142332
Cheers,
Matt
--
Tessares | Belgium | Hybrid Access Solutions
www.tessares.net
^ permalink raw reply [flat|nested] 10+ messages in thread