netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net 0/6] mptcp: Miscellaneous fixes
@ 2021-02-11 23:30 Mat Martineau
  2021-02-11 23:30 ` [PATCH net 1/6] mptcp: deliver ssk errors to msk Mat Martineau
                   ` (6 more replies)
  0 siblings, 7 replies; 8+ messages in thread
From: Mat Martineau @ 2021-02-11 23:30 UTC (permalink / raw)
  To: netdev; +Cc: Mat Martineau, davem, kuba, mptcp, matthieu.baerts

Here are some MPTCP fixes for the -net tree, addressing various issues
we have seen thanks to syzkaller and other testing:

Patch 1 correctly propagates errors at connection time and for TCP
fallback connections.

Patch 2 sets the expected poll() events on SEND_SHUTDOWN.

Patch 3 fixes a retranmit crash and unneeded retransmissions.

Patch 4 fixes possible uninitialized data on the error path during
socket creation.

Patch 5 addresses a problem with MPTCP window updates.

Patch 6 fixes a case where MPTCP retransmission can get stuck.


Paolo Abeni (6):
  mptcp: deliver ssk errors to msk
  mptcp: fix poll after shutdown
  mptcp: fix spurious retransmissions
  mptcp: init mptcp request socket earlier
  mptcp: better msk receive window updates
  mptcp: add a missing retransmission timer scheduling

 net/mptcp/options.c  | 10 +++---
 net/mptcp/protocol.c | 55 +++++++++++++++++++----------
 net/mptcp/protocol.h | 18 ++++------
 net/mptcp/subflow.c  | 83 +++++++++++++++++++++++++++++++-------------
 4 files changed, 107 insertions(+), 59 deletions(-)


base-commit: 8a28af7a3e85ddf358f8c41e401a33002f7a9587
-- 
2.30.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH net 1/6] mptcp: deliver ssk errors to msk
  2021-02-11 23:30 [PATCH net 0/6] mptcp: Miscellaneous fixes Mat Martineau
@ 2021-02-11 23:30 ` Mat Martineau
  2021-02-11 23:30 ` [PATCH net 2/6] mptcp: fix poll after shutdown Mat Martineau
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Mat Martineau @ 2021-02-11 23:30 UTC (permalink / raw)
  To: netdev; +Cc: Paolo Abeni, davem, kuba, mptcp, matthieu.baerts, Mat Martineau

From: Paolo Abeni <pabeni@redhat.com>

Currently all errors received on msk subflows are ignored.
We need to catch at least the errors on connect() and
on fallback sockets.

Use a custom sk_error_report callback at subflow level,
and do the real action under the msk socket lock - via
the usual sock_owned_by_user()/release_callback() schema.

Fixes: 6e628cd3a8f7 ("mptcp: use mptcp release_cb for delayed tasks")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
---
 net/mptcp/protocol.c |  7 +++++++
 net/mptcp/protocol.h |  4 ++++
 net/mptcp/subflow.c  | 43 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 54 insertions(+)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index f998a077c7dd..9eecd1383d24 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2959,6 +2959,8 @@ static void mptcp_release_cb(struct sock *sk)
 		mptcp_push_pending(sk, 0);
 		spin_lock_bh(&sk->sk_lock.slock);
 	}
+	if (test_and_clear_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->flags))
+		__mptcp_error_report(sk);
 
 	/* clear any wmem reservation and errors */
 	__mptcp_update_wmem(sk);
@@ -3355,6 +3357,11 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
 	if (sk->sk_shutdown & RCV_SHUTDOWN)
 		mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
 
+	/* This barrier is coupled with smp_wmb() in tcp_reset() */
+	smp_rmb();
+	if (sk->sk_err)
+		mask |= EPOLLERR;
+
 	return mask;
 }
 
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index d67de793d363..0743f48a0644 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -95,6 +95,7 @@
 #define MPTCP_WORK_CLOSE_SUBFLOW 5
 #define MPTCP_PUSH_PENDING	6
 #define MPTCP_CLEAN_UNA		7
+#define MPTCP_ERROR_REPORT	8
 
 static inline bool before64(__u64 seq1, __u64 seq2)
 {
@@ -414,6 +415,7 @@ struct mptcp_subflow_context {
 	void	(*tcp_data_ready)(struct sock *sk);
 	void	(*tcp_state_change)(struct sock *sk);
 	void	(*tcp_write_space)(struct sock *sk);
+	void	(*tcp_error_report)(struct sock *sk);
 
 	struct	rcu_head rcu;
 };
@@ -478,6 +480,7 @@ static inline void mptcp_subflow_tcp_fallback(struct sock *sk,
 	sk->sk_data_ready = ctx->tcp_data_ready;
 	sk->sk_state_change = ctx->tcp_state_change;
 	sk->sk_write_space = ctx->tcp_write_space;
+	sk->sk_error_report = ctx->tcp_error_report;
 
 	inet_csk(sk)->icsk_af_ops = ctx->icsk_af_ops;
 }
@@ -505,6 +508,7 @@ bool mptcp_finish_join(struct sock *sk);
 bool mptcp_schedule_work(struct sock *sk);
 void __mptcp_check_push(struct sock *sk, struct sock *ssk);
 void __mptcp_data_acked(struct sock *sk);
+void __mptcp_error_report(struct sock *sk);
 void mptcp_subflow_eof(struct sock *sk);
 bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit);
 void __mptcp_flush_join_list(struct mptcp_sock *msk);
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 278cbe3e539e..0ec3ccac6bb4 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1043,6 +1043,46 @@ static void subflow_write_space(struct sock *ssk)
 	/* we take action in __mptcp_clean_una() */
 }
 
+void __mptcp_error_report(struct sock *sk)
+{
+	struct mptcp_subflow_context *subflow;
+	struct mptcp_sock *msk = mptcp_sk(sk);
+
+	mptcp_for_each_subflow(msk, subflow) {
+		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+		int err = sock_error(ssk);
+
+		if (!err)
+			continue;
+
+		/* only propagate errors on fallen-back sockets or
+		 * on MPC connect
+		 */
+		if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(msk))
+			continue;
+
+		inet_sk_state_store(sk, inet_sk_state_load(ssk));
+		sk->sk_err = -err;
+
+		/* This barrier is coupled with smp_rmb() in mptcp_poll() */
+		smp_wmb();
+		sk->sk_error_report(sk);
+		break;
+	}
+}
+
+static void subflow_error_report(struct sock *ssk)
+{
+	struct sock *sk = mptcp_subflow_ctx(ssk)->conn;
+
+	mptcp_data_lock(sk);
+	if (!sock_owned_by_user(sk))
+		__mptcp_error_report(sk);
+	else
+		set_bit(MPTCP_ERROR_REPORT,  &mptcp_sk(sk)->flags);
+	mptcp_data_unlock(sk);
+}
+
 static struct inet_connection_sock_af_ops *
 subflow_default_af_ops(struct sock *sk)
 {
@@ -1352,9 +1392,11 @@ static int subflow_ulp_init(struct sock *sk)
 	ctx->tcp_data_ready = sk->sk_data_ready;
 	ctx->tcp_state_change = sk->sk_state_change;
 	ctx->tcp_write_space = sk->sk_write_space;
+	ctx->tcp_error_report = sk->sk_error_report;
 	sk->sk_data_ready = subflow_data_ready;
 	sk->sk_write_space = subflow_write_space;
 	sk->sk_state_change = subflow_state_change;
+	sk->sk_error_report = subflow_error_report;
 out:
 	return err;
 }
@@ -1407,6 +1449,7 @@ static void subflow_ulp_clone(const struct request_sock *req,
 	new_ctx->tcp_data_ready = old_ctx->tcp_data_ready;
 	new_ctx->tcp_state_change = old_ctx->tcp_state_change;
 	new_ctx->tcp_write_space = old_ctx->tcp_write_space;
+	new_ctx->tcp_error_report = old_ctx->tcp_error_report;
 	new_ctx->rel_write_seq = 1;
 	new_ctx->tcp_sock = newsk;
 
-- 
2.30.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH net 2/6] mptcp: fix poll after shutdown
  2021-02-11 23:30 [PATCH net 0/6] mptcp: Miscellaneous fixes Mat Martineau
  2021-02-11 23:30 ` [PATCH net 1/6] mptcp: deliver ssk errors to msk Mat Martineau
@ 2021-02-11 23:30 ` Mat Martineau
  2021-02-11 23:30 ` [PATCH net 3/6] mptcp: fix spurious retransmissions Mat Martineau
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Mat Martineau @ 2021-02-11 23:30 UTC (permalink / raw)
  To: netdev; +Cc: Paolo Abeni, davem, kuba, mptcp, matthieu.baerts, Mat Martineau

From: Paolo Abeni <pabeni@redhat.com>

The current mptcp_poll() implementation gives unexpected
results after shutdown(SEND_SHUTDOWN) and when the msk
status is TCP_CLOSE.

Set the correct mask.

Fixes: 8edf08649eed ("mptcp: rework poll+nospace handling")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
---
 net/mptcp/protocol.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 9eecd1383d24..42ca49954bdd 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -3321,7 +3321,7 @@ static __poll_t mptcp_check_writeable(struct mptcp_sock *msk)
 	struct sock *sk = (struct sock *)msk;
 
 	if (unlikely(sk->sk_shutdown & SEND_SHUTDOWN))
-		return 0;
+		return EPOLLOUT | EPOLLWRNORM;
 
 	if (sk_stream_is_writeable(sk))
 		return EPOLLOUT | EPOLLWRNORM;
@@ -3354,6 +3354,8 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
 		mask |= mptcp_check_readable(msk);
 		mask |= mptcp_check_writeable(msk);
 	}
+	if (sk->sk_shutdown == SHUTDOWN_MASK || state == TCP_CLOSE)
+		mask |= EPOLLHUP;
 	if (sk->sk_shutdown & RCV_SHUTDOWN)
 		mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
 
-- 
2.30.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH net 3/6] mptcp: fix spurious retransmissions
  2021-02-11 23:30 [PATCH net 0/6] mptcp: Miscellaneous fixes Mat Martineau
  2021-02-11 23:30 ` [PATCH net 1/6] mptcp: deliver ssk errors to msk Mat Martineau
  2021-02-11 23:30 ` [PATCH net 2/6] mptcp: fix poll after shutdown Mat Martineau
@ 2021-02-11 23:30 ` Mat Martineau
  2021-02-11 23:30 ` [PATCH net 4/6] mptcp: init mptcp request socket earlier Mat Martineau
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Mat Martineau @ 2021-02-11 23:30 UTC (permalink / raw)
  To: netdev
  Cc: Paolo Abeni, davem, kuba, mptcp, matthieu.baerts,
	Christoph Paasch, Mat Martineau

From: Paolo Abeni <pabeni@redhat.com>

Syzkaller was able to trigger the following splat again:

WARNING: CPU: 1 PID: 12512 at net/mptcp/protocol.c:761 mptcp_reset_timer+0x12a/0x160 net/mptcp/protocol.c:761
Modules linked in:
CPU: 1 PID: 12512 Comm: kworker/1:6 Not tainted 5.10.0-rc6 #52
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
Workqueue: events mptcp_worker
RIP: 0010:mptcp_reset_timer+0x12a/0x160 net/mptcp/protocol.c:761
Code: e8 4b 0c ad ff e8 56 21 88 fe 48 b8 00 00 00 00 00 fc ff df 48 c7 04 03 00 00 00 00 48 83 c4 40 5b 5d 41 5c c3 e8 36 21 88 fe <0f> 0b 41 bc c8 00 00 00 eb 98 e8 e7 b1 af fe e9 30 ff ff ff 48 c7
RSP: 0018:ffffc900018c7c68 EFLAGS: 00010293
RAX: ffff888108cb1c80 RBX: 1ffff92000318f8d RCX: ffffffff82ad0307
RDX: 0000000000000000 RSI: ffffffff82ad036a RDI: 0000000000000007
RBP: ffff888113e2d000 R08: ffff888108cb1c80 R09: ffffed10227c5ab7
R10: ffff888113e2d5b7 R11: ffffed10227c5ab6 R12: 0000000000000000
R13: ffff88801f100000 R14: ffff888113e2d5b0 R15: 0000000000000001
FS:  0000000000000000(0000) GS:ffff88811b500000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007fd76a874ef8 CR3: 000000001689c005 CR4: 0000000000170ee0
Call Trace:
 mptcp_worker+0xaa4/0x1560 net/mptcp/protocol.c:2334
 process_one_work+0x8d3/0x1200 kernel/workqueue.c:2272
 worker_thread+0x9c/0x1090 kernel/workqueue.c:2418
 kthread+0x303/0x410 kernel/kthread.c:292
 ret_from_fork+0x22/0x30 arch/x86/entry/entry_64.S:296

The mptcp_worker tries to update the MPTCP retransmission timer
even if such timer is not currently scheduled.

The mptcp_rtx_head() return value is bogus: we can have enqueued
data not yet transmitted. The above may additionally cause spurious,
unneeded MPTCP-level retransmissions.

Fix the issue adding an explicit clearing of the rtx queue before
trying to retransmit and checking for unacked data.
Additionally drop an unneeded timer stop call and the unused
mptcp_rtx_tail() helper.

Reported-by: Christoph Paasch <cpaasch@apple.com>
Fixes: 6e628cd3a8f7 ("mptcp: use mptcp release_cb for delayed tasks")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
---
 net/mptcp/protocol.c |  3 +--
 net/mptcp/protocol.h | 11 ++---------
 2 files changed, 3 insertions(+), 11 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 42ca49954bdd..c11fcf6f5faf 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -364,8 +364,6 @@ static void mptcp_check_data_fin_ack(struct sock *sk)
 
 	/* Look for an acknowledged DATA_FIN */
 	if (mptcp_pending_data_fin_ack(sk)) {
-		mptcp_stop_timer(sk);
-
 		WRITE_ONCE(msk->snd_data_fin_enable, 0);
 
 		switch (sk->sk_state) {
@@ -2299,6 +2297,7 @@ static void mptcp_worker(struct work_struct *work)
 	if (!test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
 		goto unlock;
 
+	__mptcp_clean_una(sk);
 	dfrag = mptcp_rtx_head(sk);
 	if (!dfrag)
 		goto unlock;
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 0743f48a0644..6a164add5b6f 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -326,20 +326,13 @@ static inline struct mptcp_data_frag *mptcp_pending_tail(const struct sock *sk)
 	return list_last_entry(&msk->rtx_queue, struct mptcp_data_frag, list);
 }
 
-static inline struct mptcp_data_frag *mptcp_rtx_tail(const struct sock *sk)
+static inline struct mptcp_data_frag *mptcp_rtx_head(const struct sock *sk)
 {
 	struct mptcp_sock *msk = mptcp_sk(sk);
 
-	if (!before64(msk->snd_nxt, READ_ONCE(msk->snd_una)))
+	if (msk->snd_una == READ_ONCE(msk->snd_nxt))
 		return NULL;
 
-	return list_last_entry(&msk->rtx_queue, struct mptcp_data_frag, list);
-}
-
-static inline struct mptcp_data_frag *mptcp_rtx_head(const struct sock *sk)
-{
-	struct mptcp_sock *msk = mptcp_sk(sk);
-
 	return list_first_entry_or_null(&msk->rtx_queue, struct mptcp_data_frag, list);
 }
 
-- 
2.30.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH net 4/6] mptcp: init mptcp request socket earlier
  2021-02-11 23:30 [PATCH net 0/6] mptcp: Miscellaneous fixes Mat Martineau
                   ` (2 preceding siblings ...)
  2021-02-11 23:30 ` [PATCH net 3/6] mptcp: fix spurious retransmissions Mat Martineau
@ 2021-02-11 23:30 ` Mat Martineau
  2021-02-11 23:30 ` [PATCH net 5/6] mptcp: better msk receive window updates Mat Martineau
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Mat Martineau @ 2021-02-11 23:30 UTC (permalink / raw)
  To: netdev
  Cc: Paolo Abeni, davem, kuba, mptcp, matthieu.baerts,
	Christoph Paasch, Mat Martineau

From: Paolo Abeni <pabeni@redhat.com>

The mptcp subflow route_req() callback performs the subflow
req initialization after the route_req() check. If the latter
fails, mptcp-specific bits of the current request sockets
are left uninitialized.

The above causes bad things at req socket disposal time, when
the mptcp resources are cleared.

This change addresses the issue by splitting subflow_init_req()
into the actual initialization and the mptcp-specific checks.
The initialization is moved before any possibly failing check.

Reported-by: Christoph Paasch <cpaasch@apple.com>
Fixes: 7ea851d19b23 ("tcp: merge 'init_req' and 'route_req' functions")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
---
 net/mptcp/subflow.c | 40 ++++++++++++++++------------------------
 1 file changed, 16 insertions(+), 24 deletions(-)

diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 0ec3ccac6bb4..8b2338dfdc80 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -92,7 +92,7 @@ static struct mptcp_sock *subflow_token_join_request(struct request_sock *req,
 	return msk;
 }
 
-static int __subflow_init_req(struct request_sock *req, const struct sock *sk_listener)
+static void subflow_init_req(struct request_sock *req, const struct sock *sk_listener)
 {
 	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
 
@@ -100,16 +100,6 @@ static int __subflow_init_req(struct request_sock *req, const struct sock *sk_li
 	subflow_req->mp_join = 0;
 	subflow_req->msk = NULL;
 	mptcp_token_init_request(req);
-
-#ifdef CONFIG_TCP_MD5SIG
-	/* no MPTCP if MD5SIG is enabled on this socket or we may run out of
-	 * TCP option space.
-	 */
-	if (rcu_access_pointer(tcp_sk(sk_listener)->md5sig_info))
-		return -EINVAL;
-#endif
-
-	return 0;
 }
 
 /* Init mptcp request socket.
@@ -117,20 +107,23 @@ static int __subflow_init_req(struct request_sock *req, const struct sock *sk_li
  * Returns an error code if a JOIN has failed and a TCP reset
  * should be sent.
  */
-static int subflow_init_req(struct request_sock *req,
-			    const struct sock *sk_listener,
-			    struct sk_buff *skb)
+static int subflow_check_req(struct request_sock *req,
+			     const struct sock *sk_listener,
+			     struct sk_buff *skb)
 {
 	struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener);
 	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
 	struct mptcp_options_received mp_opt;
-	int ret;
 
 	pr_debug("subflow_req=%p, listener=%p", subflow_req, listener);
 
-	ret = __subflow_init_req(req, sk_listener);
-	if (ret)
-		return 0;
+#ifdef CONFIG_TCP_MD5SIG
+	/* no MPTCP if MD5SIG is enabled on this socket or we may run out of
+	 * TCP option space.
+	 */
+	if (rcu_access_pointer(tcp_sk(sk_listener)->md5sig_info))
+		return -EINVAL;
+#endif
 
 	mptcp_get_options(skb, &mp_opt);
 
@@ -205,10 +198,7 @@ int mptcp_subflow_init_cookie_req(struct request_sock *req,
 	struct mptcp_options_received mp_opt;
 	int err;
 
-	err = __subflow_init_req(req, sk_listener);
-	if (err)
-		return err;
-
+	subflow_init_req(req, sk_listener);
 	mptcp_get_options(skb, &mp_opt);
 
 	if (mp_opt.mp_capable && mp_opt.mp_join)
@@ -248,12 +238,13 @@ static struct dst_entry *subflow_v4_route_req(const struct sock *sk,
 	int err;
 
 	tcp_rsk(req)->is_mptcp = 1;
+	subflow_init_req(req, sk);
 
 	dst = tcp_request_sock_ipv4_ops.route_req(sk, skb, fl, req);
 	if (!dst)
 		return NULL;
 
-	err = subflow_init_req(req, sk, skb);
+	err = subflow_check_req(req, sk, skb);
 	if (err == 0)
 		return dst;
 
@@ -273,12 +264,13 @@ static struct dst_entry *subflow_v6_route_req(const struct sock *sk,
 	int err;
 
 	tcp_rsk(req)->is_mptcp = 1;
+	subflow_init_req(req, sk);
 
 	dst = tcp_request_sock_ipv6_ops.route_req(sk, skb, fl, req);
 	if (!dst)
 		return NULL;
 
-	err = subflow_init_req(req, sk, skb);
+	err = subflow_check_req(req, sk, skb);
 	if (err == 0)
 		return dst;
 
-- 
2.30.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH net 5/6] mptcp: better msk receive window updates
  2021-02-11 23:30 [PATCH net 0/6] mptcp: Miscellaneous fixes Mat Martineau
                   ` (3 preceding siblings ...)
  2021-02-11 23:30 ` [PATCH net 4/6] mptcp: init mptcp request socket earlier Mat Martineau
@ 2021-02-11 23:30 ` Mat Martineau
  2021-02-11 23:30 ` [PATCH net 6/6] mptcp: add a missing retransmission timer scheduling Mat Martineau
  2021-02-12  2:50 ` [PATCH net 0/6] mptcp: Miscellaneous fixes patchwork-bot+netdevbpf
  6 siblings, 0 replies; 8+ messages in thread
From: Mat Martineau @ 2021-02-11 23:30 UTC (permalink / raw)
  To: netdev; +Cc: Paolo Abeni, davem, kuba, mptcp, matthieu.baerts, Mat Martineau

From: Paolo Abeni <pabeni@redhat.com>

Move mptcp_cleanup_rbuf() related checks inside the mentioned
helper and extend them to mirror TCP checks more closely.

Additionally drop the 'rmem_pending' hack, since commit 879526030c8b
("mptcp: protect the rx path with the msk socket spinlock") we
can use instead 'rmem_released'.

Fixes: ea4ca586b16f ("mptcp: refine MPTCP-level ack scheduling")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
---
 net/mptcp/options.c  |  7 ++++---
 net/mptcp/protocol.c | 38 ++++++++++++++++++++++----------------
 net/mptcp/protocol.h |  3 +--
 3 files changed, 27 insertions(+), 21 deletions(-)

diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index e0d21c0607e5..82a37cc26776 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -498,8 +498,8 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
 {
 	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
 	struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+	u64 snd_data_fin_enable, ack_seq;
 	unsigned int dss_size = 0;
-	u64 snd_data_fin_enable;
 	struct mptcp_ext *mpext;
 	unsigned int ack_size;
 	bool ret = false;
@@ -531,13 +531,14 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
 		return ret;
 	}
 
+	ack_seq = READ_ONCE(msk->ack_seq);
 	if (READ_ONCE(msk->use_64bit_ack)) {
 		ack_size = TCPOLEN_MPTCP_DSS_ACK64;
-		opts->ext_copy.data_ack = READ_ONCE(msk->ack_seq);
+		opts->ext_copy.data_ack = ack_seq;
 		opts->ext_copy.ack64 = 1;
 	} else {
 		ack_size = TCPOLEN_MPTCP_DSS_ACK32;
-		opts->ext_copy.data_ack32 = (uint32_t)READ_ONCE(msk->ack_seq);
+		opts->ext_copy.data_ack32 = (uint32_t)ack_seq;
 		opts->ext_copy.ack64 = 0;
 	}
 	opts->ext_copy.use_ack = 1;
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index c11fcf6f5faf..0dbf5cbd7e56 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -457,7 +457,18 @@ static bool mptcp_subflow_cleanup_rbuf(struct sock *ssk)
 static void mptcp_cleanup_rbuf(struct mptcp_sock *msk)
 {
 	struct sock *ack_hint = READ_ONCE(msk->ack_hint);
+	int old_space = READ_ONCE(msk->old_wspace);
 	struct mptcp_subflow_context *subflow;
+	struct sock *sk = (struct sock *)msk;
+	bool cleanup;
+
+	/* this is a simple superset of what tcp_cleanup_rbuf() implements
+	 * so that we don't have to acquire the ssk socket lock most of the time
+	 * to do actually nothing
+	 */
+	cleanup = __mptcp_space(sk) - old_space >= max(0, old_space);
+	if (!cleanup)
+		return;
 
 	/* if the hinted ssk is still active, try to use it */
 	if (likely(ack_hint)) {
@@ -1865,7 +1876,7 @@ static void __mptcp_splice_receive_queue(struct sock *sk)
 	skb_queue_splice_tail_init(&sk->sk_receive_queue, &msk->receive_queue);
 }
 
-static bool __mptcp_move_skbs(struct mptcp_sock *msk, unsigned int rcv)
+static bool __mptcp_move_skbs(struct mptcp_sock *msk)
 {
 	struct sock *sk = (struct sock *)msk;
 	unsigned int moved = 0;
@@ -1885,13 +1896,10 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk, unsigned int rcv)
 
 		slowpath = lock_sock_fast(ssk);
 		mptcp_data_lock(sk);
+		__mptcp_update_rmem(sk);
 		done = __mptcp_move_skbs_from_subflow(msk, ssk, &moved);
 		mptcp_data_unlock(sk);
-		if (moved && rcv) {
-			WRITE_ONCE(msk->rmem_pending, min(rcv, moved));
-			tcp_cleanup_rbuf(ssk, 1);
-			WRITE_ONCE(msk->rmem_pending, 0);
-		}
+		tcp_cleanup_rbuf(ssk, moved);
 		unlock_sock_fast(ssk, slowpath);
 	} while (!done);
 
@@ -1904,6 +1912,7 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk, unsigned int rcv)
 		ret |= __mptcp_ofo_queue(msk);
 		__mptcp_splice_receive_queue(sk);
 		mptcp_data_unlock(sk);
+		mptcp_cleanup_rbuf(msk);
 	}
 	if (ret)
 		mptcp_check_data_fin((struct sock *)msk);
@@ -1933,7 +1942,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
 
 	while (copied < len) {
-		int bytes_read, old_space;
+		int bytes_read;
 
 		bytes_read = __mptcp_recvmsg_mskq(msk, msg, len - copied);
 		if (unlikely(bytes_read < 0)) {
@@ -1944,14 +1953,11 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 
 		copied += bytes_read;
 
-		if (skb_queue_empty(&msk->receive_queue) &&
-		    __mptcp_move_skbs(msk, len - copied))
-			continue;
-
 		/* be sure to advertise window change */
-		old_space = READ_ONCE(msk->old_wspace);
-		if ((tcp_space(sk) - old_space) >= old_space)
-			mptcp_cleanup_rbuf(msk);
+		mptcp_cleanup_rbuf(msk);
+
+		if (skb_queue_empty(&msk->receive_queue) && __mptcp_move_skbs(msk))
+			continue;
 
 		/* only the master socket status is relevant here. The exit
 		 * conditions mirror closely tcp_recvmsg()
@@ -1979,7 +1985,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 				/* race breaker: the shutdown could be after the
 				 * previous receive queue check
 				 */
-				if (__mptcp_move_skbs(msk, len - copied))
+				if (__mptcp_move_skbs(msk))
 					continue;
 				break;
 			}
@@ -2012,7 +2018,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 		/* .. race-breaker: ssk might have gotten new data
 		 * after last __mptcp_move_skbs() returned false.
 		 */
-		if (unlikely(__mptcp_move_skbs(msk, 0)))
+		if (unlikely(__mptcp_move_skbs(msk)))
 			set_bit(MPTCP_DATA_READY, &msk->flags);
 	} else if (unlikely(!test_bit(MPTCP_DATA_READY, &msk->flags))) {
 		/* data to read but mptcp_wait_data() cleared DATA_READY */
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 6a164add5b6f..8d9f0ff10cb8 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -234,7 +234,6 @@ struct mptcp_sock {
 	u64		wnd_end;
 	unsigned long	timer_ival;
 	u32		token;
-	int		rmem_pending;
 	int		rmem_released;
 	unsigned long	flags;
 	bool		can_ack;
@@ -293,7 +292,7 @@ static inline struct mptcp_sock *mptcp_sk(const struct sock *sk)
 
 static inline int __mptcp_space(const struct sock *sk)
 {
-	return tcp_space(sk) + READ_ONCE(mptcp_sk(sk)->rmem_pending);
+	return tcp_space(sk) + READ_ONCE(mptcp_sk(sk)->rmem_released);
 }
 
 static inline struct mptcp_data_frag *mptcp_send_head(const struct sock *sk)
-- 
2.30.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH net 6/6] mptcp: add a missing retransmission timer scheduling
  2021-02-11 23:30 [PATCH net 0/6] mptcp: Miscellaneous fixes Mat Martineau
                   ` (4 preceding siblings ...)
  2021-02-11 23:30 ` [PATCH net 5/6] mptcp: better msk receive window updates Mat Martineau
@ 2021-02-11 23:30 ` Mat Martineau
  2021-02-12  2:50 ` [PATCH net 0/6] mptcp: Miscellaneous fixes patchwork-bot+netdevbpf
  6 siblings, 0 replies; 8+ messages in thread
From: Mat Martineau @ 2021-02-11 23:30 UTC (permalink / raw)
  To: netdev; +Cc: Paolo Abeni, davem, kuba, mptcp, matthieu.baerts, Mat Martineau

From: Paolo Abeni <pabeni@redhat.com>

Currently we do not schedule the MPTCP retransmission
timer after pushing the data when such action happens
in the subflow context.

This may cause hang-up on active-backup scenarios, or
even when only single subflow msks are involved, if we lost
some peer's ack.

Fixes: 6e628cd3a8f7 ("mptcp: use mptcp release_cb for delayed tasks")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
---
 net/mptcp/options.c  | 3 +--
 net/mptcp/protocol.c | 3 +++
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 82a37cc26776..8fec3dabe109 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -880,8 +880,7 @@ static void ack_update_msk(struct mptcp_sock *msk,
 		msk->wnd_end = new_wnd_end;
 
 	/* this assumes mptcp_incoming_options() is invoked after tcp_ack() */
-	if (after64(msk->wnd_end, READ_ONCE(msk->snd_nxt)) &&
-	    sk_stream_memory_free(ssk))
+	if (after64(msk->wnd_end, READ_ONCE(msk->snd_nxt)))
 		__mptcp_check_push(sk, ssk);
 
 	if (after64(new_snd_una, old_snd_una)) {
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 0dbf5cbd7e56..06da6ad31c87 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1582,6 +1582,9 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
 		mptcp_set_timeout(sk, ssk);
 		tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
 			 info.size_goal);
+		if (!mptcp_timer_pending(sk))
+			mptcp_reset_timer(sk);
+
 		if (msk->snd_data_fin_enable &&
 		    msk->snd_nxt + 1 == msk->write_seq)
 			mptcp_schedule_work(sk);
-- 
2.30.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH net 0/6] mptcp: Miscellaneous fixes
  2021-02-11 23:30 [PATCH net 0/6] mptcp: Miscellaneous fixes Mat Martineau
                   ` (5 preceding siblings ...)
  2021-02-11 23:30 ` [PATCH net 6/6] mptcp: add a missing retransmission timer scheduling Mat Martineau
@ 2021-02-12  2:50 ` patchwork-bot+netdevbpf
  6 siblings, 0 replies; 8+ messages in thread
From: patchwork-bot+netdevbpf @ 2021-02-12  2:50 UTC (permalink / raw)
  To: Mat Martineau; +Cc: netdev, davem, kuba, mptcp, matthieu.baerts

Hello:

This series was applied to netdev/net.git (refs/heads/master):

On Thu, 11 Feb 2021 15:30:36 -0800 you wrote:
> Here are some MPTCP fixes for the -net tree, addressing various issues
> we have seen thanks to syzkaller and other testing:
> 
> Patch 1 correctly propagates errors at connection time and for TCP
> fallback connections.
> 
> Patch 2 sets the expected poll() events on SEND_SHUTDOWN.
> 
> [...]

Here is the summary with links:
  - [net,1/6] mptcp: deliver ssk errors to msk
    https://git.kernel.org/netdev/net/c/15cc10453398
  - [net,2/6] mptcp: fix poll after shutdown
    https://git.kernel.org/netdev/net/c/dd913410b0a4
  - [net,3/6] mptcp: fix spurious retransmissions
    https://git.kernel.org/netdev/net/c/64b9cea7a0af
  - [net,4/6] mptcp: init mptcp request socket earlier
    https://git.kernel.org/netdev/net/c/d8b59efa6406
  - [net,5/6] mptcp: better msk receive window updates
    https://git.kernel.org/netdev/net/c/e3859603ba13
  - [net,6/6] mptcp: add a missing retransmission timer scheduling
    https://git.kernel.org/netdev/net/c/d09d818ec2ed

You are awesome, thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html



^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2021-02-12  2:53 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2021-02-11 23:30 [PATCH net 0/6] mptcp: Miscellaneous fixes Mat Martineau
2021-02-11 23:30 ` [PATCH net 1/6] mptcp: deliver ssk errors to msk Mat Martineau
2021-02-11 23:30 ` [PATCH net 2/6] mptcp: fix poll after shutdown Mat Martineau
2021-02-11 23:30 ` [PATCH net 3/6] mptcp: fix spurious retransmissions Mat Martineau
2021-02-11 23:30 ` [PATCH net 4/6] mptcp: init mptcp request socket earlier Mat Martineau
2021-02-11 23:30 ` [PATCH net 5/6] mptcp: better msk receive window updates Mat Martineau
2021-02-11 23:30 ` [PATCH net 6/6] mptcp: add a missing retransmission timer scheduling Mat Martineau
2021-02-12  2:50 ` [PATCH net 0/6] mptcp: Miscellaneous fixes patchwork-bot+netdevbpf

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).