* Re: mptcp: implement connection level timeout.: Tests Results
2023-09-01 11:11 ` [PATCH mptcp-next v4 2/2] mptcp: implement connection level timeout Paolo Abeni
@ 2023-09-01 12:22 ` MPTCP CI
2023-09-02 0:23 ` [PATCH mptcp-next v4 2/2] mptcp: implement connection level timeout Mat Martineau
2023-09-05 21:59 ` Mat Martineau
2 siblings, 0 replies; 7+ messages in thread
From: MPTCP CI @ 2023-09-01 12:22 UTC (permalink / raw)
To: Paolo Abeni; +Cc: mptcp
Hi Paolo,
Thank you for your modifications, that's great!
Our CI did some validations and here is its report:
- KVM Validation: normal (except selftest_mptcp_join):
- Success! ✅:
- Task: https://cirrus-ci.com/task/6412989272686592
- Summary: https://api.cirrus-ci.com/v1/artifact/task/6412989272686592/summary/summary.txt
- KVM Validation: debug (only selftest_mptcp_join):
- Success! ✅:
- Task: https://cirrus-ci.com/task/5568564342554624
- Summary: https://api.cirrus-ci.com/v1/artifact/task/5568564342554624/summary/summary.txt
- KVM Validation: normal (only selftest_mptcp_join):
- Success! ✅:
- Task: https://cirrus-ci.com/task/5005614389133312
- Summary: https://api.cirrus-ci.com/v1/artifact/task/5005614389133312/summary/summary.txt
- KVM Validation: debug (except selftest_mptcp_join):
- Unstable: 2 failed test(s): packetdrill_mp_join packetdrill_sockopts 🔴:
- Task: https://cirrus-ci.com/task/6131514295975936
- Summary: https://api.cirrus-ci.com/v1/artifact/task/6131514295975936/summary/summary.txt
Initiator: Patchew Applier
Commits: https://github.com/multipath-tcp/mptcp_net-next/commits/28508caef7ff
If there are any issues, you can reproduce them in the same environment as
the one used by the CI, via a Docker image, e.g.:
$ cd [kernel source code]
$ docker run -v "${PWD}:${PWD}:rw" -w "${PWD}" --privileged --rm -it \
--pull always mptcp/mptcp-upstream-virtme-docker:latest \
auto-debug
For more details:
https://github.com/multipath-tcp/mptcp-upstream-virtme-docker
Please note that despite all the efforts already made to have a stable
test suite when executed on a public CI like this one, it is possible that
some reported issues are not due to your modifications. Still, do not
hesitate to help us improve that ;-)
Cheers,
MPTCP GH Action bot
Bot operated by Matthieu Baerts (Tessares)
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH mptcp-next v4 2/2] mptcp: implement connection level timeout.
2023-09-01 11:11 ` [PATCH mptcp-next v4 2/2] mptcp: implement connection level timeout Paolo Abeni
2023-09-01 12:22 ` mptcp: implement connection level timeout.: Tests Results MPTCP CI
@ 2023-09-02 0:23 ` Mat Martineau
2023-09-04 8:04 ` Paolo Abeni
2023-09-05 21:59 ` Mat Martineau
2 siblings, 1 reply; 7+ messages in thread
From: Mat Martineau @ 2023-09-02 0:23 UTC (permalink / raw)
To: Paolo Abeni; +Cc: mptcp
On Fri, 1 Sep 2023, Paolo Abeni wrote:
> According to RFC 8684 section 3.3:
>
> A connection is not closed unless [...] or an implementation-specific
> connection-level send timeout.
>
> Currently the MPTCP protocol does not implement such a timeout, and
> connections timing out at the TCP level never move to the close state.
>
> Introduce a catch-up condition at subflow close time to move the
> MPTCP socket to the close state, too.
>
> That additionally allows removing similar existing logic inside the worker.
>
> Finally, allow some additional timeout for plain ESTABLISHED MPTCP
> sockets, as the protocol allows creating new subflows even at that
> point, making the connection functional again.
>
> Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/430
> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> ---
> v2 -> v3:
> - fix integer overflow in mptcp_close_tout_expired()
> v1 -> v2:
> - really set the close timeout as needed.
> ---
> net/mptcp/protocol.c | 84 +++++++++++++++++++++-----------------------
> net/mptcp/protocol.h | 20 +++++++++++
> net/mptcp/subflow.c | 1 +
> 3 files changed, 62 insertions(+), 43 deletions(-)
>
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index 33093ed87077..80d06cab8bc7 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -886,6 +886,7 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
> mptcp_subflow_ctx(ssk)->subflow_id = msk->subflow_id++;
> mptcp_sockopt_sync_locked(msk, ssk);
> mptcp_subflow_joined(msk, ssk);
> + mptcp_stop_tout_timer(sk);
> return true;
> }
>
> @@ -2363,18 +2364,15 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
> bool dispose_it, need_push = false;
>
> /* If the first subflow moved to a close state before accept, e.g. due
> - * to an incoming reset, mptcp either:
> - * - if either the subflow or the msk are dead, destroy the context
> - * (the subflow socket is deleted by inet_child_forget) and the msk
> - * - otherwise do nothing at the moment and take action at accept and/or
> - * listener shutdown - user-space must be able to accept() the closed
> - * socket.
> + * to an incoming reset or listener shoutdown, the subflow socket is
'shutdown'
> + * already deleted by inet_child_forget() the mptcp socket can't survive
> + * too.
> */
> - if (msk->in_accept_queue && msk->first == ssk) {
> - if (!sock_flag(sk, SOCK_DEAD) && !sock_flag(ssk, SOCK_DEAD))
> - return;
> -
> + if (msk->in_accept_queue && msk->first == ssk &&
> + (sock_flag(sk, SOCK_DEAD) || sock_flag(ssk, SOCK_DEAD))) {
> /* ensure later check in mptcp_worker() will dispose the msk */
> + inet_csk(sk)->icsk_mtup.probe_timestamp = tcp_jiffies32 -
> + (TCP_TIMEWAIT_LEN + 1);
> sock_set_flag(sk, SOCK_DEAD);
> lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
> mptcp_subflow_drop_ctx(ssk);
> @@ -2437,6 +2435,22 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
> out:
> if (need_push)
> __mptcp_push_pending(sk, 0);
> +
> + /* Catch every 'all subflows closed' scenario, including peers silently
> + * closing them, e.g. due to timeout.
> + * For established sockets, allow an additional timeout before closing,
> + * as the protocol can still create more subflows.
> + */
> + if (list_is_singular(&msk->conn_list) && msk->first &&
> + inet_sk_state_load(msk->first) == TCP_CLOSE) {
> + if (sk->sk_state != TCP_ESTABLISHED ||
> + msk->in_accept_queue || sock_flag(sk, SOCK_DEAD)) {
> + inet_sk_state_store(sk, TCP_CLOSE);
> + mptcp_close_wake_up(sk);
> + } else {
> + mptcp_start_tout_timer(sk);
> + }
> + }
> }
>
> void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
> @@ -2480,23 +2494,14 @@ static void __mptcp_close_subflow(struct sock *sk)
>
> }
>
> -static bool mptcp_should_close(const struct sock *sk)
> +static bool mptcp_close_tout_expired(const struct sock *sk)
> {
> - s32 delta = tcp_jiffies32 - inet_csk(sk)->icsk_mtup.probe_timestamp;
> - struct mptcp_subflow_context *subflow;
> -
> - if (delta >= TCP_TIMEWAIT_LEN || mptcp_sk(sk)->in_accept_queue)
> - return true;
> + if (!inet_csk(sk)->icsk_mtup.probe_timestamp ||
> + sk->sk_state == TCP_CLOSE)
> + return false;
>
> - /* if all subflows are in closed status don't bother with additional
> - * timeout
> - */
> - mptcp_for_each_subflow(mptcp_sk(sk), subflow) {
> - if (inet_sk_state_load(mptcp_subflow_tcp_sock(subflow)) !=
> - TCP_CLOSE)
> - return false;
> - }
> - return true;
> + return time_after32(tcp_jiffies32,
> + inet_csk(sk)->icsk_mtup.probe_timestamp + TCP_TIMEWAIT_LEN);
> }
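For context, time_after32() in the hunk above is the kernel's wraparound-safe comparison for 32-bit jiffies-style counters. A minimal user-space sketch of the same idiom (the helper name here is mine, not the kernel's):

```c
#include <stdint.h>

/* Sketch of the time_after32() idiom: casting the unsigned difference
 * to signed makes "a is after b" come out right even when the 32-bit
 * counter has wrapped past 2^32 between the two samples.
 */
static int after32(uint32_t a, uint32_t b)
{
	return (int32_t)(b - a) < 0;	/* true when a is "after" b */
}
```

This is why the rewritten check is safe against the overflow fixed in v3: a plain `a > b` comparison would misorder timestamps that straddle a wrap.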
>
> static void mptcp_check_fastclose(struct mptcp_sock *msk)
> @@ -2635,7 +2640,7 @@ void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout)
> struct sock *sk = (struct sock *)msk;
> unsigned long timeout, close_timeout;
>
> - if (!fail_tout && !sock_flag(sk, SOCK_DEAD))
> + if (!fail_tout && !inet_csk(sk)->icsk_mtup.probe_timestamp)
> return;
>
> close_timeout = inet_csk(sk)->icsk_mtup.probe_timestamp - tcp_jiffies32 + jiffies + TCP_TIMEWAIT_LEN;
RFC 8684 brings up "break-before-make" scenarios in a few places; I think
one use case is VM migration. 60 seconds seems sufficient, but maybe
not?
There may also be scenarios with busy servers where the server-side admin
may want to quickly close "zero subflow" MPTCP connections immediately (or
with a shorter timeout) if their workload does not involve
break-before-make and it's more important to free up resources quickly.
Given these examples, what do you think about making this timeout
configurable via sysctl and/or sockopt?
- Mat
> @@ -2643,7 +2648,7 @@ void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout)
> /* the close timeout takes precedence on the fail one, and here at least one of
> * them is active
> */
> - timeout = sock_flag(sk, SOCK_DEAD) ? close_timeout : fail_tout;
> + timeout = inet_csk(sk)->icsk_mtup.probe_timestamp ? close_timeout : fail_tout;
>
> sk_reset_timer(sk, &sk->sk_timer, timeout);
> }
> @@ -2662,8 +2667,6 @@ static void mptcp_mp_fail_no_response(struct mptcp_sock *msk)
> mptcp_subflow_reset(ssk);
> WRITE_ONCE(mptcp_subflow_ctx(ssk)->fail_tout, 0);
> unlock_sock_fast(ssk, slow);
> -
> - mptcp_reset_tout_timer(msk, 0);
> }
>
> static void mptcp_do_fastclose(struct sock *sk)
> @@ -2700,18 +2703,14 @@ static void mptcp_worker(struct work_struct *work)
> if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
> __mptcp_close_subflow(sk);
>
> - /* There is no point in keeping around an orphaned sk timedout or
> - * closed, but we need the msk around to reply to incoming DATA_FIN,
> - * even if it is orphaned and in FIN_WAIT2 state
> - */
> - if (sock_flag(sk, SOCK_DEAD)) {
> - if (mptcp_should_close(sk))
> - mptcp_do_fastclose(sk);
> + if (mptcp_close_tout_expired(sk)) {
> + mptcp_do_fastclose(sk);
> + mptcp_close_wake_up(sk);
> + }
>
> - if (sk->sk_state == TCP_CLOSE) {
> - __mptcp_destroy_sock(sk);
> - goto unlock;
> - }
> + if (sock_flag(sk, SOCK_DEAD) && sk->sk_state == TCP_CLOSE) {
> + __mptcp_destroy_sock(sk);
> + goto unlock;
> }
>
> if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
> @@ -3010,7 +3009,6 @@ bool __mptcp_close(struct sock *sk, long timeout)
>
> cleanup:
> /* orphan all the subflows */
> - inet_csk(sk)->icsk_mtup.probe_timestamp = tcp_jiffies32;
> mptcp_for_each_subflow(msk, subflow) {
> struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
> bool slow = lock_sock_fast_nested(ssk);
> @@ -3047,7 +3045,7 @@ bool __mptcp_close(struct sock *sk, long timeout)
> __mptcp_destroy_sock(sk);
> do_cancel_work = true;
> } else {
> - mptcp_reset_tout_timer(msk, 0);
> + mptcp_start_tout_timer(sk);
> }
>
> return do_cancel_work;
> @@ -3111,7 +3109,7 @@ static int mptcp_disconnect(struct sock *sk, int flags)
> inet_sk_state_store(sk, TCP_CLOSE);
>
> mptcp_stop_rtx_timer(sk);
> - sk_stop_timer(sk, &sk->sk_timer);
> + mptcp_stop_tout_timer(sk);
>
> if (msk->token)
> mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL);
> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> index 4fcce9ad7d04..392c2f247034 100644
> --- a/net/mptcp/protocol.h
> +++ b/net/mptcp/protocol.h
> @@ -726,6 +726,26 @@ void mptcp_get_options(const struct sk_buff *skb,
> void mptcp_finish_connect(struct sock *sk);
> void __mptcp_set_connected(struct sock *sk);
> void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout);
> +
> +static inline void mptcp_stop_tout_timer(struct sock *sk)
> +{
> + if (!inet_csk(sk)->icsk_mtup.probe_timestamp)
> + return;
> +
> + sk_stop_timer(sk, &sk->sk_timer);
> + inet_csk(sk)->icsk_mtup.probe_timestamp = 0;
> +}
> +
> +static inline void mptcp_start_tout_timer(struct sock *sk)
> +{
> + /* avoid 0 timestamp, as that means no close timeout */
> + inet_csk(sk)->icsk_mtup.probe_timestamp = tcp_jiffies32;
> + if (!inet_csk(sk)->icsk_mtup.probe_timestamp)
> + inet_csk(sk)->icsk_mtup.probe_timestamp = 1;
> +
> + mptcp_reset_tout_timer(mptcp_sk(sk), 0);
> +}
> +
> static inline bool mptcp_is_fully_established(struct sock *sk)
> {
> return inet_sk_state_load(sk) == TCP_ESTABLISHED &&
> diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
> index 433f290984c8..918c1a235790 100644
> --- a/net/mptcp/subflow.c
> +++ b/net/mptcp/subflow.c
> @@ -1552,6 +1552,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
> mptcp_sock_graft(ssk, sk->sk_socket);
> iput(SOCK_INODE(sf));
> WRITE_ONCE(msk->allow_infinite_fallback, false);
> + mptcp_stop_tout_timer(sk);
> return 0;
>
> failed_unlink:
> --
> 2.41.0
>
>
>
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH mptcp-next v4 2/2] mptcp: implement connection level timeout.
2023-09-02 0:23 ` [PATCH mptcp-next v4 2/2] mptcp: implement connection level timeout Mat Martineau
@ 2023-09-04 8:04 ` Paolo Abeni
0 siblings, 0 replies; 7+ messages in thread
From: Paolo Abeni @ 2023-09-04 8:04 UTC (permalink / raw)
To: Mat Martineau; +Cc: mptcp
On Fri, 2023-09-01 at 17:23 -0700, Mat Martineau wrote:
> On Fri, 1 Sep 2023, Paolo Abeni wrote:
> > @@ -2635,7 +2640,7 @@ void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout)
> > struct sock *sk = (struct sock *)msk;
> > unsigned long timeout, close_timeout;
> >
> > - if (!fail_tout && !sock_flag(sk, SOCK_DEAD))
> > + if (!fail_tout && !inet_csk(sk)->icsk_mtup.probe_timestamp)
> > return;
> >
> > close_timeout = inet_csk(sk)->icsk_mtup.probe_timestamp - tcp_jiffies32 + jiffies + TCP_TIMEWAIT_LEN;
>
> RFC 8684 brings up "break-before-make" scenarios in a few places, I think
> one use case is for VM migrations. 60 seconds seems sufficient but maybe
> not?
>
> There may also be scenarios with busy servers where the server-side admin
> may want to quickly close "zero subflow" MPTCP connections immediately (or
> with a shorter timeout) if their workload does not involve
> break-before-make and it's more important to free up resources quickly.
>
> Given these examples, what do you think about making this timeout
> configurable via sysctl and/or sockopt?
Yes, the idea is to add an additional sysctl. I guess we could even add
a sockopt (MPTCP_USER_TIMEOUT) to cap the above.
I did not include that here because I intended to keep this change
small; the sysctl would add a bit more diffstat noise.
Cheers,
Paolo
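For comparison, plain TCP already exposes a per-socket cap of this kind via the TCP_USER_TIMEOUT option (RFC 5482). The MPTCP_USER_TIMEOUT name above is only a proposal, but a caller would presumably look much like the existing TCP case:

```c
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

/* Set the existing TCP-level analogue of the proposed knob: the maximum
 * time (in ms) that transmitted data may remain unacknowledged before
 * the connection is forcibly closed; 0 restores the system default.
 */
static int set_user_timeout(int fd, unsigned int timeout_ms)
{
	return setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT,
			  &timeout_ms, sizeof(timeout_ms));
}
```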
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH mptcp-next v4 2/2] mptcp: implement connection level timeout.
2023-09-01 11:11 ` [PATCH mptcp-next v4 2/2] mptcp: implement connection level timeout Paolo Abeni
2023-09-01 12:22 ` mptcp: implement connection level timeout.: Tests Results MPTCP CI
2023-09-02 0:23 ` [PATCH mptcp-next v4 2/2] mptcp: implement connection level timeout Mat Martineau
@ 2023-09-05 21:59 ` Mat Martineau
2 siblings, 0 replies; 7+ messages in thread
From: Mat Martineau @ 2023-09-05 21:59 UTC (permalink / raw)
To: Paolo Abeni; +Cc: mptcp
On Fri, 1 Sep 2023, Paolo Abeni wrote:
> According to RFC 8684 section 3.3:
>
> A connection is not closed unless [...] or an implementation-specific
> connection-level send timeout.
>
> Currently the MPTCP protocol does not implement such a timeout, and
> connections timing out at the TCP level never move to the close state.
>
> Introduce a catch-up condition at subflow close time to move the
> MPTCP socket to the close state, too.
>
> That additionally allows removing similar existing logic inside the worker.
>
> Finally, allow some additional timeout for plain ESTABLISHED MPTCP
> sockets, as the protocol allows creating new subflows even at that
> point, making the connection functional again.
>
> Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/430
> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Thanks for the reply to my first review - adding the sysctl as a follow-on
commit is a good idea.
New comment below:
> ---
> v2 -> v3:
> - fix integer overflow in mptcp_close_tout_expired()
> v1 -> v2:
> - really set the close timeout as needed.
> ---
> net/mptcp/protocol.c | 84 +++++++++++++++++++++-----------------------
> net/mptcp/protocol.h | 20 +++++++++++
> net/mptcp/subflow.c | 1 +
> 3 files changed, 62 insertions(+), 43 deletions(-)
>
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index 33093ed87077..80d06cab8bc7 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -886,6 +886,7 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
> mptcp_subflow_ctx(ssk)->subflow_id = msk->subflow_id++;
> mptcp_sockopt_sync_locked(msk, ssk);
> mptcp_subflow_joined(msk, ssk);
> + mptcp_stop_tout_timer(sk);
> return true;
> }
>
> @@ -2363,18 +2364,15 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
> bool dispose_it, need_push = false;
>
> /* If the first subflow moved to a close state before accept, e.g. due
> - * to an incoming reset, mptcp either:
> - * - if either the subflow or the msk are dead, destroy the context
> - * (the subflow socket is deleted by inet_child_forget) and the msk
> - * - otherwise do nothing at the moment and take action at accept and/or
> - * listener shutdown - user-space must be able to accept() the closed
> - * socket.
> + * to an incoming reset or listener shoutdown, the subflow socket is
> + * already deleted by inet_child_forget() the mptcp socket can't survive
> + * too.
> */
> - if (msk->in_accept_queue && msk->first == ssk) {
> - if (!sock_flag(sk, SOCK_DEAD) && !sock_flag(ssk, SOCK_DEAD))
> - return;
> -
> + if (msk->in_accept_queue && msk->first == ssk &&
> + (sock_flag(sk, SOCK_DEAD) || sock_flag(ssk, SOCK_DEAD))) {
> /* ensure later check in mptcp_worker() will dispose the msk */
> + inet_csk(sk)->icsk_mtup.probe_timestamp = tcp_jiffies32 -
> + (TCP_TIMEWAIT_LEN + 1);
Should make sure this is a non-zero value, to avoid the very tiny chance
that the socket gets stuck forever if tcp_jiffies32 wrapped around at
exactly the wrong time. (like the check in mptcp_start_tout_timer()
below)
Can also fix the 'shoutdown' typo above in v5 :)
- Mat
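A minimal sketch of the guard Mat suggests (helper and macro names hypothetical), mirroring the non-zero check in mptcp_start_tout_timer():

```c
#include <stdint.h>

#define TCP_TIMEWAIT_LEN_J 6000u	/* illustrative; the kernel uses 60 * HZ */

/* Compute an already-expired close timestamp, but never store 0: a zero
 * probe_timestamp means "no close timeout armed", so a tcp_jiffies32
 * wrap landing exactly on 0 would leave the socket stuck forever.
 */
static uint32_t expired_close_ts(uint32_t tcp_jiffies32)
{
	uint32_t ts = tcp_jiffies32 - (TCP_TIMEWAIT_LEN_J + 1u);

	return ts ? ts : 1u;	/* dodge the "disarmed" sentinel */
}
```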
> sock_set_flag(sk, SOCK_DEAD);
> lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
> mptcp_subflow_drop_ctx(ssk);
> @@ -2437,6 +2435,22 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
> out:
> if (need_push)
> __mptcp_push_pending(sk, 0);
> +
> + /* Catch every 'all subflows closed' scenario, including peers silently
> + * closing them, e.g. due to timeout.
> + * For established sockets, allow an additional timeout before closing,
> + * as the protocol can still create more subflows.
> + */
> + if (list_is_singular(&msk->conn_list) && msk->first &&
> + inet_sk_state_load(msk->first) == TCP_CLOSE) {
> + if (sk->sk_state != TCP_ESTABLISHED ||
> + msk->in_accept_queue || sock_flag(sk, SOCK_DEAD)) {
> + inet_sk_state_store(sk, TCP_CLOSE);
> + mptcp_close_wake_up(sk);
> + } else {
> + mptcp_start_tout_timer(sk);
> + }
> + }
> }
>
> void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
> @@ -2480,23 +2494,14 @@ static void __mptcp_close_subflow(struct sock *sk)
>
> }
>
> -static bool mptcp_should_close(const struct sock *sk)
> +static bool mptcp_close_tout_expired(const struct sock *sk)
> {
> - s32 delta = tcp_jiffies32 - inet_csk(sk)->icsk_mtup.probe_timestamp;
> - struct mptcp_subflow_context *subflow;
> -
> - if (delta >= TCP_TIMEWAIT_LEN || mptcp_sk(sk)->in_accept_queue)
> - return true;
> + if (!inet_csk(sk)->icsk_mtup.probe_timestamp ||
> + sk->sk_state == TCP_CLOSE)
> + return false;
>
> - /* if all subflows are in closed status don't bother with additional
> - * timeout
> - */
> - mptcp_for_each_subflow(mptcp_sk(sk), subflow) {
> - if (inet_sk_state_load(mptcp_subflow_tcp_sock(subflow)) !=
> - TCP_CLOSE)
> - return false;
> - }
> - return true;
> + return time_after32(tcp_jiffies32,
> + inet_csk(sk)->icsk_mtup.probe_timestamp + TCP_TIMEWAIT_LEN);
> }
>
> static void mptcp_check_fastclose(struct mptcp_sock *msk)
> @@ -2635,7 +2640,7 @@ void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout)
> struct sock *sk = (struct sock *)msk;
> unsigned long timeout, close_timeout;
>
> - if (!fail_tout && !sock_flag(sk, SOCK_DEAD))
> + if (!fail_tout && !inet_csk(sk)->icsk_mtup.probe_timestamp)
> return;
>
> close_timeout = inet_csk(sk)->icsk_mtup.probe_timestamp - tcp_jiffies32 + jiffies + TCP_TIMEWAIT_LEN;
> @@ -2643,7 +2648,7 @@ void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout)
> /* the close timeout takes precedence on the fail one, and here at least one of
> * them is active
> */
> - timeout = sock_flag(sk, SOCK_DEAD) ? close_timeout : fail_tout;
> + timeout = inet_csk(sk)->icsk_mtup.probe_timestamp ? close_timeout : fail_tout;
>
> sk_reset_timer(sk, &sk->sk_timer, timeout);
> }
> @@ -2662,8 +2667,6 @@ static void mptcp_mp_fail_no_response(struct mptcp_sock *msk)
> mptcp_subflow_reset(ssk);
> WRITE_ONCE(mptcp_subflow_ctx(ssk)->fail_tout, 0);
> unlock_sock_fast(ssk, slow);
> -
> - mptcp_reset_tout_timer(msk, 0);
> }
>
> static void mptcp_do_fastclose(struct sock *sk)
> @@ -2700,18 +2703,14 @@ static void mptcp_worker(struct work_struct *work)
> if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
> __mptcp_close_subflow(sk);
>
> - /* There is no point in keeping around an orphaned sk timedout or
> - * closed, but we need the msk around to reply to incoming DATA_FIN,
> - * even if it is orphaned and in FIN_WAIT2 state
> - */
> - if (sock_flag(sk, SOCK_DEAD)) {
> - if (mptcp_should_close(sk))
> - mptcp_do_fastclose(sk);
> + if (mptcp_close_tout_expired(sk)) {
> + mptcp_do_fastclose(sk);
> + mptcp_close_wake_up(sk);
> + }
>
> - if (sk->sk_state == TCP_CLOSE) {
> - __mptcp_destroy_sock(sk);
> - goto unlock;
> - }
> + if (sock_flag(sk, SOCK_DEAD) && sk->sk_state == TCP_CLOSE) {
> + __mptcp_destroy_sock(sk);
> + goto unlock;
> }
>
> if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
> @@ -3010,7 +3009,6 @@ bool __mptcp_close(struct sock *sk, long timeout)
>
> cleanup:
> /* orphan all the subflows */
> - inet_csk(sk)->icsk_mtup.probe_timestamp = tcp_jiffies32;
> mptcp_for_each_subflow(msk, subflow) {
> struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
> bool slow = lock_sock_fast_nested(ssk);
> @@ -3047,7 +3045,7 @@ bool __mptcp_close(struct sock *sk, long timeout)
> __mptcp_destroy_sock(sk);
> do_cancel_work = true;
> } else {
> - mptcp_reset_tout_timer(msk, 0);
> + mptcp_start_tout_timer(sk);
> }
>
> return do_cancel_work;
> @@ -3111,7 +3109,7 @@ static int mptcp_disconnect(struct sock *sk, int flags)
> inet_sk_state_store(sk, TCP_CLOSE);
>
> mptcp_stop_rtx_timer(sk);
> - sk_stop_timer(sk, &sk->sk_timer);
> + mptcp_stop_tout_timer(sk);
>
> if (msk->token)
> mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL);
> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> index 4fcce9ad7d04..392c2f247034 100644
> --- a/net/mptcp/protocol.h
> +++ b/net/mptcp/protocol.h
> @@ -726,6 +726,26 @@ void mptcp_get_options(const struct sk_buff *skb,
> void mptcp_finish_connect(struct sock *sk);
> void __mptcp_set_connected(struct sock *sk);
> void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout);
> +
> +static inline void mptcp_stop_tout_timer(struct sock *sk)
> +{
> + if (!inet_csk(sk)->icsk_mtup.probe_timestamp)
> + return;
> +
> + sk_stop_timer(sk, &sk->sk_timer);
> + inet_csk(sk)->icsk_mtup.probe_timestamp = 0;
> +}
> +
> +static inline void mptcp_start_tout_timer(struct sock *sk)
> +{
> + /* avoid 0 timestamp, as that means no close timeout */
> + inet_csk(sk)->icsk_mtup.probe_timestamp = tcp_jiffies32;
> + if (!inet_csk(sk)->icsk_mtup.probe_timestamp)
> + inet_csk(sk)->icsk_mtup.probe_timestamp = 1;
> +
> + mptcp_reset_tout_timer(mptcp_sk(sk), 0);
> +}
> +
> static inline bool mptcp_is_fully_established(struct sock *sk)
> {
> return inet_sk_state_load(sk) == TCP_ESTABLISHED &&
> diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
> index 433f290984c8..918c1a235790 100644
> --- a/net/mptcp/subflow.c
> +++ b/net/mptcp/subflow.c
> @@ -1552,6 +1552,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
> mptcp_sock_graft(ssk, sk->sk_socket);
> iput(SOCK_INODE(sf));
> WRITE_ONCE(msk->allow_infinite_fallback, false);
> + mptcp_stop_tout_timer(sk);
> return 0;
>
> failed_unlink:
> --
> 2.41.0
>
>
>
^ permalink raw reply [flat|nested] 7+ messages in thread