All of lore.kernel.org
 help / color / mirror / Atom feed
From: David Carlier <devnexen@gmail.com>
To: mptcp@lists.linux.dev
Cc: Matthieu Baerts <matttbe@kernel.org>,
	Mat Martineau <martineau@kernel.org>,
	Geliang Tang <geliang@kernel.org>,
	David Carlier <devnexen@gmail.com>
Subject: [PATCH mptcp-next v3 2/3] mptcp: support MSG_ERRQUEUE on the parent socket
Date: Tue, 21 Apr 2026 23:33:37 +0100	[thread overview]
Message-ID: <20260421223338.52743-3-devnexen@gmail.com> (raw)
In-Reply-To: <20260421223338.52743-1-devnexen@gmail.com>

Handle MSG_ERRQUEUE on the MPTCP socket by selecting a subflow with
pending errqueue data, moving one error skb to the parent socket, and
consuming it through the parent socket ABI.

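This surfaces subflow errqueue activity through poll() as EPOLLERR,
keeps the userspace ABI tied to the socket being used, and restores the
skb to the subflow errqueue if requeueing to the parent fails under
rmem pressure. If that restore fails as well, the skb is freed.

__mptcp_subflow_error_report() now also signals the parent socket when
a subflow only has errqueue data; ssk->sk_err is still consumed only on
the fallback and MPC connect paths.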

Assisted-by: Codex:gpt-5
Signed-off-by: David Carlier <devnexen@gmail.com>
---
 net/mptcp/protocol.c | 123 ++++++++++++++++++++++++++++++++++++-------
 1 file changed, 103 insertions(+), 20 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 6b486fc94c16..87871216bab2 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -818,28 +818,29 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk)
 static bool __mptcp_subflow_error_report(struct sock *sk, struct sock *ssk)
 {
 	int ssk_state;
-	int err;
+	int err = 0;
+	bool has_errqueue;
+
+	has_errqueue = !skb_queue_empty_lockless(&ssk->sk_error_queue);
 
-	/* only propagate errors on fallen-back sockets or
-	 * on MPC connect
+	/* Only fallback sockets and the MPC connect path inherit TCP's sk_err
+	 * semantics; consume ssk->sk_err only on those paths so steady-state
+	 * MPTCP doesn't silently drop TCP's one-shot errors.
 	 */
-	if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(mptcp_sk(sk)))
-		return false;
+	if (sk->sk_state == TCP_SYN_SENT ||
+	    __mptcp_check_fallback(mptcp_sk(sk))) {
+		err = sock_error(ssk);
+		if (err) {
+			ssk_state = inet_sk_state_load(ssk);
+			if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD))
+				mptcp_set_state(sk, ssk_state);
+			WRITE_ONCE(sk->sk_err, -err);
+		}
+	}
 
-	err = sock_error(ssk);
-	if (!err)
+	if (!err && !has_errqueue)
 		return false;
 
-	/* We need to propagate only transition to CLOSE state.
-	 * Orphaned socket will see such state change via
-	 * subflow_sched_work_if_closed() and that path will properly
-	 * destroy the msk as needed.
-	 */
-	ssk_state = inet_sk_state_load(ssk);
-	if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD))
-		mptcp_set_state(sk, ssk_state);
-	WRITE_ONCE(sk->sk_err, -err);
-
 	/* This barrier is coupled with smp_rmb() in mptcp_poll() */
 	smp_wmb();
 	sk_error_report(sk);
@@ -2286,6 +2287,68 @@ static unsigned int mptcp_inq_hint(const struct sock *sk)
 	return 0;
 }
 
+static struct sock *mptcp_pick_errqueue_subflow(struct sock *sk)
+{
+	struct mptcp_subflow_context *subflow;
+	struct sock *ssk = NULL;
+
+	lock_sock(sk);
+	mptcp_for_each_subflow(mptcp_sk(sk), subflow) {
+		struct sock *subflow_sk = mptcp_subflow_tcp_sock(subflow);
+
+		if (skb_queue_empty_lockless(&subflow_sk->sk_error_queue))
+			continue;
+
+		if (!refcount_inc_not_zero(&subflow_sk->sk_refcnt))
+			continue;
+
+		ssk = subflow_sk;
+		break;
+	}
+	release_sock(sk);
+
+	return ssk;
+}
+
+static bool mptcp_has_error_queue(const struct sock *sk)
+{
+	return !skb_queue_empty_lockless(&sk->sk_error_queue);
+}
+
+static int mptcp_recv_error(struct sock *sk, struct msghdr *msg, int len)
+{
+	struct sk_buff *skb;
+	struct sock *ssk;
+	int ret, ret2;
+
+	if (READ_ONCE(sk->sk_err) || mptcp_has_error_queue(sk))
+		return inet_recv_error(sk, msg, len);
+
+	ssk = mptcp_pick_errqueue_subflow(sk);
+	if (!ssk)
+		return -EAGAIN;
+
+	skb = sock_dequeue_err_skb(ssk);
+	if (!skb)
+		goto put_ssk;
+
+	ret = sock_queue_err_skb(sk, skb);
+	if (ret) {
+		ret2 = sock_queue_err_skb(ssk, skb);
+		sock_put(ssk);
+		if (ret2)
+			kfree_skb(skb);
+		return ret;
+	}
+
+	sock_put(ssk);
+	return inet_recv_error(sk, msg, len);
+
+put_ssk:
+	sock_put(ssk);
+	return -EAGAIN;
+}
+
 static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 			 int flags)
 {
@@ -2295,9 +2358,8 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 	int target;
 	long timeo;
 
-	/* MSG_ERRQUEUE is really a no-op till we support IP_RECVERR */
 	if (unlikely(flags & MSG_ERRQUEUE))
-		return inet_recv_error(sk, msg, len);
+		return mptcp_recv_error(sk, msg, len);
 
 	lock_sock(sk);
 	if (unlikely(sk->sk_state == TCP_LISTEN)) {
@@ -4296,6 +4358,26 @@ static __poll_t mptcp_check_writeable(struct mptcp_sock *msk)
 	return 0;
 }
 
+static bool mptcp_subflow_has_error(struct sock *sk)
+{
+	struct mptcp_subflow_context *subflow;
+	bool has_error = false;
+
+	mptcp_data_lock(sk);
+	mptcp_for_each_subflow(mptcp_sk(sk), subflow) {
+		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+
+		if (READ_ONCE(ssk->sk_err) ||
+		    !skb_queue_empty_lockless(&ssk->sk_error_queue)) {
+			has_error = true;
+			break;
+		}
+	}
+	mptcp_data_unlock(sk);
+
+	return has_error;
+}
+
 static __poll_t mptcp_poll(struct file *file, struct socket *sock,
 			   struct poll_table_struct *wait)
 {
@@ -4339,7 +4421,8 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
 
 	/* This barrier is coupled with smp_wmb() in __mptcp_error_report() */
 	smp_rmb();
-	if (READ_ONCE(sk->sk_err))
+	if (READ_ONCE(sk->sk_err) || mptcp_has_error_queue(sk) ||
+	    mptcp_subflow_has_error(sk))
 		mask |= EPOLLERR;
 
 	return mask;
-- 
2.53.0


  parent reply	other threads:[~2026-04-21 22:33 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-21 22:33 [PATCH mptcp-next v3 0/3] mptcp: MSG_ERRQUEUE support on the parent socket David Carlier
2026-04-21 22:33 ` [PATCH mptcp-next v3 1/3] mptcp: propagate RECVERR sockopts to subflows David Carlier
2026-04-22  8:05   ` Paolo Abeni
2026-04-22  8:32     ` Matthieu Baerts
2026-04-22  8:35       ` Matthieu Baerts
2026-04-22  8:36         ` Matthieu Baerts
2026-04-22  8:48         ` Paolo Abeni
2026-04-22  8:50           ` Matthieu Baerts
2026-04-22 13:53             ` Paolo Abeni
2026-04-22 21:51     ` David CARLIER
2026-04-27 17:07       ` Matthieu Baerts
2026-04-21 22:33 ` David Carlier [this message]
2026-04-22  8:28   ` [PATCH mptcp-next v3 2/3] mptcp: support MSG_ERRQUEUE on the parent socket Paolo Abeni
2026-04-22 21:54     ` David CARLIER
2026-04-21 22:33 ` [PATCH mptcp-next v3 3/3] selftests: mptcp: cover IP_RECVERR sockopt propagation David Carlier
2026-04-21 23:38 ` [PATCH mptcp-next v3 0/3] mptcp: MSG_ERRQUEUE support on the parent socket MPTCP CI
2026-04-22  8:22 ` Matthieu Baerts
2026-04-22  8:56   ` David CARLIER
2026-04-27 21:10 ` [PATCH mptcp-next v4 0/4] " David Carlier
2026-04-27 21:10   ` [PATCH mptcp-next v4 1/4] mptcp: sockopt: factor inet_flags propagation into a mask David Carlier
2026-04-27 21:10   ` [PATCH mptcp-next v4 2/4] mptcp: propagate RECVERR sockopts to subflows David Carlier
2026-05-01 15:56     ` Matthieu Baerts
2026-04-27 21:10   ` [PATCH mptcp-next v4 3/4] mptcp: support MSG_ERRQUEUE on the parent socket David Carlier
2026-04-27 21:10   ` [PATCH mptcp-next v4 4/4] selftests: mptcp: cover IP_RECVERR sockopt propagation David Carlier
2026-04-28 18:48   ` [PATCH mptcp-next v4 0/4] mptcp: MSG_ERRQUEUE support on the parent socket Matthieu Baerts
2026-04-28 18:56     ` Matthieu Baerts
2026-04-28 19:15       ` David CARLIER
2026-05-01 14:49       ` Matthieu Baerts
2026-05-01 15:28         ` David CARLIER
2026-05-01 15:56           ` Matthieu Baerts
2026-04-28 19:48   ` MPTCP CI

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260421223338.52743-3-devnexen@gmail.com \
    --to=devnexen@gmail.com \
    --cc=geliang@kernel.org \
    --cc=martineau@kernel.org \
    --cc=matttbe@kernel.org \
    --cc=mptcp@lists.linux.dev \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.