From: David Carlier <devnexen@gmail.com>
To: mptcp@lists.linux.dev
Cc: matttbe@kernel.org, martineau@kernel.org, geliang@kernel.org,
pabeni@redhat.com, David Carlier <devnexen@gmail.com>
Subject: [PATCH mptcp-next v10 2/4] mptcp: propagate RECVERR sockopts to subflows
Date: Fri, 29 May 2026 18:45:20 +0100 [thread overview]
Message-ID: <20260529174524.260199-3-devnexen@gmail.com> (raw)
In-Reply-To: <20260529174524.260199-1-devnexen@gmail.com>
Propagate IP_RECVERR/IP_RECVERR_RFC4884 and
IPV6_RECVERR/IPV6_RECVERR_RFC4884 from the MPTCP socket to existing
and future subflows.
mptcp_setsockopt_recverr() first applies the option to the parent socket
via ip_setsockopt() / ipv6_setsockopt(), which also validate optval and
optlen. It then re-reads the resulting RECVERR bit from the parent under
lock_sock() into a local int and forwards that value to every subflow
via mptcp_setsockopt_all_sf(), which bumps msk->setsockopt_seq on
success. Newly-joining subflows pick up the four RECVERR bits through
sync_socket_options() now that MPTCP_INET_FLAGS_MASK covers them.
mptcp_setsockopt_all_sf() skips IPv4 subflows when called with
SOL_IPV6: ipv6_setsockopt() on a sock with sk_family != AF_INET6
returns an error, which would abort the loop and leave the remaining
subflows desynchronised. This branch was unreachable before this
patch (the only caller was TCP_MAXSEG, family-agnostic); it becomes
live with the new IPV6_RECVERR / IPV6_RECVERR_RFC4884 caller and the
v4-subflow-on-AF_INET6-msk case (v4 MP_JOIN, or userspace PM grafting
a v4 subflow onto a v6 msk).
Suggested-by: Paolo Abeni <pabeni@redhat.com>
Assisted-by: Codex:gpt-5
Signed-off-by: David Carlier <devnexen@gmail.com>
---
net/mptcp/sockopt.c | 138 +++++++++++++++++++++++++++++++++++++-------
1 file changed, 116 insertions(+), 22 deletions(-)
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index b9cac04a749a..76ff3c41a481 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -8,6 +8,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
+#include <net/ipv6.h>
#include <net/sock.h>
#include <net/protocol.h>
#include <net/tcp.h>
@@ -19,7 +20,11 @@
#define MPTCP_INET_FLAGS_MASK \
(BIT(INET_FLAGS_TRANSPARENT) | \
BIT(INET_FLAGS_FREEBIND) | \
- BIT(INET_FLAGS_BIND_ADDRESS_NO_PORT))
+ BIT(INET_FLAGS_BIND_ADDRESS_NO_PORT) | \
+ BIT(INET_FLAGS_RECVERR) | \
+ BIT(INET_FLAGS_RECVERR_RFC4884) | \
+ BIT(INET_FLAGS_RECVERR6) | \
+ BIT(INET_FLAGS_RECVERR6_RFC4884))
static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk)
{
@@ -394,6 +399,85 @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
return -EOPNOTSUPP;
}
+/* Apply one socket option to every subflow of @msk.
+ *
+ * The walk stops at the first subflow for which tcp_setsockopt() fails and
+ * that error is returned; subflows visited earlier keep the new value.
+ * sockopt_seq_inc() is only called when no subflow reported an error.
+ */
+static int mptcp_setsockopt_all_sf(struct mptcp_sock *msk, int level,
+				   int optname, sockptr_t optval,
+				   unsigned int optlen)
+{
+	struct mptcp_subflow_context *subflow;
+
+	mptcp_for_each_subflow(msk, subflow) {
+		struct sock *sf_sk = mptcp_subflow_tcp_sock(subflow);
+		int err;
+
+		/* A SOL_IPV6 option cannot be applied to a non-AF_INET6
+		 * subflow (v4 MP_JOIN, or a v4 subflow grafted by the
+		 * userspace PM onto an AF_INET6 msk); the resulting error
+		 * would end the walk early, so such subflows are skipped.
+		 */
+		if (level == SOL_IPV6 && sf_sk->sk_family != AF_INET6)
+			continue;
+
+		err = tcp_setsockopt(sf_sk, level, optname, optval, optlen);
+		if (err)
+			return err;
+	}
+
+	sockopt_seq_inc(msk);
+	return 0;
+}
+
+/* Set one of the RECVERR socket options on the MPTCP socket and propagate
+ * the resulting value to its subflows.
+ *
+ * @level must be SOL_IP or (with CONFIG_IPV6) SOL_IPV6; @optname must be
+ * one of IP_RECVERR, IP_RECVERR_RFC4884, IPV6_RECVERR or
+ * IPV6_RECVERR_RFC4884.
+ *
+ * Returns 0 on success, -ENOPROTOOPT for SOL_IPV6 on a non-AF_INET6 msk,
+ * -EOPNOTSUPP for any other level or for an optname that is not handled
+ * here, or the error reported by the parent or subflow setsockopt call.
+ */
+static int mptcp_setsockopt_recverr(struct mptcp_sock *msk, int level,
+				    int optname, sockptr_t optval,
+				    unsigned int optlen)
+{
+	struct sock *sk = (struct sock *)msk;
+	int val, ret;
+
+	/* Let ip_setsockopt() / ipv6_setsockopt() validate optval and optlen
+	 * (so 1-byte boolean writes keep the same ABI as plain TCP) and update
+	 * the parent's RECVERR bit. Re-read that bit under lock_sock() and
+	 * push it to the subflows: concurrent setsockopt callers cannot leave
+	 * parent and subflows desynchronized this way.
+	 */
+	if (level == SOL_IP)
+		ret = ip_setsockopt(sk, level, optname, optval, optlen);
+#if IS_ENABLED(CONFIG_IPV6)
+	else if (level == SOL_IPV6) {
+		if (sk->sk_family != AF_INET6)
+			return -ENOPROTOOPT;
+		ret = ipv6_setsockopt(sk, level, optname, optval, optlen);
+	}
+#endif
+	else
+		return -EOPNOTSUPP;
+	if (ret)
+		return ret;
+
+	lock_sock(sk);
+	switch (optname) {
+	case IP_RECVERR:
+		val = inet_test_bit(RECVERR, sk);
+		break;
+	case IP_RECVERR_RFC4884:
+		val = inet_test_bit(RECVERR_RFC4884, sk);
+		break;
+#if IS_ENABLED(CONFIG_IPV6)
+	case IPV6_RECVERR:
+		val = inet6_test_bit(RECVERR6, sk);
+		break;
+	case IPV6_RECVERR_RFC4884:
+		val = inet6_test_bit(RECVERR6_RFC4884, sk);
+		break;
+#endif
+	default:
+		/* No RECVERR bit corresponds to this optname, so val was
+		 * never assigned: do not forward it to the subflows.
+		 */
+		ret = -EOPNOTSUPP;
+		goto unlock;
+	}
+
+	ret = mptcp_setsockopt_all_sf(msk, level, optname,
+				      KERNEL_SOCKPTR(&val), sizeof(val));
+unlock:
+	release_sock(sk);
+	return ret;
+}
+
static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname,
sockptr_t optval, unsigned int optlen)
{
@@ -436,6 +520,10 @@ static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname,
release_sock(sk);
break;
+ case IPV6_RECVERR:
+ case IPV6_RECVERR_RFC4884:
+ ret = mptcp_setsockopt_recverr(msk, SOL_IPV6, optname, optval, optlen);
+ break;
}
return ret;
@@ -781,6 +869,9 @@ static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname,
return mptcp_setsockopt_sol_ip_set(msk, optname, optval, optlen);
case IP_TOS:
return mptcp_setsockopt_v4_set_tos(msk, optname, optval, optlen);
+ case IP_RECVERR:
+ case IP_RECVERR_RFC4884:
+ return mptcp_setsockopt_recverr(msk, SOL_IP, optname, optval, optlen);
}
return -EOPNOTSUPP;
@@ -808,27 +899,6 @@ static int mptcp_setsockopt_first_sf_only(struct mptcp_sock *msk, int level, int
return ret;
}
-static int mptcp_setsockopt_all_sf(struct mptcp_sock *msk, int level,
- int optname, sockptr_t optval,
- unsigned int optlen)
-{
- struct mptcp_subflow_context *subflow;
- int ret = 0;
-
- mptcp_for_each_subflow(msk, subflow) {
- struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
-
- ret = tcp_setsockopt(ssk, level, optname, optval, optlen);
- if (ret)
- break;
- }
-
- if (!ret)
- sockopt_seq_inc(msk);
-
- return ret;
-}
-
static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
sockptr_t optval, unsigned int optlen)
{
@@ -1473,6 +1543,12 @@ static int mptcp_getsockopt_v4(struct mptcp_sock *msk, int optname,
case IP_LOCAL_PORT_RANGE:
return mptcp_put_int_option(msk, optval, optlen,
READ_ONCE(inet_sk(sk)->local_port_range));
+ case IP_RECVERR:
+ return mptcp_put_int_option(msk, optval, optlen,
+ inet_test_bit(RECVERR, sk));
+ case IP_RECVERR_RFC4884:
+ return mptcp_put_int_option(msk, optval, optlen,
+ inet_test_bit(RECVERR_RFC4884, sk));
}
return -EOPNOTSUPP;
@@ -1493,6 +1569,16 @@ static int mptcp_getsockopt_v6(struct mptcp_sock *msk, int optname,
case IPV6_FREEBIND:
return mptcp_put_int_option(msk, optval, optlen,
inet_test_bit(FREEBIND, sk));
+ case IPV6_RECVERR:
+ if (sk->sk_family != AF_INET6)
+ return -ENOPROTOOPT;
+ return mptcp_put_int_option(msk, optval, optlen,
+ inet6_test_bit(RECVERR6, sk));
+ case IPV6_RECVERR_RFC4884:
+ if (sk->sk_family != AF_INET6)
+ return -ENOPROTOOPT;
+ return mptcp_put_int_option(msk, optval, optlen,
+ inet6_test_bit(RECVERR6_RFC4884, sk));
}
return -EOPNOTSUPP;
@@ -1601,6 +1687,14 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk)
src = READ_ONCE(inet_sk(sk)->inet_flags);
+ /* RECVERR6 bits are only read on AF_INET6 sockets; copying them onto a
+ * v4 subflow is dead state and diverges from the SOL_IPV6 skip in
+ * mptcp_setsockopt_all_sf().
+ */
+ if (ssk->sk_family != AF_INET6)
+ mask &= ~(BIT(INET_FLAGS_RECVERR6) |
+ BIT(INET_FLAGS_RECVERR6_RFC4884));
+
for_each_set_bit(b, &mask, BITS_PER_LONG)
assign_bit(b, &inet_sk(ssk)->inet_flags, src & BIT(b));
--
2.53.0
next prev parent reply other threads:[~2026-05-29 17:45 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-05-29 17:45 [PATCH mptcp-next v10 0/4] mptcp: MSG_ERRQUEUE support on the parent socket David Carlier
2026-05-29 17:45 ` [PATCH mptcp-next v10 1/4] mptcp: sockopt: factor inet_flags propagation into a mask David Carlier
2026-05-29 17:45 ` David Carlier [this message]
2026-05-29 17:45 ` [PATCH mptcp-next v10 3/4] mptcp: support MSG_ERRQUEUE on the parent socket David Carlier
2026-05-29 17:45 ` [PATCH mptcp-next v10 4/4] selftests: mptcp: cover IP_RECVERR sockopt propagation David Carlier
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260529174524.260199-3-devnexen@gmail.com \
--to=devnexen@gmail.com \
--cc=geliang@kernel.org \
--cc=martineau@kernel.org \
--cc=matttbe@kernel.org \
--cc=mptcp@lists.linux.dev \
--cc=pabeni@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox