* [PATCH net 1/1] xfrm: espintcp: publish ULP context before entry points
[not found] <cover.1778464688.git.zylzyl2333@gmail.com>
@ 2026-05-11 13:40 ` Ren Wei
2026-05-12 10:07 ` Sabrina Dubroca
0 siblings, 1 reply; 2+ messages in thread
From: Ren Wei @ 2026-05-11 13:40 UTC (permalink / raw)
To: netdev
Cc: davem, edumazet, kuba, pabeni, horms, steffen.klassert, herbert,
sd, yuantan098, yifanwucs, tomapufckgml, bird, ronbogo,
zylzyl2333, n05ec
From: Yilin Zhu <zylzyl2333@gmail.com>
espintcp_init_sk() installs sk_prot, sk_socket->ops, and socket callbacks
while setting up the ULP state. These entry points can be observed by the
receive and write-space paths before the attaching thread finishes the setup.
Previously, espintcp_data_ready() could dereference icsk_ulp_data before
the new context was published, and espintcp_write_space() could schedule
ctx->work before INIT_WORK() initialized it.
Initialize the queues, saved callbacks, and TX work item before storing
the context in icsk_ulp_data. Then publish sk_prot, sk_socket->ops, and
the socket callbacks only after smp_wmb(), paired with an ordered context
load in espintcp_getctx(). Use READ_ONCE()/WRITE_ONCE() for lockless
socket pointer accesses.
Fixes: e27cca96cd68 ("xfrm: add espintcp (RFC 8229)")
Cc: stable@kernel.org
Reported-by: Yuan Tan <yuantan098@gmail.com>
Reported-by: Yifan Wu <yifanwucs@gmail.com>
Reported-by: Juefei Pu <tomapufckgml@gmail.com>
Reported-by: Xin Liu <bird@lzu.edu.cn>
Co-developed-by: Peihan Liu <ronbogo@outlook.com>
Signed-off-by: Peihan Liu <ronbogo@outlook.com>
Signed-off-by: Yilin Zhu <zylzyl2333@gmail.com>
Signed-off-by: Ren Wei <n05ec@lzu.edu.cn>
---
include/net/espintcp.h | 13 +++++++++--
net/xfrm/espintcp.c | 52 +++++++++++++++++++++++++++---------------
2 files changed, 45 insertions(+), 20 deletions(-)
diff --git a/include/net/espintcp.h b/include/net/espintcp.h
index c70efd704b6d..034be559786b 100644
--- a/include/net/espintcp.h
+++ b/include/net/espintcp.h
@@ -34,7 +34,16 @@ static inline struct espintcp_ctx *espintcp_getctx(const struct sock *sk)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
- /* RCU is only needed for diag */
- return (__force void *)icsk->icsk_ulp_data;
+ /*
+ * The caller reached an ESP entry point by observing sk_prot,
+ * sk_socket->ops, or one of the socket callbacks. Keep the ctx
+ * load after that observation so the caller cannot see the new
+ * entry point while still seeing stale icsk_ulp_data.
+ *
+ * Pairs with smp_wmb() in espintcp_init_sk().
+ */
+ smp_rmb();
+
+ return (__force void *)READ_ONCE(icsk->icsk_ulp_data);
}
#endif
diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c
index a2756186e13a..d847632e7d4e 100644
--- a/net/xfrm/espintcp.c
+++ b/net/xfrm/espintcp.c
@@ -440,7 +440,9 @@ static void espintcp_destruct(struct sock *sk)
bool tcp_is_ulp_esp(struct sock *sk)
{
- return sk->sk_prot == &espintcp_prot || sk->sk_prot == &espintcp6_prot;
+ const struct proto *prot = READ_ONCE(sk->sk_prot);
+
+ return prot == &espintcp_prot || prot == &espintcp6_prot;
}
EXPORT_SYMBOL_GPL(tcp_is_ulp_esp);
@@ -451,10 +453,12 @@ static void build_protos(struct proto *espintcp_prot,
static int espintcp_init_sk(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
+ const struct proto_ops *ops;
struct strp_callbacks cb = {
.rcv_msg = espintcp_rcv,
.parse_msg = espintcp_parse,
};
struct espintcp_ctx *ctx;
+ struct proto *prot;
int err;
@@ -472,34 +476,46 @@ static int espintcp_init_sk(struct sock *sk)
__sk_dst_reset(sk);
- strp_check_rcv(&ctx->strp);
skb_queue_head_init(&ctx->ike_queue);
skb_queue_head_init(&ctx->out_queue);
+ ctx->saved_data_ready = READ_ONCE(sk->sk_data_ready);
+ ctx->saved_write_space = READ_ONCE(sk->sk_write_space);
+ ctx->saved_destruct = READ_ONCE(sk->sk_destruct);
+ INIT_WORK(&ctx->work, espintcp_tx_work);
+
+ /* avoid using task_frag */
+ sk->sk_allocation = GFP_ATOMIC;
+ sk->sk_use_task_frag = false;
if (sk->sk_family == AF_INET) {
- sk->sk_prot = &espintcp_prot;
- sk->sk_socket->ops = &espintcp_ops;
+ prot = &espintcp_prot;
+ ops = &espintcp_ops;
} else {
mutex_lock(&tcpv6_prot_mutex);
if (!espintcp6_prot.recvmsg)
- build_protos(&espintcp6_prot, &espintcp6_ops, sk->sk_prot, sk->sk_socket->ops);
+ build_protos(&espintcp6_prot, &espintcp6_ops,
+ READ_ONCE(sk->sk_prot),
+ READ_ONCE(sk->sk_socket->ops));
mutex_unlock(&tcpv6_prot_mutex);
- sk->sk_prot = &espintcp6_prot;
- sk->sk_socket->ops = &espintcp6_ops;
+ prot = &espintcp6_prot;
+ ops = &espintcp6_ops;
}
- ctx->saved_data_ready = sk->sk_data_ready;
- ctx->saved_write_space = sk->sk_write_space;
- ctx->saved_destruct = sk->sk_destruct;
- sk->sk_data_ready = espintcp_data_ready;
- sk->sk_write_space = espintcp_write_space;
- sk->sk_destruct = espintcp_destruct;
rcu_assign_pointer(icsk->icsk_ulp_data, ctx);
- INIT_WORK(&ctx->work, espintcp_tx_work);
- /* avoid using task_frag */
- sk->sk_allocation = GFP_ATOMIC;
- sk->sk_use_task_frag = false;
+ /*
+ * Publish the fully initialized ctx before publishing any entry point
+ * that can call espintcp_getctx(). The read barrier there runs after
+ * the caller has observed one of these pointers.
+ */
+ smp_wmb();
+ WRITE_ONCE(sk->sk_prot, prot);
+ WRITE_ONCE(sk->sk_socket->ops, ops);
+ WRITE_ONCE(sk->sk_data_ready, espintcp_data_ready);
+ WRITE_ONCE(sk->sk_write_space, espintcp_write_space);
+ WRITE_ONCE(sk->sk_destruct, espintcp_destruct);
+
+ strp_check_rcv(&ctx->strp);
return 0;
@@ -530,7 +546,7 @@ static void espintcp_close(struct sock *sk, long timeout)
strp_stop(&ctx->strp);
- sk->sk_prot = &tcp_prot;
+ WRITE_ONCE(sk->sk_prot, &tcp_prot);
barrier();
disable_work_sync(&ctx->work);
--
2.47.3
^ permalink raw reply related [flat|nested] 2+ messages in thread