From: Qi Tang <tpluszz77@gmail.com>
To: Steffen Klassert <steffen.klassert@secunet.com>,
Herbert Xu <herbert@gondor.apana.org.au>
Cc: "David S . Miller" <davem@davemloft.net>,
Eric Dumazet <edumazet@google.com>,
Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>,
Simon Horman <horms@kernel.org>, David Ahern <dsahern@kernel.org>,
netdev@vger.kernel.org, Qi Tang <tpluszz77@gmail.com>,
stable@vger.kernel.org
Subject: [PATCH v2] xfrm: delay dev_put in xfrm_input to after transport reinject
Date: Tue, 31 Mar 2026 17:27:37 +0800 [thread overview]
Message-ID: <20260331092737.1937-1-tpluszz77@gmail.com> (raw)
xfrm_trans_queue() queues transport-mode packets for asynchronous
reinjection by the xfrm_trans_reinject() work handler. After async
crypto completes, xfrm_input_resume() re-enters xfrm_input() with
encap_type == -1, which immediately calls dev_put(skb->dev) before the
skb reaches transport_finish and the reinject queue. The device can
therefore be freed before the workqueue callback runs, causing a
use-after-free when xfrm_trans_reinject() dereferences skb->dev.
Remove the dev_put from the async resumption entry and let the
reference survive through the transport reinject path. Introduce
async variants of the NF_HOOK okfn callbacks that queue the skb
with dev_held=true and drop the reference on error. The reinject
worker checks this flag and puts the reference after the callback
completes.
For the synchronous crypto path, the existing dev_hold/dev_put
around x->type->input() is unchanged — the reference is balanced
within the same softirq context before the skb reaches the queue.
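If the loop re-enters async crypto after an earlier async resume
(multi-SPI with a second -EINPROGRESS), drop the reference carried
over from that earlier resume: the dev_hold() taken just before
x->type->input() already covers the new async operation, so exactly
one reference accompanies the skb.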
Fixes: acf568ee859f ("xfrm: Reinject transport-mode packets through tasklet")
Cc: stable@vger.kernel.org
Signed-off-by: Qi Tang <tpluszz77@gmail.com>
---
Changes in v2:
- Do not add extra dev_hold/dev_put pair (reviewer feedback:
"expensive operation, we just drop it too early")
- Reuse existing dev_hold from xfrm_input, delay dev_put to
reinject completion
- Add async okfn variants for IPv4/IPv6 transport_finish so
the reinject queue knows whether a dev ref is held
- Drop the cb->dev field from v1; use bool dev_held flag instead
Link: https://lore.kernel.org/all/20260320073023.21873-1-tpluszz77@gmail.com/
---
include/net/xfrm.h | 3 ++-
net/ipv4/esp4.c | 3 ++-
net/ipv4/xfrm4_input.c | 25 ++++++++++++++++++++++++-
net/ipv6/esp6.c | 3 ++-
net/ipv6/xfrm6_input.c | 16 +++++++++++++++-
net/xfrm/xfrm_input.c | 35 ++++++++++++++++++++++++++---------
6 files changed, 71 insertions(+), 14 deletions(-)
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 10d3edde6b2f..1dd8b3b36649 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1779,7 +1779,8 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type);
int xfrm_input_resume(struct sk_buff *skb, int nexthdr);
int xfrm_trans_queue_net(struct net *net, struct sk_buff *skb,
int (*finish)(struct net *, struct sock *,
- struct sk_buff *));
+ struct sk_buff *),
+ bool dev_held);
int xfrm_trans_queue(struct sk_buff *skb,
int (*finish)(struct net *, struct sock *,
struct sk_buff *));
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 6dfc0bcdef65..0114c92b10d4 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -187,7 +187,8 @@ static int esp_output_tail_tcp(struct xfrm_state *x, struct sk_buff *skb)
int err;
local_bh_disable();
- err = xfrm_trans_queue_net(xs_net(x), skb, esp_output_tcp_encap_cb);
+ err = xfrm_trans_queue_net(xs_net(x), skb, esp_output_tcp_encap_cb,
+ false);
local_bh_enable();
/* EINPROGRESS just happens to do the right thing. It
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index f28cfd88eaf5..9765fdc63ffc 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -46,6 +46,28 @@ static inline int xfrm4_rcv_encap_finish(struct net *net, struct sock *sk,
return NET_RX_DROP;
}
+static int xfrm4_rcv_encap_finish_async(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
+{
+ if (!skb_dst(skb)) {
+ const struct iphdr *iph = ip_hdr(skb);
+
+ if (ip_route_input_noref(skb, iph->daddr, iph->saddr,
+ ip4h_dscp(iph), skb->dev))
+ goto drop;
+ }
+
+ if (xfrm_trans_queue_net(dev_net(skb->dev), skb,
+ xfrm4_rcv_encap_finish2, true))
+ goto drop;
+
+ return 0;
+drop:
+ dev_put(skb->dev);
+ kfree_skb(skb);
+ return NET_RX_DROP;
+}
+
int xfrm4_transport_finish(struct sk_buff *skb, int async)
{
struct xfrm_offload *xo = xfrm_offload(skb);
@@ -74,7 +96,8 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async)
NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
dev_net(skb->dev), NULL, skb, skb->dev, NULL,
- xfrm4_rcv_encap_finish);
+ async ? xfrm4_rcv_encap_finish_async :
+ xfrm4_rcv_encap_finish);
return 0;
}
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 9f75313734f8..8a0a44d7d010 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -204,7 +204,8 @@ static int esp_output_tail_tcp(struct xfrm_state *x, struct sk_buff *skb)
int err;
local_bh_disable();
- err = xfrm_trans_queue_net(xs_net(x), skb, esp_output_tcp_encap_cb);
+ err = xfrm_trans_queue_net(xs_net(x), skb, esp_output_tcp_encap_cb,
+ false);
local_bh_enable();
/* EINPROGRESS just happens to do the right thing. It
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 9005fc156a20..d4eede5315ac 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -40,6 +40,19 @@ static int xfrm6_transport_finish2(struct net *net, struct sock *sk,
return 0;
}
+static int xfrm6_transport_finish2_async(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
+{
+ if (xfrm_trans_queue_net(dev_net(skb->dev), skb, ip6_rcv_finish,
+ true)) {
+ dev_put(skb->dev);
+ kfree_skb(skb);
+ return NET_RX_DROP;
+ }
+
+ return 0;
+}
+
int xfrm6_transport_finish(struct sk_buff *skb, int async)
{
struct xfrm_offload *xo = xfrm_offload(skb);
@@ -69,7 +82,8 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async)
NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING,
dev_net(skb->dev), NULL, skb, skb->dev, NULL,
- xfrm6_transport_finish2);
+ async ? xfrm6_transport_finish2_async :
+ xfrm6_transport_finish2);
return 0;
}
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index dc1312ed5a09..2d75f984532a 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -40,6 +40,7 @@ struct xfrm_trans_cb {
} header;
int (*finish)(struct net *net, struct sock *sk, struct sk_buff *skb);
struct net *net;
+ bool dev_held;
};
#define XFRM_TRANS_SKB_CB(__skb) ((struct xfrm_trans_cb *)&((__skb)->cb[0]))
@@ -506,7 +507,6 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
/* An encap_type of -1 indicates async resumption. */
if (encap_type == -1) {
async = 1;
- dev_put(skb->dev);
seq = XFRM_SKB_CB(skb)->seq.input.low;
spin_lock(&x->lock);
goto resume;
@@ -659,8 +659,11 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
dev_hold(skb->dev);
nexthdr = x->type->input(x, skb);
- if (nexthdr == -EINPROGRESS)
+ if (nexthdr == -EINPROGRESS) {
+ if (async)
+ dev_put(skb->dev);
return 0;
+ }
dev_put(skb->dev);
spin_lock(&x->lock);
@@ -695,9 +698,11 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
XFRM_MODE_SKB_CB(skb)->protocol = nexthdr;
err = xfrm_inner_mode_input(x, skb);
- if (err == -EINPROGRESS)
+ if (err == -EINPROGRESS) {
+ if (async)
+ dev_put(skb->dev);
return 0;
- else if (err) {
+ } else if (err) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR);
goto drop;
}
@@ -734,6 +739,8 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
sp->olen = 0;
if (skb_valid_dst(skb))
skb_dst_drop(skb);
+ if (async)
+ dev_put(skb->dev);
gro_cells_receive(&gro_cells, skb);
return 0;
} else {
@@ -753,6 +760,8 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
sp->olen = 0;
if (skb_valid_dst(skb))
skb_dst_drop(skb);
+ if (async)
+ dev_put(skb->dev);
gro_cells_receive(&gro_cells, skb);
return err;
}
@@ -763,6 +772,8 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
drop_unlock:
spin_unlock(&x->lock);
drop:
+ if (async)
+ dev_put(skb->dev);
xfrm_rcv_cb(skb, family, x && x->type ? x->type->proto : nexthdr, -1);
kfree_skb(skb);
return 0;
@@ -787,15 +798,20 @@ static void xfrm_trans_reinject(struct work_struct *work)
spin_unlock_bh(&trans->queue_lock);
local_bh_disable();
- while ((skb = __skb_dequeue(&queue)))
- XFRM_TRANS_SKB_CB(skb)->finish(XFRM_TRANS_SKB_CB(skb)->net,
- NULL, skb);
+ while ((skb = __skb_dequeue(&queue))) {
+ struct xfrm_trans_cb *cb = XFRM_TRANS_SKB_CB(skb);
+ struct net_device *dev = cb->dev_held ? skb->dev : NULL;
+
+ cb->finish(cb->net, NULL, skb);
+ dev_put(dev);
+ }
local_bh_enable();
}
int xfrm_trans_queue_net(struct net *net, struct sk_buff *skb,
int (*finish)(struct net *, struct sock *,
- struct sk_buff *))
+ struct sk_buff *),
+ bool dev_held)
{
struct xfrm_trans_tasklet *trans;
@@ -808,6 +824,7 @@ int xfrm_trans_queue_net(struct net *net, struct sk_buff *skb,
XFRM_TRANS_SKB_CB(skb)->finish = finish;
XFRM_TRANS_SKB_CB(skb)->net = net;
+ XFRM_TRANS_SKB_CB(skb)->dev_held = dev_held;
spin_lock_bh(&trans->queue_lock);
__skb_queue_tail(&trans->queue, skb);
spin_unlock_bh(&trans->queue_lock);
@@ -820,7 +837,7 @@ int xfrm_trans_queue(struct sk_buff *skb,
int (*finish)(struct net *, struct sock *,
struct sk_buff *))
{
- return xfrm_trans_queue_net(dev_net(skb->dev), skb, finish);
+ return xfrm_trans_queue_net(dev_net(skb->dev), skb, finish, false);
}
EXPORT_SYMBOL(xfrm_trans_queue);
--
2.43.0
next reply other threads:[~2026-03-31 9:27 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-31 9:27 Qi Tang [this message]
2026-04-02 10:36 ` [PATCH v2] xfrm: delay dev_put in xfrm_input to after transport reinject Steffen Klassert
2026-04-02 10:54 ` Florian Westphal
2026-04-02 11:26 ` Qi Tang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260331092737.1937-1-tpluszz77@gmail.com \
--to=tpluszz77@gmail.com \
--cc=davem@davemloft.net \
--cc=dsahern@kernel.org \
--cc=edumazet@google.com \
--cc=herbert@gondor.apana.org.au \
--cc=horms@kernel.org \
--cc=kuba@kernel.org \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=stable@vger.kernel.org \
--cc=steffen.klassert@secunet.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox