public inbox for stable@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] xfrm: delay dev_put in xfrm_input to after transport reinject
@ 2026-03-31  9:27 Qi Tang
  2026-04-02 10:36 ` Steffen Klassert
  0 siblings, 1 reply; 4+ messages in thread
From: Qi Tang @ 2026-03-31  9:27 UTC (permalink / raw)
  To: Steffen Klassert, Herbert Xu
  Cc: David S . Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
	Simon Horman, David Ahern, netdev, Qi Tang, stable

xfrm_trans_queue() queues transport-mode packets for async reinject
via xfrm_trans_reinject() workqueue.  After async crypto completes,
xfrm_input_resume() re-enters xfrm_input() with encap_type == -1,
which immediately calls dev_put(skb->dev) before the skb reaches
transport_finish and the reinject queue.  The device can be freed
before the workqueue callback runs, causing a use-after-free when
xfrm_trans_reinject dereferences skb->dev.

Remove the dev_put from the async resumption entry and let the
reference survive through the transport reinject path.  Introduce
async variants of the NF_HOOK okfn callbacks that queue the skb
with dev_held=true and drop the reference on error.  The reinject
worker checks this flag and puts the reference after the callback
completes.

For the synchronous crypto path, the existing dev_hold/dev_put
around x->type->input() is unchanged — the reference is balanced
within the same softirq context before the skb reaches the queue.

If the loop re-enters async crypto (multi-SPI with a second
-EINPROGRESS), drop the extra reference from the earlier async
resume so exactly one reference accompanies the skb.

Fixes: acf568ee859f ("xfrm: Reinject transport-mode packets through tasklet")
Cc: stable@vger.kernel.org
Signed-off-by: Qi Tang <tpluszz77@gmail.com>
---
Changes in v2:
  - Do not add extra dev_hold/dev_put pair (reviewer feedback:
    "expensive operation, we just drop it too early")
  - Reuse existing dev_hold from xfrm_input, delay dev_put to
    reinject completion
  - Add async okfn variants for IPv4/IPv6 transport_finish so
    the reinject queue knows whether a dev ref is held
  - Drop the cb->dev field from v1; use bool dev_held flag instead

Link: https://lore.kernel.org/all/20260320073023.21873-1-tpluszz77@gmail.com/
---
 include/net/xfrm.h     |  3 ++-
 net/ipv4/esp4.c        |  3 ++-
 net/ipv4/xfrm4_input.c | 25 ++++++++++++++++++++++++-
 net/ipv6/esp6.c        |  3 ++-
 net/ipv6/xfrm6_input.c | 16 +++++++++++++++-
 net/xfrm/xfrm_input.c  | 35 ++++++++++++++++++++++++++---------
 6 files changed, 71 insertions(+), 14 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 10d3edde6b2f..1dd8b3b36649 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1779,7 +1779,8 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type);
 int xfrm_input_resume(struct sk_buff *skb, int nexthdr);
 int xfrm_trans_queue_net(struct net *net, struct sk_buff *skb,
 			 int (*finish)(struct net *, struct sock *,
-				       struct sk_buff *));
+				       struct sk_buff *),
+			 bool dev_held);
 int xfrm_trans_queue(struct sk_buff *skb,
 		     int (*finish)(struct net *, struct sock *,
 				   struct sk_buff *));
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 6dfc0bcdef65..0114c92b10d4 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -187,7 +187,8 @@ static int esp_output_tail_tcp(struct xfrm_state *x, struct sk_buff *skb)
 	int err;
 
 	local_bh_disable();
-	err = xfrm_trans_queue_net(xs_net(x), skb, esp_output_tcp_encap_cb);
+	err = xfrm_trans_queue_net(xs_net(x), skb, esp_output_tcp_encap_cb,
+				   false);
 	local_bh_enable();
 
 	/* EINPROGRESS just happens to do the right thing.  It
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index f28cfd88eaf5..9765fdc63ffc 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -46,6 +46,28 @@ static inline int xfrm4_rcv_encap_finish(struct net *net, struct sock *sk,
 	return NET_RX_DROP;
 }
 
+static int xfrm4_rcv_encap_finish_async(struct net *net, struct sock *sk,
+					struct sk_buff *skb)
+{
+	if (!skb_dst(skb)) {
+		const struct iphdr *iph = ip_hdr(skb);
+
+		if (ip_route_input_noref(skb, iph->daddr, iph->saddr,
+					 ip4h_dscp(iph), skb->dev))
+			goto drop;
+	}
+
+	if (xfrm_trans_queue_net(dev_net(skb->dev), skb,
+				 xfrm4_rcv_encap_finish2, true))
+		goto drop;
+
+	return 0;
+drop:
+	dev_put(skb->dev);
+	kfree_skb(skb);
+	return NET_RX_DROP;
+}
+
 int xfrm4_transport_finish(struct sk_buff *skb, int async)
 {
 	struct xfrm_offload *xo = xfrm_offload(skb);
@@ -74,7 +96,8 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async)
 
 	NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
 		dev_net(skb->dev), NULL, skb, skb->dev, NULL,
-		xfrm4_rcv_encap_finish);
+		async ? xfrm4_rcv_encap_finish_async :
+			xfrm4_rcv_encap_finish);
 	return 0;
 }
 
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 9f75313734f8..8a0a44d7d010 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -204,7 +204,8 @@ static int esp_output_tail_tcp(struct xfrm_state *x, struct sk_buff *skb)
 	int err;
 
 	local_bh_disable();
-	err = xfrm_trans_queue_net(xs_net(x), skb, esp_output_tcp_encap_cb);
+	err = xfrm_trans_queue_net(xs_net(x), skb, esp_output_tcp_encap_cb,
+				   false);
 	local_bh_enable();
 
 	/* EINPROGRESS just happens to do the right thing.  It
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 9005fc156a20..d4eede5315ac 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -40,6 +40,19 @@ static int xfrm6_transport_finish2(struct net *net, struct sock *sk,
 	return 0;
 }
 
+static int xfrm6_transport_finish2_async(struct net *net, struct sock *sk,
+					 struct sk_buff *skb)
+{
+	if (xfrm_trans_queue_net(dev_net(skb->dev), skb, ip6_rcv_finish,
+				 true)) {
+		dev_put(skb->dev);
+		kfree_skb(skb);
+		return NET_RX_DROP;
+	}
+
+	return 0;
+}
+
 int xfrm6_transport_finish(struct sk_buff *skb, int async)
 {
 	struct xfrm_offload *xo = xfrm_offload(skb);
@@ -69,7 +82,8 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async)
 
 	NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING,
 		dev_net(skb->dev), NULL, skb, skb->dev, NULL,
-		xfrm6_transport_finish2);
+		async ? xfrm6_transport_finish2_async :
+			xfrm6_transport_finish2);
 	return 0;
 }
 
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index dc1312ed5a09..2d75f984532a 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -40,6 +40,7 @@ struct xfrm_trans_cb {
 	} header;
 	int (*finish)(struct net *net, struct sock *sk, struct sk_buff *skb);
 	struct net *net;
+	bool dev_held;
 };
 
 #define XFRM_TRANS_SKB_CB(__skb) ((struct xfrm_trans_cb *)&((__skb)->cb[0]))
@@ -506,7 +507,6 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 		/* An encap_type of -1 indicates async resumption. */
 		if (encap_type == -1) {
 			async = 1;
-			dev_put(skb->dev);
 			seq = XFRM_SKB_CB(skb)->seq.input.low;
 			spin_lock(&x->lock);
 			goto resume;
@@ -659,8 +659,11 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 			dev_hold(skb->dev);
 
 			nexthdr = x->type->input(x, skb);
-			if (nexthdr == -EINPROGRESS)
+			if (nexthdr == -EINPROGRESS) {
+				if (async)
+					dev_put(skb->dev);
 				return 0;
+			}
 
 			dev_put(skb->dev);
 			spin_lock(&x->lock);
@@ -695,9 +698,11 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 		XFRM_MODE_SKB_CB(skb)->protocol = nexthdr;
 
 		err = xfrm_inner_mode_input(x, skb);
-		if (err == -EINPROGRESS)
+		if (err == -EINPROGRESS) {
+			if (async)
+				dev_put(skb->dev);
 			return 0;
-		else if (err) {
+		} else if (err) {
 			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR);
 			goto drop;
 		}
@@ -734,6 +739,8 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 			sp->olen = 0;
 		if (skb_valid_dst(skb))
 			skb_dst_drop(skb);
+		if (async)
+			dev_put(skb->dev);
 		gro_cells_receive(&gro_cells, skb);
 		return 0;
 	} else {
@@ -753,6 +760,8 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 				sp->olen = 0;
 			if (skb_valid_dst(skb))
 				skb_dst_drop(skb);
+			if (async)
+				dev_put(skb->dev);
 			gro_cells_receive(&gro_cells, skb);
 			return err;
 		}
@@ -763,6 +772,8 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 drop_unlock:
 	spin_unlock(&x->lock);
 drop:
+	if (async)
+		dev_put(skb->dev);
 	xfrm_rcv_cb(skb, family, x && x->type ? x->type->proto : nexthdr, -1);
 	kfree_skb(skb);
 	return 0;
@@ -787,15 +798,20 @@ static void xfrm_trans_reinject(struct work_struct *work)
 	spin_unlock_bh(&trans->queue_lock);
 
 	local_bh_disable();
-	while ((skb = __skb_dequeue(&queue)))
-		XFRM_TRANS_SKB_CB(skb)->finish(XFRM_TRANS_SKB_CB(skb)->net,
-					       NULL, skb);
+	while ((skb = __skb_dequeue(&queue))) {
+		struct xfrm_trans_cb *cb = XFRM_TRANS_SKB_CB(skb);
+		struct net_device *dev = cb->dev_held ? skb->dev : NULL;
+
+		cb->finish(cb->net, NULL, skb);
+		dev_put(dev);
+	}
 	local_bh_enable();
 }
 
 int xfrm_trans_queue_net(struct net *net, struct sk_buff *skb,
 			 int (*finish)(struct net *, struct sock *,
-				       struct sk_buff *))
+				       struct sk_buff *),
+			 bool dev_held)
 {
 	struct xfrm_trans_tasklet *trans;
 
@@ -808,6 +824,7 @@ int xfrm_trans_queue_net(struct net *net, struct sk_buff *skb,
 
 	XFRM_TRANS_SKB_CB(skb)->finish = finish;
 	XFRM_TRANS_SKB_CB(skb)->net = net;
+	XFRM_TRANS_SKB_CB(skb)->dev_held = dev_held;
 	spin_lock_bh(&trans->queue_lock);
 	__skb_queue_tail(&trans->queue, skb);
 	spin_unlock_bh(&trans->queue_lock);
@@ -820,7 +837,7 @@ int xfrm_trans_queue(struct sk_buff *skb,
 		     int (*finish)(struct net *, struct sock *,
 				   struct sk_buff *))
 {
-	return xfrm_trans_queue_net(dev_net(skb->dev), skb, finish);
+	return xfrm_trans_queue_net(dev_net(skb->dev), skb, finish, false);
 }
 EXPORT_SYMBOL(xfrm_trans_queue);
 
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2026-04-02 11:26 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-03-31  9:27 [PATCH v2] xfrm: delay dev_put in xfrm_input to after transport reinject Qi Tang
2026-04-02 10:36 ` Steffen Klassert
2026-04-02 10:54   ` Florian Westphal
2026-04-02 11:26     ` Qi Tang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox