* [PATCH 01/10] xfrm: route MIGRATE notifications to caller's netns
2026-05-29 9:26 [PATCH 0/10] pull request (net): ipsec 2026-05-29 Steffen Klassert
@ 2026-05-29 9:26 ` Steffen Klassert
2026-05-29 20:20 ` patchwork-bot+netdevbpf
2026-05-29 9:26 ` [PATCH 02/10] xfrm: ipcomp: Free destination pages on acomp errors Steffen Klassert
` (8 subsequent siblings)
9 siblings, 1 reply; 12+ messages in thread
From: Steffen Klassert @ 2026-05-29 9:26 UTC (permalink / raw)
To: David Miller, Jakub Kicinski; +Cc: Herbert Xu, Steffen Klassert, netdev
From: Maoyi Xie <maoyixie.tju@gmail.com>
xfrm_send_migrate() in net/xfrm/xfrm_user.c and pfkey_send_migrate()
in net/key/af_key.c both hardcode &init_net for the multicast that
announces a successful XFRM_MSG_MIGRATE / SADB_X_MIGRATE.
XFRM_MSG_MIGRATE arrives on a per-netns NETLINK_XFRM socket, and the
rest of the xfrm/af_key netlink path was made netns-aware in 2008.
The other 14 multicast paths in xfrm_user.c route their event using
xs_net(x), xp_net(xp) or sock_net(skb->sk); only the migrate path
was missed.
Two consequences of the init_net hardcoding:
1. The notification (selector, old/new endpoint addresses, and the
km_address) is delivered to listeners on init_net's
XFRMNLGRP_MIGRATE / pfkey BROADCAST_ALL groups rather than on
the issuing netns. An IKE daemon running in init_net therefore
receives migration notifications originating from any other
netns on the host.
2. An IKE daemon running inside a non-init netns and subscribed
to its own XFRMNLGRP_MIGRATE / pfkey groups never receives the
notification of its own migration. IKEv2 MOBIKE / address-update
handling inside a netns is silently broken.
Thread struct net through km_migrate() and the xfrm_mgr.migrate
function pointer, drop the &init_net override in xfrm_send_migrate()
and pfkey_send_migrate(), and pass the caller's net (already in
scope in xfrm_migrate() via sock_net(skb->sk)) all the way down.
struct xfrm_mgr is in-tree only and not exported as a stable API,
so the function-pointer signature change is internal.
pfkey_broadcast() is already netns-aware via net_generic(net,
pfkey_net_id) since the pernet conversion. The five other
pfkey_broadcast() callers in af_key.c already pass xs_net(x),
sock_net(sk) or a per-netns net, so this only removes the
&init_net outlier.
Fixes: 5c79de6e79cd ("[XFRM]: User interface for handling XFRM_MSG_MIGRATE")
Cc: stable@vger.kernel.org # v5.15+
Signed-off-by: Maoyi Xie <maoyi.xie@ntu.edu.sg>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
include/net/xfrm.h | 3 ++-
net/key/af_key.c | 6 +++---
net/xfrm/xfrm_policy.c | 2 +-
net/xfrm/xfrm_state.c | 4 ++--
net/xfrm/xfrm_user.c | 5 ++---
5 files changed, 10 insertions(+), 10 deletions(-)
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 10d3edde6b2f..874409127e29 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -715,6 +715,7 @@ struct xfrm_mgr {
const struct xfrm_migrate *m,
int num_bundles,
const struct xfrm_kmaddress *k,
+ struct net *net,
const struct xfrm_encap_tmpl *encap);
bool (*is_alive)(const struct km_event *c);
};
@@ -1891,7 +1892,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol);
#ifdef CONFIG_XFRM_MIGRATE
int km_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
const struct xfrm_migrate *m, int num_bundles,
- const struct xfrm_kmaddress *k,
+ const struct xfrm_kmaddress *k, struct net *net,
const struct xfrm_encap_tmpl *encap);
struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net,
u32 if_id);
diff --git a/net/key/af_key.c b/net/key/af_key.c
index a166a88d8788..9cffeef18cd9 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -3564,7 +3564,7 @@ static int set_ipsecrequest(struct sk_buff *skb,
#ifdef CONFIG_NET_KEY_MIGRATE
static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
const struct xfrm_migrate *m, int num_bundles,
- const struct xfrm_kmaddress *k,
+ const struct xfrm_kmaddress *k, struct net *net,
const struct xfrm_encap_tmpl *encap)
{
int i;
@@ -3669,7 +3669,7 @@ static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
}
/* broadcast migrate message to sockets */
- pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, &init_net);
+ pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, net);
return 0;
@@ -3680,7 +3680,7 @@ static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
#else
static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
const struct xfrm_migrate *m, int num_bundles,
- const struct xfrm_kmaddress *k,
+ const struct xfrm_kmaddress *k, struct net *net,
const struct xfrm_encap_tmpl *encap)
{
return -ENOPROTOOPT;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index c944327ce66c..59968dcbafe1 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -4703,7 +4703,7 @@ int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
}
/* Stage 5 - announce */
- km_migrate(sel, dir, type, m, num_migrate, k, encap);
+ km_migrate(sel, dir, type, m, num_migrate, k, net, encap);
xfrm_pol_put(pol);
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 686014d39429..395d82411a87 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -2837,7 +2837,7 @@ EXPORT_SYMBOL(km_policy_expired);
#ifdef CONFIG_XFRM_MIGRATE
int km_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
const struct xfrm_migrate *m, int num_migrate,
- const struct xfrm_kmaddress *k,
+ const struct xfrm_kmaddress *k, struct net *net,
const struct xfrm_encap_tmpl *encap)
{
int err = -EINVAL;
@@ -2848,7 +2848,7 @@ int km_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
list_for_each_entry_rcu(km, &xfrm_km_list, list) {
if (km->migrate) {
ret = km->migrate(sel, dir, type, m, num_migrate, k,
- encap);
+ net, encap);
if (!ret)
err = ret;
}
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 38a90e5ee3d9..71a4b7278eba 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -3271,10 +3271,9 @@ static int build_migrate(struct sk_buff *skb, const struct xfrm_migrate *m,
static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
const struct xfrm_migrate *m, int num_migrate,
- const struct xfrm_kmaddress *k,
+ const struct xfrm_kmaddress *k, struct net *net,
const struct xfrm_encap_tmpl *encap)
{
- struct net *net = &init_net;
struct sk_buff *skb;
int err;
@@ -3292,7 +3291,7 @@ static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
#else
static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
const struct xfrm_migrate *m, int num_migrate,
- const struct xfrm_kmaddress *k,
+ const struct xfrm_kmaddress *k, struct net *net,
const struct xfrm_encap_tmpl *encap)
{
return -ENOPROTOOPT;
--
2.43.0
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH 02/10] xfrm: ipcomp: Free destination pages on acomp errors
2026-05-29 9:26 [PATCH 0/10] pull request (net): ipsec 2026-05-29 Steffen Klassert
2026-05-29 9:26 ` [PATCH 01/10] xfrm: route MIGRATE notifications to caller's netns Steffen Klassert
@ 2026-05-29 9:26 ` Steffen Klassert
2026-05-29 9:26 ` [PATCH 03/10] xfrm: Check for underflow in xfrm_state_mtu Steffen Klassert
` (7 subsequent siblings)
9 siblings, 0 replies; 12+ messages in thread
From: Steffen Klassert @ 2026-05-29 9:26 UTC (permalink / raw)
To: David Miller, Jakub Kicinski; +Cc: Herbert Xu, Steffen Klassert, netdev
From: Herbert Xu <herbert@gondor.apana.org.au>
Move the out_free_req label up by a couple of lines so that the
allocated dst SG list gets freed on error as well as success.
Fixes: eb2953d26971 ("xfrm: ipcomp: Use crypto_acomp interface")
Cc: stable@kernel.org
Reported-by: Yuan Tan <yuantan098@gmail.com>
Reported-by: Yifan Wu <yifanwucs@gmail.com>
Reported-by: Juefei Pu <tomapufckgml@gmail.com>
Reported-by: Xin Liu <bird@lzu.edu.cn>
Reported-by: Yilin Zhu <zylzyl2333@gmail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
net/xfrm/xfrm_ipcomp.c | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c
index 5f38dff16177..671d48f8c937 100644
--- a/net/xfrm/xfrm_ipcomp.c
+++ b/net/xfrm/xfrm_ipcomp.c
@@ -51,11 +51,15 @@ static int ipcomp_post_acomp(struct sk_buff *skb, int err, int hlen)
struct scatterlist *dsg;
int len, dlen;
- if (unlikely(err))
- goto out_free_req;
+ if (unlikely(!req))
+ return err;
extra = acomp_request_extra(req);
dsg = extra->sg;
+
+ if (unlikely(err))
+ goto out_free_req;
+
dlen = req->dlen;
pskb_trim_unique(skb, 0);
@@ -84,10 +88,10 @@ static int ipcomp_post_acomp(struct sk_buff *skb, int err, int hlen)
skb_shinfo(skb)->nr_frags++;
} while ((dlen -= len));
- for (; dsg; dsg = sg_next(dsg))
+out_free_req:
+ for (; dsg && sg_page(dsg); dsg = sg_next(dsg))
__free_page(sg_page(dsg));
-out_free_req:
acomp_request_free(req);
return err;
}
--
2.43.0
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH 03/10] xfrm: Check for underflow in xfrm_state_mtu
2026-05-29 9:26 [PATCH 0/10] pull request (net): ipsec 2026-05-29 Steffen Klassert
2026-05-29 9:26 ` [PATCH 01/10] xfrm: route MIGRATE notifications to caller's netns Steffen Klassert
2026-05-29 9:26 ` [PATCH 02/10] xfrm: ipcomp: Free destination pages on acomp errors Steffen Klassert
@ 2026-05-29 9:26 ` Steffen Klassert
2026-05-29 9:26 ` [PATCH 04/10] xfrm: ah: use skb_to_full_sk in async output callbacks Steffen Klassert
` (6 subsequent siblings)
9 siblings, 0 replies; 12+ messages in thread
From: Steffen Klassert @ 2026-05-29 9:26 UTC (permalink / raw)
To: David Miller, Jakub Kicinski; +Cc: Herbert Xu, Steffen Klassert, netdev
From: David Ahern <dahern@nvidia.com>
Leo Lin reported OOB write issue in esp component:
xfrm_state_mtu() returns u32 but performs its arithmetic in unsigned
modulo-2^32 space using an attacker-influenced "header_len + authsize +
net_adj" subtracted from a small "mtu" argument. A nobody user can
install an IPv4 ESP tunnel SA with a large authentication key
(XFRMA_ALG_AUTH_TRUNC, e.g. hmac(sha512), 64-byte key, 64-byte trunc),
configure a small interface MTU (68 bytes), and set XFRMA_TFCPAD to a
large value. When a single UDP datagram is then sent through the
tunnel, xfrm_state_mtu() underflows to a near-2^32 value, and
esp_output() consumes it as a signed int via:
padto = min(x->tfcpad, xfrm_state_mtu(x, mtu_cached))
esp.tfclen = padto - skb->len (assigned to int)
esp.tfclen ends up negative (e.g. -207). It is sign-extended to size_t
when passed to memset() inside esp_output_fill_trailer(), producing a
~16 EB write of zeroes at skb_tail_pointer(skb). KASAN logs it as
"Write of size 18446744073709551537 at addr ffff888...".
Check for underflow and return 1. This causes the sendmsg attempt to
fail with ENETUNREACH.
Fixes: c5c252389374 ("[XFRM]: Optimize MTU calculation")
Reported-by: Leo Lin <leo@depthfirst.com>
Assisted-by: Codex:26.506.31004
Signed-off-by: David Ahern <dahern@nvidia.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
net/xfrm/xfrm_state.c | 19 ++++++++++++++++---
1 file changed, 16 insertions(+), 3 deletions(-)
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 395d82411a87..589c3b6e4679 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -3114,10 +3114,14 @@ u32 xfrm_state_mtu(struct xfrm_state *x, int mtu)
const struct xfrm_type *type = READ_ONCE(x->type);
struct crypto_aead *aead;
u32 blksize, net_adj = 0;
+ u32 overhead, payload_mtu;
if (x->km.state != XFRM_STATE_VALID ||
- !type || type->proto != IPPROTO_ESP)
+ !type || type->proto != IPPROTO_ESP) {
+ if (mtu <= x->props.header_len)
+ return 1;
return mtu - x->props.header_len;
+ }
aead = x->data;
blksize = ALIGN(crypto_aead_blocksize(aead), 4);
@@ -3140,8 +3144,17 @@ u32 xfrm_state_mtu(struct xfrm_state *x, int mtu)
break;
}
- return ((mtu - x->props.header_len - crypto_aead_authsize(aead) -
- net_adj) & ~(blksize - 1)) + net_adj - 2;
+ overhead = x->props.header_len + crypto_aead_authsize(aead) + net_adj;
+ if (mtu <= overhead)
+ return 1;
+
+ payload_mtu = mtu - overhead;
+ payload_mtu &= ~(blksize - 1);
+ if (payload_mtu <= 2)
+ return 1;
+
+ return payload_mtu + net_adj - 2;
+
}
EXPORT_SYMBOL_GPL(xfrm_state_mtu);
--
2.43.0
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH 04/10] xfrm: ah: use skb_to_full_sk in async output callbacks
2026-05-29 9:26 [PATCH 0/10] pull request (net): ipsec 2026-05-29 Steffen Klassert
` (2 preceding siblings ...)
2026-05-29 9:26 ` [PATCH 03/10] xfrm: Check for underflow in xfrm_state_mtu Steffen Klassert
@ 2026-05-29 9:26 ` Steffen Klassert
2026-05-29 9:26 ` [PATCH 05/10] esp: fix page frag reference leak on skb_to_sgvec failure Steffen Klassert
` (5 subsequent siblings)
9 siblings, 0 replies; 12+ messages in thread
From: Steffen Klassert @ 2026-05-29 9:26 UTC (permalink / raw)
To: David Miller, Jakub Kicinski; +Cc: Herbert Xu, Steffen Klassert, netdev
From: Michael Bommarito <michael.bommarito@gmail.com>
When AH output is offloaded to an asynchronous crypto provider
(hardware accelerators such as AMD CCP, or a forced-async software
shim used for testing), the digest completion fires
ah_output_done() / ah6_output_done() on a workqueue. The egress
skb at that point may have been originated by a TCP listener
sending a SYN-ACK, which sets skb->sk to a request_sock via
skb_set_owner_edemux(); it may also have been originated by an
inet_timewait_sock retransmit. Neither is a full struct sock, and
passing the raw skb->sk to xfrm_output_resume() then forwards a
non-full socket through the rest of the xfrm output chain.
xfrm_output_resume() and its downstream consumers expect a full
sk where they dereference at all. The natural egress path
through ah_output_done() does not crash today because the
consumers that read past sock_common are either gated by
sk_fullsock() or short-circuit on flags that are clear on a fresh
request_sock; an exhaustive walk of the 50 most plausible
consumers under sch_fq, dev_queue_xmit, netfilter, tc-egress and
cgroup-egress BPF found no current unguarded deref. The bug is
still a real type confusion that future consumer changes could
turn into a memory-corruption primitive.
This is the same bug class fixed for ESP in commit 1620c88887b1
("xfrm: Fix the usage of skb->sk"). Apply the analogous fix to
AH: convert skb->sk to a full socket pointer (or NULL) via
skb_to_full_sk() before handing it to xfrm_output_resume().
The same async AH callbacks were touched recently for an
independent ESN-related ICV layout bug in commit ec54093e6a8f
("xfrm: ah: account for ESN high bits in async callbacks"); the
sk type-confusion addressed here is orthogonal. This patch is
part of an ongoing audit of the AH callback paths; an ah_output
ihl-validation hardening series is also currently under review on
netdev.
Reproduced under UML + KASAN + lockdep with a forced-async
hmac(sha1) shim that registers at priority 9999 and wraps the
sync in-tree hmac-sha1-lib. With the shim loaded, ah_output_done
runs on every SYN-ACK egress through a transport-mode AH SA and
skb->sk arrives as a request_sock (TCP_NEW_SYN_RECV); after this
patch, xfrm_output_resume() receives the listener (the result of
sk_to_full_sk()) and consumer derefs land on full-sock fields as
intended.
Fixes: 9ab1265d5231 ("xfrm: Use actual socket sk instead of skb socket for xfrm_output_resume")
Cc: stable@vger.kernel.org
Assisted-by: Claude:claude-opus-4-7
Signed-off-by: Michael Bommarito <michael.bommarito@gmail.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
net/ipv4/ah4.c | 2 +-
net/ipv6/ah6.c | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 4366cbac3f06..6fd642d2278d 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -143,7 +143,7 @@ static void ah_output_done(void *data, int err)
}
kfree(AH_SKB_CB(skb)->tmp);
- xfrm_output_resume(skb->sk, skb, err);
+ xfrm_output_resume(skb_to_full_sk(skb), skb, err);
}
static int ah_output(struct xfrm_state *x, struct sk_buff *skb)
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index de1e68199a01..76f7a2de9108 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -337,7 +337,7 @@ static void ah6_output_done(void *data, int err)
ah6_restore_hdrs(top_iph, iph_ext, extlen);
kfree(AH_SKB_CB(skb)->tmp);
- xfrm_output_resume(skb->sk, skb, err);
+ xfrm_output_resume(skb_to_full_sk(skb), skb, err);
}
static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
--
2.43.0
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH 05/10] esp: fix page frag reference leak on skb_to_sgvec failure
2026-05-29 9:26 [PATCH 0/10] pull request (net): ipsec 2026-05-29 Steffen Klassert
` (3 preceding siblings ...)
2026-05-29 9:26 ` [PATCH 04/10] xfrm: ah: use skb_to_full_sk in async output callbacks Steffen Klassert
@ 2026-05-29 9:26 ` Steffen Klassert
2026-05-29 9:26 ` [PATCH 06/10] xfrm: esp: restore combined single-frag length gate Steffen Klassert
` (4 subsequent siblings)
9 siblings, 0 replies; 12+ messages in thread
From: Steffen Klassert @ 2026-05-29 9:26 UTC (permalink / raw)
To: David Miller, Jakub Kicinski; +Cc: Herbert Xu, Steffen Klassert, netdev
From: e521588 <alessandro.schino@sbb.ch>
In esp_output_tail(), when esp->inplace is false, the old skb page frags
are replaced with a new page from the xfrm page_frag cache. The source
scatterlist (sg) is built from the old frags before the replacement, and
esp_ssg_unref() is responsible for releasing the old page references
after the crypto operation completes.
However, if the second skb_to_sgvec() call (which builds the destination
scatterlist from the new page) fails, the code jumps to error_free which
only calls kfree(tmp). The old page frag references captured in the
source scatterlist are never released:
1. sg[] is built from old frags via skb_to_sgvec() (no extra get_page)
2. nr_frags is set to 1 and frag[0] is replaced with the new page
3. Second skb_to_sgvec() fails -> goto error_free
4. kfree(tmp) frees the sg[] memory but old frags are not unref'd
5. kfree_skb() only releases frag[0] (the new page), not the old ones
Fix this by adding a bool parameter to esp_ssg_unref() that, when true,
unconditionally unrefs the source scatterlist frags without checking
req->src and req->dst, since those fields are not yet initialized by
aead_request_set_crypt() at the point of the error. Existing callers
pass false to preserve the original behavior.
The same issue exists in both esp4 and esp6 as the code is identical.
Fixes: cac2661c53f3 ("esp4: Avoid skb_cow_data whenever possible")
Fixes: 03e2a30f6a27 ("esp6: Avoid skb_cow_data whenever possible")
Signed-off-by: Alessandro Schino <7991aleschino@gmail.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
net/ipv4/esp4.c | 12 +++++++-----
net/ipv6/esp6.c | 12 +++++++-----
2 files changed, 14 insertions(+), 10 deletions(-)
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 6a5febbdbee4..8314d7bddcb7 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -96,7 +96,7 @@ static inline struct scatterlist *esp_req_sg(struct crypto_aead *aead,
__alignof__(struct scatterlist));
}
-static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb)
+static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb, bool already_unref)
{
struct crypto_aead *aead = x->data;
int extralen = 0;
@@ -113,7 +113,7 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb)
/* Unref skb_frag_pages in the src scatterlist if necessary.
* Skip the first sg which comes from skb->data.
*/
- if (req->src != req->dst)
+ if (already_unref || req->src != req->dst)
for (sg = sg_next(req->src); sg; sg = sg_next(sg))
skb_page_unref(page_to_netmem(sg_page(sg)),
skb->pp_recycle);
@@ -220,7 +220,7 @@ static void esp_output_done(void *data, int err)
}
tmp = ESP_SKB_CB(skb)->tmp;
- esp_ssg_unref(x, tmp, skb);
+ esp_ssg_unref(x, tmp, skb, false);
kfree(tmp);
if (xo && (xo->flags & XFRM_DEV_RESUME)) {
@@ -569,8 +569,10 @@ int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
err = skb_to_sgvec(skb, dsg,
(unsigned char *)esph - skb->data,
assoclen + ivlen + esp->clen + alen);
- if (unlikely(err < 0))
+ if (unlikely(err < 0)) {
+ esp_ssg_unref(x, tmp, skb, true);
goto error_free;
+ }
}
if ((x->props.flags & XFRM_STATE_ESN))
@@ -602,7 +604,7 @@ int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
}
if (sg != dsg)
- esp_ssg_unref(x, tmp, skb);
+ esp_ssg_unref(x, tmp, skb, false);
if (!err && x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP)
err = esp_output_tail_tcp(x, skb);
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 9c06c5a1419d..9d0c4957ac62 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -113,7 +113,7 @@ static inline struct scatterlist *esp_req_sg(struct crypto_aead *aead,
__alignof__(struct scatterlist));
}
-static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb)
+static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb, bool already_unref)
{
struct crypto_aead *aead = x->data;
int extralen = 0;
@@ -130,7 +130,7 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb)
/* Unref skb_frag_pages in the src scatterlist if necessary.
* Skip the first sg which comes from skb->data.
*/
- if (req->src != req->dst)
+ if (already_unref || req->src != req->dst)
for (sg = sg_next(req->src); sg; sg = sg_next(sg))
skb_page_unref(page_to_netmem(sg_page(sg)),
skb->pp_recycle);
@@ -254,7 +254,7 @@ static void esp_output_done(void *data, int err)
}
tmp = ESP_SKB_CB(skb)->tmp;
- esp_ssg_unref(x, tmp, skb);
+ esp_ssg_unref(x, tmp, skb, false);
kfree(tmp);
esp_output_encap_csum(skb);
@@ -600,8 +600,10 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
err = skb_to_sgvec(skb, dsg,
(unsigned char *)esph - skb->data,
assoclen + ivlen + esp->clen + alen);
- if (unlikely(err < 0))
+ if (unlikely(err < 0)) {
+ esp_ssg_unref(x, tmp, skb, true);
goto error_free;
+ }
}
if ((x->props.flags & XFRM_STATE_ESN))
@@ -634,7 +636,7 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
}
if (sg != dsg)
- esp_ssg_unref(x, tmp, skb);
+ esp_ssg_unref(x, tmp, skb, false);
if (!err && x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP)
err = esp_output_tail_tcp(x, skb);
--
2.43.0
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH 06/10] xfrm: esp: restore combined single-frag length gate
2026-05-29 9:26 [PATCH 0/10] pull request (net): ipsec 2026-05-29 Steffen Klassert
` (4 preceding siblings ...)
2026-05-29 9:26 ` [PATCH 05/10] esp: fix page frag reference leak on skb_to_sgvec failure Steffen Klassert
@ 2026-05-29 9:26 ` Steffen Klassert
2026-05-29 9:26 ` [PATCH 07/10] xfrm: iptfs: reset runtime state when cloning SAs Steffen Klassert
` (3 subsequent siblings)
9 siblings, 0 replies; 12+ messages in thread
From: Steffen Klassert @ 2026-05-29 9:26 UTC (permalink / raw)
To: David Miller, Jakub Kicinski; +Cc: Herbert Xu, Steffen Klassert, netdev
From: Jingguo Tan <tanjingguo@huawei.com>
The ESP out-of-place fast path appends the trailer in esp_output_head()
before esp_output_tail() allocates the destination page frag. The
head-side gate currently checks skb->data_len and tailen separately, but
the tail code allocates a single destination frag from the combined
post-trailer skb->data_len.
Reject the page-frag fast path when the combined aligned length exceeds a
page. Otherwise skb_page_frag_refill() may fall back to a single page while
the destination sg still spans the combined skb->data_len.
Restore this combined-length page gate for both IPv4 and IPv6.
Fixes: 5bd8baab087d ("esp: limit skb_page_frag_refill use to a single page")
Cc: stable@vger.kernel.org
Signed-off-by: Lin Ma <malin89@huawei.com>
Signed-off-by: Chenyuan Mi <michenyuan@huawei.com>
Signed-off-by: Jingguo Tan <tanjingguo@huawei.com>
Reviewed-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
net/ipv4/esp4.c | 4 ++--
net/ipv6/esp6.c | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 8314d7bddcb7..5d3a8656687e 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -419,8 +419,8 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
return err;
}
- if (ALIGN(tailen, L1_CACHE_BYTES) > PAGE_SIZE ||
- ALIGN(skb->data_len, L1_CACHE_BYTES) > PAGE_SIZE)
+ if (ALIGN(skb->data_len + tailen, L1_CACHE_BYTES) >
+ PAGE_SIZE)
goto cow;
if (!skb_cloned(skb)) {
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 9d0c4957ac62..b963b8e72604 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -448,8 +448,8 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
return err;
}
- if (ALIGN(tailen, L1_CACHE_BYTES) > PAGE_SIZE ||
- ALIGN(skb->data_len, L1_CACHE_BYTES) > PAGE_SIZE)
+ if (ALIGN(skb->data_len + tailen, L1_CACHE_BYTES) >
+ PAGE_SIZE)
goto cow;
if (!skb_cloned(skb)) {
--
2.43.0
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH 07/10] xfrm: iptfs: reset runtime state when cloning SAs
2026-05-29 9:26 [PATCH 0/10] pull request (net): ipsec 2026-05-29 Steffen Klassert
` (5 preceding siblings ...)
2026-05-29 9:26 ` [PATCH 06/10] xfrm: esp: restore combined single-frag length gate Steffen Klassert
@ 2026-05-29 9:26 ` Steffen Klassert
2026-05-29 9:26 ` [PATCH 08/10] xfrm: move policy_bydst RCU sync from per-netns .exit to .pre_exit Steffen Klassert
` (2 subsequent siblings)
9 siblings, 0 replies; 12+ messages in thread
From: Steffen Klassert @ 2026-05-29 9:26 UTC (permalink / raw)
To: David Miller, Jakub Kicinski; +Cc: Herbert Xu, Steffen Klassert, netdev
From: Shaomin Chen <eeesssooo020@gmail.com>
iptfs_clone_state() clones the IPTFS mode data with kmemdup(). This
copies runtime objects which must not be shared with the original SA,
including the embedded sk_buff_head, hrtimers, spinlock, and in-flight
reassembly/reorder state.
If xfrm_state_migrate() fails after clone_state() but before the later
init_state() call has reinitialized those fields, the cloned state can be
destroyed by xfrm_state_gc_task() with list and timer state copied from the
original SA. With queued packets this lets the clone splice and free skbs
owned by the original IPTFS queue, leading to use-after-free and
double-free reports in iptfs_destroy_state() and skb release paths.
Reinitialize the clone's runtime state before publishing it through
x->mode_data. Because clone_state() now publishes a destroyable mode_data
object before init_state(), take the mode callback module reference there.
Avoid taking it again from __iptfs_init_state() for the same object.
Fixes: 0e4fbf013fa5 ("xfrm: iptfs: add user packet (tunnel ingress) handling")
Cc: stable@vger.kernel.org
Signed-off-by: Shaomin Chen <eeesssooo020@gmail.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
net/xfrm/xfrm_iptfs.c | 28 +++++++++++++++++++++++-----
1 file changed, 23 insertions(+), 5 deletions(-)
diff --git a/net/xfrm/xfrm_iptfs.c b/net/xfrm/xfrm_iptfs.c
index 97bc979e55ba..6c6bbc040517 100644
--- a/net/xfrm/xfrm_iptfs.c
+++ b/net/xfrm/xfrm_iptfs.c
@@ -2650,7 +2650,8 @@ static void __iptfs_init_state(struct xfrm_state *x,
x->props.enc_hdr_len = sizeof(struct ip_iptfs_hdr);
/* Always keep a module reference when x->mode_data is set */
- __module_get(x->mode_cbs->owner);
+ if (x->mode_data != xtfs)
+ __module_get(x->mode_cbs->owner);
x->mode_data = xtfs;
xtfs->x = x;
@@ -2658,22 +2659,39 @@ static void __iptfs_init_state(struct xfrm_state *x,
static int iptfs_clone_state(struct xfrm_state *x, struct xfrm_state *orig)
{
+ struct skb_wseq *w_saved = NULL;
struct xfrm_iptfs_data *xtfs;
xtfs = kmemdup(orig->mode_data, sizeof(*xtfs), GFP_KERNEL);
if (!xtfs)
return -ENOMEM;
- xtfs->ra_newskb = NULL;
if (xtfs->cfg.reorder_win_size) {
- xtfs->w_saved = kzalloc_objs(*xtfs->w_saved,
- xtfs->cfg.reorder_win_size);
- if (!xtfs->w_saved) {
+ w_saved = kzalloc_objs(*w_saved, xtfs->cfg.reorder_win_size);
+ if (!w_saved) {
kfree_sensitive(xtfs);
return -ENOMEM;
}
}
+ xtfs->w_saved = w_saved;
+
+ __skb_queue_head_init(&xtfs->queue);
+ xtfs->queue_size = 0;
+ hrtimer_setup(&xtfs->iptfs_timer, iptfs_delay_timer, CLOCK_MONOTONIC,
+ IPTFS_HRTIMER_MODE);
+
+ spin_lock_init(&xtfs->drop_lock);
+ hrtimer_setup(&xtfs->drop_timer, iptfs_drop_timer, CLOCK_MONOTONIC,
+ IPTFS_HRTIMER_MODE);
+ xtfs->w_seq_set = false;
+ xtfs->w_wantseq = 0;
+ xtfs->w_savedlen = 0;
+ xtfs->ra_newskb = NULL;
+ xtfs->ra_wantseq = 0;
+ xtfs->ra_runtlen = 0;
+
+ __module_get(x->mode_cbs->owner);
x->mode_data = xtfs;
xtfs->x = x;
--
2.43.0
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH 08/10] xfrm: move policy_bydst RCU sync from per-netns .exit to .pre_exit
2026-05-29 9:26 [PATCH 0/10] pull request (net): ipsec 2026-05-29 Steffen Klassert
` (6 preceding siblings ...)
2026-05-29 9:26 ` [PATCH 07/10] xfrm: iptfs: reset runtime state when cloning SAs Steffen Klassert
@ 2026-05-29 9:26 ` Steffen Klassert
2026-05-29 9:26 ` [PATCH 09/10] xfrm: input: hold netns during deferred transport reinjection Steffen Klassert
2026-05-29 9:26 ` [PATCH 10/10] Revert "esp: fix page frag reference leak on skb_to_sgvec failure" Steffen Klassert
9 siblings, 0 replies; 12+ messages in thread
From: Steffen Klassert @ 2026-05-29 9:26 UTC (permalink / raw)
To: David Miller, Jakub Kicinski; +Cc: Herbert Xu, Steffen Klassert, netdev
From: Usama Arif <usama.arif@linux.dev>
The struct pernet_operations docstring in include/net/net_namespace.h
explicitly warns against blocking RCU primitives in .exit handlers:
Exit methods using blocking RCU primitives, such as
synchronize_rcu(), should be implemented via exit_batch.
[...]
Please, avoid synchronize_rcu() at all, where it's possible.
Note that a combination of pre_exit() and exit() can
be used, since a synchronize_rcu() is guaranteed between
the calls.
xfrm_policy_fini() violates this: it calls synchronize_rcu() before
freeing the policy_bydst hash tables (so no RCU reader is mid-
traversal at free time), but runs from xfrm_net_ops.exit -- once per
namespace -- so a cleanup_net() of N namespaces pays N full RCU
grace periods serially.
Use the documented pre_exit/exit split. Move the policy flush (and
the workqueue drains it depends on) into a new .pre_exit handler;
xfrm_policy_fini() then runs in .exit and frees the hash tables
after the synchronize_rcu_expedited() that cleanup_net() guarantees
between the two phases. Providing O(1) RCU grace periods per batch
instead of O(N).
Observed on Linux 6.18 with a workload doing unshare(CLONE_NEWNET)
at ~13/sec sustained: cleanup_net() and the netns_wq rescuer kthread
both stuck in xfrm_policy_fini()'s synchronize_rcu(), >300k struct
net accumulated in the cleanup queue, Percpu in /proc/meminfo climbed
to 130+ GB on 256-CPU hosts, and memcg OOMs followed. setup_net and
__put_net counts were balanced, ruling out a refcount leak.
Fixes: 069daad4f2ae ("xfrm: Wait for RCU readers during policy netns exit")
Signed-off-by: Usama Arif <usama.arif@linux.dev>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
net/xfrm/xfrm_policy.c | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 59968dcbafe1..dd09d2063da2 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -4276,21 +4276,21 @@ static int __net_init xfrm_policy_init(struct net *net)
return -ENOMEM;
}
-static void xfrm_policy_fini(struct net *net)
+static void __net_exit xfrm_net_pre_exit(struct net *net)
{
- struct xfrm_pol_inexact_bin *b, *t;
- unsigned int sz;
- int dir;
-
disable_work_sync(&net->xfrm.policy_hthresh.work);
-
flush_work(&net->xfrm.policy_hash_work);
#ifdef CONFIG_XFRM_SUB_POLICY
xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, false);
#endif
xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, false);
+}
- synchronize_rcu();
+static void xfrm_policy_fini(struct net *net)
+{
+ struct xfrm_pol_inexact_bin *b, *t;
+ unsigned int sz;
+ int dir;
WARN_ON(!list_empty(&net->xfrm.policy_all));
@@ -4368,6 +4368,7 @@ static void __net_exit xfrm_net_exit(struct net *net)
static struct pernet_operations __net_initdata xfrm_net_ops = {
.init = xfrm_net_init,
+ .pre_exit = xfrm_net_pre_exit,
.exit = xfrm_net_exit,
};
--
2.43.0
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH 09/10] xfrm: input: hold netns during deferred transport reinjection
2026-05-29 9:26 [PATCH 0/10] pull request (net): ipsec 2026-05-29 Steffen Klassert
` (7 preceding siblings ...)
2026-05-29 9:26 ` [PATCH 08/10] xfrm: move policy_bydst RCU sync from per-netns .exit to .pre_exit Steffen Klassert
@ 2026-05-29 9:26 ` Steffen Klassert
2026-05-29 9:26 ` [PATCH 10/10] Revert "esp: fix page frag reference leak on skb_to_sgvec failure" Steffen Klassert
9 siblings, 0 replies; 12+ messages in thread
From: Steffen Klassert @ 2026-05-29 9:26 UTC (permalink / raw)
To: David Miller, Jakub Kicinski; +Cc: Herbert Xu, Steffen Klassert, netdev
From: Zhengchuan Liang <zcliangcn@gmail.com>
Transport-mode reinjection stores a struct net pointer in skb->cb and
uses it later from xfrm_trans_reinject(). That pointer must stay valid
until the deferred callback runs.
Take a netns reference when queueing deferred reinjection work and drop
it after the callback completes. Use maybe_get_net() so the queueing
path does not revive a namespace that is already being torn down.
This keeps the existing workqueue design and fixes the netns lifetime
handling in one place for all users of xfrm_trans_queue_net().
Fixes: 7b3801927e52 ("xfrm: introduce xfrm_trans_queue_net")
Cc: stable@kernel.org
Reported-by: Yuan Tan <yuantan098@gmail.com>
Reported-by: Xin Liu <bird@lzu.edu.cn>
Co-developed-by: Luxing Yin <tr0jan@lzu.edu.cn>
Signed-off-by: Luxing Yin <tr0jan@lzu.edu.cn>
Signed-off-by: Zhengchuan Liang <zcliangcn@gmail.com>
Signed-off-by: Ren Wei <n05ec@lzu.edu.cn>
Assisted-by: Codex:gpt-5.4
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
net/xfrm/xfrm_input.c | 16 ++++++++++++----
1 file changed, 12 insertions(+), 4 deletions(-)
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index f65291eba1f6..e4c2cd24936d 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -797,9 +797,12 @@ static void xfrm_trans_reinject(struct work_struct *work)
spin_unlock_bh(&trans->queue_lock);
local_bh_disable();
- while ((skb = __skb_dequeue(&queue)))
- XFRM_TRANS_SKB_CB(skb)->finish(XFRM_TRANS_SKB_CB(skb)->net,
- NULL, skb);
+ while ((skb = __skb_dequeue(&queue))) {
+ struct net *net = XFRM_TRANS_SKB_CB(skb)->net;
+
+ XFRM_TRANS_SKB_CB(skb)->finish(net, NULL, skb);
+ put_net(net);
+ }
local_bh_enable();
}
@@ -808,6 +811,7 @@ int xfrm_trans_queue_net(struct net *net, struct sk_buff *skb,
struct sk_buff *))
{
struct xfrm_trans_tasklet *trans;
+ struct net *hold_net;
trans = this_cpu_ptr(&xfrm_trans_tasklet);
@@ -816,8 +820,12 @@ int xfrm_trans_queue_net(struct net *net, struct sk_buff *skb,
BUILD_BUG_ON(sizeof(struct xfrm_trans_cb) > sizeof(skb->cb));
+ hold_net = maybe_get_net(net);
+ if (!hold_net)
+ return -ENODEV;
+
XFRM_TRANS_SKB_CB(skb)->finish = finish;
- XFRM_TRANS_SKB_CB(skb)->net = net;
+ XFRM_TRANS_SKB_CB(skb)->net = hold_net;
spin_lock_bh(&trans->queue_lock);
__skb_queue_tail(&trans->queue, skb);
spin_unlock_bh(&trans->queue_lock);
--
2.43.0
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH 10/10] Revert "esp: fix page frag reference leak on skb_to_sgvec failure"
2026-05-29 9:26 [PATCH 0/10] pull request (net): ipsec 2026-05-29 Steffen Klassert
` (8 preceding siblings ...)
2026-05-29 9:26 ` [PATCH 09/10] xfrm: input: hold netns during deferred transport reinjection Steffen Klassert
@ 2026-05-29 9:26 ` Steffen Klassert
9 siblings, 0 replies; 12+ messages in thread
From: Steffen Klassert @ 2026-05-29 9:26 UTC (permalink / raw)
To: David Miller, Jakub Kicinski; +Cc: Herbert Xu, Steffen Klassert, netdev
This reverts commit 2982e599fff6faa21c8df147d96fc7af6c1a2f24.
The patch does not fully fix the issue and the Author does
not match the 'Signed-off-by:' tag, so revert it for now.
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
net/ipv4/esp4.c | 12 +++++-------
net/ipv6/esp6.c | 12 +++++-------
2 files changed, 10 insertions(+), 14 deletions(-)
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 5d3a8656687e..513c8215c947 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -96,7 +96,7 @@ static inline struct scatterlist *esp_req_sg(struct crypto_aead *aead,
__alignof__(struct scatterlist));
}
-static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb, bool already_unref)
+static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb)
{
struct crypto_aead *aead = x->data;
int extralen = 0;
@@ -113,7 +113,7 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb,
/* Unref skb_frag_pages in the src scatterlist if necessary.
* Skip the first sg which comes from skb->data.
*/
- if (already_unref || req->src != req->dst)
+ if (req->src != req->dst)
for (sg = sg_next(req->src); sg; sg = sg_next(sg))
skb_page_unref(page_to_netmem(sg_page(sg)),
skb->pp_recycle);
@@ -220,7 +220,7 @@ static void esp_output_done(void *data, int err)
}
tmp = ESP_SKB_CB(skb)->tmp;
- esp_ssg_unref(x, tmp, skb, false);
+ esp_ssg_unref(x, tmp, skb);
kfree(tmp);
if (xo && (xo->flags & XFRM_DEV_RESUME)) {
@@ -569,10 +569,8 @@ int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
err = skb_to_sgvec(skb, dsg,
(unsigned char *)esph - skb->data,
assoclen + ivlen + esp->clen + alen);
- if (unlikely(err < 0)) {
- esp_ssg_unref(x, tmp, skb, true);
+ if (unlikely(err < 0))
goto error_free;
- }
}
if ((x->props.flags & XFRM_STATE_ESN))
@@ -604,7 +602,7 @@ int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
}
if (sg != dsg)
- esp_ssg_unref(x, tmp, skb, false);
+ esp_ssg_unref(x, tmp, skb);
if (!err && x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP)
err = esp_output_tail_tcp(x, skb);
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index b963b8e72604..57481e423e59 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -113,7 +113,7 @@ static inline struct scatterlist *esp_req_sg(struct crypto_aead *aead,
__alignof__(struct scatterlist));
}
-static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb, bool already_unref)
+static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb)
{
struct crypto_aead *aead = x->data;
int extralen = 0;
@@ -130,7 +130,7 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb,
/* Unref skb_frag_pages in the src scatterlist if necessary.
* Skip the first sg which comes from skb->data.
*/
- if (already_unref || req->src != req->dst)
+ if (req->src != req->dst)
for (sg = sg_next(req->src); sg; sg = sg_next(sg))
skb_page_unref(page_to_netmem(sg_page(sg)),
skb->pp_recycle);
@@ -254,7 +254,7 @@ static void esp_output_done(void *data, int err)
}
tmp = ESP_SKB_CB(skb)->tmp;
- esp_ssg_unref(x, tmp, skb, false);
+ esp_ssg_unref(x, tmp, skb);
kfree(tmp);
esp_output_encap_csum(skb);
@@ -600,10 +600,8 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
err = skb_to_sgvec(skb, dsg,
(unsigned char *)esph - skb->data,
assoclen + ivlen + esp->clen + alen);
- if (unlikely(err < 0)) {
- esp_ssg_unref(x, tmp, skb, true);
+ if (unlikely(err < 0))
goto error_free;
- }
}
if ((x->props.flags & XFRM_STATE_ESN))
@@ -636,7 +634,7 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
}
if (sg != dsg)
- esp_ssg_unref(x, tmp, skb, false);
+ esp_ssg_unref(x, tmp, skb);
if (!err && x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP)
err = esp_output_tail_tcp(x, skb);
--
2.43.0
^ permalink raw reply related [flat|nested] 12+ messages in thread