* [PATCH ipsec 1/3] tools/selftests: Use a sensible timeout value for iperf3 client
2026-04-22 14:06 [PATCH ipsec 0/3] xfrm: Don't clobber inner headers when already set Cosmin Ratiu
@ 2026-04-22 14:06 ` Cosmin Ratiu
2026-04-22 14:06 ` [PATCH ipsec 2/3] tools/selftests: Add a VXLAN+IPsec traffic test Cosmin Ratiu
2026-04-22 14:06 ` [PATCH ipsec 3/3] xfrm: Don't clobber inner headers when already set Cosmin Ratiu
2 siblings, 0 replies; 4+ messages in thread
From: Cosmin Ratiu @ 2026-04-22 14:06 UTC (permalink / raw)
To: netdev
Cc: Steffen Klassert, Herbert Xu, David S . Miller, Eric Dumazet,
Jakub Kicinski, Paolo Abeni, Simon Horman, Andrew Lunn,
Shuah Khan, Cosmin Ratiu, Nimrod Oren, Carolina Jubran,
Gal Pressman, linux-kselftest
The default timeout of cmd() is 5 seconds and Iperf3Runner requests the
iperf3 client to run for 10 seconds, which clearly doesn't work since
commit [1] enforced the timeout parameter.
Use a value derived from duration as timeout (+5 seconds for
startup/teardown/various other overhead).
[1] commit f0bd19316663 ("selftests: net: fix timeout passed as positional argument to communicate()")
Signed-off-by: Cosmin Ratiu <cratiu@nvidia.com>
---
tools/testing/selftests/drivers/net/lib/py/load.py | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/drivers/net/lib/py/load.py b/tools/testing/selftests/drivers/net/lib/py/load.py
index f181fa2d38fc..e24660e5c27f 100644
--- a/tools/testing/selftests/drivers/net/lib/py/load.py
+++ b/tools/testing/selftests/drivers/net/lib/py/load.py
@@ -48,7 +48,10 @@ class Iperf3Runner:
Starts the iperf3 client with the configured options.
"""
cmdline = self._build_client(streams, duration, reverse)
- return cmd(cmdline, background=background, host=self.env.remote)
+ kwargs = {"background": background, "host": self.env.remote}
+ if not background:
+ kwargs["timeout"] = duration + 5
+ return cmd(cmdline, **kwargs)
def measure_bandwidth(self, reverse=False):
"""
--
2.53.0
^ permalink raw reply related [flat|nested] 4+ messages in thread* [PATCH ipsec 2/3] tools/selftests: Add a VXLAN+IPsec traffic test
2026-04-22 14:06 [PATCH ipsec 0/3] xfrm: Don't clobber inner headers when already set Cosmin Ratiu
2026-04-22 14:06 ` [PATCH ipsec 1/3] tools/selftests: Use a sensible timeout value for iperf3 client Cosmin Ratiu
@ 2026-04-22 14:06 ` Cosmin Ratiu
2026-04-22 14:06 ` [PATCH ipsec 3/3] xfrm: Don't clobber inner headers when already set Cosmin Ratiu
2 siblings, 0 replies; 4+ messages in thread
From: Cosmin Ratiu @ 2026-04-22 14:06 UTC (permalink / raw)
To: netdev
Cc: Steffen Klassert, Herbert Xu, David S . Miller, Eric Dumazet,
Jakub Kicinski, Paolo Abeni, Simon Horman, Andrew Lunn,
Shuah Khan, Cosmin Ratiu, Nimrod Oren, Carolina Jubran,
Gal Pressman, linux-kselftest
There are VXLAN tests and IPsec tests, but there is no test that
combines the two protocols and exercises the tunnel-over-ipsec code
paths. Fix that by adding a traffic test with VXLAN and IPsec using
crypto offload. This is runnable on HW which supports ESP offload (so no
nsim unfortunately).
Traffic is done with iperf3 and the test validates that there are no
packet drops and iperf3 can get to at least 100 Mbps (a very
conservative value on today's crypto offload HW, as it can typically
reach multi-Gbps rates).
Ran right now, the test fails due to a recently exposed bug in xfrm,
which will be fixed in the next patch:
# ./tools/testing/selftests/drivers/net/hw/ipsec_vxlan.py
TAP version 13
1..4
# Check| At ./tools/testing/selftests/drivers/net/hw/ipsec_vxlan.py,
# line 161, in test_vxlan_ipsec_crypto_offload:
# Check| ksft_eq(drops_after - drops_before, 0,
# Check failed 189 != 0 TX drops during VXLAN+IPsec
# Check| At ./tools/testing/selftests/drivers/net/hw/ipsec_vxlan.py,
# line 163, in test_vxlan_ipsec_crypto_offload:
# Check| ksft_ge(bw_gbps, 0.1,
# Check failed 0.0015058278404812596 < 0.1 Minimum 100Mbps over
# VXLAN+IPsec
not ok 1 ipsec_vxlan.test_vxlan_ipsec_crypto_offload.outer_v4_inner_v4
...
Signed-off-by: Cosmin Ratiu <cratiu@nvidia.com>
---
.../testing/selftests/drivers/net/hw/Makefile | 1 +
tools/testing/selftests/drivers/net/hw/config | 5 +
.../selftests/drivers/net/hw/ipsec_vxlan.py | 204 ++++++++++++++++++
3 files changed, 210 insertions(+)
create mode 100755 tools/testing/selftests/drivers/net/hw/ipsec_vxlan.py
diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile
index 85ca4d1ecf9e..3b6ff4708005 100644
--- a/tools/testing/selftests/drivers/net/hw/Makefile
+++ b/tools/testing/selftests/drivers/net/hw/Makefile
@@ -30,6 +30,7 @@ TEST_PROGS = \
gro_hw.py \
hw_stats_l3.sh \
hw_stats_l3_gre.sh \
+ ipsec_vxlan.py \
iou-zcrx.py \
irq.py \
loopback.sh \
diff --git a/tools/testing/selftests/drivers/net/hw/config b/tools/testing/selftests/drivers/net/hw/config
index dd50cb8a7911..ae0168c2bbe6 100644
--- a/tools/testing/selftests/drivers/net/hw/config
+++ b/tools/testing/selftests/drivers/net/hw/config
@@ -12,5 +12,10 @@ CONFIG_NET_IPGRE=y
CONFIG_NET_IPGRE_DEMUX=y
CONFIG_NETKIT=y
CONFIG_NET_SCH_INGRESS=y
+CONFIG_INET6_ESP=y
+CONFIG_INET6_ESP_OFFLOAD=y
+CONFIG_INET_ESP=y
+CONFIG_INET_ESP_OFFLOAD=y
CONFIG_UDMABUF=y
CONFIG_VXLAN=y
+CONFIG_XFRM_USER=y
diff --git a/tools/testing/selftests/drivers/net/hw/ipsec_vxlan.py b/tools/testing/selftests/drivers/net/hw/ipsec_vxlan.py
new file mode 100755
index 000000000000..0740a4d85240
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/ipsec_vxlan.py
@@ -0,0 +1,204 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+"""Traffic test for VXLAN + IPsec crypto-offload."""
+
+import os
+
+from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ge
+from lib.py import ksft_variants, KsftNamedVariant, KsftSkipEx
+from lib.py import CmdExitFailure, NetDrvEpEnv, cmd, defer, ethtool, ip
+from lib.py import Iperf3Runner
+
+# Inner tunnel addresses - TEST-NET-2 (RFC 5737) / doc prefix (RFC 3849)
+INNER_V4_LOCAL = "198.51.100.1"
+INNER_V4_REMOTE = "198.51.100.2"
+INNER_V6_LOCAL = "2001:db8:100::1"
+INNER_V6_REMOTE = "2001:db8:100::2"
+
+# ESP parameters
+SPI_OUT = "0x1000"
+SPI_IN = "0x1001"
+# 128-bit key + 32-bit salt = 20 bytes hex, 128-bit ICV
+ESP_AEAD = "aead 'rfc4106(gcm(aes))' 0x" + "01" * 20 + " 128"
+
+
+def xfrm(args, host=None):
+ """Runs 'ip xfrm' via shell to preserve parentheses in algo names."""
+ cmd(f"ip xfrm {args}", shell=True, host=host)
+
+
+def check_xfrm_offload_support():
+ """Skips if iproute2 lacks xfrm offload support."""
+ out = cmd("ip xfrm state help", fail=False)
+ if "offload" not in out.stdout + out.stderr:
+ raise KsftSkipEx("iproute2 too old, missing xfrm offload")
+
+
+def check_esp_hw_offload(cfg):
+ """Skips if device lacks esp-hw-offload support."""
+ check_xfrm_offload_support()
+ try:
+ feat = ethtool(f"-k {cfg.ifname}", json=True)[0]
+ except (CmdExitFailure, IndexError) as e:
+ raise KsftSkipEx(f"can't query features: {e}") from e
+ if not feat.get("esp-hw-offload", {}).get("active"):
+ raise KsftSkipEx("Device does not support esp-hw-offload")
+
+
+def get_tx_drops(cfg):
+ """Returns TX dropped counter from the physical device."""
+ stats = ip("-s -s link show dev " + cfg.ifname, json=True)[0]
+ return stats["stats64"]["tx"]["dropped"]
+
+
+def setup_vxlan_ipsec(cfg, outer_ipver, inner_ipver):
+ """Sets up VXLAN tunnel with IPsec transport-mode crypto-offload."""
+ vxlan_name = f"vx{os.getpid()}"
+ local_addr = cfg.addr_v[outer_ipver]
+ remote_addr = cfg.remote_addr_v[outer_ipver]
+
+ if inner_ipver == "4":
+ inner_local = f"{INNER_V4_LOCAL}/24"
+ inner_remote = f"{INNER_V4_REMOTE}/24"
+ addr_extra = ""
+ else:
+ inner_local = f"{INNER_V6_LOCAL}/64"
+ inner_remote = f"{INNER_V6_REMOTE}/64"
+ addr_extra = " nodad"
+
+ if outer_ipver == "6":
+ vxlan_opts = "udp6zerocsumtx udp6zerocsumrx"
+ else:
+ vxlan_opts = "noudpcsum"
+
+ # VXLAN tunnel - local side
+ ip(f"link add {vxlan_name} type vxlan id 100 dstport 4789 {vxlan_opts} "
+ f"local {local_addr} remote {remote_addr} dev {cfg.ifname}")
+ defer(ip, f"link del {vxlan_name}")
+ ip(f"addr add {inner_local} dev {vxlan_name}{addr_extra}")
+ ip(f"link set {vxlan_name} up")
+
+ # VXLAN tunnel - remote side
+ ip(f"link add {vxlan_name} type vxlan id 100 dstport 4789 {vxlan_opts} "
+ f"local {remote_addr} remote {local_addr} dev {cfg.remote_ifname}",
+ host=cfg.remote)
+ defer(ip, f"link del {vxlan_name}", host=cfg.remote)
+ ip(f"addr add {inner_remote} dev {vxlan_name}{addr_extra}",
+ host=cfg.remote)
+ ip(f"link set {vxlan_name} up", host=cfg.remote)
+
+ # xfrm state - local outbound SA
+ xfrm(f"state add src {local_addr} dst {remote_addr} "
+ f"proto esp spi {SPI_OUT} "
+ f"{ESP_AEAD} "
+ f"mode transport offload crypto dev {cfg.ifname} dir out")
+ defer(xfrm, f"state del src {local_addr} dst {remote_addr} "
+ f"proto esp spi {SPI_OUT}")
+
+ # xfrm state - local inbound SA
+ xfrm(f"state add src {remote_addr} dst {local_addr} "
+ f"proto esp spi {SPI_IN} "
+ f"{ESP_AEAD} "
+ f"mode transport offload crypto dev {cfg.ifname} dir in")
+ defer(xfrm, f"state del src {remote_addr} dst {local_addr} "
+ f"proto esp spi {SPI_IN}")
+
+ # xfrm state - remote outbound SA (mirror, software crypto)
+ xfrm(f"state add src {remote_addr} dst {local_addr} "
+ f"proto esp spi {SPI_IN} "
+ f"{ESP_AEAD} "
+ f"mode transport",
+ host=cfg.remote)
+ defer(xfrm, f"state del src {remote_addr} dst {local_addr} "
+ f"proto esp spi {SPI_IN}", host=cfg.remote)
+
+ # xfrm state - remote inbound SA (mirror, software crypto)
+ xfrm(f"state add src {local_addr} dst {remote_addr} "
+ f"proto esp spi {SPI_OUT} "
+ f"{ESP_AEAD} "
+ f"mode transport",
+ host=cfg.remote)
+ defer(xfrm, f"state del src {local_addr} dst {remote_addr} "
+ f"proto esp spi {SPI_OUT}", host=cfg.remote)
+
+ # xfrm policy - local out
+ xfrm(f"policy add src {local_addr} dst {remote_addr} "
+ f"proto udp dport 4789 dir out "
+ f"tmpl src {local_addr} dst {remote_addr} proto esp mode transport")
+ defer(xfrm, f"policy del src {local_addr} dst {remote_addr} "
+ f"proto udp dport 4789 dir out")
+
+ # xfrm policy - local in
+ xfrm(f"policy add src {remote_addr} dst {local_addr} "
+ f"proto udp dport 4789 dir in "
+ f"tmpl src {remote_addr} dst {local_addr} proto esp mode transport")
+ defer(xfrm, f"policy del src {remote_addr} dst {local_addr} "
+ f"proto udp dport 4789 dir in")
+
+ # xfrm policy - remote out
+ xfrm(f"policy add src {remote_addr} dst {local_addr} "
+ f"proto udp dport 4789 dir out "
+ f"tmpl src {remote_addr} dst {local_addr} proto esp mode transport",
+ host=cfg.remote)
+ defer(xfrm, f"policy del src {remote_addr} dst {local_addr} "
+ f"proto udp dport 4789 dir out", host=cfg.remote)
+
+ # xfrm policy - remote in
+ xfrm(f"policy add src {local_addr} dst {remote_addr} "
+ f"proto udp dport 4789 dir in "
+ f"tmpl src {local_addr} dst {remote_addr} proto esp mode transport",
+ host=cfg.remote)
+ defer(xfrm, f"policy del src {local_addr} dst {remote_addr} "
+ f"proto udp dport 4789 dir in", host=cfg.remote)
+
+
+def _vxlan_ipsec_variants():
+ """Generates outer/inner IP version variants."""
+ for outer in ["4", "6"]:
+ for inner in ["4", "6"]:
+ yield KsftNamedVariant(f"outer_v{outer}_inner_v{inner}", outer, inner)
+
+
+@ksft_variants(_vxlan_ipsec_variants())
+def test_vxlan_ipsec_crypto_offload(cfg, outer_ipver, inner_ipver):
+ """Tests VXLAN+IPsec crypto-offload has no TX drops."""
+ cfg.require_ipver(outer_ipver)
+ check_esp_hw_offload(cfg)
+
+ setup_vxlan_ipsec(cfg, outer_ipver, inner_ipver)
+
+ if inner_ipver == "4":
+ inner_local = INNER_V4_LOCAL
+ inner_remote = INNER_V4_REMOTE
+ ping = "ping"
+ else:
+ inner_local = INNER_V6_LOCAL
+ inner_remote = INNER_V6_REMOTE
+ ping = "ping -6"
+
+ cmd(f"{ping} -c 1 -W 2 {inner_remote}")
+
+ drops_before = get_tx_drops(cfg)
+
+ runner = Iperf3Runner(cfg, server_ip=inner_local,
+ client_ip=inner_remote)
+ bw_gbps = runner.measure_bandwidth(reverse=True)
+
+ cfg.wait_hw_stats_settle()
+ drops_after = get_tx_drops(cfg)
+
+ ksft_eq(drops_after - drops_before, 0,
+ comment="TX drops during VXLAN+IPsec")
+ ksft_ge(bw_gbps, 0.1,
+ comment="Minimum 100Mbps over VXLAN+IPsec")
+
+
+def main():
+ """Runs VXLAN+IPsec crypto-offload GSO selftest."""
+ with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
+ ksft_run([test_vxlan_ipsec_crypto_offload], args=(cfg,))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
--
2.53.0
^ permalink raw reply related [flat|nested] 4+ messages in thread* [PATCH ipsec 3/3] xfrm: Don't clobber inner headers when already set
2026-04-22 14:06 [PATCH ipsec 0/3] xfrm: Don't clobber inner headers when already set Cosmin Ratiu
2026-04-22 14:06 ` [PATCH ipsec 1/3] tools/selftests: Use a sensible timeout value for iperf3 client Cosmin Ratiu
2026-04-22 14:06 ` [PATCH ipsec 2/3] tools/selftests: Add a VXLAN+IPsec traffic test Cosmin Ratiu
@ 2026-04-22 14:06 ` Cosmin Ratiu
2 siblings, 0 replies; 4+ messages in thread
From: Cosmin Ratiu @ 2026-04-22 14:06 UTC (permalink / raw)
To: netdev
Cc: Steffen Klassert, Herbert Xu, David S . Miller, Eric Dumazet,
Jakub Kicinski, Paolo Abeni, Simon Horman, Andrew Lunn,
Shuah Khan, Cosmin Ratiu, Nimrod Oren, Carolina Jubran,
Gal Pressman, linux-kselftest
On VXLAN over IPsec egress, xfrm{4,6}_transport_output() blindly
overwrite inner_transport_header (== the inner TCP header saved in VXLAN
iptunnel_handle_offloads() -> skb_reset_inner_headers()) with the
current transport_header (== the VXLAN outer UDP header set by
udp_tunnel_xmit_skb()).
This was a latent bug, harmless until commit [1] added a doff validation
check in qdisc_pkt_len_segs_init() for encapsulated GSO packets. With
the wrong inner_transport_header set by xfrm, qdisc_pkt_len_segs_init()
interprets inner_transport_header as a TCP header, reads doff=0 from the
upper byte of the VNI and drops the packet with DROP_REASON_SKB_BAD_GSO.
Besides the use in GSO to determine the header size of segmented
packets, inner_transport_header might be used by drivers to set up
inner checksum offloading by pointing the HW to the inner transport
header. A quick browse through available drivers shows that mlx5 uses
skb->csum_start specifically for this scenario, while others either
don't support VXLAN over IPsec crypto offload (ixgbe) or the HW is
capable of parsing the packets itself (nfp, Chelsio).
But in all cases, it is more correct to let the inner_transport_header
point to the innermost header instead of overwriting it in xfrm.
So fix this by guarding all four inner header save sites in
xfrm_output.c (xfrm{4,6}_transport_output, xfrm{4,6}_tunnel_encap_add)
with a check for skb->inner_protocol. When inner_protocol is set, a
tunnel layer (VXLAN, Geneve, GRE, etc.) has already saved the correct
inner header offsets and they must not be overwritten. When
inner_protocol is zero, no prior tunnel encapsulation exists and xfrm
must save the inner headers itself. The tunnel mode checks are only
added for completion, since they aren't strictly required, as
xfrm_output() forces software GSO in tunnel mode before encap.
This makes the previously added test pass:
# ./tools/testing/selftests/drivers/net/hw/ipsec_vxlan.py
TAP version 13
1..4
ok 1 ipsec_vxlan.test_vxlan_ipsec_crypto_offload.outer_v4_inner_v4
ok 2 ipsec_vxlan.test_vxlan_ipsec_crypto_offload.outer_v4_inner_v6
ok 3 ipsec_vxlan.test_vxlan_ipsec_crypto_offload.outer_v6_inner_v4
ok 4 ipsec_vxlan.test_vxlan_ipsec_crypto_offload.outer_v6_inner_v6
# Totals: pass:4 fail:0 xfail:0 xpass:0 skip:0 error:0
[1] commit 7fb4c1967011 ("net: pull headers in qdisc_pkt_len_segs_init()")
Fixes: f1bd7d659ef0 ("xfrm: Add encapsulation header offsets while SKB is not encrypted")
Signed-off-by: Cosmin Ratiu <cratiu@nvidia.com>
---
net/xfrm/xfrm_output.c | 20 ++++++++++++++------
1 file changed, 14 insertions(+), 6 deletions(-)
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index a9652b422f51..cc35c2fcbbe0 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -66,7 +66,9 @@ static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb)
struct iphdr *iph = ip_hdr(skb);
int ihl = iph->ihl * 4;
- skb_set_inner_transport_header(skb, skb_transport_offset(skb));
+ if (!skb->inner_protocol)
+ skb_set_inner_transport_header(skb,
+ skb_transport_offset(skb));
skb_set_network_header(skb, -x->props.header_len);
skb->mac_header = skb->network_header +
@@ -167,7 +169,9 @@ static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb)
int hdr_len;
iph = ipv6_hdr(skb);
- skb_set_inner_transport_header(skb, skb_transport_offset(skb));
+ if (!skb->inner_protocol)
+ skb_set_inner_transport_header(skb,
+ skb_transport_offset(skb));
hdr_len = xfrm6_hdr_offset(x, skb, &prevhdr);
if (hdr_len < 0)
@@ -276,8 +280,10 @@ static int xfrm4_tunnel_encap_add(struct xfrm_state *x, struct sk_buff *skb)
struct iphdr *top_iph;
int flags;
- skb_set_inner_network_header(skb, skb_network_offset(skb));
- skb_set_inner_transport_header(skb, skb_transport_offset(skb));
+ if (!skb->inner_protocol) {
+ skb_set_inner_network_header(skb, skb_network_offset(skb));
+ skb_set_inner_transport_header(skb, skb_transport_offset(skb));
+ }
skb_set_network_header(skb, -x->props.header_len);
skb->mac_header = skb->network_header +
@@ -321,8 +327,10 @@ static int xfrm6_tunnel_encap_add(struct xfrm_state *x, struct sk_buff *skb)
struct ipv6hdr *top_iph;
int dsfield;
- skb_set_inner_network_header(skb, skb_network_offset(skb));
- skb_set_inner_transport_header(skb, skb_transport_offset(skb));
+ if (!skb->inner_protocol) {
+ skb_set_inner_network_header(skb, skb_network_offset(skb));
+ skb_set_inner_transport_header(skb, skb_transport_offset(skb));
+ }
skb_set_network_header(skb, -x->props.header_len);
skb->mac_header = skb->network_header +
--
2.53.0
^ permalink raw reply related [flat|nested] 4+ messages in thread