Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH nf-next v2 2/3] netfilter: nf_conntrack_amanda: replace u_int16_t with u16
From: Carlos Grillet @ 2026-06-25 17:25 UTC (permalink / raw)
  To: Pablo Neira Ayuso, Florian Westphal, Phil Sutter, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Simon Horman
  Cc: netfilter-devel, coreteam, netdev, linux-kernel
In-Reply-To: <20260625172550.35781-1-carlos@carlosgrillet.me>

Use preferred kernel integer type u16 instead of the POSIX u_int16_t
variant.

No functional change.

Signed-off-by: Carlos Grillet <carlos@carlosgrillet.me>
---
 net/netfilter/nf_conntrack_amanda.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c
index ddafbdfc96dc..f10ac2c49f4b 100644
--- a/net/netfilter/nf_conntrack_amanda.c
+++ b/net/netfilter/nf_conntrack_amanda.c
@@ -89,7 +89,7 @@ static int amanda_help(struct sk_buff *skb,
 	struct nf_conntrack_tuple *tuple;
 	unsigned int dataoff, start, stop, off, i;
 	char pbuf[sizeof("65535")], *tmp;
-	u_int16_t len;
+	u16 len;
 	__be16 port;
 	int ret = NF_ACCEPT;
 	nf_nat_amanda_hook_fn *nf_nat_amanda;
-- 
2.54.0


^ permalink raw reply related

* [PATCH nf-next v2 1/3] netfilter: nf_conntrack_h323_main: replace u_int8_t with u8
From: Carlos Grillet @ 2026-06-25 17:25 UTC (permalink / raw)
  To: Pablo Neira Ayuso, Florian Westphal, Phil Sutter, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Simon Horman
  Cc: netfilter-devel, coreteam, netdev, linux-kernel
In-Reply-To: <20260625172550.35781-1-carlos@carlosgrillet.me>

Use preferred kernel integer type u8 instead of the POSIX u_int8_t
variant.

No functional change.

Signed-off-by: Carlos Grillet <carlos@carlosgrillet.me>
---
 net/netfilter/nf_conntrack_h323_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index 7f189dceb3c4..68ecaf0daf95 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -671,7 +671,7 @@ static int expect_h245(struct sk_buff *skb, struct nf_conn *ct,
 static int callforward_do_filter(struct net *net,
 				 const union nf_inet_addr *src,
 				 const union nf_inet_addr *dst,
-				 u_int8_t family)
+				 u8 family)
 {
 	int ret = 0;
 
-- 
2.54.0


^ permalink raw reply related

* [PATCH ipsec] xfrm: reject optional IPTFS templates in outbound policies
From: Antony Antony @ 2026-06-25 17:25 UTC (permalink / raw)
  To: Steffen Klassert, Herbert Xu, "David S. Miller",
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Simon Horman,
	Christian Hopps
  Cc: Tobias Brunner, netdev, syzbot+0ac4d84afe1066a1f3e9,
	Antony Antony, Antony Antony

syzbot reported a stack-out-of-bounds read in xfrm_state_find()
which flows from xfrm_tmpl_resolve_one().

Commit 3d776e31c841 ("xfrm: Reject optional tunnel/BEET mode
templates in outbound policies") disallowed optional tunnel and
BEET in outbound policies to prevent this. Later, when IPTFS was
added, it was not covered by that fix and can still trigger
the out-of-bounds read.

Extend the check to disallow optional IPTFS in outbound policies
as well. IPTFS should be treated the same as tunnel mode here.
IN and FWD policies are not affected: xfrm_tmpl_resolve_one()
is only reachable via the outbound path.

Reproducer, before:
ip link add dummy0 type dummy
ip link set dummy0 up
ip addr add 10.1.1.1/24 dev dummy0
ip xfrm policy add src 10.1.1.1/32 dst 10.1.1.2/32 dir out tmpl \
  src fc00::dead:1 dst fc00::dead:2 proto esp reqid 1 mode iptfs \
  level use tmpl src fc00::dead:1 dst fc00::dead:2 proto esp reqid \
  2 mode transport
ping -W 1 -c 1 10.1.1.2
PING 10.1.1.2 (10.1.1.2) 56(84) bytes of data.

[   64.168420] ==================================================================
[   64.169977] BUG: KASAN: stack-out-of-bounds in __xfrm6_addr_hash+0x11e/0x170
[   64.169977] Read of size 4 at addr ffff88800e1ffd20 by task ping/2844

[   64.169977] CPU: 2 UID: 0 PID: 2844 Comm: ping Not tainted 7.1.0-rc7-00180-geb23b588430a #98 PREEMPT(full)
[   64.169977] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
[   64.169977] Call Trace:
[   64.169977]  <TASK>
[   64.169977]  dump_stack_lvl+0x47/0x70
[   64.169977]  ? __xfrm6_addr_hash+0x11e/0x170
[   64.169977]  print_report+0x152/0x4b0
[   64.169977]  ? ksys_mmap_pgoff+0x6d/0xa0
[   64.169977]  ? entry_SYSCALL_64_after_hwframe+0x76/0x7e
[   64.169977]  ? rcu_read_unlock_sched+0xa/0x20
[   64.169977]  ? __virt_addr_valid+0x21b/0x230
[   64.169977]  ? __xfrm6_addr_hash+0x11e/0x170
[   64.169977]  kasan_report+0xa8/0xd0
[   64.169977]  ? __xfrm6_addr_hash+0x11e/0x170
[   64.169977]  __xfrm6_addr_hash+0x11e/0x170
[   64.169977]  __xfrm_dst_hash+0x24/0xc0
[   64.169977]  xfrm_state_find+0xa2d/0x2f90
[   64.169977]  ? __pfx_xfrm_state_find+0x10/0x10
[   64.169977]  ? __pfx_ftrace_graph_ret_addr+0x10/0x10
[   64.169977]  ? __pfx_ftrace_graph_ret_addr+0x10/0x10
[   64.169977]  xfrm_tmpl_resolve_one+0x210/0x570
[   64.169977]  ? __pfx_xfrm_tmpl_resolve_one+0x10/0x10
[   64.169977]  ? __pfx_stack_trace_consume_entry+0x10/0x10
[   64.169977]  ? kernel_text_address+0x5b/0x80
[   64.169977]  ? __kernel_text_address+0xe/0x30
[   64.169977]  ? unwind_get_return_address+0x5e/0x90
[   64.169977]  ? arch_stack_walk+0x8c/0xe0
[   64.169977]  xfrm_tmpl_resolve+0x130/0x200
[   64.169977]  ? __pfx_xfrm_tmpl_resolve+0x10/0x10
[   64.169977]  ? __pfx_xfrm_policy_inexact_lookup_rcu+0x10/0x10
[   64.169977]  ? __refcount_add_not_zero.constprop.0+0xb2/0x110
[   64.169977]  ? __pfx___refcount_add_not_zero.constprop.0+0x10/0x10
[   64.169977]  xfrm_resolve_and_create_bundle+0xd5/0x310
[   64.169977]  ? __pfx_xfrm_resolve_and_create_bundle+0x10/0x10
[   64.169977]  ? __pfx_xfrm_policy_lookup_bytype+0x10/0x10
[   64.169977]  ? __pfx_xfrm_policy_lookup_bytype+0x10/0x10
[   64.169977]  xfrm_lookup_with_ifid+0x3d8/0xb80
[   64.169977]  ? __pfx_xfrm_lookup_with_ifid+0x10/0x10
[   64.169977]  ? ip_route_output_key_hash+0xc6/0x110
[   64.169977]  ? kasan_save_track+0x10/0x30
[   64.169977]  xfrm_lookup_route+0x18/0xe0
[   64.169977]  ip4_datagram_release_cb+0x4c9/0x530
[   64.169977]  ? __pfx_ip4_datagram_release_cb+0x10/0x10
[   64.169977]  ? do_raw_spin_lock+0x71/0xc0
[   64.169977]  ? __pfx_do_raw_spin_lock+0x10/0x10
[   64.169977]  release_sock+0xb0/0x170
[   64.169977]  udp_connect+0x43/0x50
[   64.169977]  __sys_connect+0xa6/0x100
[   64.169977]  ? alloc_fd+0x2e9/0x300
[   64.169977]  ? __pfx___sys_connect+0x10/0x10
[   64.169977]  ? preempt_latency_start+0x1f/0x70
[   64.169977]  ? fd_install+0x7e/0x150
[   64.169977]  ? rcu_read_unlock_sched+0xa/0x20
[   64.169977]  ? __sys_socket+0xdf/0x130
[   64.169977]  ? __pfx___sys_socket+0x10/0x10
[   64.169977]  ? vma_refcount_put+0x43/0xa0
[   64.169977]  __x64_sys_connect+0x7e/0x90
[   64.169977]  do_syscall_64+0x11b/0x2b0
[   64.169977]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
[   64.169977] RIP: 0033:0x7f4851ecb570
[   64.169977] Code: 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 80 3d f9 ca 0d 00 00 74 17 b8 2a 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 58 c3 0f 1f 80 00 00 00 00 48 83 ec 18 89 54
[   64.169977] RSP: 002b:00007ffc830e3498 EFLAGS: 00000202 ORIG_RAX: 000000000000002a
[   64.169977] RAX: ffffffffffffffda RBX: 00007ffc830e34d0 RCX: 00007f4851ecb570
[   64.169977] RDX: 0000000000000010 RSI: 00007ffc830e34d0 RDI: 0000000000000005
[   64.169977] RBP: 0000000000000000 R08: 0000000000000003 R09: 0000000000000000
[   64.169977] R10: 0000000000000006 R11: 0000000000000202 R12: 0000000000000005
[   64.169977] R13: 0000000000000000 R14: 00005619a863f340 R15: 0000000000000000
[   64.169977]  </TASK>

[   64.169977] The buggy address belongs to stack of task ping/2844
[   64.169977]  and is located at offset 88 in frame:
[   64.169977]  ip4_datagram_release_cb+0x0/0x530

[   64.169977] This frame has 1 object:
[   64.169977]  [32, 88) 'fl4'

[   64.169977] The buggy address belongs to the physical page:
[   64.169977] page: refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0xe1ff
[   64.169977] flags: 0x4000000000000000(zone=1)
[   64.169977] raw: 4000000000000000 0000000000000000 ffffea0000387fc8 0000000000000000
[   64.169977] raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000
[   64.169977] page dumped because: kasan: bad access detected

[   64.169977] Memory state around the buggy address:
[   64.169977]  ffff88800e1ffc00: f2 f2 00 00 f3 f3 00 00 00 00 00 00 00 00 00 00
[   64.169977]  ffff88800e1ffc80: 00 00 00 00 00 00 00 00 00 f1 f1 f1 f1 00 00 00
[   64.169977] >ffff88800e1ffd00: 00 00 00 00 f3 f3 f3 f3 f3 00 00 00 00 00 00 00
[   64.169977]                                ^
[   64.169977]  ffff88800e1ffd80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 f1 f1
[   64.169977]  ffff88800e1ffe00: f1 f1 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[   64.169977] ==================================================================
[   64.245153] Disabling lock debugging due to kernel taint

After the fix:

ip xfrm policy add src 10.1.1.1/32 dst 10.1.1.2/32 dir out tmpl \
 src fc00::dead:1 dst fc00::dead:2 proto esp reqid 1 mode iptfs \
 level use tmpl src fc00::dead:1 dst fc00::dead:2 proto esp reqid 2 \
 mode transport

Error: Mode in optional template not allowed in outbound policy.

Fixes: d1716d5a44c3 ("xfrm: add generic iptfs defines and functionality")
Reported-by: syzbot+0ac4d84afe1066a1f3e9@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/6a3ceb94.43b4ff68.30a095.0004.GAE@google.com/T/
Signed-off-by: Antony Antony <antony@phenome.org>
---
 net/xfrm/xfrm_user.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 61eb5de33b87..b36741c4ea3d 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -2077,13 +2077,12 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family,
 		switch (ut[i].mode) {
 		case XFRM_MODE_TUNNEL:
 		case XFRM_MODE_BEET:
+		case XFRM_MODE_IPTFS:
 			if (ut[i].optional && dir == XFRM_POLICY_OUT) {
 				NL_SET_ERR_MSG(extack, "Mode in optional template not allowed in outbound policy");
 				return -EINVAL;
 			}
 			break;
-		case XFRM_MODE_IPTFS:
-			break;
 		default:
 			if (ut[i].family != prev_family) {
 				NL_SET_ERR_MSG(extack, "Mode in template doesn't support a family change");

---
base-commit: 40f0b1047918539f0b0f795ac65e35336b4c2c78
change-id: 20260625-xfrm-pol-out-tmpl-iptfs-reject-fix-10373324a939

Best regards,
--  
Antony Antony <antony.antony@secunet.com>


^ permalink raw reply related

* [net:main 69/101] net/xfrm/xfrm_state.c:1220: Warning: missing closing `"'
From: kernel test robot @ 2026-06-25 17:11 UTC (permalink / raw)
  To: Steffen Klassert; +Cc: oe-kbuild-all, netdev, Jakub Kicinski

tree:   https://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git main
head:   02f144fbb4c86c360495d33debe307cb46a57f95
commit: e9deb406c10f5a73bcfd62f42ca1187b220bc188 [69/101] Merge tag 'ipsec-2026-06-22' of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec
config: s390-randconfig-r133-20260625 (https://download.01.org/0day-ci/archive/20260626/202606260102.GcwhFSNK-lkp@intel.com/config)
compiler: s390-linux-gcc (GCC) 8.5.0
sparse: v0.6.5-rc1
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260626/202606260102.GcwhFSNK-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add the following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202606260102.GcwhFSNK-lkp@intel.com/

All warnings (new ones prefixed by >>):

   net/xfrm/xfrm_state.c: Assembler messages:
>> net/xfrm/xfrm_state.c:1220: Warning: missing closing `"'
>> net/xfrm/xfrm_state.c:1220: Warning: missing closing `"'
>> net/xfrm/xfrm_state.c:1220: Warning: missing closing `"'
>> net/xfrm/xfrm_state.c:1220: Warning: missing closing `"'
>> net/xfrm/xfrm_state.c:1220: Warning: missing closing `"'
>> net/xfrm/xfrm_state.c:1220: Warning: missing closing `"'
>> net/xfrm/xfrm_state.c:1220: Warning: missing closing `"'
>> net/xfrm/xfrm_state.c:1220: Warning: missing closing `"'
>> net/xfrm/xfrm_state.c:1220: Warning: missing closing `"'
>> net/xfrm/xfrm_state.c:1220: Warning: missing closing `"'
>> net/xfrm/xfrm_state.c:1220: Warning: missing closing `"'
>> net/xfrm/xfrm_state.c:1220: Warning: missing closing `"'
>> net/xfrm/xfrm_state.c:1220: Warning: missing closing `"'
>> net/xfrm/xfrm_state.c:1220: Warning: missing closing `"'
>> net/xfrm/xfrm_state.c:1220: Warning: missing closing `"'
>> net/xfrm/xfrm_state.c:1220: Warning: missing closing `"'
>> net/xfrm/xfrm_state.c:1220: Warning: missing closing `"'
   net/xfrm/xfrm_state.c:1225: Warning: missing closing `"'
   net/xfrm/xfrm_state.c:1225: Warning: missing closing `"'
   net/xfrm/xfrm_state.c:1225: Warning: missing closing `"'
   net/xfrm/xfrm_state.c:1225: Warning: missing closing `"'
   net/xfrm/xfrm_state.c:1225: Warning: missing closing `"'
   net/xfrm/xfrm_state.c:1225: Warning: missing closing `"'
   net/xfrm/xfrm_state.c:1225: Warning: missing closing `"'
   net/xfrm/xfrm_state.c:1225: Warning: missing closing `"'
   net/xfrm/xfrm_state.c:1225: Warning: missing closing `"'
   net/xfrm/xfrm_state.c:1225: Warning: missing closing `"'
   net/xfrm/xfrm_state.c:1225: Warning: missing closing `"'
   net/xfrm/xfrm_state.c:1225: Warning: missing closing `"'
   net/xfrm/xfrm_state.c:1225: Warning: missing closing `"'
   net/xfrm/xfrm_state.c:1225: Warning: missing closing `"'
   net/xfrm/xfrm_state.c:1225: Warning: missing closing `"'
   net/xfrm/xfrm_state.c:1225: Warning: missing closing `"'
   net/xfrm/xfrm_state.c:1225: Warning: missing closing `"'


vim +1220 net/xfrm/xfrm_state.c

edcd582152090b David S. Miller  2006-08-24  1200  
81a331a0e72ddc Steffen Klassert 2024-10-23  1201  struct xfrm_state *xfrm_input_state_lookup(struct net *net, u32 mark,
81a331a0e72ddc Steffen Klassert 2024-10-23  1202  					   const xfrm_address_t *daddr,
81a331a0e72ddc Steffen Klassert 2024-10-23  1203  					   __be32 spi, u8 proto,
81a331a0e72ddc Steffen Klassert 2024-10-23  1204  					   unsigned short family)
81a331a0e72ddc Steffen Klassert 2024-10-23  1205  {
e952837f3ddb0f Florian Westphal 2024-11-28  1206  	struct xfrm_hash_state_ptrs state_ptrs;
81a331a0e72ddc Steffen Klassert 2024-10-23  1207  	struct hlist_head *state_cache_input;
81a331a0e72ddc Steffen Klassert 2024-10-23  1208  	struct xfrm_state *x = NULL;
81a331a0e72ddc Steffen Klassert 2024-10-23  1209  
ddd3d013292031 Herbert Xu       2026-06-12  1210  	/* BH is always disabled on the input path. */
ddd3d013292031 Herbert Xu       2026-06-12  1211  	lockdep_assert_in_softirq();
ddd3d013292031 Herbert Xu       2026-06-12  1212  
6c9b7db96db62e Sebastian Sewior 2025-01-23  1213  	state_cache_input = raw_cpu_ptr(net->xfrm.state_cache_input);
81a331a0e72ddc Steffen Klassert 2024-10-23  1214  
81a331a0e72ddc Steffen Klassert 2024-10-23  1215  	hlist_for_each_entry_rcu(x, state_cache_input, state_cache_input) {
81a331a0e72ddc Steffen Klassert 2024-10-23  1216  		if (x->props.family != family ||
81a331a0e72ddc Steffen Klassert 2024-10-23  1217  		    x->id.spi       != spi ||
81a331a0e72ddc Steffen Klassert 2024-10-23  1218  		    x->id.proto     != proto ||
81a331a0e72ddc Steffen Klassert 2024-10-23  1219  		    !xfrm_addr_equal(&x->id.daddr, daddr, family))
81a331a0e72ddc Steffen Klassert 2024-10-23 @1220  			continue;
81a331a0e72ddc Steffen Klassert 2024-10-23  1221  
81a331a0e72ddc Steffen Klassert 2024-10-23  1222  		if ((mark & x->mark.m) != x->mark.v)
81a331a0e72ddc Steffen Klassert 2024-10-23  1223  			continue;
81a331a0e72ddc Steffen Klassert 2024-10-23  1224  		if (!xfrm_state_hold_rcu(x))
81a331a0e72ddc Steffen Klassert 2024-10-23  1225  			continue;
81a331a0e72ddc Steffen Klassert 2024-10-23  1226  		goto out;
81a331a0e72ddc Steffen Klassert 2024-10-23  1227  	}
81a331a0e72ddc Steffen Klassert 2024-10-23  1228  
e952837f3ddb0f Florian Westphal 2024-11-28  1229  	xfrm_hash_ptrs_get(net, &state_ptrs);
e952837f3ddb0f Florian Westphal 2024-11-28  1230  
e952837f3ddb0f Florian Westphal 2024-11-28  1231  	x = __xfrm_state_lookup(&state_ptrs, mark, daddr, spi, proto, family);
ddd3d013292031 Herbert Xu       2026-06-12  1232  	if (x) {
ddd3d013292031 Herbert Xu       2026-06-12  1233  		spin_lock(&net->xfrm.xfrm_state_lock);
ddd3d013292031 Herbert Xu       2026-06-12  1234  		if (x->km.state != XFRM_STATE_VALID) {
ddd3d013292031 Herbert Xu       2026-06-12  1235  			/*
ddd3d013292031 Herbert Xu       2026-06-12  1236  			 * The state is about to be destroyed.
ddd3d013292031 Herbert Xu       2026-06-12  1237  			 *
ddd3d013292031 Herbert Xu       2026-06-12  1238  			 * Don't add it to the cache but still
ddd3d013292031 Herbert Xu       2026-06-12  1239  			 * return it to the caller.
ddd3d013292031 Herbert Xu       2026-06-12  1240  			 */
ddd3d013292031 Herbert Xu       2026-06-12  1241  		} else if (hlist_unhashed(&x->state_cache_input)) {
81a331a0e72ddc Steffen Klassert 2024-10-23  1242  			hlist_add_head_rcu(&x->state_cache_input, state_cache_input);
81a331a0e72ddc Steffen Klassert 2024-10-23  1243  		} else {
81a331a0e72ddc Steffen Klassert 2024-10-23  1244  			hlist_del_rcu(&x->state_cache_input);
81a331a0e72ddc Steffen Klassert 2024-10-23  1245  			hlist_add_head_rcu(&x->state_cache_input, state_cache_input);
81a331a0e72ddc Steffen Klassert 2024-10-23  1246  		}
ddd3d013292031 Herbert Xu       2026-06-12  1247  		spin_unlock(&net->xfrm.xfrm_state_lock);
81a331a0e72ddc Steffen Klassert 2024-10-23  1248  	}
81a331a0e72ddc Steffen Klassert 2024-10-23  1249  
81a331a0e72ddc Steffen Klassert 2024-10-23  1250  out:
81a331a0e72ddc Steffen Klassert 2024-10-23  1251  	return x;
81a331a0e72ddc Steffen Klassert 2024-10-23  1252  }
81a331a0e72ddc Steffen Klassert 2024-10-23  1253  EXPORT_SYMBOL(xfrm_input_state_lookup);
81a331a0e72ddc Steffen Klassert 2024-10-23  1254  

:::::: The code at line 1220 was first introduced by commit
:::::: 81a331a0e72ddc2f75092603d9577bd1a0ca23ad xfrm: Add an inbound percpu state cache.

:::::: TO: Steffen Klassert <steffen.klassert@secunet.com>
:::::: CC: Steffen Klassert <steffen.klassert@secunet.com>

--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply

* Re: [PATCH net] net: ethtool: keep rtnl_lock for ops using ethtool_op_get_link()
From: Harshitha Ramamurthy @ 2026-06-25 17:06 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: davem, netdev, edumazet, pabeni, andrew+netdev, horms,
	Breno Leitao, joshwash, anthony.l.nguyen, przemyslaw.kitszel,
	saeedm, tariqt, mbloch, leon, alexanderduyck, kernel-team, kys,
	haiyangz, wei.liu, decui, longli, jordanrhee, jacob.e.keller,
	nktgrg, debarghyak, mohsin.bashr, ernis, sdf, gal, linux-rdma,
	linux-hyperv
In-Reply-To: <20260624190439.2521219-1-kuba@kernel.org>

On Wed, Jun 24, 2026 at 12:04 PM Jakub Kicinski <kuba@kernel.org> wrote:
>
> Breno reports following splats on mlx5:
>
>   RTNL: assertion failed at net/core/dev.c (2241)
>   WARNING: net/core/dev.c:2241 at netif_state_change+0xed/0x130, CPU#5: ethtool/1335
>   RIP: 0010:netif_state_change+0xf9/0x130
>   Call Trace:
>     <TASK>
>      __linkwatch_sync_dev+0xea/0x120
>      ethtool_op_get_link+0xe/0x20
>      __ethtool_get_link+0x26/0x40
>      linkstate_prepare_data+0x51/0x200
>      ethnl_default_doit+0x213/0x470
>      genl_family_rcv_msg_doit+0xdd/0x110
>
> Looks like I missed ethtool_op_get_link() trying to sync linkwatch,
> which needs rtnl_lock. Not all drivers do this - bnxt doesn't,
> it just returns the link state, so add an opt-in bit.
>
> Reported-by: Breno Leitao <leitao@debian.org>
> Fixes: 45079e00133e ("net: ethtool: optionally skip rtnl_lock on Netlink path for GET ops")
> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
> ---
> CC: joshwash@google.com
> CC: hramamurthy@google.com
> CC: anthony.l.nguyen@intel.com
> CC: przemyslaw.kitszel@intel.com
> CC: saeedm@nvidia.com
> CC: tariqt@nvidia.com
> CC: mbloch@nvidia.com
> CC: leon@kernel.org
> CC: alexanderduyck@fb.com
> CC: kernel-team@meta.com
> CC: kys@microsoft.com
> CC: haiyangz@microsoft.com
> CC: wei.liu@kernel.org
> CC: decui@microsoft.com
> CC: longli@microsoft.com
> CC: jordanrhee@google.com
> CC: jacob.e.keller@intel.com
> CC: nktgrg@google.com
> CC: debarghyak@google.com
> CC: leitao@debian.org
> CC: mohsin.bashr@gmail.com
> CC: ernis@linux.microsoft.com
> CC: sdf@fomichev.me
> CC: gal@nvidia.com
> CC: linux-rdma@vger.kernel.org
> CC: linux-hyperv@vger.kernel.org
> ---
>  include/linux/ethtool.h                                 | 2 ++
>  net/ethtool/common.h                                    | 4 ++++
>  drivers/net/ethernet/google/gve/gve_ethtool.c           | 3 ++-
>  drivers/net/ethernet/intel/iavf/iavf_ethtool.c          | 1 +
>  drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c    | 3 ++-
>  drivers/net/ethernet/mellanox/mlx5/core/en_rep.c        | 3 ++-
>  drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c | 4 +++-
>  drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c         | 3 ++-
>  drivers/net/ethernet/microsoft/mana/mana_ethtool.c      | 3 ++-
>  9 files changed, 20 insertions(+), 6 deletions(-)
>
> diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
> index 1b834e2a522e..5d491a98265e 100644
> --- a/include/linux/ethtool.h
> +++ b/include/linux/ethtool.h
> @@ -942,6 +942,7 @@ struct kernel_ethtool_ts_info {
>  #define ETHTOOL_OP_NEEDS_RTNL_GPAUSEPARAM      BIT(5)
>  #define ETHTOOL_OP_NEEDS_RTNL_SPAUSEPARAM      BIT(6)
>  #define ETHTOOL_OP_NEEDS_RTNL_RSS              BIT(7)
> +#define ETHTOOL_OP_NEEDS_RTNL_GLINK            BIT(8)
>
>  /**
>   * struct ethtool_ops - optional netdev operations
> @@ -978,6 +979,7 @@ struct kernel_ethtool_ts_info {
>   *      - phylink helpers (note that phydev is currently unsupported!)
>   *      - netdev_update_features()
>   *      - netif_set_real_num_tx_queues()
> + *      - ethtool_op_get_link() (syncs link watch under rtnl_lock)
>   *
>   * @get_drvinfo: Report driver/device information. Modern drivers no
>   *     longer have to implement this callback. Most fields are
> diff --git a/net/ethtool/common.h b/net/ethtool/common.h
> index 2b3847f00801..4e5356e26f40 100644
> --- a/net/ethtool/common.h
> +++ b/net/ethtool/common.h
> @@ -113,6 +113,8 @@ ethtool_nl_msg_needs_rtnl(const struct net_device *dev, u8 cmd)
>                 return ops->op_needs_rtnl & ETHTOOL_OP_NEEDS_RTNL_SPAUSEPARAM;
>         case ETHTOOL_MSG_RSS_SET:
>                 return ops->op_needs_rtnl & ETHTOOL_OP_NEEDS_RTNL_RSS;
> +       case ETHTOOL_MSG_LINKSTATE_GET:
> +               return ops->op_needs_rtnl & ETHTOOL_OP_NEEDS_RTNL_GLINK;
>         case ETHTOOL_MSG_TSCONFIG_GET:
>         case ETHTOOL_MSG_TSCONFIG_SET:
>                 /* tsconfig calls ndos (ndo_hwtstamp_set/get), not ethtool ops.
> @@ -159,6 +161,8 @@ ethtool_ioctl_needs_rtnl(const struct net_device *dev, u32 ethcmd)
>         case ETHTOOL_SRXFH:
>         case ETHTOOL_SRXFHINDIR:
>                 return ops->op_needs_rtnl & ETHTOOL_OP_NEEDS_RTNL_RSS;
> +       case ETHTOOL_GLINK:
> +               return ops->op_needs_rtnl & ETHTOOL_OP_NEEDS_RTNL_GLINK;
>         }
>         return false;
>  }
> diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c
> index 7cc22916852f..8199738ba979 100644
> --- a/drivers/net/ethernet/google/gve/gve_ethtool.c
> +++ b/drivers/net/ethernet/google/gve/gve_ethtool.c
> @@ -984,7 +984,8 @@ const struct ethtool_ops gve_ethtool_ops = {
>         .supported_ring_params = ETHTOOL_RING_USE_TCP_DATA_SPLIT |
>                                  ETHTOOL_RING_USE_RX_BUF_LEN,
>         .op_needs_rtnl = ETHTOOL_OP_NEEDS_RTNL_SCHANNELS |
> -                        ETHTOOL_OP_NEEDS_RTNL_SRINGPARAM,
> +                        ETHTOOL_OP_NEEDS_RTNL_SRINGPARAM |
> +                        ETHTOOL_OP_NEEDS_RTNL_GLINK,

Acked-by: Harshitha Ramamurthy <hramamurthy@google.com>

Thanks for the fix!
>         .get_drvinfo = gve_get_drvinfo,
>         .get_strings = gve_get_strings,
>         .get_sset_count = gve_get_sset_count,
> diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
> index a615d599b88e..e7cf12eaa268 100644
> --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
> +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
> @@ -1855,6 +1855,7 @@ static const struct ethtool_ops iavf_ethtool_ops = {
>         .supported_coalesce_params = ETHTOOL_COALESCE_USECS |
>                                      ETHTOOL_COALESCE_USE_ADAPTIVE,
>         .supported_input_xfrm   = RXH_XFRM_SYM_XOR,
> +       .op_needs_rtnl          = ETHTOOL_OP_NEEDS_RTNL_GLINK,
>         .get_drvinfo            = iavf_get_drvinfo,
>         .get_link               = ethtool_op_get_link,
>         .get_ringparam          = iavf_get_ringparam,
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
> index 2f5b626ba33f..112926d07634 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
> @@ -2721,7 +2721,8 @@ const struct ethtool_ops mlx5e_ethtool_ops = {
>         .rxfh_max_num_contexts  = MLX5E_MAX_NUM_RSS,
>         .op_needs_rtnl          = ETHTOOL_OP_NEEDS_RTNL_SCHANNELS |
>                                   ETHTOOL_OP_NEEDS_RTNL_SRINGPARAM |
> -                                 ETHTOOL_OP_NEEDS_RTNL_SPFLAGS,
> +                                 ETHTOOL_OP_NEEDS_RTNL_SPFLAGS |
> +                                 ETHTOOL_OP_NEEDS_RTNL_GLINK,
>         .supported_coalesce_params = ETHTOOL_COALESCE_USECS |
>                                      ETHTOOL_COALESCE_MAX_FRAMES |
>                                      ETHTOOL_COALESCE_USE_ADAPTIVE |
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
> index 1a8a19f980d3..c8b76d301c92 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
> @@ -419,7 +419,8 @@ static const struct ethtool_ops mlx5e_rep_ethtool_ops = {
>                                      ETHTOOL_COALESCE_MAX_FRAMES |
>                                      ETHTOOL_COALESCE_USE_ADAPTIVE,
>         .op_needs_rtnl     = ETHTOOL_OP_NEEDS_RTNL_SCHANNELS |
> -                            ETHTOOL_OP_NEEDS_RTNL_SRINGPARAM,
> +                            ETHTOOL_OP_NEEDS_RTNL_SRINGPARAM |
> +                            ETHTOOL_OP_NEEDS_RTNL_GLINK,
>         .get_drvinfo       = mlx5e_rep_get_drvinfo,
>         .get_link          = ethtool_op_get_link,
>         .get_strings       = mlx5e_rep_get_strings,
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
> index 9b3b32408c64..01ddc3def9ac 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
> @@ -286,7 +286,8 @@ const struct ethtool_ops mlx5i_ethtool_ops = {
>                                      ETHTOOL_COALESCE_MAX_FRAMES |
>                                      ETHTOOL_COALESCE_USE_ADAPTIVE,
>         .op_needs_rtnl      = ETHTOOL_OP_NEEDS_RTNL_SCHANNELS |
> -                             ETHTOOL_OP_NEEDS_RTNL_SRINGPARAM,
> +                             ETHTOOL_OP_NEEDS_RTNL_SRINGPARAM |
> +                             ETHTOOL_OP_NEEDS_RTNL_GLINK,
>         .get_drvinfo        = mlx5i_get_drvinfo,
>         .get_strings        = mlx5i_get_strings,
>         .get_sset_count     = mlx5i_get_sset_count,
> @@ -309,6 +310,7 @@ const struct ethtool_ops mlx5i_ethtool_ops = {
>  };
>
>  const struct ethtool_ops mlx5i_pkey_ethtool_ops = {
> +       .op_needs_rtnl      = ETHTOOL_OP_NEEDS_RTNL_GLINK,
>         .get_drvinfo        = mlx5i_get_drvinfo,
>         .get_link           = ethtool_op_get_link,
>         .get_ts_info        = mlx5i_get_ts_info,
> diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c
> index cb34fc166ef9..0e47088ec44b 100644
> --- a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c
> +++ b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c
> @@ -2024,7 +2024,8 @@ static const struct ethtool_ops fbnic_ethtool_ops = {
>                                           ETHTOOL_OP_NEEDS_RTNL_GPAUSEPARAM |
>                                           ETHTOOL_OP_NEEDS_RTNL_SPAUSEPARAM |
>                                           ETHTOOL_OP_NEEDS_RTNL_SCHANNELS |
> -                                         ETHTOOL_OP_NEEDS_RTNL_SRINGPARAM,
> +                                         ETHTOOL_OP_NEEDS_RTNL_SRINGPARAM |
> +                                         ETHTOOL_OP_NEEDS_RTNL_GLINK,
>         .get_drvinfo                    = fbnic_get_drvinfo,
>         .get_regs_len                   = fbnic_get_regs_len,
>         .get_regs                       = fbnic_get_regs,
> diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
> index 94e658d07a27..881df597d7f9 100644
> --- a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
> +++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
> @@ -597,7 +597,8 @@ static int mana_get_link_ksettings(struct net_device *ndev,
>  const struct ethtool_ops mana_ethtool_ops = {
>         .supported_coalesce_params = ETHTOOL_COALESCE_RX_CQE_FRAMES,
>         .op_needs_rtnl          = ETHTOOL_OP_NEEDS_RTNL_SCHANNELS |
> -                                 ETHTOOL_OP_NEEDS_RTNL_SRINGPARAM,
> +                                 ETHTOOL_OP_NEEDS_RTNL_SRINGPARAM |
> +                                 ETHTOOL_OP_NEEDS_RTNL_GLINK,
>         .get_ethtool_stats      = mana_get_ethtool_stats,
>         .get_sset_count         = mana_get_sset_count,
>         .get_strings            = mana_get_strings,
> --
> 2.54.0
>

^ permalink raw reply

* [PATCH 5.15/6.1/6.6 2/2] sctp: disable BH before calling udp_tunnel_xmit_skb()
From: Alexander Martyniuk @ 2026-06-25 16:53 UTC (permalink / raw)
  To: stable, Greg Kroah-Hartman
  Cc: Alexander Martyniuk, Marcelo Ricardo Leitner, Xin Long,
	David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
	Simon Horman, Weiming Shi, linux-sctp, netdev, linux-kernel
In-Reply-To: <20260625165335.162311-1-alexevgmart@gmail.com>

From: Xin Long <lucien.xin@gmail.com>

commit 2cd7e6971fc2787408ceef17906ea152791448cf upstream.

udp_tunnel_xmit_skb() / udp_tunnel6_xmit_skb() are expected to run with
BH disabled.  After commit 6f1a9140ecda ("net: add xmit recursion limit
to tunnel xmit functions"), on the path:

  udp(6)_tunnel_xmit_skb() -> ip(6)tunnel_xmit()

dev_xmit_recursion_inc()/dec() must stay balanced on the same CPU.

Without local_bh_disable(), the context may move between CPUs, which can
break the inc/dec pairing. This may lead to incorrect recursion level
detection and cause packets to be dropped in ip(6)_tunnel_xmit() or
__dev_queue_xmit().

Fix it by disabling BH around both IPv4 and IPv6 SCTP UDP xmit paths.

In my testing, after enabling SCTP over UDP:

  # ip net exec ha sysctl -w net.sctp.udp_port=9899
  # ip net exec ha sysctl -w net.sctp.encap_port=9899
  # ip net exec hb sysctl -w net.sctp.udp_port=9899
  # ip net exec hb sysctl -w net.sctp.encap_port=9899

  # ip net exec ha iperf3 -s

- without this patch:

  # ip net exec hb iperf3 -c 192.168.0.1 --sctp
  [  5]   0.00-10.00  sec  37.2 MBytes  31.2 Mbits/sec  sender
  [  5]   0.00-10.00  sec  37.1 MBytes  31.1 Mbits/sec  receiver

- with this patch:

  # ip net exec hb iperf3 -c 192.168.0.1 --sctp
  [  5]   0.00-10.00  sec  3.14 GBytes  2.69 Gbits/sec  sender
  [  5]   0.00-10.00  sec  3.14 GBytes  2.69 Gbits/sec  receiver

Fixes: 6f1a9140ecda ("net: add xmit recursion limit to tunnel xmit functions")
Fixes: 046c052b475e ("sctp: enable udp tunneling socks")
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Link: https://patch.msgid.link/c874a8548221dcd56ff03c65ba75a74e6cf99119.1776017727.git.lucien.xin@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Alexander Martyniuk <alexevgmart@gmail.com>
---
 net/sctp/ipv6.c     | 2 ++
 net/sctp/protocol.c | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 12469cf1a49d..99686b87b99a 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -263,9 +263,11 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *t)
 	skb_set_inner_ipproto(skb, IPPROTO_SCTP);
 	label = ip6_make_flowlabel(sock_net(sk), skb, fl6->flowlabel, true, fl6);
 
+	local_bh_disable();
 	udp_tunnel6_xmit_skb(dst, sk, skb, NULL, &fl6->saddr, &fl6->daddr,
 			     tclass, ip6_dst_hoplimit(dst), label,
 			     sctp_sk(sk)->udp_port, t->encap_port, false);
+	local_bh_enable();
 	return 0;
 }
 
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 2185f44198de..0f7e241178f5 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1085,9 +1085,11 @@ static inline int sctp_v4_xmit(struct sk_buff *skb, struct sctp_transport *t)
 	skb_reset_inner_mac_header(skb);
 	skb_reset_inner_transport_header(skb);
 	skb_set_inner_ipproto(skb, IPPROTO_SCTP);
+	local_bh_disable();
 	udp_tunnel_xmit_skb((struct rtable *)dst, sk, skb, fl4->saddr,
 			    fl4->daddr, dscp, ip4_dst_hoplimit(dst), df,
 			    sctp_sk(sk)->udp_port, t->encap_port, false, false);
+	local_bh_enable();
 	return 0;
 }
 
-- 
2.43.0


^ permalink raw reply related

* [PATCH 5.15/6.1/6.6 1/2] net: ipv6: Make udp_tunnel6_xmit_skb() void
From: Alexander Martyniuk @ 2026-06-25 16:53 UTC (permalink / raw)
  To: stable, Greg Kroah-Hartman
  Cc: Alexander Martyniuk, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Simon Horman, David Ahern,
	Marcelo Ricardo Leitner, Xin Long, Jon Maloy, Ying Xue, netdev,
	linux-kernel, linux-sctp, tipc-discussion, Petr Machata,
	Ido Schimmel, Nikolay Aleksandrov
In-Reply-To: <20260625165335.162311-1-alexevgmart@gmail.com>

From: Petr Machata <petrm@nvidia.com>

commit 6a7d88ca15f73c5c570c372238f71d63da1fda55 upstream.

The function always returns zero, thus the return value does not carry any
signal. Just make it void.

Most callers already ignore the return value. However:

- Refold arguments of the call from sctp_v6_xmit() so that they fit into
  the 80-column limit.

- tipc_udp_xmit() initializes err from the return value, but that should
  already be always zero at that point. So there's no practical change, but
  elision of the assignment prompts a couple more tweaks to clean up the
  function.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org>
Link: https://patch.msgid.link/7facacf9d8ca3ca9391a4aee88160913671b868d.1750113335.git.petrm@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Alexander Martyniuk <alexevgmart@gmail.com>
---
 include/net/udp_tunnel.h  |  2 +-
 net/ipv6/ip6_udp_tunnel.c |  3 +--
 net/sctp/ipv6.c           |  7 ++++---
 net/tipc/udp_media.c      | 10 +++++-----
 4 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h
index 6818a59a1ebc..dc796ddd231d 100644
--- a/include/net/udp_tunnel.h
+++ b/include/net/udp_tunnel.h
@@ -152,7 +152,7 @@ void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb
 			 __be16 df, __be16 src_port, __be16 dst_port,
 			 bool xnet, bool nocheck);
 
-int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk,
+void udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk,
 			 struct sk_buff *skb,
 			 struct net_device *dev, struct in6_addr *saddr,
 			 struct in6_addr *daddr,
diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c
index 7aef559e60ec..886c42de0566 100644
--- a/net/ipv6/ip6_udp_tunnel.c
+++ b/net/ipv6/ip6_udp_tunnel.c
@@ -74,7 +74,7 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
 }
 EXPORT_SYMBOL_GPL(udp_sock_create6);
 
-int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk,
+void udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk,
 			 struct sk_buff *skb,
 			 struct net_device *dev, struct in6_addr *saddr,
 			 struct in6_addr *daddr,
@@ -108,7 +108,6 @@ int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk,
 	ip6h->saddr	  = *saddr;
 
 	ip6tunnel_xmit(sk, skb, dev);
-	return 0;
 }
 EXPORT_SYMBOL_GPL(udp_tunnel6_xmit_skb);
 
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 0673857cb3d8..12469cf1a49d 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -263,9 +263,10 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *t)
 	skb_set_inner_ipproto(skb, IPPROTO_SCTP);
 	label = ip6_make_flowlabel(sock_net(sk), skb, fl6->flowlabel, true, fl6);
 
-	return udp_tunnel6_xmit_skb(dst, sk, skb, NULL, &fl6->saddr,
-				    &fl6->daddr, tclass, ip6_dst_hoplimit(dst),
-				    label, sctp_sk(sk)->udp_port, t->encap_port, false);
+	udp_tunnel6_xmit_skb(dst, sk, skb, NULL, &fl6->saddr, &fl6->daddr,
+			     tclass, ip6_dst_hoplimit(dst), label,
+			     sctp_sk(sk)->udp_port, t->encap_port, false);
+	return 0;
 }
 
 /* Returns the dst cache entry for the given source and destination ip
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index e993bd6ed7c2..26aca3df2978 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -172,7 +172,7 @@ static int tipc_udp_xmit(struct net *net, struct sk_buff *skb,
 			 struct udp_media_addr *dst, struct dst_cache *cache)
 {
 	struct dst_entry *ndst;
-	int ttl, err = 0;
+	int ttl, err;
 
 	local_bh_disable();
 	ndst = dst_cache_get(cache);
@@ -217,13 +217,13 @@ static int tipc_udp_xmit(struct net *net, struct sk_buff *skb,
 			dst_cache_set_ip6(cache, ndst, &fl6.saddr);
 		}
 		ttl = ip6_dst_hoplimit(ndst);
-		err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, skb, NULL,
-					   &src->ipv6, &dst->ipv6, 0, ttl, 0,
-					   src->port, dst->port, false);
+		udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, skb, NULL,
+				     &src->ipv6, &dst->ipv6, 0, ttl, 0,
+				     src->port, dst->port, false);
 #endif
 	}
 	local_bh_enable();
-	return err;
+	return 0;
 
 tx_error:
 	local_bh_enable();
-- 
2.43.0


^ permalink raw reply related

* Re: [PATCH bpf-next v10 1/5] bpf: add bpf_icmp_send kfunc
From: Mahe Tardy @ 2026-06-25 16:49 UTC (permalink / raw)
  To: Stanislav Fomichev
  Cc: bpf, andrii, ast, daniel, john.fastabend, jordan, martin.lau,
	yonghong.song, emil, netdev, edumazet, kuba, pabeni, davem, horms
In-Reply-To: <aj1V2ZdzY1EGtsma@devvm7509.cco0.facebook.com>

On Thu, Jun 25, 2026 at 09:24:59AM -0700, Stanislav Fomichev wrote:
> On 06/25, Mahe Tardy wrote:

[...]

> > +__bpf_kfunc int bpf_icmp_send(struct __sk_buff *skb_ctx, int type, int code)
> > +{
> > +	struct sk_buff *skb = (struct sk_buff *)skb_ctx;
> > +	struct sk_buff *nskb;
> > +	struct sock *sk;
> > +
> > +	sk = skb_to_full_sk(skb);
> > +	if (sk && sk->sk_kern_sock &&
> > +	    (sk->sk_protocol == IPPROTO_ICMP || sk->sk_protocol == IPPROTO_ICMPV6))
> > +		return -EBUSY;
> > +
> > +	switch (skb->protocol) {
> > +#if IS_ENABLED(CONFIG_INET)
> > +	case htons(ETH_P_IP): {
> > +		if (type != ICMP_DEST_UNREACH)
> > +			return -EOPNOTSUPP;
> > +		if (code < 0 || code > NR_ICMP_UNREACH ||
> > +		    code == ICMP_FRAG_NEEDED) /* needs a valid next-hop MTU */
> > +			return -EINVAL;
> > +
> > +		/* icmp_send expects skb_dst to be a real rtable. */
> > +		if (!skb_valid_dst(skb))
> > +			return -ENETUNREACH;
> > +
> > +		nskb = skb_clone(skb, GFP_ATOMIC);
> > +		if (!nskb)
> > +			return -ENOMEM;
> > +
> > +		memset(IPCB(nskb), 0, sizeof(*IPCB(nskb)));
> > +		icmp_send(nskb, type, code, 0);
> > +		consume_skb(nskb);
> > +		break;
> > +	}
> > +#endif
> > +#if IS_ENABLED(CONFIG_IPV6)
> > +	case htons(ETH_P_IPV6):
> > +		if (type != ICMPV6_DEST_UNREACH)
> > +			return -EOPNOTSUPP;
> > +		if (code < 0 || code > ICMPV6_REJECT_ROUTE)
> > +			return -EINVAL;
> 
> [..]
> 
> > +		/* icmpv6_send may treat skb_dst as rt6_info. */
> > +		if (skb_metadata_dst(skb))
> > +			return -ENETUNREACH;
> 
> A bit confused about this. Which part of icmpv6_send treats skb_dst as rt6_info?
> (I see the original sashiko report about dst, but icmp6 seems to be not
> requiring it)

Yeah I was also a bit confused because this came out of nowhere as soon
as I put the skb_valid_dst only on the IPv4 path (for different
reasons), but there is actually a potential trace in which we have type
confusion indeed:

- icmp6_send() checks scoped source addresses and calls icmp6_iif() at net/ipv6/icmp.c:702
- icmp6_iif() calls icmp6_dev() at net/ipv6/icmp.c:441
- icmp6_dev() does skb_rt6_info(skb) for loopback/L3 master devices at net/ipv6/icmp.c:428
- skb_rt6_info() casts any non-NULL dst to struct rt6_info at include/net/ip6_route.h:233
- rt6->rt6i_idev is then dereferenced at net/ipv6/icmp.c:434

When checking with pahole, we can find this on my local kernel:

struct rt6_info {
	struct dst_entry           dst;                  /*     0   136 */
	/* --- cacheline 2 boundary (128 bytes) was 8 bytes ago --- */
	struct fib6_info *         from;                 /*   136     8 */
	int                        sernum;               /*   144     4 */
	struct rt6key              rt6i_dst;             /*   148    20 */
	struct rt6key              rt6i_src;             /*   168    20 */
	struct in6_addr            rt6i_gateway;         /*   188    16 */

	/* XXX 4 bytes hole, try to pack */

	/* --- cacheline 3 boundary (192 bytes) was 16 bytes ago --- */
	struct inet6_dev *         rt6i_idev;            /*   208     8 */  <--- we dereference this
	u32                        rt6i_flags;           /*   216     4 */
	short unsigned int         rt6i_nfheader_len;    /*   220     2 */

	/* size: 224, cachelines: 4, members: 9 */
	/* sum members: 218, holes: 1, sum holes: 4 */
	/* padding: 2 */
	/* last cacheline: 32 bytes */
};

And the metadata_dst would look like this:

struct metadata_dst {
	struct dst_entry           dst;                  /*     0   136 */
	/* --- cacheline 2 boundary (128 bytes) was 8 bytes ago --- */
	enum metadata_type         type;                 /*   136     4 */

	/* XXX 4 bytes hole, try to pack */

	union {
		struct ip_tunnel_info tun_info;          /*   144    96 */
		struct hw_port_info port_info;           /*   144    16 */
		struct macsec_info macsec_info;          /*   144     8 */
		struct xfrm_md_info xfrm_info;           /*   144    16 */
	} u;                                             /*   144    96 */  <--- we land on this union

	/* size: 240, cachelines: 4, members: 3 */
	/* sum members: 236, holes: 1, sum holes: 4 */
	/* last cacheline: 48 bytes */
};

Let's say it's a struct ip_tunnel_info:

struct ip_tunnel_info {
	struct ip_tunnel_key       key;                  /*     0    64 */

	/* XXX last struct has 7 bytes of padding */

	/* --- cacheline 1 boundary (64 bytes) --- */
	struct ip_tunnel_encap     encap;                /*    64     8 */  <--- 144 + 64 = 208 we land here
	struct dst_cache           dst_cache;            /*    72    16 */
	u8                         options_len;          /*    88     1 */
	u8                         mode;                 /*    89     1 */

	/* size: 96, cachelines: 2, members: 5 */
	/* padding: 6 */
	/* paddings: 1, sum paddings: 7 */
	/* last cacheline: 32 bytes */
};

So I imagine this is fairly tricky to trigger, but it is still a case of
type confusion. I actually have no idea how likely this is to happen from
my call, but the trace makes sense at least.

^ permalink raw reply

* [PATCH v2] netfilter: nf_log: validate MAC header was set before dumping it
From: Alexander Martyniuk @ 2026-06-25 16:47 UTC (permalink / raw)
  To: sashal
  Cc: alexevgmart, bestswngs, coreteam, davem, fw, gregkh, kaber,
	kadlec, kuba, kuznet, linux-kernel, netdev, netfilter-devel,
	pablo, stable, xmei5, yoshfuji
In-Reply-To: <20260625054005.0003.nflog-510@kernel.org>

From: Xiang Mei <xmei5@asu.edu>

commit a84b6fedbc97078788be78dbdd7517d143ad1a77 upstream.

The fallback path of dump_mac_header() guards the MAC header access
only with "skb->mac_header != skb->network_header", without checking
skb_mac_header_was_set(). When the MAC header is unset, mac_header is
0xffff, so the test passes and skb_mac_header(skb) returns
skb->head + 0xffff, ~64 KiB past the buffer; the loop then reads
dev->hard_header_len bytes out of bounds into the kernel log.

This is reachable via the netdev logger: nf_log_unknown_packet() calls
dump_mac_header() unconditionally, and an skb sent through AF_PACKET
with PACKET_QDISC_BYPASS reaches the egress hook with mac_header still
unset (__dev_queue_xmit(), which would reset it, is bypassed).

Add the skb_mac_header_was_set() check the ARPHRD_ETHER path already
uses, and replace the open-coded MAC header length test with
skb_mac_header_len(). Only skbs with an unset MAC header are affected;
valid ones are dumped as before.

 BUG: KASAN: slab-out-of-bounds in dump_mac_header (net/netfilter/nf_log_syslog.c:831)
 Read of size 1 at addr ffff88800ea49d3f by task exploit/148
 Call Trace:
  kasan_report (mm/kasan/report.c:595)
  dump_mac_header (net/netfilter/nf_log_syslog.c:831)
  nf_log_netdev_packet (net/netfilter/nf_log_syslog.c:938 net/netfilter/nf_log_syslog.c:963)
  nf_log_packet (net/netfilter/nf_log.c:260)
  nft_log_eval (net/netfilter/nft_log.c:60)
  nft_do_chain (net/netfilter/nf_tables_core.c:285)
  nft_do_chain_netdev (net/netfilter/nft_chain_filter.c:307)
  nf_hook_slow (net/netfilter/core.c:619)
  nf_hook_direct_egress (net/packet/af_packet.c:257)
  packet_xmit (net/packet/af_packet.c:280)
  packet_sendmsg (net/packet/af_packet.c:3114)
  __sys_sendto (net/socket.c:2265)

Fixes: 7eb9282cd0ef ("netfilter: ipt_LOG/ip6t_LOG: add option to print decoded MAC header")
Reported-by: Weiming Shi <bestswngs@gmail.com>
Assisted-by: Claude:claude-opus-4-8
Signed-off-by: Xiang Mei <xmei5@asu.edu>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Alexander Martyniuk <alexevgmart@gmail.com>
---
 net/ipv4/netfilter/nf_log_ipv4.c | 4 ++--
 net/ipv6/netfilter/nf_log_ipv6.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c
index d07583fac8f8..d6164e8e2c73 100644
--- a/net/ipv4/netfilter/nf_log_ipv4.c
+++ b/net/ipv4/netfilter/nf_log_ipv4.c
@@ -296,8 +296,8 @@ static void dump_ipv4_mac_header(struct nf_log_buf *m,
 
 fallback:
 	nf_log_buf_add(m, "MAC=");
-	if (dev->hard_header_len &&
-	    skb->mac_header != skb->network_header) {
+	if (dev->hard_header_len && skb_mac_header_was_set(skb) &&
+	    skb_mac_header_len(skb) != 0) {
 		const unsigned char *p = skb_mac_header(skb);
 		unsigned int i;
 
diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c
index 8210ff34ed9b..cc724870a467 100644
--- a/net/ipv6/netfilter/nf_log_ipv6.c
+++ b/net/ipv6/netfilter/nf_log_ipv6.c
@@ -309,8 +309,8 @@ static void dump_ipv6_mac_header(struct nf_log_buf *m,
 
 fallback:
 	nf_log_buf_add(m, "MAC=");
-	if (dev->hard_header_len &&
-	    skb->mac_header != skb->network_header) {
+	if (dev->hard_header_len && skb_mac_header_was_set(skb) &&
+	    skb_mac_header_len(skb) != 0) {
 		const unsigned char *p = skb_mac_header(skb);
 		unsigned int len = dev->hard_header_len;
 		unsigned int i;
-- 
2.43.0


^ permalink raw reply related

* Re: [PATCH net] net: ethtool: keep rtnl_lock for ops using ethtool_op_get_link()
From: Breno Leitao @ 2026-06-25 16:47 UTC (permalink / raw)
  To: Stanislav Fomichev
  Cc: Jakub Kicinski, davem, netdev, edumazet, pabeni, andrew+netdev,
	horms, joshwash, hramamurthy, anthony.l.nguyen,
	przemyslaw.kitszel, saeedm, tariqt, mbloch, leon, alexanderduyck,
	kernel-team, kys, haiyangz, wei.liu, decui, longli, jordanrhee,
	jacob.e.keller, nktgrg, debarghyak, mohsin.bashr, ernis, sdf, gal,
	linux-rdma, linux-hyperv
In-Reply-To: <aj1Nqe3RoITzxSEb@devvm7509.cco0.facebook.com>

On Thu, Jun 25, 2026 at 08:48:03AM -0700, Stanislav Fomichev wrote:
> On 06/24, Jakub Kicinski wrote:
> > Breno reports following splats on mlx5:
> > 
> >   RTNL: assertion failed at net/core/dev.c (2241)
> >   WARNING: net/core/dev.c:2241 at netif_state_change+0xed/0x130, CPU#5: ethtool/1335
> >   RIP: 0010:netif_state_change+0xf9/0x130
> >   Call Trace:
> >     <TASK>
> >      __linkwatch_sync_dev+0xea/0x120
> >      ethtool_op_get_link+0xe/0x20
> >      __ethtool_get_link+0x26/0x40
> >      linkstate_prepare_data+0x51/0x200
> >      ethnl_default_doit+0x213/0x470
> >      genl_family_rcv_msg_doit+0xdd/0x110
> > 
> > Looks like I missed ethtool_op_get_link() trying to sync linkwatch,
> > which needs rtnl_lock. Not all drivers do this - bnxt doesn't,
> > it just returns the link state, so add an opt-in bit.
> > 
> > Reported-by: Breno Leitao <leitao@debian.org>
> > Fixes: 45079e00133e ("net: ethtool: optionally skip rtnl_lock on Netlink path for GET ops")
> > Signed-off-by: Jakub Kicinski <kuba@kernel.org>
> 
> Acked-by: Stanislav Fomichev <sdf@fomichev.me>

Reviewed-by: Breno Leitao <leitao@debian.org>

^ permalink raw reply

* Re: [PATCH v3] net: fman: fix use-after-free on IRQF_SHARED handler after probe failure
From: Simon Horman @ 2026-06-25 16:42 UTC (permalink / raw)
  To: zhaojinming
  Cc: Simon Horman, andrew, andrew+netdev, davem, edumazet, kuba,
	linux-kernel, madalin.bucur, netdev, pabeni, sean.anderson
In-Reply-To: <20260624094922.2971930-2-zhaojinming@uniontech.com>

This is an AI-generated review of your patch. The human sending this
email has considered the AI review valid, or at least plausible.
Full review at: https://sashiko.dev
---
net: fman: fix use-after-free on IRQF_SHARED handler after probe failure

This patch fixes a use-after-free in the error path of read_dts_node(). It
manually releases the registered IRQ handlers before freeing the fman
structure, preventing the devres cleanup from leaving a registered IRQ
pointing to freed memory.

> diff --git a/drivers/net/ethernet/freescale/fman/fman.c b/drivers/net/ethernet/freescale/fman/fman.c
> index 013273a2de32f..ba2338da0cea1 100644
> --- a/drivers/net/ethernet/freescale/fman/fman.c
> +++ b/drivers/net/ethernet/freescale/fman/fman.c
[ ... ]
> @@ -2786,7 +2790,7 @@ static struct fman *read_dts_node(struct platform_device *of_dev)
>  		if (err < 0) {
>  			dev_err(&of_dev->dev, "%s: irq %d allocation failed (error = %d)\n",
>  				__func__, fman->dts_params.err_irq, err);
> -			goto fman_free;
> +			goto free_main_irq;

[Severity: High]
This is a pre-existing issue, but since we are modifying the IRQ error
handling here, could the shared interrupt handler crash if it fires during
this setup phase?

Just before this snippet in read_dts_node(), devm_request_irq() is called
with IRQF_SHARED while fman is still partially initialized. Since fman was
allocated with kzalloc_obj(), fman->cfg and fman->fpm_regs are NULL.

If a shared interrupt fires immediately, fman_irq() is called:

drivers/net/ethernet/freescale/fman/fman.c:fman_irq() {
        ...
        if (!is_init_done(fman->cfg))
                return IRQ_NONE;

        fpm_rg = fman->fpm_regs;
        pending = ioread32be(&fpm_rg->fm_npi);
        ...
}

Since fman->cfg is NULL, is_init_done() behaves like this:

drivers/net/ethernet/freescale/fman/fman.c:is_init_done() {
        if (!cfg)
                return true;
        ...
}

Would this cause fman_irq() to bypass the early return, proceed with a NULL
fpm_rg pointer, and dereference it during the ioread32be() call?

>  		}
>  	}

^ permalink raw reply

* Re: [PATCH 1/2] bpf: preserve rx_queue_index across XDP redirects
From: Alexei Starovoitov @ 2026-06-25 16:44 UTC (permalink / raw)
  To: Jakub Kicinski, Siddharth C
  Cc: ast, hawk, andrii, netdev, bpf, linux-kernel, linux-kselftest
In-Reply-To: <20260624185432.32d90aa8@kernel.org>

On Wed Jun 24, 2026 at 6:54 PM PDT, Jakub Kicinski wrote:
> On Sat, 20 Jun 2026 12:13:13 +0000 Siddharth C wrote:
>> diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
>> index 5e59ab896f05..8f2d7013620f 100644
>> --- a/kernel/bpf/cpumap.c
>> +++ b/kernel/bpf/cpumap.c
>> @@ -197,7 +197,7 @@ static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu,
>>  
>>  		rxq.dev = xdpf->dev_rx;
>>  		rxq.mem.type = xdpf->mem_type;
>> -		/* TODO: report queue_index to xdp_rxq_info */
>> +		rxq.queue_index = xdpf->rx_queue_index;
>
> Do you actually need this or you're just trying to address the TODO?

It's a 3rd if not 4th attempt from various "people" to address this TODO.
We should just remove this line instead.


^ permalink raw reply

* Re: [PATCH bpf-next v10 1/5] bpf: add bpf_icmp_send kfunc
From: Stanislav Fomichev @ 2026-06-25 16:24 UTC (permalink / raw)
  To: Mahe Tardy
  Cc: bpf, andrii, ast, daniel, john.fastabend, jordan, martin.lau,
	yonghong.song, emil, netdev, edumazet, kuba, pabeni, davem, horms
In-Reply-To: <20260625110321.28236-2-mahe.tardy@gmail.com>

On 06/25, Mahe Tardy wrote:
> This is needed in the context of Tetragon to provide improved feedback
> (in contrast to just dropping packets) to east-west traffic when blocked
> by policies using cgroup_skb programs.
> 
> This reuses concepts from netfilter reject target codepath with the
> differences that:
> * Packets are cloned since the BPF user can still let the packet pass
>   (SK_PASS from the cgroup_skb progs for example) and the current skb
>   needs to stay untouched (cgroup_skb hooks only allow read-only skb
>   payload).
> * We protect against recursion since the kfunc, by generating an ICMP
>   error message, could retrigger the BPF prog that invoked it.
> 
> Only ICMP_DEST_UNREACH and ICMPV6_DEST_UNREACH are currently supported.
> The interface accepts a type parameter to facilitate future extension to
> other ICMP control message types.
> 
> For normal cgroup_skb paths, the skb dst route should already be set.
> However, bpf_prog_test_run_skb can create synthetic IPv4 skbs without an
> attached route. In that case, icmp_send returns early, and the kfunc
> would otherwise report success despite no ICMP reply being sent. The
> check also rejects metadata dsts, which are not valid struct rtable
> instances. For IPv6, reject metadata dsts only: icmpv6_send can reach
> icmp6_dev, where skb_rt6_info treats any non-NULL skb dst as a struct
> rt6_info, which is not valid for metadata_dst.
> 
> Reviewed-by: Emil Tsalapatis <emil@etsalapatis.com>
> Reviewed-by: Jordan Rife <jordan@jrife.io>
> Signed-off-by: Mahe Tardy <mahe.tardy@gmail.com>
> ---
>  net/core/filter.c | 95 +++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 95 insertions(+)
> 
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 2e96b4b847ce..0a0191586b44 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -84,6 +84,9 @@
>  #include <linux/un.h>
>  #include <net/xdp_sock_drv.h>
>  #include <net/inet_dscp.h>
> +#include <linux/icmpv6.h>
> +#include <net/icmp.h>
> +#include <net/ip6_route.h>
> 
>  #include "dev.h"
> 
> @@ -12546,6 +12549,88 @@ __bpf_kfunc int bpf_xdp_pull_data(struct xdp_md *x, u32 len)
>  	return 0;
>  }
> 
> +/**
> + * bpf_icmp_send - Send an ICMP control message
> + * @skb_ctx: Packet that triggered the control message
> + * @type: ICMP type (only ICMP_DEST_UNREACH/ICMPV6_DEST_UNREACH supported)
> + * @code: ICMP code (0-15 except ICMP_FRAG_NEEDED for IPv4, 0-6 for IPv6)
> + *
> + * Sends an ICMP control message in response to the packet. The original packet
> + * is cloned before sending the ICMP message, so the BPF program can still let
> + * the packet pass if desired.
> + *
> + * Currently only ICMP_DEST_UNREACH (IPv4) and ICMPV6_DEST_UNREACH (IPv6) are
> + * supported.
> + *
> + * Return: 0 on success (send attempt), negative error code on failure:
> + *         -EBUSY: Recursion detected
> + *         -EPROTONOSUPPORT: Non-IP protocol
> + *         -EOPNOTSUPP: Unsupported ICMP type
> + *         -EINVAL: Invalid code parameter
> + *         -ENETUNREACH: No usable route/dst for the ICMP reply
> + *         -ENOMEM: Memory allocation failed
> + */
> +__bpf_kfunc int bpf_icmp_send(struct __sk_buff *skb_ctx, int type, int code)
> +{
> +	struct sk_buff *skb = (struct sk_buff *)skb_ctx;
> +	struct sk_buff *nskb;
> +	struct sock *sk;
> +
> +	sk = skb_to_full_sk(skb);
> +	if (sk && sk->sk_kern_sock &&
> +	    (sk->sk_protocol == IPPROTO_ICMP || sk->sk_protocol == IPPROTO_ICMPV6))
> +		return -EBUSY;
> +
> +	switch (skb->protocol) {
> +#if IS_ENABLED(CONFIG_INET)
> +	case htons(ETH_P_IP): {
> +		if (type != ICMP_DEST_UNREACH)
> +			return -EOPNOTSUPP;
> +		if (code < 0 || code > NR_ICMP_UNREACH ||
> +		    code == ICMP_FRAG_NEEDED) /* needs a valid next-hop MTU */
> +			return -EINVAL;
> +
> +		/* icmp_send expects skb_dst to be a real rtable. */
> +		if (!skb_valid_dst(skb))
> +			return -ENETUNREACH;
> +
> +		nskb = skb_clone(skb, GFP_ATOMIC);
> +		if (!nskb)
> +			return -ENOMEM;
> +
> +		memset(IPCB(nskb), 0, sizeof(*IPCB(nskb)));
> +		icmp_send(nskb, type, code, 0);
> +		consume_skb(nskb);
> +		break;
> +	}
> +#endif
> +#if IS_ENABLED(CONFIG_IPV6)
> +	case htons(ETH_P_IPV6):
> +		if (type != ICMPV6_DEST_UNREACH)
> +			return -EOPNOTSUPP;
> +		if (code < 0 || code > ICMPV6_REJECT_ROUTE)
> +			return -EINVAL;

[..]

> +		/* icmpv6_send may treat skb_dst as rt6_info. */
> +		if (skb_metadata_dst(skb))
> +			return -ENETUNREACH;

A bit confused about this. Which part of icmpv6_send treats skb_dst as rt6_info?
(I see the original sashiko report about dst, but icmp6 does not seem to
require it)

^ permalink raw reply

* Re: [PATCH net] net: enetc: fix potential divide-by-zero when num_vsi is zero
From: patchwork-bot+netdevbpf @ 2026-06-25 16:21 UTC (permalink / raw)
  To: Wei Fang
  Cc: claudiu.manoil, vladimir.oltean, xiaoning.wang, andrew+netdev,
	davem, edumazet, kuba, pabeni, Frank.Li, wei.fang, imx, netdev,
	linux-kernel
In-Reply-To: <20260624072726.1238903-1-wei.fang@oss.nxp.com>

Hello:

This patch was applied to netdev/net.git (main)
by Jakub Kicinski <kuba@kernel.org>:

On Wed, 24 Jun 2026 15:27:26 +0800 you wrote:
> From: Wei Fang <wei.fang@nxp.com>
> 
> For i.MX94 series, all the standalone ENETCs do not support SR-IOV, so
> pf->caps.num_vsi is zero. This leads to a divide-by-zero in
> enetc4_default_rings_allocation() when distributing rings among PF and
> VFs.
> 
> [...]

Here is the summary with links:
  - [net] net: enetc: fix potential divide-by-zero when num_vsi is zero
    https://git.kernel.org/netdev/net/c/5da65537792b

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html



^ permalink raw reply

* Re: [PATCH] octeontx2-af: Free BPID bitmap on setup failure
From: patchwork-bot+netdevbpf @ 2026-06-25 16:20 UTC (permalink / raw)
  To: haoxiang_li2024
  Cc: sgoutham, lcherian, gakula, hkelam, sbhatta, andrew+netdev, davem,
	edumazet, kuba, pabeni, horms, netdev, linux-kernel, stable
In-Reply-To: <20260623114316.2182271-1-haoxiang_li2024@163.com>

Hello:

This patch was applied to netdev/net.git (main)
by Jakub Kicinski <kuba@kernel.org>:

On Tue, 23 Jun 2026 19:43:16 +0800 you wrote:
> nix_setup_bpids() allocates bp->bpids with rvu_alloc_bitmap(), which uses
> a plain kcalloc(). If any of the following devm_kcalloc() allocations for
> the BPID mapping arrays fails, the function returns without freeing the
> bitmap. Free the BPID bitmap before returning from those error paths.
> 
> Fixes: d6212d2e41a0 ("octeontx2-af: Create BPIDs free pool")
> Cc: stable@vger.kernel.org
> Signed-off-by: Haoxiang Li <haoxiang_li2024@163.com>
> 
> [...]

Here is the summary with links:
  - octeontx2-af: Free BPID bitmap on setup failure
    https://git.kernel.org/netdev/net/c/36323f54cd32

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html



^ permalink raw reply

* Re: [PATCH v2 net 0/3] net: udp_tunnel: fix races and use-after-free
From: patchwork-bot+netdevbpf @ 2026-06-25 16:20 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: davem, kuba, pabeni, horms, samsun1006219, sdf, netdev,
	eric.dumazet
In-Reply-To: <20260625065938.654652-1-edumazet@google.com>

Hello:

This series was applied to netdev/net.git (main)
by Jakub Kicinski <kuba@kernel.org>:

On Thu, 25 Jun 2026 06:59:35 +0000 you wrote:
> Yue Sun reported a use-after-free and debugobjects warning in
> udp_tunnel_nic_device_sync_work() when concurrently creating and
> destroying netdevsim and geneve devices.
> 
> This series resolves the UAF and the underlying data races that
> make the fix vulnerable.
> 
> [...]

Here is the summary with links:
  - [v2,net,1/3] net: udp_tunnel: prevent double queueing in udp_tunnel_nic_device_sync
    https://git.kernel.org/netdev/net/c/ecf69d4b4337
  - [v2,net,2/3] net: udp_tunnel: convert state flags to atomic bitops
    (no matching commit)
  - [v2,net,3/3] net: udp_tunnel: use atomic bitops for missed bitmap
    (no matching commit)

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html



^ permalink raw reply

* Re: [PATCH] net: sparx5: unregister blocking notifier on init failure
From: patchwork-bot+netdevbpf @ 2026-06-25 16:20 UTC (permalink / raw)
  To: haoxiang_li2024
  Cc: andrew+netdev, davem, edumazet, kuba, pabeni, Steen.Hegelund,
	daniel.machon, UNGLinuxDriver, kees, horms, bjarni.jonasson,
	lars.povlsen, netdev, linux-arm-kernel, linux-kernel, stable
In-Reply-To: <20260623115714.2192074-1-haoxiang_li2024@163.com>

Hello:

This patch was applied to netdev/net.git (main)
by Jakub Kicinski <kuba@kernel.org>:

On Tue, 23 Jun 2026 19:57:14 +0800 you wrote:
> sparx5_register_notifier_blocks() registers the switchdev blocking
> notifier before allocating the ordered workqueue. If the workqueue
> allocation fails, the error path unregisters the switchdev and netdevice
> notifiers, but leaves the blocking notifier registered.
> 
> Add a separate error label for the workqueue allocation failure path and
> unregister the switchdev blocking notifier there.
> 
> [...]

Here is the summary with links:
  - net: sparx5: unregister blocking notifier on init failure
    https://git.kernel.org/netdev/net/c/483be61b4a9a

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html



^ permalink raw reply

* Re: [PATCH net-next v2] selftests: tls: size splice_short pipe by page size
From: patchwork-bot+netdevbpf @ 2026-06-25 16:20 UTC (permalink / raw)
  To: Nirmoy Das; +Cc: kuba, sd, john.fastabend, horms, netdev, linux-kernel
In-Reply-To: <20260624134416.3235403-1-nirmoyd@nvidia.com>

Hello:

This patch was applied to netdev/net.git (main)
by Jakub Kicinski <kuba@kernel.org>:

On Wed, 24 Jun 2026 06:44:16 -0700 you wrote:
> splice_short grows its pipe with (MAX_FRAGS + 1) * 0x1000 so it can
> queue one short vmsplice() buffer for each fragment before draining the
> pipe. That assumes 4K pipe buffers.
> 
> On 64K-page kernels the request is rounded to 262144 bytes, which
> provides only four pipe buffers. The fifth one-byte vmsplice() blocks in
> pipe_wait_writable and the test times out before it reaches the TLS path.
> 
> [...]

Here is the summary with links:
  - [net-next,v2] selftests: tls: size splice_short pipe by page size
    https://git.kernel.org/netdev/net/c/3e52f56875c6

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html



^ permalink raw reply

* Re: [PATCH v2 net 0/2] tipc: syzbot related fixes
From: patchwork-bot+netdevbpf @ 2026-06-25 16:20 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: davem, kuba, pabeni, horms, kuniyu, lucien.xin, jmaloy,
	tipc-discussion, netdev, eric.dumazet
In-Reply-To: <20260623173030.2925059-1-edumazet@google.com>

Hello:

This series was applied to netdev/net.git (main)
by Jakub Kicinski <kuba@kernel.org>:

On Tue, 23 Jun 2026 17:30:28 +0000 you wrote:
> First patch fixes a recent syzbot report.
> 
> Second patch is inspired by numerous syzbot soft lockup
> reports with RTNL pressure.
> 
> Eric Dumazet (2):
>   tipc: fix UAF in cleanup_bearer() due to premature dst_cache_destroy()
>   tipc: avoid busy looping in tipc_exit_net()
> 
> [...]

Here is the summary with links:
  - [v2,net,1/2] tipc: fix UAF in cleanup_bearer() due to premature dst_cache_destroy()
    https://git.kernel.org/netdev/net/c/7116764ca53f
  - [v2,net,2/2] tipc: avoid busy looping in tipc_exit_net()
    https://git.kernel.org/netdev/net/c/c1481c94e74c

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html



^ permalink raw reply

* Re: [PATCH net] net: ethernet: qualcomm: ppe: Demote from supported and fix maintainer addresses
From: patchwork-bot+netdevbpf @ 2026-06-25 16:20 UTC (permalink / raw)
  To: Krzysztof Kozlowski
  Cc: andersson, mturquette, sboyd, bmasney, robh, krzk+dt, conor+dt,
	jie.luo, andrew+netdev, davem, edumazet, kuba, pabeni,
	quic_leiwei, quic_suruchia, quic_pavir, linux-kernel,
	linux-arm-msm, linux-clk, devicetree, netdev
In-Reply-To: <20260623073307.36483-2-krzysztof.kozlowski@oss.qualcomm.com>

Hello:

This patch was applied to netdev/net.git (main)
by Jakub Kicinski <kuba@kernel.org>:

On Tue, 23 Jun 2026 09:33:08 +0200 you wrote:
> Emails to the maintainer of Qualcomm PPE Ethernet driver (Luo Jie
> <quic_luoj@quicinc.com>) bounce permanently (full mailbox), because the
> "quicinc.com" addresses were deprecated for public work.  All Qualcomm
> contributors are aware of that and were asked to fix their addresses.
> 
> A driver is not supported - in terms of how netdev understands the
> "supported" commitment - if its maintainer does not care to receive patches
> for its code, so demote it to "maintained" to reflect its true status.
> 
> [...]

Here is the summary with links:
  - [net] net: ethernet: qualcomm: ppe: Demote from supported and fix maintainer addresses
    https://git.kernel.org/netdev/net/c/efd7fb21bad8

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html



^ permalink raw reply

* Re: [PATCH net] dt-bindings: net: renesas,ether: Drop example "ethernet-phy-ieee802.3-c22" fallback
From: patchwork-bot+netdevbpf @ 2026-06-25 16:20 UTC (permalink / raw)
  To: Rob Herring
  Cc: niklas.soderlund, andrew+netdev, davem, edumazet, kuba, pabeni,
	krzk+dt, conor+dt, geert+renesas, magnus.damm, sergei.shtylyov,
	netdev, linux-renesas-soc, devicetree, linux-kernel
In-Reply-To: <20260624150250.131966-2-robh@kernel.org>

Hello:

This patch was applied to netdev/net.git (main)
by Jakub Kicinski <kuba@kernel.org>:

On Wed, 24 Jun 2026 10:02:50 -0500 you wrote:
> Fix the Micrel PHY in the example which shouldn't have the
> fallback "ethernet-phy-ieee802.3-c22" compatible:
> 
> Documentation/devicetree/bindings/net/renesas,ether.example.dtb: ethernet-phy@1 \
>   (ethernet-phy-id0022.1537): compatible: ['ethernet-phy-id0022.1537', 'ethernet-phy-ieee802.3-c22'] is too long
>         from schema $id: http://devicetree.org/schemas/net/micrel.yaml
> 
> [...]

Here is the summary with links:
  - [net] dt-bindings: net: renesas,ether: Drop example "ethernet-phy-ieee802.3-c22" fallback
    https://git.kernel.org/netdev/net/c/14eb1d2c03b3

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html



^ permalink raw reply

* Re: [PATCH net-next] openvswitch: conntrack: annotate ct limit hlist traversal
From: patchwork-bot+netdevbpf @ 2026-06-25 16:20 UTC (permalink / raw)
  To: Runyu Xiao
  Cc: aconole, echaudro, i.maximets, davem, edumazet, kuba, pabeni,
	horms, netdev, dev, linux-kernel, jianhao.xu
In-Reply-To: <20260624150149.3510541-1-runyu.xiao@seu.edu.cn>

Hello:

This patch was applied to netdev/net.git (main)
by Jakub Kicinski <kuba@kernel.org>:

On Wed, 24 Jun 2026 23:01:49 +0800 you wrote:
> ct_limit_set() is documented as being called with ovs_mutex held. It
> walks the ct limit hlist with hlist_for_each_entry_rcu(), but the
> iterator does not currently pass the OVS lockdep condition used
> elsewhere for RCU-protected OVS objects.
> 
> Pass lockdep_ovsl_is_held() to the iterator. This matches the function's
> existing caller contract and lets CONFIG_PROVE_RCU_LIST distinguish the
> ovs_mutex-protected update path from the RCU read-side ct_limit_get()
> path.
> 
> [...]

Here is the summary with links:
  - [net-next] openvswitch: conntrack: annotate ct limit hlist traversal
    https://git.kernel.org/netdev/net/c/0e901ee5c6f9

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html



^ permalink raw reply

* Re: [PATCH V2 net 0/4] net: hns3: fix configuration deadlocks and refactor link setup
From: patchwork-bot+netdevbpf @ 2026-06-25 16:20 UTC (permalink / raw)
  To: Jijie Shao
  Cc: davem, edumazet, kuba, pabeni, andrew+netdev, horms, shenjian15,
	liuyonglong, chenhao418, huangdonghua3, yangshuaisong, netdev,
	linux-kernel
In-Reply-To: <20260624141319.271439-1-shaojijie@huawei.com>

Hello:

This series was applied to netdev/net.git (main)
by Jakub Kicinski <kuba@kernel.org>:

On Wed, 24 Jun 2026 22:13:15 +0800 you wrote:
> This patch series addresses a sequence of link configuration deadlocks
> and parameter contamination issues in the hns3 network driver, which
> typically occur during hardware resets or driver initialization under
> specific user-configured scenarios.
> 
> The bugs stem from asynchronous discrepancies between the MAC state
> machine and cached user requests during sudden hardware resets, leading
> to invalid parameter combos or frozen registers.
> 
> [...]

Here is the summary with links:
  - [V2,net,1/4] net: hns3: unify copper port ksettings configuration path
    https://git.kernel.org/netdev/net/c/d77e98f8b2b3
  - [V2,net,2/4] net: hns3: refactor MAC autoneg and speed configuration
    https://git.kernel.org/netdev/net/c/c01f6e6bdc1c
  - [V2,net,3/4] net: hns3: fix permanent link down deadlock after reset
    https://git.kernel.org/netdev/net/c/c711f6d1cee9
  - [V2,net,4/4] net: hns3: differentiate autoneg default values between copper and fiber
    https://git.kernel.org/netdev/net/c/d9d349c4e8a0

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html



^ permalink raw reply

* Re: [PATCH v5 net] net: mana: Optimize irq affinity for low vcpu configs
From: patchwork-bot+netdevbpf @ 2026-06-25 16:20 UTC (permalink / raw)
  To: Shradha Gupta
  Cc: decui, wei.liu, haiyangz, kys, andrew+netdev, davem, edumazet,
	kuba, pabeni, kotaranov, horms, ernis, dipayanroy, shirazsaleem,
	mhklinux, longli, yury.norov, linux-hyperv, linux-kernel, netdev,
	paulros, shradhagupta, ssengar, stable, ynorov
In-Reply-To: <20260624072138.1632849-1-shradhagupta@linux.microsoft.com>

Hello:

This patch was applied to netdev/net.git (main)
by Jakub Kicinski <kuba@kernel.org>:

On Wed, 24 Jun 2026 00:21:35 -0700 you wrote:
> Before the commit 755391121038 ("net: mana: Allocate MSI-X vectors
> dynamically"), all the MANA IRQs were assigned statically and together
> during early driver load.
> 
> After this commit, the IRQ allocation for MANA was done in two phases.
> The HWC IRQ is allocated first, and queue IRQs are then dynamically added at a
> later point. By this time, the IRQ weights on vCPUs can become imbalanced
> and if IRQ count is greater than the vCPU count the topology aware IRQ
> distribution logic in MANA can cause multiple MANA IRQs to land on the
> same vCPUs, while other sibling vCPUs have none (case 1).
> 
> [...]

Here is the summary with links:
  - [v5,net] net: mana: Optimize irq affinity for low vcpu configs
    https://git.kernel.org/netdev/net/c/5316394b1752

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html



^ permalink raw reply

* Re: [PATCH v3 4/4] vhost/vsock: add VHOST_RESET_OWNER ioctl
From: Pavel Tikhomirov @ 2026-06-25 16:13 UTC (permalink / raw)
  To: Andrey Drobyshev, linux-kernel
  Cc: kvm, virtualization, netdev, sgarzare, mst, stefanha,
	dongli.zhang, maciej.szmigiero, bchaney, mark.kanda, den
In-Reply-To: <20260625155416.480669-5-andrey.drobyshev@virtuozzo.com>

Reviewed-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>

On 6/25/26 17:54, Andrey Drobyshev wrote:
> From: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
> 
> This ioctl is needed for QEMU's CPR (checkpoint-restore) migration of
> the guest with vhost-vsock device.  For this to work, we need to reset
> the device ownership on the source side by calling RESET_OWNER, and then
> claim it on the dest side by calling SET_OWNER.  We expect not to lose any
> AF_VSOCK connection while this happens.
> 
> RESET_OWNER keeps the guest CID hashed, so that connections survive. That
> leaves the device reachable by a lockless send/cancel path while the worker
> is being torn down: a concurrent vhost_transport_send_pkt() or
> vhost_transport_cancel_pkt() can call vhost_vq_work_queue() as
> vhost_workers_free() frees the worker.  That might cause a use-after-free
> of vq->worker.  In addition, any work queued onto the dying worker leaves
> VHOST_WORK_QUEUED stuck, stalling send_pkt_queue after resume.
> 
> Fence the send/cancel paths around the teardown: send_pkt()/cancel_pkt()
> only kick the worker while the backend is alive.  And reset_owner() calls
> synchronize_rcu() after drop_backends() so in-flight send/cancel finish
> before the worker is freed.
> 
> Signed-off-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
> Signed-off-by: Andrey Drobyshev <andrey.drobyshev@virtuozzo.com>
> ---
>  drivers/vhost/vsock.c | 51 +++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 49 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
> index 81d4f7209719..f0a0aa7d3200 100644
> --- a/drivers/vhost/vsock.c
> +++ b/drivers/vhost/vsock.c
> @@ -318,7 +318,14 @@ vhost_transport_send_pkt(struct sk_buff *skb, struct net *net)
>  		atomic_inc(&vsock->queued_replies);
>  
>  	virtio_vsock_skb_queue_tail(&vsock->send_pkt_queue, skb);
> -	vhost_vq_work_queue(&vsock->vqs[VSOCK_VQ_RX], &vsock->send_pkt_work);
> +
> +	/* Skip the kick once the backend is gone (stop/RESET_OWNER); the skb
> +	 * stays queued and vhost_vsock_start() drains it. Pairs with the
> +	 * synchronize_rcu() in vhost_vsock_reset_owner().
> +	 */
> +	if (data_race(vhost_vq_get_backend(&vsock->vqs[VSOCK_VQ_RX])))
> +		vhost_vq_work_queue(&vsock->vqs[VSOCK_VQ_RX],
> +				    &vsock->send_pkt_work);
>  
>  	rcu_read_unlock();
>  	return len;
> @@ -346,7 +353,15 @@ vhost_transport_cancel_pkt(struct vsock_sock *vsk)
>  		int new_cnt;
>  
>  		new_cnt = atomic_sub_return(cnt, &vsock->queued_replies);
> -		if (new_cnt + cnt >= tx_vq->num && new_cnt < tx_vq->num)
> +
> +		/* Skip the kick once the backend is gone (stop/RESET_OWNER):
> +		 * vhost_poll_queue() would touch the worker which is being freed
> +		 * by teardown, e.g. on RESET_OWNER.  Pairs with the
> +		 * synchronize_rcu() in vhost_vsock_reset_owner().  The TX VQ is
> +		 * re-kicked by vhost_vsock_start().
> +		 */
> +		if (data_race(vhost_vq_get_backend(tx_vq)) &&
> +		    new_cnt + cnt >= tx_vq->num && new_cnt < tx_vq->num)
>  			vhost_poll_queue(&tx_vq->poll);
>  	}
>  
> @@ -903,6 +918,36 @@ static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features)
>  	return -EFAULT;
>  }
>  
> +static int vhost_vsock_reset_owner(struct vhost_vsock *vsock)
> +{
> +	struct vhost_iotlb *umem;
> +	long err;
> +
> +	mutex_lock(&vsock->dev.mutex);
> +	err = vhost_dev_check_owner(&vsock->dev);
> +	if (err)
> +		goto done;
> +	umem = vhost_dev_reset_owner_prepare();
> +	if (!umem) {
> +		err = -ENOMEM;
> +		goto done;
> +	}
> +	vhost_vsock_drop_backends(vsock);
> +
> +	/* Let in-flight send_pkt() callers stop touching the worker before the
> +	 * flush + free below. Pairs with the backend check in
> +	 * vhost_transport_send_pkt().
> +	 */
> +	synchronize_rcu();
> +
> +	vhost_vsock_flush(vsock);
> +	vhost_dev_stop(&vsock->dev);
> +	vhost_dev_reset_owner(&vsock->dev, umem);
> +done:
> +	mutex_unlock(&vsock->dev.mutex);
> +	return err;
> +}
> +
>  static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl,
>  				  unsigned long arg)
>  {
> @@ -946,6 +991,8 @@ static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl,
>  			return -EOPNOTSUPP;
>  		vhost_set_backend_features(&vsock->dev, features);
>  		return 0;
> +	case VHOST_RESET_OWNER:
> +		return vhost_vsock_reset_owner(vsock);
>  	default:
>  		mutex_lock(&vsock->dev.mutex);
>  		r = vhost_dev_ioctl(&vsock->dev, ioctl, argp);

-- 
Best regards, Pavel Tikhomirov
Senior Software Developer, Virtuozzo.


^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox