* [PATCH v2 net-next] udp: remove busylock and add per NUMA queues
@ 2025-09-20  8:02 Eric Dumazet
  2025-09-20 18:10 ` Jakub Kicinski
  2025-09-20 18:33 ` [syzbot ci] " syzbot ci
  0 siblings, 2 replies; 4+ messages in thread
From: Eric Dumazet @ 2025-09-20  8:02 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Simon Horman, Willem de Bruijn, Kuniyuki Iwashima, netdev,
	eric.dumazet, Eric Dumazet

busylock was protecting UDP sockets against packet floods,
but unfortunately was not protecting the host itself.

Under stress, many cpus could spin while acquiring the busylock,
and the NIC had to drop packets. Or packets would be dropped
in the cpu backlog if RPS/RFS were in place.

This patch replaces the busylock with intermediate
lockless queues (one queue per NUMA node).

This means that fewer cpus have to acquire
the UDP receive queue lock.

Most cpus can either:
- immediately drop the packet,
- or queue it in their NUMA-aware lockless queue.

Then one cpu is chosen to process this lockless queue
in a batch.

The batch only contains packets that were cooked on the same
NUMA node, thus with very limited latency impact.
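
For illustration, here is a minimal userspace sketch of the
producer/flush pattern (illustration only, not kernel code; the real
implementation uses the kernel llist API, as shown in the diff below):

#include <pthread.h>
#include <stdatomic.h>
#include <stddef.h>

struct node {
	struct node *next;		/* payload would be an skb */
};

struct prod_queue {
	_Atomic(struct node *) head;	/* llist_head analogue */
};

/* llist_add() analogue: returns 1 if the list was previously empty. */
static int pq_push(struct prod_queue *q, struct node *n)
{
	struct node *old = atomic_load(&q->head);

	do {
		n->next = old;
	} while (!atomic_compare_exchange_weak(&q->head, &old, n));
	return old == NULL;
}

/* llist_del_all() analogue: detach the whole list in one swap. */
static struct node *pq_take_all(struct prod_queue *q)
{
	return atomic_exchange(&q->head, NULL);
}

static pthread_mutex_t rxq_lock = PTHREAD_MUTEX_INITIALIZER;

static void enqueue(struct prod_queue *q, struct node *n)
{
	/* Only the producer that found the list empty goes on to take
	 * the contended receive queue lock; everybody else returns.
	 */
	if (!pq_push(q, n))
		return;

	pthread_mutex_lock(&rxq_lock);
	/* Entries come back in LIFO order, which is why the patch
	 * calls llist_reverse_order() before queueing them.
	 */
	for (struct node *p = pq_take_all(q); p; p = p->next)
		;	/* splice p into the real receive queue here */
	pthread_mutex_unlock(&rxq_lock);
}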

Tested:

DDoS targeting a victim UDP socket, on a platform with 6 NUMA nodes
(Intel(R) Xeon(R) 6985P-C)

Before:

nstat -n ; sleep 1 ; nstat | grep Udp
Udp6InDatagrams                 1004179            0.0
Udp6InErrors                    3117               0.0
Udp6RcvbufErrors                3117               0.0

After:
nstat -n ; sleep 1 ; nstat | grep Udp
Udp6InDatagrams                 1116633            0.0
Udp6InErrors                    14197275           0.0
Udp6RcvbufErrors                14197275           0.0

We can see this host can now process 14.2 M more packets per second
while under attack (Udp6InErrors: 14,197,275 vs 3,117), and the victim
socket can receive 11% more packets (Udp6InDatagrams: 1,116,633 vs
1,004,179).

I used a small bpftrace program measuring time (in us) spent in
__udp_enqueue_schedule_skb().
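
The script itself is not included in this mail; a plausible
reconstruction (assuming the histogram key, 398 in the output below,
is the cpu number) is:

kprobe:__udp_enqueue_schedule_skb
{
	@ts[tid] = nsecs;
}

kretprobe:__udp_enqueue_schedule_skb
/@ts[tid]/
{
	@udp_enqueue_us[cpu] = hist((nsecs - @ts[tid]) / 1000);
	delete(@ts[tid]);
}

bpftrace's hist() yields the power-of-two buckets shown below.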

Before:

@udp_enqueue_us[398]:
[0]                24901 |@@@                                                 |
[1]                63512 |@@@@@@@@@                                           |
[2, 4)            344827 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[4, 8)            244673 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@                |
[8, 16)            54022 |@@@@@@@@                                            |
[16, 32)          222134 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@                   |
[32, 64)          232042 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@                  |
[64, 128)           4219 |                                                    |
[128, 256)           188 |                                                    |

After:

@udp_enqueue_us[398]:
[0]              5608855 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[1]              1111277 |@@@@@@@@@@                                          |
[2, 4)            501439 |@@@@                                                |
[4, 8)            102921 |                                                    |
[8, 16)            29895 |                                                    |
[16, 32)           43500 |                                                    |
[32, 64)           31552 |                                                    |
[64, 128)            979 |                                                    |
[128, 256)            13 |                                                    |

Note that the remaining bottleneck for this platform is in
udp_drops_inc() because we limited struct numa_drop_counters
to only two nodes so far.
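
The exact layout of struct numa_drop_counters is not shown in this
mail. As a purely hypothetical sketch of why two per-node slots can
still bottleneck a 6-node platform, assume something like:

/* hypothetical sketch, not the real definition */
struct numa_drop_counters {
	atomic_t	drops0 ____cacheline_aligned_in_smp;
	atomic_t	drops1 ____cacheline_aligned_in_smp;
};

static inline void numa_drop_inc(struct numa_drop_counters *ndc)
{
	/* Assumed policy: even/odd nodes share a slot, so on 6 nodes
	 * three nodes still bounce each counter's cacheline.
	 */
	atomic_inc(numa_node_id() & 1 ? &ndc->drops1 : &ndc->drops0);
}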

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
v2: added a kfree(up->udp_prod_queue) in udpv6_destroy_sock() (Jakub feedback on v1)
    added bpftrace histograms in changelog.

v1: https://lore.kernel.org/netdev/20250919164308.2455564-1-edumazet@google.com/

 include/linux/udp.h |  9 ++++-
 include/net/udp.h   | 11 ++++-
 net/ipv4/udp.c      | 99 ++++++++++++++++++++++++---------------------
 net/ipv6/udp.c      |  6 ++-
 4 files changed, 74 insertions(+), 51 deletions(-)

diff --git a/include/linux/udp.h b/include/linux/udp.h
index e554890c4415b411f35007d3ece9e6042db7a544..58795688a18636ea79aa1f5d06eacc676a2e7849 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -44,6 +44,12 @@ enum {
 	UDP_FLAGS_UDPLITE_RECV_CC, /* set via udplite setsockopt */
 };
 
+/* per NUMA structure for lockless producer usage. */
+struct udp_prod_queue {
+	struct llist_head	ll_root ____cacheline_aligned_in_smp;
+	atomic_t		rmem_alloc;
+};
+
 struct udp_sock {
 	/* inet_sock has to be the first member */
 	struct inet_sock inet;
@@ -90,6 +96,8 @@ struct udp_sock {
 						struct sk_buff *skb,
 						int nhoff);
 
+	struct udp_prod_queue *udp_prod_queue;
+
 	/* udp_recvmsg try to use this before splicing sk_receive_queue */
 	struct sk_buff_head	reader_queue ____cacheline_aligned_in_smp;
 
@@ -109,7 +117,6 @@ struct udp_sock {
 	 */
 	struct hlist_node	tunnel_list;
 	struct numa_drop_counters drop_counters;
-	spinlock_t		busylock ____cacheline_aligned_in_smp;
 };
 
 #define udp_test_bit(nr, sk)			\
diff --git a/include/net/udp.h b/include/net/udp.h
index 059a0cee5f559b8d75e71031a00d0aa2769e257f..cffedb3e40f24513e44fb7598c0ad917fd15b616 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -284,16 +284,23 @@ INDIRECT_CALLABLE_DECLARE(int udpv6_rcv(struct sk_buff *));
 struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
 				  netdev_features_t features, bool is_ipv6);
 
-static inline void udp_lib_init_sock(struct sock *sk)
+static inline int udp_lib_init_sock(struct sock *sk)
 {
 	struct udp_sock *up = udp_sk(sk);
 
 	sk->sk_drop_counters = &up->drop_counters;
-	spin_lock_init(&up->busylock);
 	skb_queue_head_init(&up->reader_queue);
 	INIT_HLIST_NODE(&up->tunnel_list);
 	up->forward_threshold = sk->sk_rcvbuf >> 2;
 	set_bit(SOCK_CUSTOM_SOCKOPT, &sk->sk_socket->flags);
+
+	up->udp_prod_queue = kcalloc(nr_node_ids, sizeof(*up->udp_prod_queue),
+				     GFP_KERNEL);
+	if (!up->udp_prod_queue)
+		return -ENOMEM;
+	for (int i = 0; i < nr_node_ids; i++)
+		init_llist_head(&up->udp_prod_queue[i].ll_root);
+	return 0;
 }
 
 static inline void udp_drops_inc(struct sock *sk)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 85cfc32eb2ccb3e229177fb37910fefde0254ffe..fedc939342f3d1ab580548e2b4dd39b5e3a1c397 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1685,25 +1685,6 @@ static void udp_skb_dtor_locked(struct sock *sk, struct sk_buff *skb)
 	udp_rmem_release(sk, udp_skb_truesize(skb), 1, true);
 }
 
-/* Idea of busylocks is to let producers grab an extra spinlock
- * to relieve pressure on the receive_queue spinlock shared by consumer.
- * Under flood, this means that only one producer can be in line
- * trying to acquire the receive_queue spinlock.
- */
-static spinlock_t *busylock_acquire(struct sock *sk)
-{
-	spinlock_t *busy = &udp_sk(sk)->busylock;
-
-	spin_lock(busy);
-	return busy;
-}
-
-static void busylock_release(spinlock_t *busy)
-{
-	if (busy)
-		spin_unlock(busy);
-}
-
 static int udp_rmem_schedule(struct sock *sk, int size)
 {
 	int delta;
@@ -1718,14 +1699,23 @@ static int udp_rmem_schedule(struct sock *sk, int size)
 int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
 {
 	struct sk_buff_head *list = &sk->sk_receive_queue;
+	struct udp_prod_queue *udp_prod_queue;
+	struct llist_node *ll_list;
 	unsigned int rmem, rcvbuf;
-	spinlock_t *busy = NULL;
 	int size, err = -ENOMEM;
+	struct sk_buff *next;
+	int total_size = 0;
+	int q_size = 0;
+	int nb = 0;
 
 	rmem = atomic_read(&sk->sk_rmem_alloc);
 	rcvbuf = READ_ONCE(sk->sk_rcvbuf);
 	size = skb->truesize;
 
+	udp_prod_queue = &udp_sk(sk)->udp_prod_queue[numa_node_id()];
+
+	rmem += atomic_read(&udp_prod_queue->rmem_alloc);
+
 	/* Immediately drop when the receive queue is full.
 	 * Cast to unsigned int performs the boundary check for INT_MAX.
 	 */
@@ -1747,45 +1737,60 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
 	if (rmem > (rcvbuf >> 1)) {
 		skb_condense(skb);
 		size = skb->truesize;
-		rmem = atomic_add_return(size, &sk->sk_rmem_alloc);
-		if (rmem > rcvbuf)
-			goto uncharge_drop;
-		busy = busylock_acquire(sk);
-	} else {
-		atomic_add(size, &sk->sk_rmem_alloc);
 	}
 
 	udp_set_dev_scratch(skb);
 
+	atomic_add(size, &udp_prod_queue->rmem_alloc);
+
+	if (!llist_add(&skb->ll_node, &udp_prod_queue->ll_root))
+		return 0;
+
 	spin_lock(&list->lock);
-	err = udp_rmem_schedule(sk, size);
-	if (err) {
-		spin_unlock(&list->lock);
-		goto uncharge_drop;
-	}
 
-	sk_forward_alloc_add(sk, -size);
+	ll_list = llist_del_all(&udp_prod_queue->ll_root);
 
-	/* no need to setup a destructor, we will explicitly release the
-	 * forward allocated memory on dequeue
-	 */
-	sock_skb_set_dropcount(sk, skb);
+	ll_list = llist_reverse_order(ll_list);
+
+	llist_for_each_entry_safe(skb, next, ll_list, ll_node) {
+		size = udp_skb_truesize(skb);
+		total_size += size;
+		err = udp_rmem_schedule(sk, size);
+		if (err) {
+			udp_drops_inc(sk);
+			// TODO update SNMP values.
+			sk_skb_reason_drop(sk, skb, SKB_DROP_REASON_PROTO_MEM);
+			continue;
+		}
+
+		q_size += size;
+		sk_forward_alloc_add(sk, -size);
+
+		/* no need to setup a destructor, we will explicitly release the
+		 * forward allocated memory on dequeue
+		 */
+		sock_skb_set_dropcount(sk, skb);
+		nb++;
+		__skb_queue_tail(list, skb);
+	}
+
+	atomic_add(q_size, &sk->sk_rmem_alloc);
 
-	__skb_queue_tail(list, skb);
 	spin_unlock(&list->lock);
 
-	if (!sock_flag(sk, SOCK_DEAD))
-		INDIRECT_CALL_1(sk->sk_data_ready, sock_def_readable, sk);
+	atomic_sub(total_size, &udp_prod_queue->rmem_alloc);
 
-	busylock_release(busy);
-	return 0;
+	if (!sock_flag(sk, SOCK_DEAD)) {
+		while (nb) {
+			INDIRECT_CALL_1(sk->sk_data_ready, sock_def_readable, sk);
+			nb--;
+		}
+	}
 
-uncharge_drop:
-	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
+	return 0;
 
 drop:
 	udp_drops_inc(sk);
-	busylock_release(busy);
 	return err;
 }
 EXPORT_IPV6_MOD_GPL(__udp_enqueue_schedule_skb);
@@ -1814,10 +1819,11 @@ static void udp_destruct_sock(struct sock *sk)
 
 int udp_init_sock(struct sock *sk)
 {
-	udp_lib_init_sock(sk);
+	int res = udp_lib_init_sock(sk);
+
 	sk->sk_destruct = udp_destruct_sock;
 	set_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags);
-	return 0;
+	return res;
 }
 
 void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len)
@@ -2906,6 +2912,7 @@ void udp_destroy_sock(struct sock *sk)
 			udp_tunnel_cleanup_gro(sk);
 		}
 	}
+	kfree(up->udp_prod_queue);
 }
 
 typedef struct sk_buff *(*udp_gro_receive_t)(struct sock *sk,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 9f4d340d1e3a63d38f80138ef9f6aac4a33afa05..90e2945e6cf9066bc36c57cbb29b8aa68e7afe4e 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -67,10 +67,11 @@ static void udpv6_destruct_sock(struct sock *sk)
 
 int udpv6_init_sock(struct sock *sk)
 {
-	udp_lib_init_sock(sk);
+	int res = udp_lib_init_sock(sk);
+
 	sk->sk_destruct = udpv6_destruct_sock;
 	set_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags);
-	return 0;
+	return res;
 }
 
 INDIRECT_CALLABLE_SCOPE
@@ -1828,6 +1829,7 @@ void udpv6_destroy_sock(struct sock *sk)
 			udp_tunnel_cleanup_gro(sk);
 		}
 	}
+	kfree(up->udp_prod_queue);
 }
 
 /*
-- 
2.51.0.470.ga7dc726c21-goog



* Re: [PATCH v2 net-next] udp: remove busylock and add per NUMA queues
  2025-09-20  8:02 [PATCH v2 net-next] udp: remove busylock and add per NUMA queues Eric Dumazet
@ 2025-09-20 18:10 ` Jakub Kicinski
  2025-09-20 19:38   ` Eric Dumazet
  2025-09-20 18:33 ` [syzbot ci] " syzbot ci
  1 sibling, 1 reply; 4+ messages in thread
From: Jakub Kicinski @ 2025-09-20 18:10 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S . Miller, Paolo Abeni, Simon Horman, Willem de Bruijn,
	Kuniyuki Iwashima, netdev, eric.dumazet

On Sat, 20 Sep 2025 08:02:27 +0000 Eric Dumazet wrote:
> busylock was protecting UDP sockets against packet floods,
> but unfortunately was not protecting the host itself.
> 
> Under stress, many cpus could spin while acquiring the busylock,
> and the NIC had to drop packets. Or packets would be dropped
> in the cpu backlog if RPS/RFS were in place.
>
> This patch replaces the busylock with intermediate
> lockless queues (one queue per NUMA node).
>
> This means that fewer cpus have to acquire
> the UDP receive queue lock.
>
> Most cpus can either:
> - immediately drop the packet,
> - or queue it in their NUMA-aware lockless queue.
>
> Then one cpu is chosen to process this lockless queue
> in a batch.
> 
> The batch only contains packets that were cooked on the same
> NUMA node, thus with very limited latency impact.

Occasionally hitting a UaF like this:
https://netdev-3.bots.linux.dev/vmksft-net-dbg/results/306342/3-fcnal-ipv6-sh/stderr
decoded:
https://netdev-3.bots.linux.dev/vmksft-net-dbg/results/306342/vm-crash-thr2-0
-- 
pw-bot: cr


* [syzbot ci] Re: udp: remove busylock and add per NUMA queues
  2025-09-20  8:02 [PATCH v2 net-next] udp: remove busylock and add per NUMA queues Eric Dumazet
  2025-09-20 18:10 ` Jakub Kicinski
@ 2025-09-20 18:33 ` syzbot ci
  1 sibling, 0 replies; 4+ messages in thread
From: syzbot ci @ 2025-09-20 18:33 UTC (permalink / raw)
  To: davem, edumazet, eric.dumazet, horms, kuba, kuniyu, netdev,
	pabeni, willemb
  Cc: syzbot, syzkaller-bugs

syzbot ci has tested the following series

[v2] udp: remove busylock and add per NUMA queues
https://lore.kernel.org/all/20250920080227.3674860-1-edumazet@google.com
* [PATCH v2 net-next] udp: remove busylock and add per NUMA queues

and found the following issue:
KASAN: slab-use-after-free Read in __udp_enqueue_schedule_skb

Full report is available here:
https://ci.syzbot.org/series/9921e6c6-67ac-435d-a76a-a9cfb67b2f12

***

KASAN: slab-use-after-free Read in __udp_enqueue_schedule_skb

tree:      net-next
URL:       https://kernel.googlesource.com/pub/scm/linux/kernel/git/netdev/net-next.git
base:      315f423be0d1ebe720d8fd4fa6bed68586b13d34
arch:      amd64
compiler:  Debian clang version 20.1.8 (++20250708063551+0c9f909b7976-1~exp1~20250708183702.136), Debian LLD 20.1.8
config:    https://ci.syzbot.org/builds/987bf81f-fa72-4f8b-a27a-db2b99aed02b/config
syz repro: https://ci.syzbot.org/findings/d36fc1b8-fea8-4e40-af0d-515be973a67a/syz_repro

==================================================================
BUG: KASAN: slab-use-after-free in instrument_atomic_read include/linux/instrumented.h:68 [inline]
BUG: KASAN: slab-use-after-free in atomic_read include/linux/atomic/atomic-instrumented.h:32 [inline]
BUG: KASAN: slab-use-after-free in __udp_enqueue_schedule_skb+0x15c/0xfe0 net/ipv4/udp.c:1717
Read of size 4 at addr ffff88810d802d08 by task syz.0.120/6361

CPU: 0 UID: 0 PID: 6361 Comm: syz.0.120 Not tainted syzkaller #0 PREEMPT(full) 
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
Call Trace:
 <TASK>
 dump_stack_lvl+0x189/0x250 lib/dump_stack.c:120
 print_address_description mm/kasan/report.c:378 [inline]
 print_report+0xca/0x240 mm/kasan/report.c:482
 kasan_report+0x118/0x150 mm/kasan/report.c:595
 check_region_inline mm/kasan/generic.c:-1 [inline]
 kasan_check_range+0x2b0/0x2c0 mm/kasan/generic.c:189
 instrument_atomic_read include/linux/instrumented.h:68 [inline]
 atomic_read include/linux/atomic/atomic-instrumented.h:32 [inline]
 __udp_enqueue_schedule_skb+0x15c/0xfe0 net/ipv4/udp.c:1717
 __udp_queue_rcv_skb net/ipv4/udp.c:2326 [inline]
 udp_queue_rcv_one_skb+0xab9/0x19e0 net/ipv4/udp.c:2455
 __udp4_lib_mcast_deliver+0xc06/0xcf0 net/ipv4/udp.c:2565
 __udp4_lib_rcv+0x10e2/0x2600 net/ipv4/udp.c:2704
 ip_protocol_deliver_rcu+0x282/0x440 net/ipv4/ip_input.c:205
 ip_local_deliver_finish+0x3bb/0x6f0 net/ipv4/ip_input.c:239
 NF_HOOK+0x30c/0x3a0 include/linux/netfilter.h:318
 dst_input include/net/dst.h:474 [inline]
 ip_sublist_rcv_finish+0x221/0x2a0 net/ipv4/ip_input.c:585
 ip_list_rcv_finish net/ipv4/ip_input.c:629 [inline]
 ip_sublist_rcv+0x5b1/0xa10 net/ipv4/ip_input.c:645
 ip_list_rcv+0x3e2/0x430 net/ipv4/ip_input.c:679
 __netif_receive_skb_list_ptype net/core/dev.c:6115 [inline]
 __netif_receive_skb_list_core+0x7d2/0x800 net/core/dev.c:6162
 __netif_receive_skb_list net/core/dev.c:6214 [inline]
 netif_receive_skb_list_internal+0x96f/0xcb0 net/core/dev.c:6305
 netif_receive_skb_list+0x54/0x450 net/core/dev.c:6357
 xdp_recv_frames net/bpf/test_run.c:280 [inline]
 xdp_test_run_batch net/bpf/test_run.c:361 [inline]
 bpf_test_run_xdp_live+0x1786/0x1b10 net/bpf/test_run.c:390
 bpf_prog_test_run_xdp+0x713/0x1000 net/bpf/test_run.c:1322
 bpf_prog_test_run+0x2c7/0x340 kernel/bpf/syscall.c:4590
 __sys_bpf+0x581/0x870 kernel/bpf/syscall.c:6047
 __do_sys_bpf kernel/bpf/syscall.c:6139 [inline]
 __se_sys_bpf kernel/bpf/syscall.c:6137 [inline]
 __x64_sys_bpf+0x7c/0x90 kernel/bpf/syscall.c:6137
 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
 do_syscall_64+0xfa/0x3b0 arch/x86/entry/syscall_64.c:94
 entry_SYSCALL_64_after_hwframe+0x77/0x7f
RIP: 0033:0x7f10d458eba9
Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 a8 ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007f10d54e4038 EFLAGS: 00000246 ORIG_RAX: 0000000000000141
RAX: ffffffffffffffda RBX: 00007f10d47d5fa0 RCX: 00007f10d458eba9
RDX: 0000000000000048 RSI: 0000200000000600 RDI: 000000000000000a
RBP: 00007f10d4611e19 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
R13: 00007f10d47d6038 R14: 00007f10d47d5fa0 R15: 00007ffd5c7da268
 </TASK>

Allocated by task 6361:
 kasan_save_stack mm/kasan/common.c:47 [inline]
 kasan_save_track+0x3e/0x80 mm/kasan/common.c:68
 poison_kmalloc_redzone mm/kasan/common.c:388 [inline]
 __kasan_kmalloc+0x93/0xb0 mm/kasan/common.c:405
 kasan_kmalloc include/linux/kasan.h:260 [inline]
 __do_kmalloc_node mm/slub.c:4376 [inline]
 __kmalloc_noprof+0x27a/0x4f0 mm/slub.c:4388
 kmalloc_noprof include/linux/slab.h:909 [inline]
 kmalloc_array_noprof include/linux/slab.h:948 [inline]
 udp_lib_init_sock include/net/udp.h:297 [inline]
 udpv6_init_sock+0x198/0x3b0 net/ipv6/udp.c:70
 inet6_create+0xef4/0x1260 net/ipv6/af_inet6.c:259
 __sock_create+0x4b3/0x9f0 net/socket.c:1589
 sock_create net/socket.c:1647 [inline]
 __sys_socket_create net/socket.c:1684 [inline]
 __sys_socket+0xd7/0x1b0 net/socket.c:1731
 __do_sys_socket net/socket.c:1745 [inline]
 __se_sys_socket net/socket.c:1743 [inline]
 __x64_sys_socket+0x7a/0x90 net/socket.c:1743
 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
 do_syscall_64+0xfa/0x3b0 arch/x86/entry/syscall_64.c:94
 entry_SYSCALL_64_after_hwframe+0x77/0x7f

Freed by task 6358:
 kasan_save_stack mm/kasan/common.c:47 [inline]
 kasan_save_track+0x3e/0x80 mm/kasan/common.c:68
 kasan_save_free_info+0x46/0x50 mm/kasan/generic.c:576
 poison_slab_object mm/kasan/common.c:243 [inline]
 __kasan_slab_free+0x5b/0x80 mm/kasan/common.c:275
 kasan_slab_free include/linux/kasan.h:233 [inline]
 slab_free_hook mm/slub.c:2422 [inline]
 slab_free mm/slub.c:4695 [inline]
 kfree+0x18e/0x440 mm/slub.c:4894
 sk_common_release+0x75/0x310 net/core/sock.c:3919
 inet_release+0x144/0x190 net/ipv4/af_inet.c:437
 __sock_release net/socket.c:649 [inline]
 sock_close+0xc3/0x240 net/socket.c:1439
 __fput+0x44c/0xa70 fs/file_table.c:468
 task_work_run+0x1d4/0x260 kernel/task_work.c:227
 resume_user_mode_work include/linux/resume_user_mode.h:50 [inline]
 exit_to_user_mode_loop+0xec/0x110 kernel/entry/common.c:43
 exit_to_user_mode_prepare include/linux/irq-entry-common.h:225 [inline]
 syscall_exit_to_user_mode_work include/linux/entry-common.h:175 [inline]
 syscall_exit_to_user_mode include/linux/entry-common.h:210 [inline]
 do_syscall_64+0x2bd/0x3b0 arch/x86/entry/syscall_64.c:100
 entry_SYSCALL_64_after_hwframe+0x77/0x7f

The buggy address belongs to the object at ffff88810d802d00
 which belongs to the cache kmalloc-128 of size 128
The buggy address is located 8 bytes inside of
 freed 128-byte region [ffff88810d802d00, ffff88810d802d80)

The buggy address belongs to the physical page:
page: refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x10d802
anon flags: 0x57ff00000000000(node=1|zone=2|lastcpupid=0x7ff)
page_type: f5(slab)
raw: 057ff00000000000 ffff88801a441a00 ffffea0004451300 0000000000000005
raw: 0000000000000000 0000000000100010 00000000f5000000 0000000000000000
page dumped because: kasan: bad access detected
page_owner tracks the page as allocated
page last allocated via order 0, migratetype Unmovable, gfp_mask 0x252800(GFP_NOWAIT|__GFP_NORETRY|__GFP_COMP|__GFP_THISNODE), pid 5962, tgid 5962 (syz-executor), ts 86250634562, free_ts 86224202724
 set_page_owner include/linux/page_owner.h:32 [inline]
 post_alloc_hook+0x240/0x2a0 mm/page_alloc.c:1851
 prep_new_page mm/page_alloc.c:1859 [inline]
 get_page_from_freelist+0x21e4/0x22c0 mm/page_alloc.c:3858
 __alloc_frozen_pages_noprof+0x181/0x370 mm/page_alloc.c:5148
 alloc_slab_page mm/slub.c:2494 [inline]
 allocate_slab+0x65/0x370 mm/slub.c:2660
 new_slab mm/slub.c:2714 [inline]
 ___slab_alloc+0xbeb/0x1420 mm/slub.c:3901
 __slab_alloc mm/slub.c:3992 [inline]
 __slab_alloc_node mm/slub.c:4067 [inline]
 slab_alloc_node mm/slub.c:4228 [inline]
 __do_kmalloc_node mm/slub.c:4375 [inline]
 __kmalloc_node_noprof+0x2fd/0x4e0 mm/slub.c:4382
 kmalloc_array_node_noprof include/linux/slab.h:1020 [inline]
 alloc_slab_obj_exts+0x39/0xa0 mm/slub.c:2033
 __memcg_slab_post_alloc_hook+0x31e/0x7f0 mm/memcontrol.c:3174
 memcg_slab_post_alloc_hook mm/slub.c:2221 [inline]
 slab_post_alloc_hook mm/slub.c:4201 [inline]
 slab_alloc_node mm/slub.c:4240 [inline]
 kmem_cache_alloc_noprof+0x2bf/0x3c0 mm/slub.c:4247
 alloc_empty_file+0x55/0x1d0 fs/file_table.c:237
 alloc_file fs/file_table.c:354 [inline]
 alloc_file_pseudo+0x13d/0x210 fs/file_table.c:383
 sock_alloc_file+0xb8/0x2e0 net/socket.c:470
 sock_map_fd net/socket.c:500 [inline]
 __sys_socket+0x13d/0x1b0 net/socket.c:1740
 __do_sys_socket net/socket.c:1745 [inline]
 __se_sys_socket net/socket.c:1743 [inline]
 __x64_sys_socket+0x7a/0x90 net/socket.c:1743
 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
 do_syscall_64+0xfa/0x3b0 arch/x86/entry/syscall_64.c:94
 entry_SYSCALL_64_after_hwframe+0x77/0x7f
page last free pid 27 tgid 27 stack trace:
 reset_page_owner include/linux/page_owner.h:25 [inline]
 free_pages_prepare mm/page_alloc.c:1395 [inline]
 __free_frozen_pages+0xbc4/0xd30 mm/page_alloc.c:2895
 pagetable_free include/linux/mm.h:2898 [inline]
 pagetable_dtor_free include/linux/mm.h:2996 [inline]
 __tlb_remove_table+0x2d2/0x3b0 include/asm-generic/tlb.h:220
 __tlb_remove_table_free mm/mmu_gather.c:227 [inline]
 tlb_remove_table_rcu+0x85/0x100 mm/mmu_gather.c:290
 rcu_do_batch kernel/rcu/tree.c:2605 [inline]
 rcu_core+0xcab/0x1770 kernel/rcu/tree.c:2861
 handle_softirqs+0x286/0x870 kernel/softirq.c:579
 do_softirq+0xec/0x180 kernel/softirq.c:480
 __local_bh_enable_ip+0x17d/0x1c0 kernel/softirq.c:407
 spin_unlock_bh include/linux/spinlock.h:396 [inline]
 nsim_dev_trap_report drivers/net/netdevsim/dev.c:835 [inline]
 nsim_dev_trap_report_work+0x7c7/0xb80 drivers/net/netdevsim/dev.c:866
 process_one_work kernel/workqueue.c:3236 [inline]
 process_scheduled_works+0xae1/0x17b0 kernel/workqueue.c:3319
 worker_thread+0x8a0/0xda0 kernel/workqueue.c:3400
 kthread+0x711/0x8a0 kernel/kthread.c:463
 ret_from_fork+0x439/0x7d0 arch/x86/kernel/process.c:148
 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:245

Memory state around the buggy address:
 ffff88810d802c00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
 ffff88810d802c80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
>ffff88810d802d00: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
                      ^
 ffff88810d802d80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
 ffff88810d802e00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
==================================================================


***

If these findings have caused you to resend the series or submit a
separate fix, please add the following tag to your commit message:
  Tested-by: syzbot@syzkaller.appspotmail.com

---
This report is generated by a bot. It may contain errors.
syzbot ci engineers can be reached at syzkaller@googlegroups.com.


* Re: [PATCH v2 net-next] udp: remove busylock and add per NUMA queues
  2025-09-20 18:10 ` Jakub Kicinski
@ 2025-09-20 19:38   ` Eric Dumazet
  0 siblings, 0 replies; 4+ messages in thread
From: Eric Dumazet @ 2025-09-20 19:38 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: David S . Miller, Paolo Abeni, Simon Horman, Willem de Bruijn,
	Kuniyuki Iwashima, netdev, eric.dumazet

On Sat, Sep 20, 2025 at 11:11 AM Jakub Kicinski <kuba@kernel.org> wrote:
>
> On Sat, 20 Sep 2025 08:02:27 +0000 Eric Dumazet wrote:
> > busylock was protecting UDP sockets against packet floods,
> > but unfortunately was not protecting the host itself.
> >
> > Under stress, many cpus could spin while acquiring the busylock,
> > and the NIC had to drop packets. Or packets would be dropped
> > in the cpu backlog if RPS/RFS were in place.
> >
> > This patch replaces the busylock with intermediate
> > lockless queues (one queue per NUMA node).
> >
> > This means that fewer cpus have to acquire
> > the UDP receive queue lock.
> >
> > Most cpus can either:
> > - immediately drop the packet,
> > - or queue it in their NUMA-aware lockless queue.
> >
> > Then one cpu is chosen to process this lockless queue
> > in a batch.
> >
> > The batch only contains packets that were cooked on the same
> > NUMA node, thus with very limited latency impact.
>
> Occasionally hitting a UaF like this:
> https://netdev-3.bots.linux.dev/vmksft-net-dbg/results/306342/3-fcnal-ipv6-sh/stderr
> decoded:
> https://netdev-3.bots.linux.dev/vmksft-net-dbg/results/306342/vm-crash-thr2-0
> --
> pw-bot: cr

Yeah, udp_destroy_sock() is called from inet_release() while there are
still packets in flight.

I have to move the kfree(up->udp_prod_queue) call into
udp_destruct_common(), which does not run until the last reference to
the socket is gone.

I will test:

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index fedc939342f3d1ab580548e2b4dd39b5e3a1c397..59bf422151171330b7190523e0f287947409b6b5 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1808,6 +1808,7 @@ void udp_destruct_common(struct sock *sk)
                kfree_skb(skb);
        }
        udp_rmem_release(sk, total, 0, true);
+       kfree(up->udp_prod_queue);
 }
 EXPORT_IPV6_MOD_GPL(udp_destruct_common);

@@ -2912,7 +2913,6 @@ void udp_destroy_sock(struct sock *sk)
                        udp_tunnel_cleanup_gro(sk);
                }
        }
-       kfree(up->udp_prod_queue);
 }

 typedef struct sk_buff *(*udp_gro_receive_t)(struct sock *sk,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 90e2945e6cf9066bc36c57cbb29b8aa68e7afe4e..813a2ba75824d14631642bf6973f65063b2825cb 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1829,7 +1829,6 @@ void udpv6_destroy_sock(struct sock *sk)
                        udp_tunnel_cleanup_gro(sk);
                }
        }
-       kfree(up->udp_prod_queue);
 }

 /*

