[PATCH v1 net] netlink: Fix wraparounds of sk->sk_rmem_alloc.

netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [PATCH v1 net] netlink: Fix wraparounds of sk->sk_rmem_alloc.
@ 2025-07-04  5:48 Kuniyuki Iwashima
  2025-07-08  0:43 ` patchwork-bot+netdevbpf
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Kuniyuki Iwashima @ 2025-07-04  5:48 UTC (permalink / raw)
  To: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni
  Cc: Simon Horman, Kuniyuki Iwashima, Kuniyuki Iwashima, netdev,
	Jason Baron

Netlink has this pattern in some places

  if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
  	atomic_add(skb->truesize, &sk->sk_rmem_alloc);

, which has the same problem fixed by commit 5a465a0da13e ("udp:
Fix multiple wraparounds of sk->sk_rmem_alloc.").

For example, if we set SO_RCVBUFFORCE to INT_MAX, the condition
is always false because both operands are of type int.

Then, a single socket can eat as many skb as possible until OOM
happens, and we can see multiple wraparounds of sk->sk_rmem_alloc.

Let's fix it by using atomic_add_return() and comparing the two
variables as unsigned int.

Before:
  [root@fedora ~]# ss -f netlink
  Recv-Q      Send-Q Local Address:Port                Peer Address:Port
  -1668710080 0               rtnl:nl_wraparound/293               *

After:
  [root@fedora ~]# ss -f netlink
  Recv-Q     Send-Q Local Address:Port                Peer Address:Port
  2147483072 0               rtnl:nl_wraparound/290               *
  ^
  `--- INT_MAX - 576

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Reported-by: Jason Baron <jbaron@akamai.com>
Closes: https://lore.kernel.org/netdev/cover.1750285100.git.jbaron@akamai.com/
Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>
---
 net/netlink/af_netlink.c | 81 ++++++++++++++++++++++++----------------
 1 file changed, 49 insertions(+), 32 deletions(-)

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index e8972a857e51..79fbaf7333ce 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -387,7 +387,6 @@ static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
 	WARN_ON(skb->sk != NULL);
 	skb->sk = sk;
 	skb->destructor = netlink_skb_destructor;
-	atomic_add(skb->truesize, &sk->sk_rmem_alloc);
 	sk_mem_charge(sk, skb->truesize);
 }
 
@@ -1212,41 +1211,48 @@ struct sk_buff *netlink_alloc_large_skb(unsigned int size, int broadcast)
 int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
 		      long *timeo, struct sock *ssk)
 {
+	DECLARE_WAITQUEUE(wait, current);
 	struct netlink_sock *nlk;
+	unsigned int rmem;
 
 	nlk = nlk_sk(sk);
+	rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc);
 
-	if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
-	     test_bit(NETLINK_S_CONGESTED, &nlk->state))) {
-		DECLARE_WAITQUEUE(wait, current);
-		if (!*timeo) {
-			if (!ssk || netlink_is_kernel(ssk))
-				netlink_overrun(sk);
-			sock_put(sk);
-			kfree_skb(skb);
-			return -EAGAIN;
-		}
-
-		__set_current_state(TASK_INTERRUPTIBLE);
-		add_wait_queue(&nlk->wait, &wait);
+	if ((rmem == skb->truesize || rmem < READ_ONCE(sk->sk_rcvbuf)) &&
+	    !test_bit(NETLINK_S_CONGESTED, &nlk->state)) {
+		netlink_skb_set_owner_r(skb, sk);
+		return 0;
+	}
 
-		if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
-		     test_bit(NETLINK_S_CONGESTED, &nlk->state)) &&
-		    !sock_flag(sk, SOCK_DEAD))
-			*timeo = schedule_timeout(*timeo);
+	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
 
-		__set_current_state(TASK_RUNNING);
-		remove_wait_queue(&nlk->wait, &wait);
+	if (!*timeo) {
+		if (!ssk || netlink_is_kernel(ssk))
+			netlink_overrun(sk);
 		sock_put(sk);
+		kfree_skb(skb);
+		return -EAGAIN;
+	}
 
-		if (signal_pending(current)) {
-			kfree_skb(skb);
-			return sock_intr_errno(*timeo);
-		}
-		return 1;
+	__set_current_state(TASK_INTERRUPTIBLE);
+	add_wait_queue(&nlk->wait, &wait);
+	rmem = atomic_read(&sk->sk_rmem_alloc);
+
+	if (((rmem && rmem + skb->truesize > READ_ONCE(sk->sk_rcvbuf)) ||
+	     test_bit(NETLINK_S_CONGESTED, &nlk->state)) &&
+	    !sock_flag(sk, SOCK_DEAD))
+		*timeo = schedule_timeout(*timeo);
+
+	__set_current_state(TASK_RUNNING);
+	remove_wait_queue(&nlk->wait, &wait);
+	sock_put(sk);
+
+	if (signal_pending(current)) {
+		kfree_skb(skb);
+		return sock_intr_errno(*timeo);
 	}
-	netlink_skb_set_owner_r(skb, sk);
-	return 0;
+
+	return 1;
 }
 
 static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb)
@@ -1307,6 +1313,7 @@ static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb,
 	ret = -ECONNREFUSED;
 	if (nlk->netlink_rcv != NULL) {
 		ret = skb->len;
+		atomic_add(skb->truesize, &sk->sk_rmem_alloc);
 		netlink_skb_set_owner_r(skb, sk);
 		NETLINK_CB(skb).sk = ssk;
 		netlink_deliver_tap_kernel(sk, ssk, skb);
@@ -1383,13 +1390,19 @@ EXPORT_SYMBOL_GPL(netlink_strict_get_check);
 static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
 {
 	struct netlink_sock *nlk = nlk_sk(sk);
+	unsigned int rmem, rcvbuf;
 
-	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
+	rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc);
+	rcvbuf = READ_ONCE(sk->sk_rcvbuf);
+
+	if ((rmem != skb->truesize || rmem <= rcvbuf) &&
 	    !test_bit(NETLINK_S_CONGESTED, &nlk->state)) {
 		netlink_skb_set_owner_r(skb, sk);
 		__netlink_sendskb(sk, skb);
-		return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1);
+		return rmem > (rcvbuf >> 1);
 	}
+
+	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
 	return -1;
 }
 
@@ -2249,6 +2262,7 @@ static int netlink_dump(struct sock *sk, bool lock_taken)
 	struct module *module;
 	int err = -ENOBUFS;
 	int alloc_min_size;
+	unsigned int rmem;
 	int alloc_size;
 
 	if (!lock_taken)
@@ -2258,9 +2272,6 @@ static int netlink_dump(struct sock *sk, bool lock_taken)
 		goto errout_skb;
 	}
 
-	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
-		goto errout_skb;
-
 	/* NLMSG_GOODSIZE is small to avoid high order allocations being
 	 * required, but it makes sense to _attempt_ a 32KiB allocation
 	 * to reduce number of system calls on dump operations, if user
@@ -2283,6 +2294,12 @@ static int netlink_dump(struct sock *sk, bool lock_taken)
 	if (!skb)
 		goto errout_skb;
 
+	rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc);
+	if (rmem >= READ_ONCE(sk->sk_rcvbuf)) {
+		atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
+		goto errout_skb;
+	}
+
 	/* Trim skb to allocated size. User is expected to provide buffer as
 	 * large as max(min_dump_alloc, 32KiB (max_recvmsg_len capped at
 	 * netlink_recvmsg())). dump will pack as many smaller messages as
-- 
2.50.0.727.gbf7dc18ff4-goog


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH v1 net] netlink: Fix wraparounds of sk->sk_rmem_alloc.
  2025-07-04  5:48 [PATCH v1 net] netlink: Fix wraparounds of sk->sk_rmem_alloc Kuniyuki Iwashima
@ 2025-07-08  0:43 ` patchwork-bot+netdevbpf
       [not found] ` <CGME20250710083401eucas1p1d18e23791e1f22c0c0aaf823a35526a2@eucas1p1.samsung.com>
  2025-08-08 13:59 ` Heyne, Maximilian
  2 siblings, 0 replies; 10+ messages in thread
From: patchwork-bot+netdevbpf @ 2025-07-08  0:43 UTC (permalink / raw)
  To: Kuniyuki Iwashima
  Cc: davem, edumazet, kuba, pabeni, horms, kuni1840, netdev, jbaron

Hello:

This patch was applied to netdev/net.git (main)
by Jakub Kicinski <kuba@kernel.org>:

On Fri,  4 Jul 2025 05:48:18 +0000 you wrote:
> Netlink has this pattern in some places
> 
>   if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
>   	atomic_add(skb->truesize, &sk->sk_rmem_alloc);
> 
> , which has the same problem fixed by commit 5a465a0da13e ("udp:
> Fix multiple wraparounds of sk->sk_rmem_alloc.").
> 
> [...]

Here is the summary with links:
  - [v1,net] netlink: Fix wraparounds of sk->sk_rmem_alloc.
    https://git.kernel.org/netdev/net/c/ae8f160e7eb2

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html



^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v1 net] netlink: Fix wraparounds of sk->sk_rmem_alloc.
       [not found] ` <CGME20250710083401eucas1p1d18e23791e1f22c0c0aaf823a35526a2@eucas1p1.samsung.com>
@ 2025-07-10  8:34   ` Marek Szyprowski
  2025-07-10 19:43     ` Jakub Kicinski
  0 siblings, 1 reply; 10+ messages in thread
From: Marek Szyprowski @ 2025-07-10  8:34 UTC (permalink / raw)
  To: Kuniyuki Iwashima, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni
  Cc: Simon Horman, Kuniyuki Iwashima, netdev, Jason Baron

On 04.07.2025 07:48, Kuniyuki Iwashima wrote:
> Netlink has this pattern in some places
>
>    if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
>    	atomic_add(skb->truesize, &sk->sk_rmem_alloc);
>
> , which has the same problem fixed by commit 5a465a0da13e ("udp:
> Fix multiple wraparounds of sk->sk_rmem_alloc.").
>
> For example, if we set INT_MAX to SO_RCVBUFFORCE, the condition
> is always false as the two operands are of int.
>
> Then, a single socket can eat as many skb as possible until OOM
> happens, and we can see multiple wraparounds of sk->sk_rmem_alloc.
>
> Let's fix it by using atomic_add_return() and comparing the two
> variables as unsigned int.
>
> Before:
>    [root@fedora ~]# ss -f netlink
>    Recv-Q      Send-Q Local Address:Port                Peer Address:Port
>    -1668710080 0               rtnl:nl_wraparound/293               *
>
> After:
>    [root@fedora ~]# ss -f netlink
>    Recv-Q     Send-Q Local Address:Port                Peer Address:Port
>    2147483072 0               rtnl:nl_wraparound/290               *
>    ^
>    `--- INT_MAX - 576
>
> Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
> Reported-by: Jason Baron <jbaron@akamai.com>
> Closes: https://lore.kernel.org/netdev/cover.1750285100.git.jbaron@akamai.com/
> Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>

This patch landed recently in linux-next as commit ae8f160e7eb2
("netlink: Fix wraparounds of sk->sk_rmem_alloc."). In my tests I found
that it breaks wifi driver operation on my test boards (various ARM
32-bit and 64-bit ones). Reverting it on top of next-20250709 fixes this
issue. Here is the log from the failure observed on the Samsung
Peach-Pit Chromebook:

# dmesg | grep wifi
[   16.174311] mwifiex_sdio mmc2:0001:1: WLAN is not the winner! Skip FW 
dnld
[   16.503969] mwifiex_sdio mmc2:0001:1: WLAN FW is active
[   16.574635] mwifiex_sdio mmc2:0001:1: host_mlme: disable, key_api: 2
[   16.586152] mwifiex_sdio mmc2:0001:1: CMD_RESP: cmd 0x242 error, 
result=0x2
[   16.641184] mwifiex_sdio mmc2:0001:1: info: MWIFIEX VERSION: mwifiex 
1.0 (15.68.7.p87)
[   16.649474] mwifiex_sdio mmc2:0001:1: driver_version = mwifiex 1.0 
(15.68.7.p87)
[   25.953285] mwifiex_sdio mmc2:0001:1 wlan0: renamed from mlan0
# ifconfig wlan0 up
# iw wlan0 scan
command failed: No buffer space available (-105)
#

Let me know if you need more information to debug this issue.

> ---
>   net/netlink/af_netlink.c | 81 ++++++++++++++++++++++++----------------
>   1 file changed, 49 insertions(+), 32 deletions(-)
>
> diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
> index e8972a857e51..79fbaf7333ce 100644
> --- a/net/netlink/af_netlink.c
> +++ b/net/netlink/af_netlink.c
> @@ -387,7 +387,6 @@ static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
>   	WARN_ON(skb->sk != NULL);
>   	skb->sk = sk;
>   	skb->destructor = netlink_skb_destructor;
> -	atomic_add(skb->truesize, &sk->sk_rmem_alloc);
>   	sk_mem_charge(sk, skb->truesize);
>   }
>   
> @@ -1212,41 +1211,48 @@ struct sk_buff *netlink_alloc_large_skb(unsigned int size, int broadcast)
>   int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
>   		      long *timeo, struct sock *ssk)
>   {
> +	DECLARE_WAITQUEUE(wait, current);
>   	struct netlink_sock *nlk;
> +	unsigned int rmem;
>   
>   	nlk = nlk_sk(sk);
> +	rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc);
>   
> -	if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
> -	     test_bit(NETLINK_S_CONGESTED, &nlk->state))) {
> -		DECLARE_WAITQUEUE(wait, current);
> -		if (!*timeo) {
> -			if (!ssk || netlink_is_kernel(ssk))
> -				netlink_overrun(sk);
> -			sock_put(sk);
> -			kfree_skb(skb);
> -			return -EAGAIN;
> -		}
> -
> -		__set_current_state(TASK_INTERRUPTIBLE);
> -		add_wait_queue(&nlk->wait, &wait);
> +	if ((rmem == skb->truesize || rmem < READ_ONCE(sk->sk_rcvbuf)) &&
> +	    !test_bit(NETLINK_S_CONGESTED, &nlk->state)) {
> +		netlink_skb_set_owner_r(skb, sk);
> +		return 0;
> +	}
>   
> -		if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
> -		     test_bit(NETLINK_S_CONGESTED, &nlk->state)) &&
> -		    !sock_flag(sk, SOCK_DEAD))
> -			*timeo = schedule_timeout(*timeo);
> +	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
>   
> -		__set_current_state(TASK_RUNNING);
> -		remove_wait_queue(&nlk->wait, &wait);
> +	if (!*timeo) {
> +		if (!ssk || netlink_is_kernel(ssk))
> +			netlink_overrun(sk);
>   		sock_put(sk);
> +		kfree_skb(skb);
> +		return -EAGAIN;
> +	}
>   
> -		if (signal_pending(current)) {
> -			kfree_skb(skb);
> -			return sock_intr_errno(*timeo);
> -		}
> -		return 1;
> +	__set_current_state(TASK_INTERRUPTIBLE);
> +	add_wait_queue(&nlk->wait, &wait);
> +	rmem = atomic_read(&sk->sk_rmem_alloc);
> +
> +	if (((rmem && rmem + skb->truesize > READ_ONCE(sk->sk_rcvbuf)) ||
> +	     test_bit(NETLINK_S_CONGESTED, &nlk->state)) &&
> +	    !sock_flag(sk, SOCK_DEAD))
> +		*timeo = schedule_timeout(*timeo);
> +
> +	__set_current_state(TASK_RUNNING);
> +	remove_wait_queue(&nlk->wait, &wait);
> +	sock_put(sk);
> +
> +	if (signal_pending(current)) {
> +		kfree_skb(skb);
> +		return sock_intr_errno(*timeo);
>   	}
> -	netlink_skb_set_owner_r(skb, sk);
> -	return 0;
> +
> +	return 1;
>   }
>   
>   static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb)
> @@ -1307,6 +1313,7 @@ static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb,
>   	ret = -ECONNREFUSED;
>   	if (nlk->netlink_rcv != NULL) {
>   		ret = skb->len;
> +		atomic_add(skb->truesize, &sk->sk_rmem_alloc);
>   		netlink_skb_set_owner_r(skb, sk);
>   		NETLINK_CB(skb).sk = ssk;
>   		netlink_deliver_tap_kernel(sk, ssk, skb);
> @@ -1383,13 +1390,19 @@ EXPORT_SYMBOL_GPL(netlink_strict_get_check);
>   static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
>   {
>   	struct netlink_sock *nlk = nlk_sk(sk);
> +	unsigned int rmem, rcvbuf;
>   
> -	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
> +	rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc);
> +	rcvbuf = READ_ONCE(sk->sk_rcvbuf);
> +
> +	if ((rmem != skb->truesize || rmem <= rcvbuf) &&
>   	    !test_bit(NETLINK_S_CONGESTED, &nlk->state)) {
>   		netlink_skb_set_owner_r(skb, sk);
>   		__netlink_sendskb(sk, skb);
> -		return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1);
> +		return rmem > (rcvbuf >> 1);
>   	}
> +
> +	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
>   	return -1;
>   }
>   
> @@ -2249,6 +2262,7 @@ static int netlink_dump(struct sock *sk, bool lock_taken)
>   	struct module *module;
>   	int err = -ENOBUFS;
>   	int alloc_min_size;
> +	unsigned int rmem;
>   	int alloc_size;
>   
>   	if (!lock_taken)
> @@ -2258,9 +2272,6 @@ static int netlink_dump(struct sock *sk, bool lock_taken)
>   		goto errout_skb;
>   	}
>   
> -	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
> -		goto errout_skb;
> -
>   	/* NLMSG_GOODSIZE is small to avoid high order allocations being
>   	 * required, but it makes sense to _attempt_ a 32KiB allocation
>   	 * to reduce number of system calls on dump operations, if user
> @@ -2283,6 +2294,12 @@ static int netlink_dump(struct sock *sk, bool lock_taken)
>   	if (!skb)
>   		goto errout_skb;
>   
> +	rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc);
> +	if (rmem >= READ_ONCE(sk->sk_rcvbuf)) {
> +		atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
> +		goto errout_skb;
> +	}
> +
>   	/* Trim skb to allocated size. User is expected to provide buffer as
>   	 * large as max(min_dump_alloc, 32KiB (max_recvmsg_len capped at
>   	 * netlink_recvmsg())). dump will pack as many smaller messages as

Best regards
-- 
Marek Szyprowski, PhD
Samsung R&D Institute Poland


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v1 net] netlink: Fix wraparounds of sk->sk_rmem_alloc.
  2025-07-10  8:34   ` Marek Szyprowski
@ 2025-07-10 19:43     ` Jakub Kicinski
  2025-07-10 23:33       ` Marek Szyprowski
  0 siblings, 1 reply; 10+ messages in thread
From: Jakub Kicinski @ 2025-07-10 19:43 UTC (permalink / raw)
  To: Marek Szyprowski
  Cc: Kuniyuki Iwashima, David S. Miller, Eric Dumazet, Paolo Abeni,
	Simon Horman, Kuniyuki Iwashima, netdev, Jason Baron

On Thu, 10 Jul 2025 10:34:00 +0200 Marek Szyprowski wrote:
> > Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
> > Reported-by: Jason Baron <jbaron@akamai.com>
> > Closes: https://lore.kernel.org/netdev/cover.1750285100.git.jbaron@akamai.com/
> > Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>  
> 
> This patch landed recently in linux-next as commit ae8f160e7eb2 
> ("netlink: Fix wraparounds of sk->sk_rmem_alloc."). In my tests I found 
> that it breaks wifi drivers operation on my tests boards (various ARM 
> 32bit and 64bit ones). Reverting it on top of next-20250709 fixes this 
> issue. Here is the log from the failure observed on the Samsung 
> Peach-Pit Chromebook:
> 
> # dmesg | grep wifi
> [   16.174311] mwifiex_sdio mmc2:0001:1: WLAN is not the winner! Skip FW 
> dnld
> [   16.503969] mwifiex_sdio mmc2:0001:1: WLAN FW is active
> [   16.574635] mwifiex_sdio mmc2:0001:1: host_mlme: disable, key_api: 2
> [   16.586152] mwifiex_sdio mmc2:0001:1: CMD_RESP: cmd 0x242 error, 
> result=0x2
> [   16.641184] mwifiex_sdio mmc2:0001:1: info: MWIFIEX VERSION: mwifiex 
> 1.0 (15.68.7.p87)
> [   16.649474] mwifiex_sdio mmc2:0001:1: driver_version = mwifiex 1.0 
> (15.68.7.p87)
> [   25.953285] mwifiex_sdio mmc2:0001:1 wlan0: renamed from mlan0
> # ifconfig wlan0 up
> # iw wlan0 scan
> command failed: No buffer space available (-105)
> #
> 
> Let me know if You need more information to debug this issue.

Thanks a lot for the report! I don't see any obvious bugs.
Would you be able to test this?

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 79fbaf7333ce..aeb05d99e016 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2258,11 +2258,11 @@ static int netlink_dump(struct sock *sk, bool lock_taken)
 	struct netlink_ext_ack extack = {};
 	struct netlink_callback *cb;
 	struct sk_buff *skb = NULL;
+	unsigned int rmem, rcvbuf;
 	size_t max_recvmsg_len;
 	struct module *module;
 	int err = -ENOBUFS;
 	int alloc_min_size;
-	unsigned int rmem;
 	int alloc_size;
 
 	if (!lock_taken)
@@ -2294,8 +2294,9 @@ static int netlink_dump(struct sock *sk, bool lock_taken)
 	if (!skb)
 		goto errout_skb;
 
+	rcvbuf = READ_ONCE(sk->sk_rcvbuf);
 	rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc);
-	if (rmem >= READ_ONCE(sk->sk_rcvbuf)) {
+	if (rmem != skb->truesize && rmem >= rcvbuf) {
 		atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
 		goto errout_skb;
 	}

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH v1 net] netlink: Fix wraparounds of sk->sk_rmem_alloc.
  2025-07-10 19:43     ` Jakub Kicinski
@ 2025-07-10 23:33       ` Marek Szyprowski
  0 siblings, 0 replies; 10+ messages in thread
From: Marek Szyprowski @ 2025-07-10 23:33 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Kuniyuki Iwashima, David S. Miller, Eric Dumazet, Paolo Abeni,
	Simon Horman, Kuniyuki Iwashima, netdev, Jason Baron

On 10.07.2025 21:43, Jakub Kicinski wrote:
> On Thu, 10 Jul 2025 10:34:00 +0200 Marek Szyprowski wrote:
>>> Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
>>> Reported-by: Jason Baron <jbaron@akamai.com>
>>> Closes: https://lore.kernel.org/netdev/cover.1750285100.git.jbaron@akamai.com/
>>> Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>
>> This patch landed recently in linux-next as commit ae8f160e7eb2
>> ("netlink: Fix wraparounds of sk->sk_rmem_alloc."). In my tests I found
>> that it breaks wifi drivers operation on my tests boards (various ARM
>> 32bit and 64bit ones). Reverting it on top of next-20250709 fixes this
>> issue. Here is the log from the failure observed on the Samsung
>> Peach-Pit Chromebook:
>>
>> # dmesg | grep wifi
>> [   16.174311] mwifiex_sdio mmc2:0001:1: WLAN is not the winner! Skip FW
>> dnld
>> [   16.503969] mwifiex_sdio mmc2:0001:1: WLAN FW is active
>> [   16.574635] mwifiex_sdio mmc2:0001:1: host_mlme: disable, key_api: 2
>> [   16.586152] mwifiex_sdio mmc2:0001:1: CMD_RESP: cmd 0x242 error,
>> result=0x2
>> [   16.641184] mwifiex_sdio mmc2:0001:1: info: MWIFIEX VERSION: mwifiex
>> 1.0 (15.68.7.p87)
>> [   16.649474] mwifiex_sdio mmc2:0001:1: driver_version = mwifiex 1.0
>> (15.68.7.p87)
>> [   25.953285] mwifiex_sdio mmc2:0001:1 wlan0: renamed from mlan0
>> # ifconfig wlan0 up
>> # iw wlan0 scan
>> command failed: No buffer space available (-105)
>> #
>>
>> Let me know if You need more information to debug this issue.
> Thanks a lot for the report! I don't see any obvious bugs.
> Would you be able to test this?
>
> diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
> index 79fbaf7333ce..aeb05d99e016 100644
> --- a/net/netlink/af_netlink.c
> +++ b/net/netlink/af_netlink.c
> @@ -2258,11 +2258,11 @@ static int netlink_dump(struct sock *sk, bool lock_taken)
>   	struct netlink_ext_ack extack = {};
>   	struct netlink_callback *cb;
>   	struct sk_buff *skb = NULL;
> +	unsigned int rmem, rcvbuf;
>   	size_t max_recvmsg_len;
>   	struct module *module;
>   	int err = -ENOBUFS;
>   	int alloc_min_size;
> -	unsigned int rmem;
>   	int alloc_size;
>   
>   	if (!lock_taken)
> @@ -2294,8 +2294,9 @@ static int netlink_dump(struct sock *sk, bool lock_taken)
>   	if (!skb)
>   		goto errout_skb;
>   
> +	rcvbuf = READ_ONCE(sk->sk_rcvbuf);
>   	rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc);
> -	if (rmem >= READ_ONCE(sk->sk_rcvbuf)) {
> +	if (rmem != skb->truesize && rmem >= rcvbuf) {
>   		atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
>   		goto errout_skb;
>   	}
>
The above change fixes my issue. Thanks! Feel free to add:

Reported-by: Marek Szyprowski <m.szyprowski@samsung.com>
Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>

Best regards
-- 
Marek Szyprowski, PhD
Samsung R&D Institute Poland


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v1 net] netlink: Fix wraparounds of sk->sk_rmem_alloc.
  2025-07-04  5:48 [PATCH v1 net] netlink: Fix wraparounds of sk->sk_rmem_alloc Kuniyuki Iwashima
  2025-07-08  0:43 ` patchwork-bot+netdevbpf
       [not found] ` <CGME20250710083401eucas1p1d18e23791e1f22c0c0aaf823a35526a2@eucas1p1.samsung.com>
@ 2025-08-08 13:59 ` Heyne, Maximilian
  2025-08-08 15:54   ` Kuniyuki Iwashima
  2 siblings, 1 reply; 10+ messages in thread
From: Heyne, Maximilian @ 2025-08-08 13:59 UTC (permalink / raw)
  To: Kuniyuki Iwashima
  Cc: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
	Simon Horman, Kuniyuki Iwashima, netdev@vger.kernel.org,
	Jason Baron, Ahmed, Aaron, Kumar, Praveen, Paul Moore, Eric Paris,
	linux-audit@redhat.com, linux-kernel@vger.kernel.org

On Fri, Jul 04, 2025 at 05:48:18AM +0000, Kuniyuki Iwashima wrote:
> Netlink has this pattern in some places
> 
>   if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
>   	atomic_add(skb->truesize, &sk->sk_rmem_alloc);
> 
> , which has the same problem fixed by commit 5a465a0da13e ("udp:
> Fix multiple wraparounds of sk->sk_rmem_alloc.").
> 
> For example, if we set INT_MAX to SO_RCVBUFFORCE, the condition
> is always false as the two operands are of int.
> 
> Then, a single socket can eat as many skb as possible until OOM
> happens, and we can see multiple wraparounds of sk->sk_rmem_alloc.
> 
> Let's fix it by using atomic_add_return() and comparing the two
> variables as unsigned int.
> 
> Before:
>   [root@fedora ~]# ss -f netlink
>   Recv-Q      Send-Q Local Address:Port                Peer Address:Port
>   -1668710080 0               rtnl:nl_wraparound/293               *
> 
> After:
>   [root@fedora ~]# ss -f netlink
>   Recv-Q     Send-Q Local Address:Port                Peer Address:Port
>   2147483072 0               rtnl:nl_wraparound/290               *
>   ^
>   `--- INT_MAX - 576
> 
> Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
> Reported-by: Jason Baron <jbaron@akamai.com>
> Closes: https://lore.kernel.org/netdev/cover.1750285100.git.jbaron@akamai.com/
> Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>

Hi Kuniyuki,

We're seeing soft lockups with this patch in a variety of (stable)
kernel versions.

I was able to reproduce it on a couple of different EC2 instances also
with the latest linux kernel 6.16-rc7 using the following steps:

systemctl start auditd
sudo auditctl -D
sudo auditctl -b 512
sudo auditctl -a always,exit -F arch=b64 -S mmap,munmap,mprotect,brk -k memory_ops
sudo auditctl -e 1

Then execute some programs that call some of these memory functions,
such as repeated calls of "sudo auditctl -s" or logging in via SSH.

These settings are chosen to create a lot of audit messages. Once the
backlog (see auditctl -s) overshoots the backlog_limit, the system hits
a soft lockup:

[  460.056244] watchdog: BUG: soft lockup - CPU#1 stuck for 52s! [kauditd:32]
[  460.056249] Modules linked in: mousedev(E) nls_ascii(E) nls_cp437(E) sunrpc(E) vfat(E) fat(E) psmouse(E) atkbd(E) libps2(E) vivaldi_fmap(E) i8042(E) serio(E) skx_edac_common(E) button(E) ena(E) ghash_clmulni_intel(E) sch_fq_codel(E) drm(E) i2c_core(E) dm_mod(E) drm_panel_orientation_quirks(E) backlight(E) fuse(E) loop(E) dax(E) configfs(E) dmi_sysfs(E) efivarfs(E)
[  460.056272] CPU: 1 UID: 0 PID: 32 Comm: kauditd Tainted: G            EL      6.16.0-rc7+ #3 PREEMPT(none)
[  460.056275] Tainted: [E]=UNSIGNED_MODULE, [L]=SOFTLOCKUP
[  460.056276] Hardware name: Amazon EC2 t3.medium/, BIOS 1.0 10/16/2017
[  460.056277] RIP: 0010:_raw_spin_unlock_irqrestore+0x1b/0x30
[  460.056284] Code: 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 0f 1f 44 00 00 e8 16 07 00 00 90 f7 c6 00 02 00 00 74 01 fb 65 ff 0d b5 23 b6 01 <74> 05 c3 cc cc cc cc 0f 1f 44 00 00 e9 14 23 00 00 0f 1f 40 00 90
[  460.056285] RSP: 0018:ffffb762c0123d70 EFLAGS: 00000246
[  460.056287] RAX: 0000000000000001 RBX: ffff9b14c08eafc0 RCX: ffff9b14c3337348
[  460.056288] RDX: ffff9b14c3337348 RSI: 0000000000000246 RDI: ffff9b14c3337340
[  460.056289] RBP: ffff9b14c3337000 R08: ffffffff93cea880 R09: 0000000000000001
[  460.056290] R10: 0000000000000001 R11: 0000000000000080 R12: ffff9b14c1b72500
[  460.056291] R13: ffffb762c0123de0 R14: ffff9b14c3337340 R15: ffff9b14c3337080
[  460.056294] FS:  0000000000000000(0000) GS:ffff9b1563788000(0000) knlGS:0000000000000000
[  460.056296] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  460.056297] CR2: 00007f36fd5d21b4 CR3: 000000010241a002 CR4: 00000000007706f0
[  460.056298] PKRU: 55555554
[  460.056299] Call Trace:
[  460.056300]  <TASK>
[  460.056302]  netlink_attachskb+0xb7/0x2f0
[  460.056308]  ? __pfx_default_wake_function+0x10/0x10
[  460.056313]  netlink_unicast+0xea/0x3b0
[  460.056315]  kauditd_send_queue+0xaf/0x170
[  460.056318]  ? __pfx_kauditd_send_multicast_skb+0x10/0x10
[  460.056320]  ? __pfx_kauditd_retry_skb+0x10/0x10
[  460.056321]  kauditd_thread+0x132/0x2b0
[  460.056323]  ? __pfx_autoremove_wake_function+0x10/0x10
[  460.056327]  ? __pfx_kauditd_thread+0x10/0x10
[  460.056328]  kthread+0xfb/0x230
[  460.056331]  ? __pfx_kthread+0x10/0x10
[  460.056332]  ? __pfx_kthread+0x10/0x10
[  460.056334]  ret_from_fork+0x142/0x160
[  460.056338]  ? __pfx_kthread+0x10/0x10
[  460.056339]  ret_from_fork_asm+0x1a/0x30
[  460.056343]  </TASK>
[  469.011800] audit_log_start: 120 callbacks suppressed
[  469.011805] audit: audit_backlog=513 > audit_backlog_limit=512
[  469.013154] audit: audit_lost=1 audit_rate_limit=0 audit_backlog_limit=512
[  469.013967] audit: backlog limit exceeded
[  469.014617] audit: audit_backlog=513 > audit_backlog_limit=512
[  469.015313] audit: audit_lost=2 audit_rate_limit=0 audit_backlog_limit=512
[  469.016112] audit: backlog limit exceeded

We have bug reports from many users, so the issue is rather widespread.

So far I don't know why the commit is causing this issue and will keep
investigating. However, when it is reverted (together with its 2 follow-up
patches), the issue goes away and hosts do not lock up.

Thanks,
Maximilian



Amazon Web Services Development Center Germany GmbH
Tamara-Danz-Str. 13
10243 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss
Eingetragen am Amtsgericht Charlottenburg unter HRB 257764 B
Sitz: Berlin
Ust-ID: DE 365 538 597


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v1 net] netlink: Fix wraparounds of sk->sk_rmem_alloc.
  2025-08-08 13:59 ` Heyne, Maximilian
@ 2025-08-08 15:54   ` Kuniyuki Iwashima
  2025-08-13 19:00     ` Paul Moore
  0 siblings, 1 reply; 10+ messages in thread
From: Kuniyuki Iwashima @ 2025-08-08 15:54 UTC (permalink / raw)
  To: Heyne, Maximilian
  Cc: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
	Simon Horman, Kuniyuki Iwashima, netdev@vger.kernel.org,
	Jason Baron, Ahmed, Aaron, Kumar, Praveen, Paul Moore, Eric Paris,
	linux-audit@redhat.com, linux-kernel@vger.kernel.org

On Fri, Aug 8, 2025 at 6:59 AM Heyne, Maximilian <mheyne@amazon.de> wrote:
>
> On Fri, Jul 04, 2025 at 05:48:18AM +0000, Kuniyuki Iwashima wrote:
> > Netlink has this pattern in some places
> >
> >   if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
> >       atomic_add(skb->truesize, &sk->sk_rmem_alloc);
> >
> > , which has the same problem fixed by commit 5a465a0da13e ("udp:
> > Fix multiple wraparounds of sk->sk_rmem_alloc.").
> >
> > For example, if we set INT_MAX to SO_RCVBUFFORCE, the condition
> > is always false as the two operands are of int.
> >
> > Then, a single socket can eat as many skb as possible until OOM
> > happens, and we can see multiple wraparounds of sk->sk_rmem_alloc.
> >
> > Let's fix it by using atomic_add_return() and comparing the two
> > variables as unsigned int.
> >
> > Before:
> >   [root@fedora ~]# ss -f netlink
> >   Recv-Q      Send-Q Local Address:Port                Peer Address:Port
> >   -1668710080 0               rtnl:nl_wraparound/293               *
> >
> > After:
> >   [root@fedora ~]# ss -f netlink
> >   Recv-Q     Send-Q Local Address:Port                Peer Address:Port
> >   2147483072 0               rtnl:nl_wraparound/290               *
> >   ^
> >   `--- INT_MAX - 576
> >
> > Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
> > Reported-by: Jason Baron <jbaron@akamai.com>
> > Closes: https://lore.kernel.org/netdev/cover.1750285100.git.jbaron@akamai.com/
> > Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>
>
> Hi Kuniyuki,
>
> We're seeing soft lockups with this patch in a variety of (stable)
> kernel versions.
>
> I was able to reproduce it on a couple of different EC2 instances also
> with the latest linux kernel 6.16-rc7 using the following steps:
>
> systemctl start auditd
> sudo auditctl -D
> sudo auditctl -b 512
> sudo auditctl -a always,exit -F arch=b64 -S mmap,munmap,mprotect,brk -k memory_ops
> sudo auditctl -e 1
>
> Then execute some programs that call some of these memory functions,
> such as repeated calls of "sudo auditctl -s" or logging in via SSH.
>
> These settings are set in a way to create a lot audit messages. Once the
> backlog (see auditctl -s) overshoots the backlog_limit, the system soft
> lockups:
>
> [  460.056244] watchdog: BUG: soft lockup - CPU#1 stuck for 52s! [kauditd:32]
> [  460.056249] Modules linked in: mousedev(E) nls_ascii(E) nls_cp437(E) sunrpc(E) vfat(E) fat(E) psmouse(E) atkbd(E) libps2(E) vivaldi_fmap(E) i8042(E) serio(E) skx_edac_common(E) button(E) ena(E) ghash_clmulni_intel(E) sch_fq_codel(E) drm(E) i2c_core(E) dm_mod(E) drm_panel_orientation_quirks(E) backlight(E) fuse(E) loop(E) dax(E) configfs(E) dmi_sysfs(E) efivarfs(E)
> [  460.056272] CPU: 1 UID: 0 PID: 32 Comm: kauditd Tainted: G            EL      6.16.0-rc7+ #3 PREEMPT(none)
> [  460.056275] Tainted: [E]=UNSIGNED_MODULE, [L]=SOFTLOCKUP
> [  460.056276] Hardware name: Amazon EC2 t3.medium/, BIOS 1.0 10/16/2017
> [  460.056277] RIP: 0010:_raw_spin_unlock_irqrestore+0x1b/0x30
> [  460.056284] Code: 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 0f 1f 44 00 00 e8 16 07 00 00 90 f7 c6 00 02 00 00 74 01 fb 65 ff 0d b5 23 b6 01 <74> 05 c3 cc cc cc cc 0f 1f 44 00 00 e9 14 23 00 00 0f 1f 40 00 90
> [  460.056285] RSP: 0018:ffffb762c0123d70 EFLAGS: 00000246
> [  460.056287] RAX: 0000000000000001 RBX: ffff9b14c08eafc0 RCX: ffff9b14c3337348
> [  460.056288] RDX: ffff9b14c3337348 RSI: 0000000000000246 RDI: ffff9b14c3337340
> [  460.056289] RBP: ffff9b14c3337000 R08: ffffffff93cea880 R09: 0000000000000001
> [  460.056290] R10: 0000000000000001 R11: 0000000000000080 R12: ffff9b14c1b72500
> [  460.056291] R13: ffffb762c0123de0 R14: ffff9b14c3337340 R15: ffff9b14c3337080
> [  460.056294] FS:  0000000000000000(0000) GS:ffff9b1563788000(0000) knlGS:0000000000000000
> [  460.056296] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [  460.056297] CR2: 00007f36fd5d21b4 CR3: 000000010241a002 CR4: 00000000007706f0
> [  460.056298] PKRU: 55555554
> [  460.056299] Call Trace:
> [  460.056300]  <TASK>
> [  460.056302]  netlink_attachskb+0xb7/0x2f0
> [  460.056308]  ? __pfx_default_wake_function+0x10/0x10
> [  460.056313]  netlink_unicast+0xea/0x3b0
...
>
> We've bug reports from many users, so the issue is rather wide-spread.
>
> So far I don't know why the commit is causing this issue and will keep
> investigating. However, when reverted (together with its 2 follow-up
> patches), the issue goes away and host do not lock up.

Thanks for the report, Max!

Does your tree have this commit ?  This is the 3rd follow-up patch.

commit 759dfc7d04bab1b0b86113f1164dc1fec192b859
Author: Fedor Pchelkin <pchelkin@ispras.ru>
Date:   Mon Jul 28 08:06:47 2025

    netlink: avoid infinite retry looping in netlink_unicast()

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v1 net] netlink: Fix wraparounds of sk->sk_rmem_alloc.
  2025-08-08 15:54   ` Kuniyuki Iwashima
@ 2025-08-13 19:00     ` Paul Moore
  2025-08-15 10:00       ` Heyne, Maximilian
  0 siblings, 1 reply; 10+ messages in thread
From: Paul Moore @ 2025-08-13 19:00 UTC (permalink / raw)
  To: Kuniyuki Iwashima, Heyne, Maximilian
  Cc: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
	Simon Horman, Kuniyuki Iwashima, netdev@vger.kernel.org,
	Jason Baron, Ahmed, Aaron, Kumar, Praveen, Eric Paris,
	linux-audit@redhat.com, linux-kernel@vger.kernel.org

On Fri, Aug 8, 2025 at 11:54 AM Kuniyuki Iwashima <kuniyu@google.com> wrote:
> On Fri, Aug 8, 2025 at 6:59 AM Heyne, Maximilian <mheyne@amazon.de> wrote:
> > On Fri, Jul 04, 2025 at 05:48:18AM +0000, Kuniyuki Iwashima wrote:
> > > Netlink has this pattern in some places
> > >
> > >   if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
> > >       atomic_add(skb->truesize, &sk->sk_rmem_alloc);
> > >
> > > , which has the same problem fixed by commit 5a465a0da13e ("udp:
> > > Fix multiple wraparounds of sk->sk_rmem_alloc.").
> > >
> > > For example, if we set INT_MAX to SO_RCVBUFFORCE, the condition
> > > is always false as the two operands are of int.
> > >
> > > Then, a single socket can eat as many skb as possible until OOM
> > > happens, and we can see multiple wraparounds of sk->sk_rmem_alloc.
> > >
> > > Let's fix it by using atomic_add_return() and comparing the two
> > > variables as unsigned int.
> > >
> > > Before:
> > >   [root@fedora ~]# ss -f netlink
> > >   Recv-Q      Send-Q Local Address:Port                Peer Address:Port
> > >   -1668710080 0               rtnl:nl_wraparound/293               *
> > >
> > > After:
> > >   [root@fedora ~]# ss -f netlink
> > >   Recv-Q     Send-Q Local Address:Port                Peer Address:Port
> > >   2147483072 0               rtnl:nl_wraparound/290               *
> > >   ^
> > >   `--- INT_MAX - 576
> > >
> > > Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
> > > Reported-by: Jason Baron <jbaron@akamai.com>
> > > Closes: https://lore.kernel.org/netdev/cover.1750285100.git.jbaron@akamai.com/
> > > Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>
> >
> > Hi Kuniyuki,
> >
> > We're seeing soft lockups with this patch in a variety of (stable)
> > kernel versions.
> >
> > I was able to reproduce it on a couple of different EC2 instances also
> > with the latest linux kernel 6.16-rc7 using the following steps:
> >
> > systemctl start auditd
> > sudo auditctl -D
> > sudo auditctl -b 512
> > sudo auditctl -a always,exit -F arch=b64 -S mmap,munmap,mprotect,brk -k memory_ops
> > sudo auditctl -e 1
> >
> > Then execute some programs that call some of these memory functions,
> > such as repeated calls of "sudo auditctl -s" or logging in via SSH.
> >
> > These settings are set in a way to create a lot audit messages. Once the
> > backlog (see auditctl -s) overshoots the backlog_limit, the system soft
> > lockups:
> >
> > [  460.056244] watchdog: BUG: soft lockup - CPU#1 stuck for 52s! [kauditd:32]
> > [  460.056249] Modules linked in: mousedev(E) nls_ascii(E) nls_cp437(E) sunrpc(E) vfat(E) fat(E) psmouse(E) atkbd(E) libps2(E) vivaldi_fmap(E) i8042(E) serio(E) skx_edac_common(E) button(E) ena(E) ghash_clmulni_intel(E) sch_fq_codel(E) drm(E) i2c_core(E) dm_mod(E) drm_panel_orientation_quirks(E) backlight(E) fuse(E) loop(E) dax(E) configfs(E) dmi_sysfs(E) efivarfs(E)
> > [  460.056272] CPU: 1 UID: 0 PID: 32 Comm: kauditd Tainted: G            EL      6.16.0-rc7+ #3 PREEMPT(none)
> > [  460.056275] Tainted: [E]=UNSIGNED_MODULE, [L]=SOFTLOCKUP
> > [  460.056276] Hardware name: Amazon EC2 t3.medium/, BIOS 1.0 10/16/2017
> > [  460.056277] RIP: 0010:_raw_spin_unlock_irqrestore+0x1b/0x30
> > [  460.056284] Code: 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 0f 1f 44 00 00 e8 16 07 00 00 90 f7 c6 00 02 00 00 74 01 fb 65 ff 0d b5 23 b6 01 <74> 05 c3 cc cc cc cc 0f 1f 44 00 00 e9 14 23 00 00 0f 1f 40 00 90
> > [  460.056285] RSP: 0018:ffffb762c0123d70 EFLAGS: 00000246
> > [  460.056287] RAX: 0000000000000001 RBX: ffff9b14c08eafc0 RCX: ffff9b14c3337348
> > [  460.056288] RDX: ffff9b14c3337348 RSI: 0000000000000246 RDI: ffff9b14c3337340
> > [  460.056289] RBP: ffff9b14c3337000 R08: ffffffff93cea880 R09: 0000000000000001
> > [  460.056290] R10: 0000000000000001 R11: 0000000000000080 R12: ffff9b14c1b72500
> > [  460.056291] R13: ffffb762c0123de0 R14: ffff9b14c3337340 R15: ffff9b14c3337080
> > [  460.056294] FS:  0000000000000000(0000) GS:ffff9b1563788000(0000) knlGS:0000000000000000
> > [  460.056296] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > [  460.056297] CR2: 00007f36fd5d21b4 CR3: 000000010241a002 CR4: 00000000007706f0
> > [  460.056298] PKRU: 55555554
> > [  460.056299] Call Trace:
> > [  460.056300]  <TASK>
> > [  460.056302]  netlink_attachskb+0xb7/0x2f0
> > [  460.056308]  ? __pfx_default_wake_function+0x10/0x10
> > [  460.056313]  netlink_unicast+0xea/0x3b0
> ...
> >
> > We've bug reports from many users, so the issue is rather wide-spread.
> >
> > So far I don't know why the commit is causing this issue and will keep
> > investigating. However, when reverted (together with its 2 follow-up
> > patches), the issue goes away and host do not lock up.
>
> Thanks for the report, Max!
>
> Does your tree have this commit ?  This is the 3rd follow-up patch.
>
> commit 759dfc7d04bab1b0b86113f1164dc1fec192b859
> Author: Fedor Pchelkin <pchelkin@ispras.ru>
> Date:   Mon Jul 28 08:06:47 2025
>
>     netlink: avoid infinite retry looping in netlink_unicast()

Hopefully that resolves the problem, Maximilian?

Normally the audit subsystem is reasonably robust when faced with
significant audit loads.  An example I use for testing is to enable
logging for *every* syscall (from the command line, don't make this
persist via the config file!) and then shut down the system; the system
will obviously slow quite a bit under the absurd load, but it should
shut down gracefully without any lockups.

-- 
paul-moore.com

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v1 net] netlink: Fix wraparounds of sk->sk_rmem_alloc.
  2025-08-13 19:00     ` Paul Moore
@ 2025-08-15 10:00       ` Heyne, Maximilian
  2025-08-15 14:12         ` Paul Moore
  0 siblings, 1 reply; 10+ messages in thread
From: Heyne, Maximilian @ 2025-08-15 10:00 UTC (permalink / raw)
  To: Paul Moore
  Cc: Kuniyuki Iwashima, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, Simon Horman, Kuniyuki Iwashima,
	netdev@vger.kernel.org, Jason Baron, Ahmed, Aaron, Kumar, Praveen,
	Eric Paris, linux-audit@redhat.com, linux-kernel@vger.kernel.org

On Wed, Aug 13, 2025 at 03:00:29PM -0400, Paul Moore wrote:
> On Fri, Aug 8, 2025 at 11:54 AM Kuniyuki Iwashima <kuniyu@google.com> wrote:
> > On Fri, Aug 8, 2025 at 6:59 AM Heyne, Maximilian <mheyne@amazon.de> wrote:
> > > On Fri, Jul 04, 2025 at 05:48:18AM +0000, Kuniyuki Iwashima wrote:
> > > > Netlink has this pattern in some places
> > > >
> > > >   if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
> > > >       atomic_add(skb->truesize, &sk->sk_rmem_alloc);
> > > >
> > > > , which has the same problem fixed by commit 5a465a0da13e ("udp:
> > > > Fix multiple wraparounds of sk->sk_rmem_alloc.").
> > > >
> > > > For example, if we set INT_MAX to SO_RCVBUFFORCE, the condition
> > > > is always false as the two operands are of int.
> > > >
> > > > Then, a single socket can eat as many skb as possible until OOM
> > > > happens, and we can see multiple wraparounds of sk->sk_rmem_alloc.
> > > >
> > > > Let's fix it by using atomic_add_return() and comparing the two
> > > > variables as unsigned int.
> > > >
> > > > Before:
> > > >   [root@fedora ~]# ss -f netlink
> > > >   Recv-Q      Send-Q Local Address:Port                Peer Address:Port
> > > >   -1668710080 0               rtnl:nl_wraparound/293               *
> > > >
> > > > After:
> > > >   [root@fedora ~]# ss -f netlink
> > > >   Recv-Q     Send-Q Local Address:Port                Peer Address:Port
> > > >   2147483072 0               rtnl:nl_wraparound/290               *
> > > >   ^
> > > >   `--- INT_MAX - 576
> > > >
> > > > Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
> > > > Reported-by: Jason Baron <jbaron@akamai.com>
> > > > Closes: https://lore.kernel.org/netdev/cover.1750285100.git.jbaron@akamai.com/
> > > > Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>
> > >
> > > Hi Kuniyuki,
> > >
> > > We're seeing soft lockups with this patch in a variety of (stable)
> > > kernel versions.
> > >
> > > I was able to reproduce it on a couple of different EC2 instances also
> > > with the latest linux kernel 6.16-rc7 using the following steps:
> > >
> > > systemctl start auditd
> > > sudo auditctl -D
> > > sudo auditctl -b 512
> > > sudo auditctl -a always,exit -F arch=b64 -S mmap,munmap,mprotect,brk -k memory_ops
> > > sudo auditctl -e 1
> > >
> > > Then execute some programs that call some of these memory functions,
> > > such as repeated calls of "sudo auditctl -s" or logging in via SSH.
> > >
> > > These settings are set in a way to create a lot audit messages. Once the
> > > backlog (see auditctl -s) overshoots the backlog_limit, the system soft
> > > lockups:
> > >
> > > [  460.056244] watchdog: BUG: soft lockup - CPU#1 stuck for 52s! [kauditd:32]
> > > [  460.056249] Modules linked in: mousedev(E) nls_ascii(E) nls_cp437(E) sunrpc(E) vfat(E) fat(E) psmouse(E) atkbd(E) libps2(E) vivaldi_fmap(E) i8042(E) serio(E) skx_edac_common(E) button(E) ena(E) ghash_clmulni_intel(E) sch_fq_codel(E) drm(E) i2c_core(E) dm_mod(E) drm_panel_orientation_quirks(E) backlight(E) fuse(E) loop(E) dax(E) configfs(E) dmi_sysfs(E) efivarfs(E)
> > > [  460.056272] CPU: 1 UID: 0 PID: 32 Comm: kauditd Tainted: G            EL      6.16.0-rc7+ #3 PREEMPT(none)
> > > [  460.056275] Tainted: [E]=UNSIGNED_MODULE, [L]=SOFTLOCKUP
> > > [  460.056276] Hardware name: Amazon EC2 t3.medium/, BIOS 1.0 10/16/2017
> > > [  460.056277] RIP: 0010:_raw_spin_unlock_irqrestore+0x1b/0x30
> > > [  460.056284] Code: 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 0f 1f 44 00 00 e8 16 07 00 00 90 f7 c6 00 02 00 00 74 01 fb 65 ff 0d b5 23 b6 01 <74> 05 c3 cc cc cc cc 0f 1f 44 00 00 e9 14 23 00 00 0f 1f 40 00 90
> > > [  460.056285] RSP: 0018:ffffb762c0123d70 EFLAGS: 00000246
> > > [  460.056287] RAX: 0000000000000001 RBX: ffff9b14c08eafc0 RCX: ffff9b14c3337348
> > > [  460.056288] RDX: ffff9b14c3337348 RSI: 0000000000000246 RDI: ffff9b14c3337340
> > > [  460.056289] RBP: ffff9b14c3337000 R08: ffffffff93cea880 R09: 0000000000000001
> > > [  460.056290] R10: 0000000000000001 R11: 0000000000000080 R12: ffff9b14c1b72500
> > > [  460.056291] R13: ffffb762c0123de0 R14: ffff9b14c3337340 R15: ffff9b14c3337080
> > > [  460.056294] FS:  0000000000000000(0000) GS:ffff9b1563788000(0000) knlGS:0000000000000000
> > > [  460.056296] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > > [  460.056297] CR2: 00007f36fd5d21b4 CR3: 000000010241a002 CR4: 00000000007706f0
> > > [  460.056298] PKRU: 55555554
> > > [  460.056299] Call Trace:
> > > [  460.056300]  <TASK>
> > > [  460.056302]  netlink_attachskb+0xb7/0x2f0
> > > [  460.056308]  ? __pfx_default_wake_function+0x10/0x10
> > > [  460.056313]  netlink_unicast+0xea/0x3b0
> > ...
> > >
> > > We've bug reports from many users, so the issue is rather wide-spread.
> > >
> > > So far I don't know why the commit is causing this issue and will keep
> > > investigating. However, when reverted (together with its 2 follow-up
> > > patches), the issue goes away and host do not lock up.
> >
> > Thanks for the report, Max!
> >
> > Does your tree have this commit ?  This is the 3rd follow-up patch.
> >
> > commit 759dfc7d04bab1b0b86113f1164dc1fec192b859
> > Author: Fedor Pchelkin <pchelkin@ispras.ru>
> > Date:   Mon Jul 28 08:06:47 2025
> >
> >     netlink: avoid infinite retry looping in netlink_unicast()
> 

Hi Paul,

> Hopefully that resolves the problem, Maximilian?

Sorry for the late reply. I tested the commit yesterday and can
confirm that it fixes our issues.

> Normally the audit subsystem is reasonably robust when faced with
> significant audit loads.  An example I use for testing is to enable
> logging for *every* syscall (from the command line, don't make this
> persist via the config file!) and then shutdown the system; the system
> will obviously slow quite a bit under the absurd load, but it should
> shutdown gracefully without any lockups.
> 

Thank you for suggesting this. Will add something like this to our
internal testing. Do you know whether there is already a stress test
that covers the audit subsystem, or whether any selftest would have
found this issue?

Regards,
Maximilian



Amazon Web Services Development Center Germany GmbH
Tamara-Danz-Str. 13
10243 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss
Eingetragen am Amtsgericht Charlottenburg unter HRB 257764 B
Sitz: Berlin
Ust-ID: DE 365 538 597


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v1 net] netlink: Fix wraparounds of sk->sk_rmem_alloc.
  2025-08-15 10:00       ` Heyne, Maximilian
@ 2025-08-15 14:12         ` Paul Moore
  0 siblings, 0 replies; 10+ messages in thread
From: Paul Moore @ 2025-08-15 14:12 UTC (permalink / raw)
  To: Heyne, Maximilian
  Cc: Kuniyuki Iwashima, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, Simon Horman, Kuniyuki Iwashima,
	netdev@vger.kernel.org, Jason Baron, Ahmed, Aaron, Kumar, Praveen,
	Eric Paris, linux-audit@redhat.com, linux-kernel@vger.kernel.org

On Fri, Aug 15, 2025 at 6:00 AM Heyne, Maximilian <mheyne@amazon.de> wrote:
> On Wed, Aug 13, 2025 at 03:00:29PM -0400, Paul Moore wrote:

...

> > Hopefully that resolves the problem, Maximilian?
>
> sorry for the late reply. Just tested the commit yesterday and I can
> confirm that this fixes our issues.

Great, thanks for confirming that.

> > Normally the audit subsystem is reasonably robust when faced with
> > significant audit loads.  An example I use for testing is to enable
> > logging for *every* syscall (from the command line, don't make this
> > persist via the config file!) and then shutdown the system; the system
> > will obviously slow quite a bit under the absurd load, but it should
> > shutdown gracefully without any lockups.
>
> Thank you for suggesting this. Will add something like this to our
> internal testing.

I wish I could say I regularly stress the audit subsystem in that way,
but I typically only do that when I make a related change or happen to
notice something in a related subsystem which might have an impact.
Additional testing is always welcome!

> Do you know whether there is already some stress test
> that covers the audit subsystem ...

Aside from the manual test that I already mentioned, which is my
preferred mechanism for stressing the logging/queuing mechanism, there
are two (?) contributed stress tests in the audit-testsuite, but I
don't run them and I doubt anyone does on a regular basis (look in the
tests_manual directory).

* https://github.com/linux-audit/audit-testsuite

> ... or would have any selftest found this issue?

Not having looked at the root cause, as that work was done before I
dug into this thread, I honestly can't say.

-- 
paul-moore.com

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2025-08-15 14:12 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-07-04  5:48 [PATCH v1 net] netlink: Fix wraparounds of sk->sk_rmem_alloc Kuniyuki Iwashima
2025-07-08  0:43 ` patchwork-bot+netdevbpf
     [not found] ` <CGME20250710083401eucas1p1d18e23791e1f22c0c0aaf823a35526a2@eucas1p1.samsung.com>
2025-07-10  8:34   ` Marek Szyprowski
2025-07-10 19:43     ` Jakub Kicinski
2025-07-10 23:33       ` Marek Szyprowski
2025-08-08 13:59 ` Heyne, Maximilian
2025-08-08 15:54   ` Kuniyuki Iwashima
2025-08-13 19:00     ` Paul Moore
2025-08-15 10:00       ` Heyne, Maximilian
2025-08-15 14:12         ` Paul Moore

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).