From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: stable@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
patches@lists.linux.dev, Jason Baron <jbaron@akamai.com>,
Kuniyuki Iwashima <kuniyu@google.com>,
Jakub Kicinski <kuba@kernel.org>, Sasha Levin <sashal@kernel.org>
Subject: [PATCH 5.15 05/77] netlink: Fix wraparounds of sk->sk_rmem_alloc.
Date: Tue, 15 Jul 2025 15:13:04 +0200 [thread overview]
Message-ID: <20250715130751.898988255@linuxfoundation.org> (raw)
In-Reply-To: <20250715130751.668489382@linuxfoundation.org>
5.15-stable review patch. If anyone has any objections, please let me know.
------------------
From: Kuniyuki Iwashima <kuniyu@google.com>
[ Upstream commit ae8f160e7eb24240a2a79fc4c815c6a0d4ee16cc ]
Netlink has this pattern in some places
if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
atomic_add(skb->truesize, &sk->sk_rmem_alloc);
, which has the same problem fixed by commit 5a465a0da13e ("udp:
Fix multiple wraparounds of sk->sk_rmem_alloc.").
For example, if we set INT_MAX to SO_RCVBUFFORCE, the condition
is always false as the two operands are of int.
Then, a single socket can eat as many skb as possible until OOM
happens, and we can see multiple wraparounds of sk->sk_rmem_alloc.
Let's fix it by using atomic_add_return() and comparing the two
variables as unsigned int.
Before:
[root@fedora ~]# ss -f netlink
Recv-Q Send-Q Local Address:Port Peer Address:Port
-1668710080 0 rtnl:nl_wraparound/293 *
After:
[root@fedora ~]# ss -f netlink
Recv-Q Send-Q Local Address:Port Peer Address:Port
2147483072 0 rtnl:nl_wraparound/290 *
^
`--- INT_MAX - 576
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Reported-by: Jason Baron <jbaron@akamai.com>
Closes: https://lore.kernel.org/netdev/cover.1750285100.git.jbaron@akamai.com/
Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20250704054824.1580222-1-kuniyu@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
net/netlink/af_netlink.c | 81 ++++++++++++++++++++++++----------------
1 file changed, 49 insertions(+), 32 deletions(-)
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index d3852526ef52e..ae83e56478998 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -387,7 +387,6 @@ static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
WARN_ON(skb->sk != NULL);
skb->sk = sk;
skb->destructor = netlink_skb_destructor;
- atomic_add(skb->truesize, &sk->sk_rmem_alloc);
sk_mem_charge(sk, skb->truesize);
}
@@ -1211,41 +1210,48 @@ static struct sk_buff *netlink_alloc_large_skb(unsigned int size,
int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
long *timeo, struct sock *ssk)
{
+ DECLARE_WAITQUEUE(wait, current);
struct netlink_sock *nlk;
+ unsigned int rmem;
nlk = nlk_sk(sk);
+ rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc);
- if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
- test_bit(NETLINK_S_CONGESTED, &nlk->state))) {
- DECLARE_WAITQUEUE(wait, current);
- if (!*timeo) {
- if (!ssk || netlink_is_kernel(ssk))
- netlink_overrun(sk);
- sock_put(sk);
- kfree_skb(skb);
- return -EAGAIN;
- }
-
- __set_current_state(TASK_INTERRUPTIBLE);
- add_wait_queue(&nlk->wait, &wait);
+ if ((rmem == skb->truesize || rmem < READ_ONCE(sk->sk_rcvbuf)) &&
+ !test_bit(NETLINK_S_CONGESTED, &nlk->state)) {
+ netlink_skb_set_owner_r(skb, sk);
+ return 0;
+ }
- if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
- test_bit(NETLINK_S_CONGESTED, &nlk->state)) &&
- !sock_flag(sk, SOCK_DEAD))
- *timeo = schedule_timeout(*timeo);
+ atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
- __set_current_state(TASK_RUNNING);
- remove_wait_queue(&nlk->wait, &wait);
+ if (!*timeo) {
+ if (!ssk || netlink_is_kernel(ssk))
+ netlink_overrun(sk);
sock_put(sk);
+ kfree_skb(skb);
+ return -EAGAIN;
+ }
- if (signal_pending(current)) {
- kfree_skb(skb);
- return sock_intr_errno(*timeo);
- }
- return 1;
+ __set_current_state(TASK_INTERRUPTIBLE);
+ add_wait_queue(&nlk->wait, &wait);
+ rmem = atomic_read(&sk->sk_rmem_alloc);
+
+ if (((rmem && rmem + skb->truesize > READ_ONCE(sk->sk_rcvbuf)) ||
+ test_bit(NETLINK_S_CONGESTED, &nlk->state)) &&
+ !sock_flag(sk, SOCK_DEAD))
+ *timeo = schedule_timeout(*timeo);
+
+ __set_current_state(TASK_RUNNING);
+ remove_wait_queue(&nlk->wait, &wait);
+ sock_put(sk);
+
+ if (signal_pending(current)) {
+ kfree_skb(skb);
+ return sock_intr_errno(*timeo);
}
- netlink_skb_set_owner_r(skb, sk);
- return 0;
+
+ return 1;
}
static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb)
@@ -1305,6 +1311,7 @@ static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb,
ret = -ECONNREFUSED;
if (nlk->netlink_rcv != NULL) {
ret = skb->len;
+ atomic_add(skb->truesize, &sk->sk_rmem_alloc);
netlink_skb_set_owner_r(skb, sk);
NETLINK_CB(skb).sk = ssk;
netlink_deliver_tap_kernel(sk, ssk, skb);
@@ -1383,13 +1390,19 @@ EXPORT_SYMBOL_GPL(netlink_strict_get_check);
static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
{
struct netlink_sock *nlk = nlk_sk(sk);
+ unsigned int rmem, rcvbuf;
- if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
+ rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc);
+ rcvbuf = READ_ONCE(sk->sk_rcvbuf);
+
+ if ((rmem != skb->truesize || rmem <= rcvbuf) &&
!test_bit(NETLINK_S_CONGESTED, &nlk->state)) {
netlink_skb_set_owner_r(skb, sk);
__netlink_sendskb(sk, skb);
- return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1);
+ return rmem > (rcvbuf >> 1);
}
+
+ atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
return -1;
}
@@ -2206,6 +2219,7 @@ static int netlink_dump(struct sock *sk, bool lock_taken)
struct module *module;
int err = -ENOBUFS;
int alloc_min_size;
+ unsigned int rmem;
int alloc_size;
if (!lock_taken)
@@ -2215,9 +2229,6 @@ static int netlink_dump(struct sock *sk, bool lock_taken)
goto errout_skb;
}
- if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
- goto errout_skb;
-
/* NLMSG_GOODSIZE is small to avoid high order allocations being
* required, but it makes sense to _attempt_ a 16K bytes allocation
* to reduce number of system calls on dump operations, if user
@@ -2240,6 +2251,12 @@ static int netlink_dump(struct sock *sk, bool lock_taken)
if (!skb)
goto errout_skb;
+ rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc);
+ if (rmem >= READ_ONCE(sk->sk_rcvbuf)) {
+ atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
+ goto errout_skb;
+ }
+
/* Trim skb to allocated size. User is expected to provide buffer as
* large as max(min_dump_alloc, 16KiB (mac_recvmsg_len capped at
* netlink_recvmsg())). dump will pack as many smaller messages as
--
2.39.5
next prev parent reply other threads:[~2025-07-15 13:29 UTC|newest]
Thread overview: 86+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-07-15 13:12 [PATCH 5.15 00/77] 5.15.189-rc1 review Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 01/77] drm/exynos: exynos7_drm_decon: add vblank check in IRQ handling Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 02/77] ASoC: fsl_asrc: use internal measured ratio for non-ideal ratio mode Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 03/77] perf: Revert to requiring CAP_SYS_ADMIN for uprobes Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 04/77] fix proc_sys_compare() handling of in-lookup dentries Greg Kroah-Hartman
2025-07-15 13:13 ` Greg Kroah-Hartman [this message]
2025-07-15 13:13 ` [PATCH 5.15 06/77] tipc: Fix use-after-free in tipc_conn_close() Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 07/77] vsock: Fix transport_{g2h,h2g} TOCTOU Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 08/77] vsock: Fix transport_* TOCTOU Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 09/77] vsock: Fix IOCTL_VM_SOCKETS_GET_LOCAL_CID to check also `transport_local` Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 10/77] net: phy: smsc: Fix Auto-MDIX configuration when disabled by strap Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 11/77] net: phy: smsc: Fix link failure in forced mode with Auto-MDIX Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 12/77] atm: clip: Fix potential null-ptr-deref in to_atmarpd() Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 13/77] atm: clip: Fix memory leak of struct clip_vcc Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 14/77] atm: clip: Fix infinite recursive call of clip_push() Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 15/77] atm: clip: Fix NULL pointer dereference in vcc_sendmsg() Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 16/77] net/sched: Abort __tc_modify_qdisc if parent class does not exist Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 17/77] fs/proc: do_task_stat: use __for_each_thread() Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 18/77] ice: safer stats processing Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 19/77] rxrpc: Fix oops due to non-existence of prealloc backlog struct Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 20/77] bpf: fix precision backtracking instruction iteration Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 21/77] thermal/int340x_thermal: handle data_vault when the value is ZERO_SIZE_PTR Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 22/77] aoe: avoid potential deadlock at set_capacity Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 23/77] bpf, sockmap: Fix skb refcnt race after locking changes Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 24/77] jfs: fix null ptr deref in dtInsertEntry Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 25/77] xen: replace xen_remap() with memremap() Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 26/77] x86/mce/amd: Fix threshold limit reset Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 27/77] x86/mce: Dont remove sysfs if thresholding sysfs init fails Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 28/77] x86/mce: Make sure CMCI banks are cleared during shutdown on Intel Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 29/77] gre: Fix IPv6 multicast route creation Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 30/77] pinctrl: qcom: msm: mark certain pins as invalid for interrupts Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 31/77] drm/sched: Increment job count before swapping tail spsc queue Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 32/77] drm/gem: Fix race in drm_gem_handle_create_tail() Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 33/77] usb: gadget: u_serial: Fix race condition in TTY wakeup Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 34/77] Revert "ACPI: battery: negate current when discharging" Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 35/77] btrfs: propagate last_unlink_trans earlier when doing a rmdir Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 36/77] btrfs: use btrfs_record_snapshot_destroy() during rmdir Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 37/77] RDMA/mlx5: Fix vport loopback for MPV device Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 38/77] pwm: mediatek: Ensure to disable clocks in error path Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 39/77] netlink: Fix rmem check in netlink_broadcast_deliver() Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 40/77] netlink: make sure we allow at least one dump skb Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 41/77] virtio-net: ensure the received length does not exceed allocated size Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 42/77] xhci: Allow RPM on the USB controller (1022:43f7) by default Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 43/77] usb: xhci: quirk for data loss in ISOC transfers Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 44/77] xhci: Disable stream for xHC controller with XHCI_BROKEN_STREAMS Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 45/77] Input: xpad - support Acer NGR 200 Controller Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 46/77] usb:cdnsp: remove TRB_FLUSH_ENDPOINT command Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 47/77] usb: cdnsp: Replace snprintf() with the safer scnprintf() variant Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 48/77] usb: cdnsp: Fix issue with CV Bad Descriptor test Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 49/77] usb: dwc3: Abort suspend on soft disconnect failure Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 50/77] dma-buf: add dma_resv_for_each_fence_unlocked v8 Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 51/77] dma-buf: use new iterator in dma_resv_wait_timeout Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 52/77] dma-buf: fix timeout handling in dma_resv_wait_timeout v2 Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 53/77] platform/x86: think-lmi: Fix sysfs group cleanup Greg Kroah-Hartman
2025-07-15 18:38 ` Kurt Borja
2025-07-15 19:00 ` Kurt Borja
2025-07-15 19:04 ` Sasha Levin
2025-07-15 19:22 ` Kurt Borja
2025-07-15 19:12 ` Sasha Levin
2025-07-15 21:18 ` Kurt Borja
2025-07-15 13:13 ` [PATCH 5.15 54/77] wifi: zd1211rw: Fix potential NULL pointer dereference in zd_mac_tx_to_dev() Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 55/77] md/raid1: Fix stack memory use after return in raid1_reshape Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 56/77] raid10: cleanup memleak at raid10_make_request Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 57/77] nbd: fix uaf in nbd_genl_connect() error path Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 58/77] smb: server: make use of rdma_destroy_qp() Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 59/77] ksmbd: fix a mount write count leak in ksmbd_vfs_kern_path_locked() Greg Kroah-Hartman
2025-07-15 13:13 ` [PATCH 5.15 60/77] netfilter: flowtable: account for Ethernet header in nf_flow_pppoe_proto() Greg Kroah-Hartman
2025-07-15 13:14 ` [PATCH 5.15 61/77] net: appletalk: Fix device refcount leak in atrtr_create() Greg Kroah-Hartman
2025-07-15 13:14 ` [PATCH 5.15 62/77] net: phy: microchip: limit 100M workaround to link-down events on LAN88xx Greg Kroah-Hartman
2025-07-15 13:14 ` [PATCH 5.15 63/77] can: m_can: m_can_handle_lost_msg(): downgrade msg lost in rx message to debug level Greg Kroah-Hartman
2025-07-15 13:14 ` [PATCH 5.15 64/77] net: ll_temac: Fix missing tx_pending check in ethtools_set_ringparam() Greg Kroah-Hartman
2025-07-15 13:14 ` [PATCH 5.15 65/77] bnxt_en: Fix DCB ETS validation Greg Kroah-Hartman
2025-07-15 13:14 ` [PATCH 5.15 66/77] bnxt_en: Set DMA unmap len correctly for XDP_REDIRECT Greg Kroah-Hartman
2025-07-15 13:14 ` [PATCH 5.15 67/77] atm: idt77252: Add missing `dma_map_error()` Greg Kroah-Hartman
2025-07-15 13:14 ` [PATCH 5.15 68/77] um: vector: Reduce stack usage in vector_eth_configure() Greg Kroah-Hartman
2025-07-15 13:14 ` [PATCH 5.15 69/77] net: usb: qmi_wwan: add SIMCom 8230C composition Greg Kroah-Hartman
2025-07-15 13:14 ` [PATCH 5.15 70/77] HID: lenovo: Add support for ThinkPad X1 Tablet Thin Keyboard Gen2 Greg Kroah-Hartman
2025-07-15 13:14 ` [PATCH 5.15 71/77] vt: add missing notification when switching back to text mode Greg Kroah-Hartman
2025-07-15 13:14 ` [PATCH 5.15 72/77] HID: Add IGNORE quirk for SMARTLINKTECHNOLOGY Greg Kroah-Hartman
2025-07-15 13:14 ` [PATCH 5.15 73/77] HID: quirks: Add quirk for 2 Chicony Electronics HP 5MP Cameras Greg Kroah-Hartman
2025-07-15 13:14 ` [PATCH 5.15 74/77] Input: atkbd - do not skip atkbd_deactivate() when skipping ATKBD_CMD_GETID Greg Kroah-Hartman
2025-07-15 13:14 ` [PATCH 5.15 75/77] vhost-scsi: protect vq->log_used with vq->mutex Greg Kroah-Hartman
2025-07-15 13:14 ` [PATCH 5.15 76/77] x86/mm: Disable hugetlb page table sharing on 32-bit Greg Kroah-Hartman
2025-07-15 13:14 ` [PATCH 5.15 77/77] x86: Fix X86_FEATURE_VERW_CLEAR definition Greg Kroah-Hartman
2025-07-15 16:20 ` [PATCH 5.15 00/77] 5.15.189-rc1 review Vijayendra Suman
2025-07-16 14:59 ` Shuah Khan
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250715130751.898988255@linuxfoundation.org \
--to=gregkh@linuxfoundation.org \
--cc=jbaron@akamai.com \
--cc=kuba@kernel.org \
--cc=kuniyu@google.com \
--cc=patches@lists.linux.dev \
--cc=sashal@kernel.org \
--cc=stable@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).