* [PATCH 6.12.y v2] mm/vmalloc: fix data race in show_numa_info()
@ 2025-07-03 13:01 Jeongjun Park
2025-07-05 21:40 ` Sasha Levin
0 siblings, 1 reply; 5+ messages in thread
From: Jeongjun Park @ 2025-07-03 13:01 UTC (permalink / raw)
To: stable; +Cc: urezki, akpm, edumazet, linux-mm, linux-kernel, Jeongjun Park
commit 5c5f0468d172ddec2e333d738d2a1f85402cf0bc upstream.
The following data-race was found in show_numa_info():
==================================================================
BUG: KCSAN: data-race in vmalloc_info_show / vmalloc_info_show
read to 0xffff88800971fe30 of 4 bytes by task 8289 on cpu 0:
show_numa_info mm/vmalloc.c:4936 [inline]
vmalloc_info_show+0x5a8/0x7e0 mm/vmalloc.c:5016
seq_read_iter+0x373/0xb40 fs/seq_file.c:230
proc_reg_read_iter+0x11e/0x170 fs/proc/inode.c:299
....
write to 0xffff88800971fe30 of 4 bytes by task 8287 on cpu 1:
show_numa_info mm/vmalloc.c:4934 [inline]
vmalloc_info_show+0x38f/0x7e0 mm/vmalloc.c:5016
seq_read_iter+0x373/0xb40 fs/seq_file.c:230
proc_reg_read_iter+0x11e/0x170 fs/proc/inode.c:299
....
value changed: 0x0000008f -> 0x00000000
==================================================================
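The race is hit simply by reading /proc/vmallocinfo from two CPUs at once, as the
two tasks in the report do. A minimal userspace reproducer sketch, assuming a
KCSAN-enabled kernel without this fix and root privileges (the file is mode 0400):

	/* vmallocinfo-race.c - illustrative only; build with: gcc -pthread */
	#include <fcntl.h>
	#include <pthread.h>
	#include <unistd.h>

	static void *reader(void *arg)
	{
		char buf[4096];

		(void)arg;
		for (int i = 0; i < 1000; i++) {
			int fd = open("/proc/vmallocinfo", O_RDONLY);

			if (fd < 0)
				return NULL;
			/* each read iterates vmalloc_info_show() */
			while (read(fd, buf, sizeof(buf)) > 0)
				;
			close(fd);
		}
		return NULL;
	}

	int main(void)
	{
		pthread_t t[2];

		/* two concurrent readers, matching the two tasks in the report */
		for (int i = 0; i < 2; i++)
			pthread_create(&t[i], NULL, reader, NULL);
		for (int i = 0; i < 2; i++)
			pthread_join(t[i], NULL);
		return 0;
	}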
According to this report, there is a read/write data race because
m->private is accessible to multiple CPUs. To fix this, allocate the
buffer in vmalloc_info_show() itself, instead of allocating it once in
proc_vmalloc_init() and passing its address to every reader via
m->private.
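The before/after allocation pattern can be shown in a self-contained userspace
analogue (a minimal sketch with illustrative names, not the kernel code): the
racy variant shares one buffer across callers the way the old m->private did,
while the fixed variant allocates per call as vmalloc_info_show() now does:

	/* counters-analogue.c - illustrative only; build with: gcc -pthread */
	#include <pthread.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	#define NNODES 4

	static unsigned int shared_counters[NNODES];	/* analogue of the old m->private */

	static void *racy_show(void *arg)
	{
		(void)arg;
		/* every caller zeroes and refills one buffer: the read/write race */
		memset(shared_counters, 0, sizeof(shared_counters));
		for (int i = 0; i < NNODES; i++)
			shared_counters[i]++;
		printf(" N0=%u", shared_counters[0]);
		return NULL;
	}

	static void *fixed_show(void *arg)
	{
		/* per-call buffer, allocated and freed by the caller itself */
		unsigned int *counters = calloc(NNODES, sizeof(*counters));

		(void)arg;
		if (!counters)
			return NULL;
		for (int i = 0; i < NNODES; i++)
			counters[i]++;
		printf(" N0=%u", counters[0]);
		free(counters);
		return NULL;
	}

	int main(void)
	{
		pthread_t a, b;

		pthread_create(&a, NULL, racy_show, NULL);	/* two readers, one buffer */
		pthread_create(&b, NULL, racy_show, NULL);
		pthread_join(a, NULL);
		pthread_join(b, NULL);

		pthread_create(&a, NULL, fixed_show, NULL);	/* two readers, two buffers */
		pthread_create(&b, NULL, fixed_show, NULL);
		pthread_join(a, NULL);
		pthread_join(b, NULL);
		printf("\n");
		return 0;
	}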
Link: https://lkml.kernel.org/r/20250508165620.15321-1-aha310510@gmail.com
Fixes: 8e1d743f2c26 ("mm: vmalloc: support multiple nodes in vmallocinfo")
Signed-off-by: Jeongjun Park <aha310510@gmail.com>
Suggested-by: Eric Dumazet <edumazet@google.com>
Suggested-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: "Uladzislau Rezki (Sony)" <urezki@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
mm/vmalloc.c | 63 +++++++++++++++++++++++++++++-----------------------
1 file changed, 35 insertions(+), 28 deletions(-)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index cc04e501b1c5..7888600b6a79 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -3095,7 +3095,7 @@ static void clear_vm_uninitialized_flag(struct vm_struct *vm)
/*
* Before removing VM_UNINITIALIZED,
* we should make sure that vm has proper values.
- * Pair with smp_rmb() in show_numa_info().
+ * Pair with smp_rmb() in vread_iter() and vmalloc_info_show().
*/
smp_wmb();
vm->flags &= ~VM_UNINITIALIZED;
@@ -4938,28 +4938,29 @@ bool vmalloc_dump_obj(void *object)
#endif
#ifdef CONFIG_PROC_FS
-static void show_numa_info(struct seq_file *m, struct vm_struct *v)
-{
- if (IS_ENABLED(CONFIG_NUMA)) {
- unsigned int nr, *counters = m->private;
- unsigned int step = 1U << vm_area_page_order(v);
- if (!counters)
- return;
+/*
+ * Print number of pages allocated on each memory node.
+ *
+ * This function can only be called if CONFIG_NUMA is enabled
+ * and VM_UNINITIALIZED bit in v->flags is disabled.
+ */
+static void show_numa_info(struct seq_file *m, struct vm_struct *v,
+ unsigned int *counters)
+{
+ unsigned int nr;
+ unsigned int step = 1U << vm_area_page_order(v);
- if (v->flags & VM_UNINITIALIZED)
- return;
- /* Pair with smp_wmb() in clear_vm_uninitialized_flag() */
- smp_rmb();
+ if (!counters)
+ return;
- memset(counters, 0, nr_node_ids * sizeof(unsigned int));
+ memset(counters, 0, nr_node_ids * sizeof(unsigned int));
- for (nr = 0; nr < v->nr_pages; nr += step)
- counters[page_to_nid(v->pages[nr])] += step;
- for_each_node_state(nr, N_HIGH_MEMORY)
- if (counters[nr])
- seq_printf(m, " N%u=%u", nr, counters[nr]);
- }
+ for (nr = 0; nr < v->nr_pages; nr += step)
+ counters[page_to_nid(v->pages[nr])] += step;
+ for_each_node_state(nr, N_HIGH_MEMORY)
+ if (counters[nr])
+ seq_printf(m, " N%u=%u", nr, counters[nr]);
}
static void show_purge_info(struct seq_file *m)
@@ -4987,6 +4988,10 @@ static int vmalloc_info_show(struct seq_file *m, void *p)
struct vmap_area *va;
struct vm_struct *v;
int i;
+ unsigned int *counters;
+
+ if (IS_ENABLED(CONFIG_NUMA))
+ counters = kmalloc(nr_node_ids * sizeof(unsigned int), GFP_KERNEL);
for (i = 0; i < nr_vmap_nodes; i++) {
vn = &vmap_nodes[i];
@@ -5003,6 +5008,11 @@ static int vmalloc_info_show(struct seq_file *m, void *p)
}
v = va->vm;
+ if (v->flags & VM_UNINITIALIZED)
+ continue;
+
+ /* Pair with smp_wmb() in clear_vm_uninitialized_flag() */
+ smp_rmb();
seq_printf(m, "0x%pK-0x%pK %7ld",
v->addr, v->addr + v->size, v->size);
@@ -5037,7 +5047,9 @@ static int vmalloc_info_show(struct seq_file *m, void *p)
if (is_vmalloc_addr(v->pages))
seq_puts(m, " vpages");
- show_numa_info(m, v);
+ if (IS_ENABLED(CONFIG_NUMA))
+ show_numa_info(m, v, counters);
+
seq_putc(m, '\n');
}
spin_unlock(&vn->busy.lock);
@@ -5047,19 +5059,14 @@ static int vmalloc_info_show(struct seq_file *m, void *p)
* As a final step, dump "unpurged" areas.
*/
show_purge_info(m);
+ if (IS_ENABLED(CONFIG_NUMA))
+ kfree(counters);
return 0;
}
static int __init proc_vmalloc_init(void)
{
- void *priv_data = NULL;
-
- if (IS_ENABLED(CONFIG_NUMA))
- priv_data = kmalloc(nr_node_ids * sizeof(unsigned int), GFP_KERNEL);
-
- proc_create_single_data("vmallocinfo",
- 0400, NULL, vmalloc_info_show, priv_data);
-
+ proc_create_single("vmallocinfo", 0400, NULL, vmalloc_info_show);
return 0;
}
module_init(proc_vmalloc_init);
--
* Re: [PATCH 6.12.y v2] mm/vmalloc: fix data race in show_numa_info()
2025-07-03 13:01 [PATCH 6.12.y v2] mm/vmalloc: fix data race in show_numa_info() Jeongjun Park
@ 2025-07-05 21:40 ` Sasha Levin
2025-07-28 17:50 ` [PATCH 6.12.y v2] KVM: x86: Free vCPUs before freeing VM state Kevin Cheng
0 siblings, 1 reply; 5+ messages in thread
From: Sasha Levin @ 2025-07-05 21:40 UTC (permalink / raw)
To: stable; +Cc: Jeongjun Park, Sasha Levin
[ Sasha's backport helper bot ]
Hi,
✅ All tests passed successfully. No issues detected.
No action required from the submitter.
The upstream commit SHA1 provided is correct: 5c5f0468d172ddec2e333d738d2a1f85402cf0bc
Status in newer kernel trees:
6.15.y | Not found
Note: The patch differs from the upstream commit:
---
1: 5c5f0468d172d ! 1: 271ecf078a403 mm/vmalloc: fix data race in show_numa_info()
@@ Metadata
## Commit message ##
mm/vmalloc: fix data race in show_numa_info()
+ commit 5c5f0468d172ddec2e333d738d2a1f85402cf0bc upstream.
+
The following data-race was found in show_numa_info():
==================================================================
@@ mm/vmalloc.c: bool vmalloc_dump_obj(void *object)
static void show_purge_info(struct seq_file *m)
@@ mm/vmalloc.c: static int vmalloc_info_show(struct seq_file *m, void *p)
- struct vmap_node *vn;
struct vmap_area *va;
struct vm_struct *v;
+ int i;
+ unsigned int *counters;
+
+ if (IS_ENABLED(CONFIG_NUMA))
+ counters = kmalloc(nr_node_ids * sizeof(unsigned int), GFP_KERNEL);
- for_each_vmap_node(vn) {
- spin_lock(&vn->busy.lock);
+ for (i = 0; i < nr_vmap_nodes; i++) {
+ vn = &vmap_nodes[i];
@@ mm/vmalloc.c: static int vmalloc_info_show(struct seq_file *m, void *p)
}
---
Results of testing on various branches:
| Branch | Patch Apply | Build Test |
|---------------------------|-------------|------------|
| stable/linux-6.12.y | Success | Success |
* [PATCH 6.12.y v2] KVM: x86: Free vCPUs before freeing VM state
2025-07-05 21:40 ` Sasha Levin
@ 2025-07-28 17:50 ` Kevin Cheng
2025-07-28 19:07 ` [PATCH 6.12.y] " Kevin Cheng
2025-07-29 19:49 ` [PATCH 6.12.y v2] " Sasha Levin
0 siblings, 2 replies; 5+ messages in thread
From: Kevin Cheng @ 2025-07-28 17:50 UTC (permalink / raw)
To: sashal
Cc: aha310510, stable, Sean Christopherson, Aaron Lewis, Jim Mattson,
Yan Zhao, Rick P Edgecombe, Kai Huang, Isaku Yamahata,
Paolo Bonzini, Kevin Cheng
From: Sean Christopherson <seanjc@google.com>
[ Upstream commit 17bcd714426386fda741a4bccd96a2870179344b ]
Free vCPUs before freeing any VM state, as both SVM and VMX may access
VM state when "freeing" a vCPU that is currently "in" L2, i.e. that needs
to be kicked out of nested guest mode.
Commit 6fcee03df6a1 ("KVM: x86: avoid loading a vCPU after .vm_destroy was
called") partially fixed the issue, but for unknown reasons only moved the
MMU unloading before VM destruction. Complete the change, and free all
vCPU state prior to destroying VM state, as nVMX accesses even more state
than nSVM.
In addition to the AVIC, KVM can hit a use-after-free on MSR filters:
kvm_msr_allowed+0x4c/0xd0
__kvm_set_msr+0x12d/0x1e0
kvm_set_msr+0x19/0x40
load_vmcs12_host_state+0x2d8/0x6e0 [kvm_intel]
nested_vmx_vmexit+0x715/0xbd0 [kvm_intel]
nested_vmx_free_vcpu+0x33/0x50 [kvm_intel]
vmx_free_vcpu+0x54/0xc0 [kvm_intel]
kvm_arch_vcpu_destroy+0x28/0xf0
kvm_vcpu_destroy+0x12/0x50
kvm_arch_destroy_vm+0x12c/0x1c0
kvm_put_kvm+0x263/0x3c0
kvm_vm_release+0x21/0x30
and an upcoming fix to process injectable interrupts on nested VM-Exit
will access the PIC:
BUG: kernel NULL pointer dereference, address: 0000000000000090
#PF: supervisor read access in kernel mode
#PF: error_code(0x0000) - not-present page
CPU: 23 UID: 1000 PID: 2658 Comm: kvm-nx-lpage-re
RIP: 0010:kvm_cpu_has_extint+0x2f/0x60 [kvm]
Call Trace:
<TASK>
kvm_cpu_has_injectable_intr+0xe/0x60 [kvm]
nested_vmx_vmexit+0x2d7/0xdf0 [kvm_intel]
nested_vmx_free_vcpu+0x40/0x50 [kvm_intel]
vmx_vcpu_free+0x2d/0x80 [kvm_intel]
kvm_arch_vcpu_destroy+0x2d/0x130 [kvm]
kvm_destroy_vcpus+0x8a/0x100 [kvm]
kvm_arch_destroy_vm+0xa7/0x1d0 [kvm]
kvm_destroy_vm+0x172/0x300 [kvm]
kvm_vcpu_release+0x31/0x50 [kvm]
Inarguably, both nSVM and nVMX need to be fixed, but punt on those
cleanups for the moment. Conceptually, vCPUs should be freed before VM
state. Assets like the I/O APIC and PIC _must_ be allocated before vCPUs
are created, so it stands to reason that they must be freed _after_ vCPUs
are destroyed.
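The rule being applied is plain LIFO teardown: anything a vCPU can touch while
being destroyed must outlive the vCPU. A minimal userspace sketch of the
ordering bug (illustrative names, not KVM code):

	/* lifo-teardown.c - illustrative only; build with: gcc lifo-teardown.c */
	#include <stdio.h>
	#include <stdlib.h>

	struct pic { int pending; };		/* VM-wide asset, allocated before any vcpu */
	struct vcpu { struct pic *pic; };	/* each vcpu depends on it */

	static void vcpu_destroy(struct vcpu *v)
	{
		/* like nested_vmx_free_vcpu(): a vcpu "in" L2 touches VM state on exit */
		printf("vcpu exit, pic pending=%d\n", v->pic->pending);
		free(v);
	}

	int main(void)
	{
		struct pic *pic = calloc(1, sizeof(*pic));
		struct vcpu *v = calloc(1, sizeof(*v));

		if (!pic || !v)
			return 1;
		v->pic = pic;

		/* correct order: dependents first, assets last */
		vcpu_destroy(v);
		free(pic);

		/*
		 * The old kvm_arch_destroy_vm() order freed the PIC and MSR
		 * filter first, leaving vcpu_destroy() to dereference freed
		 * memory - the UAF and NULL-pointer splats quoted above.
		 */
		return 0;
	}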
Reported-by: Aaron Lewis <aaronlewis@google.com>
Closes: https://lore.kernel.org/all/20240703175618.2304869-2-aaronlewis@google.com
Cc: Jim Mattson <jmattson@google.com>
Cc: Yan Zhao <yan.y.zhao@intel.com>
Cc: Rick P Edgecombe <rick.p.edgecombe@intel.com>
Cc: Kai Huang <kai.huang@intel.com>
Cc: Isaku Yamahata <isaku.yamahata@intel.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-ID: <20250224235542.2562848-2-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Cheng <chengkev@google.com>
---
arch/x86/kvm/x86.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f378d479fea3f..7f91b11e6f0ec 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -12888,11 +12888,11 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
mutex_unlock(&kvm->slots_lock);
}
kvm_unload_vcpu_mmus(kvm);
+ kvm_destroy_vcpus(kvm);
kvm_x86_call(vm_destroy)(kvm);
kvm_free_msr_filter(srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1));
kvm_pic_destroy(kvm);
kvm_ioapic_destroy(kvm);
- kvm_destroy_vcpus(kvm);
kvfree(rcu_dereference_check(kvm->arch.apic_map, 1));
kfree(srcu_dereference_check(kvm->arch.pmu_event_filter, &kvm->srcu, 1));
kvm_mmu_uninit_vm(kvm);
--
2.50.1.487.gc89ff58d15-goog
* re: [PATCH 6.12.y] KVM: x86: Free vCPUs before freeing VM state
2025-07-28 17:50 ` [PATCH 6.12.y v2] KVM: x86: Free vCPUs before freeing VM state Kevin Cheng
@ 2025-07-28 19:07 ` Kevin Cheng
2025-07-29 19:49 ` [PATCH 6.12.y v2] " Sasha Levin
1 sibling, 0 replies; 5+ messages in thread
From: Kevin Cheng @ 2025-07-28 19:07 UTC (permalink / raw)
To: chengkev
Cc: aaronlewis, aha310510, isaku.yamahata, jmattson, kai.huang,
pbonzini, rick.p.edgecombe, sashal, seanjc, stable, yan.y.zhao
Please ignore the above patch. I replied to the wrong thread. Sorry!
* Re: [PATCH 6.12.y v2] KVM: x86: Free vCPUs before freeing VM state
2025-07-28 17:50 ` [PATCH 6.12.y v2] KVM: x86: Free vCPUs before freeing VM state Kevin Cheng
2025-07-28 19:07 ` [PATCH 6.12.y] " Kevin Cheng
@ 2025-07-29 19:49 ` Sasha Levin
1 sibling, 0 replies; 5+ messages in thread
From: Sasha Levin @ 2025-07-29 19:49 UTC (permalink / raw)
To: stable; +Cc: Sasha Levin
[ Sasha's backport helper bot ]
Hi,
✅ All tests passed successfully. No issues detected.
No action required from the submitter.
The upstream commit SHA1 provided is correct: 17bcd714426386fda741a4bccd96a2870179344b
WARNING: Author mismatch between patch and upstream commit:
Backport author: Kevin Cheng <chengkev@google.com>
Commit author: Sean Christopherson <seanjc@google.com>
Status in newer kernel trees:
6.15.y | Present (exact SHA1)
Note: The patch differs from the upstream commit:
---
1: 17bcd7144263 ! 1: 0bfb70711c37 KVM: x86: Free vCPUs before freeing VM state
@@ Metadata
## Commit message ##
KVM: x86: Free vCPUs before freeing VM state
+ [ Upstream commit 17bcd714426386fda741a4bccd96a2870179344b ]
+
Free vCPUs before freeing any VM state, as both SVM and VMX may access
VM state when "freeing" a vCPU that is currently "in" L2, i.e. that needs
to be kicked out of nested guest mode.
@@ Commit message
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-ID: <20250224235542.2562848-2-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+ Signed-off-by: Kevin Cheng <chengkev@google.com>
## arch/x86/kvm/x86.c ##
@@ arch/x86/kvm/x86.c: void kvm_arch_destroy_vm(struct kvm *kvm)
---
Results of testing on various branches:
| Branch | Patch Apply | Build Test |
|---------------------------|-------------|------------|
| origin/linux-6.12.y | Success | Success |