* [QEMU PATCH 0/1] cpmu: segfault issue with hotplug event
@ 2025-12-05 14:03 Joshua Lant
2025-12-05 14:03 ` [QEMU PATCH 1/1] cpmu: teardown cpmu timers on device exit Joshua Lant
2025-12-15 14:44 ` [QEMU PATCH 0/1] cpmu: segfault issue with hotplug event Jonathan Cameron
0 siblings, 2 replies; 3+ messages in thread
From: Joshua Lant @ 2025-12-05 14:03 UTC (permalink / raw)
To: linux-cxl; +Cc: jonathan.cameron, Joshua Lant
Hi,
The CXL device topology shown below causes a hotplug event at some
point during boot. This triggers a segfault in cpmu_timer_update due
to a dangling pointer. The teardown of the pci devices has already
happened at this point. However, the associated CPMU timers remain in
the timer_list. We need to remove them before the rest of the device
teardown happens. This should not cause any issue, since if a hot-add
happens the realize function for the corresponding device will be called
and the timers will be set up once again (although I have not tested
this scenario; I have only verified that the boot process now completes
correctly).
Many thanks,
Josh
qemu: https://gitlab.com/jic23/qemu -b cxl-2025-10-03-draft
kernel: https://github.com/weiny2/linux-kernel.git -b dcd-v6-2025-04-13
device topology:
-device usb-ehci,id=ehci \
-object memory-backend-file,id=cxl-mem1,share=on,mem-path=/tmp/t3_cxl1.raw,size=8G \
-object memory-backend-file,id=cxl-lsa1,share=on,mem-path=/tmp/t3_lsa1.raw,size=1M \
-object memory-backend-file,id=cxl-mem2,share=on,mem-path=/tmp/t3_cxl2.raw,size=8G \
-object memory-backend-file,id=cxl-lsa2,share=on,mem-path=/tmp/t3_lsa2.raw,size=1M \
-device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.0,hdm_for_passthrough=true \
-device pxb-cxl,bus_nr=48,bus=pcie.0,id=cxl.1,hdm_for_passthrough=true \
-device cxl-rp,port=0,bus=cxl.0,id=root_port1,chassis=0,slot=1 \
-device cxl-rp,port=1,bus=cxl.1,id=root_port2,chassis=1,slot=1 \
-device cxl-upstream,port=0,sn=1234,bus=root_port1,id=us0,addr=0.0,multifunction=on, \
-device cxl-upstream,port=0,sn=5678,bus=root_port2,id=us1,addr=0.1,multifunction=on, \
-device cxl-switch-mailbox-cci,bus=root_port1,addr=0.3,target=us0 \
-device cxl-downstream,port=0,bus=us0,id=swport0,slot=3 \
-device cxl-downstream,port=0,bus=us1,id=swport1,slot=4 \
-device cxl-type3,bus=swport0,volatile-dc-memdev=cxl-mem1,id=cxl-dcd0,lsa=cxl-lsa1,num-dc-regions=8,sn=99 \
-device cxl-type3,bus=swport1,volatile-dc-memdev=cxl-mem2,id=cxl-dcd1,lsa=cxl-lsa2,num-dc-regions=8,sn=100 \
-machine cxl-fmw.0.targets.0=cxl.0,cxl-fmw.0.size=8G,cxl-fmw.1.targets.0=cxl.1,cxl-fmw.1.size=8G"
Joshua Lant (1):
cpmu: teardown cpmu timers on device exit
hw/cxl/cxl-cpmu.c | 8 ++++++++
hw/mem/cxl_type3.c | 5 +++++
hw/pci-bridge/cxl_downstream.c | 2 ++
hw/pci-bridge/cxl_upstream.c | 4 ++++
include/hw/cxl/cxl_cpmu.h | 1 +
5 files changed, 20 insertions(+)
--
2.47.3
^ permalink raw reply [flat|nested] 3+ messages in thread
* [QEMU PATCH 1/1] cpmu: teardown cpmu timers on device exit
2025-12-05 14:03 [QEMU PATCH 0/1] cpmu: segfault issue with hotplug event Joshua Lant
@ 2025-12-05 14:03 ` Joshua Lant
2025-12-15 14:44 ` [QEMU PATCH 0/1] cpmu: segfault issue with hotplug event Jonathan Cameron
1 sibling, 0 replies; 3+ messages in thread
From: Joshua Lant @ 2025-12-05 14:03 UTC (permalink / raw)
To: linux-cxl; +Cc: jonathan.cameron, Joshua Lant
Hot-remove events on CXL devices cause a segfault in QEMU
in the function "cpmu_timer_update" in cxl-cpmu.c,
when casting the input opaque to the CPMUState structure.
This is caused by a dangling pointer to opaque, since the pointer
has been previously freed at some other point during the teardown
of the pci device from the hot-remove.
In order to fix this we need to remove the timers at the start of the
exit functions for the relevant devices, so they never get called
again by qemu_clock_run_all_timers after teardown has happened.
Note that this should not be required for the root port since
it is not hot-pluggable.
Signed-off-by: Joshua Lant <joshualant@gmail.com>
---
hw/cxl/cxl-cpmu.c | 8 ++++++++
hw/mem/cxl_type3.c | 5 +++++
hw/pci-bridge/cxl_downstream.c | 2 ++
hw/pci-bridge/cxl_upstream.c | 4 ++++
include/hw/cxl/cxl_cpmu.h | 1 +
5 files changed, 20 insertions(+)
diff --git a/hw/cxl/cxl-cpmu.c b/hw/cxl/cxl-cpmu.c
index 2f863d14f6..a25e6fa554 100644
--- a/hw/cxl/cxl-cpmu.c
+++ b/hw/cxl/cxl-cpmu.c
@@ -281,6 +281,14 @@ static void cpmu_counter_update(void *opaque)
}
}
+void cxl_cpmu_timer_destroy(CPMUState *cpmu) {
+ if (cpmu->timer) {
+ timer_del(cpmu->timer);
+ timer_free(cpmu->timer);
+ cpmu->timer = NULL;
+ }
+}
+
void cxl_cpmu_register_block_init(Object *obj, CXLDeviceState *cxl_dstate,
int id, uint8_t msi_n)
{
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 305525c5f7..d07c877ca2 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -1299,7 +1299,12 @@ void ct3_exit(PCIDevice *pci_dev)
CXLType3Dev *ct3d = CXL_TYPE3(pci_dev);
CXLComponentState *cxl_cstate = &ct3d->cxl_cstate;
ComponentRegisters *regs = &cxl_cstate->crb;
+ CPMUState *cpmu;
+ for(int i = 0; i < CXL_NUM_CPMU_INSTANCES; i++) {
+ cpmu = &ct3d->cxl_dstate.cpmu[i];
+ cxl_cpmu_timer_destroy(cpmu);
+ }
pcie_aer_exit(pci_dev);
cxl_doe_cdat_release(cxl_cstate);
msix_uninit_exclusive_bar(pci_dev);
diff --git a/hw/pci-bridge/cxl_downstream.c b/hw/pci-bridge/cxl_downstream.c
index 6aa8586f01..de256c5abb 100644
--- a/hw/pci-bridge/cxl_downstream.c
+++ b/hw/pci-bridge/cxl_downstream.c
@@ -244,7 +244,9 @@ static void cxl_dsp_realize(PCIDevice *d, Error **errp)
static void cxl_dsp_exitfn(PCIDevice *d)
{
PCIESlot *s = PCIE_SLOT(d);
+ CPMUState *cpmu = &CXL_DSP(d)->cpmu;
+ cxl_cpmu_timer_destroy(cpmu);
pcie_aer_exit(d);
pcie_chassis_del_slot(s);
pcie_cap_exit(d);
diff --git a/hw/pci-bridge/cxl_upstream.c b/hw/pci-bridge/cxl_upstream.c
index c2150afff3..f21b9e41be 100644
--- a/hw/pci-bridge/cxl_upstream.c
+++ b/hw/pci-bridge/cxl_upstream.c
@@ -420,6 +420,10 @@ err_bridge:
static void cxl_usp_exitfn(PCIDevice *d)
{
+ CXLUpstreamPort *usp = CXL_USP(d);
+ CPMUState *cpmu = &usp->cpmu;
+
+ cxl_cpmu_timer_destroy(cpmu);
pcie_aer_exit(d);
pcie_cap_exit(d);
msi_uninit(d);
diff --git a/include/hw/cxl/cxl_cpmu.h b/include/hw/cxl/cxl_cpmu.h
index 872a75ccc9..6f3b191a7d 100644
--- a/include/hw/cxl/cxl_cpmu.h
+++ b/include/hw/cxl/cxl_cpmu.h
@@ -99,4 +99,5 @@ void cxl_cpmu_register_block_init(Object *obj,
void cxl_cpmu_register_block_init2(Object *obj, CPMUState *cpmu,
MemoryRegion *registers,
int id, uint8_t msi_n);
+void cxl_cpmu_timer_destroy(CPMUState *cpmu);
#endif
--
2.47.3
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [QEMU PATCH 0/1] cpmu: segfault issue with hotplug event
2025-12-05 14:03 [QEMU PATCH 0/1] cpmu: segfault issue with hotplug event Joshua Lant
2025-12-05 14:03 ` [QEMU PATCH 1/1] cpmu: teardown cpmu timers on device exit Joshua Lant
@ 2025-12-15 14:44 ` Jonathan Cameron
1 sibling, 0 replies; 3+ messages in thread
From: Jonathan Cameron @ 2025-12-15 14:44 UTC (permalink / raw)
To: Joshua Lant; +Cc: linux-cxl
On Fri, 5 Dec 2025 14:03:28 +0000
Joshua Lant <joshualant@gmail.com> wrote:
> Hi,
>
> The CXL device topology shown below causes a hotplug event at some
> point during boot. This triggers a segfault in cpmu_timer_update due
> to a dangling pointer. The teardown of the pci devices has already
> happened at this point. However, the associated CPMU timers remain in
> the timer_list. We need to remove them before the rest of the device
> teardown happens. This should not cause any issue, since if a hot-add
> happens the realize function for the corresponding device will be called
> and the timers will be set up once again (although I have not tested
> this scenario. Only that I can now complete the boot process correctly).
>
> Many thanks,
>
> Josh
>
> qemu: https://gitlab.com/jic23/qemu -b cxl-2025-10-03-draft
> kernel: https://github.com/weiny2/linux-kernel.git -b dcd-v6-2025-04-13
I'm curious what is triggering the hotplug to occur, but indeed this is
clearly a bug. I'll squash the changes into the relevant CPMU patches.
So applied across the relevant 3ish patches. I tweaked it a bit to reduce
the use of local variables that were only used once.
Given we are near the QEMU 10.2 release, I'll wait for that before pushing
out a new tree incorporating this fix.
Thanks,
Jonathan
> device topology:
>
> -device usb-ehci,id=ehci \
> -object memory-backend-file,id=cxl-mem1,share=on,mem-path=/tmp/t3_cxl1.raw,size=8G \
> -object memory-backend-file,id=cxl-lsa1,share=on,mem-path=/tmp/t3_lsa1.raw,size=1M \
> -object memory-backend-file,id=cxl-mem2,share=on,mem-path=/tmp/t3_cxl2.raw,size=8G \
> -object memory-backend-file,id=cxl-lsa2,share=on,mem-path=/tmp/t3_lsa2.raw,size=1M \
> -device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.0,hdm_for_passthrough=true \
> -device pxb-cxl,bus_nr=48,bus=pcie.0,id=cxl.1,hdm_for_passthrough=true \
> -device cxl-rp,port=0,bus=cxl.0,id=root_port1,chassis=0,slot=1 \
> -device cxl-rp,port=1,bus=cxl.1,id=root_port2,chassis=1,slot=1 \
> -device cxl-upstream,port=0,sn=1234,bus=root_port1,id=us0,addr=0.0,multifunction=on, \
> -device cxl-upstream,port=0,sn=5678,bus=root_port2,id=us1,addr=0.1,multifunction=on, \
> -device cxl-switch-mailbox-cci,bus=root_port1,addr=0.3,target=us0 \
> -device cxl-downstream,port=0,bus=us0,id=swport0,slot=3 \
> -device cxl-downstream,port=0,bus=us1,id=swport1,slot=4 \
> -device cxl-type3,bus=swport0,volatile-dc-memdev=cxl-mem1,id=cxl-dcd0,lsa=cxl-lsa1,num-dc-regions=8,sn=99 \
> -device cxl-type3,bus=swport1,volatile-dc-memdev=cxl-mem2,id=cxl-dcd1,lsa=cxl-lsa2,num-dc-regions=8,sn=100 \
> -machine cxl-fmw.0.targets.0=cxl.0,cxl-fmw.0.size=8G,cxl-fmw.1.targets.0=cxl.1,cxl-fmw.1.size=8G"
>
> Joshua Lant (1):
> cpmu: teardown cpmu timers on device exit
>
> hw/cxl/cxl-cpmu.c | 8 ++++++++
> hw/mem/cxl_type3.c | 5 +++++
> hw/pci-bridge/cxl_downstream.c | 2 ++
> hw/pci-bridge/cxl_upstream.c | 4 ++++
> include/hw/cxl/cxl_cpmu.h | 1 +
> 5 files changed, 20 insertions(+)
>
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2025-12-15 14:44 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2025-12-05 14:03 [QEMU PATCH 0/1] cpmu: segfault issue with hotplug event Joshua Lant
2025-12-05 14:03 ` [QEMU PATCH 1/1] cpmu: teardown cpmu timers on device exit Joshua Lant
2025-12-15 14:44 ` [QEMU PATCH 0/1] cpmu: segfault issue with hotplug event Jonathan Cameron
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.