From: Li Chen <me@linux.beauty>
To: Jonathan Cameron <jonathan.cameron@huawei.com>,
Fan Ni <fan.ni@samsung.com>
Cc: qemu-devel@nongnu.org, Li Chen <me@linux.beauty>
Subject: [PATCH 2/3] cxl: alias fixed memory windows to RAM under KVM
Date: Tue, 17 Mar 2026 11:33:02 +0800 [thread overview]
Message-ID: <20260317033304.3185291-3-me@linux.beauty> (raw)
In-Reply-To: <20260317033304.3185291-1-me@linux.beauty>
CXL fixed memory windows are currently modeled as an I/O MemoryRegion, so
KVM treats the whole range as MMIO. After Linux onlines the window as
system RAM, normal CPU stores can land in the window and trigger KVM
instruction emulation. Instructions like XSAVEC are not supported by the
emulator and abort the VM with a KVM internal error.
Turn the fixed window into a container that always contains the existing
MMIO dispatcher and, when KVM is enabled, add an optional RAM alias overlay
when the window has a single, non-interleaved target and maps linearly to a
Type-3 volatile memdev.
Signed-off-by: Li Chen <me@linux.beauty>
---
hw/cxl/cxl-host-stubs.c | 1 +
hw/cxl/cxl-host.c | 189 +++++++++++++++++++++++++++++++++++++-
include/hw/cxl/cxl.h | 5 +
include/hw/cxl/cxl_host.h | 1 +
4 files changed, 194 insertions(+), 2 deletions(-)
diff --git a/hw/cxl/cxl-host-stubs.c b/hw/cxl/cxl-host-stubs.c
index c015baac81..e7e955121f 100644
--- a/hw/cxl/cxl-host-stubs.c
+++ b/hw/cxl/cxl-host-stubs.c
@@ -16,5 +16,6 @@ hwaddr cxl_fmws_set_memmap(hwaddr base, hwaddr max_addr)
return base;
};
void cxl_fmws_update_mmio(void) {};
+void cxl_fmws_update_mappings(void) {}; /* stub: nothing to update */
const MemoryRegionOps cfmws_ops;
diff --git a/hw/cxl/cxl-host.c b/hw/cxl/cxl-host.c
index f3479b1991..7f6974626e 100644
--- a/hw/cxl/cxl-host.c
+++ b/hw/cxl/cxl-host.c
@@ -10,6 +10,7 @@
#include "qemu/bitmap.h"
#include "qemu/error-report.h"
#include "qapi/error.h"
+#include "system/kvm.h"
#include "system/qtest.h"
#include "hw/core/boards.h"
@@ -22,6 +23,164 @@
#include "hw/pci/pcie_port.h"
#include "hw/pci-bridge/pci_expander_bridge.h"
+static void cxl_fmw_disable_direct(CXLFixedWindow *fw)
+{ /* Remove fw's direct alias, if any, and reset its bookkeeping. */
+ if (!fw->direct_target_mr) {
+ return;
+ }
+ /* Unmap (only if mapped) and unparent the alias in one transaction. */
+ memory_region_transaction_begin();
+ if (fw->direct_mapped) {
+ memory_region_del_subregion(&fw->mr, &fw->direct_mr);
+ }
+ object_unparent(OBJECT(&fw->direct_mr));
+ memory_region_transaction_commit();
+ /* Clear the cached target so the window reads as not aliased. */
+ fw->direct_mapped = false;
+ fw->direct_target_mr = NULL;
+ fw->direct_target_offset = 0;
+}
+
+static bool cxl_hdm_decoder_simple_target(uint32_t *cache_mem, hwaddr base,
+ hwaddr size, uint8_t *target)
+{ /* On an exact committed match, store its target and return true. */
+ int hdm_inc = R_CXL_HDM_DECODER1_BASE_LO - R_CXL_HDM_DECODER0_BASE_LO;
+ unsigned int hdm_count;
+ uint32_t cap, global_ctrl;
+ int i;
+ /* Nothing can match unless HDM decoding is globally enabled. */
+ global_ctrl = ldl_le_p(cache_mem + R_CXL_HDM_DECODER_GLOBAL_CONTROL);
+ if (!FIELD_EX32(global_ctrl, CXL_HDM_DECODER_GLOBAL_CONTROL,
+ HDM_DECODER_ENABLE)) {
+ return false;
+ }
+ /* Walk every decoder advertised by the capability register. */
+ cap = ldl_le_p(cache_mem + R_CXL_HDM_DECODER_CAPABILITY);
+ hdm_count = cxl_decoder_count_dec(FIELD_EX32(cap,
+ CXL_HDM_DECODER_CAPABILITY,
+ DECODER_COUNT));
+ for (i = 0; i < hdm_count; i++) {
+ uint32_t low, high, ctrl;
+ uint64_t decoder_base, decoder_size;
+ uint32_t tlo;
+ uint8_t iw;
+ /* Base: bits 31:28 come from the LO register, 63:32 from HI. */
+ low = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_BASE_LO + i * hdm_inc);
+ high = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_BASE_HI + i * hdm_inc);
+ decoder_base = (low & 0xf0000000) | ((uint64_t)high << 32);
+ /* Size is assembled from its LO/HI registers the same way. */
+ low = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_SIZE_LO + i * hdm_inc);
+ high = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_SIZE_HI + i * hdm_inc);
+ decoder_size = (low & 0xf0000000) | ((uint64_t)high << 32);
+ /* Only an exact base and size match is considered. */
+ if (decoder_base != base || decoder_size != size) {
+ continue;
+ }
+ /* Skip matching decoders that are not committed. */
+ ctrl = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL + i * hdm_inc);
+ if (!FIELD_EX32(ctrl, CXL_HDM_DECODER0_CTRL, COMMITTED)) {
+ continue;
+ }
+ /* A non-zero IW field ends the search with failure, not a skip. */
+ iw = FIELD_EX32(ctrl, CXL_HDM_DECODER0_CTRL, IW);
+ if (iw != 0) {
+ return false;
+ }
+ /* The target is the low byte of TARGET_LIST_LO. */
+ tlo = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_TARGET_LIST_LO +
+ i * hdm_inc);
+ *target = extract32(tlo, 0, 8);
+ return true;
+ }
+ /* No committed decoder matched base/size. */
+ return false;
+}
+
+static bool cxl_fmw_enable_direct(CXLFixedWindow *fw)
+{ /* Try to overlay fw with a RAM alias; true if the alias is in place. */
+ CXLComponentState *hb_cstate;
+ PCIHostState *hb;
+ PCIDevice *rp, *d;
+ CXLType3Dev *ct3d;
+ MemoryRegion *target_mr = NULL;
+ hwaddr target_offset = 0;
+ uint32_t *cache_mem;
+ uint8_t target;
+ Error *local_err = NULL;
+ /* The alias is only attempted when KVM is in use. */
+ if (!kvm_enabled()) {
+ return false;
+ }
+ /* Only windows with exactly one target are handled. */
+ if (fw->num_targets != 1) {
+ return false;
+ }
+ /* Needs a non-zero base and a target with cxl_host_bridge set. */
+ if (!fw->base || !fw->target_hbs[0] ||
+ !fw->target_hbs[0]->cxl_host_bridge) {
+ return false;
+ }
+
+ hb = PCI_HOST_BRIDGE(fw->target_hbs[0]->cxl_host_bridge);
+ if (!hb || !hb->bus || !pci_bus_is_cxl(hb->bus)) {
+ return false;
+ }
+
+ hb_cstate = cxl_get_hb_cstate(hb);
+ if (!hb_cstate) {
+ return false;
+ }
+ /* Only the host bridge's own HDM decoder registers are consulted. */
+ cache_mem = hb_cstate->crb.cache_mem_registers;
+ if (!cxl_hdm_decoder_simple_target(cache_mem, fw->base, fw->size,
+ &target)) {
+ return false;
+ }
+ /* Resolve the decoder target to a port on the host bridge bus. */
+ rp = pcie_find_port_by_pn(hb->bus, target);
+ if (!rp) {
+ return false;
+ }
+ /* Take the first device slot on the port's secondary bus. */
+ d = pci_bridge_get_sec_bus(PCI_BRIDGE(rp))->devices[0];
+ if (!d) {
+ return false;
+ }
+ /* The device must be a CXL Type 3 device. */
+ if (!object_dynamic_cast(OBJECT(d), TYPE_CXL_TYPE3)) {
+ return false;
+ }
+ ct3d = CXL_TYPE3(d);
+ /* Device supplies target_mr/target_offset; errors are discarded. */
+ if (!cxl_type3_get_window_vmem_mapping(ct3d, fw->base, fw->size,
+ &target_mr, &target_offset,
+ &local_err)) {
+ error_free(local_err);
+ return false;
+ }
+ error_free(local_err);
+ /* Already aliased to the same region and offset: nothing to redo. */
+ if (fw->direct_mapped && fw->direct_target_mr == target_mr &&
+ fw->direct_target_offset == target_offset) {
+ return true;
+ }
+ /* Drop any previously recorded alias before building the new one. */
+ cxl_fmw_disable_direct(fw);
+ /* Alias fw->size bytes of target_mr, starting at target_offset. */
+ memory_region_init_alias(&fw->direct_mr, OBJECT(fw),
+ "cxl-fixed-memory-region.direct", target_mr,
+ target_offset, fw->size);
+ memory_region_transaction_begin();
+ memory_region_add_subregion_overlap(&fw->mr, 0, &fw->direct_mr, 1);
+ memory_region_transaction_commit();
+ /* Record what is mapped so the check above can detect a repeat. */
+ fw->direct_mapped = true;
+ fw->direct_target_mr = target_mr;
+ fw->direct_target_offset = target_offset;
+
+ return true;
+}
+
static void cxl_fixed_memory_window_config(CXLFixedMemoryWindowOptions *object,
int index, Error **errp)
{
@@ -432,6 +591,25 @@ void cxl_fmws_update_mmio(void)
object_child_foreach_recursive(object_get_root(), cxl_fmws_mmio_map, NULL);
}
+void cxl_fmws_update_mappings(void)
+{ /* Re-evaluate the direct alias of every fixed memory window. */
+ GSList *cfmws_list, *iter;
+ CXLFixedWindow *fw;
+ /* Without KVM no alias is ever created, so there is nothing to do. */
+ if (!kvm_enabled()) {
+ return;
+ }
+ /* A window that cannot be aliased has any existing alias removed. */
+ cfmws_list = cxl_fmws_get_all_sorted();
+ for (iter = cfmws_list; iter; iter = iter->next) {
+ fw = CXL_FMW(iter->data);
+ if (!cxl_fmw_enable_direct(fw)) {
+ cxl_fmw_disable_direct(fw);
+ }
+ }
+ g_slist_free(cfmws_list);
+}
+
hwaddr cxl_fmws_set_memmap(hwaddr base, hwaddr max_addr)
{
GSList *cfmws_list, *iter;
@@ -454,8 +632,15 @@ static void cxl_fmw_realize(DeviceState *dev, Error **errp)
{
CXLFixedWindow *fw = CXL_FMW(dev);
- memory_region_init_io(&fw->mr, OBJECT(dev), &cfmws_ops, fw,
- "cxl-fixed-memory-region", fw->size);
+ memory_region_init(&fw->mr, OBJECT(dev), "cxl-fixed-memory-region",
+ fw->size);
+ memory_region_init_io(&fw->io_mr, OBJECT(dev), &cfmws_ops, fw,
+ "cxl-fixed-memory-region.io", fw->size);
+ memory_region_add_subregion(&fw->mr, 0, &fw->io_mr);
+
+ fw->direct_mapped = false;
+ fw->direct_target_mr = NULL;
+ fw->direct_target_offset = 0;
sysbus_init_mmio(SYS_BUS_DEVICE(dev), &fw->mr);
}
diff --git a/include/hw/cxl/cxl.h b/include/hw/cxl/cxl.h
index 998f495a98..1ccd0f7e5f 100644
--- a/include/hw/cxl/cxl.h
+++ b/include/hw/cxl/cxl.h
@@ -38,6 +38,11 @@ typedef struct CXLFixedWindow {
uint8_t enc_int_gran;
/* Todo: XOR based interleaving */
MemoryRegion mr;
+ MemoryRegion io_mr;
+ MemoryRegion direct_mr;
+ MemoryRegion *direct_target_mr;
+ hwaddr direct_target_offset;
+ bool direct_mapped;
hwaddr base;
} CXLFixedWindow;
#define TYPE_CXL_FMW "cxl-fmw"
diff --git a/include/hw/cxl/cxl_host.h b/include/hw/cxl/cxl_host.h
index 21619bb748..7d5a384838 100644
--- a/include/hw/cxl/cxl_host.h
+++ b/include/hw/cxl/cxl_host.h
@@ -18,6 +18,7 @@ void cxl_fmws_link_targets(Error **errp);
void cxl_hook_up_pxb_registers(PCIBus *bus, CXLState *state, Error **errp);
hwaddr cxl_fmws_set_memmap(hwaddr base, hwaddr max_addr);
void cxl_fmws_update_mmio(void);
+void cxl_fmws_update_mappings(void);
GSList *cxl_fmws_get_all_sorted(void);
extern const MemoryRegionOps cfmws_ops;
--
2.52.0
next prev parent reply other threads:[~2026-03-17 3:34 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-17 3:33 [PATCH 0/3] cxl: avoid KVM internal error for fixed memory windows Li Chen
2026-03-17 3:33 ` [PATCH 1/3] cxl/type3: expose vmem mapping for fixed windows Li Chen
2026-03-17 3:33 ` Li Chen [this message]
2026-03-17 3:33 ` [PATCH 3/3] cxl: update fixed window mappings on decoder programming Li Chen
2026-03-17 16:57 ` [PATCH 0/3] cxl: avoid KVM internal error for fixed memory windows Jonathan Cameron via qemu development
2026-03-18 15:01 ` Alireza Sanaee via qemu development
2026-03-22 5:46 ` Li Chen
2026-03-23 9:51 ` Alireza Sanaee via qemu development
2026-03-22 5:49 ` Li Chen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260317033304.3185291-3-me@linux.beauty \
--to=me@linux.beauty \
--cc=fan.ni@samsung.com \
--cc=jonathan.cameron@huawei.com \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox