From: <mhonap@nvidia.com>
To: <aniketa@nvidia.com>, <ankita@nvidia.com>,
<alwilliamson@nvidia.com>, <vsethi@nvidia.com>, <jgg@nvidia.com>,
<mochs@nvidia.com>, <skolothumtho@nvidia.com>,
<alejandro.lucero-palau@amd.com>, <dave@stgolabs.net>,
<jonathan.cameron@huawei.com>, <dave.jiang@intel.com>,
<alison.schofield@intel.com>, <vishal.l.verma@intel.com>,
<ira.weiny@intel.com>, <dan.j.williams@intel.com>, <jgg@ziepe.ca>,
<yishaih@nvidia.com>, <kevin.tian@intel.com>
Cc: <cjia@nvidia.com>, <kwankhede@nvidia.com>, <targupta@nvidia.com>,
<zhiw@nvidia.com>, <kjaju@nvidia.com>,
<linux-kernel@vger.kernel.org>, <linux-cxl@vger.kernel.org>,
<kvm@vger.kernel.org>, <mhonap@nvidia.com>
Subject: [RFC v2 11/15] vfio/cxl: introduce the emulation of HDM registers
Date: Tue, 9 Dec 2025 22:20:15 +0530 [thread overview]
Message-ID: <20251209165019.2643142-12-mhonap@nvidia.com> (raw)
In-Reply-To: <20251209165019.2643142-1-mhonap@nvidia.com>
From: Manish Honap <mhonap@nvidia.com>
CXL devices have HDM registers in their CXL MMIO BAR. Many HDM registers
require a PA, and they are owned by the host in virtualization.
Thus, the HDM registers need to be emulated accordingly so that the
guest kernel CXL core can configure the virtual HDM decoders.
Introduce the emulation of HDM registers that emulates the HDM decoders.
Co-developed-by: Zhi Wang <zhiw@nvidia.com>
Signed-off-by: Zhi Wang <zhiw@nvidia.com>
Signed-off-by: Manish Honap <mhonap@nvidia.com>
---
drivers/vfio/pci/vfio_cxl_core.c | 7 +-
drivers/vfio/pci/vfio_cxl_core_emu.c | 242 +++++++++++++++++++++++++++
include/linux/vfio_pci_core.h | 2 +
3 files changed, 248 insertions(+), 3 deletions(-)
diff --git a/drivers/vfio/pci/vfio_cxl_core.c b/drivers/vfio/pci/vfio_cxl_core.c
index cb75e9f668a7..c0bdf55997da 100644
--- a/drivers/vfio/pci/vfio_cxl_core.c
+++ b/drivers/vfio/pci/vfio_cxl_core.c
@@ -247,8 +247,6 @@ int vfio_cxl_core_enable(struct vfio_cxl_core_device *cxl,
if (!dvsec)
return -ENODEV;
- cxl->dvsec = dvsec;
-
cxl_core = devm_cxl_dev_state_create(&pdev->dev, CXL_DEVTYPE_DEVMEM,
pdev->dev.id, dvsec, struct vfio_cxl,
cxlds, false);
@@ -257,9 +255,12 @@ int vfio_cxl_core_enable(struct vfio_cxl_core_device *cxl,
return -ENOMEM;
}
+ cxl->dvsec = dvsec;
+ cxl->cxl_core = cxl_core;
+
ret = find_comp_regs(cxl);
if (ret)
- return -ENODEV;
+ return ret;
ret = setup_virt_regs(cxl);
if (ret)
diff --git a/drivers/vfio/pci/vfio_cxl_core_emu.c b/drivers/vfio/pci/vfio_cxl_core_emu.c
index a0674bacecd7..6711ff8975ef 100644
--- a/drivers/vfio/pci/vfio_cxl_core_emu.c
+++ b/drivers/vfio/pci/vfio_cxl_core_emu.c
@@ -5,6 +5,239 @@
#include "vfio_cxl_core_priv.h"
+typedef ssize_t reg_handler_t(struct vfio_cxl_core_device *cxl, void *buf,
+ u64 offset, u64 size);
+
+static struct vfio_emulated_regblock *
+new_reg_block(struct vfio_cxl_core_device *cxl, u64 offset, u64 size,
+ reg_handler_t *read, reg_handler_t *write)
+{
+ struct vfio_emulated_regblock *block;
+
+ block = kzalloc(sizeof(*block), GFP_KERNEL);
+ if (!block)
+ return ERR_PTR(-ENOMEM);
+
+ block->range.start = offset;
+ block->range.end = offset + size - 1;
+ block->read = read;
+ block->write = write;
+
+ INIT_LIST_HEAD(&block->list);
+
+ return block;
+}
+
+static int new_mmio_block(struct vfio_cxl_core_device *cxl, u64 offset, u64 size,
+ reg_handler_t *read, reg_handler_t *write)
+{
+ struct vfio_emulated_regblock *block;
+
+ block = new_reg_block(cxl, offset, size, read, write);
+ if (IS_ERR(block))
+ return PTR_ERR(block);
+
+ list_add_tail(&block->list, &cxl->mmio_regblocks_head);
+ return 0;
+}
+
+static u64 hdm_reg_base(struct vfio_cxl_core_device *cxl)
+{
+ return cxl->comp_reg_offset + cxl->hdm_reg_offset;
+}
+
+static u64 to_hdm_reg_offset(struct vfio_cxl_core_device *cxl, u64 offset)
+{
+ return offset - hdm_reg_base(cxl);
+}
+
+static void *hdm_reg_virt(struct vfio_cxl_core_device *cxl, u64 hdm_reg_offset)
+{
+ return cxl->comp_reg_virt + cxl->hdm_reg_offset + hdm_reg_offset;
+}
+
+static ssize_t virt_hdm_reg_read(struct vfio_cxl_core_device *cxl, void *buf,
+ u64 offset, u64 size)
+{
+ offset = to_hdm_reg_offset(cxl, offset);
+ memcpy(buf, hdm_reg_virt(cxl, offset), size);
+
+ return size;
+}
+
+static ssize_t virt_hdm_reg_write(struct vfio_cxl_core_device *cxl, void *buf,
+ u64 offset, u64 size)
+{
+ offset = to_hdm_reg_offset(cxl, offset);
+ memcpy(hdm_reg_virt(cxl, offset), buf, size);
+
+ return size;
+}
+
+static ssize_t virt_hdm_rev_reg_write(struct vfio_cxl_core_device *cxl,
+ void *buf, u64 offset, u64 size)
+{
+ /* Discard writes on reserved registers. */
+ return size;
+}
+
+static ssize_t hdm_decoder_n_lo_write(struct vfio_cxl_core_device *cxl,
+ void *buf, u64 offset, u64 size)
+{
+ u32 new_val = le32_to_cpu(*(u32 *)buf);
+
+ if (WARN_ON_ONCE(size != 4))
+ return -EINVAL;
+
+ /* Bit [27:0] are reserved. */
+ new_val &= ~GENMASK(27, 0);
+
+ new_val = cpu_to_le32(new_val);
+ offset = to_hdm_reg_offset(cxl, offset);
+ memcpy(hdm_reg_virt(cxl, offset), &new_val, size);
+ return size;
+}
+
+static ssize_t hdm_decoder_global_ctrl_write(struct vfio_cxl_core_device *cxl,
+ void *buf, u64 offset, u64 size)
+{
+ u32 hdm_decoder_global_cap;
+ u32 new_val = le32_to_cpu(*(u32 *)buf);
+
+ if (WARN_ON_ONCE(size != 4))
+ return -EINVAL;
+
+ /* Bit [31:2] are reserved. */
+ new_val &= ~GENMASK(31, 2);
+
+ /* Poison On Decode Error Enable bit is 0 and RO if not support. */
+ hdm_decoder_global_cap = le32_to_cpu(*(u32 *)hdm_reg_virt(cxl, 0));
+ if (!(hdm_decoder_global_cap & BIT(10)))
+ new_val &= ~BIT(0);
+
+ new_val = cpu_to_le32(new_val);
+ offset = to_hdm_reg_offset(cxl, offset);
+ memcpy(hdm_reg_virt(cxl, offset), &new_val, size);
+ return size;
+}
+
+static ssize_t hdm_decoder_n_ctrl_write(struct vfio_cxl_core_device *cxl,
+ void *buf, u64 offset, u64 size)
+{
+ u32 hdm_decoder_global_cap;
+ u32 ro_mask, rev_mask;
+ u32 new_val = le32_to_cpu(*(u32 *)buf);
+ u32 cur_val;
+
+ if (WARN_ON_ONCE(size != 4))
+ return -EINVAL;
+
+ offset = to_hdm_reg_offset(cxl, offset);
+ cur_val = le32_to_cpu(*(u32 *)hdm_reg_virt(cxl, offset));
+
+ /* Lock on commit */
+ if (cur_val & BIT(8))
+ return size;
+
+ hdm_decoder_global_cap = le32_to_cpu(*(u32 *)hdm_reg_virt(cxl, 0));
+
+ /* RO and reserved bits in the spec */
+ ro_mask = BIT(10) | BIT(11);
+ rev_mask = BIT(15) | GENMASK(31, 28);
+
+ /* bits are not valid for devices */
+ ro_mask |= BIT(12);
+ rev_mask |= GENMASK(19, 16) | GENMASK(23, 20);
+
+ /* bits are reserved when UIO is not supported */
+ if (!(hdm_decoder_global_cap & BIT(13)))
+ rev_mask |= BIT(14) | GENMASK(27, 24);
+
+ /* clear reserved bits */
+ new_val &= ~rev_mask;
+
+ /* keep the RO bits */
+ cur_val &= ro_mask;
+ new_val &= ~ro_mask;
+ new_val |= cur_val;
+
+ /* emulate HDM decoder commit/de-commit */
+ if (new_val & BIT(9))
+ new_val |= BIT(10);
+ else
+ new_val &= ~BIT(10);
+
+ new_val = cpu_to_le32(new_val);
+ memcpy(hdm_reg_virt(cxl, offset), &new_val, size);
+ return size;
+}
+
+static int setup_mmio_emulation(struct vfio_cxl_core_device *cxl)
+{
+ u64 offset, base;
+ int ret;
+
+ base = hdm_reg_base(cxl);
+
+#define ALLOC_BLOCK(offset, size, read, write) do { \
+ ret = new_mmio_block(cxl, offset, size, read, write); \
+ if (ret) \
+ return ret; \
+ } while (0)
+
+ ALLOC_BLOCK(base + 0x4, 4,
+ virt_hdm_reg_read,
+ hdm_decoder_global_ctrl_write);
+
+ offset = base + 0x10;
+ while (offset < base + cxl->hdm_reg_size) {
+ /* HDM N BASE LOW */
+ ALLOC_BLOCK(offset, 4,
+ virt_hdm_reg_read,
+ hdm_decoder_n_lo_write);
+
+ /* HDM N BASE HIGH */
+ ALLOC_BLOCK(offset + 0x4, 4,
+ virt_hdm_reg_read,
+ virt_hdm_reg_write);
+
+ /* HDM N SIZE LOW */
+ ALLOC_BLOCK(offset + 0x8, 4,
+ virt_hdm_reg_read,
+ hdm_decoder_n_lo_write);
+
+ /* HDM N SIZE HIGH */
+ ALLOC_BLOCK(offset + 0xc, 4,
+ virt_hdm_reg_read,
+ virt_hdm_reg_write);
+
+ /* HDM N CONTROL */
+ ALLOC_BLOCK(offset + 0x10, 4,
+ virt_hdm_reg_read,
+ hdm_decoder_n_ctrl_write);
+
+ /* HDM N TARGET LIST LOW */
+ ALLOC_BLOCK(offset + 0x14, 0x4,
+ virt_hdm_reg_read,
+ virt_hdm_rev_reg_write);
+
+ /* HDM N TARGET LIST HIGH */
+ ALLOC_BLOCK(offset + 0x18, 0x4,
+ virt_hdm_reg_read,
+ virt_hdm_rev_reg_write);
+
+ /* HDM N REV */
+ ALLOC_BLOCK(offset + 0x1c, 0x4,
+ virt_hdm_reg_read,
+ virt_hdm_rev_reg_write);
+
+ offset += 0x20;
+ }
+
+#undef ALLOC_BLOCK
+ return 0;
+}
+
void vfio_cxl_core_clean_register_emulation(struct vfio_cxl_core_device *cxl)
{
struct list_head *pos, *n;
@@ -17,10 +250,19 @@ void vfio_cxl_core_clean_register_emulation(struct vfio_cxl_core_device *cxl)
int vfio_cxl_core_setup_register_emulation(struct vfio_cxl_core_device *cxl)
{
+ int ret;
+
INIT_LIST_HEAD(&cxl->config_regblocks_head);
INIT_LIST_HEAD(&cxl->mmio_regblocks_head);
+ ret = setup_mmio_emulation(cxl);
+ if (ret)
+ goto err;
+
return 0;
+err:
+ vfio_cxl_core_clean_register_emulation(cxl);
+ return ret;
}
static struct vfio_emulated_regblock *
diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
index 12ded67c7db7..31fd28626846 100644
--- a/include/linux/vfio_pci_core.h
+++ b/include/linux/vfio_pci_core.h
@@ -251,5 +251,7 @@ ssize_t vfio_cxl_core_write(struct vfio_device *core_vdev, const char __user *bu
size_t count, loff_t *ppos);
long vfio_cxl_core_ioctl(struct vfio_device *core_vdev, unsigned int cmd,
unsigned long arg);
+int vfio_cxl_core_setup_register_emulation(struct vfio_cxl_core_device *cxl);
+void vfio_cxl_core_clean_register_emulation(struct vfio_cxl_core_device *cxl);
#endif /* VFIO_PCI_CORE_H */
--
2.25.1
next prev parent reply other threads:[~2025-12-09 16:52 UTC|newest]
Thread overview: 25+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-12-09 16:50 [RFC v2 00/15] vfio: introduce vfio-cxl to support CXL type-2 accelerator passthrough Hello all, mhonap
2025-12-09 16:50 ` [RFC v2 01/15] cxl: factor out cxl_await_range_active() and cxl_media_ready() mhonap
2025-12-22 12:21 ` Jonathan Cameron
2025-12-09 16:50 ` [RFC v2 02/15] cxl: introduce cxl_get_hdm_reg_info() mhonap
2025-12-09 16:50 ` [RFC v2 03/15] cxl: introduce cxl_find_comp_reglock_offset() mhonap
2025-12-09 16:50 ` [RFC v2 04/15] cxl: introduce devm_cxl_del_memdev() mhonap
2025-12-09 16:50 ` [RFC v2 05/15] cxl: introduce cxl_get_committed_regions() mhonap
2025-12-22 12:31 ` Jonathan Cameron
2025-12-09 16:50 ` [RFC v2 06/15] vfio/cxl: introduce vfio-cxl core preludes mhonap
2025-12-22 13:54 ` Jonathan Cameron
2025-12-09 16:50 ` [RFC v2 07/15] vfio/cxl: expose CXL region to the userspace via a new VFIO device region mhonap
2025-12-11 16:06 ` Dave Jiang
2025-12-11 17:31 ` Manish Honap
2025-12-11 18:01 ` Dave Jiang
2025-12-22 14:00 ` Jonathan Cameron
2025-12-09 16:50 ` [RFC v2 08/15] vfio/cxl: discover precommitted CXL region mhonap
2025-12-22 14:09 ` Jonathan Cameron
2025-12-09 16:50 ` [RFC v2 09/15] vfio/cxl: introduce vfio_cxl_core_{read, write}() mhonap
2025-12-09 16:50 ` [RFC v2 10/15] vfio/cxl: introduce the register emulation framework mhonap
2025-12-09 16:50 ` mhonap [this message]
2025-12-11 18:13 ` [RFC v2 11/15] vfio/cxl: introduce the emulation of HDM registers Dave Jiang
2025-12-09 16:50 ` [RFC v2 12/15] vfio/cxl: introduce the emulation of CXL configuration space mhonap
2025-12-09 16:50 ` [RFC v2 13/15] vfio/pci: introduce CXL device awareness mhonap
2025-12-09 16:50 ` [RFC v2 14/15] vfio/cxl: VFIO variant driver for QEMU CXL accel device mhonap
2025-12-09 16:50 ` [RFC v2 15/15] cxl/mem: Fix NULL pointer deference in memory device paths mhonap
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20251209165019.2643142-12-mhonap@nvidia.com \
--to=mhonap@nvidia.com \
--cc=alejandro.lucero-palau@amd.com \
--cc=alison.schofield@intel.com \
--cc=alwilliamson@nvidia.com \
--cc=aniketa@nvidia.com \
--cc=ankita@nvidia.com \
--cc=cjia@nvidia.com \
--cc=dan.j.williams@intel.com \
--cc=dave.jiang@intel.com \
--cc=dave@stgolabs.net \
--cc=ira.weiny@intel.com \
--cc=jgg@nvidia.com \
--cc=jgg@ziepe.ca \
--cc=jonathan.cameron@huawei.com \
--cc=kevin.tian@intel.com \
--cc=kjaju@nvidia.com \
--cc=kvm@vger.kernel.org \
--cc=kwankhede@nvidia.com \
--cc=linux-cxl@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mochs@nvidia.com \
--cc=skolothumtho@nvidia.com \
--cc=targupta@nvidia.com \
--cc=vishal.l.verma@intel.com \
--cc=vsethi@nvidia.com \
--cc=yishaih@nvidia.com \
--cc=zhiw@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox