* [RFC 0/1] Introduce vfio-cxl to support CXL type-2 device passthrough
@ 2024-09-21 7:14 Zhi Wang
2024-09-21 7:14 ` [RFC 1/1] vfio: support CXL device in VFIO stub Zhi Wang
2024-09-26 16:52 ` [RFC 0/1] Introduce vfio-cxl to support CXL type-2 device passthrough Cédric Le Goater
0 siblings, 2 replies; 4+ messages in thread
From: Zhi Wang @ 2024-09-21 7:14 UTC (permalink / raw)
To: kvm, linux-cxl
Cc: alex.williamson, kevin.tian, jgg, alison.schofield,
dan.j.williams, dave.jiang, dave, jonathan.cameron, ira.weiny,
vishal.l.verma, alucerop, clg, qemu-devel, acurrid, cjia, smitra,
ankita, aniketa, kwankhede, targupta, zhiw, zhiwang
Compute Express Link (CXL) is an open standard interconnect built upon
industrial PCI layers to enhance the performance and efficiency of data
centers by enabling high-speed, low-latency communication between CPUs
and various types of devices such as accelerators and memory.
Although CXL is built upon the PCI layers, passing through a CXL type-2
device differs from passing through a PCI device according to the CXL
specification. Thus, additional changes are required.
vfio-cxl is introduced to support CXL type-2 device passthrough.
These are the draft changes to the QEMU VFIO stub to support it.
More details (patches, repos, kernel config), everything you need to test
and hack around, plus a demo video showing the kernel/QEMU command line,
can be found at:
https://lore.kernel.org/kvm/20240920223446.1908673-7-zhiw@nvidia.com/T/
Zhi Wang (1):
vfio: support CXL device in VFIO stub
hw/vfio/common.c | 3 +
hw/vfio/pci.c | 134 ++++++++++++++++++++++++++++++++++
hw/vfio/pci.h | 10 +++
include/hw/pci/pci.h | 2 +
include/hw/vfio/vfio-common.h | 1 +
linux-headers/linux/vfio.h | 14 ++++
6 files changed, 164 insertions(+)
--
2.34.1
^ permalink raw reply [flat|nested] 4+ messages in thread
* [RFC 1/1] vfio: support CXL device in VFIO stub
2024-09-21 7:14 [RFC 0/1] Introduce vfio-cxl to support CXL type-2 device passthrough Zhi Wang
@ 2024-09-21 7:14 ` Zhi Wang
2024-10-11 21:47 ` Alex Williamson
2024-09-26 16:52 ` [RFC 0/1] Introduce vfio-cxl to support CXL type-2 device passthrough Cédric Le Goater
1 sibling, 1 reply; 4+ messages in thread
From: Zhi Wang @ 2024-09-21 7:14 UTC (permalink / raw)
To: kvm, linux-cxl
Cc: alex.williamson, kevin.tian, jgg, alison.schofield,
dan.j.williams, dave.jiang, dave, jonathan.cameron, ira.weiny,
vishal.l.verma, alucerop, clg, qemu-devel, acurrid, cjia, smitra,
ankita, aniketa, kwankhede, targupta, zhiw, zhiwang
To support CXL device passthrough, vfio-cxl-core is introduced. This
is the QEMU part.
Get the CXL caps from the vfio-cxl-core. Trap and emulate the HDM
decoder registers. Map the HDM decoders when the guest commits an HDM
decoder.
Signed-off-by: Zhi Wang <zhiw@nvidia.com>
---
hw/vfio/common.c | 3 +
hw/vfio/pci.c | 134 ++++++++++++++++++++++++++++++++++
hw/vfio/pci.h | 10 +++
include/hw/pci/pci.h | 2 +
include/hw/vfio/vfio-common.h | 1 +
linux-headers/linux/vfio.h | 14 ++++
6 files changed, 164 insertions(+)
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 9aac21abb7..6dea606f62 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -237,6 +237,9 @@ void vfio_region_write(void *opaque, hwaddr addr,
break;
}
+ if (region->notify_change)
+ region->notify_change(opaque, addr, data, size);
+
if (pwrite(vbasedev->fd, &buf, size, region->fd_offset + addr) != size) {
error_report("%s(%s:region%d+0x%"HWADDR_PRIx", 0x%"PRIx64
",%d) failed: %m",
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index a205c6b113..431a588252 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -23,6 +23,7 @@
#include <sys/ioctl.h>
#include "hw/hw.h"
+#include "hw/cxl/cxl_component.h"
#include "hw/pci/msi.h"
#include "hw/pci/msix.h"
#include "hw/pci/pci_bridge.h"
@@ -2743,6 +2744,72 @@ int vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp)
return 0;
}
+static bool read_region(VFIORegion *region, uint32_t *val, uint64_t offset)
+{
+ VFIODevice *vbasedev = region->vbasedev;
+
+ if (pread(vbasedev->fd, val, 4, region->fd_offset + offset) != 4) {
+ error_report("%s(%s, 0x%lx, 0x%x, 0x%x) failed: %m",
+ __func__,vbasedev->name, offset, *val, 4);
+ return false;
+ }
+ return true;
+}
+
+static void vfio_cxl_hdm_regs_changed(void *opaque, hwaddr addr,
+ uint64_t data, unsigned size)
+{
+ VFIORegion *region = opaque;
+ VFIODevice *vbasedev = region->vbasedev;
+ VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
+ VFIOCXL *cxl = &vdev->cxl;
+ MemoryRegion *address_space_mem = pci_get_bus(&vdev->pdev)->address_space_mem;
+ uint64_t offset, reg_offset, index;
+ uint32_t cur_val, write_val;
+
+ if (size != 4 || (addr & 0x3))
+ error_report("hdm_regs_changed: unsupported size or unaligned addr!\n");
+
+ offset = addr - cxl->hdm_regs_offset;
+ index = (offset - 0x10) / 0x20;
+ reg_offset = offset - 0x20 * index;
+
+ if (reg_offset != 0x20)
+ return;
+
+#define READ_REGION(val, offset) do { \
+ if (!read_region(region, val, offset)) \
+ return; \
+ } while(0)
+
+ write_val = (uint32_t)data;
+ READ_REGION(&cur_val, cxl->hdm_regs_offset + 0x20 * index + reg_offset);
+
+ if (!(cur_val & (1 << 10)) && (write_val & (1 << 9))) {
+ memory_region_transaction_begin();
+ memory_region_del_subregion(address_space_mem, cxl->region.mem);
+ memory_region_transaction_commit();
+ } else if (cur_val & (1 << 10) && !(write_val & (1 << 9))) {
+ /* commit -> not commit */
+ uint32_t base_hi, base_lo;
+ uint64_t base;
+
+ /* locked */
+ if (cur_val & (1 << 8))
+ return;
+
+ READ_REGION(&base_lo, cxl->hdm_regs_offset + 0x20 * index + 0x10);
+ READ_REGION(&base_hi, cxl->hdm_regs_offset + 0x20 * index + 0x14);
+
+ base = ((uint64_t)base_hi << 32) | (uint64_t)(base_lo >> 28);
+
+ memory_region_transaction_begin();
+ memory_region_add_subregion_overlap(address_space_mem,
+ base, cxl->region.mem, 0);
+ memory_region_transaction_commit();
+ }
+}
+
static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
{
VFIODevice *vbasedev = &vdev->vbasedev;
@@ -2780,6 +2847,11 @@ static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
}
QLIST_INIT(&vdev->bars[i].quirks);
+
+ if (vbasedev->flags & VFIO_DEVICE_FLAGS_CXL &&
+ i == vdev->cxl.hdm_regs_bar_index) {
+ vdev->bars[i].region.notify_change = vfio_cxl_hdm_regs_changed;
+ }
}
ret = vfio_get_region_info(vbasedev,
@@ -2974,6 +3046,62 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
vdev->req_enabled = false;
}
+static int vfio_cxl_setup(VFIOPCIDevice *vdev)
+{
+ VFIODevice *vbasedev = &vdev->vbasedev;
+ struct VFIOCXL *cxl = &vdev->cxl;
+ struct vfio_device_info_cap_cxl *cap;
+ g_autofree struct vfio_device_info *info = NULL;
+ struct vfio_info_cap_header *hdr;
+ struct vfio_region_info *region_info;
+ int ret;
+
+ if (!(vbasedev->flags & VFIO_DEVICE_FLAGS_CXL))
+ return 0;
+
+ info = vfio_get_device_info(vbasedev->fd);
+ if (!info) {
+ return -ENODEV;
+ }
+
+ hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_CXL);
+ if (!hdr) {
+ return -ENODEV;
+ }
+
+ cap = (void *)hdr;
+
+ cxl->hdm_count = cap->hdm_count;
+ cxl->hdm_regs_bar_index = cap->hdm_regs_bar_index;
+ cxl->hdm_regs_size = cap->hdm_regs_size;
+ cxl->hdm_regs_offset = cap->hdm_regs_offset;
+ cxl->dpa_size = cap->dpa_size;
+
+ ret = vfio_get_dev_region_info(vbasedev,
+ VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_CXL,
+ VFIO_REGION_SUBTYPE_CXL, ®ion_info);
+ if (ret) {
+ error_report("does not support requested CXL feature");
+ return ret;
+ }
+
+ ret = vfio_region_setup(OBJECT(vdev), vbasedev, &cxl->region,
+ region_info->index, "cxl region");
+ if (ret) {
+ error_report("fail to setup CXL region");
+ return ret;
+ }
+
+ g_free(region_info);
+
+ if (vfio_region_mmap(&cxl->region)) {
+ error_report("Failed to mmap %s cxl region",
+ vdev->vbasedev.name);
+ return -EFAULT;
+ }
+ return 0;
+}
+
static void vfio_realize(PCIDevice *pdev, Error **errp)
{
VFIOPCIDevice *vdev = VFIO_PCI(pdev);
@@ -3083,6 +3211,12 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
goto error;
}
+ ret = vfio_cxl_setup(vdev);
+ if (ret) {
+ vfio_put_group(group);
+ goto error;
+ }
+
vfio_populate_device(vdev, &err);
if (err) {
error_propagate(errp, err);
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index a2771b9ff3..6c5f5c1ea5 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -118,6 +118,15 @@ typedef struct VFIOMSIXInfo {
#define TYPE_VFIO_PCI "vfio-pci"
OBJECT_DECLARE_SIMPLE_TYPE(VFIOPCIDevice, VFIO_PCI)
+typedef struct VFIOCXL {
+ uint8_t hdm_count;
+ uint8_t hdm_regs_bar_index;
+ uint64_t hdm_regs_size;
+ uint64_t hdm_regs_offset;
+ uint64_t dpa_size;
+ VFIORegion region;
+} VFIOCXL;
+
struct VFIOPCIDevice {
PCIDevice pdev;
VFIODevice vbasedev;
@@ -177,6 +186,7 @@ struct VFIOPCIDevice {
bool clear_parent_atomics_on_exit;
VFIODisplay *dpy;
Notifier irqchip_change_notifier;
+ VFIOCXL cxl;
};
/* Use uin32_t for vendor & device so PCI_ANY_ID expands and cannot match hw */
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index b70a0b95ff..fbf5786d00 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -117,6 +117,8 @@ extern bool pci_available;
#define PCI_DEVICE_ID_REDHAT_UFS 0x0013
#define PCI_DEVICE_ID_REDHAT_QXL 0x0100
+#define PCI_VENDOR_ID_CXL 0x1e98
+
#define FMT_PCIBUS PRIx64
typedef uint64_t pcibus_t;
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index da43d27352..1c998c3ed6 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -56,6 +56,7 @@ typedef struct VFIORegion {
uint32_t nr_mmaps;
VFIOMmap *mmaps;
uint8_t nr; /* cache the region number for debug */
+ void (*notify_change)(void *, hwaddr, uint64_t, unsigned);
} VFIORegion;
typedef struct VFIOMigration {
diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
index 16db89071e..22fb50ed34 100644
--- a/linux-headers/linux/vfio.h
+++ b/linux-headers/linux/vfio.h
@@ -214,6 +214,7 @@ struct vfio_device_info {
#define VFIO_DEVICE_FLAGS_FSL_MC (1 << 6) /* vfio-fsl-mc device */
#define VFIO_DEVICE_FLAGS_CAPS (1 << 7) /* Info supports caps */
#define VFIO_DEVICE_FLAGS_CDX (1 << 8) /* vfio-cdx device */
+#define VFIO_DEVICE_FLAGS_CXL (1 << 9) /* vfio-cxl device */
__u32 num_regions; /* Max region index + 1 */
__u32 num_irqs; /* Max IRQ index + 1 */
__u32 cap_offset; /* Offset within info struct of first cap */
@@ -255,6 +256,16 @@ struct vfio_device_info_cap_pci_atomic_comp {
__u32 reserved;
};
+#define VFIO_DEVICE_INFO_CAP_CXL 6
+struct vfio_device_info_cap_cxl {
+ struct vfio_info_cap_header header;
+ __u8 hdm_count;
+ __u8 hdm_regs_bar_index;
+ __u64 hdm_regs_size;
+ __u64 hdm_regs_offset;
+ __u64 dpa_size;
+};
+
/**
* VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8,
* struct vfio_region_info)
@@ -371,6 +382,9 @@ struct vfio_region_info_cap_type {
/* sub-types for VFIO_REGION_TYPE_GFX */
#define VFIO_REGION_SUBTYPE_GFX_EDID (1)
+/* sub-types for VFIO CXL region */
+#define VFIO_REGION_SUBTYPE_CXL (1)
+
/**
* struct vfio_region_gfx_edid - EDID region layout.
*
--
2.34.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [RFC 0/1] Introduce vfio-cxl to support CXL type-2 device passthrough
2024-09-21 7:14 [RFC 0/1] Introduce vfio-cxl to support CXL type-2 device passthrough Zhi Wang
2024-09-21 7:14 ` [RFC 1/1] vfio: support CXL device in VFIO stub Zhi Wang
@ 2024-09-26 16:52 ` Cédric Le Goater
1 sibling, 0 replies; 4+ messages in thread
From: Cédric Le Goater @ 2024-09-26 16:52 UTC (permalink / raw)
To: Zhi Wang, kvm, linux-cxl
Cc: alex.williamson, kevin.tian, jgg, alison.schofield,
dan.j.williams, dave.jiang, dave, jonathan.cameron, ira.weiny,
vishal.l.verma, alucerop, qemu-devel, acurrid, cjia, smitra,
ankita, aniketa, kwankhede, targupta, zhiwang
Hello Zhi,
On 9/21/24 09:14, Zhi Wang wrote:
> Compute Express Link (CXL) is an open standard interconnect built upon
> industrial PCI layers to enhance the performance and efficiency of data
> centers by enabling high-speed, low-latency communication between CPUs
> and various types of devices such as accelerators, memory.
>
> Although CXL is built upon the PCI layers, passing a CXL type-2 device can
> be different than PCI devices according to CXL specification. Thus,
> addtional changes on are required.
>
> vfio-cxl is introduced to support the CXL type-2 device passthrough.
> This is the QEMU VFIOStub draft changes to support it.
>
> More details (patches, repos, kernel config) all what you need to test
> and hack around, plus a demo video shows the kernel/QEMU command line
> can be found at:
> https://lore.kernel.org/kvm/20240920223446.1908673-7-zhiw@nvidia.com/T/
I have started looking at the software stack and the QEMU trees
are quite old. Could you please rebase the branches on the latest ?
Also, I think having a single branch per project would be easier.
For linux :
[v2] cxl: add Type2 device support
[RFC] vfio: introduce vfio-cxl to support CXL type-2
[RFC] samples: introduce QEMU CXL accel driver
Same for QEMU.
Thanks,
C.
>
> Zhi Wang (1):
> vfio: support CXL device in VFIO stub
>
> hw/vfio/common.c | 3 +
> hw/vfio/pci.c | 134 ++++++++++++++++++++++++++++++++++
> hw/vfio/pci.h | 10 +++
> include/hw/pci/pci.h | 2 +
> include/hw/vfio/vfio-common.h | 1 +
> linux-headers/linux/vfio.h | 14 ++++
> 6 files changed, 164 insertions(+)
>
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [RFC 1/1] vfio: support CXL device in VFIO stub
2024-09-21 7:14 ` [RFC 1/1] vfio: support CXL device in VFIO stub Zhi Wang
@ 2024-10-11 21:47 ` Alex Williamson
0 siblings, 0 replies; 4+ messages in thread
From: Alex Williamson @ 2024-10-11 21:47 UTC (permalink / raw)
To: Zhi Wang
Cc: kvm, linux-cxl, kevin.tian, jgg, alison.schofield, dan.j.williams,
dave.jiang, dave, jonathan.cameron, ira.weiny, vishal.l.verma,
alucerop, clg, qemu-devel, acurrid, cjia, smitra, ankita, aniketa,
kwankhede, targupta, zhiwang
On Sat, 21 Sep 2024 00:14:40 -0700
Zhi Wang <zhiw@nvidia.com> wrote:
> To support CXL device passthrough, vfio-cxl-core is introduced. This
> is the QEMU part.
>
> Get the CXL caps from the vfio-cxl-core. Trap and emulate the HDM
> decoder registers. Map the HDM decdoers when the guest commits a HDM
> decoder.
It seems like this could all essentially be handled as a quirk, setting
things up based on the CXL flag or CXL device info capability, and the
update could be done in the quirk write handler rather than a new
change notifier callback. Thanks,
Alex
> Signed-off-by: Zhi Wang <zhiw@nvidia.com>
> ---
> hw/vfio/common.c | 3 +
> hw/vfio/pci.c | 134 ++++++++++++++++++++++++++++++++++
> hw/vfio/pci.h | 10 +++
> include/hw/pci/pci.h | 2 +
> include/hw/vfio/vfio-common.h | 1 +
> linux-headers/linux/vfio.h | 14 ++++
> 6 files changed, 164 insertions(+)
>
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index 9aac21abb7..6dea606f62 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -237,6 +237,9 @@ void vfio_region_write(void *opaque, hwaddr addr,
> break;
> }
>
> + if (region->notify_change)
> + region->notify_change(opaque, addr, data, size);
> +
> if (pwrite(vbasedev->fd, &buf, size, region->fd_offset + addr) != size) {
> error_report("%s(%s:region%d+0x%"HWADDR_PRIx", 0x%"PRIx64
> ",%d) failed: %m",
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index a205c6b113..431a588252 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -23,6 +23,7 @@
> #include <sys/ioctl.h>
>
> #include "hw/hw.h"
> +#include "hw/cxl/cxl_component.h"
> #include "hw/pci/msi.h"
> #include "hw/pci/msix.h"
> #include "hw/pci/pci_bridge.h"
> @@ -2743,6 +2744,72 @@ int vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp)
> return 0;
> }
>
> +static bool read_region(VFIORegion *region, uint32_t *val, uint64_t offset)
> +{
> + VFIODevice *vbasedev = region->vbasedev;
> +
> + if (pread(vbasedev->fd, val, 4, region->fd_offset + offset) != 4) {
> + error_report("%s(%s, 0x%lx, 0x%x, 0x%x) failed: %m",
> + __func__,vbasedev->name, offset, *val, 4);
> + return false;
> + }
> + return true;
> +}
> +
> +static void vfio_cxl_hdm_regs_changed(void *opaque, hwaddr addr,
> + uint64_t data, unsigned size)
> +{
> + VFIORegion *region = opaque;
> + VFIODevice *vbasedev = region->vbasedev;
> + VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
> + VFIOCXL *cxl = &vdev->cxl;
> + MemoryRegion *address_space_mem = pci_get_bus(&vdev->pdev)->address_space_mem;
> + uint64_t offset, reg_offset, index;
> + uint32_t cur_val, write_val;
> +
> + if (size != 4 || (addr & 0x3))
> + error_report("hdm_regs_changed: unsupported size or unaligned addr!\n");
> +
> + offset = addr - cxl->hdm_regs_offset;
> + index = (offset - 0x10) / 0x20;
> + reg_offset = offset - 0x20 * index;
> +
> + if (reg_offset != 0x20)
> + return;
> +
> +#define READ_REGION(val, offset) do { \
> + if (!read_region(region, val, offset)) \
> + return; \
> + } while(0)
> +
> + write_val = (uint32_t)data;
> + READ_REGION(&cur_val, cxl->hdm_regs_offset + 0x20 * index + reg_offset);
> +
> + if (!(cur_val & (1 << 10)) && (write_val & (1 << 9))) {
> + memory_region_transaction_begin();
> + memory_region_del_subregion(address_space_mem, cxl->region.mem);
> + memory_region_transaction_commit();
> + } else if (cur_val & (1 << 10) && !(write_val & (1 << 9))) {
> + /* commit -> not commit */
> + uint32_t base_hi, base_lo;
> + uint64_t base;
> +
> + /* locked */
> + if (cur_val & (1 << 8))
> + return;
> +
> + READ_REGION(&base_lo, cxl->hdm_regs_offset + 0x20 * index + 0x10);
> + READ_REGION(&base_hi, cxl->hdm_regs_offset + 0x20 * index + 0x14);
> +
> + base = ((uint64_t)base_hi << 32) | (uint64_t)(base_lo >> 28);
> +
> + memory_region_transaction_begin();
> + memory_region_add_subregion_overlap(address_space_mem,
> + base, cxl->region.mem, 0);
> + memory_region_transaction_commit();
> + }
> +}
> +
> static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
> {
> VFIODevice *vbasedev = &vdev->vbasedev;
> @@ -2780,6 +2847,11 @@ static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
> }
>
> QLIST_INIT(&vdev->bars[i].quirks);
> +
> + if (vbasedev->flags & VFIO_DEVICE_FLAGS_CXL &&
> + i == vdev->cxl.hdm_regs_bar_index) {
> + vdev->bars[i].region.notify_change = vfio_cxl_hdm_regs_changed;
> + }
> }
>
> ret = vfio_get_region_info(vbasedev,
> @@ -2974,6 +3046,62 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
> vdev->req_enabled = false;
> }
>
> +static int vfio_cxl_setup(VFIOPCIDevice *vdev)
> +{
> + VFIODevice *vbasedev = &vdev->vbasedev;
> + struct VFIOCXL *cxl = &vdev->cxl;
> + struct vfio_device_info_cap_cxl *cap;
> + g_autofree struct vfio_device_info *info = NULL;
> + struct vfio_info_cap_header *hdr;
> + struct vfio_region_info *region_info;
> + int ret;
> +
> + if (!(vbasedev->flags & VFIO_DEVICE_FLAGS_CXL))
> + return 0;
> +
> + info = vfio_get_device_info(vbasedev->fd);
> + if (!info) {
> + return -ENODEV;
> + }
> +
> + hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_CXL);
> + if (!hdr) {
> + return -ENODEV;
> + }
> +
> + cap = (void *)hdr;
> +
> + cxl->hdm_count = cap->hdm_count;
> + cxl->hdm_regs_bar_index = cap->hdm_regs_bar_index;
> + cxl->hdm_regs_size = cap->hdm_regs_size;
> + cxl->hdm_regs_offset = cap->hdm_regs_offset;
> + cxl->dpa_size = cap->dpa_size;
> +
> + ret = vfio_get_dev_region_info(vbasedev,
> + VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_CXL,
> + VFIO_REGION_SUBTYPE_CXL, ®ion_info);
> + if (ret) {
> + error_report("does not support requested CXL feature");
> + return ret;
> + }
> +
> + ret = vfio_region_setup(OBJECT(vdev), vbasedev, &cxl->region,
> + region_info->index, "cxl region");
> + if (ret) {
> + error_report("fail to setup CXL region");
> + return ret;
> + }
> +
> + g_free(region_info);
> +
> + if (vfio_region_mmap(&cxl->region)) {
> + error_report("Failed to mmap %s cxl region",
> + vdev->vbasedev.name);
> + return -EFAULT;
> + }
> + return 0;
> +}
> +
> static void vfio_realize(PCIDevice *pdev, Error **errp)
> {
> VFIOPCIDevice *vdev = VFIO_PCI(pdev);
> @@ -3083,6 +3211,12 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
> goto error;
> }
>
> + ret = vfio_cxl_setup(vdev);
> + if (ret) {
> + vfio_put_group(group);
> + goto error;
> + }
> +
> vfio_populate_device(vdev, &err);
> if (err) {
> error_propagate(errp, err);
> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> index a2771b9ff3..6c5f5c1ea5 100644
> --- a/hw/vfio/pci.h
> +++ b/hw/vfio/pci.h
> @@ -118,6 +118,15 @@ typedef struct VFIOMSIXInfo {
> #define TYPE_VFIO_PCI "vfio-pci"
> OBJECT_DECLARE_SIMPLE_TYPE(VFIOPCIDevice, VFIO_PCI)
>
> +typedef struct VFIOCXL {
> + uint8_t hdm_count;
> + uint8_t hdm_regs_bar_index;
> + uint64_t hdm_regs_size;
> + uint64_t hdm_regs_offset;
> + uint64_t dpa_size;
> + VFIORegion region;
> +} VFIOCXL;
> +
> struct VFIOPCIDevice {
> PCIDevice pdev;
> VFIODevice vbasedev;
> @@ -177,6 +186,7 @@ struct VFIOPCIDevice {
> bool clear_parent_atomics_on_exit;
> VFIODisplay *dpy;
> Notifier irqchip_change_notifier;
> + VFIOCXL cxl;
> };
>
> /* Use uin32_t for vendor & device so PCI_ANY_ID expands and cannot match hw */
> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> index b70a0b95ff..fbf5786d00 100644
> --- a/include/hw/pci/pci.h
> +++ b/include/hw/pci/pci.h
> @@ -117,6 +117,8 @@ extern bool pci_available;
> #define PCI_DEVICE_ID_REDHAT_UFS 0x0013
> #define PCI_DEVICE_ID_REDHAT_QXL 0x0100
>
> +#define PCI_VENDOR_ID_CXL 0x1e98
> +
> #define FMT_PCIBUS PRIx64
>
> typedef uint64_t pcibus_t;
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index da43d27352..1c998c3ed6 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -56,6 +56,7 @@ typedef struct VFIORegion {
> uint32_t nr_mmaps;
> VFIOMmap *mmaps;
> uint8_t nr; /* cache the region number for debug */
> + void (*notify_change)(void *, hwaddr, uint64_t, unsigned);
> } VFIORegion;
>
> typedef struct VFIOMigration {
> diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
> index 16db89071e..22fb50ed34 100644
> --- a/linux-headers/linux/vfio.h
> +++ b/linux-headers/linux/vfio.h
> @@ -214,6 +214,7 @@ struct vfio_device_info {
> #define VFIO_DEVICE_FLAGS_FSL_MC (1 << 6) /* vfio-fsl-mc device */
> #define VFIO_DEVICE_FLAGS_CAPS (1 << 7) /* Info supports caps */
> #define VFIO_DEVICE_FLAGS_CDX (1 << 8) /* vfio-cdx device */
> +#define VFIO_DEVICE_FLAGS_CXL (1 << 9) /* vfio-cdx device */
> __u32 num_regions; /* Max region index + 1 */
> __u32 num_irqs; /* Max IRQ index + 1 */
> __u32 cap_offset; /* Offset within info struct of first cap */
> @@ -255,6 +256,16 @@ struct vfio_device_info_cap_pci_atomic_comp {
> __u32 reserved;
> };
>
> +#define VFIO_DEVICE_INFO_CAP_CXL 6
> +struct vfio_device_info_cap_cxl {
> + struct vfio_info_cap_header header;
> + __u8 hdm_count;
> + __u8 hdm_regs_bar_index;
> + __u64 hdm_regs_size;
> + __u64 hdm_regs_offset;
> + __u64 dpa_size;
> +};
> +
> /**
> * VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8,
> * struct vfio_region_info)
> @@ -371,6 +382,9 @@ struct vfio_region_info_cap_type {
> /* sub-types for VFIO_REGION_TYPE_GFX */
> #define VFIO_REGION_SUBTYPE_GFX_EDID (1)
>
> +/* sub-types for VFIO CXL region */
> +#define VFIO_REGION_SUBTYPE_CXL (1)
> +
> /**
> * struct vfio_region_gfx_edid - EDID region layout.
> *
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2024-10-11 21:49 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2024-09-21 7:14 [RFC 0/1] Introduce vfio-cxl to support CXL type-2 device passthrough Zhi Wang
2024-09-21 7:14 ` [RFC 1/1] vfio: support CXL device in VFIO stub Zhi Wang
2024-10-11 21:47 ` Alex Williamson
2024-09-26 16:52 ` [RFC 0/1] Introduce vfio-cxl to support CXL type-2 device passthrough Cédric Le Goater
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).