qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [RFC 0/1] Introduce vfio-cxl to support CXL type-2 device passthrough
@ 2024-09-21  7:14 Zhi Wang
  2024-09-21  7:14 ` [RFC 1/1] vfio: support CXL device in VFIO stub Zhi Wang
  2024-09-26 16:52 ` [RFC 0/1] Introduce vfio-cxl to support CXL type-2 device passthrough Cédric Le Goater
  0 siblings, 2 replies; 4+ messages in thread
From: Zhi Wang @ 2024-09-21  7:14 UTC (permalink / raw)
  To: kvm, linux-cxl
  Cc: alex.williamson, kevin.tian, jgg, alison.schofield,
	dan.j.williams, dave.jiang, dave, jonathan.cameron, ira.weiny,
	vishal.l.verma, alucerop, clg, qemu-devel, acurrid, cjia, smitra,
	ankita, aniketa, kwankhede, targupta, zhiw, zhiwang

Compute Express Link (CXL) is an open standard interconnect built upon
industrial PCI layers to enhance the performance and efficiency of data
centers by enabling high-speed, low-latency communication between CPUs
and various types of devices, such as accelerators and memory.

Although CXL is built upon the PCI layers, passing through a CXL type-2
device can differ from passing through a PCI device, according to the CXL
specification. Thus, additional changes are required.

vfio-cxl is introduced to support the CXL type-2 device passthrough.
These are the draft QEMU VFIO stub changes to support it.

More details (patches, repos, kernel config), everything you need to test
and hack around, plus a demo video showing the kernel/QEMU command line,
can be found at:
https://lore.kernel.org/kvm/20240920223446.1908673-7-zhiw@nvidia.com/T/

Zhi Wang (1):
  vfio: support CXL device in VFIO stub

 hw/vfio/common.c              |   3 +
 hw/vfio/pci.c                 | 134 ++++++++++++++++++++++++++++++++++
 hw/vfio/pci.h                 |  10 +++
 include/hw/pci/pci.h          |   2 +
 include/hw/vfio/vfio-common.h |   1 +
 linux-headers/linux/vfio.h    |  14 ++++
 6 files changed, 164 insertions(+)

-- 
2.34.1



^ permalink raw reply	[flat|nested] 4+ messages in thread

* [RFC 1/1] vfio: support CXL device in VFIO stub
  2024-09-21  7:14 [RFC 0/1] Introduce vfio-cxl to support CXL type-2 device passthrough Zhi Wang
@ 2024-09-21  7:14 ` Zhi Wang
  2024-10-11 21:47   ` Alex Williamson
  2024-09-26 16:52 ` [RFC 0/1] Introduce vfio-cxl to support CXL type-2 device passthrough Cédric Le Goater
  1 sibling, 1 reply; 4+ messages in thread
From: Zhi Wang @ 2024-09-21  7:14 UTC (permalink / raw)
  To: kvm, linux-cxl
  Cc: alex.williamson, kevin.tian, jgg, alison.schofield,
	dan.j.williams, dave.jiang, dave, jonathan.cameron, ira.weiny,
	vishal.l.verma, alucerop, clg, qemu-devel, acurrid, cjia, smitra,
	ankita, aniketa, kwankhede, targupta, zhiw, zhiwang

To support CXL device passthrough, vfio-cxl-core is introduced. This
is the QEMU part.

Get the CXL caps from the vfio-cxl-core. Trap and emulate the HDM
decoder registers. Map the HDM decoders when the guest commits an HDM
decoder.

Signed-off-by: Zhi Wang <zhiw@nvidia.com>
---
 hw/vfio/common.c              |   3 +
 hw/vfio/pci.c                 | 134 ++++++++++++++++++++++++++++++++++
 hw/vfio/pci.h                 |  10 +++
 include/hw/pci/pci.h          |   2 +
 include/hw/vfio/vfio-common.h |   1 +
 linux-headers/linux/vfio.h    |  14 ++++
 6 files changed, 164 insertions(+)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 9aac21abb7..6dea606f62 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -237,6 +237,9 @@ void vfio_region_write(void *opaque, hwaddr addr,
         break;
     }
 
+    if (region->notify_change)
+        region->notify_change(opaque, addr, data, size);
+
     if (pwrite(vbasedev->fd, &buf, size, region->fd_offset + addr) != size) {
         error_report("%s(%s:region%d+0x%"HWADDR_PRIx", 0x%"PRIx64
                      ",%d) failed: %m",
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index a205c6b113..431a588252 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -23,6 +23,7 @@
 #include <sys/ioctl.h>
 
 #include "hw/hw.h"
+#include "hw/cxl/cxl_component.h"
 #include "hw/pci/msi.h"
 #include "hw/pci/msix.h"
 #include "hw/pci/pci_bridge.h"
@@ -2743,6 +2744,72 @@ int vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp)
     return 0;
 }
 
+static bool read_region(VFIORegion *region, uint32_t *val, uint64_t offset)
+{
+    VFIODevice *vbasedev = region->vbasedev;
+
+    if (pread(vbasedev->fd, val, 4, region->fd_offset + offset) != 4) {
+        error_report("%s(%s, 0x%lx, 0x%x, 0x%x) failed: %m",
+                     __func__,vbasedev->name, offset, *val, 4);
+        return false;
+    }
+    return true;
+}
+
+static void vfio_cxl_hdm_regs_changed(void *opaque, hwaddr addr,
+                                      uint64_t data, unsigned size)
+{
+    VFIORegion *region = opaque;
+    VFIODevice *vbasedev = region->vbasedev;
+    VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
+    VFIOCXL *cxl = &vdev->cxl;
+    MemoryRegion *address_space_mem = pci_get_bus(&vdev->pdev)->address_space_mem;
+    uint64_t offset, reg_offset, index;
+    uint32_t cur_val, write_val;
+
+    if (size != 4 || (addr & 0x3))
+        error_report("hdm_regs_changed: unsupported size or unaligned addr!\n");
+
+    offset = addr - cxl->hdm_regs_offset;
+    index = (offset - 0x10) / 0x20;
+    reg_offset = offset - 0x20 * index;
+
+    if (reg_offset != 0x20)
+        return;
+
+#define READ_REGION(val, offset) do { \
+    if (!read_region(region, val, offset)) \
+        return; \
+    } while(0)
+
+    write_val = (uint32_t)data;
+    READ_REGION(&cur_val, cxl->hdm_regs_offset + 0x20 * index + reg_offset);
+
+    if (!(cur_val & (1 << 10)) && (write_val & (1 << 9))) {
+        memory_region_transaction_begin();
+        memory_region_del_subregion(address_space_mem, cxl->region.mem);
+        memory_region_transaction_commit();
+    } else if (cur_val & (1 << 10) && !(write_val & (1 << 9))) {
+        /* commit -> not commit */
+        uint32_t base_hi, base_lo;
+        uint64_t base;
+
+        /* locked */
+        if (cur_val & (1 << 8))
+            return;
+
+        READ_REGION(&base_lo, cxl->hdm_regs_offset +  0x20 * index + 0x10);
+        READ_REGION(&base_hi, cxl->hdm_regs_offset +  0x20 * index + 0x14);
+
+        base = ((uint64_t)base_hi << 32) | (uint64_t)(base_lo >> 28);
+
+        memory_region_transaction_begin();
+        memory_region_add_subregion_overlap(address_space_mem,
+                                            base, cxl->region.mem, 0);
+        memory_region_transaction_commit();
+    }
+}
+
 static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
 {
     VFIODevice *vbasedev = &vdev->vbasedev;
@@ -2780,6 +2847,11 @@ static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
         }
 
         QLIST_INIT(&vdev->bars[i].quirks);
+
+        if (vbasedev->flags & VFIO_DEVICE_FLAGS_CXL &&
+            i == vdev->cxl.hdm_regs_bar_index) {
+            vdev->bars[i].region.notify_change = vfio_cxl_hdm_regs_changed;
+        }
     }
 
     ret = vfio_get_region_info(vbasedev,
@@ -2974,6 +3046,62 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
     vdev->req_enabled = false;
 }
 
+static int vfio_cxl_setup(VFIOPCIDevice *vdev)
+{
+    VFIODevice *vbasedev = &vdev->vbasedev;
+    struct VFIOCXL *cxl = &vdev->cxl;
+    struct vfio_device_info_cap_cxl *cap;
+    g_autofree struct vfio_device_info *info = NULL;
+    struct vfio_info_cap_header *hdr;
+    struct vfio_region_info *region_info;
+    int ret;
+
+    if (!(vbasedev->flags & VFIO_DEVICE_FLAGS_CXL))
+        return 0;
+
+    info = vfio_get_device_info(vbasedev->fd);
+    if (!info) {
+        return -ENODEV;
+    }
+
+    hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_CXL);
+    if (!hdr) {
+        return -ENODEV;
+    }
+
+    cap = (void *)hdr;
+
+    cxl->hdm_count = cap->hdm_count;
+    cxl->hdm_regs_bar_index = cap->hdm_regs_bar_index;
+    cxl->hdm_regs_size = cap->hdm_regs_size;
+    cxl->hdm_regs_offset = cap->hdm_regs_offset;
+    cxl->dpa_size = cap->dpa_size;
+
+    ret = vfio_get_dev_region_info(vbasedev,
+            VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_CXL,
+            VFIO_REGION_SUBTYPE_CXL, &region_info);
+    if (ret) {
+        error_report("does not support requested CXL feature");
+        return ret;
+    }
+
+    ret = vfio_region_setup(OBJECT(vdev), vbasedev, &cxl->region,
+            region_info->index, "cxl region");
+    if (ret) {
+        error_report("fail to setup CXL region");
+        return ret;
+    }
+
+    g_free(region_info);
+
+    if (vfio_region_mmap(&cxl->region)) {
+        error_report("Failed to mmap %s cxl region",
+                     vdev->vbasedev.name);
+        return -EFAULT;
+    }
+    return 0;
+}
+
 static void vfio_realize(PCIDevice *pdev, Error **errp)
 {
     VFIOPCIDevice *vdev = VFIO_PCI(pdev);
@@ -3083,6 +3211,12 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
         goto error;
     }
 
+    ret = vfio_cxl_setup(vdev);
+    if (ret) {
+        vfio_put_group(group);
+        goto error;
+    }
+
     vfio_populate_device(vdev, &err);
     if (err) {
         error_propagate(errp, err);
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index a2771b9ff3..6c5f5c1ea5 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -118,6 +118,15 @@ typedef struct VFIOMSIXInfo {
 #define TYPE_VFIO_PCI "vfio-pci"
 OBJECT_DECLARE_SIMPLE_TYPE(VFIOPCIDevice, VFIO_PCI)
 
+typedef struct VFIOCXL {
+    uint8_t hdm_count;
+    uint8_t hdm_regs_bar_index;
+    uint64_t hdm_regs_size;
+    uint64_t hdm_regs_offset;
+    uint64_t dpa_size;
+    VFIORegion region;
+} VFIOCXL;
+
 struct VFIOPCIDevice {
     PCIDevice pdev;
     VFIODevice vbasedev;
@@ -177,6 +186,7 @@ struct VFIOPCIDevice {
     bool clear_parent_atomics_on_exit;
     VFIODisplay *dpy;
     Notifier irqchip_change_notifier;
+    VFIOCXL cxl;
 };
 
 /* Use uin32_t for vendor & device so PCI_ANY_ID expands and cannot match hw */
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index b70a0b95ff..fbf5786d00 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -117,6 +117,8 @@ extern bool pci_available;
 #define PCI_DEVICE_ID_REDHAT_UFS         0x0013
 #define PCI_DEVICE_ID_REDHAT_QXL         0x0100
 
+#define PCI_VENDOR_ID_CXL                0x1e98
+
 #define FMT_PCIBUS                      PRIx64
 
 typedef uint64_t pcibus_t;
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index da43d27352..1c998c3ed6 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -56,6 +56,7 @@ typedef struct VFIORegion {
     uint32_t nr_mmaps;
     VFIOMmap *mmaps;
     uint8_t nr; /* cache the region number for debug */
+    void (*notify_change)(void *, hwaddr, uint64_t, unsigned);
 } VFIORegion;
 
 typedef struct VFIOMigration {
diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
index 16db89071e..22fb50ed34 100644
--- a/linux-headers/linux/vfio.h
+++ b/linux-headers/linux/vfio.h
@@ -214,6 +214,7 @@ struct vfio_device_info {
 #define VFIO_DEVICE_FLAGS_FSL_MC (1 << 6)	/* vfio-fsl-mc device */
 #define VFIO_DEVICE_FLAGS_CAPS	(1 << 7)	/* Info supports caps */
 #define VFIO_DEVICE_FLAGS_CDX	(1 << 8)	/* vfio-cdx device */
+#define VFIO_DEVICE_FLAGS_CXL	(1 << 9)	/* vfio-cxl device */
 	__u32	num_regions;	/* Max region index + 1 */
 	__u32	num_irqs;	/* Max IRQ index + 1 */
 	__u32   cap_offset;	/* Offset within info struct of first cap */
@@ -255,6 +256,16 @@ struct vfio_device_info_cap_pci_atomic_comp {
 	__u32 reserved;
 };
 
+#define VFIO_DEVICE_INFO_CAP_CXL               6
+struct vfio_device_info_cap_cxl {
+	struct vfio_info_cap_header header;
+	__u8 hdm_count;
+	__u8 hdm_regs_bar_index;
+	__u64 hdm_regs_size;
+	__u64 hdm_regs_offset;
+	__u64 dpa_size;
+};
+
 /**
  * VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8,
  *				       struct vfio_region_info)
@@ -371,6 +382,9 @@ struct vfio_region_info_cap_type {
 /* sub-types for VFIO_REGION_TYPE_GFX */
 #define VFIO_REGION_SUBTYPE_GFX_EDID            (1)
 
+/* sub-types for VFIO CXL region */
+#define VFIO_REGION_SUBTYPE_CXL                 (1)
+
 /**
  * struct vfio_region_gfx_edid - EDID region layout.
  *
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [RFC 0/1] Introduce vfio-cxl to support CXL type-2 device passthrough
  2024-09-21  7:14 [RFC 0/1] Introduce vfio-cxl to support CXL type-2 device passthrough Zhi Wang
  2024-09-21  7:14 ` [RFC 1/1] vfio: support CXL device in VFIO stub Zhi Wang
@ 2024-09-26 16:52 ` Cédric Le Goater
  1 sibling, 0 replies; 4+ messages in thread
From: Cédric Le Goater @ 2024-09-26 16:52 UTC (permalink / raw)
  To: Zhi Wang, kvm, linux-cxl
  Cc: alex.williamson, kevin.tian, jgg, alison.schofield,
	dan.j.williams, dave.jiang, dave, jonathan.cameron, ira.weiny,
	vishal.l.verma, alucerop, qemu-devel, acurrid, cjia, smitra,
	ankita, aniketa, kwankhede, targupta, zhiwang

Hello Zhi,

On 9/21/24 09:14, Zhi Wang wrote:
> Compute Express Link (CXL) is an open standard interconnect built upon
> industrial PCI layers to enhance the performance and efficiency of data
> centers by enabling high-speed, low-latency communication between CPUs
> and various types of devices such as accelerators, memory.
> 
> Although CXL is built upon the PCI layers, passing a CXL type-2 device can
> be different than PCI devices according to CXL specification. Thus,
> additional changes are required.
> 
> vfio-cxl is introduced to support the CXL type-2 device passthrough.
> This is the QEMU VFIOStub draft changes to support it.
> 
> More details (patches, repos, kernel config) all what you need to test
> and hack around, plus a demo video shows the kernel/QEMU command line
> can be found at:
> https://lore.kernel.org/kvm/20240920223446.1908673-7-zhiw@nvidia.com/T/


I have started looking at the software stack and the QEMU trees
are quite old. Could you please rebase the branches on the latest?

Also, I think having a single branch per project would be easier.

For linux :
   [v2] cxl: add Type2 device support
   [RFC] vfio: introduce vfio-cxl to support CXL type-2
   [RFC] samples: introduce QEMU CXL accel driver

Same for QEMU.

Thanks,

C.



> 
> Zhi Wang (1):
>    vfio: support CXL device in VFIO stub
> 
>   hw/vfio/common.c              |   3 +
>   hw/vfio/pci.c                 | 134 ++++++++++++++++++++++++++++++++++
>   hw/vfio/pci.h                 |  10 +++
>   include/hw/pci/pci.h          |   2 +
>   include/hw/vfio/vfio-common.h |   1 +
>   linux-headers/linux/vfio.h    |  14 ++++
>   6 files changed, 164 insertions(+)
> 



^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [RFC 1/1] vfio: support CXL device in VFIO stub
  2024-09-21  7:14 ` [RFC 1/1] vfio: support CXL device in VFIO stub Zhi Wang
@ 2024-10-11 21:47   ` Alex Williamson
  0 siblings, 0 replies; 4+ messages in thread
From: Alex Williamson @ 2024-10-11 21:47 UTC (permalink / raw)
  To: Zhi Wang
  Cc: kvm, linux-cxl, kevin.tian, jgg, alison.schofield, dan.j.williams,
	dave.jiang, dave, jonathan.cameron, ira.weiny, vishal.l.verma,
	alucerop, clg, qemu-devel, acurrid, cjia, smitra, ankita, aniketa,
	kwankhede, targupta, zhiwang

On Sat, 21 Sep 2024 00:14:40 -0700
Zhi Wang <zhiw@nvidia.com> wrote:

> To support CXL device passthrough, vfio-cxl-core is introduced. This
> is the QEMU part.
> 
> Get the CXL caps from the vfio-cxl-core. Trap and emulate the HDM
> decoder registers. Map the HDM decoders when the guest commits a HDM
> decoder.

It seems like this could all essentially be handled as a quirk, setting
things up based on the CXL flag or CXL device info capability, and the
update could be done in the quirk write handler rather than a new
change notifier callback.  Thanks,

Alex

> Signed-off-by: Zhi Wang <zhiw@nvidia.com>
> ---
>  hw/vfio/common.c              |   3 +
>  hw/vfio/pci.c                 | 134 ++++++++++++++++++++++++++++++++++
>  hw/vfio/pci.h                 |  10 +++
>  include/hw/pci/pci.h          |   2 +
>  include/hw/vfio/vfio-common.h |   1 +
>  linux-headers/linux/vfio.h    |  14 ++++
>  6 files changed, 164 insertions(+)
> 
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index 9aac21abb7..6dea606f62 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -237,6 +237,9 @@ void vfio_region_write(void *opaque, hwaddr addr,
>          break;
>      }
>  
> +    if (region->notify_change)
> +        region->notify_change(opaque, addr, data, size);
> +
>      if (pwrite(vbasedev->fd, &buf, size, region->fd_offset + addr) != size) {
>          error_report("%s(%s:region%d+0x%"HWADDR_PRIx", 0x%"PRIx64
>                       ",%d) failed: %m",
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index a205c6b113..431a588252 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -23,6 +23,7 @@
>  #include <sys/ioctl.h>
>  
>  #include "hw/hw.h"
> +#include "hw/cxl/cxl_component.h"
>  #include "hw/pci/msi.h"
>  #include "hw/pci/msix.h"
>  #include "hw/pci/pci_bridge.h"
> @@ -2743,6 +2744,72 @@ int vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp)
>      return 0;
>  }
>  
> +static bool read_region(VFIORegion *region, uint32_t *val, uint64_t offset)
> +{
> +    VFIODevice *vbasedev = region->vbasedev;
> +
> +    if (pread(vbasedev->fd, val, 4, region->fd_offset + offset) != 4) {
> +        error_report("%s(%s, 0x%lx, 0x%x, 0x%x) failed: %m",
> +                     __func__,vbasedev->name, offset, *val, 4);
> +        return false;
> +    }
> +    return true;
> +}
> +
> +static void vfio_cxl_hdm_regs_changed(void *opaque, hwaddr addr,
> +                                      uint64_t data, unsigned size)
> +{
> +    VFIORegion *region = opaque;
> +    VFIODevice *vbasedev = region->vbasedev;
> +    VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
> +    VFIOCXL *cxl = &vdev->cxl;
> +    MemoryRegion *address_space_mem = pci_get_bus(&vdev->pdev)->address_space_mem;
> +    uint64_t offset, reg_offset, index;
> +    uint32_t cur_val, write_val;
> +
> +    if (size != 4 || (addr & 0x3))
> +        error_report("hdm_regs_changed: unsupported size or unaligned addr!\n");
> +
> +    offset = addr - cxl->hdm_regs_offset;
> +    index = (offset - 0x10) / 0x20;
> +    reg_offset = offset - 0x20 * index;
> +
> +    if (reg_offset != 0x20)
> +        return;
> +
> +#define READ_REGION(val, offset) do { \
> +    if (!read_region(region, val, offset)) \
> +        return; \
> +    } while(0)
> +
> +    write_val = (uint32_t)data;
> +    READ_REGION(&cur_val, cxl->hdm_regs_offset + 0x20 * index + reg_offset);
> +
> +    if (!(cur_val & (1 << 10)) && (write_val & (1 << 9))) {
> +        memory_region_transaction_begin();
> +        memory_region_del_subregion(address_space_mem, cxl->region.mem);
> +        memory_region_transaction_commit();
> +    } else if (cur_val & (1 << 10) && !(write_val & (1 << 9))) {
> +        /* commit -> not commit */
> +        uint32_t base_hi, base_lo;
> +        uint64_t base;
> +
> +        /* locked */
> +        if (cur_val & (1 << 8))
> +            return;
> +
> +        READ_REGION(&base_lo, cxl->hdm_regs_offset +  0x20 * index + 0x10);
> +        READ_REGION(&base_hi, cxl->hdm_regs_offset +  0x20 * index + 0x14);
> +
> +        base = ((uint64_t)base_hi << 32) | (uint64_t)(base_lo >> 28);
> +
> +        memory_region_transaction_begin();
> +        memory_region_add_subregion_overlap(address_space_mem,
> +                                            base, cxl->region.mem, 0);
> +        memory_region_transaction_commit();
> +    }
> +}
> +
>  static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
>  {
>      VFIODevice *vbasedev = &vdev->vbasedev;
> @@ -2780,6 +2847,11 @@ static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
>          }
>  
>          QLIST_INIT(&vdev->bars[i].quirks);
> +
> +        if (vbasedev->flags & VFIO_DEVICE_FLAGS_CXL &&
> +            i == vdev->cxl.hdm_regs_bar_index) {
> +            vdev->bars[i].region.notify_change = vfio_cxl_hdm_regs_changed;
> +        }
>      }
>  
>      ret = vfio_get_region_info(vbasedev,
> @@ -2974,6 +3046,62 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
>      vdev->req_enabled = false;
>  }
>  
> +static int vfio_cxl_setup(VFIOPCIDevice *vdev)
> +{
> +    VFIODevice *vbasedev = &vdev->vbasedev;
> +    struct VFIOCXL *cxl = &vdev->cxl;
> +    struct vfio_device_info_cap_cxl *cap;
> +    g_autofree struct vfio_device_info *info = NULL;
> +    struct vfio_info_cap_header *hdr;
> +    struct vfio_region_info *region_info;
> +    int ret;
> +
> +    if (!(vbasedev->flags & VFIO_DEVICE_FLAGS_CXL))
> +        return 0;
> +
> +    info = vfio_get_device_info(vbasedev->fd);
> +    if (!info) {
> +        return -ENODEV;
> +    }
> +
> +    hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_CXL);
> +    if (!hdr) {
> +        return -ENODEV;
> +    }
> +
> +    cap = (void *)hdr;
> +
> +    cxl->hdm_count = cap->hdm_count;
> +    cxl->hdm_regs_bar_index = cap->hdm_regs_bar_index;
> +    cxl->hdm_regs_size = cap->hdm_regs_size;
> +    cxl->hdm_regs_offset = cap->hdm_regs_offset;
> +    cxl->dpa_size = cap->dpa_size;
> +
> +    ret = vfio_get_dev_region_info(vbasedev,
> +            VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_CXL,
> +            VFIO_REGION_SUBTYPE_CXL, &region_info);
> +    if (ret) {
> +        error_report("does not support requested CXL feature");
> +        return ret;
> +    }
> +
> +    ret = vfio_region_setup(OBJECT(vdev), vbasedev, &cxl->region,
> +            region_info->index, "cxl region");
> +    if (ret) {
> +        error_report("fail to setup CXL region");
> +        return ret;
> +    }
> +
> +    g_free(region_info);
> +
> +    if (vfio_region_mmap(&cxl->region)) {
> +        error_report("Failed to mmap %s cxl region",
> +                     vdev->vbasedev.name);
> +        return -EFAULT;
> +    }
> +    return 0;
> +}
> +
>  static void vfio_realize(PCIDevice *pdev, Error **errp)
>  {
>      VFIOPCIDevice *vdev = VFIO_PCI(pdev);
> @@ -3083,6 +3211,12 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>          goto error;
>      }
>  
> +    ret = vfio_cxl_setup(vdev);
> +    if (ret) {
> +        vfio_put_group(group);
> +        goto error;
> +    }
> +
>      vfio_populate_device(vdev, &err);
>      if (err) {
>          error_propagate(errp, err);
> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> index a2771b9ff3..6c5f5c1ea5 100644
> --- a/hw/vfio/pci.h
> +++ b/hw/vfio/pci.h
> @@ -118,6 +118,15 @@ typedef struct VFIOMSIXInfo {
>  #define TYPE_VFIO_PCI "vfio-pci"
>  OBJECT_DECLARE_SIMPLE_TYPE(VFIOPCIDevice, VFIO_PCI)
>  
> +typedef struct VFIOCXL {
> +    uint8_t hdm_count;
> +    uint8_t hdm_regs_bar_index;
> +    uint64_t hdm_regs_size;
> +    uint64_t hdm_regs_offset;
> +    uint64_t dpa_size;
> +    VFIORegion region;
> +} VFIOCXL;
> +
>  struct VFIOPCIDevice {
>      PCIDevice pdev;
>      VFIODevice vbasedev;
> @@ -177,6 +186,7 @@ struct VFIOPCIDevice {
>      bool clear_parent_atomics_on_exit;
>      VFIODisplay *dpy;
>      Notifier irqchip_change_notifier;
> +    VFIOCXL cxl;
>  };
>  
>  /* Use uin32_t for vendor & device so PCI_ANY_ID expands and cannot match hw */
> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> index b70a0b95ff..fbf5786d00 100644
> --- a/include/hw/pci/pci.h
> +++ b/include/hw/pci/pci.h
> @@ -117,6 +117,8 @@ extern bool pci_available;
>  #define PCI_DEVICE_ID_REDHAT_UFS         0x0013
>  #define PCI_DEVICE_ID_REDHAT_QXL         0x0100
>  
> +#define PCI_VENDOR_ID_CXL                0x1e98
> +
>  #define FMT_PCIBUS                      PRIx64
>  
>  typedef uint64_t pcibus_t;
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index da43d27352..1c998c3ed6 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -56,6 +56,7 @@ typedef struct VFIORegion {
>      uint32_t nr_mmaps;
>      VFIOMmap *mmaps;
>      uint8_t nr; /* cache the region number for debug */
> +    void (*notify_change)(void *, hwaddr, uint64_t, unsigned);
>  } VFIORegion;
>  
>  typedef struct VFIOMigration {
> diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
> index 16db89071e..22fb50ed34 100644
> --- a/linux-headers/linux/vfio.h
> +++ b/linux-headers/linux/vfio.h
> @@ -214,6 +214,7 @@ struct vfio_device_info {
>  #define VFIO_DEVICE_FLAGS_FSL_MC (1 << 6)	/* vfio-fsl-mc device */
>  #define VFIO_DEVICE_FLAGS_CAPS	(1 << 7)	/* Info supports caps */
>  #define VFIO_DEVICE_FLAGS_CDX	(1 << 8)	/* vfio-cdx device */
> +#define VFIO_DEVICE_FLAGS_CXL	(1 << 9)	/* vfio-cdx device */
>  	__u32	num_regions;	/* Max region index + 1 */
>  	__u32	num_irqs;	/* Max IRQ index + 1 */
>  	__u32   cap_offset;	/* Offset within info struct of first cap */
> @@ -255,6 +256,16 @@ struct vfio_device_info_cap_pci_atomic_comp {
>  	__u32 reserved;
>  };
>  
> +#define VFIO_DEVICE_INFO_CAP_CXL               6
> +struct vfio_device_info_cap_cxl {
> +	struct vfio_info_cap_header header;
> +	__u8 hdm_count;
> +	__u8 hdm_regs_bar_index;
> +	__u64 hdm_regs_size;
> +	__u64 hdm_regs_offset;
> +	__u64 dpa_size;
> +};
> +
>  /**
>   * VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8,
>   *				       struct vfio_region_info)
> @@ -371,6 +382,9 @@ struct vfio_region_info_cap_type {
>  /* sub-types for VFIO_REGION_TYPE_GFX */
>  #define VFIO_REGION_SUBTYPE_GFX_EDID            (1)
>  
> +/* sub-types for VFIO CXL region */
> +#define VFIO_REGION_SUBTYPE_CXL                 (1)
> +
>  /**
>   * struct vfio_region_gfx_edid - EDID region layout.
>   *



^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2024-10-11 21:49 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-09-21  7:14 [RFC 0/1] Introduce vfio-cxl to support CXL type-2 device passthrough Zhi Wang
2024-09-21  7:14 ` [RFC 1/1] vfio: support CXL device in VFIO stub Zhi Wang
2024-10-11 21:47   ` Alex Williamson
2024-09-26 16:52 ` [RFC 0/1] Introduce vfio-cxl to support CXL type-2 device passthrough Cédric Le Goater

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).