public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: <mhonap@nvidia.com>
To: <aniketa@nvidia.com>, <ankita@nvidia.com>,
	<alwilliamson@nvidia.com>, <vsethi@nvidia.com>, <jgg@nvidia.com>,
	<mochs@nvidia.com>, <skolothumtho@nvidia.com>,
	<alejandro.lucero-palau@amd.com>, <dave@stgolabs.net>,
	<jonathan.cameron@huawei.com>, <dave.jiang@intel.com>,
	<alison.schofield@intel.com>, <vishal.l.verma@intel.com>,
	<ira.weiny@intel.com>, <dan.j.williams@intel.com>, <jgg@ziepe.ca>,
	<yishaih@nvidia.com>, <kevin.tian@intel.com>
Cc: <cjia@nvidia.com>, <kwankhede@nvidia.com>, <targupta@nvidia.com>,
	<zhiw@nvidia.com>, <kjaju@nvidia.com>,
	<linux-kernel@vger.kernel.org>, <linux-cxl@vger.kernel.org>,
	<kvm@vger.kernel.org>, <mhonap@nvidia.com>
Subject: [RFC v2 13/15] vfio/pci: introduce CXL device awareness
Date: Tue, 9 Dec 2025 22:20:17 +0530	[thread overview]
Message-ID: <20251209165019.2643142-14-mhonap@nvidia.com> (raw)
In-Reply-To: <20251209165019.2643142-1-mhonap@nvidia.com>

From: Zhi Wang <zhiw@nvidia.com>

CXL device programming interfaces are built upon PCI interfaces. Thus
the vfio-pci-core can be leveraged to handle a CXL device.

However, CXL device also has difference with PCI devicce:

- No INTX support, only MSI/MSIX is supported.
- Reset is done via CXL reset. FLR only reset CXL.io.

Introduce the CXL device awareness to the vfio-pci-core. Expose a new
VFIO device flags to the userspace to identify the VFIO device is a CXL
device. Disable INTX support in the vfio-pci-core. Disable FLR reset for
the CXL device as the kernel CXL core hasn't support CXL reset yet.
Disable mmap support on the CXL MMIO BAR in vfio-pci-core.

Signed-off-by: Zhi Wang <zhiw@nvidia.com>
Signed-off-by: Manish Honap <mhonap@nvidia.com>
---
 drivers/vfio/pci/vfio_cxl_core.c | 18 +++++++++++++++++
 drivers/vfio/pci/vfio_pci_core.c | 33 ++++++++++++++++++++++++++++----
 drivers/vfio/pci/vfio_pci_rdwr.c | 11 ++++++++---
 include/linux/vfio_pci_core.h    |  3 +++
 include/uapi/linux/vfio.h        | 10 ++++++++++
 5 files changed, 68 insertions(+), 7 deletions(-)

diff --git a/drivers/vfio/pci/vfio_cxl_core.c b/drivers/vfio/pci/vfio_cxl_core.c
index c0bdf55997da..84e4f42d97de 100644
--- a/drivers/vfio/pci/vfio_cxl_core.c
+++ b/drivers/vfio/pci/vfio_cxl_core.c
@@ -25,6 +25,19 @@
 #define DRIVER_AUTHOR "Zhi Wang <zhiw@nvidia.com>"
 #define DRIVER_DESC "core driver for VFIO based CXL devices"
 
+static void init_cxl_cap(struct vfio_cxl_core_device *cxl)
+{
+	struct vfio_pci_core_device *pci = &cxl->pci_core;
+	struct vfio_device_info_cap_cxl *cap = &pci->cxl_cap;
+
+	cap->header.id = VFIO_DEVICE_INFO_CAP_CXL;
+	cap->header.version = 1;
+	cap->hdm_count = cxl->hdm_count;
+	cap->hdm_reg_offset = cxl->comp_reg_offset + cxl->hdm_reg_offset;
+	cap->hdm_reg_size = cxl->hdm_reg_size;
+	cap->hdm_reg_bar_index = cxl->comp_reg_bar;
+}
+
 /* Standard CXL-type 2 driver initialization sequence */
 static int enable_cxl(struct vfio_cxl_core_device *cxl, u16 dvsec,
 		      struct vfio_cxl_dev_info *info)
@@ -74,6 +87,8 @@ static int enable_cxl(struct vfio_cxl_core_device *cxl, u16 dvsec,
 	if (IS_ERR(cxl_core->cxlmd))
 		return PTR_ERR(cxl_core->cxlmd);
 
+	init_cxl_cap(cxl);
+
 	cxl_core->region.noncached = info->noncached_region;
 
 	return 0;
@@ -266,6 +281,9 @@ int vfio_cxl_core_enable(struct vfio_cxl_core_device *cxl,
 	if (ret)
 		return ret;
 
+	pci->is_cxl = true;
+	pci->comp_reg_bar = cxl->comp_reg_bar;
+
 	ret = vfio_pci_core_enable(pci);
 	if (ret)
 		goto err_pci_core_enable;
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index 502880e927fc..5f8334748841 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -483,7 +483,12 @@ int vfio_pci_core_enable(struct vfio_pci_core_device *vdev)
 		goto out_power;
 
 	/* If reset fails because of the device lock, fail this path entirely */
-	ret = pci_try_reset_function(pdev);
+	if (!vdev->is_cxl)
+		ret = pci_try_reset_function(pdev);
+	else
+		/* TODO: CXL reset support is on-going. */
+		ret = -ENODEV;
+
 	if (ret == -EAGAIN)
 		goto out_disable_device;
 
@@ -618,8 +623,12 @@ void vfio_pci_core_disable(struct vfio_pci_core_device *vdev)
 		if (!vdev->barmap[bar])
 			continue;
 		pci_iounmap(pdev, vdev->barmap[bar]);
-		pci_release_selected_regions(pdev, 1 << bar);
 		vdev->barmap[bar] = NULL;
+
+		if (vdev->is_cxl && i == vdev->comp_reg_bar)
+			continue;
+
+		pci_release_selected_regions(pdev, 1 << bar);
 	}
 
 	list_for_each_entry_safe(dummy_res, tmp,
@@ -960,6 +969,15 @@ static int vfio_pci_ioctl_get_info(struct vfio_pci_core_device *vdev,
 	if (vdev->reset_works)
 		info.flags |= VFIO_DEVICE_FLAGS_RESET;
 
+	if (vdev->is_cxl) {
+		ret = vfio_info_add_capability(&caps, &vdev->cxl_cap.header,
+					       sizeof(vdev->cxl_cap));
+		if (ret)
+			return ret;
+
+		info.flags |= VFIO_DEVICE_FLAGS_CXL;
+	}
+
 	info.num_regions = VFIO_PCI_NUM_REGIONS + vdev->num_regions;
 	info.num_irqs = VFIO_PCI_NUM_IRQS;
 
@@ -1752,14 +1770,21 @@ int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma
 	 * we need to request the region and the barmap tracks that.
 	 */
 	if (!vdev->barmap[index]) {
+		int bars;
+
+		if (vdev->is_cxl && vdev->comp_reg_bar == index)
+			bars = 0;
+		else
+			bars = 1 << index;
+
 		ret = pci_request_selected_regions(pdev,
-						   1 << index, "vfio-pci");
+						   bars, "vfio-pci");
 		if (ret)
 			return ret;
 
 		vdev->barmap[index] = pci_iomap(pdev, index, 0);
 		if (!vdev->barmap[index]) {
-			pci_release_selected_regions(pdev, 1 << index);
+			pci_release_selected_regions(pdev, bars);
 			return -ENOMEM;
 		}
 	}
diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c
index 6192788c8ba3..057cd0c69f2a 100644
--- a/drivers/vfio/pci/vfio_pci_rdwr.c
+++ b/drivers/vfio/pci/vfio_pci_rdwr.c
@@ -201,19 +201,24 @@ EXPORT_SYMBOL_GPL(vfio_pci_core_do_io_rw);
 int vfio_pci_core_setup_barmap(struct vfio_pci_core_device *vdev, int bar)
 {
 	struct pci_dev *pdev = vdev->pdev;
-	int ret;
+	int bars, ret;
 	void __iomem *io;
 
 	if (vdev->barmap[bar])
 		return 0;
 
-	ret = pci_request_selected_regions(pdev, 1 << bar, "vfio");
+	if (vdev->is_cxl && vdev->comp_reg_bar == bar)
+		bars = 0;
+	else
+		bars = 1 << bar;
+
+	ret = pci_request_selected_regions(pdev, bars, "vfio");
 	if (ret)
 		return ret;
 
 	io = pci_iomap(pdev, bar, 0);
 	if (!io) {
-		pci_release_selected_regions(pdev, 1 << bar);
+		pci_release_selected_regions(pdev, bars);
 		return -ENOMEM;
 	}
 
diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
index 8293910e0a96..0a354c7788b3 100644
--- a/include/linux/vfio_pci_core.h
+++ b/include/linux/vfio_pci_core.h
@@ -82,6 +82,9 @@ struct vfio_pci_core_device {
 	bool			needs_pm_restore:1;
 	bool			pm_intx_masked:1;
 	bool			pm_runtime_engaged:1;
+	bool                    is_cxl:1;
+	int                     comp_reg_bar;
+	struct vfio_device_info_cap_cxl cxl_cap;
 	struct pci_saved_state	*pci_saved_state;
 	struct pci_saved_state	*pm_save;
 	int			ioeventfds_nr;
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 95be987d2ed5..0a9968cd6601 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -214,6 +214,7 @@ struct vfio_device_info {
 #define VFIO_DEVICE_FLAGS_FSL_MC (1 << 6)	/* vfio-fsl-mc device */
 #define VFIO_DEVICE_FLAGS_CAPS	(1 << 7)	/* Info supports caps */
 #define VFIO_DEVICE_FLAGS_CDX	(1 << 8)	/* vfio-cdx device */
+#define VFIO_DEVICE_FLAGS_CXL   (1 << 9)        /* Device supports CXL */
 	__u32	num_regions;	/* Max region index + 1 */
 	__u32	num_irqs;	/* Max IRQ index + 1 */
 	__u32   cap_offset;	/* Offset within info struct of first cap */
@@ -256,6 +257,15 @@ struct vfio_device_info_cap_pci_atomic_comp {
 	__u32 reserved;
 };
 
+#define VFIO_DEVICE_INFO_CAP_CXL                6
+struct vfio_device_info_cap_cxl {
+	struct vfio_info_cap_header header;
+	__u8 hdm_count;
+	__u8 hdm_reg_bar_index;
+	__u64 hdm_reg_size;
+	__u64 hdm_reg_offset;
+};
+
 /**
  * VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8,
  *				       struct vfio_region_info)
-- 
2.25.1


  parent reply	other threads:[~2025-12-09 16:53 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-12-09 16:50 [RFC v2 00/15] vfio: introduce vfio-cxl to support CXL type-2 accelerator passthrough Hello all, mhonap
2025-12-09 16:50 ` [RFC v2 01/15] cxl: factor out cxl_await_range_active() and cxl_media_ready() mhonap
2025-12-22 12:21   ` Jonathan Cameron
2025-12-09 16:50 ` [RFC v2 02/15] cxl: introduce cxl_get_hdm_reg_info() mhonap
2025-12-09 16:50 ` [RFC v2 03/15] cxl: introduce cxl_find_comp_reglock_offset() mhonap
2025-12-09 16:50 ` [RFC v2 04/15] cxl: introduce devm_cxl_del_memdev() mhonap
2025-12-09 16:50 ` [RFC v2 05/15] cxl: introduce cxl_get_committed_regions() mhonap
2025-12-22 12:31   ` Jonathan Cameron
2025-12-09 16:50 ` [RFC v2 06/15] vfio/cxl: introduce vfio-cxl core preludes mhonap
2025-12-22 13:54   ` Jonathan Cameron
2025-12-09 16:50 ` [RFC v2 07/15] vfio/cxl: expose CXL region to the userspace via a new VFIO device region mhonap
2025-12-11 16:06   ` Dave Jiang
2025-12-11 17:31     ` Manish Honap
2025-12-11 18:01       ` Dave Jiang
2025-12-22 14:00   ` Jonathan Cameron
2025-12-09 16:50 ` [RFC v2 08/15] vfio/cxl: discover precommitted CXL region mhonap
2025-12-22 14:09   ` Jonathan Cameron
2025-12-09 16:50 ` [RFC v2 09/15] vfio/cxl: introduce vfio_cxl_core_{read, write}() mhonap
2025-12-09 16:50 ` [RFC v2 10/15] vfio/cxl: introduce the register emulation framework mhonap
2025-12-09 16:50 ` [RFC v2 11/15] vfio/cxl: introduce the emulation of HDM registers mhonap
2025-12-11 18:13   ` Dave Jiang
2025-12-09 16:50 ` [RFC v2 12/15] vfio/cxl: introduce the emulation of CXL configuration space mhonap
2025-12-09 16:50 ` mhonap [this message]
2025-12-09 16:50 ` [RFC v2 14/15] vfio/cxl: VFIO variant driver for QEMU CXL accel device mhonap
2025-12-09 16:50 ` [RFC v2 15/15] cxl/mem: Fix NULL pointer deference in memory device paths mhonap

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251209165019.2643142-14-mhonap@nvidia.com \
    --to=mhonap@nvidia.com \
    --cc=alejandro.lucero-palau@amd.com \
    --cc=alison.schofield@intel.com \
    --cc=alwilliamson@nvidia.com \
    --cc=aniketa@nvidia.com \
    --cc=ankita@nvidia.com \
    --cc=cjia@nvidia.com \
    --cc=dan.j.williams@intel.com \
    --cc=dave.jiang@intel.com \
    --cc=dave@stgolabs.net \
    --cc=ira.weiny@intel.com \
    --cc=jgg@nvidia.com \
    --cc=jgg@ziepe.ca \
    --cc=jonathan.cameron@huawei.com \
    --cc=kevin.tian@intel.com \
    --cc=kjaju@nvidia.com \
    --cc=kvm@vger.kernel.org \
    --cc=kwankhede@nvidia.com \
    --cc=linux-cxl@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mochs@nvidia.com \
    --cc=skolothumtho@nvidia.com \
    --cc=targupta@nvidia.com \
    --cc=vishal.l.verma@intel.com \
    --cc=vsethi@nvidia.com \
    --cc=yishaih@nvidia.com \
    --cc=zhiw@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox