public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: <mhonap@nvidia.com>
To: <aniketa@nvidia.com>, <ankita@nvidia.com>,
	<alwilliamson@nvidia.com>, <vsethi@nvidia.com>, <jgg@nvidia.com>,
	<mochs@nvidia.com>, <skolothumtho@nvidia.com>,
	<alejandro.lucero-palau@amd.com>, <dave@stgolabs.net>,
	<jonathan.cameron@huawei.com>, <dave.jiang@intel.com>,
	<alison.schofield@intel.com>, <vishal.l.verma@intel.com>,
	<ira.weiny@intel.com>, <dan.j.williams@intel.com>, <jgg@ziepe.ca>,
	<yishaih@nvidia.com>, <kevin.tian@intel.com>
Cc: <cjia@nvidia.com>, <kwankhede@nvidia.com>, <targupta@nvidia.com>,
	<zhiw@nvidia.com>, <kjaju@nvidia.com>,
	<linux-kernel@vger.kernel.org>, <linux-cxl@vger.kernel.org>,
	<kvm@vger.kernel.org>, <mhonap@nvidia.com>
Subject: [RFC v2 06/15] vfio/cxl: introduce vfio-cxl core preludes
Date: Tue, 9 Dec 2025 22:20:10 +0530	[thread overview]
Message-ID: <20251209165019.2643142-7-mhonap@nvidia.com> (raw)
In-Reply-To: <20251209165019.2643142-1-mhonap@nvidia.com>

From: Manish Honap <mhonap@nvidia.com>

In VFIO, common functions that are used by VFIO variant drivers are
managed in a set of "core" functions. E.g. vfio-pci-core provides the
common functions used by VFIO variant drivers to support PCI device
passthrough.

Although a CXL type-2 device has a PCI-compatible interface for device
configuration and programming, it still needs special handling when
the device is initialized:

- Probing the CXL DVSECs in the configuration.
- Probing the CXL register groups implemented by the device.
- Configuring the CXL device state required by the kernel CXL core.
- Creating the CXL region.
- Special handling of the CXL MMIO BAR.

Introduce vfio-cxl core preludes to hold all the common functions used
by VFIO variant drivers to support CXL device passthrough.

Co-developed-by: Zhi Wang <zhiw@nvidia.com>
Signed-off-by: Zhi Wang <zhiw@nvidia.com>
Signed-off-by: Manish Honap <mhonap@nvidia.com>
---
 drivers/vfio/pci/Kconfig         |  10 ++
 drivers/vfio/pci/Makefile        |   3 +
 drivers/vfio/pci/vfio_cxl_core.c | 238 +++++++++++++++++++++++++++++++
 include/linux/vfio_pci_core.h    |  50 +++++++
 4 files changed, 301 insertions(+)
 create mode 100644 drivers/vfio/pci/vfio_cxl_core.c

diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig
index 2b0172f54665..2f441d118f1c 100644
--- a/drivers/vfio/pci/Kconfig
+++ b/drivers/vfio/pci/Kconfig
@@ -7,6 +7,16 @@ config VFIO_PCI_CORE
 	select VFIO_VIRQFD
 	select IRQ_BYPASS_MANAGER
 
+config VFIO_CXL_CORE
+	tristate "VFIO CXL core"
+	select VFIO_PCI_CORE
+	depends on CXL_BUS
+	help
+	  Support for the generic PCI VFIO-CXL bus driver which can
+	  connect CXL devices to the VFIO framework.
+
+	  If you don't know what to do here, say N.
+
 config VFIO_PCI_INTX
 	def_bool y if !S390
 	depends on VFIO_PCI_CORE
diff --git a/drivers/vfio/pci/Makefile b/drivers/vfio/pci/Makefile
index cf00c0a7e55c..b51221b94b0b 100644
--- a/drivers/vfio/pci/Makefile
+++ b/drivers/vfio/pci/Makefile
@@ -8,6 +8,9 @@ vfio-pci-y := vfio_pci.o
 vfio-pci-$(CONFIG_VFIO_PCI_IGD) += vfio_pci_igd.o
 obj-$(CONFIG_VFIO_PCI) += vfio-pci.o
 
+vfio-cxl-core-y := vfio_cxl_core.o
+obj-$(CONFIG_VFIO_CXL_CORE) += vfio-cxl-core.o
+
 obj-$(CONFIG_MLX5_VFIO_PCI)           += mlx5/
 
 obj-$(CONFIG_HISI_ACC_VFIO_PCI) += hisilicon/
diff --git a/drivers/vfio/pci/vfio_cxl_core.c b/drivers/vfio/pci/vfio_cxl_core.c
new file mode 100644
index 000000000000..cf53720c0cb7
--- /dev/null
+++ b/drivers/vfio/pci/vfio_cxl_core.c
@@ -0,0 +1,238 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/device.h>
+#include <linux/eventfd.h>
+#include <linux/file.h>
+#include <linux/interrupt.h>
+#include <linux/iommu.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/notifier.h>
+#include <linux/pci.h>
+#include <linux/pm_runtime.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+
+#include "vfio_pci_priv.h"
+
+#define DRIVER_AUTHOR "Zhi Wang <zhiw@nvidia.com>"
+#define DRIVER_DESC "core driver for VFIO based CXL devices"
+
+/*
+ * enable_cxl() - CXL type-2 initialization sequence for a passthrough device
+ * @cxl: vfio-cxl device; cxl->cxl_core is read as the CXL state to set up
+ * @dvsec: CXL device DVSEC offset found by the caller (not consumed here)
+ * @info: variant-driver description of the device
+ *
+ * Sets up the component register map, records the HDM decoder register
+ * layout in @cxl, waits for the range to become active unless the device is
+ * flagged as lacking media-ready support, sets the DPA capacity and adds the
+ * memdev.
+ *
+ * NOTE(review): the DPA capacity is hard-coded to SZ_256M; presumably this
+ * should come from @info - confirm.
+ *
+ * Return: 0 on success, negative errno otherwise.
+ */
+static int enable_cxl(struct vfio_cxl_core_device *cxl, u16 dvsec,
+		      struct vfio_cxl_dev_info *info)
+{
+	struct vfio_cxl *state = cxl->cxl_core;
+	struct cxl_dev_state *cxlds = &state->cxlds;
+	struct pci_dev *pdev = cxl->pci_core.pdev;
+	u64 offset, size, count;
+	int rc;
+
+	rc = cxl_pci_setup_regs(pdev, CXL_REGLOC_RBI_COMPONENT,
+				&cxlds->reg_map);
+	if (rc) {
+		pci_err(pdev, "VFIO-CXL: CXL component registers not found\n");
+		return rc;
+	}
+
+	rc = cxl_get_hdm_reg_info(cxlds, &count, &offset, &size);
+	if (rc)
+		return rc;
+
+	if (WARN_ON(!count || !size))
+		return -ENODEV;
+
+	/* Cache the HDM decoder register layout on the vfio-cxl device. */
+	cxl->hdm_count = count;
+	cxl->hdm_reg_offset = offset;
+	cxl->hdm_reg_size = size;
+
+	/*
+	 * Devices without media-ready support (e.g. AMD SFC) skip the wait;
+	 * in both cases the media is marked ready once we get past this point.
+	 */
+	if (!info->no_media_ready && cxl_await_range_active(cxlds))
+		return -ENODEV;
+	cxlds->media_ready = true;
+
+	if (cxl_set_capacity(cxlds, SZ_256M)) {
+		pci_err(pdev, "dpa capacity setup failed\n");
+		return -ENODEV;
+	}
+
+	state->cxlmd = devm_cxl_add_memdev(&pdev->dev, cxlds, NULL);
+	if (IS_ERR(state->cxlmd))
+		return PTR_ERR(state->cxlmd);
+
+	state->region.noncached = info->noncached_region;
+	return 0;
+}
+
+/*
+ * disable_cxl() - release the decoder resources held for a vfio-cxl device
+ * @cxl: vfio-cxl device being torn down
+ *
+ * Detaches and frees the endpoint decoder and drops the root decoder
+ * reference, if they are held. The region is expected to have been destroyed
+ * already; a region that is still present triggers a WARN.
+ *
+ * This can run more than once for the same device (both the close and the
+ * disable entry points end up here), and the decoder pointers may hold an
+ * ERR_PTR left behind by a failed region setup. Only valid pointers are
+ * released, and both are cleared afterwards so a repeated call is a no-op
+ * rather than a double free / double put.
+ */
+static void disable_cxl(struct vfio_cxl_core_device *cxl)
+{
+	struct vfio_cxl *cxl_core = cxl->cxl_core;
+
+	/* Nothing to undo if the CXL state was never created. */
+	if (!cxl_core)
+		return;
+
+	WARN_ON(cxl_core->region.region);
+
+	if (!cxl->hdm_count)
+		return;
+
+	if (!IS_ERR_OR_NULL(cxl_core->cxled)) {
+		cxl_decoder_detach(NULL, cxl_core->cxled, 0, DETACH_INVALIDATE);
+		cxl_dpa_free(cxl_core->cxled);
+	}
+	cxl_core->cxled = NULL;
+
+	if (!IS_ERR_OR_NULL(cxl_core->cxlrd))
+		cxl_put_root_decoder(cxl_core->cxlrd);
+	cxl_core->cxlrd = NULL;
+}
+
+/**
+ * vfio_cxl_core_enable() - enable a CXL type-2 device for VFIO passthrough
+ * @cxl: vfio-cxl core device
+ * @info: device description supplied by the variant driver
+ *
+ * Looks up the CXL device DVSEC, creates the device-managed CXL state,
+ * enables the underlying vfio-pci core device and then runs the CXL
+ * initialization sequence. If the CXL sequence fails, the vfio-pci core
+ * device is disabled again.
+ *
+ * Return: 0 on success, -ENODEV if the device has no CXL device DVSEC,
+ * -ENOMEM if the CXL state cannot be created, or the error returned by
+ * vfio_pci_core_enable() / the CXL initialization sequence.
+ */
+int vfio_cxl_core_enable(struct vfio_cxl_core_device *cxl,
+			 struct vfio_cxl_dev_info *info)
+{
+	struct vfio_pci_core_device *pci = &cxl->pci_core;
+	struct pci_dev *pdev = pci->pdev;
+	struct vfio_cxl *cxl_core;
+	u16 dvsec;
+	int ret;
+
+	dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL,
+					  PCI_DVSEC_CXL_DEVICE);
+	if (!dvsec)
+		return -ENODEV;
+
+	cxl_core = devm_cxl_dev_state_create(&pdev->dev, CXL_DEVTYPE_DEVMEM,
+					     pdev->dev.id, dvsec, struct vfio_cxl,
+					     cxlds, false);
+	if (!cxl_core) {
+		pci_err(pdev, "VFIO-CXL: CXL state creation failed\n");
+		return -ENOMEM;
+	}
+
+	/*
+	 * Publish the new state before anything consumes it: enable_cxl() and
+	 * every later region/teardown path read it through cxl->cxl_core.
+	 */
+	cxl->cxl_core = cxl_core;
+
+	ret = vfio_pci_core_enable(pci);
+	if (ret)
+		return ret;
+
+	ret = enable_cxl(cxl, dvsec, info);
+	if (ret)
+		goto err;
+
+	return 0;
+
+err:
+	vfio_pci_core_disable(pci);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(vfio_cxl_core_enable);
+
+/**
+ * vfio_cxl_core_finish_enable() - complete enabling of a vfio-cxl device
+ * @cxl: vfio-cxl core device
+ *
+ * Forwards to vfio_pci_core_finish_enable() on the embedded vfio-pci core
+ * device.
+ */
+void vfio_cxl_core_finish_enable(struct vfio_cxl_core_device *cxl)
+{
+	vfio_pci_core_finish_enable(&cxl->pci_core);
+}
+EXPORT_SYMBOL_GPL(vfio_cxl_core_finish_enable);
+
+/*
+ * disable_device() - CXL-specific teardown shared by the disable and close
+ * entry points; currently this only calls disable_cxl().
+ */
+static void disable_device(struct vfio_cxl_core_device *cxl)
+{
+	disable_cxl(cxl);
+}
+
+/**
+ * vfio_cxl_core_disable() - disable a vfio-cxl device
+ * @cxl: vfio-cxl core device
+ *
+ * Runs the CXL-specific teardown first, then disables the embedded vfio-pci
+ * core device.
+ */
+void vfio_cxl_core_disable(struct vfio_cxl_core_device *cxl)
+{
+	struct vfio_pci_core_device *pci = &cxl->pci_core;
+
+	disable_device(cxl);
+	vfio_pci_core_disable(pci);
+}
+EXPORT_SYMBOL_GPL(vfio_cxl_core_disable);
+
+/**
+ * vfio_cxl_core_close_device() - close_device handler for vfio-cxl devices
+ * @vdev: VFIO device embedded in the vfio-pci core device
+ *
+ * Resolves @vdev to its vfio-cxl device, runs the CXL-specific teardown and
+ * then hands over to vfio_pci_core_close_device().
+ */
+void vfio_cxl_core_close_device(struct vfio_device *vdev)
+{
+	struct vfio_pci_core_device *pci_core =
+		container_of(vdev, struct vfio_pci_core_device, vdev);
+
+	disable_device(vfio_pci_core_to_cxl(pci_core));
+	vfio_pci_core_close_device(vdev);
+}
+EXPORT_SYMBOL_GPL(vfio_cxl_core_close_device);
+
+/*
+ * get_hpa_and_request_dpa() - reserve HPA space and DPA for a new region
+ * @cxl: vfio-cxl device
+ * @size: number of bytes needed
+ *
+ * Looks up a root decoder with free HPA space for a single-way RAM type-2
+ * mapping and requests @size bytes of RAM-partition DPA. The decoder
+ * pointers are stored in the device state only when both steps succeed, so
+ * the state never holds an ERR_PTR; on failure the root decoder obtained
+ * here is put again before returning.
+ *
+ * Return: 0 on success, -ENOSPC if the available HPA space is smaller than
+ * @size, or the error encoded in the pointer returned by
+ * cxl_get_hpa_freespace() / cxl_request_dpa().
+ */
+static int get_hpa_and_request_dpa(struct vfio_cxl_core_device *cxl, u64 size)
+{
+	struct vfio_cxl *cxl_core = cxl->cxl_core;
+	struct cxl_endpoint_decoder *cxled;
+	struct cxl_root_decoder *cxlrd;
+	u64 max;
+	int ret;
+
+	cxlrd = cxl_get_hpa_freespace(cxl_core->cxlmd, 1,
+				      CXL_DECODER_F_RAM |
+				      CXL_DECODER_F_TYPE2,
+				      &max);
+	if (IS_ERR(cxlrd))
+		return PTR_ERR(cxlrd);
+
+	if (max < size) {
+		ret = -ENOSPC;
+		goto err_put_cxlrd;
+	}
+
+	cxled = cxl_request_dpa(cxl_core->cxlmd, CXL_PARTMODE_RAM, size);
+	if (IS_ERR(cxled)) {
+		ret = PTR_ERR(cxled);
+		goto err_put_cxlrd;
+	}
+
+	cxl_core->cxlrd = cxlrd;
+	cxl_core->cxled = cxled;
+	return 0;
+
+err_put_cxlrd:
+	/* Balance the lookup above, as the teardown paths do on success. */
+	cxl_put_root_decoder(cxlrd);
+	return ret;
+}
+
+/**
+ * vfio_cxl_core_create_cxl_region() - create the CXL region for a device
+ * @cxl: vfio-cxl core device
+ * @size: requested region size in bytes
+ *
+ * Reserves HPA space and DPA, creates the CXL region and records its start
+ * address and size in the device state. Only one region may exist at a
+ * time.
+ *
+ * If region creation fails, the DPA is freed, the root decoder is put and
+ * both decoder pointers are cleared, so that later teardown does not release
+ * them a second time.
+ *
+ * Return: 0 on success, -EEXIST if a region already exists, or a negative
+ * errno from the HPA/DPA reservation or from cxl_create_region().
+ */
+int vfio_cxl_core_create_cxl_region(struct vfio_cxl_core_device *cxl, u64 size)
+{
+	struct vfio_cxl *cxl_core = cxl->cxl_core;
+	struct cxl_region *region;
+	struct range range;
+	int ret;
+
+	if (WARN_ON(cxl_core->region.region))
+		return -EEXIST;
+
+	ret = get_hpa_and_request_dpa(cxl, size);
+	if (ret)
+		return ret;
+
+	region = cxl_create_region(cxl_core->cxlrd, &cxl_core->cxled, true);
+	if (IS_ERR(region)) {
+		ret = PTR_ERR(region);
+		goto err_release;
+	}
+
+	cxl_get_region_range(region, &range);
+
+	cxl_core->region.addr = range.start;
+	cxl_core->region.size = size;
+	cxl_core->region.region = region;
+	return 0;
+
+err_release:
+	cxl_dpa_free(cxl_core->cxled);
+	cxl_core->cxled = NULL;
+	cxl_put_root_decoder(cxl_core->cxlrd);
+	cxl_core->cxlrd = NULL;
+	return ret;
+}
+EXPORT_SYMBOL_GPL(vfio_cxl_core_create_cxl_region);
+
+/**
+ * vfio_cxl_core_destroy_cxl_region() - tear down the device's CXL region
+ * @cxl: vfio-cxl core device
+ *
+ * Detaches the endpoint decoder, puts the root decoder and frees the DPA.
+ * Does nothing when no region exists.
+ *
+ * The decoder pointers are cleared along with the region pointer: the
+ * disable path releases whatever decoders are still recorded in the state,
+ * so leaving them set here would cause a second detach/free/put.
+ */
+void vfio_cxl_core_destroy_cxl_region(struct vfio_cxl_core_device *cxl)
+{
+	struct vfio_cxl *cxl_core = cxl->cxl_core;
+
+	if (!cxl_core || !cxl_core->region.region)
+		return;
+
+	cxl_decoder_detach(NULL, cxl_core->cxled, 0, DETACH_INVALIDATE);
+	cxl_put_root_decoder(cxl_core->cxlrd);
+	cxl_dpa_free(cxl_core->cxled);
+
+	cxl_core->cxlrd = NULL;
+	cxl_core->cxled = NULL;
+	cxl_core->region.region = NULL;
+}
+EXPORT_SYMBOL_GPL(vfio_cxl_core_destroy_cxl_region);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_IMPORT_NS("CXL");
+MODULE_SOFTDEP("pre: cxl_core cxl_port cxl_acpi cxl-mem");
diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
index f541044e42a2..a343b91d2580 100644
--- a/include/linux/vfio_pci_core.h
+++ b/include/linux/vfio_pci_core.h
@@ -15,6 +15,8 @@
 #include <linux/types.h>
 #include <linux/uuid.h>
 #include <linux/notifier.h>
+#include <cxl/cxl.h>
+#include <cxl/pci.h>
 
 #ifndef VFIO_PCI_CORE_H
 #define VFIO_PCI_CORE_H
@@ -96,6 +98,40 @@ struct vfio_pci_core_device {
 	struct rw_semaphore	memory_lock;
 };
 
+/**
+ * struct vfio_cxl_region - CXL region tracked for a vfio-cxl device
+ * @region: the created CXL region, or NULL when none exists
+ * @size: size that was requested when the region was created
+ * @addr: start of the range reported for the region
+ * @noncached: copied from the variant driver's noncached_region setting
+ */
+struct vfio_cxl_region {
+	struct cxl_region *region;
+	u64 size;
+	u64 addr;
+	bool noncached;
+};
+
+/**
+ * struct vfio_cxl - CXL state of a vfio-cxl device
+ * @cxlds: embedded CXL device state; this structure is allocated through
+ *	   devm_cxl_dev_state_create() with @cxlds named as the member
+ *	   (NOTE(review): presumably it has to stay the first member - confirm)
+ * @cxlmd: memdev returned by devm_cxl_add_memdev()
+ * @cxlrd: root decoder returned by cxl_get_hpa_freespace()
+ * @endpoint: endpoint port (not referenced by vfio_cxl_core.c in this patch)
+ * @cxled: endpoint decoder returned by cxl_request_dpa()
+ * @region: the CXL region created for the device
+ */
+struct vfio_cxl {
+	struct cxl_dev_state cxlds;
+	struct cxl_memdev *cxlmd;
+	struct cxl_root_decoder *cxlrd;
+	struct cxl_port *endpoint;
+	struct cxl_endpoint_decoder *cxled;
+
+	struct vfio_cxl_region region;
+};
+
+/**
+ * struct vfio_cxl_core_device - vfio-pci core device with CXL state attached
+ * @pci_core: embedded vfio-pci core device; vfio_pci_core_to_cxl() maps a
+ *	      pointer to it back to this structure
+ * @cxl_core: CXL state for the device
+ * @hdm_count: count value reported by cxl_get_hdm_reg_info()
+ * @hdm_reg_offset: offset value reported by cxl_get_hdm_reg_info()
+ * @hdm_reg_size: size value reported by cxl_get_hdm_reg_info()
+ */
+struct vfio_cxl_core_device {
+	struct vfio_pci_core_device pci_core;
+	struct vfio_cxl *cxl_core;
+
+	u32 hdm_count;
+	u64 hdm_reg_offset;
+	u64 hdm_reg_size;
+};
+
+/**
+ * struct vfio_cxl_dev_info - device description passed to vfio_cxl_core_enable()
+ * @dev_caps: device capability bitmap (not consumed by vfio_cxl_core.c in
+ *	      this patch)
+ * @dpa_res: DPA resource (not consumed by vfio_cxl_core.c in this patch)
+ * @ram_res: RAM resource (not consumed by vfio_cxl_core.c in this patch)
+ * @no_media_ready: when set, enabling skips cxl_await_range_active()
+ * @noncached_region: copied into the region's noncached flag on enable
+ */
+struct vfio_cxl_dev_info {
+	unsigned long *dev_caps;
+	struct resource dpa_res;
+	struct resource ram_res;
+	bool no_media_ready;
+	bool noncached_region;
+};
+
 /* Will be exported for vfio pci drivers usage */
 int vfio_pci_core_register_dev_region(struct vfio_pci_core_device *vdev,
 				      unsigned int type, unsigned int subtype,
@@ -161,4 +197,18 @@ VFIO_IOREAD_DECLARATION(32)
 VFIO_IOREAD_DECLARATION(64)
 #endif
 
+/*
+ * Map an embedded vfio-pci core device back to its enclosing vfio-cxl
+ * device. Only valid for a @pci that is the pci_core member of a
+ * struct vfio_cxl_core_device.
+ */
+static inline struct vfio_cxl_core_device *
+vfio_pci_core_to_cxl(struct vfio_pci_core_device *pci)
+{
+	return container_of(pci, struct vfio_cxl_core_device, pci_core);
+}
+
+int vfio_cxl_core_enable(struct vfio_cxl_core_device *cxl,
+			 struct vfio_cxl_dev_info *info);
+void vfio_cxl_core_finish_enable(struct vfio_cxl_core_device *cxl);
+void vfio_cxl_core_disable(struct vfio_cxl_core_device *cxl);
+void vfio_cxl_core_close_device(struct vfio_device *vdev);
+int vfio_cxl_core_create_cxl_region(struct vfio_cxl_core_device *cxl, u64 size);
+void vfio_cxl_core_destroy_cxl_region(struct vfio_cxl_core_device *cxl);
+
 #endif /* VFIO_PCI_CORE_H */
-- 
2.25.1


  parent reply	other threads:[~2025-12-09 16:52 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-12-09 16:50 [RFC v2 00/15] vfio: introduce vfio-cxl to support CXL type-2 accelerator passthrough Hello all, mhonap
2025-12-09 16:50 ` [RFC v2 01/15] cxl: factor out cxl_await_range_active() and cxl_media_ready() mhonap
2025-12-22 12:21   ` Jonathan Cameron
2025-12-09 16:50 ` [RFC v2 02/15] cxl: introduce cxl_get_hdm_reg_info() mhonap
2025-12-09 16:50 ` [RFC v2 03/15] cxl: introduce cxl_find_comp_reglock_offset() mhonap
2025-12-09 16:50 ` [RFC v2 04/15] cxl: introduce devm_cxl_del_memdev() mhonap
2025-12-09 16:50 ` [RFC v2 05/15] cxl: introduce cxl_get_committed_regions() mhonap
2025-12-22 12:31   ` Jonathan Cameron
2025-12-09 16:50 ` mhonap [this message]
2025-12-22 13:54   ` [RFC v2 06/15] vfio/cxl: introduce vfio-cxl core preludes Jonathan Cameron
2025-12-09 16:50 ` [RFC v2 07/15] vfio/cxl: expose CXL region to the userspace via a new VFIO device region mhonap
2025-12-11 16:06   ` Dave Jiang
2025-12-11 17:31     ` Manish Honap
2025-12-11 18:01       ` Dave Jiang
2025-12-22 14:00   ` Jonathan Cameron
2025-12-09 16:50 ` [RFC v2 08/15] vfio/cxl: discover precommitted CXL region mhonap
2025-12-22 14:09   ` Jonathan Cameron
2025-12-09 16:50 ` [RFC v2 09/15] vfio/cxl: introduce vfio_cxl_core_{read, write}() mhonap
2025-12-09 16:50 ` [RFC v2 10/15] vfio/cxl: introduce the register emulation framework mhonap
2025-12-09 16:50 ` [RFC v2 11/15] vfio/cxl: introduce the emulation of HDM registers mhonap
2025-12-11 18:13   ` Dave Jiang
2025-12-09 16:50 ` [RFC v2 12/15] vfio/cxl: introduce the emulation of CXL configuration space mhonap
2025-12-09 16:50 ` [RFC v2 13/15] vfio/pci: introduce CXL device awareness mhonap
2025-12-09 16:50 ` [RFC v2 14/15] vfio/cxl: VFIO variant driver for QEMU CXL accel device mhonap
2025-12-09 16:50 ` [RFC v2 15/15] cxl/mem: Fix NULL pointer deference in memory device paths mhonap

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251209165019.2643142-7-mhonap@nvidia.com \
    --to=mhonap@nvidia.com \
    --cc=alejandro.lucero-palau@amd.com \
    --cc=alison.schofield@intel.com \
    --cc=alwilliamson@nvidia.com \
    --cc=aniketa@nvidia.com \
    --cc=ankita@nvidia.com \
    --cc=cjia@nvidia.com \
    --cc=dan.j.williams@intel.com \
    --cc=dave.jiang@intel.com \
    --cc=dave@stgolabs.net \
    --cc=ira.weiny@intel.com \
    --cc=jgg@nvidia.com \
    --cc=jgg@ziepe.ca \
    --cc=jonathan.cameron@huawei.com \
    --cc=kevin.tian@intel.com \
    --cc=kjaju@nvidia.com \
    --cc=kvm@vger.kernel.org \
    --cc=kwankhede@nvidia.com \
    --cc=linux-cxl@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mochs@nvidia.com \
    --cc=skolothumtho@nvidia.com \
    --cc=targupta@nvidia.com \
    --cc=vishal.l.verma@intel.com \
    --cc=vsethi@nvidia.com \
    --cc=yishaih@nvidia.com \
    --cc=zhiw@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox