All of lore.kernel.org
 help / color / mirror / Atom feed
From: <mhonap@nvidia.com>
To: <aniketa@nvidia.com>, <ankita@nvidia.com>,
	<alwilliamson@nvidia.com>, <vsethi@nvidia.com>, <jgg@nvidia.com>,
	<mochs@nvidia.com>, <skolothumtho@nvidia.com>,
	<alejandro.lucero-palau@amd.com>, <dave@stgolabs.net>,
	<jonathan.cameron@huawei.com>, <dave.jiang@intel.com>,
	<alison.schofield@intel.com>, <vishal.l.verma@intel.com>,
	<ira.weiny@intel.com>, <dan.j.williams@intel.com>, <jgg@ziepe.ca>,
	<yishaih@nvidia.com>, <kevin.tian@intel.com>
Cc: <cjia@nvidia.com>, <kwankhede@nvidia.com>, <targupta@nvidia.com>,
	<zhiw@nvidia.com>, <kjaju@nvidia.com>,
	<linux-kernel@vger.kernel.org>, <linux-cxl@vger.kernel.org>,
	<kvm@vger.kernel.org>, <mhonap@nvidia.com>
Subject: [RFC v2 06/15] vfio/cxl: introduce vfio-cxl core preludes
Date: Tue, 9 Dec 2025 22:20:10 +0530	[thread overview]
Message-ID: <20251209165019.2643142-7-mhonap@nvidia.com> (raw)
In-Reply-To: <20251209165019.2643142-1-mhonap@nvidia.com>

From: Manish Honap <mhonap@nvidia.com>

In VFIO, common functions that used by VFIO variant drivers are managed
in a set of "core" functions. E.g. the vfio-pci-core provides the common
functions used by VFIO variant drviers to support PCI device
passhthrough.

Although the CXL type-2 device has a PCI-compatible interface for device
configuration and programming, they still needs special handlings when
initialize the device:

- Probing the CXL DVSECs in the configuration.
- Probing the CXL register groups implemented by the device.
- Configuring the CXL device state required by the kernel CXL core.
- Create the CXL region.
- Special handlings of the CXL MMIO BAR.

Introduce vfio-cxl core preludes to hold all the common functions used
by VFIO variant drivers to support CXL device passthrough.

Co-developed-by: Zhi Wang <zhiw@nvidia.com>
Signed-off-by: Zhi Wang <zhiw@nvidia.com>
Signed-off-by: Manish Honap <mhonap@nvidia.com>
---
 drivers/vfio/pci/Kconfig         |  10 ++
 drivers/vfio/pci/Makefile        |   3 +
 drivers/vfio/pci/vfio_cxl_core.c | 238 +++++++++++++++++++++++++++++++
 include/linux/vfio_pci_core.h    |  50 +++++++
 4 files changed, 301 insertions(+)
 create mode 100644 drivers/vfio/pci/vfio_cxl_core.c

diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig
index 2b0172f54665..2f441d118f1c 100644
--- a/drivers/vfio/pci/Kconfig
+++ b/drivers/vfio/pci/Kconfig
@@ -7,6 +7,16 @@ config VFIO_PCI_CORE
 	select VFIO_VIRQFD
 	select IRQ_BYPASS_MANAGER
 
+config VFIO_CXL_CORE
+	tristate "VFIO CXL core"
+	select VFIO_PCI_CORE
+	depends on CXL_BUS
+	help
+	  Support for the generic PCI VFIO-CXL bus driver which can
+	  connect CXL devices to the VFIO framework.
+
+	  If you don't know what to do here, say N.
+
 config VFIO_PCI_INTX
 	def_bool y if !S390
 	depends on VFIO_PCI_CORE
diff --git a/drivers/vfio/pci/Makefile b/drivers/vfio/pci/Makefile
index cf00c0a7e55c..b51221b94b0b 100644
--- a/drivers/vfio/pci/Makefile
+++ b/drivers/vfio/pci/Makefile
@@ -8,6 +8,9 @@ vfio-pci-y := vfio_pci.o
 vfio-pci-$(CONFIG_VFIO_PCI_IGD) += vfio_pci_igd.o
 obj-$(CONFIG_VFIO_PCI) += vfio-pci.o
 
+vfio-cxl-core-y := vfio_cxl_core.o
+obj-$(CONFIG_VFIO_CXL_CORE) += vfio-cxl-core.o
+
 obj-$(CONFIG_MLX5_VFIO_PCI)           += mlx5/
 
 obj-$(CONFIG_HISI_ACC_VFIO_PCI) += hisilicon/
diff --git a/drivers/vfio/pci/vfio_cxl_core.c b/drivers/vfio/pci/vfio_cxl_core.c
new file mode 100644
index 000000000000..cf53720c0cb7
--- /dev/null
+++ b/drivers/vfio/pci/vfio_cxl_core.c
@@ -0,0 +1,238 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/device.h>
+#include <linux/eventfd.h>
+#include <linux/file.h>
+#include <linux/interrupt.h>
+#include <linux/iommu.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/notifier.h>
+#include <linux/pci.h>
+#include <linux/pm_runtime.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+
+#include "vfio_pci_priv.h"
+
+#define DRIVER_AUTHOR "Zhi Wang <zhiw@nvidia.com>"
+#define DRIVER_DESC "core driver for VFIO based CXL devices"
+
+/* Standard CXL-type 2 driver initialization sequence */
+static int enable_cxl(struct vfio_cxl_core_device *cxl, u16 dvsec,
+		      struct vfio_cxl_dev_info *info)
+{
+	struct vfio_pci_core_device *pci = &cxl->pci_core;
+	struct vfio_cxl *cxl_core = cxl->cxl_core;
+	struct pci_dev *pdev = pci->pdev;
+	u64 offset, size, count;
+	int ret;
+
+	ret = cxl_pci_setup_regs(pdev, CXL_REGLOC_RBI_COMPONENT,
+				 &cxl_core->cxlds.reg_map);
+	if (ret) {
+		pci_err(pdev, "VFIO-CXL: CXL component registers not found\n");
+		return ret;
+	}
+
+	ret = cxl_get_hdm_reg_info(&cxl_core->cxlds, &count, &offset, &size);
+	if (ret)
+		return ret;
+
+	if (WARN_ON(!count || !size))
+		return -ENODEV;
+
+	cxl->hdm_count = count;
+	cxl->hdm_reg_offset = offset;
+	cxl->hdm_reg_size = size;
+
+	if (!info->no_media_ready) {
+		ret = cxl_await_range_active(&cxl_core->cxlds);
+		if (ret)
+			return -ENODEV;
+
+		cxl_core->cxlds.media_ready = true;
+	} else {
+		/* Some devices don't have media ready support. E.g. AMD SFC. */
+		cxl_core->cxlds.media_ready = true;
+	}
+
+	if (cxl_set_capacity(&cxl_core->cxlds, SZ_256M)) {
+		pci_err(pdev, "dpa capacity setup failed\n");
+		return -ENODEV;
+	}
+
+	cxl_core->cxlmd = devm_cxl_add_memdev(&pdev->dev,
+					      &cxl_core->cxlds, NULL);
+	if (IS_ERR(cxl_core->cxlmd))
+		return PTR_ERR(cxl_core->cxlmd);
+
+	cxl_core->region.noncached = info->noncached_region;
+
+	return 0;
+}
+
+static void disable_cxl(struct vfio_cxl_core_device *cxl)
+{
+	struct vfio_cxl *cxl_core = cxl->cxl_core;
+
+	WARN_ON(cxl_core->region.region);
+
+	if (!cxl->hdm_count)
+		return;
+
+	if (cxl_core->cxled) {
+		cxl_decoder_detach(NULL, cxl_core->cxled, 0, DETACH_INVALIDATE);
+		cxl_dpa_free(cxl_core->cxled);
+	}
+
+	if (cxl_core->cxlrd)
+		cxl_put_root_decoder(cxl_core->cxlrd);
+}
+
+int vfio_cxl_core_enable(struct vfio_cxl_core_device *cxl,
+			 struct vfio_cxl_dev_info *info)
+{
+	struct vfio_pci_core_device *pci = &cxl->pci_core;
+	struct pci_dev *pdev = pci->pdev;
+	struct vfio_cxl *cxl_core = cxl->cxl_core;
+	u16 dvsec;
+	int ret;
+
+	dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL,
+					  PCI_DVSEC_CXL_DEVICE);
+	if (!dvsec)
+		return -ENODEV;
+
+	cxl_core = devm_cxl_dev_state_create(&pdev->dev, CXL_DEVTYPE_DEVMEM,
+					     pdev->dev.id, dvsec, struct vfio_cxl,
+					     cxlds, false);
+	if (!cxl_core) {
+		pci_err(pdev, "VFIO-CXL: CXL state creation failed");
+		return -ENOMEM;
+	}
+
+	ret = vfio_pci_core_enable(pci);
+	if (ret)
+		return ret;
+
+	ret = enable_cxl(cxl, dvsec, info);
+	if (ret)
+		goto err;
+
+	return 0;
+
+err:
+	vfio_pci_core_disable(pci);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(vfio_cxl_core_enable);
+
+void vfio_cxl_core_finish_enable(struct vfio_cxl_core_device *cxl)
+{
+	struct vfio_pci_core_device *pci = &cxl->pci_core;
+
+	vfio_pci_core_finish_enable(pci);
+}
+EXPORT_SYMBOL_GPL(vfio_cxl_core_finish_enable);
+
+static void disable_device(struct vfio_cxl_core_device *cxl)
+{
+	disable_cxl(cxl);
+}
+
+void vfio_cxl_core_disable(struct vfio_cxl_core_device *cxl)
+{
+	disable_device(cxl);
+	vfio_pci_core_disable(&cxl->pci_core);
+}
+EXPORT_SYMBOL_GPL(vfio_cxl_core_disable);
+
+void vfio_cxl_core_close_device(struct vfio_device *vdev)
+{
+	struct vfio_pci_core_device *pci =
+		container_of(vdev, struct vfio_pci_core_device, vdev);
+	struct vfio_cxl_core_device *cxl = vfio_pci_core_to_cxl(pci);
+
+	disable_device(cxl);
+	vfio_pci_core_close_device(vdev);
+}
+EXPORT_SYMBOL_GPL(vfio_cxl_core_close_device);
+
+static int get_hpa_and_request_dpa(struct vfio_cxl_core_device *cxl, u64 size)
+{
+	u64 max;
+	struct vfio_cxl *cxl_core = cxl->cxl_core;
+
+	cxl_core->cxlrd = cxl_get_hpa_freespace(cxl_core->cxlmd, 1,
+						CXL_DECODER_F_RAM |
+						CXL_DECODER_F_TYPE2,
+						&max);
+	if (IS_ERR(cxl_core->cxlrd))
+		return PTR_ERR(cxl_core->cxlrd);
+
+	if (max < size)
+		return -ENOSPC;
+
+	cxl_core->cxled = cxl_request_dpa(cxl_core->cxlmd, CXL_PARTMODE_RAM, size);
+	if (IS_ERR(cxl_core->cxled))
+		return PTR_ERR(cxl_core->cxled);
+
+	return 0;
+}
+
+int vfio_cxl_core_create_cxl_region(struct vfio_cxl_core_device *cxl, u64 size)
+{
+	struct cxl_region *region;
+	struct range range;
+	int ret;
+	struct vfio_cxl *cxl_core = cxl->cxl_core;
+
+	if (WARN_ON(cxl_core->region.region))
+		return -EEXIST;
+
+	ret = get_hpa_and_request_dpa(cxl, size);
+	if (ret)
+		return ret;
+
+	region = cxl_create_region(cxl_core->cxlrd, &cxl_core->cxled, true);
+	if (IS_ERR(region)) {
+		ret = PTR_ERR(region);
+		cxl_dpa_free(cxl_core->cxled);
+		return ret;
+	}
+
+	cxl_get_region_range(region, &range);
+
+	cxl_core->region.addr = range.start;
+	cxl_core->region.size = size;
+	cxl_core->region.region = region;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(vfio_cxl_core_create_cxl_region);
+
+void vfio_cxl_core_destroy_cxl_region(struct vfio_cxl_core_device *cxl)
+{
+	struct vfio_cxl *cxl_core = cxl->cxl_core;
+
+	if (!cxl_core->region.region)
+		return;
+
+	cxl_decoder_detach(NULL, cxl_core->cxled, 0, DETACH_INVALIDATE);
+	cxl_put_root_decoder(cxl_core->cxlrd);
+	cxl_dpa_free(cxl_core->cxled);
+	cxl_core->region.region = NULL;
+}
+EXPORT_SYMBOL_GPL(vfio_cxl_core_destroy_cxl_region);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_IMPORT_NS("CXL");
+MODULE_SOFTDEP("pre: cxl_core cxl_port cxl_acpi cxl-mem");
diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
index f541044e42a2..a343b91d2580 100644
--- a/include/linux/vfio_pci_core.h
+++ b/include/linux/vfio_pci_core.h
@@ -15,6 +15,8 @@
 #include <linux/types.h>
 #include <linux/uuid.h>
 #include <linux/notifier.h>
+#include <cxl/cxl.h>
+#include <cxl/pci.h>
 
 #ifndef VFIO_PCI_CORE_H
 #define VFIO_PCI_CORE_H
@@ -96,6 +98,40 @@ struct vfio_pci_core_device {
 	struct rw_semaphore	memory_lock;
 };
 
+struct vfio_cxl_region {
+	struct cxl_region *region;
+	u64 size;
+	u64 addr;
+	bool noncached;
+};
+
+struct vfio_cxl {
+	struct cxl_dev_state cxlds;
+	struct cxl_memdev *cxlmd;
+	struct cxl_root_decoder *cxlrd;
+	struct cxl_port *endpoint;
+	struct cxl_endpoint_decoder *cxled;
+
+	struct vfio_cxl_region region;
+};
+
+struct vfio_cxl_core_device {
+	struct vfio_pci_core_device pci_core;
+	struct vfio_cxl *cxl_core;
+
+	u32 hdm_count;
+	u64 hdm_reg_offset;
+	u64 hdm_reg_size;
+};
+
+struct vfio_cxl_dev_info {
+	unsigned long *dev_caps;
+	struct resource dpa_res;
+	struct resource ram_res;
+	bool no_media_ready;
+	bool noncached_region;
+};
+
 /* Will be exported for vfio pci drivers usage */
 int vfio_pci_core_register_dev_region(struct vfio_pci_core_device *vdev,
 				      unsigned int type, unsigned int subtype,
@@ -161,4 +197,18 @@ VFIO_IOREAD_DECLARATION(32)
 VFIO_IOREAD_DECLARATION(64)
 #endif
 
+static inline struct vfio_cxl_core_device *
+vfio_pci_core_to_cxl(struct vfio_pci_core_device *pci)
+{
+	return container_of(pci, struct vfio_cxl_core_device, pci_core);
+}
+
+int vfio_cxl_core_enable(struct vfio_cxl_core_device *cxl,
+			 struct vfio_cxl_dev_info *info);
+void vfio_cxl_core_finish_enable(struct vfio_cxl_core_device *cxl);
+void vfio_cxl_core_disable(struct vfio_cxl_core_device *cxl);
+void vfio_cxl_core_close_device(struct vfio_device *vdev);
+int vfio_cxl_core_create_cxl_region(struct vfio_cxl_core_device *cxl, u64 size);
+void vfio_cxl_core_destroy_cxl_region(struct vfio_cxl_core_device *cxl);
+
 #endif /* VFIO_PCI_CORE_H */
-- 
2.25.1


  parent reply	other threads:[~2025-12-09 16:52 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-12-09 16:50 [RFC v2 00/15] vfio: introduce vfio-cxl to support CXL type-2 accelerator passthrough Hello all, mhonap
2025-12-09 16:50 ` [RFC v2 01/15] cxl: factor out cxl_await_range_active() and cxl_media_ready() mhonap
2025-12-22 12:21   ` Jonathan Cameron
2025-12-09 16:50 ` [RFC v2 02/15] cxl: introduce cxl_get_hdm_reg_info() mhonap
2025-12-09 16:50 ` [RFC v2 03/15] cxl: introduce cxl_find_comp_reglock_offset() mhonap
2025-12-09 16:50 ` [RFC v2 04/15] cxl: introduce devm_cxl_del_memdev() mhonap
2025-12-09 16:50 ` [RFC v2 05/15] cxl: introduce cxl_get_committed_regions() mhonap
2025-12-22 12:31   ` Jonathan Cameron
2025-12-09 16:50 ` mhonap [this message]
2025-12-22 13:54   ` [RFC v2 06/15] vfio/cxl: introduce vfio-cxl core preludes Jonathan Cameron
2025-12-09 16:50 ` [RFC v2 07/15] vfio/cxl: expose CXL region to the userspace via a new VFIO device region mhonap
2025-12-11 16:06   ` Dave Jiang
2025-12-11 17:31     ` Manish Honap
2025-12-11 18:01       ` Dave Jiang
2025-12-22 14:00   ` Jonathan Cameron
2025-12-09 16:50 ` [RFC v2 08/15] vfio/cxl: discover precommitted CXL region mhonap
2025-12-22 14:09   ` Jonathan Cameron
2025-12-09 16:50 ` [RFC v2 09/15] vfio/cxl: introduce vfio_cxl_core_{read, write}() mhonap
2025-12-09 16:50 ` [RFC v2 10/15] vfio/cxl: introduce the register emulation framework mhonap
2025-12-09 16:50 ` [RFC v2 11/15] vfio/cxl: introduce the emulation of HDM registers mhonap
2025-12-11 18:13   ` Dave Jiang
2025-12-09 16:50 ` [RFC v2 12/15] vfio/cxl: introduce the emulation of CXL configuration space mhonap
2025-12-09 16:50 ` [RFC v2 13/15] vfio/pci: introduce CXL device awareness mhonap
2025-12-09 16:50 ` [RFC v2 14/15] vfio/cxl: VFIO variant driver for QEMU CXL accel device mhonap
2025-12-09 16:50 ` [RFC v2 15/15] cxl/mem: Fix NULL pointer deference in memory device paths mhonap

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251209165019.2643142-7-mhonap@nvidia.com \
    --to=mhonap@nvidia.com \
    --cc=alejandro.lucero-palau@amd.com \
    --cc=alison.schofield@intel.com \
    --cc=alwilliamson@nvidia.com \
    --cc=aniketa@nvidia.com \
    --cc=ankita@nvidia.com \
    --cc=cjia@nvidia.com \
    --cc=dan.j.williams@intel.com \
    --cc=dave.jiang@intel.com \
    --cc=dave@stgolabs.net \
    --cc=ira.weiny@intel.com \
    --cc=jgg@nvidia.com \
    --cc=jgg@ziepe.ca \
    --cc=jonathan.cameron@huawei.com \
    --cc=kevin.tian@intel.com \
    --cc=kjaju@nvidia.com \
    --cc=kvm@vger.kernel.org \
    --cc=kwankhede@nvidia.com \
    --cc=linux-cxl@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mochs@nvidia.com \
    --cc=skolothumtho@nvidia.com \
    --cc=targupta@nvidia.com \
    --cc=vishal.l.verma@intel.com \
    --cc=vsethi@nvidia.com \
    --cc=yishaih@nvidia.com \
    --cc=zhiw@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.