All of lore.kernel.org
 help / color / mirror / Atom feed
From: Zhi Wang <zhiw@nvidia.com>
To: <kvm@vger.kernel.org>, <linux-cxl@vger.kernel.org>
Cc: <alex.williamson@redhat.com>, <kevin.tian@intel.com>,
	<jgg@nvidia.com>, <alison.schofield@intel.com>,
	<dan.j.williams@intel.com>, <dave.jiang@intel.com>,
	<dave@stgolabs.net>, <jonathan.cameron@huawei.com>,
	<ira.weiny@intel.com>, <vishal.l.verma@intel.com>,
	<alucerop@amd.com>, <acurrid@nvidia.com>, <cjia@nvidia.com>,
	<smitra@nvidia.com>, <ankita@nvidia.com>, <aniketa@nvidia.com>,
	<kwankhede@nvidia.com>, <targupta@nvidia.com>, <zhiw@nvidia.com>,
	<zhiwang@kernel.org>
Subject: [RFC 04/13] vfio: introduce vfio-cxl core preludes
Date: Fri, 20 Sep 2024 15:34:37 -0700	[thread overview]
Message-ID: <20240920223446.1908673-5-zhiw@nvidia.com> (raw)
In-Reply-To: <20240920223446.1908673-1-zhiw@nvidia.com>

In VFIO, common functions that are used by VFIO variant drivers are managed
in a set of "core" functions. E.g. the vfio-pci-core provides the common
functions used by VFIO variant drivers to support PCI device
passthrough.

Although the CXL type-2 device has a PCI-compatible interface for device
configuration and programming, it still needs special handling when
initializing the device:

- Probing the CXL DVSECs in the configuration.
- Probing the CXL register groups implemented by the device.
- Configuring the CXL device state required by the kernel CXL core.
- Creating the CXL region.
- Special handling of the CXL MMIO BAR.

Introduce vfio-cxl core preludes to hold all the common functions used
by VFIO variant drivers to support CXL device passthrough.

Signed-off-by: Zhi Wang <zhiw@nvidia.com>
---
 drivers/vfio/pci/Kconfig         |   4 +
 drivers/vfio/pci/Makefile        |   3 +
 drivers/vfio/pci/vfio_cxl_core.c | 264 +++++++++++++++++++++++++++++++
 include/linux/vfio_pci_core.h    |  37 +++++
 4 files changed, 308 insertions(+)
 create mode 100644 drivers/vfio/pci/vfio_cxl_core.c

diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig
index bf50ffa10bde..2196e79b132b 100644
--- a/drivers/vfio/pci/Kconfig
+++ b/drivers/vfio/pci/Kconfig
@@ -7,6 +7,10 @@ config VFIO_PCI_CORE
 	select VFIO_VIRQFD
 	select IRQ_BYPASS_MANAGER
 
+config VFIO_CXL_CORE
+	tristate
+	select VFIO_PCI_CORE
+
 config VFIO_PCI_MMAP
 	def_bool y if !S390
 	depends on VFIO_PCI_CORE
diff --git a/drivers/vfio/pci/Makefile b/drivers/vfio/pci/Makefile
index cf00c0a7e55c..b51221b94b0b 100644
--- a/drivers/vfio/pci/Makefile
+++ b/drivers/vfio/pci/Makefile
@@ -8,6 +8,9 @@ vfio-pci-y := vfio_pci.o
 vfio-pci-$(CONFIG_VFIO_PCI_IGD) += vfio_pci_igd.o
 obj-$(CONFIG_VFIO_PCI) += vfio-pci.o
 
+vfio-cxl-core-y := vfio_cxl_core.o
+obj-$(CONFIG_VFIO_CXL_CORE) += vfio-cxl-core.o
+
 obj-$(CONFIG_MLX5_VFIO_PCI)           += mlx5/
 
 obj-$(CONFIG_HISI_ACC_VFIO_PCI) += hisilicon/
diff --git a/drivers/vfio/pci/vfio_cxl_core.c b/drivers/vfio/pci/vfio_cxl_core.c
new file mode 100644
index 000000000000..6a7859333f67
--- /dev/null
+++ b/drivers/vfio/pci/vfio_cxl_core.c
@@ -0,0 +1,264 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/device.h>
+#include <linux/eventfd.h>
+#include <linux/file.h>
+#include <linux/interrupt.h>
+#include <linux/iommu.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/notifier.h>
+#include <linux/pci.h>
+#include <linux/pm_runtime.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+
+#include "vfio_pci_priv.h"
+
+#define DRIVER_AUTHOR "Zhi Wang <zhiw@nvidia.com>"
+#define DRIVER_DESC "core driver for VFIO based CXL devices"
+
+/*
+ * Reserve host physical address (HPA) space and device physical address
+ * (DPA) capacity for the CXL region to be created.
+ *
+ * On success, cxl->cxlrd holds the chosen root decoder and cxl->cxled the
+ * endpoint decoder backing cxl->region.size bytes of DPA.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int get_hpa_and_request_dpa(struct vfio_pci_core_device *core_dev)
+{
+	struct vfio_cxl *cxl = &core_dev->cxl;
+	struct pci_dev *pdev = core_dev->pdev;
+	u64 max;
+
+	/* Find a root decoder with free RAM-type, type-2 capable HPA space. */
+	cxl->cxlrd = cxl_get_hpa_freespace(cxl->endpoint, 1,
+					   CXL_DECODER_F_RAM |
+					   CXL_DECODER_F_TYPE2,
+					   &max);
+	if (IS_ERR(cxl->cxlrd)) {
+		pci_err(pdev, "Fail to get HPA space.\n");
+		return PTR_ERR(cxl->cxlrd);
+	}
+
+	/* 'max' is the free HPA space reported by the lookup above. */
+	if (max < cxl->region.size) {
+		pci_err(pdev, "No enough free HPA space %llu < %llu\n",
+			max, cxl->region.size);
+		return -ENOSPC;
+	}
+
+	/*
+	 * Both size arguments are cxl->region.size -- presumably min/max
+	 * allocation bounds; confirm against the CXL accel API.
+	 */
+	cxl->cxled = cxl_request_dpa(cxl->endpoint, true, cxl->region.size,
+				     cxl->region.size);
+	if (IS_ERR(cxl->cxled)) {
+		pci_err(pdev, "Fail to request DPA\n");
+		return PTR_ERR(cxl->cxled);
+	}
+
+	return 0;
+}
+
+/*
+ * Create the CXL region backing the device memory.
+ *
+ * Requests the device CXL resources, waits for the media to become active,
+ * registers a CXL memdev and carves a region of cxl->region.size bytes out
+ * of the free HPA/DPA space.  On success, cxl->region.addr holds the HPA
+ * base of the new region.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int create_cxl_region(struct vfio_pci_core_device *core_dev)
+{
+	struct vfio_cxl *cxl = &core_dev->cxl;
+	struct pci_dev *pdev = core_dev->pdev;
+	resource_size_t start, end;
+	int ret;
+
+	ret = cxl_accel_request_resource(cxl->cxlds, true);
+	if (ret) {
+		pci_err(pdev, "Fail to request CXL resource\n");
+		return ret;
+	}
+
+	/*
+	 * cxl_await_media_ready() returns 0 once the device media is
+	 * active; propagate its error code on failure instead of the
+	 * stale (zero) 'ret' from the resource request above.
+	 */
+	ret = cxl_await_media_ready(cxl->cxlds);
+	if (ret) {
+		pci_err(pdev, "CXL media is not active\n");
+		return ret;
+	}
+
+	cxl_accel_set_media_ready(cxl->cxlds);
+
+	cxl->cxlmd = devm_cxl_add_memdev(&pdev->dev, cxl->cxlds);
+	if (IS_ERR(cxl->cxlmd)) {
+		pci_err(pdev, "Fail to create CXL memdev\n");
+		return PTR_ERR(cxl->cxlmd);
+	}
+
+	cxl->endpoint = cxl_acquire_endpoint(cxl->cxlmd);
+	if (IS_ERR(cxl->endpoint)) {
+		pci_err(pdev, "Fail to acquire CXL endpoint\n");
+		return PTR_ERR(cxl->endpoint);
+	}
+
+	ret = get_hpa_and_request_dpa(core_dev);
+	if (ret)
+		goto out;
+
+	cxl->region.region = cxl_create_region(cxl->cxlrd, &cxl->cxled, 1);
+	if (IS_ERR(cxl->region.region)) {
+		ret = PTR_ERR(cxl->region.region);
+		pci_err(pdev, "Fail to create CXL region\n");
+		cxl_dpa_free(cxl->cxled);
+		/*
+		 * Don't leave an ERR_PTR behind: disable_cxl() treats a
+		 * non-NULL cxl->region.region as a live region.
+		 */
+		cxl->region.region = NULL;
+		goto out;
+	}
+
+	cxl_accel_get_region_params(cxl->region.region, &start, &end);
+
+	cxl->region.addr = start;
+out:
+	/* The endpoint is only held for the duration of region setup. */
+	cxl_release_endpoint(cxl->cxlmd, cxl->endpoint);
+	return ret;
+}
+
+/*
+ * Standard CXL-type 2 driver initialization sequence:
+ * create the accel device state, describe the device resources to the
+ * kernel CXL core, set up the CXL register blocks, locate the HDM
+ * decoder registers, then create the CXL region.
+ *
+ * @dvsec: config-space offset of the CXL PCIe device DVSEC.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int enable_cxl(struct vfio_pci_core_device *core_dev, u16 dvsec)
+{
+	struct vfio_cxl *cxl = &core_dev->cxl;
+	struct pci_dev *pdev = core_dev->pdev;
+	u32 count;
+	u64 offset, size;
+	int ret;
+
+	cxl->cxlds = cxl_accel_state_create(&pdev->dev, cxl->caps);
+	if (IS_ERR(cxl->cxlds))
+		return PTR_ERR(cxl->cxlds);
+
+	cxl_accel_set_dvsec(cxl->cxlds, dvsec);
+	/*
+	 * NOTE(review): pdev->dev.id is the struct device instance id, not
+	 * a hardware serial number -- confirm this is the intended value.
+	 */
+	cxl_accel_set_serial(cxl->cxlds, pdev->dev.id);
+
+	/* Resources previously set via vfio_cxl_core_set_resource(). */
+	cxl_accel_set_resource(cxl->cxlds, cxl->dpa_res, CXL_ACCEL_RES_DPA);
+	cxl_accel_set_resource(cxl->cxlds, cxl->ram_res, CXL_ACCEL_RES_RAM);
+
+	ret = cxl_pci_accel_setup_regs(pdev, cxl->cxlds);
+	if (ret) {
+		pci_err(pdev, "Fail to setup CXL accel regs\n");
+		return ret;
+	}
+
+	ret = cxl_get_hdm_info(cxl->cxlds, &count, &offset, &size);
+	if (ret)
+		return ret;
+
+	/* A device without visible HDM decoder registers is unsupported. */
+	if (!count || !size) {
+		pci_err(pdev, "Fail to find CXL HDM reg offset\n");
+		return -ENODEV;
+	}
+
+	/* Cache the HDM decoder register layout for later use. */
+	cxl->hdm_count = count;
+	cxl->hdm_reg_offset = offset;
+	cxl->hdm_reg_size = size;
+
+	return create_cxl_region(core_dev);
+}
+
+/* Tear down the CXL state created by enable_cxl(). */
+static void disable_cxl(struct vfio_pci_core_device *core_dev)
+{
+	struct vfio_cxl *cxl = &core_dev->cxl;
+
+	/* Detach the decoder from the region before freeing its DPA. */
+	if (cxl->region.region)
+		cxl_region_detach(cxl->cxled);
+
+	if (cxl->cxled)
+		cxl_dpa_free(cxl->cxled);
+}
+
+/**
+ * vfio_cxl_core_enable() - CXL-aware counterpart of vfio_pci_core_enable()
+ * @core_dev: the VFIO PCI core device to enable
+ *
+ * The variant driver must have configured the expected CXL region size
+ * via vfio_cxl_core_set_region_size() beforehand.
+ *
+ * Return: 0 on success, -ENODEV if the device exposes no CXL PCIe device
+ * DVSEC, -EINVAL if no region size was configured, or a negative errno
+ * from the PCI/CXL enable path.
+ */
+int vfio_cxl_core_enable(struct vfio_pci_core_device *core_dev)
+{
+	struct vfio_cxl *cxl = &core_dev->cxl;
+	struct pci_dev *pdev = core_dev->pdev;
+	u16 dvsec;
+	int ret;
+
+	/* Only devices exposing the CXL PCIe device DVSEC are supported. */
+	dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL,
+					  CXL_DVSEC_PCIE_DEVICE);
+	if (!dvsec)
+		return -ENODEV;
+
+	if (!cxl->region.size)
+		return -EINVAL;
+
+	ret = vfio_pci_core_enable(core_dev);
+	if (ret)
+		return ret;
+
+	ret = enable_cxl(core_dev, dvsec);
+	if (ret)
+		goto err_enable_cxl_device;
+
+	return 0;
+
+err_enable_cxl_device:
+	/* Unwind the PCI core enable done above. */
+	vfio_pci_core_disable(core_dev);
+	return ret;
+}
+EXPORT_SYMBOL(vfio_cxl_core_enable);
+
+/*
+ * CXL counterpart of vfio_pci_core_finish_enable().  Currently just
+ * forwards to the PCI core implementation.
+ */
+void vfio_cxl_core_finish_enable(struct vfio_pci_core_device *core_dev)
+{
+	vfio_pci_core_finish_enable(core_dev);
+}
+EXPORT_SYMBOL(vfio_cxl_core_finish_enable);
+
+/* VFIO close_device hook: tear down CXL state, then the PCI core state. */
+void vfio_cxl_core_close_device(struct vfio_device *vdev)
+{
+	struct vfio_pci_core_device *core_dev =
+		container_of(vdev, struct vfio_pci_core_device, vdev);
+
+	disable_cxl(core_dev);
+	vfio_pci_core_close_device(vdev);
+}
+EXPORT_SYMBOL(vfio_cxl_core_close_device);
+
+/**
+ * vfio_cxl_core_set_resource() - describe a device resource to the CXL core
+ * @core_dev: the VFIO PCI core device
+ * @res: the resource range (passed by value)
+ * @type: CXL_ACCEL_RES_DPA or CXL_ACCEL_RES_RAM
+ *
+ * Called by the variant driver before vfio_cxl_core_enable() to configure
+ * the resources required by the kernel CXL core: device DPA and device
+ * RAM size.
+ */
+void vfio_cxl_core_set_resource(struct vfio_pci_core_device *core_dev,
+				struct resource res,
+				enum accel_resource type)
+{
+	struct vfio_cxl *cxl = &core_dev->cxl;
+
+	switch (type) {
+	case CXL_ACCEL_RES_DPA:
+		/* resource_size() == end - start + 1; avoid open-coding it. */
+		cxl->dpa_size = resource_size(&res);
+		cxl->dpa_res = res;
+		break;
+
+	case CXL_ACCEL_RES_RAM:
+		cxl->ram_res = res;
+		break;
+
+	default:
+		WARN(1, "invalid resource type: %d\n", type);
+		break;
+	}
+}
+EXPORT_SYMBOL(vfio_cxl_core_set_resource);
+
+/*
+ * Configure the expected CXL region size to be created.
+ *
+ * Must be called after the DPA resource is set (the region cannot exceed
+ * the device DPA size) and before the CXL region has been created.
+ */
+void vfio_cxl_core_set_region_size(struct vfio_pci_core_device *core_dev,
+				   u64 size)
+{
+	struct vfio_cxl *cxl = &core_dev->cxl;
+
+	/* The region cannot be larger than the device DPA. */
+	if (WARN_ON(size > cxl->dpa_size))
+		return;
+
+	/* Too late once the CXL region already exists. */
+	if (WARN_ON(cxl->region.region))
+		return;
+
+	cxl->region.size = size;
+}
+EXPORT_SYMBOL(vfio_cxl_core_set_region_size);
+
+/*
+ * Configure the driver cap required by the kernel CXL core: set
+ * CXL_ACCEL_DRIVER_CAP_HDM in the caps later passed to
+ * cxl_accel_state_create() by enable_cxl().
+ */
+void vfio_cxl_core_set_driver_hdm_cap(struct vfio_pci_core_device *core_dev)
+{
+	struct vfio_cxl *cxl = &core_dev->cxl;
+
+	cxl->caps |= CXL_ACCEL_DRIVER_CAP_HDM;
+}
+EXPORT_SYMBOL(vfio_cxl_core_set_driver_hdm_cap);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_IMPORT_NS(CXL);
diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
index fbb472dd99b3..7762d4a3e825 100644
--- a/include/linux/vfio_pci_core.h
+++ b/include/linux/vfio_pci_core.h
@@ -15,6 +15,8 @@
 #include <linux/types.h>
 #include <linux/uuid.h>
 #include <linux/notifier.h>
+#include <linux/cxl_accel_mem.h>
+#include <linux/cxl_accel_pci.h>
 
 #ifndef VFIO_PCI_CORE_H
 #define VFIO_PCI_CORE_H
@@ -49,6 +51,31 @@ struct vfio_pci_region {
 	u32				flags;
 };
 
+/* The CXL region created on top of the device memory. */
+struct vfio_cxl_region {
+	u64 size;	/* expected region size, set by the variant driver */
+	u64 addr;	/* HPA base of the created region */
+	struct cxl_region *region;	/* NULL until the region is created */
+};
+
+/* Per-device CXL state embedded in struct vfio_pci_core_device. */
+struct vfio_cxl {
+	u8 caps;	/* CXL_ACCEL_DRIVER_CAP_* flags */
+	u64 dpa_size;	/* size of the device DPA resource */
+
+	/* HDM decoder register layout probed from the device */
+	u32 hdm_count;
+	u64 hdm_reg_offset;
+	u64 hdm_reg_size;
+
+	/* kernel CXL core objects created during vfio_cxl_core_enable() */
+	struct cxl_dev_state *cxlds;
+	struct cxl_memdev *cxlmd;
+	struct cxl_root_decoder *cxlrd;
+	struct cxl_port *endpoint;
+	struct cxl_endpoint_decoder *cxled;
+	struct resource dpa_res;	/* set via vfio_cxl_core_set_resource() */
+	struct resource ram_res;	/* set via vfio_cxl_core_set_resource() */
+
+	struct vfio_cxl_region region;
+};
+
 struct vfio_pci_core_device {
 	struct vfio_device	vdev;
 	struct pci_dev		*pdev;
@@ -94,6 +121,7 @@ struct vfio_pci_core_device {
 	struct vfio_pci_core_device	*sriov_pf_core_dev;
 	struct notifier_block	nb;
 	struct rw_semaphore	memory_lock;
+	struct vfio_cxl		cxl;
 };
 
 /* Will be exported for vfio pci drivers usage */
@@ -159,4 +187,13 @@ VFIO_IOREAD_DECLARATION(32)
 VFIO_IOREAD_DECLARATION(64)
 #endif
 
+int vfio_cxl_core_enable(struct vfio_pci_core_device *core_dev);
+void vfio_cxl_core_finish_enable(struct vfio_pci_core_device *core_dev);
+void vfio_cxl_core_close_device(struct vfio_device *vdev);
+void vfio_cxl_core_set_resource(struct vfio_pci_core_device *core_dev,
+				struct resource res,
+				enum accel_resource type);
+void vfio_cxl_core_set_region_size(struct vfio_pci_core_device *core_dev,
+				   u64 size);
+void vfio_cxl_core_set_driver_hdm_cap(struct vfio_pci_core_device *core_dev);
 #endif /* VFIO_PCI_CORE_H */
-- 
2.34.1


  parent reply	other threads:[~2024-09-20 22:35 UTC|newest]

Thread overview: 38+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-09-20 22:34 [RFC 00/13] vfio: introduce vfio-cxl to support CXL type-2 accelerator passthrough Zhi Wang
2024-09-20 22:34 ` [RFC 01/13] cxl: allow a type-2 device not to have memory device registers Zhi Wang
2024-09-23  8:01   ` Tian, Kevin
2024-09-23 15:38   ` Dave Jiang
2024-09-24  8:03     ` Zhi Wang
2024-09-20 22:34 ` [RFC 02/13] cxl: introduce cxl_get_hdm_info() Zhi Wang
2024-10-17 15:44   ` Jonathan Cameron
2024-10-19  5:38     ` Zhi Wang
2024-09-20 22:34 ` [RFC 03/13] cxl: introduce cxl_find_comp_reglock_offset() Zhi Wang
2024-09-20 22:34 ` Zhi Wang [this message]
2024-10-11 18:33   ` [RFC 04/13] vfio: introduce vfio-cxl core preludes Alex Williamson
2024-09-20 22:34 ` [RFC 05/13] vfio/cxl: expose CXL region to the usersapce via a new VFIO device region Zhi Wang
2024-10-11 19:12   ` Alex Williamson
2024-09-20 22:34 ` [RFC 06/13] vfio/pci: expose vfio_pci_rw() Zhi Wang
2024-09-20 22:34 ` [RFC 07/13] vfio/cxl: introduce vfio_cxl_core_{read, write}() Zhi Wang
2024-09-20 22:34 ` [RFC 08/13] vfio/cxl: emulate HDM decoder registers Zhi Wang
2024-09-20 22:34 ` [RFC 09/13] vfio/pci: introduce CXL device awareness Zhi Wang
2024-10-11 20:37   ` Alex Williamson
2024-09-20 22:34 ` [RFC 10/13] vfio/pci: emulate CXL DVSEC registers in the configuration space Zhi Wang
2024-10-11 21:02   ` Alex Williamson
2024-09-20 22:34 ` [RFC 11/13] vfio/cxl: introduce VFIO CXL device cap Zhi Wang
2024-10-11 21:14   ` Alex Williamson
2024-09-20 22:34 ` [RFC 12/13] vfio/cxl: VFIO variant driver for QEMU CXL accel device Zhi Wang
2024-09-20 22:34 ` [RFC 13/13] vfio/cxl: workaround: don't take resource region when cxl is enabled Zhi Wang
2024-09-23  8:00 ` [RFC 00/13] vfio: introduce vfio-cxl to support CXL type-2 accelerator passthrough Tian, Kevin
2024-09-24  8:30   ` Zhi Wang
2024-09-25 13:05     ` Jonathan Cameron
2024-09-27  7:18       ` Zhi Wang
2024-10-04 11:40         ` Jonathan Cameron
2024-10-19  5:30           ` Zhi Wang
2024-10-21 11:07             ` Alejandro Lucero Palau
2024-09-26  6:55     ` Tian, Kevin
2024-09-25 10:11 ` Alejandro Lucero Palau
2024-09-27  7:38   ` Zhi Wang
2024-09-27  7:38   ` Zhi Wang
2024-10-21 10:49 ` Zhi Wang
2024-10-21 13:10   ` Alejandro Lucero Palau
2024-10-30 11:56 ` Zhi Wang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240920223446.1908673-5-zhiw@nvidia.com \
    --to=zhiw@nvidia.com \
    --cc=acurrid@nvidia.com \
    --cc=alex.williamson@redhat.com \
    --cc=alison.schofield@intel.com \
    --cc=alucerop@amd.com \
    --cc=aniketa@nvidia.com \
    --cc=ankita@nvidia.com \
    --cc=cjia@nvidia.com \
    --cc=dan.j.williams@intel.com \
    --cc=dave.jiang@intel.com \
    --cc=dave@stgolabs.net \
    --cc=ira.weiny@intel.com \
    --cc=jgg@nvidia.com \
    --cc=jonathan.cameron@huawei.com \
    --cc=kevin.tian@intel.com \
    --cc=kvm@vger.kernel.org \
    --cc=kwankhede@nvidia.com \
    --cc=linux-cxl@vger.kernel.org \
    --cc=smitra@nvidia.com \
    --cc=targupta@nvidia.com \
    --cc=vishal.l.verma@intel.com \
    --cc=zhiwang@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.