public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: <mhonap@nvidia.com>
To: <aniketa@nvidia.com>, <ankita@nvidia.com>,
	<alwilliamson@nvidia.com>, <vsethi@nvidia.com>, <jgg@nvidia.com>,
	<mochs@nvidia.com>, <skolothumtho@nvidia.com>,
	<alejandro.lucero-palau@amd.com>, <dave@stgolabs.net>,
	<jonathan.cameron@huawei.com>, <dave.jiang@intel.com>,
	<alison.schofield@intel.com>, <vishal.l.verma@intel.com>,
	<ira.weiny@intel.com>, <dan.j.williams@intel.com>, <jgg@ziepe.ca>,
	<yishaih@nvidia.com>, <kevin.tian@intel.com>
Cc: <cjia@nvidia.com>, <kwankhede@nvidia.com>, <targupta@nvidia.com>,
	<zhiw@nvidia.com>, <kjaju@nvidia.com>,
	<linux-kernel@vger.kernel.org>, <linux-cxl@vger.kernel.org>,
	<kvm@vger.kernel.org>, <mhonap@nvidia.com>
Subject: [RFC v2 07/15] vfio/cxl: expose CXL region to the userspace via a new VFIO device region
Date: Tue, 9 Dec 2025 22:20:11 +0530	[thread overview]
Message-ID: <20251209165019.2643142-8-mhonap@nvidia.com> (raw)
In-Reply-To: <20251209165019.2643142-1-mhonap@nvidia.com>

From: Manish Honap <mhonap@nvidia.com>

To directly access the device memory, a CXL region is required. Creating
a CXL region requires to configure HDM decoders on the path to map the
access of HPA level by level and evetually hit the DPA in the CXL
topology.

For the userspace, e.g. QEMU, to access the CXL region, the region is
required to be exposed via VFIO interfaces.

Introduce a new VFIO device region and region ops to expose the created
CXL region when initialize the device in the vfio-cxl-core. Introduce a
new sub region type for the userspace to identify a CXL region.

Co-developed-by: Zhi Wang <zhiw@nvidia.com>
Signed-off-by: Zhi Wang <zhiw@nvidia.com>
Signed-off-by: Manish Honap <mhonap@nvidia.com>
---
 drivers/vfio/pci/vfio_cxl_core.c | 122 +++++++++++++++++++++++++++++++
 drivers/vfio/pci/vfio_pci_core.c |   3 +-
 include/linux/vfio_pci_core.h    |   5 ++
 include/uapi/linux/vfio.h        |   4 +
 4 files changed, 133 insertions(+), 1 deletion(-)

diff --git a/drivers/vfio/pci/vfio_cxl_core.c b/drivers/vfio/pci/vfio_cxl_core.c
index cf53720c0cb7..35d95de47fa8 100644
--- a/drivers/vfio/pci/vfio_cxl_core.c
+++ b/drivers/vfio/pci/vfio_cxl_core.c
@@ -231,6 +231,128 @@ void vfio_cxl_core_destroy_cxl_region(struct vfio_cxl_core_device *cxl)
 }
 EXPORT_SYMBOL_GPL(vfio_cxl_core_destroy_cxl_region);
 
+static int vfio_cxl_region_mmap(struct vfio_pci_core_device *pci,
+				struct vfio_pci_region *region,
+				struct vm_area_struct *vma)
+{
+	struct vfio_cxl_region *cxl_region = region->data;
+	u64 req_len, pgoff, req_start, end;
+	int ret;
+
+	if (!(region->flags & VFIO_REGION_INFO_FLAG_MMAP))
+		return -EINVAL;
+
+	if (!(region->flags & VFIO_REGION_INFO_FLAG_READ) &&
+	    (vma->vm_flags & VM_READ))
+		return -EPERM;
+
+	if (!(region->flags & VFIO_REGION_INFO_FLAG_WRITE) &&
+	    (vma->vm_flags & VM_WRITE))
+		return -EPERM;
+
+	pgoff = vma->vm_pgoff &
+		((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
+
+	if (check_sub_overflow(vma->vm_end, vma->vm_start, &req_len) ||
+	    check_add_overflow(PHYS_PFN(cxl_region->addr), pgoff, &req_start) ||
+	    check_add_overflow(PFN_PHYS(pgoff), req_len, &end))
+		return -EOVERFLOW;
+
+	if (end > cxl_region->size)
+		return -EINVAL;
+
+	if (cxl_region->noncached)
+		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);
+
+	vm_flags_set(vma, VM_ALLOW_ANY_UNCACHED | VM_IO | VM_PFNMAP |
+		     VM_DONTEXPAND | VM_DONTDUMP);
+
+	ret = remap_pfn_range(vma, vma->vm_start, req_start,
+			      req_len, vma->vm_page_prot);
+	if (ret)
+		return ret;
+
+	vma->vm_pgoff = req_start;
+
+	return 0;
+}
+
+static ssize_t vfio_cxl_region_rw(struct vfio_pci_core_device *core_dev,
+				  char __user *buf, size_t count, loff_t *ppos,
+				  bool iswrite)
+{
+	unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - VFIO_PCI_NUM_REGIONS;
+	struct vfio_cxl_region *cxl_region = core_dev->region[i].data;
+	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
+
+	if (!count)
+		return 0;
+
+	return vfio_pci_core_do_io_rw(core_dev, false,
+				      cxl_region->vaddr,
+				      (char __user *)buf, pos, count,
+				      0, 0, iswrite);
+}
+
+static void vfio_cxl_region_release(struct vfio_pci_core_device *vdev,
+				    struct vfio_pci_region *region)
+{
+}
+
+static const struct vfio_pci_regops vfio_cxl_regops = {
+	.rw             = vfio_cxl_region_rw,
+	.mmap           = vfio_cxl_region_mmap,
+	.release        = vfio_cxl_region_release,
+};
+
+int vfio_cxl_core_register_cxl_region(struct vfio_cxl_core_device *cxl)
+{
+	struct vfio_pci_core_device *pci = &cxl->pci_core;
+	struct vfio_cxl *cxl_core = cxl->cxl_core;
+	u32 flags;
+	int ret;
+
+	if (WARN_ON(!cxl_core->region.region || cxl_core->region.vaddr))
+		return -EEXIST;
+
+	cxl_core->region.vaddr = ioremap(cxl_core->region.addr, cxl_core->region.size);
+	if (!cxl_core->region.addr)
+		return -EFAULT;
+
+	flags = VFIO_REGION_INFO_FLAG_READ |
+		VFIO_REGION_INFO_FLAG_WRITE |
+		VFIO_REGION_INFO_FLAG_MMAP;
+
+	ret = vfio_pci_core_register_dev_region(pci,
+						PCI_VENDOR_ID_CXL |
+						VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
+						VFIO_REGION_SUBTYPE_CXL,
+						&vfio_cxl_regops,
+						cxl_core->region.size, flags,
+						&cxl_core->region);
+	if (ret) {
+		iounmap(cxl_core->region.vaddr);
+		cxl_core->region.vaddr = NULL;
+		return ret;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(vfio_cxl_core_register_cxl_region);
+
+void vfio_cxl_core_unregister_cxl_region(struct vfio_cxl_core_device *cxl)
+{
+	struct vfio_cxl *cxl_core = cxl->cxl_core;
+
+	if (WARN_ON(!cxl_core->region.region || !cxl_core->region.vaddr))
+		return;
+
+	iounmap(cxl_core->region.vaddr);
+	cxl_core->region.vaddr = NULL;
+}
+EXPORT_SYMBOL_GPL(vfio_cxl_core_unregister_cxl_region);
+
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR(DRIVER_AUTHOR);
 MODULE_DESCRIPTION(DRIVER_DESC);
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index 7dcf5439dedc..c0695b5db66d 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -1698,12 +1698,13 @@ static vm_fault_t vfio_pci_mmap_page_fault(struct vm_fault *vmf)
 	return vfio_pci_mmap_huge_fault(vmf, 0);
 }
 
-static const struct vm_operations_struct vfio_pci_mmap_ops = {
+const struct vm_operations_struct vfio_pci_mmap_ops = {
 	.fault = vfio_pci_mmap_page_fault,
 #ifdef CONFIG_ARCH_SUPPORTS_HUGE_PFNMAP
 	.huge_fault = vfio_pci_mmap_huge_fault,
 #endif
 };
+EXPORT_SYMBOL_GPL(vfio_pci_mmap_ops);
 
 int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma)
 {
diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
index a343b91d2580..3474835f5d65 100644
--- a/include/linux/vfio_pci_core.h
+++ b/include/linux/vfio_pci_core.h
@@ -102,6 +102,7 @@ struct vfio_cxl_region {
 	struct cxl_region *region;
 	u64 size;
 	u64 addr;
+	void *vaddr;
 	bool noncached;
 };
 
@@ -203,6 +204,8 @@ vfio_pci_core_to_cxl(struct vfio_pci_core_device *pci)
 	return container_of(pci, struct vfio_cxl_core_device, pci_core);
 }
 
+extern const struct vm_operations_struct vfio_pci_mmap_ops;
+
 int vfio_cxl_core_enable(struct vfio_cxl_core_device *cxl,
 			 struct vfio_cxl_dev_info *info);
 void vfio_cxl_core_finish_enable(struct vfio_cxl_core_device *cxl);
@@ -210,5 +213,7 @@ void vfio_cxl_core_disable(struct vfio_cxl_core_device *cxl);
 void vfio_cxl_core_close_device(struct vfio_device *vdev);
 int vfio_cxl_core_create_cxl_region(struct vfio_cxl_core_device *cxl, u64 size);
 void vfio_cxl_core_destroy_cxl_region(struct vfio_cxl_core_device *cxl);
+int vfio_cxl_core_register_cxl_region(struct vfio_cxl_core_device *cxl);
+void vfio_cxl_core_unregister_cxl_region(struct vfio_cxl_core_device *cxl);
 
 #endif /* VFIO_PCI_CORE_H */
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 75100bf009ba..95be987d2ed5 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -372,6 +372,10 @@ struct vfio_region_info_cap_type {
 /* sub-types for VFIO_REGION_TYPE_GFX */
 #define VFIO_REGION_SUBTYPE_GFX_EDID            (1)
 
+/* 1e98 vendor PCI sub-types */
+/* sub-type for VFIO CXL region */
+#define VFIO_REGION_SUBTYPE_CXL                 (1)
+
 /**
  * struct vfio_region_gfx_edid - EDID region layout.
  *
-- 
2.25.1


  parent reply	other threads:[~2025-12-09 16:52 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-12-09 16:50 [RFC v2 00/15] vfio: introduce vfio-cxl to support CXL type-2 accelerator passthrough Hello all, mhonap
2025-12-09 16:50 ` [RFC v2 01/15] cxl: factor out cxl_await_range_active() and cxl_media_ready() mhonap
2025-12-22 12:21   ` Jonathan Cameron
2025-12-09 16:50 ` [RFC v2 02/15] cxl: introduce cxl_get_hdm_reg_info() mhonap
2025-12-09 16:50 ` [RFC v2 03/15] cxl: introduce cxl_find_comp_reglock_offset() mhonap
2025-12-09 16:50 ` [RFC v2 04/15] cxl: introduce devm_cxl_del_memdev() mhonap
2025-12-09 16:50 ` [RFC v2 05/15] cxl: introduce cxl_get_committed_regions() mhonap
2025-12-22 12:31   ` Jonathan Cameron
2025-12-09 16:50 ` [RFC v2 06/15] vfio/cxl: introduce vfio-cxl core preludes mhonap
2025-12-22 13:54   ` Jonathan Cameron
2025-12-09 16:50 ` mhonap [this message]
2025-12-11 16:06   ` [RFC v2 07/15] vfio/cxl: expose CXL region to the userspace via a new VFIO device region Dave Jiang
2025-12-11 17:31     ` Manish Honap
2025-12-11 18:01       ` Dave Jiang
2025-12-22 14:00   ` Jonathan Cameron
2025-12-09 16:50 ` [RFC v2 08/15] vfio/cxl: discover precommitted CXL region mhonap
2025-12-22 14:09   ` Jonathan Cameron
2025-12-09 16:50 ` [RFC v2 09/15] vfio/cxl: introduce vfio_cxl_core_{read, write}() mhonap
2025-12-09 16:50 ` [RFC v2 10/15] vfio/cxl: introduce the register emulation framework mhonap
2025-12-09 16:50 ` [RFC v2 11/15] vfio/cxl: introduce the emulation of HDM registers mhonap
2025-12-11 18:13   ` Dave Jiang
2025-12-09 16:50 ` [RFC v2 12/15] vfio/cxl: introduce the emulation of CXL configuration space mhonap
2025-12-09 16:50 ` [RFC v2 13/15] vfio/pci: introduce CXL device awareness mhonap
2025-12-09 16:50 ` [RFC v2 14/15] vfio/cxl: VFIO variant driver for QEMU CXL accel device mhonap
2025-12-09 16:50 ` [RFC v2 15/15] cxl/mem: Fix NULL pointer deference in memory device paths mhonap

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251209165019.2643142-8-mhonap@nvidia.com \
    --to=mhonap@nvidia.com \
    --cc=alejandro.lucero-palau@amd.com \
    --cc=alison.schofield@intel.com \
    --cc=alwilliamson@nvidia.com \
    --cc=aniketa@nvidia.com \
    --cc=ankita@nvidia.com \
    --cc=cjia@nvidia.com \
    --cc=dan.j.williams@intel.com \
    --cc=dave.jiang@intel.com \
    --cc=dave@stgolabs.net \
    --cc=ira.weiny@intel.com \
    --cc=jgg@nvidia.com \
    --cc=jgg@ziepe.ca \
    --cc=jonathan.cameron@huawei.com \
    --cc=kevin.tian@intel.com \
    --cc=kjaju@nvidia.com \
    --cc=kvm@vger.kernel.org \
    --cc=kwankhede@nvidia.com \
    --cc=linux-cxl@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mochs@nvidia.com \
    --cc=skolothumtho@nvidia.com \
    --cc=targupta@nvidia.com \
    --cc=vishal.l.verma@intel.com \
    --cc=vsethi@nvidia.com \
    --cc=yishaih@nvidia.com \
    --cc=zhiw@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox