public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
From: Zhi Wang <zhiw@nvidia.com>
To: <kvm@vger.kernel.org>, <linux-cxl@vger.kernel.org>
Cc: <alex.williamson@redhat.com>, <kevin.tian@intel.com>,
	<jgg@nvidia.com>, <alison.schofield@intel.com>,
	<dan.j.williams@intel.com>, <dave.jiang@intel.com>,
	<dave@stgolabs.net>, <jonathan.cameron@huawei.com>,
	<ira.weiny@intel.com>, <vishal.l.verma@intel.com>,
	<alucerop@amd.com>, <acurrid@nvidia.com>, <cjia@nvidia.com>,
	<smitra@nvidia.com>, <ankita@nvidia.com>, <aniketa@nvidia.com>,
	<kwankhede@nvidia.com>, <targupta@nvidia.com>, <zhiw@nvidia.com>,
	<zhiwang@kernel.org>
Subject: [RFC 08/13] vfio/cxl: emulate HDM decoder registers
Date: Fri, 20 Sep 2024 15:34:41 -0700	[thread overview]
Message-ID: <20240920223446.1908673-9-zhiw@nvidia.com> (raw)
In-Reply-To: <20240920223446.1908673-1-zhiw@nvidia.com>

To directly access the device memory, the HDM decoder registers on the
path from the CXL root port to the device need to be configured when
creating a CXL region.

However, the physical HDM decoders are owned by the kernel CXL core when
creating and configuring a CXL region. Thus the VM is forbidden from
accessing and configuring the physical HDM decoder registers. The HDM
decoder registers in the CXL component register group need to be emulated.

Emulate the HDM decoder registers in the vfio-cxl-core. Locate the BAR
where the component registers sit. Take a snapshot of the component
registers before initializing the CXL device. Emulate the HDM decoder
registers when the VM accesses them via vfio_device_ops->{read, write}.

Signed-off-by: Zhi Wang <zhiw@nvidia.com>
---
 drivers/vfio/pci/vfio_cxl_core.c | 208 ++++++++++++++++++++++++++++++-
 include/linux/cxl_accel_pci.h    |   6 +
 include/linux/vfio_pci_core.h    |   5 +
 3 files changed, 216 insertions(+), 3 deletions(-)

diff --git a/drivers/vfio/pci/vfio_cxl_core.c b/drivers/vfio/pci/vfio_cxl_core.c
index 68a935515256..bbb968cb1b70 100644
--- a/drivers/vfio/pci/vfio_cxl_core.c
+++ b/drivers/vfio/pci/vfio_cxl_core.c
@@ -283,6 +283,90 @@ static const struct vfio_pci_regops vfio_cxl_regops = {
 	.release	= vfio_cxl_region_release,
 };
 
+static int find_bar(struct pci_dev *pdev, u64 *offset, int *bar, u64 size)
+{
+	u64 start, end, flags;
+	int index, i;
+
+	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+		index = i + PCI_STD_RESOURCES;
+		flags = pci_resource_flags(pdev, index);
+
+		start = pci_resource_start(pdev, index);
+		end = pci_resource_end(pdev, index);
+
+		if (*offset >= start && *offset + size - 1 <= end)
+			break;
+
+		if (flags & IORESOURCE_MEM_64)
+			i++;
+	}
+
+	if (i == PCI_STD_NUM_BARS)
+		return -ENODEV;
+
+	*offset = *offset - start;
+	*bar = index;
+
+	return 0;
+}
+
+static int find_comp_regs(struct vfio_pci_core_device *core_dev)
+{
+	struct vfio_cxl *cxl = &core_dev->cxl;
+	struct pci_dev *pdev = core_dev->pdev;
+	u64 offset;
+	int ret, bar;
+
+	ret = cxl_find_comp_regblock_offset(pdev, &offset);
+	if (ret)
+		return ret;
+
+	ret = find_bar(pdev, &offset, &bar, SZ_64K);
+	if (ret)
+		return ret;
+
+	cxl->comp_reg_bar = bar;
+	cxl->comp_reg_offset = offset;
+	cxl->comp_reg_size = SZ_64K;
+	return 0;
+}
+
+static void clean_virt_comp_regs(struct vfio_pci_core_device *core_dev)
+{
+	struct vfio_cxl *cxl = &core_dev->cxl;
+
+	kvfree(cxl->comp_reg_virt);
+}
+
+static int setup_virt_comp_regs(struct vfio_pci_core_device *core_dev)
+{
+	struct vfio_cxl *cxl = &core_dev->cxl;
+	struct pci_dev *pdev = core_dev->pdev;
+	u64 offset = cxl->comp_reg_offset;
+	int bar = cxl->comp_reg_bar;
+	u64 size = cxl->comp_reg_size;
+	void *regs;
+	unsigned int i;
+
+	cxl->comp_reg_virt = kvzalloc(size, GFP_KERNEL);
+	if (!cxl->comp_reg_virt)
+		return -ENOMEM;
+
+	regs = ioremap(pci_resource_start(pdev, bar) + offset, size);
+	if (!regs) {
+		kvfree(cxl->comp_reg_virt);
+		return -EFAULT;
+	}
+
+	for (i = 0; i < size; i += 4)
+		*(u32 *)(cxl->comp_reg_virt + i) = readl(regs + i);
+
+	iounmap(regs);
+
+	return 0;
+}
+
 int vfio_cxl_core_enable(struct vfio_pci_core_device *core_dev)
 {
 	struct vfio_cxl *cxl = &core_dev->cxl;
@@ -299,10 +383,18 @@ int vfio_cxl_core_enable(struct vfio_pci_core_device *core_dev)
 	if (!cxl->region.size)
 		return -EINVAL;
 
-	ret = vfio_pci_core_enable(core_dev);
+	ret = find_comp_regs(core_dev);
+	if (ret)
+		return ret;
+
+	ret = setup_virt_comp_regs(core_dev);
 	if (ret)
 		return ret;
 
+	ret = vfio_pci_core_enable(core_dev);
+	if (ret)
+		goto err_pci_core_enable;
+
 	ret = enable_cxl(core_dev, dvsec);
 	if (ret)
 		goto err_enable_cxl_device;
@@ -324,6 +416,8 @@ int vfio_cxl_core_enable(struct vfio_pci_core_device *core_dev)
 	disable_cxl(core_dev);
 err_enable_cxl_device:
 	vfio_pci_core_disable(core_dev);
+err_pci_core_enable:
+	clean_virt_comp_regs(core_dev);
 	return ret;
 }
 EXPORT_SYMBOL(vfio_cxl_core_enable);
@@ -341,6 +435,7 @@ void vfio_cxl_core_close_device(struct vfio_device *vdev)
 
 	disable_cxl(core_dev);
 	vfio_pci_core_close_device(vdev);
+	clean_virt_comp_regs(core_dev);
 }
 EXPORT_SYMBOL(vfio_cxl_core_close_device);
 
@@ -396,13 +491,102 @@ void vfio_cxl_core_set_driver_hdm_cap(struct vfio_pci_core_device *core_dev)
 }
 EXPORT_SYMBOL(vfio_cxl_core_set_driver_hdm_cap);
 
+static bool is_hdm_regblock(struct vfio_cxl *cxl, u64 offset, size_t count)
+{
+	return offset >= cxl->hdm_reg_offset &&
+	       offset + count < cxl->hdm_reg_offset +
+	       cxl->hdm_reg_size;
+}
+
+static void write_hdm_decoder_global(void *virt, u64 offset, u32 v)
+{
+	if (offset == 0x4)
+		*(u32 *)(virt + offset) = v & GENMASK(1, 0);
+}
+
+static void write_hdm_decoder_n(void *virt, u64 offset, u32 v)
+{
+	u32 cur, index;
+
+	index = (offset - 0x10) / 0x20;
+
+	/* HDM decoder registers are locked? */
+	cur = *(u32 *)(virt + index * 0x20 + 0x20);
+
+	if (cur & CXL_HDM_DECODER0_CTRL_LOCK &&
+	    cur & CXL_HDM_DECODER0_CTRL_COMMITTED)
+		return;
+
+	/* emulate HDM_DECODER_CTRL. */
+	if (offset == CXL_HDM_DECODER0_CTRL_OFFSET(index)) {
+		v &= ~CXL_HDM_DECODER0_CTRL_COMMIT_ERROR;
+
+		/* commit/de-commit */
+		if (v & CXL_HDM_DECODER0_CTRL_COMMIT)
+			v |= CXL_HDM_DECODER0_CTRL_COMMITTED;
+		else
+			v &= ~CXL_HDM_DECODER0_CTRL_COMMITTED;
+	}
+	*(u32 *)(virt + offset) = v;
+}
+
+static ssize_t
+emulate_hdm_regblock(struct vfio_device *vdev, char __user *buf,
+		     size_t count, loff_t *ppos, bool write)
+{
+	struct vfio_pci_core_device *core_dev =
+		container_of(vdev, struct vfio_pci_core_device, vdev);
+	struct vfio_cxl *cxl = &core_dev->cxl;
+	u64 pos = *ppos & VFIO_PCI_OFFSET_MASK;
+	void *hdm_reg_virt;
+	u64 hdm_offset;
+	u32 v;
+
+	hdm_offset = pos - cxl->hdm_reg_offset;
+	hdm_reg_virt = cxl->comp_reg_virt +
+		       (cxl->hdm_reg_offset - cxl->comp_reg_offset);
+
+	if (!write) {
+		v = *(u32 *)(hdm_reg_virt + hdm_offset);
+
+		if (copy_to_user(buf, &v, 4))
+			return -EFAULT;
+	} else {
+		if (copy_from_user(&v, buf, 4))
+			return -EFAULT;
+
+		if (hdm_offset < 0x10)
+			write_hdm_decoder_global(hdm_reg_virt, hdm_offset, v);
+		else
+			write_hdm_decoder_n(hdm_reg_virt, hdm_offset, v);
+	}
+	return count;
+}
+
 ssize_t vfio_cxl_core_read(struct vfio_device *core_vdev, char __user *buf,
 		size_t count, loff_t *ppos)
 {
 	struct vfio_pci_core_device *vdev =
 		container_of(core_vdev, struct vfio_pci_core_device, vdev);
+	struct vfio_cxl *cxl = &vdev->cxl;
+	u64 pos = *ppos & VFIO_PCI_OFFSET_MASK;
+	unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
+
+	if (!count)
+		return 0;
+
+	if (index != cxl->comp_reg_bar)
+		return vfio_pci_rw(vdev, buf, count, ppos, false);
+
+	if (WARN_ON_ONCE(!IS_ALIGNED(pos, 4) || count != 4))
+		return -EINVAL;
 
-	return vfio_pci_rw(vdev, buf, count, ppos, false);
+	if (is_hdm_regblock(cxl, pos, count))
+		return emulate_hdm_regblock(core_vdev, buf, count,
+					    ppos, false);
+	else
+		return vfio_pci_rw(vdev, (char __user *)buf, count,
+				   ppos, false);
 }
 EXPORT_SYMBOL_GPL(vfio_cxl_core_read);
 
@@ -411,8 +595,26 @@ ssize_t vfio_cxl_core_write(struct vfio_device *core_vdev, const char __user *bu
 {
 	struct vfio_pci_core_device *vdev =
 		container_of(core_vdev, struct vfio_pci_core_device, vdev);
+	struct vfio_cxl *cxl = &vdev->cxl;
+	u64 pos = *ppos & VFIO_PCI_OFFSET_MASK;
+	unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
+
+	if (!count)
+		return 0;
+
+	if (index != cxl->comp_reg_bar)
+		return vfio_pci_rw(vdev, (char __user *)buf, count, ppos,
+				   true);
+
+	if (WARN_ON_ONCE(!IS_ALIGNED(pos, 4) || count != 4))
+		return -EINVAL;
 
-	return vfio_pci_rw(vdev, (char __user *)buf, count, ppos, true);
+	if (is_hdm_regblock(cxl, pos, count))
+		return emulate_hdm_regblock(core_vdev, (char __user *)buf,
+					    count, ppos, true);
+	else
+		return vfio_pci_rw(vdev, (char __user *)buf, count, ppos,
+				   true);
 }
 EXPORT_SYMBOL_GPL(vfio_cxl_core_write);
 
diff --git a/include/linux/cxl_accel_pci.h b/include/linux/cxl_accel_pci.h
index c337ae8797e6..090f60fb9a3f 100644
--- a/include/linux/cxl_accel_pci.h
+++ b/include/linux/cxl_accel_pci.h
@@ -20,4 +20,10 @@
 #define   CXL_DVSEC_RANGE_BASE_LOW(i)	(0x24 + (i * 0x10))
 #define     CXL_DVSEC_MEM_BASE_LOW_MASK	GENMASK(31, 28)
 
+#define CXL_HDM_DECODER0_CTRL_OFFSET(i) (0x20 * (i) + 0x20)
+#define   CXL_HDM_DECODER0_CTRL_LOCK BIT(8)
+#define   CXL_HDM_DECODER0_CTRL_COMMIT BIT(9)
+#define   CXL_HDM_DECODER0_CTRL_COMMITTED BIT(10)
+#define   CXL_HDM_DECODER0_CTRL_COMMIT_ERROR BIT(11)
+
 #endif
diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
index 64ccdcdfa95e..9d295ca9382a 100644
--- a/include/linux/vfio_pci_core.h
+++ b/include/linux/vfio_pci_core.h
@@ -62,6 +62,11 @@ struct vfio_cxl {
 	u8 caps;
 	u64 dpa_size;
 
+	int comp_reg_bar;
+	u64 comp_reg_offset;
+	u64 comp_reg_size;
+	void *comp_reg_virt;
+
 	u32 hdm_count;
 	u64 hdm_reg_offset;
 	u64 hdm_reg_size;
-- 
2.34.1


  parent reply	other threads:[~2024-09-20 22:35 UTC|newest]

Thread overview: 38+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-09-20 22:34 [RFC 00/13] vfio: introduce vfio-cxl to support CXL type-2 accelerator passthrough Zhi Wang
2024-09-20 22:34 ` [RFC 01/13] cxl: allow a type-2 device not to have memory device registers Zhi Wang
2024-09-23  8:01   ` Tian, Kevin
2024-09-23 15:38   ` Dave Jiang
2024-09-24  8:03     ` Zhi Wang
2024-09-20 22:34 ` [RFC 02/13] cxl: introduce cxl_get_hdm_info() Zhi Wang
2024-10-17 15:44   ` Jonathan Cameron
2024-10-19  5:38     ` Zhi Wang
2024-09-20 22:34 ` [RFC 03/13] cxl: introduce cxl_find_comp_reglock_offset() Zhi Wang
2024-09-20 22:34 ` [RFC 04/13] vfio: introduce vfio-cxl core preludes Zhi Wang
2024-10-11 18:33   ` Alex Williamson
2024-09-20 22:34 ` [RFC 05/13] vfio/cxl: expose CXL region to the usersapce via a new VFIO device region Zhi Wang
2024-10-11 19:12   ` Alex Williamson
2024-09-20 22:34 ` [RFC 06/13] vfio/pci: expose vfio_pci_rw() Zhi Wang
2024-09-20 22:34 ` [RFC 07/13] vfio/cxl: introduce vfio_cxl_core_{read, write}() Zhi Wang
2024-09-20 22:34 ` Zhi Wang [this message]
2024-09-20 22:34 ` [RFC 09/13] vfio/pci: introduce CXL device awareness Zhi Wang
2024-10-11 20:37   ` Alex Williamson
2024-09-20 22:34 ` [RFC 10/13] vfio/pci: emulate CXL DVSEC registers in the configuration space Zhi Wang
2024-10-11 21:02   ` Alex Williamson
2024-09-20 22:34 ` [RFC 11/13] vfio/cxl: introduce VFIO CXL device cap Zhi Wang
2024-10-11 21:14   ` Alex Williamson
2024-09-20 22:34 ` [RFC 12/13] vfio/cxl: VFIO variant driver for QEMU CXL accel device Zhi Wang
2024-09-20 22:34 ` [RFC 13/13] vfio/cxl: workaround: don't take resource region when cxl is enabled Zhi Wang
2024-09-23  8:00 ` [RFC 00/13] vfio: introduce vfio-cxl to support CXL type-2 accelerator passthrough Tian, Kevin
2024-09-24  8:30   ` Zhi Wang
2024-09-25 13:05     ` Jonathan Cameron
2024-09-27  7:18       ` Zhi Wang
2024-10-04 11:40         ` Jonathan Cameron
2024-10-19  5:30           ` Zhi Wang
2024-10-21 11:07             ` Alejandro Lucero Palau
2024-09-26  6:55     ` Tian, Kevin
2024-09-25 10:11 ` Alejandro Lucero Palau
2024-09-27  7:38   ` Zhi Wang
2024-09-27  7:38   ` Zhi Wang
2024-10-21 10:49 ` Zhi Wang
2024-10-21 13:10   ` Alejandro Lucero Palau
2024-10-30 11:56 ` Zhi Wang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240920223446.1908673-9-zhiw@nvidia.com \
    --to=zhiw@nvidia.com \
    --cc=acurrid@nvidia.com \
    --cc=alex.williamson@redhat.com \
    --cc=alison.schofield@intel.com \
    --cc=alucerop@amd.com \
    --cc=aniketa@nvidia.com \
    --cc=ankita@nvidia.com \
    --cc=cjia@nvidia.com \
    --cc=dan.j.williams@intel.com \
    --cc=dave.jiang@intel.com \
    --cc=dave@stgolabs.net \
    --cc=ira.weiny@intel.com \
    --cc=jgg@nvidia.com \
    --cc=jonathan.cameron@huawei.com \
    --cc=kevin.tian@intel.com \
    --cc=kvm@vger.kernel.org \
    --cc=kwankhede@nvidia.com \
    --cc=linux-cxl@vger.kernel.org \
    --cc=smitra@nvidia.com \
    --cc=targupta@nvidia.com \
    --cc=vishal.l.verma@intel.com \
    --cc=zhiwang@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox