public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Dave Jiang <dave.jiang@intel.com>
To: mhonap@nvidia.com, aniketa@nvidia.com, ankita@nvidia.com,
	alwilliamson@nvidia.com, vsethi@nvidia.com, jgg@nvidia.com,
	mochs@nvidia.com, skolothumtho@nvidia.com,
	alejandro.lucero-palau@amd.com, dave@stgolabs.net,
	jonathan.cameron@huawei.com, alison.schofield@intel.com,
	vishal.l.verma@intel.com, ira.weiny@intel.com,
	dan.j.williams@intel.com, jgg@ziepe.ca, yishaih@nvidia.com,
	kevin.tian@intel.com
Cc: cjia@nvidia.com, kwankhede@nvidia.com, targupta@nvidia.com,
	zhiw@nvidia.com, kjaju@nvidia.com, linux-kernel@vger.kernel.org,
	linux-cxl@vger.kernel.org, kvm@vger.kernel.org
Subject: Re: [RFC v2 11/15] vfio/cxl: introduce the emulation of HDM registers
Date: Thu, 11 Dec 2025 11:13:21 -0700	[thread overview]
Message-ID: <80c04058-833b-4056-b47c-54a3a50f5f89@intel.com> (raw)
In-Reply-To: <20251209165019.2643142-12-mhonap@nvidia.com>



On 12/9/25 9:50 AM, mhonap@nvidia.com wrote:
> From: Manish Honap <mhonap@nvidia.com>
> 
> CXL devices have HDM registers in their CXL MMIO BAR. Many HDM registers
> require a PA, and they are owned by the host in virtualization.
> 
> Thus, the HDM registers need to be emulated accordingly so that the
> guest kernel CXL core can configure the virtual HDM decoders.
> 
> Introduce the emulation of HDM registers that emulates the HDM decoders.
> 
> Co-developed-by: Zhi Wang <zhiw@nvidia.com>
> Signed-off-by: Zhi Wang <zhiw@nvidia.com>
> Signed-off-by: Manish Honap <mhonap@nvidia.com>
> ---
>  drivers/vfio/pci/vfio_cxl_core.c     |   7 +-
>  drivers/vfio/pci/vfio_cxl_core_emu.c | 242 +++++++++++++++++++++++++++
>  include/linux/vfio_pci_core.h        |   2 +
>  3 files changed, 248 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/vfio/pci/vfio_cxl_core.c b/drivers/vfio/pci/vfio_cxl_core.c
> index cb75e9f668a7..c0bdf55997da 100644
> --- a/drivers/vfio/pci/vfio_cxl_core.c
> +++ b/drivers/vfio/pci/vfio_cxl_core.c
> @@ -247,8 +247,6 @@ int vfio_cxl_core_enable(struct vfio_cxl_core_device *cxl,
>  	if (!dvsec)
>  		return -ENODEV;
>  
> -	cxl->dvsec = dvsec;
> -
>  	cxl_core = devm_cxl_dev_state_create(&pdev->dev, CXL_DEVTYPE_DEVMEM,
>  					     pdev->dev.id, dvsec, struct vfio_cxl,
>  					     cxlds, false);
> @@ -257,9 +255,12 @@ int vfio_cxl_core_enable(struct vfio_cxl_core_device *cxl,
>  		return -ENOMEM;
>  	}
>  
> +	cxl->dvsec = dvsec;
> +	cxl->cxl_core = cxl_core;
> +
>  	ret = find_comp_regs(cxl);
>  	if (ret)
> -		return -ENODEV;
> +		return ret;
>  
>  	ret = setup_virt_regs(cxl);
>  	if (ret)
> diff --git a/drivers/vfio/pci/vfio_cxl_core_emu.c b/drivers/vfio/pci/vfio_cxl_core_emu.c
> index a0674bacecd7..6711ff8975ef 100644
> --- a/drivers/vfio/pci/vfio_cxl_core_emu.c
> +++ b/drivers/vfio/pci/vfio_cxl_core_emu.c
> @@ -5,6 +5,239 @@
>  
>  #include "vfio_cxl_core_priv.h"
>  
> +typedef ssize_t reg_handler_t(struct vfio_cxl_core_device *cxl, void *buf,
> +			      u64 offset, u64 size);
> +
> +static struct vfio_emulated_regblock *
> +new_reg_block(struct vfio_cxl_core_device *cxl, u64 offset, u64 size,
> +	      reg_handler_t *read, reg_handler_t *write)
> +{
> +	struct vfio_emulated_regblock *block;
> +
> +	block = kzalloc(sizeof(*block), GFP_KERNEL);
> +	if (!block)
> +		return ERR_PTR(-ENOMEM);
> +
> +	block->range.start = offset;
> +	block->range.end = offset + size - 1;
> +	block->read = read;
> +	block->write = write;
> +
> +	INIT_LIST_HEAD(&block->list);
> +
> +	return block;
> +}
> +
> +static int new_mmio_block(struct vfio_cxl_core_device *cxl, u64 offset, u64 size,
> +			  reg_handler_t *read, reg_handler_t *write)
> +{
> +	struct vfio_emulated_regblock *block;
> +
> +	block = new_reg_block(cxl, offset, size, read, write);
> +	if (IS_ERR(block))
> +		return PTR_ERR(block);
> +
> +	list_add_tail(&block->list, &cxl->mmio_regblocks_head);
> +	return 0;
> +}
> +
> +static u64 hdm_reg_base(struct vfio_cxl_core_device *cxl)
> +{
> +	return cxl->comp_reg_offset + cxl->hdm_reg_offset;
> +}
> +
> +static u64 to_hdm_reg_offset(struct vfio_cxl_core_device *cxl, u64 offset)
> +{
> +	return offset - hdm_reg_base(cxl);
> +}
> +
> +static void *hdm_reg_virt(struct vfio_cxl_core_device *cxl, u64 hdm_reg_offset)
> +{
> +	return cxl->comp_reg_virt + cxl->hdm_reg_offset + hdm_reg_offset;
> +}
> +
> +static ssize_t virt_hdm_reg_read(struct vfio_cxl_core_device *cxl, void *buf,
> +				 u64 offset, u64 size)
> +{
> +	offset = to_hdm_reg_offset(cxl, offset);
> +	memcpy(buf, hdm_reg_virt(cxl, offset), size);
> +
> +	return size;
> +}
> +
> +static ssize_t virt_hdm_reg_write(struct vfio_cxl_core_device *cxl, void *buf,
> +				  u64 offset, u64 size)
> +{
> +	offset = to_hdm_reg_offset(cxl, offset);
> +	memcpy(hdm_reg_virt(cxl, offset), buf, size);
> +
> +	return size;
> +}
> +
> +static ssize_t virt_hdm_rev_reg_write(struct vfio_cxl_core_device *cxl,
> +				      void *buf, u64 offset, u64 size)
> +{
> +	/* Discard writes on reserved registers. */
> +	return size;
> +}
> +
> +static ssize_t hdm_decoder_n_lo_write(struct vfio_cxl_core_device *cxl,
> +				      void *buf, u64 offset, u64 size)
> +{
> +	u32 new_val = le32_to_cpu(*(u32 *)buf);
> +
> +	if (WARN_ON_ONCE(size != 4))
> +		return -EINVAL;
> +
> +	/* Bit [27:0] are reserved. */
> +	new_val &= ~GENMASK(27, 0);

Maybe define the mask as a named macro rather than using a raw GENMASK() inline.

> +
> +	new_val = cpu_to_le32(new_val);
> +	offset = to_hdm_reg_offset(cxl, offset);
> +	memcpy(hdm_reg_virt(cxl, offset), &new_val, size);
> +	return size;
> +}
> +
> +static ssize_t hdm_decoder_global_ctrl_write(struct vfio_cxl_core_device *cxl,
> +					     void *buf, u64 offset, u64 size)
> +{
> +	u32 hdm_decoder_global_cap;
> +	u32 new_val = le32_to_cpu(*(u32 *)buf);
> +
> +	if (WARN_ON_ONCE(size != 4))
> +		return -EINVAL;
> +
> +	/* Bit [31:2] are reserved. */
> +	new_val &= ~GENMASK(31, 2);

Same comment here regarding defining the mask.

> +
> +	/* Poison On Decode Error Enable bit is 0 and RO if not support. */
> +	hdm_decoder_global_cap = le32_to_cpu(*(u32 *)hdm_reg_virt(cxl, 0));
> +	if (!(hdm_decoder_global_cap & BIT(10)))
> +		new_val &= ~BIT(0);

Would be good to define the register bits to ease reading the code

> +
> +	new_val = cpu_to_le32(new_val);
> +	offset = to_hdm_reg_offset(cxl, offset);
> +	memcpy(hdm_reg_virt(cxl, offset), &new_val, size);
> +	return size;
> +}
> +
> +static ssize_t hdm_decoder_n_ctrl_write(struct vfio_cxl_core_device *cxl,
> +					void *buf, u64 offset, u64 size)
> +{
> +	u32 hdm_decoder_global_cap;
> +	u32 ro_mask, rev_mask;
> +	u32 new_val = le32_to_cpu(*(u32 *)buf);
> +	u32 cur_val;
> +
> +	if (WARN_ON_ONCE(size != 4))
> +		return -EINVAL;
> +
> +	offset = to_hdm_reg_offset(cxl, offset);
> +	cur_val = le32_to_cpu(*(u32 *)hdm_reg_virt(cxl, offset));
> +
> +	/* Lock on commit */
> +	if (cur_val & BIT(8))

Please define the bit(s) as named macros. The same comment applies to the rest of the patch.

DJ

> +		return size;
> +
> +	hdm_decoder_global_cap = le32_to_cpu(*(u32 *)hdm_reg_virt(cxl, 0));
> +
> +	/* RO and reserved bits in the spec */
> +	ro_mask = BIT(10) | BIT(11);
> +	rev_mask = BIT(15) | GENMASK(31, 28);
> +
> +	/* bits are not valid for devices */
> +	ro_mask |= BIT(12);
> +	rev_mask |= GENMASK(19, 16) | GENMASK(23, 20);
> +
> +	/* bits are reserved when UIO is not supported */
> +	if (!(hdm_decoder_global_cap & BIT(13)))
> +		rev_mask |= BIT(14) | GENMASK(27, 24);
> +
> +	/* clear reserved bits */
> +	new_val &= ~rev_mask;
> +
> +	/* keep the RO bits */
> +	cur_val &= ro_mask;
> +	new_val &= ~ro_mask;
> +	new_val |= cur_val;
> +
> +	/* emulate HDM decoder commit/de-commit */
> +	if (new_val & BIT(9))
> +		new_val |= BIT(10);
> +	else
> +		new_val &= ~BIT(10);
> +
> +	new_val = cpu_to_le32(new_val);
> +	memcpy(hdm_reg_virt(cxl, offset), &new_val, size);
> +	return size;
> +}
> +
> +static int setup_mmio_emulation(struct vfio_cxl_core_device *cxl)
> +{
> +	u64 offset, base;
> +	int ret;
> +
> +	base = hdm_reg_base(cxl);
> +
> +#define ALLOC_BLOCK(offset, size, read, write) do {			\
> +		ret = new_mmio_block(cxl, offset, size, read, write);	\
> +		if (ret)						\
> +			return ret;					\
> +	} while (0)
> +
> +	ALLOC_BLOCK(base + 0x4, 4,
> +		    virt_hdm_reg_read,
> +		    hdm_decoder_global_ctrl_write);
> +
> +	offset = base + 0x10;
> +	while (offset < base + cxl->hdm_reg_size) {
> +		/* HDM N BASE LOW */
> +		ALLOC_BLOCK(offset, 4,
> +			    virt_hdm_reg_read,
> +			    hdm_decoder_n_lo_write);
> +
> +		/* HDM N BASE HIGH */
> +		ALLOC_BLOCK(offset + 0x4, 4,
> +			    virt_hdm_reg_read,
> +			    virt_hdm_reg_write);
> +
> +		/* HDM N SIZE LOW */
> +		ALLOC_BLOCK(offset + 0x8, 4,
> +			    virt_hdm_reg_read,
> +			    hdm_decoder_n_lo_write);
> +
> +		/* HDM N SIZE HIGH */
> +		ALLOC_BLOCK(offset + 0xc, 4,
> +			    virt_hdm_reg_read,
> +			    virt_hdm_reg_write);
> +
> +		/* HDM N CONTROL */
> +		ALLOC_BLOCK(offset + 0x10, 4,
> +			    virt_hdm_reg_read,
> +			    hdm_decoder_n_ctrl_write);
> +
> +		/* HDM N TARGET LIST LOW */
> +		ALLOC_BLOCK(offset + 0x14, 0x4,
> +			    virt_hdm_reg_read,
> +			    virt_hdm_rev_reg_write);
> +
> +		/* HDM N TARGET LIST HIGH */
> +		ALLOC_BLOCK(offset + 0x18, 0x4,
> +			    virt_hdm_reg_read,
> +			    virt_hdm_rev_reg_write);
> +
> +		/* HDM N REV */
> +		ALLOC_BLOCK(offset + 0x1c, 0x4,
> +			    virt_hdm_reg_read,
> +			    virt_hdm_rev_reg_write);
> +
> +		offset += 0x20;
> +	}
> +
> +#undef ALLOC_BLOCK
> +	return 0;
> +}
> +
>  void vfio_cxl_core_clean_register_emulation(struct vfio_cxl_core_device *cxl)
>  {
>  	struct list_head *pos, *n;
> @@ -17,10 +250,19 @@ void vfio_cxl_core_clean_register_emulation(struct vfio_cxl_core_device *cxl)
>  
>  int vfio_cxl_core_setup_register_emulation(struct vfio_cxl_core_device *cxl)
>  {
> +	int ret;
> +
>  	INIT_LIST_HEAD(&cxl->config_regblocks_head);
>  	INIT_LIST_HEAD(&cxl->mmio_regblocks_head);
>  
> +	ret = setup_mmio_emulation(cxl);
> +	if (ret)
> +		goto err;
> +
>  	return 0;
> +err:
> +	vfio_cxl_core_clean_register_emulation(cxl);
> +	return ret;
>  }
>  
>  static struct vfio_emulated_regblock *
> diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
> index 12ded67c7db7..31fd28626846 100644
> --- a/include/linux/vfio_pci_core.h
> +++ b/include/linux/vfio_pci_core.h
> @@ -251,5 +251,7 @@ ssize_t vfio_cxl_core_write(struct vfio_device *core_vdev, const char __user *bu
>  			    size_t count, loff_t *ppos);
>  long vfio_cxl_core_ioctl(struct vfio_device *core_vdev, unsigned int cmd,
>  			 unsigned long arg);
> +int vfio_cxl_core_setup_register_emulation(struct vfio_cxl_core_device *cxl);
> +void vfio_cxl_core_clean_register_emulation(struct vfio_cxl_core_device *cxl);
>  
>  #endif /* VFIO_PCI_CORE_H */


  reply	other threads:[~2025-12-11 18:13 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-12-09 16:50 [RFC v2 00/15] vfio: introduce vfio-cxl to support CXL type-2 accelerator passthrough Hello all, mhonap
2025-12-09 16:50 ` [RFC v2 01/15] cxl: factor out cxl_await_range_active() and cxl_media_ready() mhonap
2025-12-22 12:21   ` Jonathan Cameron
2025-12-09 16:50 ` [RFC v2 02/15] cxl: introduce cxl_get_hdm_reg_info() mhonap
2025-12-09 16:50 ` [RFC v2 03/15] cxl: introduce cxl_find_comp_reglock_offset() mhonap
2025-12-09 16:50 ` [RFC v2 04/15] cxl: introduce devm_cxl_del_memdev() mhonap
2025-12-09 16:50 ` [RFC v2 05/15] cxl: introduce cxl_get_committed_regions() mhonap
2025-12-22 12:31   ` Jonathan Cameron
2025-12-09 16:50 ` [RFC v2 06/15] vfio/cxl: introduce vfio-cxl core preludes mhonap
2025-12-22 13:54   ` Jonathan Cameron
2025-12-09 16:50 ` [RFC v2 07/15] vfio/cxl: expose CXL region to the userspace via a new VFIO device region mhonap
2025-12-11 16:06   ` Dave Jiang
2025-12-11 17:31     ` Manish Honap
2025-12-11 18:01       ` Dave Jiang
2025-12-22 14:00   ` Jonathan Cameron
2025-12-09 16:50 ` [RFC v2 08/15] vfio/cxl: discover precommitted CXL region mhonap
2025-12-22 14:09   ` Jonathan Cameron
2025-12-09 16:50 ` [RFC v2 09/15] vfio/cxl: introduce vfio_cxl_core_{read, write}() mhonap
2025-12-09 16:50 ` [RFC v2 10/15] vfio/cxl: introduce the register emulation framework mhonap
2025-12-09 16:50 ` [RFC v2 11/15] vfio/cxl: introduce the emulation of HDM registers mhonap
2025-12-11 18:13   ` Dave Jiang [this message]
2025-12-09 16:50 ` [RFC v2 12/15] vfio/cxl: introduce the emulation of CXL configuration space mhonap
2025-12-09 16:50 ` [RFC v2 13/15] vfio/pci: introduce CXL device awareness mhonap
2025-12-09 16:50 ` [RFC v2 14/15] vfio/cxl: VFIO variant driver for QEMU CXL accel device mhonap
2025-12-09 16:50 ` [RFC v2 15/15] cxl/mem: Fix NULL pointer deference in memory device paths mhonap

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=80c04058-833b-4056-b47c-54a3a50f5f89@intel.com \
    --to=dave.jiang@intel.com \
    --cc=alejandro.lucero-palau@amd.com \
    --cc=alison.schofield@intel.com \
    --cc=alwilliamson@nvidia.com \
    --cc=aniketa@nvidia.com \
    --cc=ankita@nvidia.com \
    --cc=cjia@nvidia.com \
    --cc=dan.j.williams@intel.com \
    --cc=dave@stgolabs.net \
    --cc=ira.weiny@intel.com \
    --cc=jgg@nvidia.com \
    --cc=jgg@ziepe.ca \
    --cc=jonathan.cameron@huawei.com \
    --cc=kevin.tian@intel.com \
    --cc=kjaju@nvidia.com \
    --cc=kvm@vger.kernel.org \
    --cc=kwankhede@nvidia.com \
    --cc=linux-cxl@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mhonap@nvidia.com \
    --cc=mochs@nvidia.com \
    --cc=skolothumtho@nvidia.com \
    --cc=targupta@nvidia.com \
    --cc=vishal.l.verma@intel.com \
    --cc=vsethi@nvidia.com \
    --cc=yishaih@nvidia.com \
    --cc=zhiw@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox