All of lore.kernel.org
 help / color / mirror / Atom feed
From: Alex Williamson <alex.williamson@redhat.com>
To: Zhi Wang <zhiw@nvidia.com>
Cc: <kvm@vger.kernel.org>, <linux-cxl@vger.kernel.org>,
	<kevin.tian@intel.com>, <jgg@nvidia.com>,
	<alison.schofield@intel.com>, <dan.j.williams@intel.com>,
	<dave.jiang@intel.com>, <dave@stgolabs.net>,
	<jonathan.cameron@huawei.com>, <ira.weiny@intel.com>,
	<vishal.l.verma@intel.com>, <alucerop@amd.com>,
	<acurrid@nvidia.com>, <cjia@nvidia.com>, <smitra@nvidia.com>,
	<ankita@nvidia.com>, <aniketa@nvidia.com>, <kwankhede@nvidia.com>,
	<targupta@nvidia.com>, <zhiwang@kernel.org>
Subject: Re: [RFC 09/13] vfio/pci: introduce CXL device awareness
Date: Fri, 11 Oct 2024 14:37:19 -0600	[thread overview]
Message-ID: <20241011143719.2c6e0458.alex.williamson@redhat.com> (raw)
In-Reply-To: <20240920223446.1908673-10-zhiw@nvidia.com>

On Fri, 20 Sep 2024 15:34:42 -0700
Zhi Wang <zhiw@nvidia.com> wrote:

> CXL device programming interfaces are built upon PCI interfaces. Thus
> the vfio-pci-core can be leveraged to handle a CXL device.
> 
> However, CXL device also has difference with PCI devicce:
> 
> - No INTX support, only MSI/MSIX is supported.
> - Resest is one via CXL reset. FLR only resets CXL.io.
> 
> Introduce the CXL device awareness to the vfio-pci-core. Expose a new
> VFIO device flags to the userspace to identify the VFIO device is a CXL
> device. Disable INTX support in the vfio-pci-core. Disable FLR reset for
> the CXL device as the kernel CXL core hasn't support CXL reset yet.
> Disable mmap support on the CXL MMIO BAR in vfio-pci-core.

Why are we disabling mmap on the entire BAR when we have sparse mmap
support to handle disabling mmap only on a portion of the BAR, which
seems to be the case needed here.  Am I mistaken?
 
> Signed-off-by: Zhi Wang <zhiw@nvidia.com>
> ---
>  drivers/vfio/pci/vfio_cxl_core.c |  8 ++++++
>  drivers/vfio/pci/vfio_pci_core.c | 42 +++++++++++++++++++++-----------
>  include/linux/vfio_pci_core.h    |  2 ++
>  include/uapi/linux/vfio.h        |  1 +
>  4 files changed, 39 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/vfio/pci/vfio_cxl_core.c b/drivers/vfio/pci/vfio_cxl_core.c
> index bbb968cb1b70..d8b51f8792a2 100644
> --- a/drivers/vfio/pci/vfio_cxl_core.c
> +++ b/drivers/vfio/pci/vfio_cxl_core.c
> @@ -391,6 +391,8 @@ int vfio_cxl_core_enable(struct vfio_pci_core_device *core_dev)
>  	if (ret)
>  		return ret;
>  
> +	vfio_pci_core_enable_cxl(core_dev);
> +
>  	ret = vfio_pci_core_enable(core_dev);
>  	if (ret)
>  		goto err_pci_core_enable;
> @@ -618,6 +620,12 @@ ssize_t vfio_cxl_core_write(struct vfio_device *core_vdev, const char __user *bu
>  }
>  EXPORT_SYMBOL_GPL(vfio_cxl_core_write);
>  
> +void vfio_pci_core_enable_cxl(struct vfio_pci_core_device *core_dev)
> +{
> +	core_dev->has_cxl = true;
> +}
> +EXPORT_SYMBOL(vfio_pci_core_enable_cxl);
> +
>  MODULE_LICENSE("GPL");
>  MODULE_AUTHOR(DRIVER_AUTHOR);
>  MODULE_DESCRIPTION(DRIVER_DESC);
> diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
> index 9373942f1acb..e0f23b538858 100644
> --- a/drivers/vfio/pci/vfio_pci_core.c
> +++ b/drivers/vfio/pci/vfio_pci_core.c
> @@ -126,6 +126,9 @@ static void vfio_pci_probe_mmaps(struct vfio_pci_core_device *vdev)
>  		if (!(res->flags & IORESOURCE_MEM))
>  			goto no_mmap;
>  
> +		if (vdev->has_cxl && bar == vdev->cxl.comp_reg_bar)
> +			goto no_mmap;
> +
>  		/*
>  		 * The PCI core shouldn't set up a resource with a
>  		 * type but zero size. But there may be bugs that
> @@ -487,10 +490,15 @@ int vfio_pci_core_enable(struct vfio_pci_core_device *vdev)
>  	if (ret)
>  		goto out_power;
>  
> -	/* If reset fails because of the device lock, fail this path entirely */
> -	ret = pci_try_reset_function(pdev);
> -	if (ret == -EAGAIN)
> -		goto out_disable_device;
> +	if (!vdev->has_cxl) {
> +		/* If reset fails because of the device lock, fail this path entirely */
> +		ret = pci_try_reset_function(pdev);
> +		if (ret == -EAGAIN)
> +			goto out_disable_device;
> +	} else {
> +		/* CXL Reset is missing in CXL core. FLR only resets CXL.io path. */
> +		ret = -ENODEV;
> +	}

Seems like this should perhaps be a prerequisite to exposing the device
to userspace, otherwise how is the state sanitized between uses?

>  
>  	vdev->reset_works = !ret;
>  	pci_save_state(pdev);
> @@ -498,14 +506,17 @@ int vfio_pci_core_enable(struct vfio_pci_core_device *vdev)
>  	if (!vdev->pci_saved_state)
>  		pci_dbg(pdev, "%s: Couldn't store saved state\n", __func__);
>  
> -	if (likely(!nointxmask)) {
> -		if (vfio_pci_nointx(pdev)) {
> -			pci_info(pdev, "Masking broken INTx support\n");
> -			vdev->nointx = true;
> -			pci_intx(pdev, 0);
> -		} else
> -			vdev->pci_2_3 = pci_intx_mask_supported(pdev);
> -	}
> +	if (!vdev->has_cxl) {
> +		if (likely(!nointxmask)) {
> +			if (vfio_pci_nointx(pdev)) {
> +				pci_info(pdev, "Masking broken INTx support\n");
> +				vdev->nointx = true;
> +				pci_intx(pdev, 0);
> +			} else
> +				vdev->pci_2_3 = pci_intx_mask_supported(pdev);
> +		}
> +	} else
> +		vdev->nointx = true; /* CXL device doesn't have INTX. */
>  

Why do we need to do anything here?  nointx is for exposing a device
with INTx support as if it does not have INTx support.  If a CXL device
simply does not support INTx, like SR-IOV VFs, this is not necessary,
the interrupt pin register should be zero.  Is that not the case on a
CXL device?

>  	pci_read_config_word(pdev, PCI_COMMAND, &cmd);
>  	if (vdev->pci_2_3 && (cmd & PCI_COMMAND_INTX_DISABLE)) {
> @@ -541,7 +552,6 @@ int vfio_pci_core_enable(struct vfio_pci_core_device *vdev)
>  	if (!vfio_vga_disabled() && vfio_pci_is_vga(pdev))
>  		vdev->has_vga = true;
>  
> -

Gratuitous whitespace change.

>  	return 0;
>  
>  out_free_zdev:
> @@ -657,7 +667,8 @@ void vfio_pci_core_disable(struct vfio_pci_core_device *vdev)
>  	 * Disable INTx and MSI, presumably to avoid spurious interrupts
>  	 * during reset.  Stolen from pci_reset_function()
>  	 */
> -	pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE);
> +	if (!vdev->nointx)
> +		pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE);

No, this is not what nointx is for.  Regardless it should be a no-op on
a device that doesn't support INTx.

>  
>  	/*
>  	 * Try to get the locks ourselves to prevent a deadlock. The
> @@ -973,6 +984,9 @@ static int vfio_pci_ioctl_get_info(struct vfio_pci_core_device *vdev,
>  	if (vdev->reset_works)
>  		info.flags |= VFIO_DEVICE_FLAGS_RESET;
>  
> +	if (vdev->has_cxl)
> +		info.flags |= VFIO_DEVICE_FLAGS_CXL;
> +

So the proposal is that a vfio-cxl device will expose *both* PCI and
CXL device compatibility flags?  That means existing userspace
expecting only a PCI device will try to make use of this.  Shouldn't
the device be bound to vfio-pci rather than a vfio-cxl driver for that,
if it's even valid?

>  	info.num_regions = VFIO_PCI_NUM_REGIONS + vdev->num_regions;
>  	info.num_irqs = VFIO_PCI_NUM_IRQS;
>  
> diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
> index 9d295ca9382a..e5646aad3eb3 100644
> --- a/include/linux/vfio_pci_core.h
> +++ b/include/linux/vfio_pci_core.h
> @@ -113,6 +113,7 @@ struct vfio_pci_core_device {
>  	bool			needs_pm_restore:1;
>  	bool			pm_intx_masked:1;
>  	bool			pm_runtime_engaged:1;
> +	bool			has_cxl:1;

I wonder if has_cxl is based on has_vga, I would have expected is_cxl.
A PCI device that supports VGA is a much more discrete add-on, versus
my limited understanding of CXL is that it is a CXL device where PCI is
mostly just the configuration and enumeration interface, ie. CXL.io.

>  	struct pci_saved_state	*pci_saved_state;
>  	struct pci_saved_state	*pm_save;
>  	int			ioeventfds_nr;
> @@ -208,5 +209,6 @@ ssize_t vfio_cxl_core_read(struct vfio_device *core_vdev, char __user *buf,
>  			   size_t count, loff_t *ppos);
>  ssize_t vfio_cxl_core_write(struct vfio_device *core_vdev, const char __user *buf,
>  			    size_t count, loff_t *ppos);
> +void vfio_pci_core_enable_cxl(struct vfio_pci_core_device *core_dev);
>  
>  #endif /* VFIO_PCI_CORE_H */
> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> index 71f766c29060..0895183feaac 100644
> --- a/include/uapi/linux/vfio.h
> +++ b/include/uapi/linux/vfio.h
> @@ -214,6 +214,7 @@ struct vfio_device_info {
>  #define VFIO_DEVICE_FLAGS_FSL_MC (1 << 6)	/* vfio-fsl-mc device */
>  #define VFIO_DEVICE_FLAGS_CAPS	(1 << 7)	/* Info supports caps */
>  #define VFIO_DEVICE_FLAGS_CDX	(1 << 8)	/* vfio-cdx device */
> +#define VFIO_DEVICE_FLAGS_CXL	(1 << 9)	/* Device supports CXL support */

Comment wording.  Thanks,

Alex

>  	__u32	num_regions;	/* Max region index + 1 */
>  	__u32	num_irqs;	/* Max IRQ index + 1 */
>  	__u32   cap_offset;	/* Offset within info struct of first cap */


  reply	other threads:[~2024-10-11 20:37 UTC|newest]

Thread overview: 38+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-09-20 22:34 [RFC 00/13] vfio: introduce vfio-cxl to support CXL type-2 accelerator passthrough Zhi Wang
2024-09-20 22:34 ` [RFC 01/13] cxl: allow a type-2 device not to have memory device registers Zhi Wang
2024-09-23  8:01   ` Tian, Kevin
2024-09-23 15:38   ` Dave Jiang
2024-09-24  8:03     ` Zhi Wang
2024-09-20 22:34 ` [RFC 02/13] cxl: introduce cxl_get_hdm_info() Zhi Wang
2024-10-17 15:44   ` Jonathan Cameron
2024-10-19  5:38     ` Zhi Wang
2024-09-20 22:34 ` [RFC 03/13] cxl: introduce cxl_find_comp_reglock_offset() Zhi Wang
2024-09-20 22:34 ` [RFC 04/13] vfio: introduce vfio-cxl core preludes Zhi Wang
2024-10-11 18:33   ` Alex Williamson
2024-09-20 22:34 ` [RFC 05/13] vfio/cxl: expose CXL region to the usersapce via a new VFIO device region Zhi Wang
2024-10-11 19:12   ` Alex Williamson
2024-09-20 22:34 ` [RFC 06/13] vfio/pci: expose vfio_pci_rw() Zhi Wang
2024-09-20 22:34 ` [RFC 07/13] vfio/cxl: introduce vfio_cxl_core_{read, write}() Zhi Wang
2024-09-20 22:34 ` [RFC 08/13] vfio/cxl: emulate HDM decoder registers Zhi Wang
2024-09-20 22:34 ` [RFC 09/13] vfio/pci: introduce CXL device awareness Zhi Wang
2024-10-11 20:37   ` Alex Williamson [this message]
2024-09-20 22:34 ` [RFC 10/13] vfio/pci: emulate CXL DVSEC registers in the configuration space Zhi Wang
2024-10-11 21:02   ` Alex Williamson
2024-09-20 22:34 ` [RFC 11/13] vfio/cxl: introduce VFIO CXL device cap Zhi Wang
2024-10-11 21:14   ` Alex Williamson
2024-09-20 22:34 ` [RFC 12/13] vfio/cxl: VFIO variant driver for QEMU CXL accel device Zhi Wang
2024-09-20 22:34 ` [RFC 13/13] vfio/cxl: workaround: don't take resource region when cxl is enabled Zhi Wang
2024-09-23  8:00 ` [RFC 00/13] vfio: introduce vfio-cxl to support CXL type-2 accelerator passthrough Tian, Kevin
2024-09-24  8:30   ` Zhi Wang
2024-09-25 13:05     ` Jonathan Cameron
2024-09-27  7:18       ` Zhi Wang
2024-10-04 11:40         ` Jonathan Cameron
2024-10-19  5:30           ` Zhi Wang
2024-10-21 11:07             ` Alejandro Lucero Palau
2024-09-26  6:55     ` Tian, Kevin
2024-09-25 10:11 ` Alejandro Lucero Palau
2024-09-27  7:38   ` Zhi Wang
2024-09-27  7:38   ` Zhi Wang
2024-10-21 10:49 ` Zhi Wang
2024-10-21 13:10   ` Alejandro Lucero Palau
2024-10-30 11:56 ` Zhi Wang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20241011143719.2c6e0458.alex.williamson@redhat.com \
    --to=alex.williamson@redhat.com \
    --cc=acurrid@nvidia.com \
    --cc=alison.schofield@intel.com \
    --cc=alucerop@amd.com \
    --cc=aniketa@nvidia.com \
    --cc=ankita@nvidia.com \
    --cc=cjia@nvidia.com \
    --cc=dan.j.williams@intel.com \
    --cc=dave.jiang@intel.com \
    --cc=dave@stgolabs.net \
    --cc=ira.weiny@intel.com \
    --cc=jgg@nvidia.com \
    --cc=jonathan.cameron@huawei.com \
    --cc=kevin.tian@intel.com \
    --cc=kvm@vger.kernel.org \
    --cc=kwankhede@nvidia.com \
    --cc=linux-cxl@vger.kernel.org \
    --cc=smitra@nvidia.com \
    --cc=targupta@nvidia.com \
    --cc=vishal.l.verma@intel.com \
    --cc=zhiw@nvidia.com \
    --cc=zhiwang@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.