public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
From: Alex Williamson <alex.williamson@redhat.com>
To: Yi Liu <yi.l.liu@intel.com>
Cc: jgg@nvidia.com, kevin.tian@intel.com, joro@8bytes.org,
	robin.murphy@arm.com, cohuck@redhat.com, eric.auger@redhat.com,
	nicolinc@nvidia.com, kvm@vger.kernel.org, mjrosato@linux.ibm.com,
	chao.p.peng@linux.intel.com, yi.y.sun@linux.intel.com,
	peterx@redhat.com, jasowang@redhat.com,
	shameerali.kolothum.thodi@huawei.com, lulu@redhat.com,
	suravee.suthikulpanit@amd.com,
	intel-gvt-dev@lists.freedesktop.org,
	intel-gfx@lists.freedesktop.org, linux-s390@vger.kernel.org,
	xudong.hao@intel.com, yan.y.zhao@intel.com,
	terrence.xu@intel.com, yanting.jiang@intel.com,
	zhenzhong.duan@intel.com
Subject: Re: [PATCH v4 9/9] vfio/pci: Allow passing zero-length fd array in VFIO_DEVICE_PCI_HOT_RESET
Date: Thu, 27 Apr 2023 15:55:24 -0600	[thread overview]
Message-ID: <20230427155524.732c878d.alex.williamson@redhat.com> (raw)
In-Reply-To: <20230426145419.450922-10-yi.l.liu@intel.com>

On Wed, 26 Apr 2023 07:54:19 -0700
Yi Liu <yi.l.liu@intel.com> wrote:

> This is the way for the user to invoke hot-reset for devices opened by the cdev
> interface. The user should check the flag VFIO_PCI_HOT_RESET_FLAG_RESETTABLE
> in the output of VFIO_DEVICE_GET_PCI_HOT_RESET_INFO ioctl before doing
> hot-reset for cdev devices.
> 
> Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
> Tested-by: Yanting Jiang <yanting.jiang@intel.com>
> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> ---
>  drivers/vfio/pci/vfio_pci_core.c | 66 +++++++++++++++++++++++++++-----
>  include/uapi/linux/vfio.h        | 22 +++++++++++
>  2 files changed, 79 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
> index 43858d471447..f70e3b948b16 100644
> --- a/drivers/vfio/pci/vfio_pci_core.c
> +++ b/drivers/vfio/pci/vfio_pci_core.c
> @@ -180,7 +180,8 @@ static void vfio_pci_probe_mmaps(struct vfio_pci_core_device *vdev)
>  struct vfio_pci_group_info;
>  static void vfio_pci_dev_set_try_reset(struct vfio_device_set *dev_set);
>  static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
> -				      struct vfio_pci_group_info *groups);
> +				      struct vfio_pci_group_info *groups,
> +				      struct iommufd_ctx *iommufd_ctx);
>  
>  /*
>   * INTx masking requires the ability to disable INTx signaling via PCI_COMMAND
> @@ -1364,8 +1365,7 @@ vfio_pci_ioctl_pci_hot_reset_groups(struct vfio_pci_core_device *vdev,
>  	if (ret)
>  		return ret;
>  
> -	/* Somewhere between 1 and count is OK */
> -	if (!array_count || array_count > count)
> +	if (array_count > count)
>  		return -EINVAL;

Doesn't this need a || vfio_device_cdev_opened(vdev) test as well?
It's invalid to pass fds for a cdev device.  Presumably it would fail
later collecting group fds as well, but might as well enforce the
semantics early.

>  
>  	group_fds = kcalloc(array_count, sizeof(*group_fds), GFP_KERNEL);
> @@ -1414,7 +1414,7 @@ vfio_pci_ioctl_pci_hot_reset_groups(struct vfio_pci_core_device *vdev,
>  	info.count = array_count;
>  	info.files = files;
>  
> -	ret = vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, &info);
> +	ret = vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, &info, NULL);
>  
>  hot_reset_release:
>  	for (file_idx--; file_idx >= 0; file_idx--)
> @@ -1429,6 +1429,7 @@ static int vfio_pci_ioctl_pci_hot_reset(struct vfio_pci_core_device *vdev,
>  {
>  	unsigned long minsz = offsetofend(struct vfio_pci_hot_reset, count);
>  	struct vfio_pci_hot_reset hdr;
> +	struct iommufd_ctx *iommufd;
>  	bool slot = false;
>  
>  	if (copy_from_user(&hdr, arg, minsz))
> @@ -1443,7 +1444,12 @@ static int vfio_pci_ioctl_pci_hot_reset(struct vfio_pci_core_device *vdev,
>  	else if (pci_probe_reset_bus(vdev->pdev->bus))
>  		return -ENODEV;
>  
> -	return vfio_pci_ioctl_pci_hot_reset_groups(vdev, hdr.count, slot, arg);
> +	if (hdr.count)
> +		return vfio_pci_ioctl_pci_hot_reset_groups(vdev, hdr.count, slot, arg);
> +
> +	iommufd = vfio_iommufd_physical_ictx(&vdev->vdev);
> +
> +	return vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, NULL, iommufd);

Why did we need to store iommufd in a variable?

>  }
>  
>  static int vfio_pci_ioctl_ioeventfd(struct vfio_pci_core_device *vdev,
> @@ -2415,6 +2421,9 @@ static bool vfio_dev_in_groups(struct vfio_pci_core_device *vdev,
>  {
>  	unsigned int i;
>  
> +	if (!groups)
> +		return false;
> +
>  	for (i = 0; i < groups->count; i++)
>  		if (vfio_file_has_dev(groups->files[i], &vdev->vdev))
>  			return true;
> @@ -2488,13 +2497,38 @@ static int vfio_pci_dev_set_pm_runtime_get(struct vfio_device_set *dev_set)
>  	return ret;
>  }
>  
> +static bool vfio_dev_in_iommufd_ctx(struct vfio_pci_core_device *vdev,
> +				    struct iommufd_ctx *iommufd_ctx)
> +{
> +	struct iommufd_ctx *iommufd = vfio_iommufd_physical_ictx(&vdev->vdev);
> +	struct iommu_group *iommu_group;
> +
> +	if (!iommufd_ctx)
> +		return false;
> +
> +	if (iommufd == iommufd_ctx)
> +		return true;
> +
> +	iommu_group = iommu_group_get(vdev->vdev.dev);
> +	if (!iommu_group)
> +		return false;
> +
> +	/*
> +	 * Try to check if any device within iommu_group is bound with
> +	 * the input iommufd_ctx.
> +	 */
> +	return vfio_devset_iommufd_has_group(vdev->vdev.dev_set,
> +					     iommufd_ctx, iommu_group);
> +}

This last test makes this not do what the function name suggests it
does.  If it were true, the device is not in the iommufd_ctx; it simply
cannot be within another iommufd_ctx.

> +
>  /*
>   * We need to get memory_lock for each device, but devices can share mmap_lock,
>   * therefore we need to zap and hold the vma_lock for each device, and only then
>   * get each memory_lock.
>   */
>  static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
> -				      struct vfio_pci_group_info *groups)
> +				      struct vfio_pci_group_info *groups,
> +				      struct iommufd_ctx *iommufd_ctx)
>  {
>  	struct vfio_pci_core_device *cur_mem;
>  	struct vfio_pci_core_device *cur_vma;
> @@ -2525,10 +2559,24 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
>  
>  	list_for_each_entry(cur_vma, &dev_set->device_list, vdev.dev_set_list) {
>  		/*
> -		 * Test whether all the affected devices are contained by the
> -		 * set of groups provided by the user.
> +		 * Test whether all the affected devices can be reset by the
> +		 * user.
> +		 *
> +		 * If user provides a set of groups, all the opened devices
> +		 * in the dev_set should be contained by the set of groups
> +		 * provided by the user.
> +		 *
> +		 * If user provides a zero-length group fd array, then all
> +		 * the affected devices must be bound to same iommufd_ctx as
> +		 * the input iommufd_ctx.  If there is device that has not
> +		 * been bound to iommufd_ctx yet, shall check if there is any
> +		 * device within its iommu_group that has been bound to the
> +		 * input iommufd_ctx.
> +		 *
> +		 * Otherwise, reset is not allowed.
>  		 */
> -		if (!vfio_dev_in_groups(cur_vma, groups)) {
> +		if (!vfio_dev_in_groups(cur_vma, groups) &&
> +		    !vfio_dev_in_iommufd_ctx(cur_vma, iommufd_ctx)) {


Rather than mangling vfio_dev_in_groups() and inventing
vfio_dev_in_iommufd_ctx() that doesn't do what it implies, how about:

/*
 * Decide whether the caller has proven ownership of @vdev for a hot reset.
 *
 * Exactly one of @groups (legacy group fd array) or @iommufd_ctx (cdev path)
 * is expected; passing both or neither triggers a WARN.
 *
 * With @groups, ownership is whatever vfio_dev_in_groups() reports.  With
 * @iommufd_ctx, the device is owned if it is bound to that context, or if
 * vfio_devset_iommufd_has_group() reports that a device in its iommu_group
 * within the dev_set is bound to that context.
 *
 * NOTE(review): vfio_dev_in_groups() currently takes a
 * struct vfio_pci_core_device *; it would need to accept a
 * struct vfio_device * for this helper to live in vfio_main.c.
 */
bool vfio_device_owned(struct vfio_device *vdev,
		       struct vfio_pci_group_info *groups,
		       struct iommufd_ctx *iommufd_ctx)
{
	struct iommu_group *group;
	bool owned;

	WARN_ON(!!groups == !!iommufd_ctx);

	if (groups)
		return vfio_dev_in_groups(vdev, groups);

	if (vfio_iommufd_physical_ictx(vdev) == iommufd_ctx)
		return true;

	group = iommu_group_get(vdev->dev);
	if (!group)
		return false;

	owned = vfio_devset_iommufd_has_group(vdev->dev_set,
					      iommufd_ctx, group);
	/* iommu_group_get() took a reference; drop it before returning. */
	iommu_group_put(group);

	return owned;
}

Seems like such a function would live in vfio_main.c

>  			ret = -EINVAL;
>  			goto err_undo;
>  		}
> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> index 4b4e2c28984b..1241d02d8701 100644
> --- a/include/uapi/linux/vfio.h
> +++ b/include/uapi/linux/vfio.h
> @@ -710,6 +710,28 @@ struct vfio_pci_hot_reset_info {
>   * VFIO_DEVICE_PCI_HOT_RESET - _IOW(VFIO_TYPE, VFIO_BASE + 13,
>   *				    struct vfio_pci_hot_reset)
>   *
> + * Userspace requests hot reset for the devices it operates.  Due to the
> + * underlying topology, multiple devices can be affected in the reset
> + * while some might be opened by another user.  To avoid interference
> + * the calling user must ensure all affected devices are owned by itself.
> + * The ownership proof needs to refer to the output of
> + * VFIO_DEVICE_GET_PCI_HOT_RESET_INFO.  Ownership can be proved as:
> + *
> + *   1) An array of group fds - This is used for the devices opened via
> + *				the group/container interface.
> + *   2) A zero-length array - This is used for the devices opened via
> + *			      the cdev interface.  User should check the
> + *			      flag VFIO_PCI_HOT_RESET_FLAG_IOMMUFD_DEV_ID
> + *			      and flag VFIO_PCI_HOT_RESET_FLAG_RESETTABLE
> + *			      before using this method.
> + *
> + * In case a non void group fd array is passed, the devices affected by
> + * the reset must belong to those opened VFIO groups.  In case a zero
> + * length array is passed, the other devices affected by the reset, if
> + * any, must be either bound to the same iommufd as this VFIO device or
> + * in the same iommu_group with a device that does.  Either of the two
> + * methods is applied to check the feasibility of the hot reset.

This should probably just refer to the concept of ownership described
in the INFO ioctl and clarify that cdev opened devices must exclusively
provide an empty array and group opened devices must exclusively use an
array of group fds for proof of ownership.  Mixed access to devices
between cdev and legacy groups is not supported by this interface.
Thanks,

Alex

> + *
>   * Return: 0 on success, -errno on failure.
>   */
>  struct vfio_pci_hot_reset {


  parent reply	other threads:[~2023-04-27 21:56 UTC|newest]

Thread overview: 50+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-04-26 14:54 [PATCH v4 0/9] Enhance vfio PCI hot reset for vfio cdev device Yi Liu
2023-04-26 14:54 ` [PATCH v4 1/9] vfio: Determine noiommu in vfio_device registration Yi Liu
2023-04-27  6:36   ` Tian, Kevin
2023-04-27  7:05     ` Liu, Yi L
2023-04-27 18:35       ` Alex Williamson
2023-04-26 14:54 ` [PATCH v4 2/9] vfio-iommufd: Create iommufd_access for noiommu devices Yi Liu
2023-04-27  6:39   ` Tian, Kevin
2023-04-27  6:59     ` Liu, Yi L
2023-04-27 18:32       ` Alex Williamson
2023-04-28  6:21         ` Yi Liu
2023-04-28  7:00           ` Tian, Kevin
2023-04-28  7:04             ` Yi Liu
2023-04-28 12:07           ` Jason Gunthorpe
2023-04-28 16:07             ` Yi Liu
2023-05-02 18:12               ` Jason Gunthorpe
2023-05-03  9:48                 ` Liu, Yi L
2023-05-03 19:42                   ` Jason Gunthorpe
2023-05-08 15:46                   ` Liu, Yi L
2023-04-28 16:13         ` Yi Liu
2023-05-02 18:22           ` Jason Gunthorpe
2023-05-03  9:57             ` Liu, Yi L
2023-05-03 19:41               ` Jason Gunthorpe
2023-05-03 22:49                 ` Alex Williamson
2023-04-26 14:54 ` [PATCH v4 3/9] vfio/pci: Update comment around group_fd get in vfio_pci_ioctl_pci_hot_reset() Yi Liu
2023-04-26 14:54 ` [PATCH v4 4/9] vfio/pci: Move the existing hot reset logic to be a helper Yi Liu
2023-04-27  6:39   ` Tian, Kevin
2023-04-26 14:54 ` [PATCH v4 5/9] vfio: Mark cdev usage in vfio_device Yi Liu
2023-04-27  6:40   ` Tian, Kevin
2023-04-27 18:43   ` Alex Williamson
2023-04-28  6:42     ` Yi Liu
2023-04-26 14:54 ` [PATCH v4 6/9] iommufd: Reserved -1 in the iommufd xarray Yi Liu
2023-04-27  6:41   ` Tian, Kevin
2023-04-27  7:09     ` Liu, Yi L
2023-04-27 11:55       ` Jason Gunthorpe
2023-04-26 14:54 ` [PATCH v4 7/9] vfio-iommufd: Add helper to retrieve iommufd_ctx and devid for vfio_device Yi Liu
2023-04-27  6:45   ` Tian, Kevin
2023-04-27  7:15     ` Liu, Yi L
2023-04-26 14:54 ` [PATCH v4 8/9] vfio/pci: Extend VFIO_DEVICE_GET_PCI_HOT_RESET_INFO for vfio device cdev Yi Liu
2023-04-27  6:51   ` Tian, Kevin
2023-04-27 20:04   ` Alex Williamson
2023-04-27 20:15     ` Alex Williamson
2023-05-08 15:32       ` Liu, Yi L
2023-05-08 20:29         ` Alex Williamson
2023-04-26 14:54 ` [PATCH v4 9/9] vfio/pci: Allow passing zero-length fd array in VFIO_DEVICE_PCI_HOT_RESET Yi Liu
2023-04-27  6:54   ` Tian, Kevin
2023-04-27  7:02     ` Liu, Yi L
2023-04-27 21:55   ` Alex Williamson [this message]
2023-05-02 12:55     ` Liu, Yi L
2023-04-26 15:07 ` [PATCH v4 0/9] Enhance vfio PCI hot reset for vfio cdev device Liu, Yi L
2023-04-28  9:28 ` Jiang, Yanting

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230427155524.732c878d.alex.williamson@redhat.com \
    --to=alex.williamson@redhat.com \
    --cc=chao.p.peng@linux.intel.com \
    --cc=cohuck@redhat.com \
    --cc=eric.auger@redhat.com \
    --cc=intel-gfx@lists.freedesktop.org \
    --cc=intel-gvt-dev@lists.freedesktop.org \
    --cc=jasowang@redhat.com \
    --cc=jgg@nvidia.com \
    --cc=joro@8bytes.org \
    --cc=kevin.tian@intel.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-s390@vger.kernel.org \
    --cc=lulu@redhat.com \
    --cc=mjrosato@linux.ibm.com \
    --cc=nicolinc@nvidia.com \
    --cc=peterx@redhat.com \
    --cc=robin.murphy@arm.com \
    --cc=shameerali.kolothum.thodi@huawei.com \
    --cc=suravee.suthikulpanit@amd.com \
    --cc=terrence.xu@intel.com \
    --cc=xudong.hao@intel.com \
    --cc=yan.y.zhao@intel.com \
    --cc=yanting.jiang@intel.com \
    --cc=yi.l.liu@intel.com \
    --cc=yi.y.sun@linux.intel.com \
    --cc=zhenzhong.duan@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox