From: Jason Gunthorpe <jgg@nvidia.com>
To: "Liu, Yi L" <yi.l.liu@intel.com>
Cc: "Tian, Kevin" <kevin.tian@intel.com>,
"alex.williamson@redhat.com" <alex.williamson@redhat.com>,
"joro@8bytes.org" <joro@8bytes.org>,
"cohuck@redhat.com" <cohuck@redhat.com>,
"eric.auger@redhat.com" <eric.auger@redhat.com>,
"nicolinc@nvidia.com" <nicolinc@nvidia.com>,
"kvm@vger.kernel.org" <kvm@vger.kernel.org>,
"mjrosato@linux.ibm.com" <mjrosato@linux.ibm.com>,
"chao.p.peng@linux.intel.com" <chao.p.peng@linux.intel.com>,
"yi.y.sun@linux.intel.com" <yi.y.sun@linux.intel.com>,
"peterx@redhat.com" <peterx@redhat.com>,
"jasowang@redhat.com" <jasowang@redhat.com>,
"shameerali.kolothum.thodi@huawei.com"
<shameerali.kolothum.thodi@huawei.com>,
"lulu@redhat.com" <lulu@redhat.com>,
"suravee.suthikulpanit@amd.com" <suravee.suthikulpanit@amd.com>,
"intel-gvt-dev@lists.freedesktop.org"
<intel-gvt-dev@lists.freedesktop.org>,
"intel-gfx@lists.freedesktop.org"
<intel-gfx@lists.freedesktop.org>,
"linux-s390@vger.kernel.org" <linux-s390@vger.kernel.org>,
"Zhao, Yan Y" <yan.y.zhao@intel.com>,
"Hao, Xudong" <xudong.hao@intel.com>,
"Xu, Terrence" <terrence.xu@intel.com>
Subject: Re: [PATCH v4 09/19] vfio/pci: Accept device fd for hot reset
Date: Wed, 22 Feb 2023 13:17:44 -0400 [thread overview]
Message-ID: <Y/ZOOClu8nXy2toX@nvidia.com> (raw)
In-Reply-To: <DS0PR11MB7529B33D098225CFAAA7D63FC3AA9@DS0PR11MB7529.namprd11.prod.outlook.com>
On Wed, Feb 22, 2023 at 01:35:06PM +0000, Liu, Yi L wrote:
> > btw this patch is insufficient to handle device fd. The current logic
> > requires every device in the dev_set covered by provided fd's:
Yes, which is what it should be
> > static bool vfio_dev_in_groups(struct vfio_pci_core_device *vdev,
> > struct vfio_pci_group_info *groups)
> > {
> > unsigned int i;
> >
> > for (i = 0; i < groups->count; i++)
> > if (vfio_file_has_dev(groups->files[i], &vdev->vdev))
> > return true;
> > return false;
> > }
> >
> > Presumably when cdev fd is provided above should compare iommu
> > group of the fd and that of the vdev. Otherwise it expects the user
> > to have full access to every device in the set which is impractical.
No, it should check the devs directly; userspace has to provide every
dev in the dev set to do a reset. We should not allow userspace to
take a shortcut based on hidden group stuff.
The dev set is already unrelated to the groups, and userspace cannot
discover the dev set, so nothing has changed.
This is looking worse to me. I think we should not require userspace
to pass in lists of devices here. The simpler solution is to just take
in a single iommufd and use that as the ownership proof. Something
like the below.
Jason
diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index d81f93a321afcb..a5833bfdd7307e 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -114,6 +114,34 @@ struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx,
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_bind, IOMMUFD);
+/**
+ * iommufd_ctx_has_device - True if the struct device is bound to this ictx
+ * @ictx: iommufd file descriptor
+ * @dev: Pointer to a physical device struct
+ *
+ * True if an iommufd_device_bind() is present for dev.
+ */
+bool iommufd_ctx_has_device(struct iommufd_ctx *ictx, struct device *dev)
+{
+ unsigned long index;
+ struct iommufd_object *obj;
+
+ if (!ictx)
+ return false;
+
+ xa_lock(&ictx->objects);
+ xa_for_each(&ictx->objects, index, obj) {
+ if (obj->type == IOMMUFD_OBJ_DEVICE &&
+ container_of(obj, struct iommufd_device, obj)->dev == dev) {
+ xa_unlock(&ictx->objects);
+ return true;
+ }
+ }
+ xa_unlock(&ictx->objects);
+ return false;
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_ctx_has_device, IOMMUFD);
+
/**
* iommufd_device_unbind - Undo iommufd_device_bind()
* @idev: Device returned by iommufd_device_bind()
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index 26a541cc64d114..28f6db1b81c1af 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -27,6 +27,7 @@
#include <linux/vgaarb.h>
#include <linux/nospec.h>
#include <linux/sched/mm.h>
+#include <linux/iommufd.h>
#if IS_ENABLED(CONFIG_EEH)
#include <asm/eeh.h>
#endif
@@ -179,7 +180,8 @@ static void vfio_pci_probe_mmaps(struct vfio_pci_core_device *vdev)
struct vfio_pci_group_info;
static void vfio_pci_dev_set_try_reset(struct vfio_device_set *dev_set);
static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
- struct vfio_pci_group_info *groups);
+ struct vfio_pci_group_info *groups,
+ struct iommufd_ctx *iommufd_ctx);
/*
* INTx masking requires the ability to disable INTx signaling via PCI_COMMAND
@@ -1254,29 +1256,17 @@ static int vfio_pci_ioctl_get_pci_hot_reset_info(
return ret;
}
-static int vfio_pci_ioctl_pci_hot_reset(struct vfio_pci_core_device *vdev,
- struct vfio_pci_hot_reset __user *arg)
+static int
+vfio_pci_ioctl_pci_hot_reset_groups(struct vfio_pci_core_device *vdev,
+ struct vfio_pci_hot_reset *hdr,
+ struct vfio_pci_hot_reset __user *arg)
{
- unsigned long minsz = offsetofend(struct vfio_pci_hot_reset, count);
- struct vfio_pci_hot_reset hdr;
int32_t *group_fds;
struct file **files;
struct vfio_pci_group_info info;
bool slot = false;
int file_idx, count = 0, ret = 0;
- if (copy_from_user(&hdr, arg, minsz))
- return -EFAULT;
-
- if (hdr.argsz < minsz || hdr.flags)
- return -EINVAL;
-
- /* Can we do a slot or bus reset or neither? */
- if (!pci_probe_reset_slot(vdev->pdev->slot))
- slot = true;
- else if (pci_probe_reset_bus(vdev->pdev->bus))
- return -ENODEV;
-
/*
* We can't let userspace give us an arbitrarily large buffer to copy,
* so verify how many we think there could be. Note groups can have
@@ -1288,11 +1278,11 @@ static int vfio_pci_ioctl_pci_hot_reset(struct vfio_pci_core_device *vdev,
return ret;
/* Somewhere between 1 and count is OK */
- if (!hdr.count || hdr.count > count)
+ if (!hdr->count || hdr->count > count)
return -EINVAL;
- group_fds = kcalloc(hdr.count, sizeof(*group_fds), GFP_KERNEL);
- files = kcalloc(hdr.count, sizeof(*files), GFP_KERNEL);
+ group_fds = kcalloc(hdr->count, sizeof(*group_fds), GFP_KERNEL);
+ files = kcalloc(hdr->count, sizeof(*files), GFP_KERNEL);
if (!group_fds || !files) {
kfree(group_fds);
kfree(files);
@@ -1300,7 +1290,7 @@ static int vfio_pci_ioctl_pci_hot_reset(struct vfio_pci_core_device *vdev,
}
if (copy_from_user(group_fds, arg->group_fds,
- hdr.count * sizeof(*group_fds))) {
+ hdr->count * sizeof(*group_fds))) {
kfree(group_fds);
kfree(files);
return -EFAULT;
@@ -1311,7 +1301,7 @@ static int vfio_pci_ioctl_pci_hot_reset(struct vfio_pci_core_device *vdev,
* interface and store the group and iommu ID. This ensures the group
* is held across the reset.
*/
- for (file_idx = 0; file_idx < hdr.count; file_idx++) {
+ for (file_idx = 0; file_idx < hdr->count; file_idx++) {
struct file *file = fget(group_fds[file_idx]);
if (!file) {
@@ -1335,10 +1325,10 @@ static int vfio_pci_ioctl_pci_hot_reset(struct vfio_pci_core_device *vdev,
if (ret)
goto hot_reset_release;
- info.count = hdr.count;
+ info.count = hdr->count;
info.files = files;
- ret = vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, &info);
+ ret = vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, &info, NULL);
hot_reset_release:
for (file_idx--; file_idx >= 0; file_idx--)
@@ -1348,6 +1338,50 @@ static int vfio_pci_ioctl_pci_hot_reset(struct vfio_pci_core_device *vdev,
return ret;
}
+static int vfio_pci_ioctl_pci_hot_reset(struct vfio_pci_core_device *vdev,
+ struct vfio_pci_hot_reset __user *arg)
+{
+ unsigned long minsz = offsetofend(struct vfio_pci_hot_reset, count);
+ struct vfio_pci_hot_reset hdr;
+ struct iommufd_ctx *iommufd;
+ bool slot = false;
+ struct fd f;
+ int32_t fd;
+ int ret;
+
+ if (copy_from_user(&hdr, arg, minsz))
+ return -EFAULT;
+
+ if (hdr.argsz < minsz || hdr.flags)
+ return -EINVAL;
+
+ /* Can we do a slot or bus reset or neither? */
+ if (!pci_probe_reset_slot(vdev->pdev->slot))
+ slot = true;
+ else if (pci_probe_reset_bus(vdev->pdev->bus))
+ return -ENODEV;
+
+ if (hdr.count != 1)
+ return vfio_pci_ioctl_pci_hot_reset_groups(vdev, &hdr, arg);
+
+ if (copy_from_user(&fd, arg->group_fds, sizeof(fd)))
+ return -EFAULT;
+
+ f = fdget(fd);
+ if (!f.file)
+ return -EBADF;
+ iommufd = iommufd_ctx_from_file(f.file);
+ if (IS_ERR(iommufd)) {
+ fdput(f);
+ return vfio_pci_ioctl_pci_hot_reset_groups(vdev, &hdr, arg);
+ }
+ fdput(f);
+
+ ret = vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, NULL, iommufd);
+ iommufd_ctx_put(iommufd);
+ return ret;
+}
+
static int vfio_pci_ioctl_ioeventfd(struct vfio_pci_core_device *vdev,
struct vfio_device_ioeventfd __user *arg)
{
@@ -2317,6 +2351,9 @@ static bool vfio_dev_in_groups(struct vfio_pci_core_device *vdev,
{
unsigned int i;
+ if (!groups)
+ return false;
+
for (i = 0; i < groups->count; i++)
if (vfio_file_has_dev(groups->files[i], &vdev->vdev))
return true;
@@ -2398,7 +2435,8 @@ static int vfio_pci_dev_set_pm_runtime_get(struct vfio_device_set *dev_set)
* get each memory_lock.
*/
static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
- struct vfio_pci_group_info *groups)
+ struct vfio_pci_group_info *groups,
+ struct iommufd_ctx *iommufd_ctx)
{
struct vfio_pci_core_device *cur_mem;
struct vfio_pci_core_device *cur_vma;
@@ -2432,7 +2470,8 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
* Test whether all the affected devices are contained by the
* set of groups provided by the user.
*/
- if (!vfio_dev_in_groups(cur_vma, groups)) {
+ if (!vfio_dev_in_groups(cur_vma, groups) &&
+ !iommufd_ctx_has_device(iommufd_ctx, &cur_vma->pdev->dev)) {
ret = -EINVAL;
goto err_undo;
}
diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h
index 650d45629647a7..1f58673701cb1e 100644
--- a/include/linux/iommufd.h
+++ b/include/linux/iommufd.h
@@ -58,6 +58,7 @@ void iommufd_access_unpin_pages(struct iommufd_access *access,
int iommufd_access_rw(struct iommufd_access *access, unsigned long iova,
void *data, size_t len, unsigned int flags);
int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, u32 *out_ioas_id);
+bool iommufd_ctx_has_device(struct iommufd_ctx *ictx, struct device *dev);
#else /* !CONFIG_IOMMUFD */
static inline struct iommufd_ctx *iommufd_ctx_from_file(struct file *file)
{
@@ -94,5 +95,12 @@ static inline int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx,
{
return -EOPNOTSUPP;
}
+
+static inline bool iommufd_ctx_has_device(struct iommufd_ctx *ictx,
+ struct device *dev)
+{
+ return false;
+}
+
#endif /* CONFIG_IOMMUFD */
#endif
next prev parent reply other threads:[~2023-02-22 17:17 UTC|newest]
Thread overview: 55+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-02-21 3:47 [PATCH v4 00/19] Add vfio_device cdev for iommufd support Yi Liu
2023-02-21 3:47 ` [PATCH v4 01/19] vfio: Allocate per device file structure Yi Liu
2023-02-21 3:47 ` [PATCH v4 02/19] vfio: Refine vfio file kAPIs Yi Liu
2023-02-21 3:47 ` [PATCH v4 03/19] vfio: Accept vfio device file in the driver facing kAPI Yi Liu
2023-02-22 7:15 ` Tian, Kevin
2023-02-26 12:20 ` Liu, Yi L
2023-02-21 3:47 ` [PATCH v4 04/19] kvm/vfio: Rename kvm_vfio_group to prepare for accepting vfio device fd Yi Liu
2023-02-21 3:47 ` [PATCH v4 05/19] kvm/vfio: Accept vfio device file from userspace Yi Liu
2023-02-22 7:17 ` Tian, Kevin
2023-02-23 10:33 ` Liu, Yi L
2023-02-21 3:47 ` [PATCH v4 06/19] vfio: Pass struct vfio_device_file * to vfio_device_open/close() Yi Liu
2023-02-21 3:48 ` [PATCH v4 07/19] vfio: Block device access via device fd until device is opened Yi Liu
2023-02-22 7:55 ` Yan Zhao
2023-02-22 8:29 ` Liu, Yi L
2023-02-21 3:48 ` [PATCH v4 08/19] vfio/pci: Update comment around group_fd get in vfio_pci_ioctl_pci_hot_reset() Yi Liu
2023-02-22 7:20 ` Tian, Kevin
2023-02-21 3:48 ` [PATCH v4 09/19] vfio/pci: Accept device fd for hot reset Yi Liu
2023-02-22 7:26 ` Tian, Kevin
2023-02-22 13:35 ` Liu, Yi L
2023-02-22 17:17 ` Jason Gunthorpe [this message]
2023-02-23 7:55 ` Tian, Kevin
2023-02-23 13:21 ` Jason Gunthorpe
2023-02-24 2:21 ` Tian, Kevin
2023-02-24 2:36 ` Jason Gunthorpe
2023-02-24 2:48 ` Tian, Kevin
2023-02-24 3:43 ` Liu, Yi L
2023-02-24 3:56 ` Tian, Kevin
2023-02-24 5:09 ` Liu, Yi L
2023-02-24 14:30 ` Jason Gunthorpe
2023-02-26 8:59 ` Liu, Yi L
2023-02-26 23:40 ` Jason Gunthorpe
2023-02-27 2:53 ` Liu, Yi L
2023-02-21 3:48 ` [PATCH v4 10/19] vfio: Add infrastructure for bind_iommufd from userspace Yi Liu
2023-02-21 3:48 ` [PATCH v4 11/19] vfio-iommufd: Add detach_ioas support for physical VFIO devices Yi Liu
2023-02-21 3:48 ` [PATCH v4 12/19] vfio-iommufd: Add detach_ioas for emulated " Yi Liu
2023-02-21 3:48 ` [PATCH v4 13/19] vfio: Add cdev_device_open_cnt to vfio_group Yi Liu
2023-02-22 7:31 ` Tian, Kevin
2023-02-21 3:48 ` [PATCH v4 14/19] vfio: Make vfio_device_open() single open for device cdev path Yi Liu
2023-02-22 7:32 ` Tian, Kevin
2023-02-21 3:48 ` [PATCH v4 15/19] vfio: Add cdev for vfio_device Yi Liu
2023-02-22 7:34 ` Tian, Kevin
2023-02-21 3:48 ` [PATCH v4 16/19] vfio: Add VFIO_DEVICE_BIND_IOMMUFD Yi Liu
2023-02-22 7:39 ` Tian, Kevin
2023-02-22 7:44 ` Liu, Yi L
2023-02-22 7:59 ` Tian, Kevin
2023-02-22 12:59 ` Jason Gunthorpe
2023-02-24 4:58 ` Yan Zhao
2023-02-24 14:31 ` Jason Gunthorpe
2023-02-27 4:46 ` Yan Zhao
2023-02-22 7:53 ` Yan Zhao
2023-02-22 8:28 ` Liu, Yi L
2023-02-21 3:48 ` [PATCH v4 17/19] vfio: Add VFIO_DEVICE_AT[DE]TACH_IOMMUFD_PT Yi Liu
2023-02-22 7:41 ` Tian, Kevin
2023-02-21 3:48 ` [PATCH v4 18/19] vfio: Compile group optionally Yi Liu
2023-02-21 3:48 ` [PATCH v4 19/19] docs: vfio: Add vfio device cdev description Yi Liu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=Y/ZOOClu8nXy2toX@nvidia.com \
--to=jgg@nvidia.com \
--cc=alex.williamson@redhat.com \
--cc=chao.p.peng@linux.intel.com \
--cc=cohuck@redhat.com \
--cc=eric.auger@redhat.com \
--cc=intel-gfx@lists.freedesktop.org \
--cc=intel-gvt-dev@lists.freedesktop.org \
--cc=jasowang@redhat.com \
--cc=joro@8bytes.org \
--cc=kevin.tian@intel.com \
--cc=kvm@vger.kernel.org \
--cc=linux-s390@vger.kernel.org \
--cc=lulu@redhat.com \
--cc=mjrosato@linux.ibm.com \
--cc=nicolinc@nvidia.com \
--cc=peterx@redhat.com \
--cc=shameerali.kolothum.thodi@huawei.com \
--cc=suravee.suthikulpanit@amd.com \
--cc=terrence.xu@intel.com \
--cc=xudong.hao@intel.com \
--cc=yan.y.zhao@intel.com \
--cc=yi.l.liu@intel.com \
--cc=yi.y.sun@linux.intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox