* [PATCH v3 1/2] vfio: Replace the DMA unmapping notifier with a callback
@ 2022-07-05 0:59 ` Jason Gunthorpe
0 siblings, 0 replies; 25+ messages in thread
From: Jason Gunthorpe @ 2022-07-05 0:59 UTC (permalink / raw)
To: Alexander Gordeev, David Airlie, Alex Williamson,
Christian Borntraeger, Cornelia Huck, Daniel Vetter, dri-devel,
Harald Freudenberger, Vasily Gorbik, Heiko Carstens, intel-gfx,
intel-gvt-dev, Jani Nikula, Jason Herne, Joonas Lahtinen, kvm,
linux-s390, Matthew Rosato, Peter Oberparleiter, Halil Pasic,
Rodrigo Vivi, Sven Schnelle, Tvrtko Ursulin, Vineeth Vijayan,
Zhenyu Wang, Zhi Wang
Cc: Tony Krowiak, Kevin Tian, Eric Farman, Christoph Hellwig
Instead of having drivers register the notifier with explicit code just
have them provide a dma_unmap callback op in their driver ops and rely on
the core code to wire it up.
Suggested-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Tony Krowiak <akrowiak@linux.ibm.com>
Reviewed-by: Eric Farman <farman@linux.ibm.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
drivers/gpu/drm/i915/gvt/gvt.h | 1 -
drivers/gpu/drm/i915/gvt/kvmgt.c | 75 ++++-----------
drivers/s390/cio/vfio_ccw_ops.c | 41 ++------
drivers/s390/cio/vfio_ccw_private.h | 2 -
drivers/s390/crypto/vfio_ap_ops.c | 53 ++---------
drivers/s390/crypto/vfio_ap_private.h | 3 -
drivers/vfio/vfio.c | 129 +++++++++-----------------
drivers/vfio/vfio.h | 3 +
include/linux/vfio.h | 21 +----
9 files changed, 88 insertions(+), 240 deletions(-)
diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index aee1a45da74bcb..705689e6401197 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -226,7 +226,6 @@ struct intel_vgpu {
unsigned long nr_cache_entries;
struct mutex cache_lock;
- struct notifier_block iommu_notifier;
atomic_t released;
struct kvm_page_track_notifier_node track_node;
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index e2f6c56ab3420c..ecd5bb37b63a2a 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -729,34 +729,25 @@ int intel_gvt_set_edid(struct intel_vgpu *vgpu, int port_num)
return ret;
}
-static int intel_vgpu_iommu_notifier(struct notifier_block *nb,
- unsigned long action, void *data)
+static void intel_vgpu_dma_unmap(struct vfio_device *vfio_dev, u64 iova,
+ u64 length)
{
- struct intel_vgpu *vgpu =
- container_of(nb, struct intel_vgpu, iommu_notifier);
-
- if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
- struct vfio_iommu_type1_dma_unmap *unmap = data;
- struct gvt_dma *entry;
- unsigned long iov_pfn, end_iov_pfn;
+ struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
+ struct gvt_dma *entry;
+ u64 iov_pfn = iova >> PAGE_SHIFT;
+ u64 end_iov_pfn = iov_pfn + length / PAGE_SIZE;
- iov_pfn = unmap->iova >> PAGE_SHIFT;
- end_iov_pfn = iov_pfn + unmap->size / PAGE_SIZE;
+ mutex_lock(&vgpu->cache_lock);
+ for (; iov_pfn < end_iov_pfn; iov_pfn++) {
+ entry = __gvt_cache_find_gfn(vgpu, iov_pfn);
+ if (!entry)
+ continue;
- mutex_lock(&vgpu->cache_lock);
- for (; iov_pfn < end_iov_pfn; iov_pfn++) {
- entry = __gvt_cache_find_gfn(vgpu, iov_pfn);
- if (!entry)
- continue;
-
- gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr,
- entry->size);
- __gvt_cache_remove_entry(vgpu, entry);
- }
- mutex_unlock(&vgpu->cache_lock);
+ gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr,
+ entry->size);
+ __gvt_cache_remove_entry(vgpu, entry);
}
-
- return NOTIFY_OK;
+ mutex_unlock(&vgpu->cache_lock);
}
static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu)
@@ -783,36 +774,20 @@ static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu)
static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
{
struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
- unsigned long events;
- int ret;
-
- vgpu->iommu_notifier.notifier_call = intel_vgpu_iommu_notifier;
- events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
- ret = vfio_register_notifier(vfio_dev, VFIO_IOMMU_NOTIFY, &events,
- &vgpu->iommu_notifier);
- if (ret != 0) {
- gvt_vgpu_err("vfio_register_notifier for iommu failed: %d\n",
- ret);
- goto out;
- }
-
- ret = -EEXIST;
if (vgpu->attached)
- goto undo_iommu;
+ return -EEXIST;
- ret = -ESRCH;
if (!vgpu->vfio_device.kvm ||
vgpu->vfio_device.kvm->mm != current->mm) {
gvt_vgpu_err("KVM is required to use Intel vGPU\n");
- goto undo_iommu;
+ return -ESRCH;
}
kvm_get_kvm(vgpu->vfio_device.kvm);
- ret = -EEXIST;
if (__kvmgt_vgpu_exist(vgpu))
- goto undo_iommu;
+ return -EEXIST;
vgpu->attached = true;
@@ -831,12 +806,6 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
atomic_set(&vgpu->released, 0);
return 0;
-
-undo_iommu:
- vfio_unregister_notifier(vfio_dev, VFIO_IOMMU_NOTIFY,
- &vgpu->iommu_notifier);
-out:
- return ret;
}
static void intel_vgpu_release_msi_eventfd_ctx(struct intel_vgpu *vgpu)
@@ -853,8 +822,6 @@ static void intel_vgpu_release_msi_eventfd_ctx(struct intel_vgpu *vgpu)
static void intel_vgpu_close_device(struct vfio_device *vfio_dev)
{
struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
- struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
- int ret;
if (!vgpu->attached)
return;
@@ -864,11 +831,6 @@ static void intel_vgpu_close_device(struct vfio_device *vfio_dev)
intel_gvt_release_vgpu(vgpu);
- ret = vfio_unregister_notifier(&vgpu->vfio_device, VFIO_IOMMU_NOTIFY,
- &vgpu->iommu_notifier);
- drm_WARN(&i915->drm, ret,
- "vfio_unregister_notifier for iommu failed: %d\n", ret);
-
debugfs_remove(debugfs_lookup(KVMGT_DEBUGFS_FILENAME, vgpu->debugfs));
kvm_page_track_unregister_notifier(vgpu->vfio_device.kvm,
@@ -1610,6 +1572,7 @@ static const struct vfio_device_ops intel_vgpu_dev_ops = {
.write = intel_vgpu_write,
.mmap = intel_vgpu_mmap,
.ioctl = intel_vgpu_ioctl,
+ .dma_unmap = intel_vgpu_dma_unmap,
};
static int intel_vgpu_probe(struct mdev_device *mdev)
diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
index b49e2e9db2dc6f..09e0ce7b72324c 100644
--- a/drivers/s390/cio/vfio_ccw_ops.c
+++ b/drivers/s390/cio/vfio_ccw_ops.c
@@ -44,31 +44,19 @@ static int vfio_ccw_mdev_reset(struct vfio_ccw_private *private)
return ret;
}
-static int vfio_ccw_mdev_notifier(struct notifier_block *nb,
- unsigned long action,
- void *data)
+static void vfio_ccw_dma_unmap(struct vfio_device *vdev, u64 iova, u64 length)
{
struct vfio_ccw_private *private =
- container_of(nb, struct vfio_ccw_private, nb);
-
- /*
- * Vendor drivers MUST unpin pages in response to an
- * invalidation.
- */
- if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
- struct vfio_iommu_type1_dma_unmap *unmap = data;
-
- if (!cp_iova_pinned(&private->cp, unmap->iova))
- return NOTIFY_OK;
+ container_of(vdev, struct vfio_ccw_private, vdev);
- if (vfio_ccw_mdev_reset(private))
- return NOTIFY_BAD;
+ /* Drivers MUST unpin pages in response to an invalidation. */
+ if (!cp_iova_pinned(&private->cp, iova))
+ return;
- cp_free(&private->cp);
- return NOTIFY_OK;
- }
+ if (vfio_ccw_mdev_reset(private))
+ return;
- return NOTIFY_DONE;
+ cp_free(&private->cp);
}
static ssize_t name_show(struct mdev_type *mtype,
@@ -178,19 +166,11 @@ static int vfio_ccw_mdev_open_device(struct vfio_device *vdev)
{
struct vfio_ccw_private *private =
container_of(vdev, struct vfio_ccw_private, vdev);
- unsigned long events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
int ret;
- private->nb.notifier_call = vfio_ccw_mdev_notifier;
-
- ret = vfio_register_notifier(vdev, VFIO_IOMMU_NOTIFY,
- &events, &private->nb);
- if (ret)
- return ret;
-
ret = vfio_ccw_register_async_dev_regions(private);
if (ret)
- goto out_unregister;
+ return ret;
ret = vfio_ccw_register_schib_dev_regions(private);
if (ret)
@@ -204,7 +184,6 @@ static int vfio_ccw_mdev_open_device(struct vfio_device *vdev)
out_unregister:
vfio_ccw_unregister_dev_regions(private);
- vfio_unregister_notifier(vdev, VFIO_IOMMU_NOTIFY, &private->nb);
return ret;
}
@@ -222,7 +201,6 @@ static void vfio_ccw_mdev_close_device(struct vfio_device *vdev)
cp_free(&private->cp);
vfio_ccw_unregister_dev_regions(private);
- vfio_unregister_notifier(vdev, VFIO_IOMMU_NOTIFY, &private->nb);
}
static ssize_t vfio_ccw_mdev_read_io_region(struct vfio_ccw_private *private,
@@ -645,6 +623,7 @@ static const struct vfio_device_ops vfio_ccw_dev_ops = {
.write = vfio_ccw_mdev_write,
.ioctl = vfio_ccw_mdev_ioctl,
.request = vfio_ccw_mdev_request,
+ .dma_unmap = vfio_ccw_dma_unmap,
};
struct mdev_driver vfio_ccw_mdev_driver = {
diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h
index b7163bac8cc75d..61418109238976 100644
--- a/drivers/s390/cio/vfio_ccw_private.h
+++ b/drivers/s390/cio/vfio_ccw_private.h
@@ -74,7 +74,6 @@ struct vfio_ccw_crw {
* @completion: synchronization helper of the I/O completion
* @avail: available for creating a mediated device
* @mdev: pointer to the mediated device
- * @nb: notifier for vfio events
* @io_region: MMIO region to input/output I/O arguments/results
* @io_mutex: protect against concurrent update of I/O regions
* @region: additional regions for other subchannel operations
@@ -98,7 +97,6 @@ struct vfio_ccw_private {
struct completion *completion;
atomic_t avail;
struct mdev_device *mdev;
- struct notifier_block nb;
struct ccw_io_region *io_region;
struct mutex io_mutex;
struct vfio_ccw_region *region;
diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
index a7d2a95796d360..bb1a1677c5c230 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -1226,34 +1226,14 @@ static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev,
return 0;
}
-/**
- * vfio_ap_mdev_iommu_notifier - IOMMU notifier callback
- *
- * @nb: The notifier block
- * @action: Action to be taken
- * @data: data associated with the request
- *
- * For an UNMAP request, unpin the guest IOVA (the NIB guest address we
- * pinned before). Other requests are ignored.
- *
- * Return: for an UNMAP request, NOFITY_OK; otherwise NOTIFY_DONE.
- */
-static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb,
- unsigned long action, void *data)
+static void vfio_ap_mdev_dma_unmap(struct vfio_device *vdev, u64 iova,
+ u64 length)
{
- struct ap_matrix_mdev *matrix_mdev;
-
- matrix_mdev = container_of(nb, struct ap_matrix_mdev, iommu_notifier);
-
- if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
- struct vfio_iommu_type1_dma_unmap *unmap = data;
- unsigned long g_pfn = unmap->iova >> PAGE_SHIFT;
-
- vfio_unpin_pages(&matrix_mdev->vdev, &g_pfn, 1);
- return NOTIFY_OK;
- }
+ struct ap_matrix_mdev *matrix_mdev =
+ container_of(vdev, struct ap_matrix_mdev, vdev);
+ unsigned long g_pfn = iova >> PAGE_SHIFT;
- return NOTIFY_DONE;
+ vfio_unpin_pages(&matrix_mdev->vdev, &g_pfn, 1);
}
/**
@@ -1380,27 +1360,11 @@ static int vfio_ap_mdev_open_device(struct vfio_device *vdev)
{
struct ap_matrix_mdev *matrix_mdev =
container_of(vdev, struct ap_matrix_mdev, vdev);
- unsigned long events;
- int ret;
if (!vdev->kvm)
return -EINVAL;
- ret = vfio_ap_mdev_set_kvm(matrix_mdev, vdev->kvm);
- if (ret)
- return ret;
-
- matrix_mdev->iommu_notifier.notifier_call = vfio_ap_mdev_iommu_notifier;
- events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
- ret = vfio_register_notifier(vdev, VFIO_IOMMU_NOTIFY, &events,
- &matrix_mdev->iommu_notifier);
- if (ret)
- goto err_kvm;
- return 0;
-
-err_kvm:
- vfio_ap_mdev_unset_kvm(matrix_mdev);
- return ret;
+ return vfio_ap_mdev_set_kvm(matrix_mdev, vdev->kvm);
}
static void vfio_ap_mdev_close_device(struct vfio_device *vdev)
@@ -1408,8 +1372,6 @@ static void vfio_ap_mdev_close_device(struct vfio_device *vdev)
struct ap_matrix_mdev *matrix_mdev =
container_of(vdev, struct ap_matrix_mdev, vdev);
- vfio_unregister_notifier(vdev, VFIO_IOMMU_NOTIFY,
- &matrix_mdev->iommu_notifier);
vfio_ap_mdev_unset_kvm(matrix_mdev);
}
@@ -1461,6 +1423,7 @@ static const struct vfio_device_ops vfio_ap_matrix_dev_ops = {
.open_device = vfio_ap_mdev_open_device,
.close_device = vfio_ap_mdev_close_device,
.ioctl = vfio_ap_mdev_ioctl,
+ .dma_unmap = vfio_ap_mdev_dma_unmap,
};
static struct mdev_driver vfio_ap_matrix_driver = {
diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h
index a26efd804d0df3..abb59d59f81b20 100644
--- a/drivers/s390/crypto/vfio_ap_private.h
+++ b/drivers/s390/crypto/vfio_ap_private.h
@@ -81,8 +81,6 @@ struct ap_matrix {
* @node: allows the ap_matrix_mdev struct to be added to a list
* @matrix: the adapters, usage domains and control domains assigned to the
* mediated matrix device.
- * @iommu_notifier: notifier block used for specifying callback function for
- * handling the VFIO_IOMMU_NOTIFY_DMA_UNMAP even
* @kvm: the struct holding guest's state
* @pqap_hook: the function pointer to the interception handler for the
* PQAP(AQIC) instruction.
@@ -92,7 +90,6 @@ struct ap_matrix_mdev {
struct vfio_device vdev;
struct list_head node;
struct ap_matrix matrix;
- struct notifier_block iommu_notifier;
struct kvm *kvm;
crypto_hook pqap_hook;
struct mdev_device *mdev;
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 61e71c1154be67..610bb884d9197b 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -231,6 +231,9 @@ int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
{
struct vfio_iommu_driver *driver, *tmp;
+ if (WARN_ON(!ops->register_notifier != !ops->unregister_notifier))
+ return -EINVAL;
+
driver = kzalloc(sizeof(*driver), GFP_KERNEL);
if (!driver)
return -ENOMEM;
@@ -1077,8 +1080,20 @@ static void vfio_device_unassign_container(struct vfio_device *device)
up_write(&device->group->group_rwsem);
}
+static int vfio_iommu_notifier(struct notifier_block *nb, unsigned long action,
+ void *data)
+{
+ struct vfio_device *vfio_device =
+ container_of(nb, struct vfio_device, iommu_nb);
+ struct vfio_iommu_type1_dma_unmap *unmap = data;
+
+ vfio_device->ops->dma_unmap(vfio_device, unmap->iova, unmap->size);
+ return NOTIFY_OK;
+}
+
static struct file *vfio_device_open(struct vfio_device *device)
{
+ struct vfio_iommu_driver *iommu_driver;
struct file *filep;
int ret;
@@ -1109,6 +1124,18 @@ static struct file *vfio_device_open(struct vfio_device *device)
if (ret)
goto err_undo_count;
}
+
+ iommu_driver = device->group->container->iommu_driver;
+ if (device->ops->dma_unmap && iommu_driver &&
+ iommu_driver->ops->register_notifier) {
+ unsigned long events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
+
+ device->iommu_nb.notifier_call = vfio_iommu_notifier;
+ iommu_driver->ops->register_notifier(
+ device->group->container->iommu_data, &events,
+ &device->iommu_nb);
+ }
+
up_read(&device->group->group_rwsem);
}
mutex_unlock(&device->dev_set->lock);
@@ -1143,8 +1170,16 @@ static struct file *vfio_device_open(struct vfio_device *device)
err_close_device:
mutex_lock(&device->dev_set->lock);
down_read(&device->group->group_rwsem);
- if (device->open_count == 1 && device->ops->close_device)
+ if (device->open_count == 1 && device->ops->close_device) {
device->ops->close_device(device);
+
+ iommu_driver = device->group->container->iommu_driver;
+ if (device->ops->dma_unmap && iommu_driver &&
+ iommu_driver->ops->unregister_notifier)
+ iommu_driver->ops->unregister_notifier(
+ device->group->container->iommu_data,
+ &device->iommu_nb);
+ }
err_undo_count:
device->open_count--;
if (device->open_count == 0 && device->kvm)
@@ -1339,12 +1374,20 @@ static const struct file_operations vfio_group_fops = {
static int vfio_device_fops_release(struct inode *inode, struct file *filep)
{
struct vfio_device *device = filep->private_data;
+ struct vfio_iommu_driver *iommu_driver;
mutex_lock(&device->dev_set->lock);
vfio_assert_device_open(device);
down_read(&device->group->group_rwsem);
if (device->open_count == 1 && device->ops->close_device)
device->ops->close_device(device);
+
+ iommu_driver = device->group->container->iommu_driver;
+ if (device->ops->dma_unmap && iommu_driver &&
+ iommu_driver->ops->unregister_notifier)
+ iommu_driver->ops->unregister_notifier(
+ device->group->container->iommu_data,
+ &device->iommu_nb);
up_read(&device->group->group_rwsem);
device->open_count--;
if (device->open_count == 0)
@@ -2027,90 +2070,6 @@ int vfio_dma_rw(struct vfio_device *device, dma_addr_t user_iova, void *data,
}
EXPORT_SYMBOL(vfio_dma_rw);
-static int vfio_register_iommu_notifier(struct vfio_group *group,
- unsigned long *events,
- struct notifier_block *nb)
-{
- struct vfio_container *container;
- struct vfio_iommu_driver *driver;
- int ret;
-
- lockdep_assert_held_read(&group->group_rwsem);
-
- container = group->container;
- driver = container->iommu_driver;
- if (likely(driver && driver->ops->register_notifier))
- ret = driver->ops->register_notifier(container->iommu_data,
- events, nb);
- else
- ret = -ENOTTY;
-
- return ret;
-}
-
-static int vfio_unregister_iommu_notifier(struct vfio_group *group,
- struct notifier_block *nb)
-{
- struct vfio_container *container;
- struct vfio_iommu_driver *driver;
- int ret;
-
- lockdep_assert_held_read(&group->group_rwsem);
-
- container = group->container;
- driver = container->iommu_driver;
- if (likely(driver && driver->ops->unregister_notifier))
- ret = driver->ops->unregister_notifier(container->iommu_data,
- nb);
- else
- ret = -ENOTTY;
-
- return ret;
-}
-
-int vfio_register_notifier(struct vfio_device *device,
- enum vfio_notify_type type, unsigned long *events,
- struct notifier_block *nb)
-{
- struct vfio_group *group = device->group;
- int ret;
-
- if (!nb || !events || (*events == 0) ||
- !vfio_assert_device_open(device))
- return -EINVAL;
-
- switch (type) {
- case VFIO_IOMMU_NOTIFY:
- ret = vfio_register_iommu_notifier(group, events, nb);
- break;
- default:
- ret = -EINVAL;
- }
- return ret;
-}
-EXPORT_SYMBOL(vfio_register_notifier);
-
-int vfio_unregister_notifier(struct vfio_device *device,
- enum vfio_notify_type type,
- struct notifier_block *nb)
-{
- struct vfio_group *group = device->group;
- int ret;
-
- if (!nb || !vfio_assert_device_open(device))
- return -EINVAL;
-
- switch (type) {
- case VFIO_IOMMU_NOTIFY:
- ret = vfio_unregister_iommu_notifier(group, nb);
- break;
- default:
- ret = -EINVAL;
- }
- return ret;
-}
-EXPORT_SYMBOL(vfio_unregister_notifier);
-
/*
* Module/class support
*/
diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h
index a6713022115155..25da02ca1568fc 100644
--- a/drivers/vfio/vfio.h
+++ b/drivers/vfio/vfio.h
@@ -33,6 +33,9 @@ enum vfio_iommu_notify_type {
VFIO_IOMMU_CONTAINER_CLOSE = 0,
};
+/* events for register_notifier() */
+#define VFIO_IOMMU_NOTIFY_DMA_UNMAP BIT(0)
+
/**
* struct vfio_iommu_driver_ops - VFIO IOMMU driver callbacks
*/
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 49580fa2073a8d..8c5c389d42d918 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -44,6 +44,7 @@ struct vfio_device {
unsigned int open_count;
struct completion comp;
struct list_head group_next;
+ struct notifier_block iommu_nb;
};
/**
@@ -60,6 +61,8 @@ struct vfio_device {
* @match: Optional device name match callback (return: 0 for no-match, >0 for
* match, -errno for abort (ex. match with insufficient or incorrect
* additional args)
+ * @dma_unmap: Called when userspace unmaps IOVA from the container
+ * this device is attached to.
* @device_feature: Optional, fill in the VFIO_DEVICE_FEATURE ioctl
* @migration_set_state: Optional callback to change the migration state for
* devices that support migration. It's mandatory for
@@ -85,6 +88,7 @@ struct vfio_device_ops {
int (*mmap)(struct vfio_device *vdev, struct vm_area_struct *vma);
void (*request)(struct vfio_device *vdev, unsigned int count);
int (*match)(struct vfio_device *vdev, char *buf);
+ void (*dma_unmap)(struct vfio_device *vdev, u64 iova, u64 length);
int (*device_feature)(struct vfio_device *device, u32 flags,
void __user *arg, size_t argsz);
struct file *(*migration_set_state)(
@@ -154,23 +158,6 @@ int vfio_unpin_pages(struct vfio_device *device, unsigned long *user_pfn,
int vfio_dma_rw(struct vfio_device *device, dma_addr_t user_iova,
void *data, size_t len, bool write);
-/* each type has independent events */
-enum vfio_notify_type {
- VFIO_IOMMU_NOTIFY = 0,
-};
-
-/* events for VFIO_IOMMU_NOTIFY */
-#define VFIO_IOMMU_NOTIFY_DMA_UNMAP BIT(0)
-
-int vfio_register_notifier(struct vfio_device *device,
- enum vfio_notify_type type,
- unsigned long *required_events,
- struct notifier_block *nb);
-int vfio_unregister_notifier(struct vfio_device *device,
- enum vfio_notify_type type,
- struct notifier_block *nb);
-
-
/*
* Sub-module helpers
*/
--
2.37.0
^ permalink raw reply related [flat|nested] 25+ messages in thread* [PATCH v3 1/2] vfio: Replace the DMA unmapping notifier with a callback
@ 2022-07-05 0:59 ` Jason Gunthorpe
0 siblings, 0 replies; 25+ messages in thread
From: Jason Gunthorpe @ 2022-07-05 0:59 UTC (permalink / raw)
To: Alexander Gordeev, David Airlie, Alex Williamson,
Christian Borntraeger, Cornelia Huck, Daniel Vetter, dri-devel,
Harald Freudenberger, Vasily Gorbik, Heiko Carstens, intel-gfx,
intel-gvt-dev, Jani Nikula, Jason Herne, Joonas Lahtinen, kvm,
linux-s390, Matthew Rosato, Peter Oberparleiter, Halil Pasic,
Rodrigo Vivi, Sven Schnelle, Tvrtko Ursulin, Vineeth Vijayan,
Zhenyu Wang, Zhi Wang
Cc: Tony Krowiak, Eric Farman, Christoph Hellwig, Kevin Tian
Instead of having drivers register the notifier with explicit code just
have them provide a dma_unmap callback op in their driver ops and rely on
the core code to wire it up.
Suggested-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Tony Krowiak <akrowiak@linux.ibm.com>
Reviewed-by: Eric Farman <farman@linux.ibm.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
drivers/gpu/drm/i915/gvt/gvt.h | 1 -
drivers/gpu/drm/i915/gvt/kvmgt.c | 75 ++++-----------
drivers/s390/cio/vfio_ccw_ops.c | 41 ++------
drivers/s390/cio/vfio_ccw_private.h | 2 -
drivers/s390/crypto/vfio_ap_ops.c | 53 ++---------
drivers/s390/crypto/vfio_ap_private.h | 3 -
drivers/vfio/vfio.c | 129 +++++++++-----------------
drivers/vfio/vfio.h | 3 +
include/linux/vfio.h | 21 +----
9 files changed, 88 insertions(+), 240 deletions(-)
diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index aee1a45da74bcb..705689e6401197 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -226,7 +226,6 @@ struct intel_vgpu {
unsigned long nr_cache_entries;
struct mutex cache_lock;
- struct notifier_block iommu_notifier;
atomic_t released;
struct kvm_page_track_notifier_node track_node;
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index e2f6c56ab3420c..ecd5bb37b63a2a 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -729,34 +729,25 @@ int intel_gvt_set_edid(struct intel_vgpu *vgpu, int port_num)
return ret;
}
-static int intel_vgpu_iommu_notifier(struct notifier_block *nb,
- unsigned long action, void *data)
+static void intel_vgpu_dma_unmap(struct vfio_device *vfio_dev, u64 iova,
+ u64 length)
{
- struct intel_vgpu *vgpu =
- container_of(nb, struct intel_vgpu, iommu_notifier);
-
- if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
- struct vfio_iommu_type1_dma_unmap *unmap = data;
- struct gvt_dma *entry;
- unsigned long iov_pfn, end_iov_pfn;
+ struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
+ struct gvt_dma *entry;
+ u64 iov_pfn = iova >> PAGE_SHIFT;
+ u64 end_iov_pfn = iov_pfn + length / PAGE_SIZE;
- iov_pfn = unmap->iova >> PAGE_SHIFT;
- end_iov_pfn = iov_pfn + unmap->size / PAGE_SIZE;
+ mutex_lock(&vgpu->cache_lock);
+ for (; iov_pfn < end_iov_pfn; iov_pfn++) {
+ entry = __gvt_cache_find_gfn(vgpu, iov_pfn);
+ if (!entry)
+ continue;
- mutex_lock(&vgpu->cache_lock);
- for (; iov_pfn < end_iov_pfn; iov_pfn++) {
- entry = __gvt_cache_find_gfn(vgpu, iov_pfn);
- if (!entry)
- continue;
-
- gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr,
- entry->size);
- __gvt_cache_remove_entry(vgpu, entry);
- }
- mutex_unlock(&vgpu->cache_lock);
+ gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr,
+ entry->size);
+ __gvt_cache_remove_entry(vgpu, entry);
}
-
- return NOTIFY_OK;
+ mutex_unlock(&vgpu->cache_lock);
}
static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu)
@@ -783,36 +774,20 @@ static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu)
static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
{
struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
- unsigned long events;
- int ret;
-
- vgpu->iommu_notifier.notifier_call = intel_vgpu_iommu_notifier;
- events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
- ret = vfio_register_notifier(vfio_dev, VFIO_IOMMU_NOTIFY, &events,
- &vgpu->iommu_notifier);
- if (ret != 0) {
- gvt_vgpu_err("vfio_register_notifier for iommu failed: %d\n",
- ret);
- goto out;
- }
-
- ret = -EEXIST;
if (vgpu->attached)
- goto undo_iommu;
+ return -EEXIST;
- ret = -ESRCH;
if (!vgpu->vfio_device.kvm ||
vgpu->vfio_device.kvm->mm != current->mm) {
gvt_vgpu_err("KVM is required to use Intel vGPU\n");
- goto undo_iommu;
+ return -ESRCH;
}
kvm_get_kvm(vgpu->vfio_device.kvm);
- ret = -EEXIST;
if (__kvmgt_vgpu_exist(vgpu))
- goto undo_iommu;
+ return -EEXIST;
vgpu->attached = true;
@@ -831,12 +806,6 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
atomic_set(&vgpu->released, 0);
return 0;
-
-undo_iommu:
- vfio_unregister_notifier(vfio_dev, VFIO_IOMMU_NOTIFY,
- &vgpu->iommu_notifier);
-out:
- return ret;
}
static void intel_vgpu_release_msi_eventfd_ctx(struct intel_vgpu *vgpu)
@@ -853,8 +822,6 @@ static void intel_vgpu_release_msi_eventfd_ctx(struct intel_vgpu *vgpu)
static void intel_vgpu_close_device(struct vfio_device *vfio_dev)
{
struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
- struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
- int ret;
if (!vgpu->attached)
return;
@@ -864,11 +831,6 @@ static void intel_vgpu_close_device(struct vfio_device *vfio_dev)
intel_gvt_release_vgpu(vgpu);
- ret = vfio_unregister_notifier(&vgpu->vfio_device, VFIO_IOMMU_NOTIFY,
- &vgpu->iommu_notifier);
- drm_WARN(&i915->drm, ret,
- "vfio_unregister_notifier for iommu failed: %d\n", ret);
-
debugfs_remove(debugfs_lookup(KVMGT_DEBUGFS_FILENAME, vgpu->debugfs));
kvm_page_track_unregister_notifier(vgpu->vfio_device.kvm,
@@ -1610,6 +1572,7 @@ static const struct vfio_device_ops intel_vgpu_dev_ops = {
.write = intel_vgpu_write,
.mmap = intel_vgpu_mmap,
.ioctl = intel_vgpu_ioctl,
+ .dma_unmap = intel_vgpu_dma_unmap,
};
static int intel_vgpu_probe(struct mdev_device *mdev)
diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
index b49e2e9db2dc6f..09e0ce7b72324c 100644
--- a/drivers/s390/cio/vfio_ccw_ops.c
+++ b/drivers/s390/cio/vfio_ccw_ops.c
@@ -44,31 +44,19 @@ static int vfio_ccw_mdev_reset(struct vfio_ccw_private *private)
return ret;
}
-static int vfio_ccw_mdev_notifier(struct notifier_block *nb,
- unsigned long action,
- void *data)
+static void vfio_ccw_dma_unmap(struct vfio_device *vdev, u64 iova, u64 length)
{
struct vfio_ccw_private *private =
- container_of(nb, struct vfio_ccw_private, nb);
-
- /*
- * Vendor drivers MUST unpin pages in response to an
- * invalidation.
- */
- if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
- struct vfio_iommu_type1_dma_unmap *unmap = data;
-
- if (!cp_iova_pinned(&private->cp, unmap->iova))
- return NOTIFY_OK;
+ container_of(vdev, struct vfio_ccw_private, vdev);
- if (vfio_ccw_mdev_reset(private))
- return NOTIFY_BAD;
+ /* Drivers MUST unpin pages in response to an invalidation. */
+ if (!cp_iova_pinned(&private->cp, iova))
+ return;
- cp_free(&private->cp);
- return NOTIFY_OK;
- }
+ if (vfio_ccw_mdev_reset(private))
+ return;
- return NOTIFY_DONE;
+ cp_free(&private->cp);
}
static ssize_t name_show(struct mdev_type *mtype,
@@ -178,19 +166,11 @@ static int vfio_ccw_mdev_open_device(struct vfio_device *vdev)
{
struct vfio_ccw_private *private =
container_of(vdev, struct vfio_ccw_private, vdev);
- unsigned long events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
int ret;
- private->nb.notifier_call = vfio_ccw_mdev_notifier;
-
- ret = vfio_register_notifier(vdev, VFIO_IOMMU_NOTIFY,
- &events, &private->nb);
- if (ret)
- return ret;
-
ret = vfio_ccw_register_async_dev_regions(private);
if (ret)
- goto out_unregister;
+ return ret;
ret = vfio_ccw_register_schib_dev_regions(private);
if (ret)
@@ -204,7 +184,6 @@ static int vfio_ccw_mdev_open_device(struct vfio_device *vdev)
out_unregister:
vfio_ccw_unregister_dev_regions(private);
- vfio_unregister_notifier(vdev, VFIO_IOMMU_NOTIFY, &private->nb);
return ret;
}
@@ -222,7 +201,6 @@ static void vfio_ccw_mdev_close_device(struct vfio_device *vdev)
cp_free(&private->cp);
vfio_ccw_unregister_dev_regions(private);
- vfio_unregister_notifier(vdev, VFIO_IOMMU_NOTIFY, &private->nb);
}
static ssize_t vfio_ccw_mdev_read_io_region(struct vfio_ccw_private *private,
@@ -645,6 +623,7 @@ static const struct vfio_device_ops vfio_ccw_dev_ops = {
.write = vfio_ccw_mdev_write,
.ioctl = vfio_ccw_mdev_ioctl,
.request = vfio_ccw_mdev_request,
+ .dma_unmap = vfio_ccw_dma_unmap,
};
struct mdev_driver vfio_ccw_mdev_driver = {
diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h
index b7163bac8cc75d..61418109238976 100644
--- a/drivers/s390/cio/vfio_ccw_private.h
+++ b/drivers/s390/cio/vfio_ccw_private.h
@@ -74,7 +74,6 @@ struct vfio_ccw_crw {
* @completion: synchronization helper of the I/O completion
* @avail: available for creating a mediated device
* @mdev: pointer to the mediated device
- * @nb: notifier for vfio events
* @io_region: MMIO region to input/output I/O arguments/results
* @io_mutex: protect against concurrent update of I/O regions
* @region: additional regions for other subchannel operations
@@ -98,7 +97,6 @@ struct vfio_ccw_private {
struct completion *completion;
atomic_t avail;
struct mdev_device *mdev;
- struct notifier_block nb;
struct ccw_io_region *io_region;
struct mutex io_mutex;
struct vfio_ccw_region *region;
diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
index a7d2a95796d360..bb1a1677c5c230 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -1226,34 +1226,14 @@ static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev,
return 0;
}
-/**
- * vfio_ap_mdev_iommu_notifier - IOMMU notifier callback
- *
- * @nb: The notifier block
- * @action: Action to be taken
- * @data: data associated with the request
- *
- * For an UNMAP request, unpin the guest IOVA (the NIB guest address we
- * pinned before). Other requests are ignored.
- *
- * Return: for an UNMAP request, NOFITY_OK; otherwise NOTIFY_DONE.
- */
-static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb,
- unsigned long action, void *data)
+static void vfio_ap_mdev_dma_unmap(struct vfio_device *vdev, u64 iova,
+ u64 length)
{
- struct ap_matrix_mdev *matrix_mdev;
-
- matrix_mdev = container_of(nb, struct ap_matrix_mdev, iommu_notifier);
-
- if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
- struct vfio_iommu_type1_dma_unmap *unmap = data;
- unsigned long g_pfn = unmap->iova >> PAGE_SHIFT;
-
- vfio_unpin_pages(&matrix_mdev->vdev, &g_pfn, 1);
- return NOTIFY_OK;
- }
+ struct ap_matrix_mdev *matrix_mdev =
+ container_of(vdev, struct ap_matrix_mdev, vdev);
+ unsigned long g_pfn = iova >> PAGE_SHIFT;
- return NOTIFY_DONE;
+ vfio_unpin_pages(&matrix_mdev->vdev, &g_pfn, 1);
}
/**
@@ -1380,27 +1360,11 @@ static int vfio_ap_mdev_open_device(struct vfio_device *vdev)
{
struct ap_matrix_mdev *matrix_mdev =
container_of(vdev, struct ap_matrix_mdev, vdev);
- unsigned long events;
- int ret;
if (!vdev->kvm)
return -EINVAL;
- ret = vfio_ap_mdev_set_kvm(matrix_mdev, vdev->kvm);
- if (ret)
- return ret;
-
- matrix_mdev->iommu_notifier.notifier_call = vfio_ap_mdev_iommu_notifier;
- events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
- ret = vfio_register_notifier(vdev, VFIO_IOMMU_NOTIFY, &events,
- &matrix_mdev->iommu_notifier);
- if (ret)
- goto err_kvm;
- return 0;
-
-err_kvm:
- vfio_ap_mdev_unset_kvm(matrix_mdev);
- return ret;
+ return vfio_ap_mdev_set_kvm(matrix_mdev, vdev->kvm);
}
static void vfio_ap_mdev_close_device(struct vfio_device *vdev)
@@ -1408,8 +1372,6 @@ static void vfio_ap_mdev_close_device(struct vfio_device *vdev)
struct ap_matrix_mdev *matrix_mdev =
container_of(vdev, struct ap_matrix_mdev, vdev);
- vfio_unregister_notifier(vdev, VFIO_IOMMU_NOTIFY,
- &matrix_mdev->iommu_notifier);
vfio_ap_mdev_unset_kvm(matrix_mdev);
}
@@ -1461,6 +1423,7 @@ static const struct vfio_device_ops vfio_ap_matrix_dev_ops = {
.open_device = vfio_ap_mdev_open_device,
.close_device = vfio_ap_mdev_close_device,
.ioctl = vfio_ap_mdev_ioctl,
+ .dma_unmap = vfio_ap_mdev_dma_unmap,
};
static struct mdev_driver vfio_ap_matrix_driver = {
diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h
index a26efd804d0df3..abb59d59f81b20 100644
--- a/drivers/s390/crypto/vfio_ap_private.h
+++ b/drivers/s390/crypto/vfio_ap_private.h
@@ -81,8 +81,6 @@ struct ap_matrix {
* @node: allows the ap_matrix_mdev struct to be added to a list
* @matrix: the adapters, usage domains and control domains assigned to the
* mediated matrix device.
- * @iommu_notifier: notifier block used for specifying callback function for
- * handling the VFIO_IOMMU_NOTIFY_DMA_UNMAP even
* @kvm: the struct holding guest's state
* @pqap_hook: the function pointer to the interception handler for the
* PQAP(AQIC) instruction.
@@ -92,7 +90,6 @@ struct ap_matrix_mdev {
struct vfio_device vdev;
struct list_head node;
struct ap_matrix matrix;
- struct notifier_block iommu_notifier;
struct kvm *kvm;
crypto_hook pqap_hook;
struct mdev_device *mdev;
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 61e71c1154be67..610bb884d9197b 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -231,6 +231,9 @@ int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
{
struct vfio_iommu_driver *driver, *tmp;
+ if (WARN_ON(!ops->register_notifier != !ops->unregister_notifier))
+ return -EINVAL;
+
driver = kzalloc(sizeof(*driver), GFP_KERNEL);
if (!driver)
return -ENOMEM;
@@ -1077,8 +1080,20 @@ static void vfio_device_unassign_container(struct vfio_device *device)
up_write(&device->group->group_rwsem);
}
+static int vfio_iommu_notifier(struct notifier_block *nb, unsigned long action,
+ void *data)
+{
+ struct vfio_device *vfio_device =
+ container_of(nb, struct vfio_device, iommu_nb);
+ struct vfio_iommu_type1_dma_unmap *unmap = data;
+
+ vfio_device->ops->dma_unmap(vfio_device, unmap->iova, unmap->size);
+ return NOTIFY_OK;
+}
+
static struct file *vfio_device_open(struct vfio_device *device)
{
+ struct vfio_iommu_driver *iommu_driver;
struct file *filep;
int ret;
@@ -1109,6 +1124,18 @@ static struct file *vfio_device_open(struct vfio_device *device)
if (ret)
goto err_undo_count;
}
+
+ iommu_driver = device->group->container->iommu_driver;
+ if (device->ops->dma_unmap && iommu_driver &&
+ iommu_driver->ops->register_notifier) {
+ unsigned long events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
+
+ device->iommu_nb.notifier_call = vfio_iommu_notifier;
+ iommu_driver->ops->register_notifier(
+ device->group->container->iommu_data, &events,
+ &device->iommu_nb);
+ }
+
up_read(&device->group->group_rwsem);
}
mutex_unlock(&device->dev_set->lock);
@@ -1143,8 +1170,16 @@ static struct file *vfio_device_open(struct vfio_device *device)
err_close_device:
mutex_lock(&device->dev_set->lock);
down_read(&device->group->group_rwsem);
- if (device->open_count == 1 && device->ops->close_device)
+ if (device->open_count == 1 && device->ops->close_device) {
device->ops->close_device(device);
+
+ iommu_driver = device->group->container->iommu_driver;
+ if (device->ops->dma_unmap && iommu_driver &&
+ iommu_driver->ops->unregister_notifier)
+ iommu_driver->ops->unregister_notifier(
+ device->group->container->iommu_data,
+ &device->iommu_nb);
+ }
err_undo_count:
device->open_count--;
if (device->open_count == 0 && device->kvm)
@@ -1339,12 +1374,20 @@ static const struct file_operations vfio_group_fops = {
static int vfio_device_fops_release(struct inode *inode, struct file *filep)
{
struct vfio_device *device = filep->private_data;
+ struct vfio_iommu_driver *iommu_driver;
mutex_lock(&device->dev_set->lock);
vfio_assert_device_open(device);
down_read(&device->group->group_rwsem);
if (device->open_count == 1 && device->ops->close_device)
device->ops->close_device(device);
+
+ iommu_driver = device->group->container->iommu_driver;
+ if (device->ops->dma_unmap && iommu_driver &&
+ iommu_driver->ops->unregister_notifier)
+ iommu_driver->ops->unregister_notifier(
+ device->group->container->iommu_data,
+ &device->iommu_nb);
up_read(&device->group->group_rwsem);
device->open_count--;
if (device->open_count == 0)
@@ -2027,90 +2070,6 @@ int vfio_dma_rw(struct vfio_device *device, dma_addr_t user_iova, void *data,
}
EXPORT_SYMBOL(vfio_dma_rw);
-static int vfio_register_iommu_notifier(struct vfio_group *group,
- unsigned long *events,
- struct notifier_block *nb)
-{
- struct vfio_container *container;
- struct vfio_iommu_driver *driver;
- int ret;
-
- lockdep_assert_held_read(&group->group_rwsem);
-
- container = group->container;
- driver = container->iommu_driver;
- if (likely(driver && driver->ops->register_notifier))
- ret = driver->ops->register_notifier(container->iommu_data,
- events, nb);
- else
- ret = -ENOTTY;
-
- return ret;
-}
-
-static int vfio_unregister_iommu_notifier(struct vfio_group *group,
- struct notifier_block *nb)
-{
- struct vfio_container *container;
- struct vfio_iommu_driver *driver;
- int ret;
-
- lockdep_assert_held_read(&group->group_rwsem);
-
- container = group->container;
- driver = container->iommu_driver;
- if (likely(driver && driver->ops->unregister_notifier))
- ret = driver->ops->unregister_notifier(container->iommu_data,
- nb);
- else
- ret = -ENOTTY;
-
- return ret;
-}
-
-int vfio_register_notifier(struct vfio_device *device,
- enum vfio_notify_type type, unsigned long *events,
- struct notifier_block *nb)
-{
- struct vfio_group *group = device->group;
- int ret;
-
- if (!nb || !events || (*events == 0) ||
- !vfio_assert_device_open(device))
- return -EINVAL;
-
- switch (type) {
- case VFIO_IOMMU_NOTIFY:
- ret = vfio_register_iommu_notifier(group, events, nb);
- break;
- default:
- ret = -EINVAL;
- }
- return ret;
-}
-EXPORT_SYMBOL(vfio_register_notifier);
-
-int vfio_unregister_notifier(struct vfio_device *device,
- enum vfio_notify_type type,
- struct notifier_block *nb)
-{
- struct vfio_group *group = device->group;
- int ret;
-
- if (!nb || !vfio_assert_device_open(device))
- return -EINVAL;
-
- switch (type) {
- case VFIO_IOMMU_NOTIFY:
- ret = vfio_unregister_iommu_notifier(group, nb);
- break;
- default:
- ret = -EINVAL;
- }
- return ret;
-}
-EXPORT_SYMBOL(vfio_unregister_notifier);
-
/*
* Module/class support
*/
diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h
index a6713022115155..25da02ca1568fc 100644
--- a/drivers/vfio/vfio.h
+++ b/drivers/vfio/vfio.h
@@ -33,6 +33,9 @@ enum vfio_iommu_notify_type {
VFIO_IOMMU_CONTAINER_CLOSE = 0,
};
+/* events for register_notifier() */
+#define VFIO_IOMMU_NOTIFY_DMA_UNMAP BIT(0)
+
/**
* struct vfio_iommu_driver_ops - VFIO IOMMU driver callbacks
*/
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 49580fa2073a8d..8c5c389d42d918 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -44,6 +44,7 @@ struct vfio_device {
unsigned int open_count;
struct completion comp;
struct list_head group_next;
+ struct notifier_block iommu_nb;
};
/**
@@ -60,6 +61,8 @@ struct vfio_device {
* @match: Optional device name match callback (return: 0 for no-match, >0 for
* match, -errno for abort (ex. match with insufficient or incorrect
* additional args)
+ * @dma_unmap: Called when userspace unmaps IOVA from the container
+ * this device is attached to.
* @device_feature: Optional, fill in the VFIO_DEVICE_FEATURE ioctl
* @migration_set_state: Optional callback to change the migration state for
* devices that support migration. It's mandatory for
@@ -85,6 +88,7 @@ struct vfio_device_ops {
int (*mmap)(struct vfio_device *vdev, struct vm_area_struct *vma);
void (*request)(struct vfio_device *vdev, unsigned int count);
int (*match)(struct vfio_device *vdev, char *buf);
+ void (*dma_unmap)(struct vfio_device *vdev, u64 iova, u64 length);
int (*device_feature)(struct vfio_device *device, u32 flags,
void __user *arg, size_t argsz);
struct file *(*migration_set_state)(
@@ -154,23 +158,6 @@ int vfio_unpin_pages(struct vfio_device *device, unsigned long *user_pfn,
int vfio_dma_rw(struct vfio_device *device, dma_addr_t user_iova,
void *data, size_t len, bool write);
-/* each type has independent events */
-enum vfio_notify_type {
- VFIO_IOMMU_NOTIFY = 0,
-};
-
-/* events for VFIO_IOMMU_NOTIFY */
-#define VFIO_IOMMU_NOTIFY_DMA_UNMAP BIT(0)
-
-int vfio_register_notifier(struct vfio_device *device,
- enum vfio_notify_type type,
- unsigned long *required_events,
- struct notifier_block *nb);
-int vfio_unregister_notifier(struct vfio_device *device,
- enum vfio_notify_type type,
- struct notifier_block *nb);
-
-
/*
* Sub-module helpers
*/
--
2.37.0
^ permalink raw reply related [flat|nested] 25+ messages in thread* Re: [Intel-gfx] [PATCH v3 1/2] vfio: Replace the DMA unmapping notifier with a callback
2022-07-05 0:59 ` Jason Gunthorpe
(?)
@ 2022-07-05 8:31 ` Zhenyu Wang
-1 siblings, 0 replies; 25+ messages in thread
From: Zhenyu Wang @ 2022-07-05 8:31 UTC (permalink / raw)
To: Jason Gunthorpe
Cc: kvm, David Airlie, dri-devel, Vineeth Vijayan, Alexander Gordeev,
Christoph Hellwig, linux-s390, Matthew Rosato, Halil Pasic,
Christian Borntraeger, intel-gfx, Tony Krowiak, Eric Farman,
Vasily Gorbik, Heiko Carstens, Harald Freudenberger, Rodrigo Vivi,
intel-gvt-dev, Jason Herne, Cornelia Huck, Peter Oberparleiter,
Sven Schnelle
[-- Attachment #1: Type: text/plain, Size: 22825 bytes --]
On 2022.07.04 21:59:03 -0300, Jason Gunthorpe wrote:
> Instead of having drivers register the notifier with explicit code just
> have them provide a dma_unmap callback op in their driver ops and rely on
> the core code to wire it up.
>
> Suggested-by: Christoph Hellwig <hch@lst.de>
> Reviewed-by: Christoph Hellwig <hch@lst.de>
> Reviewed-by: Kevin Tian <kevin.tian@intel.com>
> Reviewed-by: Tony Krowiak <akrowiak@linux.ibm.com>
> Reviewed-by: Eric Farman <farman@linux.ibm.com>
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> ---
> drivers/gpu/drm/i915/gvt/gvt.h | 1 -
> drivers/gpu/drm/i915/gvt/kvmgt.c | 75 ++++-----------
gvt change looks fine to me.
Reviewed-by: Zhenyu Wang <zhenyuw@linux.intel.com>
> drivers/s390/cio/vfio_ccw_ops.c | 41 ++------
> drivers/s390/cio/vfio_ccw_private.h | 2 -
> drivers/s390/crypto/vfio_ap_ops.c | 53 ++---------
> drivers/s390/crypto/vfio_ap_private.h | 3 -
> drivers/vfio/vfio.c | 129 +++++++++-----------------
> drivers/vfio/vfio.h | 3 +
> include/linux/vfio.h | 21 +----
> 9 files changed, 88 insertions(+), 240 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
> index aee1a45da74bcb..705689e6401197 100644
> --- a/drivers/gpu/drm/i915/gvt/gvt.h
> +++ b/drivers/gpu/drm/i915/gvt/gvt.h
> @@ -226,7 +226,6 @@ struct intel_vgpu {
> unsigned long nr_cache_entries;
> struct mutex cache_lock;
>
> - struct notifier_block iommu_notifier;
> atomic_t released;
>
> struct kvm_page_track_notifier_node track_node;
> diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
> index e2f6c56ab3420c..ecd5bb37b63a2a 100644
> --- a/drivers/gpu/drm/i915/gvt/kvmgt.c
> +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
> @@ -729,34 +729,25 @@ int intel_gvt_set_edid(struct intel_vgpu *vgpu, int port_num)
> return ret;
> }
>
> -static int intel_vgpu_iommu_notifier(struct notifier_block *nb,
> - unsigned long action, void *data)
> +static void intel_vgpu_dma_unmap(struct vfio_device *vfio_dev, u64 iova,
> + u64 length)
> {
> - struct intel_vgpu *vgpu =
> - container_of(nb, struct intel_vgpu, iommu_notifier);
> -
> - if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
> - struct vfio_iommu_type1_dma_unmap *unmap = data;
> - struct gvt_dma *entry;
> - unsigned long iov_pfn, end_iov_pfn;
> + struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
> + struct gvt_dma *entry;
> + u64 iov_pfn = iova >> PAGE_SHIFT;
> + u64 end_iov_pfn = iov_pfn + length / PAGE_SIZE;
>
> - iov_pfn = unmap->iova >> PAGE_SHIFT;
> - end_iov_pfn = iov_pfn + unmap->size / PAGE_SIZE;
> + mutex_lock(&vgpu->cache_lock);
> + for (; iov_pfn < end_iov_pfn; iov_pfn++) {
> + entry = __gvt_cache_find_gfn(vgpu, iov_pfn);
> + if (!entry)
> + continue;
>
> - mutex_lock(&vgpu->cache_lock);
> - for (; iov_pfn < end_iov_pfn; iov_pfn++) {
> - entry = __gvt_cache_find_gfn(vgpu, iov_pfn);
> - if (!entry)
> - continue;
> -
> - gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr,
> - entry->size);
> - __gvt_cache_remove_entry(vgpu, entry);
> - }
> - mutex_unlock(&vgpu->cache_lock);
> + gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr,
> + entry->size);
> + __gvt_cache_remove_entry(vgpu, entry);
> }
> -
> - return NOTIFY_OK;
> + mutex_unlock(&vgpu->cache_lock);
> }
>
> static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu)
> @@ -783,36 +774,20 @@ static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu)
> static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
> {
> struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
> - unsigned long events;
> - int ret;
> -
> - vgpu->iommu_notifier.notifier_call = intel_vgpu_iommu_notifier;
>
> - events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
> - ret = vfio_register_notifier(vfio_dev, VFIO_IOMMU_NOTIFY, &events,
> - &vgpu->iommu_notifier);
> - if (ret != 0) {
> - gvt_vgpu_err("vfio_register_notifier for iommu failed: %d\n",
> - ret);
> - goto out;
> - }
> -
> - ret = -EEXIST;
> if (vgpu->attached)
> - goto undo_iommu;
> + return -EEXIST;
>
> - ret = -ESRCH;
> if (!vgpu->vfio_device.kvm ||
> vgpu->vfio_device.kvm->mm != current->mm) {
> gvt_vgpu_err("KVM is required to use Intel vGPU\n");
> - goto undo_iommu;
> + return -ESRCH;
> }
>
> kvm_get_kvm(vgpu->vfio_device.kvm);
>
> - ret = -EEXIST;
> if (__kvmgt_vgpu_exist(vgpu))
> - goto undo_iommu;
> + return -EEXIST;
>
> vgpu->attached = true;
>
> @@ -831,12 +806,6 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
>
> atomic_set(&vgpu->released, 0);
> return 0;
> -
> -undo_iommu:
> - vfio_unregister_notifier(vfio_dev, VFIO_IOMMU_NOTIFY,
> - &vgpu->iommu_notifier);
> -out:
> - return ret;
> }
>
> static void intel_vgpu_release_msi_eventfd_ctx(struct intel_vgpu *vgpu)
> @@ -853,8 +822,6 @@ static void intel_vgpu_release_msi_eventfd_ctx(struct intel_vgpu *vgpu)
> static void intel_vgpu_close_device(struct vfio_device *vfio_dev)
> {
> struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
> - struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
> - int ret;
>
> if (!vgpu->attached)
> return;
> @@ -864,11 +831,6 @@ static void intel_vgpu_close_device(struct vfio_device *vfio_dev)
>
> intel_gvt_release_vgpu(vgpu);
>
> - ret = vfio_unregister_notifier(&vgpu->vfio_device, VFIO_IOMMU_NOTIFY,
> - &vgpu->iommu_notifier);
> - drm_WARN(&i915->drm, ret,
> - "vfio_unregister_notifier for iommu failed: %d\n", ret);
> -
> debugfs_remove(debugfs_lookup(KVMGT_DEBUGFS_FILENAME, vgpu->debugfs));
>
> kvm_page_track_unregister_notifier(vgpu->vfio_device.kvm,
> @@ -1610,6 +1572,7 @@ static const struct vfio_device_ops intel_vgpu_dev_ops = {
> .write = intel_vgpu_write,
> .mmap = intel_vgpu_mmap,
> .ioctl = intel_vgpu_ioctl,
> + .dma_unmap = intel_vgpu_dma_unmap,
> };
>
> static int intel_vgpu_probe(struct mdev_device *mdev)
> diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
> index b49e2e9db2dc6f..09e0ce7b72324c 100644
> --- a/drivers/s390/cio/vfio_ccw_ops.c
> +++ b/drivers/s390/cio/vfio_ccw_ops.c
> @@ -44,31 +44,19 @@ static int vfio_ccw_mdev_reset(struct vfio_ccw_private *private)
> return ret;
> }
>
> -static int vfio_ccw_mdev_notifier(struct notifier_block *nb,
> - unsigned long action,
> - void *data)
> +static void vfio_ccw_dma_unmap(struct vfio_device *vdev, u64 iova, u64 length)
> {
> struct vfio_ccw_private *private =
> - container_of(nb, struct vfio_ccw_private, nb);
> -
> - /*
> - * Vendor drivers MUST unpin pages in response to an
> - * invalidation.
> - */
> - if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
> - struct vfio_iommu_type1_dma_unmap *unmap = data;
> -
> - if (!cp_iova_pinned(&private->cp, unmap->iova))
> - return NOTIFY_OK;
> + container_of(vdev, struct vfio_ccw_private, vdev);
>
> - if (vfio_ccw_mdev_reset(private))
> - return NOTIFY_BAD;
> + /* Drivers MUST unpin pages in response to an invalidation. */
> + if (!cp_iova_pinned(&private->cp, iova))
> + return;
>
> - cp_free(&private->cp);
> - return NOTIFY_OK;
> - }
> + if (vfio_ccw_mdev_reset(private))
> + return;
>
> - return NOTIFY_DONE;
> + cp_free(&private->cp);
> }
>
> static ssize_t name_show(struct mdev_type *mtype,
> @@ -178,19 +166,11 @@ static int vfio_ccw_mdev_open_device(struct vfio_device *vdev)
> {
> struct vfio_ccw_private *private =
> container_of(vdev, struct vfio_ccw_private, vdev);
> - unsigned long events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
> int ret;
>
> - private->nb.notifier_call = vfio_ccw_mdev_notifier;
> -
> - ret = vfio_register_notifier(vdev, VFIO_IOMMU_NOTIFY,
> - &events, &private->nb);
> - if (ret)
> - return ret;
> -
> ret = vfio_ccw_register_async_dev_regions(private);
> if (ret)
> - goto out_unregister;
> + return ret;
>
> ret = vfio_ccw_register_schib_dev_regions(private);
> if (ret)
> @@ -204,7 +184,6 @@ static int vfio_ccw_mdev_open_device(struct vfio_device *vdev)
>
> out_unregister:
> vfio_ccw_unregister_dev_regions(private);
> - vfio_unregister_notifier(vdev, VFIO_IOMMU_NOTIFY, &private->nb);
> return ret;
> }
>
> @@ -222,7 +201,6 @@ static void vfio_ccw_mdev_close_device(struct vfio_device *vdev)
>
> cp_free(&private->cp);
> vfio_ccw_unregister_dev_regions(private);
> - vfio_unregister_notifier(vdev, VFIO_IOMMU_NOTIFY, &private->nb);
> }
>
> static ssize_t vfio_ccw_mdev_read_io_region(struct vfio_ccw_private *private,
> @@ -645,6 +623,7 @@ static const struct vfio_device_ops vfio_ccw_dev_ops = {
> .write = vfio_ccw_mdev_write,
> .ioctl = vfio_ccw_mdev_ioctl,
> .request = vfio_ccw_mdev_request,
> + .dma_unmap = vfio_ccw_dma_unmap,
> };
>
> struct mdev_driver vfio_ccw_mdev_driver = {
> diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h
> index b7163bac8cc75d..61418109238976 100644
> --- a/drivers/s390/cio/vfio_ccw_private.h
> +++ b/drivers/s390/cio/vfio_ccw_private.h
> @@ -74,7 +74,6 @@ struct vfio_ccw_crw {
> * @completion: synchronization helper of the I/O completion
> * @avail: available for creating a mediated device
> * @mdev: pointer to the mediated device
> - * @nb: notifier for vfio events
> * @io_region: MMIO region to input/output I/O arguments/results
> * @io_mutex: protect against concurrent update of I/O regions
> * @region: additional regions for other subchannel operations
> @@ -98,7 +97,6 @@ struct vfio_ccw_private {
> struct completion *completion;
> atomic_t avail;
> struct mdev_device *mdev;
> - struct notifier_block nb;
> struct ccw_io_region *io_region;
> struct mutex io_mutex;
> struct vfio_ccw_region *region;
> diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
> index a7d2a95796d360..bb1a1677c5c230 100644
> --- a/drivers/s390/crypto/vfio_ap_ops.c
> +++ b/drivers/s390/crypto/vfio_ap_ops.c
> @@ -1226,34 +1226,14 @@ static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev,
> return 0;
> }
>
> -/**
> - * vfio_ap_mdev_iommu_notifier - IOMMU notifier callback
> - *
> - * @nb: The notifier block
> - * @action: Action to be taken
> - * @data: data associated with the request
> - *
> - * For an UNMAP request, unpin the guest IOVA (the NIB guest address we
> - * pinned before). Other requests are ignored.
> - *
> - * Return: for an UNMAP request, NOFITY_OK; otherwise NOTIFY_DONE.
> - */
> -static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb,
> - unsigned long action, void *data)
> +static void vfio_ap_mdev_dma_unmap(struct vfio_device *vdev, u64 iova,
> + u64 length)
> {
> - struct ap_matrix_mdev *matrix_mdev;
> -
> - matrix_mdev = container_of(nb, struct ap_matrix_mdev, iommu_notifier);
> -
> - if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
> - struct vfio_iommu_type1_dma_unmap *unmap = data;
> - unsigned long g_pfn = unmap->iova >> PAGE_SHIFT;
> -
> - vfio_unpin_pages(&matrix_mdev->vdev, &g_pfn, 1);
> - return NOTIFY_OK;
> - }
> + struct ap_matrix_mdev *matrix_mdev =
> + container_of(vdev, struct ap_matrix_mdev, vdev);
> + unsigned long g_pfn = iova >> PAGE_SHIFT;
>
> - return NOTIFY_DONE;
> + vfio_unpin_pages(&matrix_mdev->vdev, &g_pfn, 1);
> }
>
> /**
> @@ -1380,27 +1360,11 @@ static int vfio_ap_mdev_open_device(struct vfio_device *vdev)
> {
> struct ap_matrix_mdev *matrix_mdev =
> container_of(vdev, struct ap_matrix_mdev, vdev);
> - unsigned long events;
> - int ret;
>
> if (!vdev->kvm)
> return -EINVAL;
>
> - ret = vfio_ap_mdev_set_kvm(matrix_mdev, vdev->kvm);
> - if (ret)
> - return ret;
> -
> - matrix_mdev->iommu_notifier.notifier_call = vfio_ap_mdev_iommu_notifier;
> - events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
> - ret = vfio_register_notifier(vdev, VFIO_IOMMU_NOTIFY, &events,
> - &matrix_mdev->iommu_notifier);
> - if (ret)
> - goto err_kvm;
> - return 0;
> -
> -err_kvm:
> - vfio_ap_mdev_unset_kvm(matrix_mdev);
> - return ret;
> + return vfio_ap_mdev_set_kvm(matrix_mdev, vdev->kvm);
> }
>
> static void vfio_ap_mdev_close_device(struct vfio_device *vdev)
> @@ -1408,8 +1372,6 @@ static void vfio_ap_mdev_close_device(struct vfio_device *vdev)
> struct ap_matrix_mdev *matrix_mdev =
> container_of(vdev, struct ap_matrix_mdev, vdev);
>
> - vfio_unregister_notifier(vdev, VFIO_IOMMU_NOTIFY,
> - &matrix_mdev->iommu_notifier);
> vfio_ap_mdev_unset_kvm(matrix_mdev);
> }
>
> @@ -1461,6 +1423,7 @@ static const struct vfio_device_ops vfio_ap_matrix_dev_ops = {
> .open_device = vfio_ap_mdev_open_device,
> .close_device = vfio_ap_mdev_close_device,
> .ioctl = vfio_ap_mdev_ioctl,
> + .dma_unmap = vfio_ap_mdev_dma_unmap,
> };
>
> static struct mdev_driver vfio_ap_matrix_driver = {
> diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h
> index a26efd804d0df3..abb59d59f81b20 100644
> --- a/drivers/s390/crypto/vfio_ap_private.h
> +++ b/drivers/s390/crypto/vfio_ap_private.h
> @@ -81,8 +81,6 @@ struct ap_matrix {
> * @node: allows the ap_matrix_mdev struct to be added to a list
> * @matrix: the adapters, usage domains and control domains assigned to the
> * mediated matrix device.
> - * @iommu_notifier: notifier block used for specifying callback function for
> - * handling the VFIO_IOMMU_NOTIFY_DMA_UNMAP even
> * @kvm: the struct holding guest's state
> * @pqap_hook: the function pointer to the interception handler for the
> * PQAP(AQIC) instruction.
> @@ -92,7 +90,6 @@ struct ap_matrix_mdev {
> struct vfio_device vdev;
> struct list_head node;
> struct ap_matrix matrix;
> - struct notifier_block iommu_notifier;
> struct kvm *kvm;
> crypto_hook pqap_hook;
> struct mdev_device *mdev;
> diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
> index 61e71c1154be67..610bb884d9197b 100644
> --- a/drivers/vfio/vfio.c
> +++ b/drivers/vfio/vfio.c
> @@ -231,6 +231,9 @@ int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
> {
> struct vfio_iommu_driver *driver, *tmp;
>
> + if (WARN_ON(!ops->register_notifier != !ops->unregister_notifier))
> + return -EINVAL;
> +
> driver = kzalloc(sizeof(*driver), GFP_KERNEL);
> if (!driver)
> return -ENOMEM;
> @@ -1077,8 +1080,20 @@ static void vfio_device_unassign_container(struct vfio_device *device)
> up_write(&device->group->group_rwsem);
> }
>
> +static int vfio_iommu_notifier(struct notifier_block *nb, unsigned long action,
> + void *data)
> +{
> + struct vfio_device *vfio_device =
> + container_of(nb, struct vfio_device, iommu_nb);
> + struct vfio_iommu_type1_dma_unmap *unmap = data;
> +
> + vfio_device->ops->dma_unmap(vfio_device, unmap->iova, unmap->size);
> + return NOTIFY_OK;
> +}
> +
> static struct file *vfio_device_open(struct vfio_device *device)
> {
> + struct vfio_iommu_driver *iommu_driver;
> struct file *filep;
> int ret;
>
> @@ -1109,6 +1124,18 @@ static struct file *vfio_device_open(struct vfio_device *device)
> if (ret)
> goto err_undo_count;
> }
> +
> + iommu_driver = device->group->container->iommu_driver;
> + if (device->ops->dma_unmap && iommu_driver &&
> + iommu_driver->ops->register_notifier) {
> + unsigned long events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
> +
> + device->iommu_nb.notifier_call = vfio_iommu_notifier;
> + iommu_driver->ops->register_notifier(
> + device->group->container->iommu_data, &events,
> + &device->iommu_nb);
> + }
> +
> up_read(&device->group->group_rwsem);
> }
> mutex_unlock(&device->dev_set->lock);
> @@ -1143,8 +1170,16 @@ static struct file *vfio_device_open(struct vfio_device *device)
> err_close_device:
> mutex_lock(&device->dev_set->lock);
> down_read(&device->group->group_rwsem);
> - if (device->open_count == 1 && device->ops->close_device)
> + if (device->open_count == 1 && device->ops->close_device) {
> device->ops->close_device(device);
> +
> + iommu_driver = device->group->container->iommu_driver;
> + if (device->ops->dma_unmap && iommu_driver &&
> + iommu_driver->ops->unregister_notifier)
> + iommu_driver->ops->unregister_notifier(
> + device->group->container->iommu_data,
> + &device->iommu_nb);
> + }
> err_undo_count:
> device->open_count--;
> if (device->open_count == 0 && device->kvm)
> @@ -1339,12 +1374,20 @@ static const struct file_operations vfio_group_fops = {
> static int vfio_device_fops_release(struct inode *inode, struct file *filep)
> {
> struct vfio_device *device = filep->private_data;
> + struct vfio_iommu_driver *iommu_driver;
>
> mutex_lock(&device->dev_set->lock);
> vfio_assert_device_open(device);
> down_read(&device->group->group_rwsem);
> if (device->open_count == 1 && device->ops->close_device)
> device->ops->close_device(device);
> +
> + iommu_driver = device->group->container->iommu_driver;
> + if (device->ops->dma_unmap && iommu_driver &&
> + iommu_driver->ops->unregister_notifier)
> + iommu_driver->ops->unregister_notifier(
> + device->group->container->iommu_data,
> + &device->iommu_nb);
> up_read(&device->group->group_rwsem);
> device->open_count--;
> if (device->open_count == 0)
> @@ -2027,90 +2070,6 @@ int vfio_dma_rw(struct vfio_device *device, dma_addr_t user_iova, void *data,
> }
> EXPORT_SYMBOL(vfio_dma_rw);
>
> -static int vfio_register_iommu_notifier(struct vfio_group *group,
> - unsigned long *events,
> - struct notifier_block *nb)
> -{
> - struct vfio_container *container;
> - struct vfio_iommu_driver *driver;
> - int ret;
> -
> - lockdep_assert_held_read(&group->group_rwsem);
> -
> - container = group->container;
> - driver = container->iommu_driver;
> - if (likely(driver && driver->ops->register_notifier))
> - ret = driver->ops->register_notifier(container->iommu_data,
> - events, nb);
> - else
> - ret = -ENOTTY;
> -
> - return ret;
> -}
> -
> -static int vfio_unregister_iommu_notifier(struct vfio_group *group,
> - struct notifier_block *nb)
> -{
> - struct vfio_container *container;
> - struct vfio_iommu_driver *driver;
> - int ret;
> -
> - lockdep_assert_held_read(&group->group_rwsem);
> -
> - container = group->container;
> - driver = container->iommu_driver;
> - if (likely(driver && driver->ops->unregister_notifier))
> - ret = driver->ops->unregister_notifier(container->iommu_data,
> - nb);
> - else
> - ret = -ENOTTY;
> -
> - return ret;
> -}
> -
> -int vfio_register_notifier(struct vfio_device *device,
> - enum vfio_notify_type type, unsigned long *events,
> - struct notifier_block *nb)
> -{
> - struct vfio_group *group = device->group;
> - int ret;
> -
> - if (!nb || !events || (*events == 0) ||
> - !vfio_assert_device_open(device))
> - return -EINVAL;
> -
> - switch (type) {
> - case VFIO_IOMMU_NOTIFY:
> - ret = vfio_register_iommu_notifier(group, events, nb);
> - break;
> - default:
> - ret = -EINVAL;
> - }
> - return ret;
> -}
> -EXPORT_SYMBOL(vfio_register_notifier);
> -
> -int vfio_unregister_notifier(struct vfio_device *device,
> - enum vfio_notify_type type,
> - struct notifier_block *nb)
> -{
> - struct vfio_group *group = device->group;
> - int ret;
> -
> - if (!nb || !vfio_assert_device_open(device))
> - return -EINVAL;
> -
> - switch (type) {
> - case VFIO_IOMMU_NOTIFY:
> - ret = vfio_unregister_iommu_notifier(group, nb);
> - break;
> - default:
> - ret = -EINVAL;
> - }
> - return ret;
> -}
> -EXPORT_SYMBOL(vfio_unregister_notifier);
> -
> /*
> * Module/class support
> */
> diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h
> index a6713022115155..25da02ca1568fc 100644
> --- a/drivers/vfio/vfio.h
> +++ b/drivers/vfio/vfio.h
> @@ -33,6 +33,9 @@ enum vfio_iommu_notify_type {
> VFIO_IOMMU_CONTAINER_CLOSE = 0,
> };
>
> +/* events for register_notifier() */
> +#define VFIO_IOMMU_NOTIFY_DMA_UNMAP BIT(0)
> +
> /**
> * struct vfio_iommu_driver_ops - VFIO IOMMU driver callbacks
> */
> diff --git a/include/linux/vfio.h b/include/linux/vfio.h
> index 49580fa2073a8d..8c5c389d42d918 100644
> --- a/include/linux/vfio.h
> +++ b/include/linux/vfio.h
> @@ -44,6 +44,7 @@ struct vfio_device {
> unsigned int open_count;
> struct completion comp;
> struct list_head group_next;
> + struct notifier_block iommu_nb;
> };
>
> /**
> @@ -60,6 +61,8 @@ struct vfio_device {
> * @match: Optional device name match callback (return: 0 for no-match, >0 for
> * match, -errno for abort (ex. match with insufficient or incorrect
> * additional args)
> + * @dma_unmap: Called when userspace unmaps IOVA from the container
> + * this device is attached to.
> * @device_feature: Optional, fill in the VFIO_DEVICE_FEATURE ioctl
> * @migration_set_state: Optional callback to change the migration state for
> * devices that support migration. It's mandatory for
> @@ -85,6 +88,7 @@ struct vfio_device_ops {
> int (*mmap)(struct vfio_device *vdev, struct vm_area_struct *vma);
> void (*request)(struct vfio_device *vdev, unsigned int count);
> int (*match)(struct vfio_device *vdev, char *buf);
> + void (*dma_unmap)(struct vfio_device *vdev, u64 iova, u64 length);
> int (*device_feature)(struct vfio_device *device, u32 flags,
> void __user *arg, size_t argsz);
> struct file *(*migration_set_state)(
> @@ -154,23 +158,6 @@ int vfio_unpin_pages(struct vfio_device *device, unsigned long *user_pfn,
> int vfio_dma_rw(struct vfio_device *device, dma_addr_t user_iova,
> void *data, size_t len, bool write);
>
> -/* each type has independent events */
> -enum vfio_notify_type {
> - VFIO_IOMMU_NOTIFY = 0,
> -};
> -
> -/* events for VFIO_IOMMU_NOTIFY */
> -#define VFIO_IOMMU_NOTIFY_DMA_UNMAP BIT(0)
> -
> -int vfio_register_notifier(struct vfio_device *device,
> - enum vfio_notify_type type,
> - unsigned long *required_events,
> - struct notifier_block *nb);
> -int vfio_unregister_notifier(struct vfio_device *device,
> - enum vfio_notify_type type,
> - struct notifier_block *nb);
> -
> -
> /*
> * Sub-module helpers
> */
> --
> 2.37.0
>
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]
^ permalink raw reply [flat|nested] 25+ messages in thread* Re: [PATCH v3 1/2] vfio: Replace the DMA unmapping notifier with a callback
@ 2022-07-05 8:31 ` Zhenyu Wang
0 siblings, 0 replies; 25+ messages in thread
From: Zhenyu Wang @ 2022-07-05 8:31 UTC (permalink / raw)
To: Jason Gunthorpe
Cc: kvm, David Airlie, Kevin Tian, dri-devel, Vineeth Vijayan,
Alexander Gordeev, Christoph Hellwig, linux-s390, Matthew Rosato,
Halil Pasic, Christian Borntraeger, intel-gfx, Zhi Wang,
Tony Krowiak, Eric Farman, Vasily Gorbik, Heiko Carstens,
Alex Williamson, Harald Freudenberger, Rodrigo Vivi,
intel-gvt-dev, Jason Herne, Tvrtko Ursulin, Cornelia Huck,
Peter Oberparleiter, Sven Schnelle
[-- Attachment #1: Type: text/plain, Size: 22825 bytes --]
On 2022.07.04 21:59:03 -0300, Jason Gunthorpe wrote:
> Instead of having drivers register the notifier with explicit code just
> have them provide a dma_unmap callback op in their driver ops and rely on
> the core code to wire it up.
>
> Suggested-by: Christoph Hellwig <hch@lst.de>
> Reviewed-by: Christoph Hellwig <hch@lst.de>
> Reviewed-by: Kevin Tian <kevin.tian@intel.com>
> Reviewed-by: Tony Krowiak <akrowiak@linux.ibm.com>
> Reviewed-by: Eric Farman <farman@linux.ibm.com>
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> ---
> drivers/gpu/drm/i915/gvt/gvt.h | 1 -
> drivers/gpu/drm/i915/gvt/kvmgt.c | 75 ++++-----------
gvt change looks fine to me.
Reviewed-by: Zhenyu Wang <zhenyuw@linux.intel.com>
> drivers/s390/cio/vfio_ccw_ops.c | 41 ++------
> drivers/s390/cio/vfio_ccw_private.h | 2 -
> drivers/s390/crypto/vfio_ap_ops.c | 53 ++---------
> drivers/s390/crypto/vfio_ap_private.h | 3 -
> drivers/vfio/vfio.c | 129 +++++++++-----------------
> drivers/vfio/vfio.h | 3 +
> include/linux/vfio.h | 21 +----
> 9 files changed, 88 insertions(+), 240 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
> index aee1a45da74bcb..705689e6401197 100644
> --- a/drivers/gpu/drm/i915/gvt/gvt.h
> +++ b/drivers/gpu/drm/i915/gvt/gvt.h
> @@ -226,7 +226,6 @@ struct intel_vgpu {
> unsigned long nr_cache_entries;
> struct mutex cache_lock;
>
> - struct notifier_block iommu_notifier;
> atomic_t released;
>
> struct kvm_page_track_notifier_node track_node;
> diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
> index e2f6c56ab3420c..ecd5bb37b63a2a 100644
> --- a/drivers/gpu/drm/i915/gvt/kvmgt.c
> +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
> @@ -729,34 +729,25 @@ int intel_gvt_set_edid(struct intel_vgpu *vgpu, int port_num)
> return ret;
> }
>
> -static int intel_vgpu_iommu_notifier(struct notifier_block *nb,
> - unsigned long action, void *data)
> +static void intel_vgpu_dma_unmap(struct vfio_device *vfio_dev, u64 iova,
> + u64 length)
> {
> - struct intel_vgpu *vgpu =
> - container_of(nb, struct intel_vgpu, iommu_notifier);
> -
> - if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
> - struct vfio_iommu_type1_dma_unmap *unmap = data;
> - struct gvt_dma *entry;
> - unsigned long iov_pfn, end_iov_pfn;
> + struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
> + struct gvt_dma *entry;
> + u64 iov_pfn = iova >> PAGE_SHIFT;
> + u64 end_iov_pfn = iov_pfn + length / PAGE_SIZE;
>
> - iov_pfn = unmap->iova >> PAGE_SHIFT;
> - end_iov_pfn = iov_pfn + unmap->size / PAGE_SIZE;
> + mutex_lock(&vgpu->cache_lock);
> + for (; iov_pfn < end_iov_pfn; iov_pfn++) {
> + entry = __gvt_cache_find_gfn(vgpu, iov_pfn);
> + if (!entry)
> + continue;
>
> - mutex_lock(&vgpu->cache_lock);
> - for (; iov_pfn < end_iov_pfn; iov_pfn++) {
> - entry = __gvt_cache_find_gfn(vgpu, iov_pfn);
> - if (!entry)
> - continue;
> -
> - gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr,
> - entry->size);
> - __gvt_cache_remove_entry(vgpu, entry);
> - }
> - mutex_unlock(&vgpu->cache_lock);
> + gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr,
> + entry->size);
> + __gvt_cache_remove_entry(vgpu, entry);
> }
> -
> - return NOTIFY_OK;
> + mutex_unlock(&vgpu->cache_lock);
> }
>
> static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu)
> @@ -783,36 +774,20 @@ static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu)
> static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
> {
> struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
> - unsigned long events;
> - int ret;
> -
> - vgpu->iommu_notifier.notifier_call = intel_vgpu_iommu_notifier;
>
> - events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
> - ret = vfio_register_notifier(vfio_dev, VFIO_IOMMU_NOTIFY, &events,
> - &vgpu->iommu_notifier);
> - if (ret != 0) {
> - gvt_vgpu_err("vfio_register_notifier for iommu failed: %d\n",
> - ret);
> - goto out;
> - }
> -
> - ret = -EEXIST;
> if (vgpu->attached)
> - goto undo_iommu;
> + return -EEXIST;
>
> - ret = -ESRCH;
> if (!vgpu->vfio_device.kvm ||
> vgpu->vfio_device.kvm->mm != current->mm) {
> gvt_vgpu_err("KVM is required to use Intel vGPU\n");
> - goto undo_iommu;
> + return -ESRCH;
> }
>
> kvm_get_kvm(vgpu->vfio_device.kvm);
>
> - ret = -EEXIST;
> if (__kvmgt_vgpu_exist(vgpu))
> - goto undo_iommu;
> + return -EEXIST;
>
> vgpu->attached = true;
>
> @@ -831,12 +806,6 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
>
> atomic_set(&vgpu->released, 0);
> return 0;
> -
> -undo_iommu:
> - vfio_unregister_notifier(vfio_dev, VFIO_IOMMU_NOTIFY,
> - &vgpu->iommu_notifier);
> -out:
> - return ret;
> }
>
> static void intel_vgpu_release_msi_eventfd_ctx(struct intel_vgpu *vgpu)
> @@ -853,8 +822,6 @@ static void intel_vgpu_release_msi_eventfd_ctx(struct intel_vgpu *vgpu)
> static void intel_vgpu_close_device(struct vfio_device *vfio_dev)
> {
> struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
> - struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
> - int ret;
>
> if (!vgpu->attached)
> return;
> @@ -864,11 +831,6 @@ static void intel_vgpu_close_device(struct vfio_device *vfio_dev)
>
> intel_gvt_release_vgpu(vgpu);
>
> - ret = vfio_unregister_notifier(&vgpu->vfio_device, VFIO_IOMMU_NOTIFY,
> - &vgpu->iommu_notifier);
> - drm_WARN(&i915->drm, ret,
> - "vfio_unregister_notifier for iommu failed: %d\n", ret);
> -
> debugfs_remove(debugfs_lookup(KVMGT_DEBUGFS_FILENAME, vgpu->debugfs));
>
> kvm_page_track_unregister_notifier(vgpu->vfio_device.kvm,
> @@ -1610,6 +1572,7 @@ static const struct vfio_device_ops intel_vgpu_dev_ops = {
> .write = intel_vgpu_write,
> .mmap = intel_vgpu_mmap,
> .ioctl = intel_vgpu_ioctl,
> + .dma_unmap = intel_vgpu_dma_unmap,
> };
>
> static int intel_vgpu_probe(struct mdev_device *mdev)
> diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
> index b49e2e9db2dc6f..09e0ce7b72324c 100644
> --- a/drivers/s390/cio/vfio_ccw_ops.c
> +++ b/drivers/s390/cio/vfio_ccw_ops.c
> @@ -44,31 +44,19 @@ static int vfio_ccw_mdev_reset(struct vfio_ccw_private *private)
> return ret;
> }
>
> -static int vfio_ccw_mdev_notifier(struct notifier_block *nb,
> - unsigned long action,
> - void *data)
> +static void vfio_ccw_dma_unmap(struct vfio_device *vdev, u64 iova, u64 length)
> {
> struct vfio_ccw_private *private =
> - container_of(nb, struct vfio_ccw_private, nb);
> -
> - /*
> - * Vendor drivers MUST unpin pages in response to an
> - * invalidation.
> - */
> - if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
> - struct vfio_iommu_type1_dma_unmap *unmap = data;
> -
> - if (!cp_iova_pinned(&private->cp, unmap->iova))
> - return NOTIFY_OK;
> + container_of(vdev, struct vfio_ccw_private, vdev);
>
> - if (vfio_ccw_mdev_reset(private))
> - return NOTIFY_BAD;
> + /* Drivers MUST unpin pages in response to an invalidation. */
> + if (!cp_iova_pinned(&private->cp, iova))
> + return;
>
> - cp_free(&private->cp);
> - return NOTIFY_OK;
> - }
> + if (vfio_ccw_mdev_reset(private))
> + return;
>
> - return NOTIFY_DONE;
> + cp_free(&private->cp);
> }
>
> static ssize_t name_show(struct mdev_type *mtype,
> @@ -178,19 +166,11 @@ static int vfio_ccw_mdev_open_device(struct vfio_device *vdev)
> {
> struct vfio_ccw_private *private =
> container_of(vdev, struct vfio_ccw_private, vdev);
> - unsigned long events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
> int ret;
>
> - private->nb.notifier_call = vfio_ccw_mdev_notifier;
> -
> - ret = vfio_register_notifier(vdev, VFIO_IOMMU_NOTIFY,
> - &events, &private->nb);
> - if (ret)
> - return ret;
> -
> ret = vfio_ccw_register_async_dev_regions(private);
> if (ret)
> - goto out_unregister;
> + return ret;
>
> ret = vfio_ccw_register_schib_dev_regions(private);
> if (ret)
> @@ -204,7 +184,6 @@ static int vfio_ccw_mdev_open_device(struct vfio_device *vdev)
>
> out_unregister:
> vfio_ccw_unregister_dev_regions(private);
> - vfio_unregister_notifier(vdev, VFIO_IOMMU_NOTIFY, &private->nb);
> return ret;
> }
>
> @@ -222,7 +201,6 @@ static void vfio_ccw_mdev_close_device(struct vfio_device *vdev)
>
> cp_free(&private->cp);
> vfio_ccw_unregister_dev_regions(private);
> - vfio_unregister_notifier(vdev, VFIO_IOMMU_NOTIFY, &private->nb);
> }
>
> static ssize_t vfio_ccw_mdev_read_io_region(struct vfio_ccw_private *private,
> @@ -645,6 +623,7 @@ static const struct vfio_device_ops vfio_ccw_dev_ops = {
> .write = vfio_ccw_mdev_write,
> .ioctl = vfio_ccw_mdev_ioctl,
> .request = vfio_ccw_mdev_request,
> + .dma_unmap = vfio_ccw_dma_unmap,
> };
>
> struct mdev_driver vfio_ccw_mdev_driver = {
> diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h
> index b7163bac8cc75d..61418109238976 100644
> --- a/drivers/s390/cio/vfio_ccw_private.h
> +++ b/drivers/s390/cio/vfio_ccw_private.h
> @@ -74,7 +74,6 @@ struct vfio_ccw_crw {
> * @completion: synchronization helper of the I/O completion
> * @avail: available for creating a mediated device
> * @mdev: pointer to the mediated device
> - * @nb: notifier for vfio events
> * @io_region: MMIO region to input/output I/O arguments/results
> * @io_mutex: protect against concurrent update of I/O regions
> * @region: additional regions for other subchannel operations
> @@ -98,7 +97,6 @@ struct vfio_ccw_private {
> struct completion *completion;
> atomic_t avail;
> struct mdev_device *mdev;
> - struct notifier_block nb;
> struct ccw_io_region *io_region;
> struct mutex io_mutex;
> struct vfio_ccw_region *region;
> diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
> index a7d2a95796d360..bb1a1677c5c230 100644
> --- a/drivers/s390/crypto/vfio_ap_ops.c
> +++ b/drivers/s390/crypto/vfio_ap_ops.c
> @@ -1226,34 +1226,14 @@ static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev,
> return 0;
> }
>
> -/**
> - * vfio_ap_mdev_iommu_notifier - IOMMU notifier callback
> - *
> - * @nb: The notifier block
> - * @action: Action to be taken
> - * @data: data associated with the request
> - *
> - * For an UNMAP request, unpin the guest IOVA (the NIB guest address we
> - * pinned before). Other requests are ignored.
> - *
> - * Return: for an UNMAP request, NOFITY_OK; otherwise NOTIFY_DONE.
> - */
> -static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb,
> - unsigned long action, void *data)
> +static void vfio_ap_mdev_dma_unmap(struct vfio_device *vdev, u64 iova,
> + u64 length)
> {
> - struct ap_matrix_mdev *matrix_mdev;
> -
> - matrix_mdev = container_of(nb, struct ap_matrix_mdev, iommu_notifier);
> -
> - if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
> - struct vfio_iommu_type1_dma_unmap *unmap = data;
> - unsigned long g_pfn = unmap->iova >> PAGE_SHIFT;
> -
> - vfio_unpin_pages(&matrix_mdev->vdev, &g_pfn, 1);
> - return NOTIFY_OK;
> - }
> + struct ap_matrix_mdev *matrix_mdev =
> + container_of(vdev, struct ap_matrix_mdev, vdev);
> + unsigned long g_pfn = iova >> PAGE_SHIFT;
>
> - return NOTIFY_DONE;
> + vfio_unpin_pages(&matrix_mdev->vdev, &g_pfn, 1);
> }
>
> /**
> @@ -1380,27 +1360,11 @@ static int vfio_ap_mdev_open_device(struct vfio_device *vdev)
> {
> struct ap_matrix_mdev *matrix_mdev =
> container_of(vdev, struct ap_matrix_mdev, vdev);
> - unsigned long events;
> - int ret;
>
> if (!vdev->kvm)
> return -EINVAL;
>
> - ret = vfio_ap_mdev_set_kvm(matrix_mdev, vdev->kvm);
> - if (ret)
> - return ret;
> -
> - matrix_mdev->iommu_notifier.notifier_call = vfio_ap_mdev_iommu_notifier;
> - events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
> - ret = vfio_register_notifier(vdev, VFIO_IOMMU_NOTIFY, &events,
> - &matrix_mdev->iommu_notifier);
> - if (ret)
> - goto err_kvm;
> - return 0;
> -
> -err_kvm:
> - vfio_ap_mdev_unset_kvm(matrix_mdev);
> - return ret;
> + return vfio_ap_mdev_set_kvm(matrix_mdev, vdev->kvm);
> }
>
> static void vfio_ap_mdev_close_device(struct vfio_device *vdev)
> @@ -1408,8 +1372,6 @@ static void vfio_ap_mdev_close_device(struct vfio_device *vdev)
> struct ap_matrix_mdev *matrix_mdev =
> container_of(vdev, struct ap_matrix_mdev, vdev);
>
> - vfio_unregister_notifier(vdev, VFIO_IOMMU_NOTIFY,
> - &matrix_mdev->iommu_notifier);
> vfio_ap_mdev_unset_kvm(matrix_mdev);
> }
>
> @@ -1461,6 +1423,7 @@ static const struct vfio_device_ops vfio_ap_matrix_dev_ops = {
> .open_device = vfio_ap_mdev_open_device,
> .close_device = vfio_ap_mdev_close_device,
> .ioctl = vfio_ap_mdev_ioctl,
> + .dma_unmap = vfio_ap_mdev_dma_unmap,
> };
>
> static struct mdev_driver vfio_ap_matrix_driver = {
> diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h
> index a26efd804d0df3..abb59d59f81b20 100644
> --- a/drivers/s390/crypto/vfio_ap_private.h
> +++ b/drivers/s390/crypto/vfio_ap_private.h
> @@ -81,8 +81,6 @@ struct ap_matrix {
> * @node: allows the ap_matrix_mdev struct to be added to a list
> * @matrix: the adapters, usage domains and control domains assigned to the
> * mediated matrix device.
> - * @iommu_notifier: notifier block used for specifying callback function for
> - * handling the VFIO_IOMMU_NOTIFY_DMA_UNMAP even
> * @kvm: the struct holding guest's state
> * @pqap_hook: the function pointer to the interception handler for the
> * PQAP(AQIC) instruction.
> @@ -92,7 +90,6 @@ struct ap_matrix_mdev {
> struct vfio_device vdev;
> struct list_head node;
> struct ap_matrix matrix;
> - struct notifier_block iommu_notifier;
> struct kvm *kvm;
> crypto_hook pqap_hook;
> struct mdev_device *mdev;
> diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
> index 61e71c1154be67..610bb884d9197b 100644
> --- a/drivers/vfio/vfio.c
> +++ b/drivers/vfio/vfio.c
> @@ -231,6 +231,9 @@ int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
> {
> struct vfio_iommu_driver *driver, *tmp;
>
> + if (WARN_ON(!ops->register_notifier != !ops->unregister_notifier))
> + return -EINVAL;
> +
> driver = kzalloc(sizeof(*driver), GFP_KERNEL);
> if (!driver)
> return -ENOMEM;
> @@ -1077,8 +1080,20 @@ static void vfio_device_unassign_container(struct vfio_device *device)
> up_write(&device->group->group_rwsem);
> }
>
> +static int vfio_iommu_notifier(struct notifier_block *nb, unsigned long action,
> + void *data)
> +{
> + struct vfio_device *vfio_device =
> + container_of(nb, struct vfio_device, iommu_nb);
> + struct vfio_iommu_type1_dma_unmap *unmap = data;
> +
> + vfio_device->ops->dma_unmap(vfio_device, unmap->iova, unmap->size);
> + return NOTIFY_OK;
> +}
> +
> static struct file *vfio_device_open(struct vfio_device *device)
> {
> + struct vfio_iommu_driver *iommu_driver;
> struct file *filep;
> int ret;
>
> @@ -1109,6 +1124,18 @@ static struct file *vfio_device_open(struct vfio_device *device)
> if (ret)
> goto err_undo_count;
> }
> +
> + iommu_driver = device->group->container->iommu_driver;
> + if (device->ops->dma_unmap && iommu_driver &&
> + iommu_driver->ops->register_notifier) {
> + unsigned long events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
> +
> + device->iommu_nb.notifier_call = vfio_iommu_notifier;
> + iommu_driver->ops->register_notifier(
> + device->group->container->iommu_data, &events,
> + &device->iommu_nb);
> + }
> +
> up_read(&device->group->group_rwsem);
> }
> mutex_unlock(&device->dev_set->lock);
> @@ -1143,8 +1170,16 @@ static struct file *vfio_device_open(struct vfio_device *device)
> err_close_device:
> mutex_lock(&device->dev_set->lock);
> down_read(&device->group->group_rwsem);
> - if (device->open_count == 1 && device->ops->close_device)
> + if (device->open_count == 1 && device->ops->close_device) {
> device->ops->close_device(device);
> +
> + iommu_driver = device->group->container->iommu_driver;
> + if (device->ops->dma_unmap && iommu_driver &&
> + iommu_driver->ops->unregister_notifier)
> + iommu_driver->ops->unregister_notifier(
> + device->group->container->iommu_data,
> + &device->iommu_nb);
> + }
> err_undo_count:
> device->open_count--;
> if (device->open_count == 0 && device->kvm)
> @@ -1339,12 +1374,20 @@ static const struct file_operations vfio_group_fops = {
> static int vfio_device_fops_release(struct inode *inode, struct file *filep)
> {
> struct vfio_device *device = filep->private_data;
> + struct vfio_iommu_driver *iommu_driver;
>
> mutex_lock(&device->dev_set->lock);
> vfio_assert_device_open(device);
> down_read(&device->group->group_rwsem);
> if (device->open_count == 1 && device->ops->close_device)
> device->ops->close_device(device);
> +
> + iommu_driver = device->group->container->iommu_driver;
> + if (device->ops->dma_unmap && iommu_driver &&
> + iommu_driver->ops->unregister_notifier)
> + iommu_driver->ops->unregister_notifier(
> + device->group->container->iommu_data,
> + &device->iommu_nb);
> up_read(&device->group->group_rwsem);
> device->open_count--;
> if (device->open_count == 0)
> @@ -2027,90 +2070,6 @@ int vfio_dma_rw(struct vfio_device *device, dma_addr_t user_iova, void *data,
> }
> EXPORT_SYMBOL(vfio_dma_rw);
>
> -static int vfio_register_iommu_notifier(struct vfio_group *group,
> - unsigned long *events,
> - struct notifier_block *nb)
> -{
> - struct vfio_container *container;
> - struct vfio_iommu_driver *driver;
> - int ret;
> -
> - lockdep_assert_held_read(&group->group_rwsem);
> -
> - container = group->container;
> - driver = container->iommu_driver;
> - if (likely(driver && driver->ops->register_notifier))
> - ret = driver->ops->register_notifier(container->iommu_data,
> - events, nb);
> - else
> - ret = -ENOTTY;
> -
> - return ret;
> -}
> -
> -static int vfio_unregister_iommu_notifier(struct vfio_group *group,
> - struct notifier_block *nb)
> -{
> - struct vfio_container *container;
> - struct vfio_iommu_driver *driver;
> - int ret;
> -
> - lockdep_assert_held_read(&group->group_rwsem);
> -
> - container = group->container;
> - driver = container->iommu_driver;
> - if (likely(driver && driver->ops->unregister_notifier))
> - ret = driver->ops->unregister_notifier(container->iommu_data,
> - nb);
> - else
> - ret = -ENOTTY;
> -
> - return ret;
> -}
> -
> -int vfio_register_notifier(struct vfio_device *device,
> - enum vfio_notify_type type, unsigned long *events,
> - struct notifier_block *nb)
> -{
> - struct vfio_group *group = device->group;
> - int ret;
> -
> - if (!nb || !events || (*events == 0) ||
> - !vfio_assert_device_open(device))
> - return -EINVAL;
> -
> - switch (type) {
> - case VFIO_IOMMU_NOTIFY:
> - ret = vfio_register_iommu_notifier(group, events, nb);
> - break;
> - default:
> - ret = -EINVAL;
> - }
> - return ret;
> -}
> -EXPORT_SYMBOL(vfio_register_notifier);
> -
> -int vfio_unregister_notifier(struct vfio_device *device,
> - enum vfio_notify_type type,
> - struct notifier_block *nb)
> -{
> - struct vfio_group *group = device->group;
> - int ret;
> -
> - if (!nb || !vfio_assert_device_open(device))
> - return -EINVAL;
> -
> - switch (type) {
> - case VFIO_IOMMU_NOTIFY:
> - ret = vfio_unregister_iommu_notifier(group, nb);
> - break;
> - default:
> - ret = -EINVAL;
> - }
> - return ret;
> -}
> -EXPORT_SYMBOL(vfio_unregister_notifier);
> -
> /*
> * Module/class support
> */
> diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h
> index a6713022115155..25da02ca1568fc 100644
> --- a/drivers/vfio/vfio.h
> +++ b/drivers/vfio/vfio.h
> @@ -33,6 +33,9 @@ enum vfio_iommu_notify_type {
> VFIO_IOMMU_CONTAINER_CLOSE = 0,
> };
>
> +/* events for register_notifier() */
> +#define VFIO_IOMMU_NOTIFY_DMA_UNMAP BIT(0)
> +
> /**
> * struct vfio_iommu_driver_ops - VFIO IOMMU driver callbacks
> */
> diff --git a/include/linux/vfio.h b/include/linux/vfio.h
> index 49580fa2073a8d..8c5c389d42d918 100644
> --- a/include/linux/vfio.h
> +++ b/include/linux/vfio.h
> @@ -44,6 +44,7 @@ struct vfio_device {
> unsigned int open_count;
> struct completion comp;
> struct list_head group_next;
> + struct notifier_block iommu_nb;
> };
>
> /**
> @@ -60,6 +61,8 @@ struct vfio_device {
> * @match: Optional device name match callback (return: 0 for no-match, >0 for
> * match, -errno for abort (ex. match with insufficient or incorrect
> * additional args)
> + * @dma_unmap: Called when userspace unmaps IOVA from the container
> + * this device is attached to.
> * @device_feature: Optional, fill in the VFIO_DEVICE_FEATURE ioctl
> * @migration_set_state: Optional callback to change the migration state for
> * devices that support migration. It's mandatory for
> @@ -85,6 +88,7 @@ struct vfio_device_ops {
> int (*mmap)(struct vfio_device *vdev, struct vm_area_struct *vma);
> void (*request)(struct vfio_device *vdev, unsigned int count);
> int (*match)(struct vfio_device *vdev, char *buf);
> + void (*dma_unmap)(struct vfio_device *vdev, u64 iova, u64 length);
> int (*device_feature)(struct vfio_device *device, u32 flags,
> void __user *arg, size_t argsz);
> struct file *(*migration_set_state)(
> @@ -154,23 +158,6 @@ int vfio_unpin_pages(struct vfio_device *device, unsigned long *user_pfn,
> int vfio_dma_rw(struct vfio_device *device, dma_addr_t user_iova,
> void *data, size_t len, bool write);
>
> -/* each type has independent events */
> -enum vfio_notify_type {
> - VFIO_IOMMU_NOTIFY = 0,
> -};
> -
> -/* events for VFIO_IOMMU_NOTIFY */
> -#define VFIO_IOMMU_NOTIFY_DMA_UNMAP BIT(0)
> -
> -int vfio_register_notifier(struct vfio_device *device,
> - enum vfio_notify_type type,
> - unsigned long *required_events,
> - struct notifier_block *nb);
> -int vfio_unregister_notifier(struct vfio_device *device,
> - enum vfio_notify_type type,
> - struct notifier_block *nb);
> -
> -
> /*
> * Sub-module helpers
> */
> --
> 2.37.0
>
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]
^ permalink raw reply [flat|nested] 25+ messages in thread* Re: [PATCH v3 1/2] vfio: Replace the DMA unmapping notifier with a callback
@ 2022-07-05 8:31 ` Zhenyu Wang
0 siblings, 0 replies; 25+ messages in thread
From: Zhenyu Wang @ 2022-07-05 8:31 UTC (permalink / raw)
To: Jason Gunthorpe
Cc: Alexander Gordeev, David Airlie, Alex Williamson,
Christian Borntraeger, Cornelia Huck, Daniel Vetter, dri-devel,
Harald Freudenberger, Vasily Gorbik, Heiko Carstens, intel-gfx,
intel-gvt-dev, Jani Nikula, Jason Herne, Joonas Lahtinen, kvm,
linux-s390, Matthew Rosato, Peter Oberparleiter, Halil Pasic,
Rodrigo Vivi, Sven Schnelle, Tvrtko Ursulin, Vineeth Vijayan,
Zhenyu Wang, Zhi Wang, Tony Krowiak, Eric Farman,
Christoph Hellwig, Kevin Tian
[-- Attachment #1: Type: text/plain, Size: 22825 bytes --]
On 2022.07.04 21:59:03 -0300, Jason Gunthorpe wrote:
> Instead of having drivers register the notifier with explicit code just
> have them provide a dma_unmap callback op in their driver ops and rely on
> the core code to wire it up.
>
> Suggested-by: Christoph Hellwig <hch@lst.de>
> Reviewed-by: Christoph Hellwig <hch@lst.de>
> Reviewed-by: Kevin Tian <kevin.tian@intel.com>
> Reviewed-by: Tony Krowiak <akrowiak@linux.ibm.com>
> Reviewed-by: Eric Farman <farman@linux.ibm.com>
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> ---
> drivers/gpu/drm/i915/gvt/gvt.h | 1 -
> drivers/gpu/drm/i915/gvt/kvmgt.c | 75 ++++-----------
gvt change looks fine to me.
Reviewed-by: Zhenyu Wang <zhenyuw@linux.intel.com>
> drivers/s390/cio/vfio_ccw_ops.c | 41 ++------
> drivers/s390/cio/vfio_ccw_private.h | 2 -
> drivers/s390/crypto/vfio_ap_ops.c | 53 ++---------
> drivers/s390/crypto/vfio_ap_private.h | 3 -
> drivers/vfio/vfio.c | 129 +++++++++-----------------
> drivers/vfio/vfio.h | 3 +
> include/linux/vfio.h | 21 +----
> 9 files changed, 88 insertions(+), 240 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
> index aee1a45da74bcb..705689e6401197 100644
> --- a/drivers/gpu/drm/i915/gvt/gvt.h
> +++ b/drivers/gpu/drm/i915/gvt/gvt.h
> @@ -226,7 +226,6 @@ struct intel_vgpu {
> unsigned long nr_cache_entries;
> struct mutex cache_lock;
>
> - struct notifier_block iommu_notifier;
> atomic_t released;
>
> struct kvm_page_track_notifier_node track_node;
> diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
> index e2f6c56ab3420c..ecd5bb37b63a2a 100644
> --- a/drivers/gpu/drm/i915/gvt/kvmgt.c
> +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
> @@ -729,34 +729,25 @@ int intel_gvt_set_edid(struct intel_vgpu *vgpu, int port_num)
> return ret;
> }
>
> -static int intel_vgpu_iommu_notifier(struct notifier_block *nb,
> - unsigned long action, void *data)
> +static void intel_vgpu_dma_unmap(struct vfio_device *vfio_dev, u64 iova,
> + u64 length)
> {
> - struct intel_vgpu *vgpu =
> - container_of(nb, struct intel_vgpu, iommu_notifier);
> -
> - if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
> - struct vfio_iommu_type1_dma_unmap *unmap = data;
> - struct gvt_dma *entry;
> - unsigned long iov_pfn, end_iov_pfn;
> + struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
> + struct gvt_dma *entry;
> + u64 iov_pfn = iova >> PAGE_SHIFT;
> + u64 end_iov_pfn = iov_pfn + length / PAGE_SIZE;
>
> - iov_pfn = unmap->iova >> PAGE_SHIFT;
> - end_iov_pfn = iov_pfn + unmap->size / PAGE_SIZE;
> + mutex_lock(&vgpu->cache_lock);
> + for (; iov_pfn < end_iov_pfn; iov_pfn++) {
> + entry = __gvt_cache_find_gfn(vgpu, iov_pfn);
> + if (!entry)
> + continue;
>
> - mutex_lock(&vgpu->cache_lock);
> - for (; iov_pfn < end_iov_pfn; iov_pfn++) {
> - entry = __gvt_cache_find_gfn(vgpu, iov_pfn);
> - if (!entry)
> - continue;
> -
> - gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr,
> - entry->size);
> - __gvt_cache_remove_entry(vgpu, entry);
> - }
> - mutex_unlock(&vgpu->cache_lock);
> + gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr,
> + entry->size);
> + __gvt_cache_remove_entry(vgpu, entry);
> }
> -
> - return NOTIFY_OK;
> + mutex_unlock(&vgpu->cache_lock);
> }
>
> static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu)
> @@ -783,36 +774,20 @@ static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu)
> static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
> {
> struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
> - unsigned long events;
> - int ret;
> -
> - vgpu->iommu_notifier.notifier_call = intel_vgpu_iommu_notifier;
>
> - events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
> - ret = vfio_register_notifier(vfio_dev, VFIO_IOMMU_NOTIFY, &events,
> - &vgpu->iommu_notifier);
> - if (ret != 0) {
> - gvt_vgpu_err("vfio_register_notifier for iommu failed: %d\n",
> - ret);
> - goto out;
> - }
> -
> - ret = -EEXIST;
> if (vgpu->attached)
> - goto undo_iommu;
> + return -EEXIST;
>
> - ret = -ESRCH;
> if (!vgpu->vfio_device.kvm ||
> vgpu->vfio_device.kvm->mm != current->mm) {
> gvt_vgpu_err("KVM is required to use Intel vGPU\n");
> - goto undo_iommu;
> + return -ESRCH;
> }
>
> kvm_get_kvm(vgpu->vfio_device.kvm);
>
> - ret = -EEXIST;
> if (__kvmgt_vgpu_exist(vgpu))
> - goto undo_iommu;
> + return -EEXIST;
>
> vgpu->attached = true;
>
> @@ -831,12 +806,6 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
>
> atomic_set(&vgpu->released, 0);
> return 0;
> -
> -undo_iommu:
> - vfio_unregister_notifier(vfio_dev, VFIO_IOMMU_NOTIFY,
> - &vgpu->iommu_notifier);
> -out:
> - return ret;
> }
>
> static void intel_vgpu_release_msi_eventfd_ctx(struct intel_vgpu *vgpu)
> @@ -853,8 +822,6 @@ static void intel_vgpu_release_msi_eventfd_ctx(struct intel_vgpu *vgpu)
> static void intel_vgpu_close_device(struct vfio_device *vfio_dev)
> {
> struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
> - struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
> - int ret;
>
> if (!vgpu->attached)
> return;
> @@ -864,11 +831,6 @@ static void intel_vgpu_close_device(struct vfio_device *vfio_dev)
>
> intel_gvt_release_vgpu(vgpu);
>
> - ret = vfio_unregister_notifier(&vgpu->vfio_device, VFIO_IOMMU_NOTIFY,
> - &vgpu->iommu_notifier);
> - drm_WARN(&i915->drm, ret,
> - "vfio_unregister_notifier for iommu failed: %d\n", ret);
> -
> debugfs_remove(debugfs_lookup(KVMGT_DEBUGFS_FILENAME, vgpu->debugfs));
>
> kvm_page_track_unregister_notifier(vgpu->vfio_device.kvm,
> @@ -1610,6 +1572,7 @@ static const struct vfio_device_ops intel_vgpu_dev_ops = {
> .write = intel_vgpu_write,
> .mmap = intel_vgpu_mmap,
> .ioctl = intel_vgpu_ioctl,
> + .dma_unmap = intel_vgpu_dma_unmap,
> };
>
> static int intel_vgpu_probe(struct mdev_device *mdev)
> diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
> index b49e2e9db2dc6f..09e0ce7b72324c 100644
> --- a/drivers/s390/cio/vfio_ccw_ops.c
> +++ b/drivers/s390/cio/vfio_ccw_ops.c
> @@ -44,31 +44,19 @@ static int vfio_ccw_mdev_reset(struct vfio_ccw_private *private)
> return ret;
> }
>
> -static int vfio_ccw_mdev_notifier(struct notifier_block *nb,
> - unsigned long action,
> - void *data)
> +static void vfio_ccw_dma_unmap(struct vfio_device *vdev, u64 iova, u64 length)
> {
> struct vfio_ccw_private *private =
> - container_of(nb, struct vfio_ccw_private, nb);
> -
> - /*
> - * Vendor drivers MUST unpin pages in response to an
> - * invalidation.
> - */
> - if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
> - struct vfio_iommu_type1_dma_unmap *unmap = data;
> -
> - if (!cp_iova_pinned(&private->cp, unmap->iova))
> - return NOTIFY_OK;
> + container_of(vdev, struct vfio_ccw_private, vdev);
>
> - if (vfio_ccw_mdev_reset(private))
> - return NOTIFY_BAD;
> + /* Drivers MUST unpin pages in response to an invalidation. */
> + if (!cp_iova_pinned(&private->cp, iova))
> + return;
>
> - cp_free(&private->cp);
> - return NOTIFY_OK;
> - }
> + if (vfio_ccw_mdev_reset(private))
> + return;
>
> - return NOTIFY_DONE;
> + cp_free(&private->cp);
> }
>
> static ssize_t name_show(struct mdev_type *mtype,
> @@ -178,19 +166,11 @@ static int vfio_ccw_mdev_open_device(struct vfio_device *vdev)
> {
> struct vfio_ccw_private *private =
> container_of(vdev, struct vfio_ccw_private, vdev);
> - unsigned long events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
> int ret;
>
> - private->nb.notifier_call = vfio_ccw_mdev_notifier;
> -
> - ret = vfio_register_notifier(vdev, VFIO_IOMMU_NOTIFY,
> - &events, &private->nb);
> - if (ret)
> - return ret;
> -
> ret = vfio_ccw_register_async_dev_regions(private);
> if (ret)
> - goto out_unregister;
> + return ret;
>
> ret = vfio_ccw_register_schib_dev_regions(private);
> if (ret)
> @@ -204,7 +184,6 @@ static int vfio_ccw_mdev_open_device(struct vfio_device *vdev)
>
> out_unregister:
> vfio_ccw_unregister_dev_regions(private);
> - vfio_unregister_notifier(vdev, VFIO_IOMMU_NOTIFY, &private->nb);
> return ret;
> }
>
> @@ -222,7 +201,6 @@ static void vfio_ccw_mdev_close_device(struct vfio_device *vdev)
>
> cp_free(&private->cp);
> vfio_ccw_unregister_dev_regions(private);
> - vfio_unregister_notifier(vdev, VFIO_IOMMU_NOTIFY, &private->nb);
> }
>
> static ssize_t vfio_ccw_mdev_read_io_region(struct vfio_ccw_private *private,
> @@ -645,6 +623,7 @@ static const struct vfio_device_ops vfio_ccw_dev_ops = {
> .write = vfio_ccw_mdev_write,
> .ioctl = vfio_ccw_mdev_ioctl,
> .request = vfio_ccw_mdev_request,
> + .dma_unmap = vfio_ccw_dma_unmap,
> };
>
> struct mdev_driver vfio_ccw_mdev_driver = {
> diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h
> index b7163bac8cc75d..61418109238976 100644
> --- a/drivers/s390/cio/vfio_ccw_private.h
> +++ b/drivers/s390/cio/vfio_ccw_private.h
> @@ -74,7 +74,6 @@ struct vfio_ccw_crw {
> * @completion: synchronization helper of the I/O completion
> * @avail: available for creating a mediated device
> * @mdev: pointer to the mediated device
> - * @nb: notifier for vfio events
> * @io_region: MMIO region to input/output I/O arguments/results
> * @io_mutex: protect against concurrent update of I/O regions
> * @region: additional regions for other subchannel operations
> @@ -98,7 +97,6 @@ struct vfio_ccw_private {
> struct completion *completion;
> atomic_t avail;
> struct mdev_device *mdev;
> - struct notifier_block nb;
> struct ccw_io_region *io_region;
> struct mutex io_mutex;
> struct vfio_ccw_region *region;
> diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
> index a7d2a95796d360..bb1a1677c5c230 100644
> --- a/drivers/s390/crypto/vfio_ap_ops.c
> +++ b/drivers/s390/crypto/vfio_ap_ops.c
> @@ -1226,34 +1226,14 @@ static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev,
> return 0;
> }
>
> -/**
> - * vfio_ap_mdev_iommu_notifier - IOMMU notifier callback
> - *
> - * @nb: The notifier block
> - * @action: Action to be taken
> - * @data: data associated with the request
> - *
> - * For an UNMAP request, unpin the guest IOVA (the NIB guest address we
> - * pinned before). Other requests are ignored.
> - *
> - * Return: for an UNMAP request, NOFITY_OK; otherwise NOTIFY_DONE.
> - */
> -static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb,
> - unsigned long action, void *data)
> +static void vfio_ap_mdev_dma_unmap(struct vfio_device *vdev, u64 iova,
> + u64 length)
> {
> - struct ap_matrix_mdev *matrix_mdev;
> -
> - matrix_mdev = container_of(nb, struct ap_matrix_mdev, iommu_notifier);
> -
> - if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
> - struct vfio_iommu_type1_dma_unmap *unmap = data;
> - unsigned long g_pfn = unmap->iova >> PAGE_SHIFT;
> -
> - vfio_unpin_pages(&matrix_mdev->vdev, &g_pfn, 1);
> - return NOTIFY_OK;
> - }
> + struct ap_matrix_mdev *matrix_mdev =
> + container_of(vdev, struct ap_matrix_mdev, vdev);
> + unsigned long g_pfn = iova >> PAGE_SHIFT;
>
> - return NOTIFY_DONE;
> + vfio_unpin_pages(&matrix_mdev->vdev, &g_pfn, 1);
> }
>
> /**
> @@ -1380,27 +1360,11 @@ static int vfio_ap_mdev_open_device(struct vfio_device *vdev)
> {
> struct ap_matrix_mdev *matrix_mdev =
> container_of(vdev, struct ap_matrix_mdev, vdev);
> - unsigned long events;
> - int ret;
>
> if (!vdev->kvm)
> return -EINVAL;
>
> - ret = vfio_ap_mdev_set_kvm(matrix_mdev, vdev->kvm);
> - if (ret)
> - return ret;
> -
> - matrix_mdev->iommu_notifier.notifier_call = vfio_ap_mdev_iommu_notifier;
> - events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
> - ret = vfio_register_notifier(vdev, VFIO_IOMMU_NOTIFY, &events,
> - &matrix_mdev->iommu_notifier);
> - if (ret)
> - goto err_kvm;
> - return 0;
> -
> -err_kvm:
> - vfio_ap_mdev_unset_kvm(matrix_mdev);
> - return ret;
> + return vfio_ap_mdev_set_kvm(matrix_mdev, vdev->kvm);
> }
>
> static void vfio_ap_mdev_close_device(struct vfio_device *vdev)
> @@ -1408,8 +1372,6 @@ static void vfio_ap_mdev_close_device(struct vfio_device *vdev)
> struct ap_matrix_mdev *matrix_mdev =
> container_of(vdev, struct ap_matrix_mdev, vdev);
>
> - vfio_unregister_notifier(vdev, VFIO_IOMMU_NOTIFY,
> - &matrix_mdev->iommu_notifier);
> vfio_ap_mdev_unset_kvm(matrix_mdev);
> }
>
> @@ -1461,6 +1423,7 @@ static const struct vfio_device_ops vfio_ap_matrix_dev_ops = {
> .open_device = vfio_ap_mdev_open_device,
> .close_device = vfio_ap_mdev_close_device,
> .ioctl = vfio_ap_mdev_ioctl,
> + .dma_unmap = vfio_ap_mdev_dma_unmap,
> };
>
> static struct mdev_driver vfio_ap_matrix_driver = {
> diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h
> index a26efd804d0df3..abb59d59f81b20 100644
> --- a/drivers/s390/crypto/vfio_ap_private.h
> +++ b/drivers/s390/crypto/vfio_ap_private.h
> @@ -81,8 +81,6 @@ struct ap_matrix {
> * @node: allows the ap_matrix_mdev struct to be added to a list
> * @matrix: the adapters, usage domains and control domains assigned to the
> * mediated matrix device.
> - * @iommu_notifier: notifier block used for specifying callback function for
> - * handling the VFIO_IOMMU_NOTIFY_DMA_UNMAP even
> * @kvm: the struct holding guest's state
> * @pqap_hook: the function pointer to the interception handler for the
> * PQAP(AQIC) instruction.
> @@ -92,7 +90,6 @@ struct ap_matrix_mdev {
> struct vfio_device vdev;
> struct list_head node;
> struct ap_matrix matrix;
> - struct notifier_block iommu_notifier;
> struct kvm *kvm;
> crypto_hook pqap_hook;
> struct mdev_device *mdev;
> diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
> index 61e71c1154be67..610bb884d9197b 100644
> --- a/drivers/vfio/vfio.c
> +++ b/drivers/vfio/vfio.c
> @@ -231,6 +231,9 @@ int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
> {
> struct vfio_iommu_driver *driver, *tmp;
>
> + if (WARN_ON(!ops->register_notifier != !ops->unregister_notifier))
> + return -EINVAL;
> +
> driver = kzalloc(sizeof(*driver), GFP_KERNEL);
> if (!driver)
> return -ENOMEM;
> @@ -1077,8 +1080,20 @@ static void vfio_device_unassign_container(struct vfio_device *device)
> up_write(&device->group->group_rwsem);
> }
>
> +static int vfio_iommu_notifier(struct notifier_block *nb, unsigned long action,
> + void *data)
> +{
> + struct vfio_device *vfio_device =
> + container_of(nb, struct vfio_device, iommu_nb);
> + struct vfio_iommu_type1_dma_unmap *unmap = data;
> +
> + vfio_device->ops->dma_unmap(vfio_device, unmap->iova, unmap->size);
> + return NOTIFY_OK;
> +}
> +
> static struct file *vfio_device_open(struct vfio_device *device)
> {
> + struct vfio_iommu_driver *iommu_driver;
> struct file *filep;
> int ret;
>
> @@ -1109,6 +1124,18 @@ static struct file *vfio_device_open(struct vfio_device *device)
> if (ret)
> goto err_undo_count;
> }
> +
> + iommu_driver = device->group->container->iommu_driver;
> + if (device->ops->dma_unmap && iommu_driver &&
> + iommu_driver->ops->register_notifier) {
> + unsigned long events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
> +
> + device->iommu_nb.notifier_call = vfio_iommu_notifier;
> + iommu_driver->ops->register_notifier(
> + device->group->container->iommu_data, &events,
> + &device->iommu_nb);
> + }
> +
> up_read(&device->group->group_rwsem);
> }
> mutex_unlock(&device->dev_set->lock);
> @@ -1143,8 +1170,16 @@ static struct file *vfio_device_open(struct vfio_device *device)
> err_close_device:
> mutex_lock(&device->dev_set->lock);
> down_read(&device->group->group_rwsem);
> - if (device->open_count == 1 && device->ops->close_device)
> + if (device->open_count == 1 && device->ops->close_device) {
> device->ops->close_device(device);
> +
> + iommu_driver = device->group->container->iommu_driver;
> + if (device->ops->dma_unmap && iommu_driver &&
> + iommu_driver->ops->unregister_notifier)
> + iommu_driver->ops->unregister_notifier(
> + device->group->container->iommu_data,
> + &device->iommu_nb);
> + }
> err_undo_count:
> device->open_count--;
> if (device->open_count == 0 && device->kvm)
> @@ -1339,12 +1374,20 @@ static const struct file_operations vfio_group_fops = {
> static int vfio_device_fops_release(struct inode *inode, struct file *filep)
> {
> struct vfio_device *device = filep->private_data;
> + struct vfio_iommu_driver *iommu_driver;
>
> mutex_lock(&device->dev_set->lock);
> vfio_assert_device_open(device);
> down_read(&device->group->group_rwsem);
> if (device->open_count == 1 && device->ops->close_device)
> device->ops->close_device(device);
> +
> + iommu_driver = device->group->container->iommu_driver;
> + if (device->ops->dma_unmap && iommu_driver &&
> + iommu_driver->ops->unregister_notifier)
> + iommu_driver->ops->unregister_notifier(
> + device->group->container->iommu_data,
> + &device->iommu_nb);
> up_read(&device->group->group_rwsem);
> device->open_count--;
> if (device->open_count == 0)
> @@ -2027,90 +2070,6 @@ int vfio_dma_rw(struct vfio_device *device, dma_addr_t user_iova, void *data,
> }
> EXPORT_SYMBOL(vfio_dma_rw);
>
> -static int vfio_register_iommu_notifier(struct vfio_group *group,
> - unsigned long *events,
> - struct notifier_block *nb)
> -{
> - struct vfio_container *container;
> - struct vfio_iommu_driver *driver;
> - int ret;
> -
> - lockdep_assert_held_read(&group->group_rwsem);
> -
> - container = group->container;
> - driver = container->iommu_driver;
> - if (likely(driver && driver->ops->register_notifier))
> - ret = driver->ops->register_notifier(container->iommu_data,
> - events, nb);
> - else
> - ret = -ENOTTY;
> -
> - return ret;
> -}
> -
> -static int vfio_unregister_iommu_notifier(struct vfio_group *group,
> - struct notifier_block *nb)
> -{
> - struct vfio_container *container;
> - struct vfio_iommu_driver *driver;
> - int ret;
> -
> - lockdep_assert_held_read(&group->group_rwsem);
> -
> - container = group->container;
> - driver = container->iommu_driver;
> - if (likely(driver && driver->ops->unregister_notifier))
> - ret = driver->ops->unregister_notifier(container->iommu_data,
> - nb);
> - else
> - ret = -ENOTTY;
> -
> - return ret;
> -}
> -
> -int vfio_register_notifier(struct vfio_device *device,
> - enum vfio_notify_type type, unsigned long *events,
> - struct notifier_block *nb)
> -{
> - struct vfio_group *group = device->group;
> - int ret;
> -
> - if (!nb || !events || (*events == 0) ||
> - !vfio_assert_device_open(device))
> - return -EINVAL;
> -
> - switch (type) {
> - case VFIO_IOMMU_NOTIFY:
> - ret = vfio_register_iommu_notifier(group, events, nb);
> - break;
> - default:
> - ret = -EINVAL;
> - }
> - return ret;
> -}
> -EXPORT_SYMBOL(vfio_register_notifier);
> -
> -int vfio_unregister_notifier(struct vfio_device *device,
> - enum vfio_notify_type type,
> - struct notifier_block *nb)
> -{
> - struct vfio_group *group = device->group;
> - int ret;
> -
> - if (!nb || !vfio_assert_device_open(device))
> - return -EINVAL;
> -
> - switch (type) {
> - case VFIO_IOMMU_NOTIFY:
> - ret = vfio_unregister_iommu_notifier(group, nb);
> - break;
> - default:
> - ret = -EINVAL;
> - }
> - return ret;
> -}
> -EXPORT_SYMBOL(vfio_unregister_notifier);
> -
> /*
> * Module/class support
> */
> diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h
> index a6713022115155..25da02ca1568fc 100644
> --- a/drivers/vfio/vfio.h
> +++ b/drivers/vfio/vfio.h
> @@ -33,6 +33,9 @@ enum vfio_iommu_notify_type {
> VFIO_IOMMU_CONTAINER_CLOSE = 0,
> };
>
> +/* events for register_notifier() */
> +#define VFIO_IOMMU_NOTIFY_DMA_UNMAP BIT(0)
> +
> /**
> * struct vfio_iommu_driver_ops - VFIO IOMMU driver callbacks
> */
> diff --git a/include/linux/vfio.h b/include/linux/vfio.h
> index 49580fa2073a8d..8c5c389d42d918 100644
> --- a/include/linux/vfio.h
> +++ b/include/linux/vfio.h
> @@ -44,6 +44,7 @@ struct vfio_device {
> unsigned int open_count;
> struct completion comp;
> struct list_head group_next;
> + struct notifier_block iommu_nb;
> };
>
> /**
> @@ -60,6 +61,8 @@ struct vfio_device {
> * @match: Optional device name match callback (return: 0 for no-match, >0 for
> * match, -errno for abort (ex. match with insufficient or incorrect
> * additional args)
> + * @dma_unmap: Called when userspace unmaps IOVA from the container
> + * this device is attached to.
> * @device_feature: Optional, fill in the VFIO_DEVICE_FEATURE ioctl
> * @migration_set_state: Optional callback to change the migration state for
> * devices that support migration. It's mandatory for
> @@ -85,6 +88,7 @@ struct vfio_device_ops {
> int (*mmap)(struct vfio_device *vdev, struct vm_area_struct *vma);
> void (*request)(struct vfio_device *vdev, unsigned int count);
> int (*match)(struct vfio_device *vdev, char *buf);
> + void (*dma_unmap)(struct vfio_device *vdev, u64 iova, u64 length);
> int (*device_feature)(struct vfio_device *device, u32 flags,
> void __user *arg, size_t argsz);
> struct file *(*migration_set_state)(
> @@ -154,23 +158,6 @@ int vfio_unpin_pages(struct vfio_device *device, unsigned long *user_pfn,
> int vfio_dma_rw(struct vfio_device *device, dma_addr_t user_iova,
> void *data, size_t len, bool write);
>
> -/* each type has independent events */
> -enum vfio_notify_type {
> - VFIO_IOMMU_NOTIFY = 0,
> -};
> -
> -/* events for VFIO_IOMMU_NOTIFY */
> -#define VFIO_IOMMU_NOTIFY_DMA_UNMAP BIT(0)
> -
> -int vfio_register_notifier(struct vfio_device *device,
> - enum vfio_notify_type type,
> - unsigned long *required_events,
> - struct notifier_block *nb);
> -int vfio_unregister_notifier(struct vfio_device *device,
> - enum vfio_notify_type type,
> - struct notifier_block *nb);
> -
> -
> /*
> * Sub-module helpers
> */
> --
> 2.37.0
>
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [Intel-gfx] [PATCH v3 1/2] vfio: Replace the DMA unmapping notifier with a callback
2022-07-05 0:59 ` Jason Gunthorpe
(?)
@ 2022-07-07 21:37 ` Alex Williamson
-1 siblings, 0 replies; 25+ messages in thread
From: Alex Williamson @ 2022-07-07 21:37 UTC (permalink / raw)
To: Jason Gunthorpe
Cc: kvm, David Airlie, dri-devel, Vineeth Vijayan, Alexander Gordeev,
Christoph Hellwig, linux-s390, Matthew Rosato, Halil Pasic,
Christian Borntraeger, intel-gfx, Tony Krowiak, Eric Farman,
Vasily Gorbik, Heiko Carstens, Harald Freudenberger, Rodrigo Vivi,
intel-gvt-dev, Jason Herne, Cornelia Huck, Peter Oberparleiter,
Sven Schnelle
On Mon, 4 Jul 2022 21:59:03 -0300
Jason Gunthorpe <jgg@nvidia.com> wrote:
> diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
> index b49e2e9db2dc6f..09e0ce7b72324c 100644
> --- a/drivers/s390/cio/vfio_ccw_ops.c
> +++ b/drivers/s390/cio/vfio_ccw_ops.c
> @@ -44,31 +44,19 @@ static int vfio_ccw_mdev_reset(struct vfio_ccw_private *private)
> return ret;
> }
>
> -static int vfio_ccw_mdev_notifier(struct notifier_block *nb,
> - unsigned long action,
> - void *data)
> +static void vfio_ccw_dma_unmap(struct vfio_device *vdev, u64 iova, u64 length)
> {
> struct vfio_ccw_private *private =
> - container_of(nb, struct vfio_ccw_private, nb);
> -
> - /*
> - * Vendor drivers MUST unpin pages in response to an
> - * invalidation.
> - */
> - if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
> - struct vfio_iommu_type1_dma_unmap *unmap = data;
> -
> - if (!cp_iova_pinned(&private->cp, unmap->iova))
> - return NOTIFY_OK;
> + container_of(vdev, struct vfio_ccw_private, vdev);
>
> - if (vfio_ccw_mdev_reset(private))
> - return NOTIFY_BAD;
> + /* Drivers MUST unpin pages in response to an invalidation. */
> + if (!cp_iova_pinned(&private->cp, iova))
> + return;
>
> - cp_free(&private->cp);
> - return NOTIFY_OK;
> - }
> + if (vfio_ccw_mdev_reset(private))
> + return;
>
> - return NOTIFY_DONE;
> + cp_free(&private->cp);
> }
The cp_free() call is gone here with [1], so I think this function now
just ends with:
...
vfio_ccw_mdev_reset(private);
}
There are also minor contextual differences elsewhere from that series,
so a quick respin to record the changes on list would be appreciated.
However the above kind of highlights that NOTIFY_BAD that silently gets
dropped here. I realize we weren't testing the return value of the
notifier call chain and really we didn't intend that notifiers could
return a failure here, but does this warrant some logging or suggest
future work to allow a device to go offline here? Thanks.
Alex
[1]https://lore.kernel.org/all/20220707135737.720765-1-farman@linux.ibm.com/
^ permalink raw reply [flat|nested] 25+ messages in thread* Re: [PATCH v3 1/2] vfio: Replace the DMA unmapping notifier with a callback
@ 2022-07-07 21:37 ` Alex Williamson
0 siblings, 0 replies; 25+ messages in thread
From: Alex Williamson @ 2022-07-07 21:37 UTC (permalink / raw)
To: Jason Gunthorpe
Cc: kvm, David Airlie, Kevin Tian, dri-devel, Vineeth Vijayan,
Alexander Gordeev, Christoph Hellwig, linux-s390, Matthew Rosato,
Halil Pasic, Christian Borntraeger, intel-gfx, Zhi Wang,
Tony Krowiak, Eric Farman, Vasily Gorbik, Heiko Carstens,
Harald Freudenberger, Rodrigo Vivi, intel-gvt-dev, Jason Herne,
Tvrtko Ursulin, Cornelia Huck, Peter Oberparleiter, Sven Schnelle
On Mon, 4 Jul 2022 21:59:03 -0300
Jason Gunthorpe <jgg@nvidia.com> wrote:
> diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
> index b49e2e9db2dc6f..09e0ce7b72324c 100644
> --- a/drivers/s390/cio/vfio_ccw_ops.c
> +++ b/drivers/s390/cio/vfio_ccw_ops.c
> @@ -44,31 +44,19 @@ static int vfio_ccw_mdev_reset(struct vfio_ccw_private *private)
> return ret;
> }
>
> -static int vfio_ccw_mdev_notifier(struct notifier_block *nb,
> - unsigned long action,
> - void *data)
> +static void vfio_ccw_dma_unmap(struct vfio_device *vdev, u64 iova, u64 length)
> {
> struct vfio_ccw_private *private =
> - container_of(nb, struct vfio_ccw_private, nb);
> -
> - /*
> - * Vendor drivers MUST unpin pages in response to an
> - * invalidation.
> - */
> - if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
> - struct vfio_iommu_type1_dma_unmap *unmap = data;
> -
> - if (!cp_iova_pinned(&private->cp, unmap->iova))
> - return NOTIFY_OK;
> + container_of(vdev, struct vfio_ccw_private, vdev);
>
> - if (vfio_ccw_mdev_reset(private))
> - return NOTIFY_BAD;
> + /* Drivers MUST unpin pages in response to an invalidation. */
> + if (!cp_iova_pinned(&private->cp, iova))
> + return;
>
> - cp_free(&private->cp);
> - return NOTIFY_OK;
> - }
> + if (vfio_ccw_mdev_reset(private))
> + return;
>
> - return NOTIFY_DONE;
> + cp_free(&private->cp);
> }
The cp_free() call is gone here with [1], so I think this function now
just ends with:
...
vfio_ccw_mdev_reset(private);
}
There are also minor contextual differences elsewhere from that series,
so a quick respin to record the changes on list would be appreciated.
However the above kind of highlights that NOTIFY_BAD that silently gets
dropped here. I realize we weren't testing the return value of the
notifier call chain and really we didn't intend that notifiers could
return a failure here, but does this warrant some logging or suggest
future work to allow a device to go offline here? Thanks.
Alex
[1]https://lore.kernel.org/all/20220707135737.720765-1-farman@linux.ibm.com/
^ permalink raw reply [flat|nested] 25+ messages in thread* Re: [PATCH v3 1/2] vfio: Replace the DMA unmapping notifier with a callback
@ 2022-07-07 21:37 ` Alex Williamson
0 siblings, 0 replies; 25+ messages in thread
From: Alex Williamson @ 2022-07-07 21:37 UTC (permalink / raw)
To: Jason Gunthorpe
Cc: Alexander Gordeev, David Airlie, Christian Borntraeger,
Cornelia Huck, Daniel Vetter, dri-devel, Harald Freudenberger,
Vasily Gorbik, Heiko Carstens, intel-gfx, intel-gvt-dev,
Jani Nikula, Jason Herne, Joonas Lahtinen, kvm, linux-s390,
Matthew Rosato, Peter Oberparleiter, Halil Pasic, Rodrigo Vivi,
Sven Schnelle, Tvrtko Ursulin, Vineeth Vijayan, Zhenyu Wang,
Zhi Wang, Tony Krowiak, Eric Farman, Christoph Hellwig,
Kevin Tian
On Mon, 4 Jul 2022 21:59:03 -0300
Jason Gunthorpe <jgg@nvidia.com> wrote:
> diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
> index b49e2e9db2dc6f..09e0ce7b72324c 100644
> --- a/drivers/s390/cio/vfio_ccw_ops.c
> +++ b/drivers/s390/cio/vfio_ccw_ops.c
> @@ -44,31 +44,19 @@ static int vfio_ccw_mdev_reset(struct vfio_ccw_private *private)
> return ret;
> }
>
> -static int vfio_ccw_mdev_notifier(struct notifier_block *nb,
> - unsigned long action,
> - void *data)
> +static void vfio_ccw_dma_unmap(struct vfio_device *vdev, u64 iova, u64 length)
> {
> struct vfio_ccw_private *private =
> - container_of(nb, struct vfio_ccw_private, nb);
> -
> - /*
> - * Vendor drivers MUST unpin pages in response to an
> - * invalidation.
> - */
> - if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
> - struct vfio_iommu_type1_dma_unmap *unmap = data;
> -
> - if (!cp_iova_pinned(&private->cp, unmap->iova))
> - return NOTIFY_OK;
> + container_of(vdev, struct vfio_ccw_private, vdev);
>
> - if (vfio_ccw_mdev_reset(private))
> - return NOTIFY_BAD;
> + /* Drivers MUST unpin pages in response to an invalidation. */
> + if (!cp_iova_pinned(&private->cp, iova))
> + return;
>
> - cp_free(&private->cp);
> - return NOTIFY_OK;
> - }
> + if (vfio_ccw_mdev_reset(private))
> + return;
>
> - return NOTIFY_DONE;
> + cp_free(&private->cp);
> }
The cp_free() call is gone here with [1], so I think this function now
just ends with:
...
vfio_ccw_mdev_reset(private);
}
There are also minor contextual differences elsewhere from that series,
so a quick respin to record the changes on list would be appreciated.
However the above kind of highlights that NOTIFY_BAD that silently gets
dropped here. I realize we weren't testing the return value of the
notifier call chain and really we didn't intend that notifiers could
return a failure here, but does this warrant some logging or suggest
future work to allow a device to go offline here? Thanks.
Alex
[1]https://lore.kernel.org/all/20220707135737.720765-1-farman@linux.ibm.com/
^ permalink raw reply [flat|nested] 25+ messages in thread* Re: [Intel-gfx] [PATCH v3 1/2] vfio: Replace the DMA unmapping notifier with a callback
2022-07-07 21:37 ` Alex Williamson
(?)
@ 2022-07-19 23:44 ` Jason Gunthorpe
-1 siblings, 0 replies; 25+ messages in thread
From: Jason Gunthorpe @ 2022-07-19 23:44 UTC (permalink / raw)
To: Alex Williamson
Cc: kvm, David Airlie, dri-devel, Vineeth Vijayan, Alexander Gordeev,
Christoph Hellwig, linux-s390, Matthew Rosato, Halil Pasic,
Christian Borntraeger, intel-gfx, Tony Krowiak, Eric Farman,
Vasily Gorbik, Heiko Carstens, Harald Freudenberger, Rodrigo Vivi,
intel-gvt-dev, Jason Herne, Cornelia Huck, Peter Oberparleiter,
Sven Schnelle
On Thu, Jul 07, 2022 at 03:37:16PM -0600, Alex Williamson wrote:
> On Mon, 4 Jul 2022 21:59:03 -0300
> Jason Gunthorpe <jgg@nvidia.com> wrote:
> > diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
> > index b49e2e9db2dc6f..09e0ce7b72324c 100644
> > --- a/drivers/s390/cio/vfio_ccw_ops.c
> > +++ b/drivers/s390/cio/vfio_ccw_ops.c
> > @@ -44,31 +44,19 @@ static int vfio_ccw_mdev_reset(struct vfio_ccw_private *private)
> > return ret;
> > }
> >
> > -static int vfio_ccw_mdev_notifier(struct notifier_block *nb,
> > - unsigned long action,
> > - void *data)
> > +static void vfio_ccw_dma_unmap(struct vfio_device *vdev, u64 iova, u64 length)
> > {
> > struct vfio_ccw_private *private =
> > - container_of(nb, struct vfio_ccw_private, nb);
> > -
> > - /*
> > - * Vendor drivers MUST unpin pages in response to an
> > - * invalidation.
> > - */
> > - if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
> > - struct vfio_iommu_type1_dma_unmap *unmap = data;
> > -
> > - if (!cp_iova_pinned(&private->cp, unmap->iova))
> > - return NOTIFY_OK;
> > + container_of(vdev, struct vfio_ccw_private, vdev);
> >
> > - if (vfio_ccw_mdev_reset(private))
> > - return NOTIFY_BAD;
> > + /* Drivers MUST unpin pages in response to an invalidation. */
> > + if (!cp_iova_pinned(&private->cp, iova))
> > + return;
> >
> > - cp_free(&private->cp);
> > - return NOTIFY_OK;
> > - }
> > + if (vfio_ccw_mdev_reset(private))
> > + return;
> >
> > - return NOTIFY_DONE;
> > + cp_free(&private->cp);
> > }
>
>
> The cp_free() call is gone here with [1], so I think this function now
> just ends with:
>
> ...
> vfio_ccw_mdev_reset(private);
> }
>
> There are also minor contextual differences elsewhere from that series,
> so a quick respin to record the changes on list would be appreciated.
>
> However the above kind of highlights that NOTIFY_BAD that silently gets
> dropped here. I realize we weren't testing the return value of the
> notifier call chain and really we didn't intend that notifiers could
> return a failure here, but does this warrant some logging or suggest
> future work to allow a device to go offline here? Thanks.
It looks like no.
If the FSM trapped in a bad state here, such as
VFIO_CCW_STATE_NOT_OPER, then it means it should have already unpinned
the pages and this is considered a success for this purpose
The return code here exists only to return to userspace so it can
detect during a VFIO_DEVICE_RESET that the device has crashed
irrecoverably.
Thus just continuing to silently ignore it seems like the best thing.
Jason
^ permalink raw reply [flat|nested] 25+ messages in thread* Re: [PATCH v3 1/2] vfio: Replace the DMA unmapping notifier with a callback
@ 2022-07-19 23:44 ` Jason Gunthorpe
0 siblings, 0 replies; 25+ messages in thread
From: Jason Gunthorpe @ 2022-07-19 23:44 UTC (permalink / raw)
To: Alex Williamson
Cc: kvm, David Airlie, Kevin Tian, dri-devel, Vineeth Vijayan,
Alexander Gordeev, Christoph Hellwig, linux-s390, Matthew Rosato,
Halil Pasic, Christian Borntraeger, intel-gfx, Zhi Wang,
Tony Krowiak, Eric Farman, Vasily Gorbik, Heiko Carstens,
Harald Freudenberger, Rodrigo Vivi, intel-gvt-dev, Jason Herne,
Tvrtko Ursulin, Cornelia Huck, Peter Oberparleiter, Sven Schnelle
On Thu, Jul 07, 2022 at 03:37:16PM -0600, Alex Williamson wrote:
> On Mon, 4 Jul 2022 21:59:03 -0300
> Jason Gunthorpe <jgg@nvidia.com> wrote:
> > diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
> > index b49e2e9db2dc6f..09e0ce7b72324c 100644
> > --- a/drivers/s390/cio/vfio_ccw_ops.c
> > +++ b/drivers/s390/cio/vfio_ccw_ops.c
> > @@ -44,31 +44,19 @@ static int vfio_ccw_mdev_reset(struct vfio_ccw_private *private)
> > return ret;
> > }
> >
> > -static int vfio_ccw_mdev_notifier(struct notifier_block *nb,
> > - unsigned long action,
> > - void *data)
> > +static void vfio_ccw_dma_unmap(struct vfio_device *vdev, u64 iova, u64 length)
> > {
> > struct vfio_ccw_private *private =
> > - container_of(nb, struct vfio_ccw_private, nb);
> > -
> > - /*
> > - * Vendor drivers MUST unpin pages in response to an
> > - * invalidation.
> > - */
> > - if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
> > - struct vfio_iommu_type1_dma_unmap *unmap = data;
> > -
> > - if (!cp_iova_pinned(&private->cp, unmap->iova))
> > - return NOTIFY_OK;
> > + container_of(vdev, struct vfio_ccw_private, vdev);
> >
> > - if (vfio_ccw_mdev_reset(private))
> > - return NOTIFY_BAD;
> > + /* Drivers MUST unpin pages in response to an invalidation. */
> > + if (!cp_iova_pinned(&private->cp, iova))
> > + return;
> >
> > - cp_free(&private->cp);
> > - return NOTIFY_OK;
> > - }
> > + if (vfio_ccw_mdev_reset(private))
> > + return;
> >
> > - return NOTIFY_DONE;
> > + cp_free(&private->cp);
> > }
>
>
> The cp_free() call is gone here with [1], so I think this function now
> just ends with:
>
> ...
> vfio_ccw_mdev_reset(private);
> }
>
> There are also minor contextual differences elsewhere from that series,
> so a quick respin to record the changes on list would be appreciated.
>
> However the above kind of highlights that NOTIFY_BAD that silently gets
> dropped here. I realize we weren't testing the return value of the
> notifier call chain and really we didn't intend that notifiers could
> return a failure here, but does this warrant some logging or suggest
> future work to allow a device to go offline here? Thanks.
It looks like no.
If the FSM trapped in a bad state here, such as
VFIO_CCW_STATE_NOT_OPER, then it means it should have already unpinned
the pages and this is considered a success for this purpose
The return code here exists only to return to userspace so it can
detect during a VFIO_DEVICE_RESET that the device has crashed
irrecoverably.
Thus just continuing to silently ignore it seems like the best thing.
Jason
^ permalink raw reply [flat|nested] 25+ messages in thread* Re: [PATCH v3 1/2] vfio: Replace the DMA unmapping notifier with a callback
@ 2022-07-19 23:44 ` Jason Gunthorpe
0 siblings, 0 replies; 25+ messages in thread
From: Jason Gunthorpe @ 2022-07-19 23:44 UTC (permalink / raw)
To: Alex Williamson
Cc: Alexander Gordeev, David Airlie, Christian Borntraeger,
Cornelia Huck, Daniel Vetter, dri-devel, Harald Freudenberger,
Vasily Gorbik, Heiko Carstens, intel-gfx, intel-gvt-dev,
Jani Nikula, Jason Herne, Joonas Lahtinen, kvm, linux-s390,
Matthew Rosato, Peter Oberparleiter, Halil Pasic, Rodrigo Vivi,
Sven Schnelle, Tvrtko Ursulin, Vineeth Vijayan, Zhenyu Wang,
Zhi Wang, Tony Krowiak, Eric Farman, Christoph Hellwig,
Kevin Tian
On Thu, Jul 07, 2022 at 03:37:16PM -0600, Alex Williamson wrote:
> On Mon, 4 Jul 2022 21:59:03 -0300
> Jason Gunthorpe <jgg@nvidia.com> wrote:
> > diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
> > index b49e2e9db2dc6f..09e0ce7b72324c 100644
> > --- a/drivers/s390/cio/vfio_ccw_ops.c
> > +++ b/drivers/s390/cio/vfio_ccw_ops.c
> > @@ -44,31 +44,19 @@ static int vfio_ccw_mdev_reset(struct vfio_ccw_private *private)
> > return ret;
> > }
> >
> > -static int vfio_ccw_mdev_notifier(struct notifier_block *nb,
> > - unsigned long action,
> > - void *data)
> > +static void vfio_ccw_dma_unmap(struct vfio_device *vdev, u64 iova, u64 length)
> > {
> > struct vfio_ccw_private *private =
> > - container_of(nb, struct vfio_ccw_private, nb);
> > -
> > - /*
> > - * Vendor drivers MUST unpin pages in response to an
> > - * invalidation.
> > - */
> > - if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
> > - struct vfio_iommu_type1_dma_unmap *unmap = data;
> > -
> > - if (!cp_iova_pinned(&private->cp, unmap->iova))
> > - return NOTIFY_OK;
> > + container_of(vdev, struct vfio_ccw_private, vdev);
> >
> > - if (vfio_ccw_mdev_reset(private))
> > - return NOTIFY_BAD;
> > + /* Drivers MUST unpin pages in response to an invalidation. */
> > + if (!cp_iova_pinned(&private->cp, iova))
> > + return;
> >
> > - cp_free(&private->cp);
> > - return NOTIFY_OK;
> > - }
> > + if (vfio_ccw_mdev_reset(private))
> > + return;
> >
> > - return NOTIFY_DONE;
> > + cp_free(&private->cp);
> > }
>
>
> The cp_free() call is gone here with [1], so I think this function now
> just ends with:
>
> ...
> vfio_ccw_mdev_reset(private);
> }
>
> There are also minor contextual differences elsewhere from that series,
> so a quick respin to record the changes on list would be appreciated.
>
> However the above kind of highlights that NOTIFY_BAD that silently gets
> dropped here. I realize we weren't testing the return value of the
> notifier call chain and really we didn't intend that notifiers could
> return a failure here, but does this warrant some logging or suggest
> future work to allow a device to go offline here? Thanks.
It looks like no.
If the FSM trapped in a bad state here, such as
VFIO_CCW_STATE_NOT_OPER, then it means it should have already unpinned
the pages and this is considered a success for this purpose
The return code here exists only to return to userspace so it can
detect during a VFIO_DEVICE_RESET that the device has crashed
irrecoverably.
Thus just continuing to silently ignore it seems like the best thing.
Jason
^ permalink raw reply [flat|nested] 25+ messages in thread* Re: [Intel-gfx] [PATCH v3 1/2] vfio: Replace the DMA unmapping notifier with a callback
2022-07-19 23:44 ` Jason Gunthorpe
(?)
@ 2022-07-20 7:47 ` Cornelia Huck
-1 siblings, 0 replies; 25+ messages in thread
From: Cornelia Huck @ 2022-07-20 7:47 UTC (permalink / raw)
To: Jason Gunthorpe, Alex Williamson
Cc: kvm, David Airlie, dri-devel, Vineeth Vijayan, Alexander Gordeev,
Christoph Hellwig, linux-s390, Matthew Rosato, Halil Pasic,
Christian Borntraeger, intel-gfx, Tony Krowiak, Eric Farman,
Vasily Gorbik, Heiko Carstens, Harald Freudenberger, Rodrigo Vivi,
intel-gvt-dev, Jason Herne, Peter Oberparleiter, Sven Schnelle
On Tue, Jul 19 2022, Jason Gunthorpe <jgg@nvidia.com> wrote:
> On Thu, Jul 07, 2022 at 03:37:16PM -0600, Alex Williamson wrote:
>> On Mon, 4 Jul 2022 21:59:03 -0300
>> Jason Gunthorpe <jgg@nvidia.com> wrote:
>> > diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
>> > index b49e2e9db2dc6f..09e0ce7b72324c 100644
>> > --- a/drivers/s390/cio/vfio_ccw_ops.c
>> > +++ b/drivers/s390/cio/vfio_ccw_ops.c
>> > @@ -44,31 +44,19 @@ static int vfio_ccw_mdev_reset(struct vfio_ccw_private *private)
>> > return ret;
>> > }
>> >
>> > -static int vfio_ccw_mdev_notifier(struct notifier_block *nb,
>> > - unsigned long action,
>> > - void *data)
>> > +static void vfio_ccw_dma_unmap(struct vfio_device *vdev, u64 iova, u64 length)
>> > {
>> > struct vfio_ccw_private *private =
>> > - container_of(nb, struct vfio_ccw_private, nb);
>> > -
>> > - /*
>> > - * Vendor drivers MUST unpin pages in response to an
>> > - * invalidation.
>> > - */
>> > - if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
>> > - struct vfio_iommu_type1_dma_unmap *unmap = data;
>> > -
>> > - if (!cp_iova_pinned(&private->cp, unmap->iova))
>> > - return NOTIFY_OK;
>> > + container_of(vdev, struct vfio_ccw_private, vdev);
>> >
>> > - if (vfio_ccw_mdev_reset(private))
>> > - return NOTIFY_BAD;
>> > + /* Drivers MUST unpin pages in response to an invalidation. */
>> > + if (!cp_iova_pinned(&private->cp, iova))
>> > + return;
>> >
>> > - cp_free(&private->cp);
>> > - return NOTIFY_OK;
>> > - }
>> > + if (vfio_ccw_mdev_reset(private))
>> > + return;
>> >
>> > - return NOTIFY_DONE;
>> > + cp_free(&private->cp);
>> > }
>>
>>
>> The cp_free() call is gone here with [1], so I think this function now
>> just ends with:
>>
>> ...
>> vfio_ccw_mdev_reset(private);
>> }
>>
>> There are also minor contextual differences elsewhere from that series,
>> so a quick respin to record the changes on list would be appreciated.
>>
>> However the above kind of highlights that NOTIFY_BAD that silently gets
>> dropped here. I realize we weren't testing the return value of the
>> notifier call chain and really we didn't intend that notifiers could
>> return a failure here, but does this warrant some logging or suggest
>> future work to allow a device to go offline here? Thanks.
>
> It looks like no.
>
> If the FSM trapped in a bad state here, such as
> VFIO_CCW_STATE_NOT_OPER, then it means it should have already unpinned
> the pages and this is considered a success for this purpose
A rather pathological case would be a subchannel that cannot be
quiesced and does not end up being non-operational; in theory, the
hardware could still try to access the buffers we provided for I/O. I'd
say that is extremely unlikely, we might log it, but really cannot do
anything else.
>
> The return code here exists only to return to userspace so it can
> detect during a VFIO_DEVICE_RESET that the device has crashed
> irrecoverably.
Does it imply only that ("it's dead, Jim"), or can it also imply a
runaway device? Not that userspace can do much in any case.
>
> Thus just continuing to silently ignore it seems like the best thing.
>
> Jason
^ permalink raw reply [flat|nested] 25+ messages in thread* Re: [PATCH v3 1/2] vfio: Replace the DMA unmapping notifier with a callback
@ 2022-07-20 7:47 ` Cornelia Huck
0 siblings, 0 replies; 25+ messages in thread
From: Cornelia Huck @ 2022-07-20 7:47 UTC (permalink / raw)
To: Jason Gunthorpe, Alex Williamson
Cc: kvm, David Airlie, Kevin Tian, dri-devel, Vineeth Vijayan,
Alexander Gordeev, Christoph Hellwig, linux-s390, Matthew Rosato,
Halil Pasic, Christian Borntraeger, intel-gfx, Zhi Wang,
Tony Krowiak, Eric Farman, Vasily Gorbik, Heiko Carstens,
Harald Freudenberger, Rodrigo Vivi, intel-gvt-dev, Jason Herne,
Tvrtko Ursulin, Peter Oberparleiter, Sven Schnelle
On Tue, Jul 19 2022, Jason Gunthorpe <jgg@nvidia.com> wrote:
> On Thu, Jul 07, 2022 at 03:37:16PM -0600, Alex Williamson wrote:
>> On Mon, 4 Jul 2022 21:59:03 -0300
>> Jason Gunthorpe <jgg@nvidia.com> wrote:
>> > diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
>> > index b49e2e9db2dc6f..09e0ce7b72324c 100644
>> > --- a/drivers/s390/cio/vfio_ccw_ops.c
>> > +++ b/drivers/s390/cio/vfio_ccw_ops.c
>> > @@ -44,31 +44,19 @@ static int vfio_ccw_mdev_reset(struct vfio_ccw_private *private)
>> > return ret;
>> > }
>> >
>> > -static int vfio_ccw_mdev_notifier(struct notifier_block *nb,
>> > - unsigned long action,
>> > - void *data)
>> > +static void vfio_ccw_dma_unmap(struct vfio_device *vdev, u64 iova, u64 length)
>> > {
>> > struct vfio_ccw_private *private =
>> > - container_of(nb, struct vfio_ccw_private, nb);
>> > -
>> > - /*
>> > - * Vendor drivers MUST unpin pages in response to an
>> > - * invalidation.
>> > - */
>> > - if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
>> > - struct vfio_iommu_type1_dma_unmap *unmap = data;
>> > -
>> > - if (!cp_iova_pinned(&private->cp, unmap->iova))
>> > - return NOTIFY_OK;
>> > + container_of(vdev, struct vfio_ccw_private, vdev);
>> >
>> > - if (vfio_ccw_mdev_reset(private))
>> > - return NOTIFY_BAD;
>> > + /* Drivers MUST unpin pages in response to an invalidation. */
>> > + if (!cp_iova_pinned(&private->cp, iova))
>> > + return;
>> >
>> > - cp_free(&private->cp);
>> > - return NOTIFY_OK;
>> > - }
>> > + if (vfio_ccw_mdev_reset(private))
>> > + return;
>> >
>> > - return NOTIFY_DONE;
>> > + cp_free(&private->cp);
>> > }
>>
>>
>> The cp_free() call is gone here with [1], so I think this function now
>> just ends with:
>>
>> ...
>> vfio_ccw_mdev_reset(private);
>> }
>>
>> There are also minor contextual differences elsewhere from that series,
>> so a quick respin to record the changes on list would be appreciated.
>>
>> However the above kind of highlights that NOTIFY_BAD that silently gets
>> dropped here. I realize we weren't testing the return value of the
>> notifier call chain and really we didn't intend that notifiers could
>> return a failure here, but does this warrant some logging or suggest
>> future work to allow a device to go offline here? Thanks.
>
> It looks like no.
>
> If the FSM trapped in a bad state here, such as
> VFIO_CCW_STATE_NOT_OPER, then it means it should have already unpinned
> the pages and this is considered a success for this purpose
A rather pathological case would be a subchannel that cannot be
quiesced and does not end up being non-operational; in theory, the
hardware could still try to access the buffers we provided for I/O. I'd
say that is extremely unlikely, we might log it, but really cannot do
anything else.
>
> The return code here exists only to return to userspace so it can
> detect during a VFIO_DEVICE_RESET that the device has crashed
> irrecoverably.
Does it imply only that ("it's dead, Jim"), or can it also imply a
runaway device? Not that userspace can do much in any case.
>
> Thus just continuing to silently ignore it seems like the best thing.
>
> Jason
^ permalink raw reply [flat|nested] 25+ messages in thread* Re: [PATCH v3 1/2] vfio: Replace the DMA unmapping notifier with a callback
@ 2022-07-20 7:47 ` Cornelia Huck
0 siblings, 0 replies; 25+ messages in thread
From: Cornelia Huck @ 2022-07-20 7:47 UTC (permalink / raw)
To: Jason Gunthorpe, Alex Williamson
Cc: Alexander Gordeev, David Airlie, Christian Borntraeger,
Daniel Vetter, dri-devel, Harald Freudenberger, Vasily Gorbik,
Heiko Carstens, intel-gfx, intel-gvt-dev, Jani Nikula,
Jason Herne, Joonas Lahtinen, kvm, linux-s390, Matthew Rosato,
Peter Oberparleiter, Halil Pasic, Rodrigo Vivi, Sven Schnelle,
Tvrtko Ursulin, Vineeth Vijayan, Zhenyu Wang, Zhi Wang,
Tony Krowiak, Eric Farman, Christoph Hellwig, Kevin Tian
On Tue, Jul 19 2022, Jason Gunthorpe <jgg@nvidia.com> wrote:
> On Thu, Jul 07, 2022 at 03:37:16PM -0600, Alex Williamson wrote:
>> On Mon, 4 Jul 2022 21:59:03 -0300
>> Jason Gunthorpe <jgg@nvidia.com> wrote:
>> > diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
>> > index b49e2e9db2dc6f..09e0ce7b72324c 100644
>> > --- a/drivers/s390/cio/vfio_ccw_ops.c
>> > +++ b/drivers/s390/cio/vfio_ccw_ops.c
>> > @@ -44,31 +44,19 @@ static int vfio_ccw_mdev_reset(struct vfio_ccw_private *private)
>> > return ret;
>> > }
>> >
>> > -static int vfio_ccw_mdev_notifier(struct notifier_block *nb,
>> > - unsigned long action,
>> > - void *data)
>> > +static void vfio_ccw_dma_unmap(struct vfio_device *vdev, u64 iova, u64 length)
>> > {
>> > struct vfio_ccw_private *private =
>> > - container_of(nb, struct vfio_ccw_private, nb);
>> > -
>> > - /*
>> > - * Vendor drivers MUST unpin pages in response to an
>> > - * invalidation.
>> > - */
>> > - if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
>> > - struct vfio_iommu_type1_dma_unmap *unmap = data;
>> > -
>> > - if (!cp_iova_pinned(&private->cp, unmap->iova))
>> > - return NOTIFY_OK;
>> > + container_of(vdev, struct vfio_ccw_private, vdev);
>> >
>> > - if (vfio_ccw_mdev_reset(private))
>> > - return NOTIFY_BAD;
>> > + /* Drivers MUST unpin pages in response to an invalidation. */
>> > + if (!cp_iova_pinned(&private->cp, iova))
>> > + return;
>> >
>> > - cp_free(&private->cp);
>> > - return NOTIFY_OK;
>> > - }
>> > + if (vfio_ccw_mdev_reset(private))
>> > + return;
>> >
>> > - return NOTIFY_DONE;
>> > + cp_free(&private->cp);
>> > }
>>
>>
>> The cp_free() call is gone here with [1], so I think this function now
>> just ends with:
>>
>> ...
>> vfio_ccw_mdev_reset(private);
>> }
>>
>> There are also minor contextual differences elsewhere from that series,
>> so a quick respin to record the changes on list would be appreciated.
>>
>> However the above kind of highlights that NOTIFY_BAD that silently gets
>> dropped here. I realize we weren't testing the return value of the
>> notifier call chain and really we didn't intend that notifiers could
>> return a failure here, but does this warrant some logging or suggest
>> future work to allow a device to go offline here? Thanks.
>
> It looks like no.
>
> If the FSM trapped in a bad state here, such as
> VFIO_CCW_STATE_NOT_OPER, then it means it should have already unpinned
> the pages and this is considered a success for this purpose
A rather pathological case would be a subchannel that cannot be
quiesced and does not end up being non-operational; in theory, the
hardware could still try to access the buffers we provided for I/O. I'd
say that is extremely unlikely, we might log it, but really cannot do
anything else.
>
> The return code here exists only to return to userspace so it can
> detect during a VFIO_DEVICE_RESET that the device has crashed
> irrecoverably.
Does it imply only that ("it's dead, Jim"), or can it also imply a
runaway device? Not that userspace can do much in any case.
>
> Thus just continuing to silently ignore it seems like the best thing.
>
> Jason
^ permalink raw reply [flat|nested] 25+ messages in thread* Re: [Intel-gfx] [PATCH v3 1/2] vfio: Replace the DMA unmapping notifier with a callback
2022-07-20 7:47 ` Cornelia Huck
(?)
@ 2022-07-20 11:56 ` Jason Gunthorpe
-1 siblings, 0 replies; 25+ messages in thread
From: Jason Gunthorpe @ 2022-07-20 11:56 UTC (permalink / raw)
To: Cornelia Huck
Cc: kvm, David Airlie, dri-devel, Vineeth Vijayan, Alexander Gordeev,
Christoph Hellwig, linux-s390, Matthew Rosato, Halil Pasic,
Christian Borntraeger, intel-gfx, Tony Krowiak, Eric Farman,
Vasily Gorbik, Heiko Carstens, Harald Freudenberger, Rodrigo Vivi,
intel-gvt-dev, Jason Herne, Peter Oberparleiter, Sven Schnelle
On Wed, Jul 20, 2022 at 09:47:12AM +0200, Cornelia Huck wrote:
> > If the FSM trapped in a bad state here, such as
> > VFIO_CCW_STATE_NOT_OPER, then it means it should have already unpinned
> > the pages and this is considered a success for this purpose
>
> A rather pathological case would be a subchannel that cannot be
> quiesced and does not end up being non-operational; in theory, the
> hardware could still try to access the buffers we provided for I/O. I'd
> say that is extremely unlikely, we might log it, but really cannot do
> anything else.
I think if the FSM can't reach NOT_OPER then it would be appropriate
to panic the kernel when it realizes it has lost control of the
device.
> > The return code here exists only to return to userspace so it can
> > detect during a VFIO_DEVICE_RESET that the device has crashed
> > irrecoverably.
>
> Does it imply only that ("it's dead, Jim"), or can it also imply a
> runaway device? Not that userspace can do much in any case.
The kernel cannot permit a runaway device, the driver must panic if it
unable to quiet the device's DMA.
I assume this return from RESET is for cases where quieting was
successful.
Jason
^ permalink raw reply [flat|nested] 25+ messages in thread* Re: [PATCH v3 1/2] vfio: Replace the DMA unmapping notifier with a callback
@ 2022-07-20 11:56 ` Jason Gunthorpe
0 siblings, 0 replies; 25+ messages in thread
From: Jason Gunthorpe @ 2022-07-20 11:56 UTC (permalink / raw)
To: Cornelia Huck
Cc: kvm, David Airlie, Kevin Tian, dri-devel, Vineeth Vijayan,
Alexander Gordeev, Christoph Hellwig, linux-s390, Matthew Rosato,
Halil Pasic, Christian Borntraeger, intel-gfx, Zhi Wang,
Tony Krowiak, Eric Farman, Vasily Gorbik, Heiko Carstens,
Alex Williamson, Harald Freudenberger, Rodrigo Vivi,
intel-gvt-dev, Jason Herne, Tvrtko Ursulin, Peter Oberparleiter,
Sven Schnelle
On Wed, Jul 20, 2022 at 09:47:12AM +0200, Cornelia Huck wrote:
> > If the FSM trapped in a bad state here, such as
> > VFIO_CCW_STATE_NOT_OPER, then it means it should have already unpinned
> > the pages and this is considered a success for this purpose
>
> A rather pathological case would be a subchannel that cannot be
> quiesced and does not end up being non-operational; in theory, the
> hardware could still try to access the buffers we provided for I/O. I'd
> say that is extremely unlikely, we might log it, but really cannot do
> anything else.
I think if the FSM can't reach NOT_OPER then it would be appropriate
to panic the kernel when it realizes it has lost control of the
device.
> > The return code here exists only to return to userspace so it can
> > detect during a VFIO_DEVICE_RESET that the device has crashed
> > irrecoverably.
>
> Does it imply only that ("it's dead, Jim"), or can it also imply a
> runaway device? Not that userspace can do much in any case.
The kernel cannot permit a runaway device, the driver must panic if it
unable to quiet the device's DMA.
I assume this return from RESET is for cases where quieting was
successful.
Jason
^ permalink raw reply [flat|nested] 25+ messages in thread* Re: [PATCH v3 1/2] vfio: Replace the DMA unmapping notifier with a callback
@ 2022-07-20 11:56 ` Jason Gunthorpe
0 siblings, 0 replies; 25+ messages in thread
From: Jason Gunthorpe @ 2022-07-20 11:56 UTC (permalink / raw)
To: Cornelia Huck
Cc: Alex Williamson, Alexander Gordeev, David Airlie,
Christian Borntraeger, Daniel Vetter, dri-devel,
Harald Freudenberger, Vasily Gorbik, Heiko Carstens, intel-gfx,
intel-gvt-dev, Jani Nikula, Jason Herne, Joonas Lahtinen, kvm,
linux-s390, Matthew Rosato, Peter Oberparleiter, Halil Pasic,
Rodrigo Vivi, Sven Schnelle, Tvrtko Ursulin, Vineeth Vijayan,
Zhenyu Wang, Zhi Wang, Tony Krowiak, Eric Farman,
Christoph Hellwig, Kevin Tian
On Wed, Jul 20, 2022 at 09:47:12AM +0200, Cornelia Huck wrote:
> > If the FSM trapped in a bad state here, such as
> > VFIO_CCW_STATE_NOT_OPER, then it means it should have already unpinned
> > the pages and this is considered a success for this purpose
>
> A rather pathological case would be a subchannel that cannot be
> quiesced and does not end up being non-operational; in theory, the
> hardware could still try to access the buffers we provided for I/O. I'd
> say that is extremely unlikely, we might log it, but really cannot do
> anything else.
I think if the FSM can't reach NOT_OPER then it would be appropriate
to panic the kernel when it realizes it has lost control of the
device.
> > The return code here exists only to return to userspace so it can
> > detect during a VFIO_DEVICE_RESET that the device has crashed
> > irrecoverably.
>
> Does it imply only that ("it's dead, Jim"), or can it also imply a
> runaway device? Not that userspace can do much in any case.
The kernel cannot permit a runaway device, the driver must panic if it
unable to quiet the device's DMA.
I assume this return from RESET is for cases where quieting was
successful.
Jason
^ permalink raw reply [flat|nested] 25+ messages in thread