* [PATCH v4 1/2] vfio: Replace the DMA unmapping notifier with a callback
2022-07-20 0:02 [PATCH v4 0/2] Remove the VFIO_IOMMU_NOTIFY_DMA_UNMAP notifier Jason Gunthorpe
@ 2022-07-20 0:02 ` Jason Gunthorpe
2022-07-20 19:41 ` Alex Williamson
2022-07-20 0:02 ` [PATCH v4 2/2] vfio: Replace the iommu notifier with a device list Jason Gunthorpe
2022-07-22 22:50 ` [PATCH v4 0/2] Remove the VFIO_IOMMU_NOTIFY_DMA_UNMAP notifier Alex Williamson
2 siblings, 1 reply; 9+ messages in thread
From: Jason Gunthorpe @ 2022-07-20 0:02 UTC (permalink / raw)
To: Alexander Gordeev, David Airlie, Alex Williamson,
Christian Borntraeger, Cornelia Huck, Daniel Vetter, dri-devel,
Harald Freudenberger, Vasily Gorbik, Heiko Carstens, intel-gfx,
intel-gvt-dev, Jani Nikula, Jason Herne, Joonas Lahtinen, kvm,
linux-s390, Matthew Rosato, Peter Oberparleiter, Halil Pasic,
Rodrigo Vivi, Sven Schnelle, Tvrtko Ursulin, Vineeth Vijayan,
Zhi Wang
Cc: Tony Krowiak, Eric Farman, Christoph Hellwig, Kevin Tian,
Zhenyu Wang
Instead of having drivers register the notifier with explicit code just
have them provide a dma_unmap callback op in their driver ops and rely on
the core code to wire it up.
Suggested-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Tony Krowiak <akrowiak@linux.ibm.com>
Reviewed-by: Eric Farman <farman@linux.ibm.com>
Reviewed-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
drivers/gpu/drm/i915/gvt/gvt.h | 1 -
drivers/gpu/drm/i915/gvt/kvmgt.c | 75 ++++-----------
drivers/s390/cio/vfio_ccw_ops.c | 39 ++------
drivers/s390/cio/vfio_ccw_private.h | 2 -
drivers/s390/crypto/vfio_ap_ops.c | 53 ++---------
drivers/s390/crypto/vfio_ap_private.h | 3 -
drivers/vfio/vfio.c | 129 +++++++++-----------------
drivers/vfio/vfio.h | 3 +
include/linux/vfio.h | 21 +----
9 files changed, 86 insertions(+), 240 deletions(-)
diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index aee1a45da74bcb..705689e6401197 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -226,7 +226,6 @@ struct intel_vgpu {
unsigned long nr_cache_entries;
struct mutex cache_lock;
- struct notifier_block iommu_notifier;
atomic_t released;
struct kvm_page_track_notifier_node track_node;
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index e2f6c56ab3420c..ecd5bb37b63a2a 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -729,34 +729,25 @@ int intel_gvt_set_edid(struct intel_vgpu *vgpu, int port_num)
return ret;
}
-static int intel_vgpu_iommu_notifier(struct notifier_block *nb,
- unsigned long action, void *data)
+static void intel_vgpu_dma_unmap(struct vfio_device *vfio_dev, u64 iova,
+ u64 length)
{
- struct intel_vgpu *vgpu =
- container_of(nb, struct intel_vgpu, iommu_notifier);
-
- if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
- struct vfio_iommu_type1_dma_unmap *unmap = data;
- struct gvt_dma *entry;
- unsigned long iov_pfn, end_iov_pfn;
+ struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
+ struct gvt_dma *entry;
+ u64 iov_pfn = iova >> PAGE_SHIFT;
+ u64 end_iov_pfn = iov_pfn + length / PAGE_SIZE;
- iov_pfn = unmap->iova >> PAGE_SHIFT;
- end_iov_pfn = iov_pfn + unmap->size / PAGE_SIZE;
+ mutex_lock(&vgpu->cache_lock);
+ for (; iov_pfn < end_iov_pfn; iov_pfn++) {
+ entry = __gvt_cache_find_gfn(vgpu, iov_pfn);
+ if (!entry)
+ continue;
- mutex_lock(&vgpu->cache_lock);
- for (; iov_pfn < end_iov_pfn; iov_pfn++) {
- entry = __gvt_cache_find_gfn(vgpu, iov_pfn);
- if (!entry)
- continue;
-
- gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr,
- entry->size);
- __gvt_cache_remove_entry(vgpu, entry);
- }
- mutex_unlock(&vgpu->cache_lock);
+ gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr,
+ entry->size);
+ __gvt_cache_remove_entry(vgpu, entry);
}
-
- return NOTIFY_OK;
+ mutex_unlock(&vgpu->cache_lock);
}
static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu)
@@ -783,36 +774,20 @@ static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu)
static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
{
struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
- unsigned long events;
- int ret;
-
- vgpu->iommu_notifier.notifier_call = intel_vgpu_iommu_notifier;
- events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
- ret = vfio_register_notifier(vfio_dev, VFIO_IOMMU_NOTIFY, &events,
- &vgpu->iommu_notifier);
- if (ret != 0) {
- gvt_vgpu_err("vfio_register_notifier for iommu failed: %d\n",
- ret);
- goto out;
- }
-
- ret = -EEXIST;
if (vgpu->attached)
- goto undo_iommu;
+ return -EEXIST;
- ret = -ESRCH;
if (!vgpu->vfio_device.kvm ||
vgpu->vfio_device.kvm->mm != current->mm) {
gvt_vgpu_err("KVM is required to use Intel vGPU\n");
- goto undo_iommu;
+ return -ESRCH;
}
kvm_get_kvm(vgpu->vfio_device.kvm);
- ret = -EEXIST;
if (__kvmgt_vgpu_exist(vgpu))
- goto undo_iommu;
+ return -EEXIST;
vgpu->attached = true;
@@ -831,12 +806,6 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
atomic_set(&vgpu->released, 0);
return 0;
-
-undo_iommu:
- vfio_unregister_notifier(vfio_dev, VFIO_IOMMU_NOTIFY,
- &vgpu->iommu_notifier);
-out:
- return ret;
}
static void intel_vgpu_release_msi_eventfd_ctx(struct intel_vgpu *vgpu)
@@ -853,8 +822,6 @@ static void intel_vgpu_release_msi_eventfd_ctx(struct intel_vgpu *vgpu)
static void intel_vgpu_close_device(struct vfio_device *vfio_dev)
{
struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
- struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
- int ret;
if (!vgpu->attached)
return;
@@ -864,11 +831,6 @@ static void intel_vgpu_close_device(struct vfio_device *vfio_dev)
intel_gvt_release_vgpu(vgpu);
- ret = vfio_unregister_notifier(&vgpu->vfio_device, VFIO_IOMMU_NOTIFY,
- &vgpu->iommu_notifier);
- drm_WARN(&i915->drm, ret,
- "vfio_unregister_notifier for iommu failed: %d\n", ret);
-
debugfs_remove(debugfs_lookup(KVMGT_DEBUGFS_FILENAME, vgpu->debugfs));
kvm_page_track_unregister_notifier(vgpu->vfio_device.kvm,
@@ -1610,6 +1572,7 @@ static const struct vfio_device_ops intel_vgpu_dev_ops = {
.write = intel_vgpu_write,
.mmap = intel_vgpu_mmap,
.ioctl = intel_vgpu_ioctl,
+ .dma_unmap = intel_vgpu_dma_unmap,
};
static int intel_vgpu_probe(struct mdev_device *mdev)
diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
index bc2176421dc56e..0047fd88f93858 100644
--- a/drivers/s390/cio/vfio_ccw_ops.c
+++ b/drivers/s390/cio/vfio_ccw_ops.c
@@ -33,30 +33,16 @@ static int vfio_ccw_mdev_reset(struct vfio_ccw_private *private)
return 0;
}
-static int vfio_ccw_mdev_notifier(struct notifier_block *nb,
- unsigned long action,
- void *data)
+static void vfio_ccw_dma_unmap(struct vfio_device *vdev, u64 iova, u64 length)
{
struct vfio_ccw_private *private =
- container_of(nb, struct vfio_ccw_private, nb);
-
- /*
- * Vendor drivers MUST unpin pages in response to an
- * invalidation.
- */
- if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
- struct vfio_iommu_type1_dma_unmap *unmap = data;
-
- if (!cp_iova_pinned(&private->cp, unmap->iova))
- return NOTIFY_OK;
-
- if (vfio_ccw_mdev_reset(private))
- return NOTIFY_BAD;
+ container_of(vdev, struct vfio_ccw_private, vdev);
- return NOTIFY_OK;
- }
+ /* Drivers MUST unpin pages in response to an invalidation. */
+ if (!cp_iova_pinned(&private->cp, iova))
+ return;
- return NOTIFY_DONE;
+ vfio_ccw_mdev_reset(private);
}
static ssize_t name_show(struct mdev_type *mtype,
@@ -154,23 +140,15 @@ static int vfio_ccw_mdev_open_device(struct vfio_device *vdev)
{
struct vfio_ccw_private *private =
container_of(vdev, struct vfio_ccw_private, vdev);
- unsigned long events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
int ret;
/* Device cannot simply be opened again from this state */
if (private->state == VFIO_CCW_STATE_NOT_OPER)
return -EINVAL;
- private->nb.notifier_call = vfio_ccw_mdev_notifier;
-
- ret = vfio_register_notifier(vdev, VFIO_IOMMU_NOTIFY,
- &events, &private->nb);
- if (ret)
- return ret;
-
ret = vfio_ccw_register_async_dev_regions(private);
if (ret)
- goto out_unregister;
+ return ret;
ret = vfio_ccw_register_schib_dev_regions(private);
if (ret)
@@ -190,7 +168,6 @@ static int vfio_ccw_mdev_open_device(struct vfio_device *vdev)
out_unregister:
vfio_ccw_unregister_dev_regions(private);
- vfio_unregister_notifier(vdev, VFIO_IOMMU_NOTIFY, &private->nb);
return ret;
}
@@ -201,7 +178,6 @@ static void vfio_ccw_mdev_close_device(struct vfio_device *vdev)
vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_CLOSE);
vfio_ccw_unregister_dev_regions(private);
- vfio_unregister_notifier(vdev, VFIO_IOMMU_NOTIFY, &private->nb);
}
static ssize_t vfio_ccw_mdev_read_io_region(struct vfio_ccw_private *private,
@@ -624,6 +600,7 @@ static const struct vfio_device_ops vfio_ccw_dev_ops = {
.write = vfio_ccw_mdev_write,
.ioctl = vfio_ccw_mdev_ioctl,
.request = vfio_ccw_mdev_request,
+ .dma_unmap = vfio_ccw_dma_unmap,
};
struct mdev_driver vfio_ccw_mdev_driver = {
diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h
index abac532bf03eb4..cd24b7fada91ca 100644
--- a/drivers/s390/cio/vfio_ccw_private.h
+++ b/drivers/s390/cio/vfio_ccw_private.h
@@ -73,7 +73,6 @@ struct vfio_ccw_crw {
* @state: internal state of the device
* @completion: synchronization helper of the I/O completion
* @avail: available for creating a mediated device
- * @nb: notifier for vfio events
* @io_region: MMIO region to input/output I/O arguments/results
* @io_mutex: protect against concurrent update of I/O regions
* @region: additional regions for other subchannel operations
@@ -96,7 +95,6 @@ struct vfio_ccw_private {
int state;
struct completion *completion;
atomic_t avail;
- struct notifier_block nb;
struct ccw_io_region *io_region;
struct mutex io_mutex;
struct vfio_ccw_region *region;
diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
index a7d2a95796d360..bb1a1677c5c230 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -1226,34 +1226,14 @@ static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev,
return 0;
}
-/**
- * vfio_ap_mdev_iommu_notifier - IOMMU notifier callback
- *
- * @nb: The notifier block
- * @action: Action to be taken
- * @data: data associated with the request
- *
- * For an UNMAP request, unpin the guest IOVA (the NIB guest address we
- * pinned before). Other requests are ignored.
- *
- * Return: for an UNMAP request, NOFITY_OK; otherwise NOTIFY_DONE.
- */
-static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb,
- unsigned long action, void *data)
+static void vfio_ap_mdev_dma_unmap(struct vfio_device *vdev, u64 iova,
+ u64 length)
{
- struct ap_matrix_mdev *matrix_mdev;
-
- matrix_mdev = container_of(nb, struct ap_matrix_mdev, iommu_notifier);
-
- if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
- struct vfio_iommu_type1_dma_unmap *unmap = data;
- unsigned long g_pfn = unmap->iova >> PAGE_SHIFT;
-
- vfio_unpin_pages(&matrix_mdev->vdev, &g_pfn, 1);
- return NOTIFY_OK;
- }
+ struct ap_matrix_mdev *matrix_mdev =
+ container_of(vdev, struct ap_matrix_mdev, vdev);
+ unsigned long g_pfn = iova >> PAGE_SHIFT;
- return NOTIFY_DONE;
+ vfio_unpin_pages(&matrix_mdev->vdev, &g_pfn, 1);
}
/**
@@ -1380,27 +1360,11 @@ static int vfio_ap_mdev_open_device(struct vfio_device *vdev)
{
struct ap_matrix_mdev *matrix_mdev =
container_of(vdev, struct ap_matrix_mdev, vdev);
- unsigned long events;
- int ret;
if (!vdev->kvm)
return -EINVAL;
- ret = vfio_ap_mdev_set_kvm(matrix_mdev, vdev->kvm);
- if (ret)
- return ret;
-
- matrix_mdev->iommu_notifier.notifier_call = vfio_ap_mdev_iommu_notifier;
- events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
- ret = vfio_register_notifier(vdev, VFIO_IOMMU_NOTIFY, &events,
- &matrix_mdev->iommu_notifier);
- if (ret)
- goto err_kvm;
- return 0;
-
-err_kvm:
- vfio_ap_mdev_unset_kvm(matrix_mdev);
- return ret;
+ return vfio_ap_mdev_set_kvm(matrix_mdev, vdev->kvm);
}
static void vfio_ap_mdev_close_device(struct vfio_device *vdev)
@@ -1408,8 +1372,6 @@ static void vfio_ap_mdev_close_device(struct vfio_device *vdev)
struct ap_matrix_mdev *matrix_mdev =
container_of(vdev, struct ap_matrix_mdev, vdev);
- vfio_unregister_notifier(vdev, VFIO_IOMMU_NOTIFY,
- &matrix_mdev->iommu_notifier);
vfio_ap_mdev_unset_kvm(matrix_mdev);
}
@@ -1461,6 +1423,7 @@ static const struct vfio_device_ops vfio_ap_matrix_dev_ops = {
.open_device = vfio_ap_mdev_open_device,
.close_device = vfio_ap_mdev_close_device,
.ioctl = vfio_ap_mdev_ioctl,
+ .dma_unmap = vfio_ap_mdev_dma_unmap,
};
static struct mdev_driver vfio_ap_matrix_driver = {
diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h
index a26efd804d0df3..abb59d59f81b20 100644
--- a/drivers/s390/crypto/vfio_ap_private.h
+++ b/drivers/s390/crypto/vfio_ap_private.h
@@ -81,8 +81,6 @@ struct ap_matrix {
* @node: allows the ap_matrix_mdev struct to be added to a list
* @matrix: the adapters, usage domains and control domains assigned to the
* mediated matrix device.
- * @iommu_notifier: notifier block used for specifying callback function for
- * handling the VFIO_IOMMU_NOTIFY_DMA_UNMAP even
* @kvm: the struct holding guest's state
* @pqap_hook: the function pointer to the interception handler for the
* PQAP(AQIC) instruction.
@@ -92,7 +90,6 @@ struct ap_matrix_mdev {
struct vfio_device vdev;
struct list_head node;
struct ap_matrix matrix;
- struct notifier_block iommu_notifier;
struct kvm *kvm;
crypto_hook pqap_hook;
struct mdev_device *mdev;
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index bd84ca7c5e35c4..83c375fa242121 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -231,6 +231,9 @@ int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
{
struct vfio_iommu_driver *driver, *tmp;
+ if (WARN_ON(!ops->register_notifier != !ops->unregister_notifier))
+ return -EINVAL;
+
driver = kzalloc(sizeof(*driver), GFP_KERNEL);
if (!driver)
return -ENOMEM;
@@ -1079,8 +1082,20 @@ static void vfio_device_unassign_container(struct vfio_device *device)
up_write(&device->group->group_rwsem);
}
+static int vfio_iommu_notifier(struct notifier_block *nb, unsigned long action,
+ void *data)
+{
+ struct vfio_device *vfio_device =
+ container_of(nb, struct vfio_device, iommu_nb);
+ struct vfio_iommu_type1_dma_unmap *unmap = data;
+
+ vfio_device->ops->dma_unmap(vfio_device, unmap->iova, unmap->size);
+ return NOTIFY_OK;
+}
+
static struct file *vfio_device_open(struct vfio_device *device)
{
+ struct vfio_iommu_driver *iommu_driver;
struct file *filep;
int ret;
@@ -1111,6 +1126,18 @@ static struct file *vfio_device_open(struct vfio_device *device)
if (ret)
goto err_undo_count;
}
+
+ iommu_driver = device->group->container->iommu_driver;
+ if (device->ops->dma_unmap && iommu_driver &&
+ iommu_driver->ops->register_notifier) {
+ unsigned long events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
+
+ device->iommu_nb.notifier_call = vfio_iommu_notifier;
+ iommu_driver->ops->register_notifier(
+ device->group->container->iommu_data, &events,
+ &device->iommu_nb);
+ }
+
up_read(&device->group->group_rwsem);
}
mutex_unlock(&device->dev_set->lock);
@@ -1145,8 +1172,16 @@ static struct file *vfio_device_open(struct vfio_device *device)
err_close_device:
mutex_lock(&device->dev_set->lock);
down_read(&device->group->group_rwsem);
- if (device->open_count == 1 && device->ops->close_device)
+ if (device->open_count == 1 && device->ops->close_device) {
device->ops->close_device(device);
+
+ iommu_driver = device->group->container->iommu_driver;
+ if (device->ops->dma_unmap && iommu_driver &&
+ iommu_driver->ops->unregister_notifier)
+ iommu_driver->ops->unregister_notifier(
+ device->group->container->iommu_data,
+ &device->iommu_nb);
+ }
err_undo_count:
up_read(&device->group->group_rwsem);
device->open_count--;
@@ -1341,12 +1376,20 @@ static const struct file_operations vfio_group_fops = {
static int vfio_device_fops_release(struct inode *inode, struct file *filep)
{
struct vfio_device *device = filep->private_data;
+ struct vfio_iommu_driver *iommu_driver;
mutex_lock(&device->dev_set->lock);
vfio_assert_device_open(device);
down_read(&device->group->group_rwsem);
if (device->open_count == 1 && device->ops->close_device)
device->ops->close_device(device);
+
+ iommu_driver = device->group->container->iommu_driver;
+ if (device->ops->dma_unmap && iommu_driver &&
+ iommu_driver->ops->unregister_notifier)
+ iommu_driver->ops->unregister_notifier(
+ device->group->container->iommu_data,
+ &device->iommu_nb);
up_read(&device->group->group_rwsem);
device->open_count--;
if (device->open_count == 0)
@@ -2029,90 +2072,6 @@ int vfio_dma_rw(struct vfio_device *device, dma_addr_t user_iova, void *data,
}
EXPORT_SYMBOL(vfio_dma_rw);
-static int vfio_register_iommu_notifier(struct vfio_group *group,
- unsigned long *events,
- struct notifier_block *nb)
-{
- struct vfio_container *container;
- struct vfio_iommu_driver *driver;
- int ret;
-
- lockdep_assert_held_read(&group->group_rwsem);
-
- container = group->container;
- driver = container->iommu_driver;
- if (likely(driver && driver->ops->register_notifier))
- ret = driver->ops->register_notifier(container->iommu_data,
- events, nb);
- else
- ret = -ENOTTY;
-
- return ret;
-}
-
-static int vfio_unregister_iommu_notifier(struct vfio_group *group,
- struct notifier_block *nb)
-{
- struct vfio_container *container;
- struct vfio_iommu_driver *driver;
- int ret;
-
- lockdep_assert_held_read(&group->group_rwsem);
-
- container = group->container;
- driver = container->iommu_driver;
- if (likely(driver && driver->ops->unregister_notifier))
- ret = driver->ops->unregister_notifier(container->iommu_data,
- nb);
- else
- ret = -ENOTTY;
-
- return ret;
-}
-
-int vfio_register_notifier(struct vfio_device *device,
- enum vfio_notify_type type, unsigned long *events,
- struct notifier_block *nb)
-{
- struct vfio_group *group = device->group;
- int ret;
-
- if (!nb || !events || (*events == 0) ||
- !vfio_assert_device_open(device))
- return -EINVAL;
-
- switch (type) {
- case VFIO_IOMMU_NOTIFY:
- ret = vfio_register_iommu_notifier(group, events, nb);
- break;
- default:
- ret = -EINVAL;
- }
- return ret;
-}
-EXPORT_SYMBOL(vfio_register_notifier);
-
-int vfio_unregister_notifier(struct vfio_device *device,
- enum vfio_notify_type type,
- struct notifier_block *nb)
-{
- struct vfio_group *group = device->group;
- int ret;
-
- if (!nb || !vfio_assert_device_open(device))
- return -EINVAL;
-
- switch (type) {
- case VFIO_IOMMU_NOTIFY:
- ret = vfio_unregister_iommu_notifier(group, nb);
- break;
- default:
- ret = -EINVAL;
- }
- return ret;
-}
-EXPORT_SYMBOL(vfio_unregister_notifier);
-
/*
* Module/class support
*/
diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h
index a6713022115155..25da02ca1568fc 100644
--- a/drivers/vfio/vfio.h
+++ b/drivers/vfio/vfio.h
@@ -33,6 +33,9 @@ enum vfio_iommu_notify_type {
VFIO_IOMMU_CONTAINER_CLOSE = 0,
};
+/* events for register_notifier() */
+#define VFIO_IOMMU_NOTIFY_DMA_UNMAP BIT(0)
+
/**
* struct vfio_iommu_driver_ops - VFIO IOMMU driver callbacks
*/
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 4d26e149db8182..1f9fc7a9be9efa 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -49,6 +49,7 @@ struct vfio_device {
unsigned int open_count;
struct completion comp;
struct list_head group_next;
+ struct notifier_block iommu_nb;
};
/**
@@ -65,6 +66,8 @@ struct vfio_device {
* @match: Optional device name match callback (return: 0 for no-match, >0 for
* match, -errno for abort (ex. match with insufficient or incorrect
* additional args)
+ * @dma_unmap: Called when userspace unmaps IOVA from the container
+ * this device is attached to.
* @device_feature: Optional, fill in the VFIO_DEVICE_FEATURE ioctl
*/
struct vfio_device_ops {
@@ -80,6 +83,7 @@ struct vfio_device_ops {
int (*mmap)(struct vfio_device *vdev, struct vm_area_struct *vma);
void (*request)(struct vfio_device *vdev, unsigned int count);
int (*match)(struct vfio_device *vdev, char *buf);
+ void (*dma_unmap)(struct vfio_device *vdev, u64 iova, u64 length);
int (*device_feature)(struct vfio_device *device, u32 flags,
void __user *arg, size_t argsz);
};
@@ -164,23 +168,6 @@ int vfio_unpin_pages(struct vfio_device *device, unsigned long *user_pfn,
int vfio_dma_rw(struct vfio_device *device, dma_addr_t user_iova,
void *data, size_t len, bool write);
-/* each type has independent events */
-enum vfio_notify_type {
- VFIO_IOMMU_NOTIFY = 0,
-};
-
-/* events for VFIO_IOMMU_NOTIFY */
-#define VFIO_IOMMU_NOTIFY_DMA_UNMAP BIT(0)
-
-int vfio_register_notifier(struct vfio_device *device,
- enum vfio_notify_type type,
- unsigned long *required_events,
- struct notifier_block *nb);
-int vfio_unregister_notifier(struct vfio_device *device,
- enum vfio_notify_type type,
- struct notifier_block *nb);
-
-
/*
* Sub-module helpers
*/
--
2.37.1
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH v4 2/2] vfio: Replace the iommu notifier with a device list
2022-07-20 0:02 [PATCH v4 0/2] Remove the VFIO_IOMMU_NOTIFY_DMA_UNMAP notifier Jason Gunthorpe
2022-07-20 0:02 ` [PATCH v4 1/2] vfio: Replace the DMA unmapping notifier with a callback Jason Gunthorpe
@ 2022-07-20 0:02 ` Jason Gunthorpe
2022-07-22 22:50 ` [PATCH v4 0/2] Remove the VFIO_IOMMU_NOTIFY_DMA_UNMAP notifier Alex Williamson
2 siblings, 0 replies; 9+ messages in thread
From: Jason Gunthorpe @ 2022-07-20 0:02 UTC (permalink / raw)
To: Alexander Gordeev, David Airlie, Alex Williamson,
Christian Borntraeger, Cornelia Huck, Daniel Vetter, dri-devel,
Harald Freudenberger, Vasily Gorbik, Heiko Carstens, intel-gfx,
intel-gvt-dev, Jani Nikula, Jason Herne, Joonas Lahtinen, kvm,
linux-s390, Matthew Rosato, Peter Oberparleiter, Halil Pasic,
Rodrigo Vivi, Sven Schnelle, Tvrtko Ursulin, Vineeth Vijayan,
Zhi Wang
Cc: Tony Krowiak, Eric Farman, Christoph Hellwig, Kevin Tian,
Zhenyu Wang
Instead of bouncing the function call to the driver op through a blocking
notifier just have the iommu layer call it directly.
Register each device that is being attached to the iommu with the lower
driver which then threads them on a linked list and calls the appropriate
driver op at the right time.
Currently the only use is if dma_unmap() is defined.
Also, fully lock all the debugging tests on the pinning path that a
dma_unmap is registered.
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
drivers/vfio/vfio.c | 41 ++++---------
drivers/vfio/vfio.h | 12 ++--
drivers/vfio/vfio_iommu_type1.c | 103 ++++++++++++++++++++------------
include/linux/vfio.h | 2 +-
4 files changed, 81 insertions(+), 77 deletions(-)
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 83c375fa242121..b3ce8073cfb1fe 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -231,7 +231,7 @@ int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
{
struct vfio_iommu_driver *driver, *tmp;
- if (WARN_ON(!ops->register_notifier != !ops->unregister_notifier))
+ if (WARN_ON(!ops->register_device != !ops->unregister_device))
return -EINVAL;
driver = kzalloc(sizeof(*driver), GFP_KERNEL);
@@ -1082,17 +1082,6 @@ static void vfio_device_unassign_container(struct vfio_device *device)
up_write(&device->group->group_rwsem);
}
-static int vfio_iommu_notifier(struct notifier_block *nb, unsigned long action,
- void *data)
-{
- struct vfio_device *vfio_device =
- container_of(nb, struct vfio_device, iommu_nb);
- struct vfio_iommu_type1_dma_unmap *unmap = data;
-
- vfio_device->ops->dma_unmap(vfio_device, unmap->iova, unmap->size);
- return NOTIFY_OK;
-}
-
static struct file *vfio_device_open(struct vfio_device *device)
{
struct vfio_iommu_driver *iommu_driver;
@@ -1128,15 +1117,9 @@ static struct file *vfio_device_open(struct vfio_device *device)
}
iommu_driver = device->group->container->iommu_driver;
- if (device->ops->dma_unmap && iommu_driver &&
- iommu_driver->ops->register_notifier) {
- unsigned long events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
-
- device->iommu_nb.notifier_call = vfio_iommu_notifier;
- iommu_driver->ops->register_notifier(
- device->group->container->iommu_data, &events,
- &device->iommu_nb);
- }
+ if (iommu_driver && iommu_driver->ops->register_device)
+ iommu_driver->ops->register_device(
+ device->group->container->iommu_data, device);
up_read(&device->group->group_rwsem);
}
@@ -1176,11 +1159,9 @@ static struct file *vfio_device_open(struct vfio_device *device)
device->ops->close_device(device);
iommu_driver = device->group->container->iommu_driver;
- if (device->ops->dma_unmap && iommu_driver &&
- iommu_driver->ops->unregister_notifier)
- iommu_driver->ops->unregister_notifier(
- device->group->container->iommu_data,
- &device->iommu_nb);
+ if (iommu_driver && iommu_driver->ops->unregister_device)
+ iommu_driver->ops->unregister_device(
+ device->group->container->iommu_data, device);
}
err_undo_count:
up_read(&device->group->group_rwsem);
@@ -1385,11 +1366,9 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep)
device->ops->close_device(device);
iommu_driver = device->group->container->iommu_driver;
- if (device->ops->dma_unmap && iommu_driver &&
- iommu_driver->ops->unregister_notifier)
- iommu_driver->ops->unregister_notifier(
- device->group->container->iommu_data,
- &device->iommu_nb);
+ if (iommu_driver && iommu_driver->ops->unregister_device)
+ iommu_driver->ops->unregister_device(
+ device->group->container->iommu_data, device);
up_read(&device->group->group_rwsem);
device->open_count--;
if (device->open_count == 0)
diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h
index 25da02ca1568fc..4a7db1f3c33e7e 100644
--- a/drivers/vfio/vfio.h
+++ b/drivers/vfio/vfio.h
@@ -33,9 +33,6 @@ enum vfio_iommu_notify_type {
VFIO_IOMMU_CONTAINER_CLOSE = 0,
};
-/* events for register_notifier() */
-#define VFIO_IOMMU_NOTIFY_DMA_UNMAP BIT(0)
-
/**
* struct vfio_iommu_driver_ops - VFIO IOMMU driver callbacks
*/
@@ -58,11 +55,10 @@ struct vfio_iommu_driver_ops {
unsigned long *phys_pfn);
int (*unpin_pages)(void *iommu_data,
unsigned long *user_pfn, int npage);
- int (*register_notifier)(void *iommu_data,
- unsigned long *events,
- struct notifier_block *nb);
- int (*unregister_notifier)(void *iommu_data,
- struct notifier_block *nb);
+ void (*register_device)(void *iommu_data,
+ struct vfio_device *vdev);
+ void (*unregister_device)(void *iommu_data,
+ struct vfio_device *vdev);
int (*dma_rw)(void *iommu_data, dma_addr_t user_iova,
void *data, size_t count, bool write);
struct iommu_domain *(*group_iommu_domain)(void *iommu_data,
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index db24062fb34335..026a1d2553a269 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -67,7 +67,8 @@ struct vfio_iommu {
struct list_head iova_list;
struct mutex lock;
struct rb_root dma_list;
- struct blocking_notifier_head notifier;
+ struct list_head device_list;
+ struct mutex device_list_lock;
unsigned int dma_avail;
unsigned int vaddr_invalid_count;
uint64_t pgsize_bitmap;
@@ -865,8 +866,8 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
}
}
- /* Fail if notifier list is empty */
- if (!iommu->notifier.head) {
+ /* Fail if no dma_umap notifier is registered */
+ if (list_empty(&iommu->device_list)) {
ret = -EINVAL;
goto pin_done;
}
@@ -1287,6 +1288,35 @@ static int verify_bitmap_size(uint64_t npages, uint64_t bitmap_size)
return 0;
}
+/*
+ * Notify VFIO drivers using vfio_register_emulated_iommu_dev() to invalidate
+ * and unmap iovas within the range we're about to unmap. Drivers MUST unpin
+ * pages in response to an invalidation.
+ */
+static void vfio_notify_dma_unmap(struct vfio_iommu *iommu,
+ struct vfio_dma *dma)
+{
+ struct vfio_device *device;
+
+ if (list_empty(&iommu->device_list))
+ return;
+
+ /*
+ * The device is expected to call vfio_unpin_pages() for any IOVA it has
+ * pinned within the range. Since vfio_unpin_pages() will eventually
+ * call back down to this code and try to obtain the iommu->lock we must
+ * drop it.
+ */
+ mutex_lock(&iommu->device_list_lock);
+ mutex_unlock(&iommu->lock);
+
+ list_for_each_entry(device, &iommu->device_list, iommu_entry)
+ device->ops->dma_unmap(device, dma->iova, dma->size);
+
+ mutex_unlock(&iommu->device_list_lock);
+ mutex_lock(&iommu->lock);
+}
+
static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
struct vfio_iommu_type1_dma_unmap *unmap,
struct vfio_bitmap *bitmap)
@@ -1400,8 +1430,6 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
}
if (!RB_EMPTY_ROOT(&dma->pfn_list)) {
- struct vfio_iommu_type1_dma_unmap nb_unmap;
-
if (dma_last == dma) {
BUG_ON(++retries > 10);
} else {
@@ -1409,20 +1437,7 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
retries = 0;
}
- nb_unmap.iova = dma->iova;
- nb_unmap.size = dma->size;
-
- /*
- * Notify anyone (mdev vendor drivers) to invalidate and
- * unmap iovas within the range we're about to unmap.
- * Vendor drivers MUST unpin pages in response to an
- * invalidation.
- */
- mutex_unlock(&iommu->lock);
- blocking_notifier_call_chain(&iommu->notifier,
- VFIO_IOMMU_NOTIFY_DMA_UNMAP,
- &nb_unmap);
- mutex_lock(&iommu->lock);
+ vfio_notify_dma_unmap(iommu, dma);
goto again;
}
@@ -2475,7 +2490,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
if (list_empty(&iommu->emulated_iommu_groups) &&
list_empty(&iommu->domain_list)) {
- WARN_ON(iommu->notifier.head);
+ WARN_ON(!list_empty(&iommu->device_list));
vfio_iommu_unmap_unpin_all(iommu);
}
goto detach_group_done;
@@ -2507,7 +2522,8 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
if (list_empty(&domain->group_list)) {
if (list_is_singular(&iommu->domain_list)) {
if (list_empty(&iommu->emulated_iommu_groups)) {
- WARN_ON(iommu->notifier.head);
+ WARN_ON(!list_empty(
+ &iommu->device_list));
vfio_iommu_unmap_unpin_all(iommu);
} else {
vfio_iommu_unmap_unpin_reaccount(iommu);
@@ -2568,7 +2584,8 @@ static void *vfio_iommu_type1_open(unsigned long arg)
iommu->dma_avail = dma_entry_limit;
iommu->container_open = true;
mutex_init(&iommu->lock);
- BLOCKING_INIT_NOTIFIER_HEAD(&iommu->notifier);
+ mutex_init(&iommu->device_list_lock);
+ INIT_LIST_HEAD(&iommu->device_list);
init_waitqueue_head(&iommu->vaddr_wait);
iommu->pgsize_bitmap = PAGE_MASK;
INIT_LIST_HEAD(&iommu->emulated_iommu_groups);
@@ -3005,28 +3022,40 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
}
}
-static int vfio_iommu_type1_register_notifier(void *iommu_data,
- unsigned long *events,
- struct notifier_block *nb)
+static void vfio_iommu_type1_register_device(void *iommu_data,
+ struct vfio_device *vdev)
{
struct vfio_iommu *iommu = iommu_data;
- /* clear known events */
- *events &= ~VFIO_IOMMU_NOTIFY_DMA_UNMAP;
-
- /* refuse to register if still events remaining */
- if (*events)
- return -EINVAL;
+ if (!vdev->ops->dma_unmap)
+ return;
- return blocking_notifier_chain_register(&iommu->notifier, nb);
+ /*
+ * list_empty(&iommu->device_list) is tested under the iommu->lock while
+ * iteration for dma_unmap must be done under the device_list_lock.
+ * Holding both locks here allows avoiding the device_list_lock in
+ * several fast paths. See vfio_notify_dma_unmap()
+ */
+ mutex_lock(&iommu->lock);
+ mutex_lock(&iommu->device_list_lock);
+ list_add(&vdev->iommu_entry, &iommu->device_list);
+ mutex_unlock(&iommu->device_list_lock);
+ mutex_unlock(&iommu->lock);
}
-static int vfio_iommu_type1_unregister_notifier(void *iommu_data,
- struct notifier_block *nb)
+static void vfio_iommu_type1_unregister_device(void *iommu_data,
+ struct vfio_device *vdev)
{
struct vfio_iommu *iommu = iommu_data;
- return blocking_notifier_chain_unregister(&iommu->notifier, nb);
+ if (!vdev->ops->dma_unmap)
+ return;
+
+ mutex_lock(&iommu->lock);
+ mutex_lock(&iommu->device_list_lock);
+ list_del(&vdev->iommu_entry);
+ mutex_unlock(&iommu->device_list_lock);
+ mutex_unlock(&iommu->lock);
}
static int vfio_iommu_type1_dma_rw_chunk(struct vfio_iommu *iommu,
@@ -3160,8 +3189,8 @@ static const struct vfio_iommu_driver_ops vfio_iommu_driver_ops_type1 = {
.detach_group = vfio_iommu_type1_detach_group,
.pin_pages = vfio_iommu_type1_pin_pages,
.unpin_pages = vfio_iommu_type1_unpin_pages,
- .register_notifier = vfio_iommu_type1_register_notifier,
- .unregister_notifier = vfio_iommu_type1_unregister_notifier,
+ .register_device = vfio_iommu_type1_register_device,
+ .unregister_device = vfio_iommu_type1_unregister_device,
.dma_rw = vfio_iommu_type1_dma_rw,
.group_iommu_domain = vfio_iommu_type1_group_iommu_domain,
.notify = vfio_iommu_type1_notify,
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 1f9fc7a9be9efa..19cefbaa3d06a0 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -49,7 +49,7 @@ struct vfio_device {
unsigned int open_count;
struct completion comp;
struct list_head group_next;
- struct notifier_block iommu_nb;
+ struct list_head iommu_entry;
};
/**
--
2.37.1
^ permalink raw reply related [flat|nested] 9+ messages in thread