public inbox for linux-media@vger.kernel.org
 help / color / mirror / Atom feed
From: Leon Romanovsky <leon@kernel.org>
To: "Christian König" <christian.koenig@amd.com>
Cc: "Sumit Semwal" <sumit.semwal@linaro.org>,
	"Alex Deucher" <alexander.deucher@amd.com>,
	"David Airlie" <airlied@gmail.com>,
	"Simona Vetter" <simona@ffwll.ch>,
	"Gerd Hoffmann" <kraxel@redhat.com>,
	"Dmitry Osipenko" <dmitry.osipenko@collabora.com>,
	"Gurchetan Singh" <gurchetansingh@chromium.org>,
	"Chia-I Wu" <olvaffe@gmail.com>,
	"Maarten Lankhorst" <maarten.lankhorst@linux.intel.com>,
	"Maxime Ripard" <mripard@kernel.org>,
	"Thomas Zimmermann" <tzimmermann@suse.de>,
	"Lucas De Marchi" <lucas.demarchi@intel.com>,
	"Thomas Hellström" <thomas.hellstrom@linux.intel.com>,
	"Rodrigo Vivi" <rodrigo.vivi@intel.com>,
	"Jason Gunthorpe" <jgg@ziepe.ca>,
	"Kevin Tian" <kevin.tian@intel.com>,
	"Joerg Roedel" <joro@8bytes.org>, "Will Deacon" <will@kernel.org>,
	"Robin Murphy" <robin.murphy@arm.com>,
	"Felix Kuehling" <Felix.Kuehling@amd.com>,
	"Alex Williamson" <alex@shazbot.org>,
	"Ankit Agrawal" <ankita@nvidia.com>,
	"Vivek Kasireddy" <vivek.kasireddy@intel.com>,
	linux-media@vger.kernel.org, dri-devel@lists.freedesktop.org,
	linaro-mm-sig@lists.linaro.org, linux-kernel@vger.kernel.org,
	amd-gfx@lists.freedesktop.org, virtualization@lists.linux.dev,
	intel-xe@lists.freedesktop.org, linux-rdma@vger.kernel.org,
	iommu@lists.linux.dev, kvm@vger.kernel.org
Subject: Re: [PATCH v5 4/8] vfio: Wait for dma-buf invalidation to complete
Date: Fri, 30 Jan 2026 15:31:57 +0200	[thread overview]
Message-ID: <20260130133157.GP10992@unreal> (raw)
In-Reply-To: <d25bead8-8372-4791-a741-3371342f4698@amd.com>

On Fri, Jan 30, 2026 at 02:21:08PM +0100, Christian König wrote:
> On 1/30/26 14:01, Leon Romanovsky wrote:
> > On Fri, Jan 30, 2026 at 09:30:59AM +0100, Christian König wrote:
> >> On 1/24/26 20:14, Leon Romanovsky wrote:
> >>> From: Leon Romanovsky <leonro@nvidia.com>
> >>>
> >>> dma-buf invalidation is handled asynchronously by the hardware, so VFIO
> >>> must wait until all affected objects have been fully invalidated.
> >>>
> >>> In addition, the dma-buf exporter is expecting that all importers unmap any
> >>> buffers they previously mapped.
> >>>
> >>> Fixes: 5d74781ebc86 ("vfio/pci: Add dma-buf export support for MMIO regions")
> >>> Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
> >>> ---
> >>>  drivers/vfio/pci/vfio_pci_dmabuf.c | 71 ++++++++++++++++++++++++++++++++++++--
> >>>  1 file changed, 68 insertions(+), 3 deletions(-)
> >>>
> >>> diff --git a/drivers/vfio/pci/vfio_pci_dmabuf.c b/drivers/vfio/pci/vfio_pci_dmabuf.c
> >>> index d8ceafabef48..485515629fe4 100644
> >>> --- a/drivers/vfio/pci/vfio_pci_dmabuf.c
> >>> +++ b/drivers/vfio/pci/vfio_pci_dmabuf.c
> >>> @@ -17,6 +17,8 @@ struct vfio_pci_dma_buf {
> >>>  	struct dma_buf_phys_vec *phys_vec;
> >>>  	struct p2pdma_provider *provider;
> >>>  	u32 nr_ranges;
> >>> +	struct kref kref;
> >>> +	struct completion comp;
> >>>  	u8 revoked : 1;
> >>>  };
> >>>  
> >>> @@ -44,27 +46,46 @@ static int vfio_pci_dma_buf_attach(struct dma_buf *dmabuf,
> >>>  	return 0;
> >>>  }
> >>>  
> >>> +static void vfio_pci_dma_buf_done(struct kref *kref)
> >>> +{
> >>> +	struct vfio_pci_dma_buf *priv =
> >>> +		container_of(kref, struct vfio_pci_dma_buf, kref);
> >>> +
> >>> +	complete(&priv->comp);
> >>> +}
> >>> +
> >>>  static struct sg_table *
> >>>  vfio_pci_dma_buf_map(struct dma_buf_attachment *attachment,
> >>>  		     enum dma_data_direction dir)
> >>>  {
> >>>  	struct vfio_pci_dma_buf *priv = attachment->dmabuf->priv;
> >>> +	struct sg_table *ret;
> >>>  
> >>>  	dma_resv_assert_held(priv->dmabuf->resv);
> >>>  
> >>>  	if (priv->revoked)
> >>>  		return ERR_PTR(-ENODEV);
> >>>  
> >>> -	return dma_buf_phys_vec_to_sgt(attachment, priv->provider,
> >>> -				       priv->phys_vec, priv->nr_ranges,
> >>> -				       priv->size, dir);
> >>> +	ret = dma_buf_phys_vec_to_sgt(attachment, priv->provider,
> >>> +				      priv->phys_vec, priv->nr_ranges,
> >>> +				      priv->size, dir);
> >>> +	if (IS_ERR(ret))
> >>> +		return ret;
> >>> +
> >>> +	kref_get(&priv->kref);
> >>> +	return ret;
> >>>  }
> >>>  
> >>>  static void vfio_pci_dma_buf_unmap(struct dma_buf_attachment *attachment,
> >>>  				   struct sg_table *sgt,
> >>>  				   enum dma_data_direction dir)
> >>>  {
> >>> +	struct vfio_pci_dma_buf *priv = attachment->dmabuf->priv;
> >>> +
> >>> +	dma_resv_assert_held(priv->dmabuf->resv);
> >>> +
> >>>  	dma_buf_free_sgt(attachment, sgt, dir);
> >>> +	kref_put(&priv->kref, vfio_pci_dma_buf_done);
> >>>  }
> >>>  
> >>>  static void vfio_pci_dma_buf_release(struct dma_buf *dmabuf)
> >>> @@ -287,6 +308,9 @@ int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
> >>>  		goto err_dev_put;
> >>>  	}
> >>>  
> >>> +	kref_init(&priv->kref);
> >>> +	init_completion(&priv->comp);
> >>> +
> >>>  	/* dma_buf_put() now frees priv */
> >>>  	INIT_LIST_HEAD(&priv->dmabufs_elm);
> >>>  	down_write(&vdev->memory_lock);
> >>> @@ -326,6 +350,8 @@ void vfio_pci_dma_buf_move(struct vfio_pci_core_device *vdev, bool revoked)
> >>>  	lockdep_assert_held_write(&vdev->memory_lock);
> >>>  
> >>>  	list_for_each_entry_safe(priv, tmp, &vdev->dmabufs, dmabufs_elm) {
> >>> +		unsigned long wait;
> >>> +
> >>>  		if (!get_file_active(&priv->dmabuf->file))
> >>>  			continue;
> >>>  
> >>> @@ -333,7 +359,37 @@ void vfio_pci_dma_buf_move(struct vfio_pci_core_device *vdev, bool revoked)
> >>>  			dma_resv_lock(priv->dmabuf->resv, NULL);
> >>>  			priv->revoked = revoked;
> >>>  			dma_buf_invalidate_mappings(priv->dmabuf);
> >>> +			dma_resv_wait_timeout(priv->dmabuf->resv,
> >>> +					      DMA_RESV_USAGE_BOOKKEEP, false,
> >>> +					      MAX_SCHEDULE_TIMEOUT);
> >>>  			dma_resv_unlock(priv->dmabuf->resv);
> >>> +			if (revoked) {
> >>> +				kref_put(&priv->kref, vfio_pci_dma_buf_done);
> >>> +			/* Let's wait until all DMA unmaps are completed. */
> >>> +				wait = wait_for_completion_timeout(
> >>> +					&priv->comp, secs_to_jiffies(1));
> >>> +				/*
> >>> +				 * If you see this WARN_ON, it means that
> >>> +				 * importer didn't call unmap in response to
> >>> +				 * dma_buf_invalidate_mappings() which is not
> >>> +				 * allowed.
> >>> +				 */
> >>> +				WARN(!wait,
> >>> +				     "Timed out waiting for DMABUF unmap, importer has a broken invalidate_mapping()");
> >>
> >> You can do the revoke to do your resource management, for example re-use the backing store for something else.
> >>
> >> But it is mandatory that you keep the mapping around indefinitely until the importer closes it.
> >>
> >> Before that you can't do things like runtime PM or remove or anything which would make the DMA addresses invalid.
> >>
> >> As far as I can see vfio_pci_dma_buf_move() is used exactly for that use case so this here is an absolutely clear NAK from my side for this approach.
> >>
> >> You can either split up the functionality of vfio_pci_dma_buf_move() into vfio_pci_dma_buf_invalidate_mappings() and vfio_pci_dma_buf_flush() and then call the later whenever necessary or you keep it in one function and block everybody until the importer has dropped all mappings.
> > 
> > No problem, I can change it to be:
> > 
> > diff --git a/drivers/vfio/pci/vfio_pci_dmabuf.c b/drivers/vfio/pci/vfio_pci_dmabuf.c
> > index d087d018d547..53772a84c93b 100644
> > --- a/drivers/vfio/pci/vfio_pci_dmabuf.c
> > +++ b/drivers/vfio/pci/vfio_pci_dmabuf.c
> > @@ -357,23 +357,7 @@ void vfio_pci_dma_buf_move(struct vfio_pci_core_device *vdev, bool revoked)
> >                         dma_resv_unlock(priv->dmabuf->resv);
> >                         if (revoked) {
> >                                 kref_put(&priv->kref, vfio_pci_dma_buf_done);
> > -                               /*
> > -                                * Let's wait for 1 second till all DMA unmap
> > -                                * are completed. It is supposed to catch dma-buf
> > -                                * importers which lied about their support
> > -                                * of dmabuf revoke. See dma_buf_invalidate_mappings()
> > -                                * for the expected behaviour.
> > -                                */
> > -                               wait = wait_for_completion_timeout(
> > -                                       &priv->comp, secs_to_jiffies(1));
> > -                               /*
> > -                                * If you see this WARN_ON, it means that
> > -                                * importer didn't call unmap in response to
> > -                                * dma_buf_invalidate_mappings() which is not
> > -                                * allowed.
> > -                                */
> > -                               WARN(!wait,
> > -                                    "Timed out waiting for DMABUF unmap, importer has a broken invalidate_mapping()");
> > +                               wait_for_completion(&priv->comp);
> >                         } else {
> >                                 /*
> >                                  * Kref is initialized again, because when revoke
> > 
> > Do you want me to send v6?
> 
> That would work for me.
> 
> Question is if you really want to do it this way? See usually exporters try to avoid blocking such functions.
> 
> What exporters usually do instead is to grab references, e.g. call pm_runtime_get_sync() when either a DMA-buf, a DMA-buf attachment or in your case here a mapping of this attachment is made.

I view this as an enhancement that can be addressed later down the road.

> 
> But all of this is just a suggestion, if you are fine with blocking then feel free to add my rb.

It is fine for initial version. We need to start somewhere.

Thanks

> 
> Regards,
> Christian.
> 
> > 
> > Thanks
> > 
> >>
> >>> +			} else {
> >>> +				/*
> >>> +				 * Kref is initialized again, because when revoke
> >>> +				 * was performed the reference counter was decreased
> >>> +				 * to zero to trigger completion.
> >>> +				 */
> >>> +				kref_init(&priv->kref);
> >>> +				/*
> >>> +				 * There is no need to wait as no mapping was
> >>> +				 * performed when the previous status was
> >>> +				 * priv->revoked == true.
> >>> +				 */
> >>> +				reinit_completion(&priv->comp);
> >>> +			}
> >>>  		}
> >>>  		fput(priv->dmabuf->file);
> >>
> >> This is also extremely questionable. Why doesn't the dmabuf have a reference while on the linked list?
> >>
> >> Regards,
> >> Christian.
> >>
> >>>  	}
> >>> @@ -346,6 +402,8 @@ void vfio_pci_dma_buf_cleanup(struct vfio_pci_core_device *vdev)
> >>>  
> >>>  	down_write(&vdev->memory_lock);
> >>>  	list_for_each_entry_safe(priv, tmp, &vdev->dmabufs, dmabufs_elm) {
> >>> +		unsigned long wait;
> >>> +
> >>>  		if (!get_file_active(&priv->dmabuf->file))
> >>>  			continue;
> >>>  
> >>> @@ -354,7 +412,14 @@ void vfio_pci_dma_buf_cleanup(struct vfio_pci_core_device *vdev)
> >>>  		priv->vdev = NULL;
> >>>  		priv->revoked = true;
> >>>  		dma_buf_invalidate_mappings(priv->dmabuf);
> >>> +		dma_resv_wait_timeout(priv->dmabuf->resv,
> >>> +				      DMA_RESV_USAGE_BOOKKEEP, false,
> >>> +				      MAX_SCHEDULE_TIMEOUT);
> >>>  		dma_resv_unlock(priv->dmabuf->resv);
> >>> +		kref_put(&priv->kref, vfio_pci_dma_buf_done);
> >>> +		wait = wait_for_completion_timeout(&priv->comp,
> >>> +						   secs_to_jiffies(1));
> >>> +		WARN_ON(!wait);
> >>>  		vfio_device_put_registration(&vdev->vdev);
> >>>  		fput(priv->dmabuf->file);
> >>>  	}
> >>>
> >>
> >>
> 
> 

  reply	other threads:[~2026-01-30 13:32 UTC|newest]

Thread overview: 51+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-01-24 19:14 [PATCH v5 0/8] dma-buf: Use revoke mechanism to invalidate shared buffers Leon Romanovsky
2026-01-24 19:14 ` [PATCH v5 1/8] dma-buf: Rename .move_notify() callback to a clearer identifier Leon Romanovsky
2026-01-24 19:14 ` [PATCH v5 2/8] dma-buf: Rename dma_buf_move_notify() to dma_buf_invalidate_mappings() Leon Romanovsky
2026-01-27  9:21   ` Christian König
2026-01-24 19:14 ` [PATCH v5 3/8] dma-buf: Always build with DMABUF_MOVE_NOTIFY Leon Romanovsky
2026-01-27  9:26   ` Christian König
2026-01-27  9:58     ` Leon Romanovsky
2026-01-27 10:02       ` Christian König
2026-01-27 11:42         ` Leon Romanovsky
2026-01-27 20:45           ` Leon Romanovsky
2026-01-30  7:00         ` Leon Romanovsky
2026-01-24 19:14 ` [PATCH v5 4/8] vfio: Wait for dma-buf invalidation to complete Leon Romanovsky
2026-01-26 20:53   ` Pranjal Shrivastava
2026-01-27  8:58     ` Leon Romanovsky
2026-01-27 16:27       ` Jason Gunthorpe
2026-01-29  7:06         ` Tian, Kevin
2026-01-29  7:33           ` Leon Romanovsky
2026-01-29  8:13             ` Tian, Kevin
2026-01-29  8:41               ` Leon Romanovsky
2026-01-29 21:04                 ` Alex Williamson
2026-01-30  3:10                 ` Tian, Kevin
2026-01-29 14:58           ` Jason Gunthorpe
2026-01-30  3:12             ` Tian, Kevin
2026-01-30  5:43               ` Mauro Carvalho Chehab
2026-01-30  5:48                 ` Tian, Kevin
2026-01-30  8:46             ` Christian König
2026-01-30  8:30   ` Christian König
2026-01-30 13:01     ` Leon Romanovsky
2026-01-30 13:21       ` Christian König
2026-01-30 13:31         ` Leon Romanovsky [this message]
2026-01-30 13:56         ` Jason Gunthorpe
2026-01-30 14:11           ` Christian König
2026-01-30 14:44             ` Jason Gunthorpe
2026-02-02  8:42               ` Christian König
2026-02-02 15:12                 ` Jason Gunthorpe
2026-02-02 15:21                   ` Christian König
2026-02-02 15:55                     ` Jason Gunthorpe
2026-01-24 19:14 ` [PATCH v5 5/8] dma-buf: Make .invalidate_mapping() truly optional Leon Romanovsky
2026-01-30  8:30   ` Christian König
2026-01-30 12:55     ` Leon Romanovsky
2026-01-24 19:14 ` [PATCH v5 6/8] dma-buf: Add dma_buf_attach_revocable() Leon Romanovsky
2026-01-26 20:38   ` Pranjal Shrivastava
2026-01-26 21:01     ` Jason Gunthorpe
2026-01-30  8:43   ` Christian König
2026-01-30 14:00     ` Jason Gunthorpe
2026-01-24 19:14 ` [PATCH v5 7/8] vfio: Permit VFIO to work with pinned importers Leon Romanovsky
2026-01-29 21:04   ` Alex Williamson
2026-01-30  3:14   ` Tian, Kevin
2026-01-24 19:14 ` [PATCH v5 8/8] iommufd: Add dma_buf_pin() Leon Romanovsky
2026-01-29  7:08   ` Tian, Kevin
2026-01-30  0:17   ` Jason Gunthorpe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260130133157.GP10992@unreal \
    --to=leon@kernel.org \
    --cc=Felix.Kuehling@amd.com \
    --cc=airlied@gmail.com \
    --cc=alex@shazbot.org \
    --cc=alexander.deucher@amd.com \
    --cc=amd-gfx@lists.freedesktop.org \
    --cc=ankita@nvidia.com \
    --cc=christian.koenig@amd.com \
    --cc=dmitry.osipenko@collabora.com \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=gurchetansingh@chromium.org \
    --cc=intel-xe@lists.freedesktop.org \
    --cc=iommu@lists.linux.dev \
    --cc=jgg@ziepe.ca \
    --cc=joro@8bytes.org \
    --cc=kevin.tian@intel.com \
    --cc=kraxel@redhat.com \
    --cc=kvm@vger.kernel.org \
    --cc=linaro-mm-sig@lists.linaro.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-media@vger.kernel.org \
    --cc=linux-rdma@vger.kernel.org \
    --cc=lucas.demarchi@intel.com \
    --cc=maarten.lankhorst@linux.intel.com \
    --cc=mripard@kernel.org \
    --cc=olvaffe@gmail.com \
    --cc=robin.murphy@arm.com \
    --cc=rodrigo.vivi@intel.com \
    --cc=simona@ffwll.ch \
    --cc=sumit.semwal@linaro.org \
    --cc=thomas.hellstrom@linux.intel.com \
    --cc=tzimmermann@suse.de \
    --cc=virtualization@lists.linux.dev \
    --cc=vivek.kasireddy@intel.com \
    --cc=will@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox