Re: [PATCH v4 05/10] drm/panfrost: Handle page mapping failure

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Boris Brezillon <boris.brezillon@collabora.com>
To: "Adrián Larumbe" <adrian.larumbe@collabora.com>
Cc: linux-kernel@vger.kernel.org, dri-devel@lists.freedesktop.org,
	Steven Price <steven.price@arm.com>,
	kernel@collabora.com, Rob Herring <robh@kernel.org>,
	Maarten Lankhorst <maarten.lankhorst@linux.intel.com>,
	Maxime Ripard <mripard@kernel.org>,
	Thomas Zimmermann <tzimmermann@suse.de>,
	David Airlie <airlied@gmail.com>, Simona Vetter <simona@ffwll.ch>
Subject: Re: [PATCH v4 05/10] drm/panfrost: Handle page mapping failure
Date: Tue, 7 Oct 2025 09:51:29 +0200	[thread overview]
Message-ID: <20251007095129.4b0e88a8@fedora> (raw)
In-Reply-To: <42bsn4ngim6aucg5pzcs4knpvwn6bbvud7lf22tr7kmrdrgphm@vncex4yi2oc6>

On Tue, 7 Oct 2025 02:04:00 +0100
Adrián Larumbe <adrian.larumbe@collabora.com> wrote:

> Hi Boris,
> 
> On 01.10.2025 12:58, Boris Brezillon wrote:
> > On Wed,  1 Oct 2025 03:20:26 +0100
> > Adrián Larumbe <adrian.larumbe@collabora.com> wrote:
> >  
> > > When mapping the pages of a BO, either a heap type at page fault time or
> > > else a non-heap BO at object creation time, if the ARM page table mapping
> > > function fails, we unmap what had been mapped so far and bail out.
> > >
> > > Signed-off-by: Adrián Larumbe <adrian.larumbe@collabora.com>
> > > ---
> > >  drivers/gpu/drm/panfrost/panfrost_mmu.c | 49 ++++++++++++++++++++++---
> > >  1 file changed, 44 insertions(+), 5 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c
> > > index cf272b167feb..fb17c32855a5 100644
> > > --- a/drivers/gpu/drm/panfrost/panfrost_mmu.c
> > > +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
> > > @@ -393,13 +393,32 @@ static void panfrost_mmu_flush_range(struct panfrost_device *pfdev,
> > >  	pm_runtime_put_autosuspend(pfdev->base.dev);
> > >  }
> > >
> > > +static void mmu_unmap_range(struct panfrost_mmu *mmu, u64 iova, size_t len)
> > > +{
> > > +	struct io_pgtable_ops *ops = mmu->pgtbl_ops;
> > > +	size_t pgsize, unmapped_len = 0;
> > > +	size_t unmapped_page, pgcount;
> > > +
> > > +	while (unmapped_len < len) {
> > > +		pgsize = get_pgsize(iova, len - unmapped_len, &pgcount);
> > > +
> > > +		unmapped_page = ops->unmap_pages(ops, iova, pgsize, pgcount, NULL);
> > > +		WARN_ON(unmapped_page != pgsize * pgcount);
> > > +
> > > +		iova += pgsize * pgcount;
> > > +		unmapped_len += pgsize * pgcount;
> > > +	}
> > > +}
> > > +
> > >  static int mmu_map_sg(struct panfrost_device *pfdev, struct panfrost_mmu *mmu,
> > >  		      u64 iova, int prot, struct sg_table *sgt)
> > >  {
> > >  	unsigned int count;
> > >  	struct scatterlist *sgl;
> > >  	struct io_pgtable_ops *ops = mmu->pgtbl_ops;
> > > +	size_t total_mapped = 0;
> > >  	u64 start_iova = iova;
> > > +	int ret;
> > >
> > >  	for_each_sgtable_dma_sg(sgt, sgl, count) {
> > >  		unsigned long paddr = sg_dma_address(sgl);
> > > @@ -413,10 +432,14 @@ static int mmu_map_sg(struct panfrost_device *pfdev, struct panfrost_mmu *mmu,
> > >  			size_t pgcount, mapped = 0;
> > >  			size_t pgsize = get_pgsize(iova | paddr, len, &pgcount);
> > >
> > > -			ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot,
> > > +			ret = ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot,
> > >  				       GFP_KERNEL, &mapped);
> > > +			if (ret)
> > > +				goto err_unmap_pages;
> > > +
> > >  			/* Don't get stuck if things have gone wrong */
> > >  			mapped = max(mapped, pgsize);
> > > +			total_mapped += mapped;
> > >  			iova += mapped;
> > >  			paddr += mapped;
> > >  			len -= mapped;
> > > @@ -426,6 +449,10 @@ static int mmu_map_sg(struct panfrost_device *pfdev, struct panfrost_mmu *mmu,
> > >  	panfrost_mmu_flush_range(pfdev, mmu, start_iova, iova - start_iova);
> > >
> > >  	return 0;
> > > +
> > > +err_unmap_pages:
> > > +	mmu_unmap_range(mmu, start_iova, total_mapped);
> > > +	return ret;
> > >  }
> > >
> > >  int panfrost_mmu_map(struct panfrost_gem_mapping *mapping)
> > > @@ -436,6 +463,7 @@ int panfrost_mmu_map(struct panfrost_gem_mapping *mapping)
> > >  	struct panfrost_device *pfdev = to_panfrost_device(obj->dev);
> > >  	struct sg_table *sgt;
> > >  	int prot = IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE;
> > > +	int ret;
> > >
> > >  	if (WARN_ON(mapping->active))
> > >  		return 0;
> > > @@ -447,11 +475,18 @@ int panfrost_mmu_map(struct panfrost_gem_mapping *mapping)
> > >  	if (WARN_ON(IS_ERR(sgt)))
> > >  		return PTR_ERR(sgt);
> > >
> > > -	mmu_map_sg(pfdev, mapping->mmu, mapping->mmnode.start << PAGE_SHIFT,
> > > -		   prot, sgt);
> > > +	ret = mmu_map_sg(pfdev, mapping->mmu, mapping->mmnode.start << PAGE_SHIFT,
> > > +			 prot, sgt);
> > > +	if (ret)
> > > +		goto err_put_pages;
> > > +
> > >  	mapping->active = true;
> > >
> > >  	return 0;
> > > +
> > > +err_put_pages:
> > > +	drm_gem_shmem_put_pages_locked(shmem);
> > > +	return ret;
> > >  }
> > >
> > >  void panfrost_mmu_unmap(struct panfrost_gem_mapping *mapping)
> > > @@ -635,8 +670,10 @@ static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as,
> > >  	if (ret)
> > >  		goto err_map;
> > >
> > > -	mmu_map_sg(pfdev, bomapping->mmu, addr,
> > > -		   IOMMU_WRITE | IOMMU_READ | IOMMU_CACHE | IOMMU_NOEXEC, sgt);
> > > +	ret = mmu_map_sg(pfdev, bomapping->mmu, addr,
> > > +			 IOMMU_WRITE | IOMMU_READ | IOMMU_CACHE | IOMMU_NOEXEC, sgt);
> > > +	if (ret)
> > > +		goto err_mmu_map_sg;
> > >
> > >  	bomapping->active = true;
> > >  	bo->heap_rss_size += SZ_2M;
> > > @@ -650,6 +687,8 @@ static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as,
> > >
> > >  	return 0;
> > >
> > > +err_mmu_map_sg:
> > > +	dma_unmap_sgtable(pfdev->base.dev, sgt, DMA_BIDIRECTIONAL, 0);  
> >
> > You also need to clear the sgts[]/pages[] entries you added earlier;
> > otherwise, the next time you have a fault, it will bail out before
> > attempting an mmu_map_sg().
> >
> > IIRC, Dmitry had a similar fix in his shmem-shrinker series.  
> 
> Went over the mailing list and I think the commit you had in mind was 1fc9af813b25 ("drm/panfrost: Fix the error path in panfrost_mmu_map_fault_addr()")
> 
> I suspect there's a problem with the present code. If shmem_read_mapping_page() fails for, let's say, page_offset+5, then when the interrupt is triggered
> again, it would bail out immediately even though most pages haven't been retrieved yet, because the page array had already been allocated and
> pages[page_offset] populated in the first try.
> 
> On the other hand, depopulating the array for the IRQ to be triggered again seems wasteful. Since a fault at any virtual address maps all the
> pages within its 2MiB boundaries, maybe we could change
> 
> if (pages[page_offset]) {
> 	/* Pages are already mapped, bail out. */
> 	goto out;
> }
> 
> to 'pages[page_offset+NUM_FAULT_PAGES-1]'

Or, we simply don't check the
pages[page_offset..page_offset+NUM_FAULT_PAGES-1] range and we let
the following loop walk over all entries and fill the missing ones, if
any.

> 
> And then, in the event that mmu_map_sg() fails:
> 
> err_mmu_map_sg:
> 	pages[page_offset+NUM_FAULT_PAGES-1] = NULL;
> 	dma_unmap_sgtable(pfdev->base.dev, sgt, DMA_BIDIRECTIONAL, 0);
> 
> That way, it'll only fetch the very last page instead of all of them if the page fault is retried.
> 
> > >  err_map:
> > >  	sg_free_table(sgt);
> > >  err_unlock:  
> 
> 
> Adrian Larumbe

next prev parent reply	other threads:[~2025-10-07  7:51 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-10-01  2:20 [PATCH v4 00/10] Some Panfrost fixes and improvements Adrián Larumbe
2025-10-01  2:20 ` [PATCH v4 01/10] drm/panfrost: Replace DRM driver allocation method with newer one Adrián Larumbe
2025-10-01  2:20 ` [PATCH v4 02/10] drm/panfrost: Handle inexistent GPU during probe Adrián Larumbe
2025-10-01 10:51   ` Boris Brezillon
2025-10-01  2:20 ` [PATCH v4 03/10] drm/panfrost: Handle job HW submit errors Adrián Larumbe
2025-10-06 16:07   ` Steven Price
2025-10-07  0:34     ` Adrián Larumbe
2025-10-01  2:20 ` [PATCH v4 04/10] drm/panfrost: Handle error when allocating AS number Adrián Larumbe
2025-10-01  2:20 ` [PATCH v4 05/10] drm/panfrost: Handle page mapping failure Adrián Larumbe
2025-10-01 10:58   ` Boris Brezillon
2025-10-07  1:04     ` Adrián Larumbe
2025-10-07  7:51       ` Boris Brezillon [this message]
2025-10-01  2:20 ` [PATCH v4 06/10] drm/panfrost: Don't rework job IRQ enable mask in the enable path Adrián Larumbe
2025-10-01 11:00   ` Boris Brezillon
2025-10-01  2:20 ` [PATCH v4 07/10] drm/panfrost: Make re-enabling job interrupts at device reset optional Adrián Larumbe
2025-10-01 11:02   ` Boris Brezillon
2025-10-01  2:20 ` [PATCH v4 08/10] drm/panfrost: Add forward declaration and types header Adrián Larumbe
2025-10-01  2:20 ` [PATCH v4 09/10] drm/panfrost: Remove unused device property Adrián Larumbe
2025-10-01  2:20 ` [PATCH v4 10/10] drm/panfrost: Rename panfrost_job functions to reflect real role Adrián Larumbe
2025-10-01 15:19   ` Boris Brezillon

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251007095129.4b0e88a8@fedora \
    --to=boris.brezillon@collabora.com \
    --cc=adrian.larumbe@collabora.com \
    --cc=airlied@gmail.com \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=kernel@collabora.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=maarten.lankhorst@linux.intel.com \
    --cc=mripard@kernel.org \
    --cc=robh@kernel.org \
    --cc=simona@ffwll.ch \
    --cc=steven.price@arm.com \
    --cc=tzimmermann@suse.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.