public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Boris Brezillon <boris.brezillon@collabora.com>
To: "Adrián Larumbe" <adrian.larumbe@collabora.com>
Cc: linux-kernel@vger.kernel.org, dri-devel@lists.freedesktop.org,
	Steven Price <steven.price@arm.com>,
	kernel@collabora.com, Rob Herring <robh@kernel.org>,
	Maarten Lankhorst <maarten.lankhorst@linux.intel.com>,
	Maxime Ripard <mripard@kernel.org>,
	Thomas Zimmermann <tzimmermann@suse.de>,
	David Airlie <airlied@gmail.com>, Simona Vetter <simona@ffwll.ch>
Subject: Re: [PATCH v4 05/10] drm/panfrost: Handle page mapping failure
Date: Tue, 7 Oct 2025 09:51:29 +0200	[thread overview]
Message-ID: <20251007095129.4b0e88a8@fedora> (raw)
In-Reply-To: <42bsn4ngim6aucg5pzcs4knpvwn6bbvud7lf22tr7kmrdrgphm@vncex4yi2oc6>

On Tue, 7 Oct 2025 02:04:00 +0100
Adrián Larumbe <adrian.larumbe@collabora.com> wrote:

> Hi Boris,
> 
> On 01.10.2025 12:58, Boris Brezillon wrote:
> > On Wed,  1 Oct 2025 03:20:26 +0100
> > Adrián Larumbe <adrian.larumbe@collabora.com> wrote:
> >  
> > > > When mapping the pages of a BO, either a heap BO at page fault time or
> > > > a non-heap BO at object creation time, if the ARM page table mapping
> > > > function fails, we unmap what had been mapped so far and bail out.
> > >
> > > Signed-off-by: Adrián Larumbe <adrian.larumbe@collabora.com>
> > > ---
> > >  drivers/gpu/drm/panfrost/panfrost_mmu.c | 49 ++++++++++++++++++++++---
> > >  1 file changed, 44 insertions(+), 5 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c
> > > index cf272b167feb..fb17c32855a5 100644
> > > --- a/drivers/gpu/drm/panfrost/panfrost_mmu.c
> > > +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
> > > @@ -393,13 +393,32 @@ static void panfrost_mmu_flush_range(struct panfrost_device *pfdev,
> > >  	pm_runtime_put_autosuspend(pfdev->base.dev);
> > >  }
> > >
> > > +static void mmu_unmap_range(struct panfrost_mmu *mmu, u64 iova, size_t len)
> > > +{
> > > +	struct io_pgtable_ops *ops = mmu->pgtbl_ops;
> > > +	size_t pgsize, unmapped_len = 0;
> > > +	size_t unmapped_page, pgcount;
> > > +
> > > +	while (unmapped_len < len) {
> > > +		pgsize = get_pgsize(iova, len - unmapped_len, &pgcount);
> > > +
> > > +		unmapped_page = ops->unmap_pages(ops, iova, pgsize, pgcount, NULL);
> > > +		WARN_ON(unmapped_page != pgsize * pgcount);
> > > +
> > > +		iova += pgsize * pgcount;
> > > +		unmapped_len += pgsize * pgcount;
> > > +	}
> > > +}
> > > +
> > >  static int mmu_map_sg(struct panfrost_device *pfdev, struct panfrost_mmu *mmu,
> > >  		      u64 iova, int prot, struct sg_table *sgt)
> > >  {
> > >  	unsigned int count;
> > >  	struct scatterlist *sgl;
> > >  	struct io_pgtable_ops *ops = mmu->pgtbl_ops;
> > > +	size_t total_mapped = 0;
> > >  	u64 start_iova = iova;
> > > +	int ret;
> > >
> > >  	for_each_sgtable_dma_sg(sgt, sgl, count) {
> > >  		unsigned long paddr = sg_dma_address(sgl);
> > > @@ -413,10 +432,14 @@ static int mmu_map_sg(struct panfrost_device *pfdev, struct panfrost_mmu *mmu,
> > >  			size_t pgcount, mapped = 0;
> > >  			size_t pgsize = get_pgsize(iova | paddr, len, &pgcount);
> > >
> > > -			ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot,
> > > +			ret = ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot,
> > >  				       GFP_KERNEL, &mapped);
> > > +			if (ret)
> > > +				goto err_unmap_pages;
> > > +
> > >  			/* Don't get stuck if things have gone wrong */
> > >  			mapped = max(mapped, pgsize);
> > > +			total_mapped += mapped;
> > >  			iova += mapped;
> > >  			paddr += mapped;
> > >  			len -= mapped;
> > > @@ -426,6 +449,10 @@ static int mmu_map_sg(struct panfrost_device *pfdev, struct panfrost_mmu *mmu,
> > >  	panfrost_mmu_flush_range(pfdev, mmu, start_iova, iova - start_iova);
> > >
> > >  	return 0;
> > > +
> > > +err_unmap_pages:
> > > +	mmu_unmap_range(mmu, start_iova, total_mapped);
> > > +	return ret;
> > >  }
> > >
> > >  int panfrost_mmu_map(struct panfrost_gem_mapping *mapping)
> > > @@ -436,6 +463,7 @@ int panfrost_mmu_map(struct panfrost_gem_mapping *mapping)
> > >  	struct panfrost_device *pfdev = to_panfrost_device(obj->dev);
> > >  	struct sg_table *sgt;
> > >  	int prot = IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE;
> > > +	int ret;
> > >
> > >  	if (WARN_ON(mapping->active))
> > >  		return 0;
> > > @@ -447,11 +475,18 @@ int panfrost_mmu_map(struct panfrost_gem_mapping *mapping)
> > >  	if (WARN_ON(IS_ERR(sgt)))
> > >  		return PTR_ERR(sgt);
> > >
> > > -	mmu_map_sg(pfdev, mapping->mmu, mapping->mmnode.start << PAGE_SHIFT,
> > > -		   prot, sgt);
> > > +	ret = mmu_map_sg(pfdev, mapping->mmu, mapping->mmnode.start << PAGE_SHIFT,
> > > +			 prot, sgt);
> > > +	if (ret)
> > > +		goto err_put_pages;
> > > +
> > >  	mapping->active = true;
> > >
> > >  	return 0;
> > > +
> > > +err_put_pages:
> > > +	drm_gem_shmem_put_pages_locked(shmem);
> > > +	return ret;
> > >  }
> > >
> > >  void panfrost_mmu_unmap(struct panfrost_gem_mapping *mapping)
> > > @@ -635,8 +670,10 @@ static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as,
> > >  	if (ret)
> > >  		goto err_map;
> > >
> > > -	mmu_map_sg(pfdev, bomapping->mmu, addr,
> > > -		   IOMMU_WRITE | IOMMU_READ | IOMMU_CACHE | IOMMU_NOEXEC, sgt);
> > > +	ret = mmu_map_sg(pfdev, bomapping->mmu, addr,
> > > +			 IOMMU_WRITE | IOMMU_READ | IOMMU_CACHE | IOMMU_NOEXEC, sgt);
> > > +	if (ret)
> > > +		goto err_mmu_map_sg;
> > >
> > >  	bomapping->active = true;
> > >  	bo->heap_rss_size += SZ_2M;
> > > @@ -650,6 +687,8 @@ static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as,
> > >
> > >  	return 0;
> > >
> > > +err_mmu_map_sg:
> > > +	dma_unmap_sgtable(pfdev->base.dev, sgt, DMA_BIDIRECTIONAL, 0);  
> >
> > You also need to clear the sgts[]/pages[] entries you added earlier,
> > otherwise the next time you have a fault it will bail-out before
> > attempting an mmu_map_sg().
> >
> > IIRC, Dmitry had a similar fix in his shmem-shrinker series.  
> 
> Went over the mailing list and I think the commit you had in mind was 1fc9af813b25 ("drm/panfrost: Fix the error path in panfrost_mmu_map_fault_addr()")
> 
> I suspect there's a problem with the present code. If shmem_read_mapping_page() fails for, let's say, page_offset+5, then when the interrupt is triggered
> again it would bail out immediately, because the page array had already been allocated and pages[page_offset] populated on the first try, even though
> most pages haven't been retrieved yet.
> 
> On the other hand, depopulating the array just so the IRQ can be triggered again seems wasteful. Since a fault on any virtual address maps all the
> pages within its 2MiB boundaries, maybe we could change
> 
> if (pages[page_offset]) {
> 	/* Pages are already mapped, bail out. */
> 	goto out;
> }
> 
> to 'pages[page_offset+NUM_FAULT_PAGES-1]'

Or, we simply don't check the
pages[page_offset..page_offset+NUM_FAULT_PAGES-1] range and we let
the following loop walk over all entries and fill the missing ones, if
any.

> 
> And then, in the event that mmu_map_sg() fails:
> 
> err_mmu_map_sg:
> 	pages[page_offset+NUM_FAULT_PAGES-1] = NULL;
> 	dma_unmap_sgtable(pfdev->base.dev, sgt, DMA_BIDIRECTIONAL, 0);
> 
> That way, if the page fault is retried, it'll only fetch the very last page again instead of all of them.
> 
> > >  err_map:
> > >  	sg_free_table(sgt);
> > >  err_unlock:  
> 
> 
> Adrian Larumbe


  reply	other threads:[~2025-10-07  7:51 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-10-01  2:20 [PATCH v4 00/10] Some Panfrost fixes and improvements Adrián Larumbe
2025-10-01  2:20 ` [PATCH v4 01/10] drm/panfrost: Replace DRM driver allocation method with newer one Adrián Larumbe
2025-10-01  2:20 ` [PATCH v4 02/10] drm/panfrost: Handle inexistent GPU during probe Adrián Larumbe
2025-10-01 10:51   ` Boris Brezillon
2025-10-01  2:20 ` [PATCH v4 03/10] drm/panfrost: Handle job HW submit errors Adrián Larumbe
2025-10-06 16:07   ` Steven Price
2025-10-07  0:34     ` Adrián Larumbe
2025-10-01  2:20 ` [PATCH v4 04/10] drm/panfrost: Handle error when allocating AS number Adrián Larumbe
2025-10-01  2:20 ` [PATCH v4 05/10] drm/panfrost: Handle page mapping failure Adrián Larumbe
2025-10-01 10:58   ` Boris Brezillon
2025-10-07  1:04     ` Adrián Larumbe
2025-10-07  7:51       ` Boris Brezillon [this message]
2025-10-01  2:20 ` [PATCH v4 06/10] drm/panfrost: Don't rework job IRQ enable mask in the enable path Adrián Larumbe
2025-10-01 11:00   ` Boris Brezillon
2025-10-01  2:20 ` [PATCH v4 07/10] drm/panfrost: Make re-enabling job interrupts at device reset optional Adrián Larumbe
2025-10-01 11:02   ` Boris Brezillon
2025-10-01  2:20 ` [PATCH v4 08/10] drm/panfrost: Add forward declaration and types header Adrián Larumbe
2025-10-01  2:20 ` [PATCH v4 09/10] drm/panfrost: Remove unused device property Adrián Larumbe
2025-10-01  2:20 ` [PATCH v4 10/10] drm/panfrost: Rename panfrost_job functions to reflect real role Adrián Larumbe
2025-10-01 15:19   ` Boris Brezillon

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251007095129.4b0e88a8@fedora \
    --to=boris.brezillon@collabora.com \
    --cc=adrian.larumbe@collabora.com \
    --cc=airlied@gmail.com \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=kernel@collabora.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=maarten.lankhorst@linux.intel.com \
    --cc=mripard@kernel.org \
    --cc=robh@kernel.org \
    --cc=simona@ffwll.ch \
    --cc=steven.price@arm.com \
    --cc=tzimmermann@suse.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox