All of lore.kernel.org
 help / color / mirror / Atom feed
From: Boris Brezillon <boris.brezillon@collabora.com>
To: "Adrián Larumbe" <adrian.larumbe@collabora.com>
Cc: maarten.lankhorst@linux.intel.com, mripard@kernel.org,
	tzimmermann@suse.de, airlied@gmail.com, daniel@ffwll.ch,
	robdclark@gmail.com, quic_abhinavk@quicinc.com,
	dmitry.baryshkov@linaro.org, sean@poorly.run,
	marijn.suijten@somainline.org, robh@kernel.org,
	steven.price@arm.com, linux-arm-msm@vger.kernel.org,
	linux-kernel@vger.kernel.org, dri-devel@lists.freedesktop.org,
	healych@amazon.com, kernel@collabora.com,
	freedreno@lists.freedesktop.org
Subject: Re: [PATCH v2 5/6] drm/panfrost: Implement generic DRM object RSS reporting function
Date: Wed, 30 Aug 2023 12:52:10 +0200	[thread overview]
Message-ID: <20230830125210.3d1172db@collabora.com> (raw)
In-Reply-To: <20230824013604.466224-6-adrian.larumbe@collabora.com>

On Thu, 24 Aug 2023 02:34:48 +0100
Adrián Larumbe <adrian.larumbe@collabora.com> wrote:

> BO's RSS is updated every time new pages are allocated and mapped for the
> object, either in its entirety at creation time for non-heap buffers, or
> else on demand for heap buffers at GPU page fault's IRQ handler.
> 
> Same calculations had to be done for imported PRIME objects, since backing
> storage for it might have already been allocated by the exporting driver.
> 
> Signed-off-by: Adrián Larumbe <adrian.larumbe@collabora.com>
> ---
>  drivers/gpu/drm/panfrost/panfrost_gem.c | 22 ++++++++++++++++++++++
>  drivers/gpu/drm/panfrost/panfrost_gem.h |  5 +++++
>  drivers/gpu/drm/panfrost/panfrost_mmu.c | 16 +++++++++++-----
>  3 files changed, 38 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c
> index aea16b0e4dda..c6bd1f16a6d4 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_gem.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_gem.c
> @@ -206,6 +206,17 @@ static enum drm_gem_object_status panfrost_gem_status(struct drm_gem_object *obj
>  
>  	return res;
>  }
> +
> +size_t panfrost_gem_rss(struct drm_gem_object *obj)
> +{
> +	struct panfrost_gem_object *bo = to_panfrost_bo(obj);
> +
> +	if (!bo->base.pages)
> +		return 0;
> +
> +	return bo->rss_size;
> +}
> +
>  static const struct drm_gem_object_funcs panfrost_gem_funcs = {
>  	.free = panfrost_gem_free_object,
>  	.open = panfrost_gem_open,
> @@ -218,6 +229,7 @@ static const struct drm_gem_object_funcs panfrost_gem_funcs = {
>  	.vunmap = drm_gem_shmem_object_vunmap,
>  	.mmap = drm_gem_shmem_object_mmap,
>  	.status = panfrost_gem_status,
> +	.rss = panfrost_gem_rss,
>  	.vm_ops = &drm_gem_shmem_vm_ops,
>  };
>  
> @@ -274,13 +286,23 @@ panfrost_gem_prime_import_sg_table(struct drm_device *dev,
>  {
>  	struct drm_gem_object *obj;
>  	struct panfrost_gem_object *bo;
> +	struct scatterlist *sgl;
> +	unsigned int count;
> +	size_t total = 0;
>  
>  	obj = drm_gem_shmem_prime_import_sg_table(dev, attach, sgt);
>  	if (IS_ERR(obj))
>  		return ERR_CAST(obj);
>  
> +	for_each_sgtable_dma_sg(sgt, sgl, count) {
> +		size_t len = sg_dma_len(sgl);
> +
> +		total += len;
> +	}

Why not simply set bo->rss_size = obj->size here? I don't see a
reason not to trust the size reported by dma_buf.

> +
>  	bo = to_panfrost_bo(obj);
>  	bo->noexec = true;
> +	bo->rss_size = total;
>  
>  	return obj;
>  }
> diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.h b/drivers/gpu/drm/panfrost/panfrost_gem.h
> index e06f7ceb8f73..e2a7c46403c7 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_gem.h
> +++ b/drivers/gpu/drm/panfrost/panfrost_gem.h
> @@ -36,6 +36,11 @@ struct panfrost_gem_object {
>  	 */
>  	atomic_t gpu_usecount;
>  
> +	/*
> +	 * Object chunk size currently mapped onto physical memory
> +	 */
> +	size_t rss_size;
> +
>  	bool noexec		:1;
>  	bool is_heap		:1;
>  	bool is_purgable	:1;
> diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c
> index c0123d09f699..e03a5a9da06f 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_mmu.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
> @@ -285,17 +285,19 @@ static void panfrost_mmu_flush_range(struct panfrost_device *pfdev,
>  	pm_runtime_put_autosuspend(pfdev->dev);
>  }
>  
> -static int mmu_map_sg(struct panfrost_device *pfdev, struct panfrost_mmu *mmu,
> +static size_t mmu_map_sg(struct panfrost_device *pfdev, struct panfrost_mmu *mmu,
>  		      u64 iova, int prot, struct sg_table *sgt)
>  {
>  	unsigned int count;
>  	struct scatterlist *sgl;
>  	struct io_pgtable_ops *ops = mmu->pgtbl_ops;
>  	u64 start_iova = iova;
> +	size_t total = 0;
>  
>  	for_each_sgtable_dma_sg(sgt, sgl, count) {
>  		unsigned long paddr = sg_dma_address(sgl);
>  		size_t len = sg_dma_len(sgl);
> +		total += len;
>  
>  		dev_dbg(pfdev->dev, "map: as=%d, iova=%llx, paddr=%lx, len=%zx", mmu->as, iova, paddr, len);
>  
> @@ -315,7 +317,7 @@ static int mmu_map_sg(struct panfrost_device *pfdev, struct panfrost_mmu *mmu,
>  
>  	panfrost_mmu_flush_range(pfdev, mmu, start_iova, iova - start_iova);
>  
> -	return 0;
> +	return total;
>  }
>  
>  int panfrost_mmu_map(struct panfrost_gem_mapping *mapping)
> @@ -326,6 +328,7 @@ int panfrost_mmu_map(struct panfrost_gem_mapping *mapping)
>  	struct panfrost_device *pfdev = to_panfrost_device(obj->dev);
>  	struct sg_table *sgt;
>  	int prot = IOMMU_READ | IOMMU_WRITE;
> +	size_t mapped_size;
>  
>  	if (WARN_ON(mapping->active))
>  		return 0;
> @@ -337,9 +340,10 @@ int panfrost_mmu_map(struct panfrost_gem_mapping *mapping)
>  	if (WARN_ON(IS_ERR(sgt)))
>  		return PTR_ERR(sgt);
>  
> -	mmu_map_sg(pfdev, mapping->mmu, mapping->mmnode.start << PAGE_SHIFT,
> +	mapped_size = mmu_map_sg(pfdev, mapping->mmu, mapping->mmnode.start << PAGE_SHIFT,
>  		   prot, sgt);
>  	mapping->active = true;
> +	bo->rss_size += mapped_size;

Actually, the GEM might be resident even before panfrost_mmu_map() is
called: as soon as drm_gem_shmem_get_pages[_locked]() is called, it's
resident (might get evicted after that point though). That means any
mmap coming from userspace will make the buffer resident too. I know
we're automatically mapping GEMs to the GPU VM in panfrost_gem_open(),
so it makes no difference, but I think I'd prefer if we keep ->rss_size
for heap BOs only (we probably want to rename it heap_rss_size) and
then have


	if (bo->is_heap)
		return bo->heap_rss_size;
	else if (bo->base.pages)
		return bo->base.base.size;
	else
		return 0;

in panfrost_gem_rss().

>  
>  	return 0;
>  }
> @@ -447,6 +451,7 @@ static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as,
>  	pgoff_t page_offset;
>  	struct sg_table *sgt;
>  	struct page **pages;
> +	size_t mapped_size;
>  
>  	bomapping = addr_to_mapping(pfdev, as, addr);
>  	if (!bomapping)
> @@ -518,10 +523,11 @@ static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as,
>  	if (ret)
>  		goto err_map;
>  
> -	mmu_map_sg(pfdev, bomapping->mmu, addr,
> -		   IOMMU_WRITE | IOMMU_READ | IOMMU_NOEXEC, sgt);
> +	mapped_size = mmu_map_sg(pfdev, bomapping->mmu, addr,
> +				 IOMMU_WRITE | IOMMU_READ | IOMMU_NOEXEC, sgt);
>  
>  	bomapping->active = true;
> +	bo->rss_size += mapped_size;
>  
>  	dev_dbg(pfdev->dev, "mapped page fault @ AS%d %llx", as, addr);
>  


WARNING: multiple messages have this Message-ID (diff)
From: Boris Brezillon <boris.brezillon@collabora.com>
To: "Adrián Larumbe" <adrian.larumbe@collabora.com>
Cc: tzimmermann@suse.de, sean@poorly.run, quic_abhinavk@quicinc.com,
	mripard@kernel.org, steven.price@arm.com,
	freedreno@lists.freedesktop.org, healych@amazon.com,
	dri-devel@lists.freedesktop.org, linux-arm-msm@vger.kernel.org,
	dmitry.baryshkov@linaro.org, marijn.suijten@somainline.org,
	kernel@collabora.com, linux-kernel@vger.kernel.org
Subject: Re: [PATCH v2 5/6] drm/panfrost: Implement generic DRM object RSS reporting function
Date: Wed, 30 Aug 2023 12:52:10 +0200	[thread overview]
Message-ID: <20230830125210.3d1172db@collabora.com> (raw)
In-Reply-To: <20230824013604.466224-6-adrian.larumbe@collabora.com>

On Thu, 24 Aug 2023 02:34:48 +0100
Adrián Larumbe <adrian.larumbe@collabora.com> wrote:

> BO's RSS is updated every time new pages are allocated and mapped for the
> object, either in its entirety at creation time for non-heap buffers, or
> else on demand for heap buffers at GPU page fault's IRQ handler.
> 
> Same calculations had to be done for imported PRIME objects, since backing
> storage for it might have already been allocated by the exporting driver.
> 
> Signed-off-by: Adrián Larumbe <adrian.larumbe@collabora.com>
> ---
>  drivers/gpu/drm/panfrost/panfrost_gem.c | 22 ++++++++++++++++++++++
>  drivers/gpu/drm/panfrost/panfrost_gem.h |  5 +++++
>  drivers/gpu/drm/panfrost/panfrost_mmu.c | 16 +++++++++++-----
>  3 files changed, 38 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c
> index aea16b0e4dda..c6bd1f16a6d4 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_gem.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_gem.c
> @@ -206,6 +206,17 @@ static enum drm_gem_object_status panfrost_gem_status(struct drm_gem_object *obj
>  
>  	return res;
>  }
> +
> +size_t panfrost_gem_rss(struct drm_gem_object *obj)
> +{
> +	struct panfrost_gem_object *bo = to_panfrost_bo(obj);
> +
> +	if (!bo->base.pages)
> +		return 0;
> +
> +	return bo->rss_size;
> +}
> +
>  static const struct drm_gem_object_funcs panfrost_gem_funcs = {
>  	.free = panfrost_gem_free_object,
>  	.open = panfrost_gem_open,
> @@ -218,6 +229,7 @@ static const struct drm_gem_object_funcs panfrost_gem_funcs = {
>  	.vunmap = drm_gem_shmem_object_vunmap,
>  	.mmap = drm_gem_shmem_object_mmap,
>  	.status = panfrost_gem_status,
> +	.rss = panfrost_gem_rss,
>  	.vm_ops = &drm_gem_shmem_vm_ops,
>  };
>  
> @@ -274,13 +286,23 @@ panfrost_gem_prime_import_sg_table(struct drm_device *dev,
>  {
>  	struct drm_gem_object *obj;
>  	struct panfrost_gem_object *bo;
> +	struct scatterlist *sgl;
> +	unsigned int count;
> +	size_t total = 0;
>  
>  	obj = drm_gem_shmem_prime_import_sg_table(dev, attach, sgt);
>  	if (IS_ERR(obj))
>  		return ERR_CAST(obj);
>  
> +	for_each_sgtable_dma_sg(sgt, sgl, count) {
> +		size_t len = sg_dma_len(sgl);
> +
> +		total += len;
> +	}

Why not simply set bo->rss_size = obj->size here? I don't see a
reason not to trust the size reported by dma_buf.

> +
>  	bo = to_panfrost_bo(obj);
>  	bo->noexec = true;
> +	bo->rss_size = total;
>  
>  	return obj;
>  }
> diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.h b/drivers/gpu/drm/panfrost/panfrost_gem.h
> index e06f7ceb8f73..e2a7c46403c7 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_gem.h
> +++ b/drivers/gpu/drm/panfrost/panfrost_gem.h
> @@ -36,6 +36,11 @@ struct panfrost_gem_object {
>  	 */
>  	atomic_t gpu_usecount;
>  
> +	/*
> +	 * Object chunk size currently mapped onto physical memory
> +	 */
> +	size_t rss_size;
> +
>  	bool noexec		:1;
>  	bool is_heap		:1;
>  	bool is_purgable	:1;
> diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c
> index c0123d09f699..e03a5a9da06f 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_mmu.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
> @@ -285,17 +285,19 @@ static void panfrost_mmu_flush_range(struct panfrost_device *pfdev,
>  	pm_runtime_put_autosuspend(pfdev->dev);
>  }
>  
> -static int mmu_map_sg(struct panfrost_device *pfdev, struct panfrost_mmu *mmu,
> +static size_t mmu_map_sg(struct panfrost_device *pfdev, struct panfrost_mmu *mmu,
>  		      u64 iova, int prot, struct sg_table *sgt)
>  {
>  	unsigned int count;
>  	struct scatterlist *sgl;
>  	struct io_pgtable_ops *ops = mmu->pgtbl_ops;
>  	u64 start_iova = iova;
> +	size_t total = 0;
>  
>  	for_each_sgtable_dma_sg(sgt, sgl, count) {
>  		unsigned long paddr = sg_dma_address(sgl);
>  		size_t len = sg_dma_len(sgl);
> +		total += len;
>  
>  		dev_dbg(pfdev->dev, "map: as=%d, iova=%llx, paddr=%lx, len=%zx", mmu->as, iova, paddr, len);
>  
> @@ -315,7 +317,7 @@ static int mmu_map_sg(struct panfrost_device *pfdev, struct panfrost_mmu *mmu,
>  
>  	panfrost_mmu_flush_range(pfdev, mmu, start_iova, iova - start_iova);
>  
> -	return 0;
> +	return total;
>  }
>  
>  int panfrost_mmu_map(struct panfrost_gem_mapping *mapping)
> @@ -326,6 +328,7 @@ int panfrost_mmu_map(struct panfrost_gem_mapping *mapping)
>  	struct panfrost_device *pfdev = to_panfrost_device(obj->dev);
>  	struct sg_table *sgt;
>  	int prot = IOMMU_READ | IOMMU_WRITE;
> +	size_t mapped_size;
>  
>  	if (WARN_ON(mapping->active))
>  		return 0;
> @@ -337,9 +340,10 @@ int panfrost_mmu_map(struct panfrost_gem_mapping *mapping)
>  	if (WARN_ON(IS_ERR(sgt)))
>  		return PTR_ERR(sgt);
>  
> -	mmu_map_sg(pfdev, mapping->mmu, mapping->mmnode.start << PAGE_SHIFT,
> +	mapped_size = mmu_map_sg(pfdev, mapping->mmu, mapping->mmnode.start << PAGE_SHIFT,
>  		   prot, sgt);
>  	mapping->active = true;
> +	bo->rss_size += mapped_size;

Actually, the GEM might be resident even before panfrost_mmu_map() is
called: as soon as drm_gem_shmem_get_pages[_locked]() is called, it's
resident (might get evicted after that point though). That means any
mmap coming from userspace will make the buffer resident too. I know
we're automatically mapping GEMs to the GPU VM in panfrost_gem_open(),
so it makes no difference, but I think I'd prefer if we keep ->rss_size
for heap BOs only (we probably want to rename it heap_rss_size) and
then have


	if (bo->is_heap)
		return bo->heap_rss_size;
	else if (bo->base.pages)
		return bo->base.base.size;
	else
		return 0;

in panfrost_gem_rss().

>  
>  	return 0;
>  }
> @@ -447,6 +451,7 @@ static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as,
>  	pgoff_t page_offset;
>  	struct sg_table *sgt;
>  	struct page **pages;
> +	size_t mapped_size;
>  
>  	bomapping = addr_to_mapping(pfdev, as, addr);
>  	if (!bomapping)
> @@ -518,10 +523,11 @@ static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as,
>  	if (ret)
>  		goto err_map;
>  
> -	mmu_map_sg(pfdev, bomapping->mmu, addr,
> -		   IOMMU_WRITE | IOMMU_READ | IOMMU_NOEXEC, sgt);
> +	mapped_size = mmu_map_sg(pfdev, bomapping->mmu, addr,
> +				 IOMMU_WRITE | IOMMU_READ | IOMMU_NOEXEC, sgt);
>  
>  	bomapping->active = true;
> +	bo->rss_size += mapped_size;
>  
>  	dev_dbg(pfdev->dev, "mapped page fault @ AS%d %llx", as, addr);
>  


  parent reply	other threads:[~2023-08-30 18:32 UTC|newest]

Thread overview: 56+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-08-24  1:34 [PATCH v2 0/6] Add fdinfo support to Panfrost Adrián Larumbe
2023-08-24  1:34 ` Adrián Larumbe
2023-08-24  1:34 ` [PATCH v2 1/6] drm/panfrost: Add cycle count GPU register definitions Adrián Larumbe
2023-08-24  1:34   ` Adrián Larumbe
2023-08-30 10:35   ` Boris Brezillon
2023-08-30 10:35     ` Boris Brezillon
2023-08-31 15:54   ` Steven Price
2023-08-31 15:54     ` Steven Price
2023-08-24  1:34 ` [PATCH v2 2/6] drm/panfrost: Add fdinfo support GPU load metrics Adrián Larumbe
2023-08-24  1:34   ` Adrián Larumbe
2023-08-24  4:12   ` kernel test robot
2023-08-24  4:12     ` kernel test robot
2023-08-30 10:17   ` Boris Brezillon
2023-08-30 10:17     ` Boris Brezillon
2023-08-31 23:23     ` Adrián Larumbe
2023-08-31 23:23       ` Adrián Larumbe
2023-08-31 15:54   ` Steven Price
2023-08-31 15:54     ` Steven Price
2023-08-31 21:34     ` Adrián Larumbe
2023-08-31 21:34       ` Adrián Larumbe
2023-09-04  8:22       ` Steven Price
2023-09-04  8:22         ` Steven Price
2023-09-02  3:20   ` kernel test robot
2023-09-02  3:20     ` kernel test robot
2023-08-24  1:34 ` [PATCH v2 3/6] drm/panfrost: Add fdinfo support for memory stats Adrián Larumbe
2023-08-24  1:34   ` Adrián Larumbe
2023-08-30 10:31   ` Boris Brezillon
2023-08-30 10:31     ` Boris Brezillon
2023-08-31 23:07     ` Adrián Larumbe
2023-08-31 23:07       ` Adrián Larumbe
2023-08-24  1:34 ` [PATCH v2 4/6] drm/drm_file: Add DRM obj's RSS reporting function for fdinfo Adrián Larumbe
2023-08-24  1:34   ` Adrián Larumbe
2023-08-30 10:34   ` Boris Brezillon
2023-08-30 10:34     ` Boris Brezillon
2023-08-24  1:34 ` [PATCH v2 5/6] drm/panfrost: Implement generic DRM object RSS reporting function Adrián Larumbe
2023-08-24  1:34   ` Adrián Larumbe
2023-08-24 11:13   ` kernel test robot
2023-08-24 11:13     ` kernel test robot
2023-08-30 10:52   ` Boris Brezillon [this message]
2023-08-30 10:52     ` Boris Brezillon
2023-09-01  0:03     ` Adrián Larumbe
2023-09-01  0:03       ` Adrián Larumbe
2023-09-01  6:44       ` Boris Brezillon
2023-09-01  6:44         ` Boris Brezillon
2023-08-24  1:34 ` [PATCH v2 6/6] drm/drm-file: Allow size unit selection in drm_show_memory_stats Adrián Larumbe
2023-08-24  1:34   ` Adrián Larumbe
2023-08-24  6:49   ` kernel test robot
2023-08-24  6:49     ` kernel test robot
2023-08-28 15:00   ` Rob Clark
2023-08-28 15:00     ` Rob Clark
2023-08-30 15:51     ` Adrián Larumbe
2023-08-30 15:51       ` Adrián Larumbe
2023-09-05 22:23       ` Rob Clark
2023-09-05 22:23         ` Rob Clark
2023-09-01 22:18   ` kernel test robot
2023-09-01 22:18     ` kernel test robot

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230830125210.3d1172db@collabora.com \
    --to=boris.brezillon@collabora.com \
    --cc=adrian.larumbe@collabora.com \
    --cc=airlied@gmail.com \
    --cc=daniel@ffwll.ch \
    --cc=dmitry.baryshkov@linaro.org \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=freedreno@lists.freedesktop.org \
    --cc=healych@amazon.com \
    --cc=kernel@collabora.com \
    --cc=linux-arm-msm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=maarten.lankhorst@linux.intel.com \
    --cc=marijn.suijten@somainline.org \
    --cc=mripard@kernel.org \
    --cc=quic_abhinavk@quicinc.com \
    --cc=robdclark@gmail.com \
    --cc=robh@kernel.org \
    --cc=sean@poorly.run \
    --cc=steven.price@arm.com \
    --cc=tzimmermann@suse.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.