All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Timur Kristóf" <timur.kristof@gmail.com>
To: natalie.vock@gmx.de, honghuan@amd.com, Alexander.Deucher@amd.com,
	Felix.Kuehling@amd.com, Philip.Yang@amd.com,
	christian.koenig@amd.com
Cc: amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 03/13] drm/amdgpu: add gfp_flags to amdgpu_sa_manager
Date: Fri, 12 Jun 2026 08:18:41 +0200	[thread overview]
Message-ID: <2787510.lGaqSPkdTl@timur-max> (raw)
In-Reply-To: <20260529114031.3714-4-christian.koenig@amd.com>

On 2026. május 29., péntek 13:24:05 közép-európai nyári idő Christian König 
wrote:
> Make sure that we use the emergency reserves for unrecoverable page
> faults and GPU resets.
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 23 ++++++++++++++++++++---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c | 18 ++++++++++++------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_sa.h |  3 ++-
>  3 files changed, 34 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 334f95f8f339..60e4c3985029
> 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -356,16 +356,33 @@ int amdgpu_ib_pool_init(struct amdgpu_device *adev)
>  		[AMDGPU_IB_POOL_IMMEDIATE] = SZ_128K,
>  		[AMDGPU_IB_POOL_DIRECT] = SZ_512K
>  	};
> +	const gfp_t gfp_flags[AMDGPU_IB_POOL_MAX] = {
> +		/*
> +		 * For normal page table updates and recoverable page faults,
> +		 * further restricted by the VM eviction lock to not wait for
> +		 * memory reclaim.
> +		 */
> +		[AMDGPU_IB_POOL_DELAYED] = GFP_KERNEL,
> +		/*
> +		 * For redirecting unrecoverable page faults to the dummy page
> +		 * or set the PRT bits. dma_fence submissions might depend on
> +		 * that so we need the emergency reserves.
> +		 */
> +		[AMDGPU_IB_POOL_IMMEDIATE] = GFP_ATOMIC,

I know that "retry faults" and "recoverable faults" are different, but both of 
these faults can be mitigated, so using the term "unrecoverable" here feels 
somewhat confusing to me.

Can you please clarify, both in the comment above and in the commit message, 
that this refers to retry faults?

> +		/*
> +		 * For IB tests during GPU resets. Only very small and temporary
> +		 * allocation to make allow dma_fences to signal.
> +		 */
> +		[AMDGPU_IB_POOL_DIRECT] = GFP_ATOMIC

Just a small grammatical nitpick: the word "make" is superfluous in this 
comment; "to allow dma_fences to signal" is enough.

> +	};
>  	int r, i;
> 
>  	if (adev->ib_pool_ready)
>  		return 0;
> 
> -

Looks like this hunk only removes a spurious blank line that was introduced by 
the previous commit; it would be cleaner to fix that in the previous commit 
instead.

With that fixed and the comments + commit message clarified:
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>

>  	for (i = 0; i < AMDGPU_IB_POOL_MAX; i++) {
>  		r = amdgpu_sa_bo_manager_init(adev, &adev->ib_pools[i],
> -					      sizes[i], 256,
> -					      
AMDGPU_GEM_DOMAIN_GTT);
> +					      sizes[i], 
gfp_flags[i]);
>  		if (r)
>  			goto error;
>  	}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c index 39070b2a4c04..74124f80601e
> 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
> @@ -46,11 +46,13 @@
> 
>  int amdgpu_sa_bo_manager_init(struct amdgpu_device *adev,
>  			      struct amdgpu_sa_manager *sa_manager,
> -			      unsigned int size, u32 suballoc_align, 
u32 domain)
> +			      unsigned int size, gfp_t gfp_flags)
>  {
>  	int r;
> 
> -	r = amdgpu_bo_create_kernel(adev, size, AMDGPU_GPU_PAGE_SIZE, 
domain,
> +	sa_manager->gfp_flags = gfp_flags;
> +	r = amdgpu_bo_create_kernel(adev, size, AMDGPU_GPU_PAGE_SIZE,
> +				    AMDGPU_GEM_DOMAIN_GTT,
>  				    &sa_manager->bo, &sa_manager-
>gpu_addr,
>  				    &sa_manager->cpu_ptr);
>  	if (r) {
> @@ -59,7 +61,8 @@ int amdgpu_sa_bo_manager_init(struct amdgpu_device *adev,
>  	}
> 
>  	memset(sa_manager->cpu_ptr, 0, size);
> -	drm_suballoc_manager_init(&sa_manager->base, size, 
suballoc_align);
> +	drm_suballoc_manager_init(&sa_manager->base, size, 256);
> +
>  	return r;
>  }
> 
> @@ -73,7 +76,8 @@ void amdgpu_sa_bo_manager_fini(struct amdgpu_device *adev,
> 
>  	drm_suballoc_manager_fini(&sa_manager->base);
> 
> -	amdgpu_bo_free_kernel(&sa_manager->bo, &sa_manager->gpu_addr, &sa_manager->cpu_ptr);
> +	amdgpu_bo_free_kernel(&sa_manager->bo, &sa_manager->gpu_addr,
> +			      &sa_manager->cpu_ptr);
>  }
> 
>  int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
> @@ -81,7 +85,8 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
> unsigned int size)
>  {
>  	struct drm_suballoc *sa = drm_suballoc_new(&sa_manager->base, 
size,
> -						   
GFP_KERNEL, false, 0);
> +						   
sa_manager->gfp_flags,
> +						   false, 
0);
> 
>  	if (IS_ERR(sa)) {
>  		*sa_bo = NULL;
> @@ -110,6 +115,7 @@ void amdgpu_sa_bo_dump_debug_info(struct
> amdgpu_sa_manager *sa_manager, {
>  	struct drm_printer p = drm_seq_file_printer(m);
> 
> -	drm_suballoc_dump_debug_info(&sa_manager->base, &p, sa_manager-
>gpu_addr);
> +	drm_suballoc_dump_debug_info(&sa_manager->base, &p,
> +				     sa_manager->gpu_addr);
>  }
>  #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.h index 8c85c80fc119..1d1c89348709
> 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.h
> @@ -35,6 +35,7 @@ struct amdgpu_sa_manager {
>  	struct amdgpu_bo		*bo;
>  	uint64_t			gpu_addr;
>  	void				*cpu_ptr;
> +	gfp_t				gfp_flags;
>  };
> 
>  static inline struct amdgpu_sa_manager *
> @@ -57,7 +58,7 @@ static inline void *amdgpu_sa_bo_cpu_addr(struct
> drm_suballoc *sa_bo)
> 
>  int amdgpu_sa_bo_manager_init(struct amdgpu_device *adev,
>  			      struct amdgpu_sa_manager *sa_manager,
> -			      unsigned size, u32 align, u32 domain);
> +			      unsigned size, gfp_t gfp_flags);
>  void amdgpu_sa_bo_manager_fini(struct amdgpu_device *adev,
>  			       struct amdgpu_sa_manager 
*sa_manager);
>  int amdgpu_sa_bo_manager_start(struct amdgpu_device *adev,





  reply	other threads:[~2026-06-12  6:18 UTC|newest]

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-29 11:24 Christian König
2026-05-29 11:24 ` [PATCH 01/13] drm/amdgpu: move suballoc defines into own header Christian König
2026-06-10  7:53   ` Christian König
2026-06-10 16:14   ` Kuehling, Felix
2026-06-12  6:12   ` Timur Kristóf
2026-05-29 11:24 ` [PATCH 02/13] drm/amdgpu: give different sizes for each SA pool type Christian König
2026-06-12  6:13   ` Timur Kristóf
2026-05-29 11:24 ` [PATCH 03/13] drm/amdgpu: add gfp_flags to amdgpu_sa_manager Christian König
2026-06-12  6:18   ` Timur Kristóf [this message]
2026-05-29 11:24 ` [PATCH 04/13] drm/amdgpu: move job parameter to the end in amdgpu_job_alloc() and *_with_ib() Christian König
2026-06-12  6:21   ` Timur Kristóf
2026-05-29 11:24 ` [PATCH 05/13] drm/amdgpu: use correct gfp_t for job allocation Christian König
2026-06-12  6:33   ` Timur Kristóf
2026-05-29 11:24 ` [PATCH 06/13] drm/amdgpu: add amdgpu_vm_update_leaves() Christian König
2026-06-12  6:54   ` Timur Kristóf
2026-05-29 11:24 ` [PATCH 07/13] drm/amdgpu: drop immediate updates from amdgpu_vm_update_range Christian König
2026-06-12  6:58   ` Timur Kristóf
2026-05-29 11:24 ` [PATCH 08/13] drm/amdgpu: split amdgpu_vm_update_range Christian König
2026-06-01 13:51   ` Pierre-Eric Pelloux-Prayer
2026-06-01 13:58     ` Christian König
2026-06-03 17:54   ` Kuehling, Felix
2026-06-05  9:21     ` Christian König
2026-06-05 19:21       ` Kuehling, Felix
2026-06-04 10:03   ` Huang, Honglei
2026-05-29 11:24 ` [PATCH 09/13] drm/amdgpu: start to move VM internals into amdgpu_vm_internal.h Christian König
2026-05-29 11:24 ` [PATCH 10/13] drm/amdgpu: remove unecessary parameters from trace_amdgpu_vm_update_ptes Christian König
2026-05-29 11:24 ` [PATCH 11/13] drm/amdgpu: nuke most amdgpu_vm_eviction_(try)lock uses Christian König
2026-06-03 18:00   ` Kuehling, Felix
2026-05-29 11:24 ` [PATCH 12/13] drm/amdgpu: rework eviction lock handling into critical section Christian König
2026-05-29 11:24 ` [PATCH 13/13] drm/amdgpu: fix the HMM range handling for KFD SVM Christian König
2026-06-03 19:23   ` Kuehling, Felix
2026-05-29 13:35 ` VM reworks Natalie Vock
2026-06-01  2:46 ` Huang, Honglei1

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=2787510.lGaqSPkdTl@timur-max \
    --to=timur.kristof@gmail.com \
    --cc=Alexander.Deucher@amd.com \
    --cc=Felix.Kuehling@amd.com \
    --cc=Philip.Yang@amd.com \
    --cc=amd-gfx@lists.freedesktop.org \
    --cc=christian.koenig@amd.com \
    --cc=honghuan@amd.com \
    --cc=natalie.vock@gmx.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.