Re: [PATCH v5 04/10] drm/amdgpu: remove AMDGPU_GTT_NUM_TRANSFER_WINDOWS

All of lore.kernel.org
 help / color / mirror / Atom feed

From: "Timur Kristóf" <timur.kristof@gmail.com>
To: "Alex Deucher" <alexander.deucher@amd.com>,
	"Christian König" <christian.koenig@amd.com>,
	"David Airlie" <airlied@gmail.com>,
	"Simona Vetter" <simona@ffwll.ch>,
	amd-gfx@lists.freedesktop.org
Cc: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>,
	amd-gfx@lists.freedesktop.org, dri-devel@lists.freedesktop.org,
	linux-kernel@vger.kernel.org,
	Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Subject: Re: [PATCH v5 04/10] drm/amdgpu: remove AMDGPU_GTT_NUM_TRANSFER_WINDOWS
Date: Fri, 23 Jan 2026 12:16:15 +0100	[thread overview]
Message-ID: <3698448.dWV9SEqChM@timur-hyperion> (raw)
In-Reply-To: <20260122170218.3077-5-pierre-eric.pelloux-prayer@amd.com>

On Thursday, January 22, 2026 6:02:01 PM Central European Standard Time 
Pierre-Eric Pelloux-Prayer wrote:
> Instead use amdgpu_gtt_mgr_alloc_entries to avoid hardcoding
> the number of windows we need.
> 
> It also allows to simplify amdgpu_gtt_mgr_init because we don't
> need to reserve some pages anymore and this makes the
> amdgpu_vce_required_gart_pages function unneeded.

I suggest to split this patch into two if possible in order to make it cleaner 
and also allow for easier bisection in the future:
1. First patch to remove amdgpu_vce_required_gart_pages() and use
the amdgpu_gtt_mgr_alloc_entries() function instead.
2. Second patch to get rid of AMDGPU_GTT_NUM_TRANSFER_WINDOWS

> 
> ---
> v5: switch to amdgpu_gtt_mgr_alloc_entries
> ---
> 
> Signed-off-by: Pierre-Eric Pelloux-Prayer
> <pierre-eric.pelloux-prayer@amd.com> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c |  6 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c     | 66 +++++++++++++--------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h     |  2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c     | 18 ------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h     |  2 +-
>  drivers/gpu/drm/amd/amdgpu/vce_v1_0.c       | 27 ++++++---
>  6 files changed, 63 insertions(+), 58 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c index
> dd9b845d5783..9b0bcf6aca44 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
> @@ -324,17 +324,13 @@ int amdgpu_gtt_mgr_init(struct amdgpu_device *adev,
> uint64_t gtt_size) {
>  	struct amdgpu_gtt_mgr *mgr = &adev->mman.gtt_mgr;
>  	struct ttm_resource_manager *man = &mgr->manager;
> -	uint64_t start, size;
> 
>  	man->use_tt = true;
>  	man->func = &amdgpu_gtt_mgr_func;
> 
>  	ttm_resource_manager_init(man, &adev->mman.bdev, gtt_size);
> 
> -	start = AMDGPU_GTT_MAX_TRANSFER_SIZE * 
AMDGPU_GTT_NUM_TRANSFER_WINDOWS;
> -	start += amdgpu_vce_required_gart_pages(adev);
> -	size = (adev->gmc.gart_size >> PAGE_SHIFT) - start;
> -	drm_mm_init(&mgr->mm, start, size);
> +	drm_mm_init(&mgr->mm, 0, adev->gmc.gart_size >> PAGE_SHIFT);
>  	spin_lock_init(&mgr->lock);
> 
>  	ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, &mgr-
>manager);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 8b38b5ed9a9c..d23d3046919b
> 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -2012,37 +2012,47 @@ static void amdgpu_ttm_free_mmio_remap_bo(struct
> amdgpu_device *adev) adev->rmmio_remap.bo = NULL;
>  }
> 
> -static int amdgpu_ttm_buffer_entity_init(struct amdgpu_ttm_buffer_entity
> *entity, +static int amdgpu_ttm_buffer_entity_init(struct amdgpu_gtt_mgr
> *mgr, +					 struct 
amdgpu_ttm_buffer_entity *entity,
>  					 enum 
drm_sched_priority prio,
>  					 struct 
drm_gpu_scheduler **scheds,
>  					 int num_schedulers,
> -					 int 
starting_gart_window,
>  					 u32 num_gart_windows)
>  {
> -	int i, r;
> +	int i, r, num_pages;
> 
>  	r = drm_sched_entity_init(&entity->base, prio, scheds, 
num_schedulers,
> NULL); if (r)
>  		return r;
> 
> -
>  	mutex_init(&entity->lock);
> 
>  	if (ARRAY_SIZE(entity->gart_window_offs) < num_gart_windows)
> -		return starting_gart_window;
> +		return -EINVAL;
> +	if (num_gart_windows == 0)
> +		return 0;
> +
> +	num_pages = num_gart_windows * AMDGPU_GTT_MAX_TRANSFER_SIZE;
> +	r = amdgpu_gtt_mgr_alloc_entries(mgr, &entity->node, num_pages,
> +					 DRM_MM_INSERT_BEST);
> +	if (r) {
> +		drm_sched_entity_destroy(&entity->base);
> +		return r;
> +	}
> 
>  	for (i = 0; i < num_gart_windows; i++) {
>  		entity->gart_window_offs[i] =
> -			(u64)starting_gart_window * 
AMDGPU_GTT_MAX_TRANSFER_SIZE *
> -				AMDGPU_GPU_PAGE_SIZE;
> -		starting_gart_window++;
> +			(entity->node.start + (u64)i * 
AMDGPU_GTT_MAX_TRANSFER_SIZE) *
> +			AMDGPU_GPU_PAGE_SIZE;
>  	}
> 
> -	return starting_gart_window;
> +	return 0;
>  }
> 
> -static void amdgpu_ttm_buffer_entity_fini(struct amdgpu_ttm_buffer_entity
> *entity) +static void amdgpu_ttm_buffer_entity_fini(struct amdgpu_gtt_mgr
> *mgr, +					  struct 
amdgpu_ttm_buffer_entity *entity)
>  {
> +	amdgpu_gtt_mgr_free_entries(mgr, &entity->node);
>  	drm_sched_entity_destroy(&entity->base);
>  }
> 
> @@ -2343,36 +2353,42 @@ void amdgpu_ttm_set_buffer_funcs_status(struct
> amdgpu_device *adev, bool enable)
> 
>  		ring = adev->mman.buffer_funcs_ring;
>  		sched = &ring->sched;
> -		r = amdgpu_ttm_buffer_entity_init(&adev-
>mman.default_entity,
> -						  
DRM_SCHED_PRIORITY_KERNEL, &sched, 1,
> -						  0, 0);
> +		r = amdgpu_ttm_buffer_entity_init(&adev->mman.gtt_mgr,
> +						  &adev-
>mman.default_entity,
> +						  
DRM_SCHED_PRIORITY_KERNEL,
> +						  &sched, 1, 
0);
>  		if (r < 0) {
>  			dev_err(adev->dev,
>  				"Failed setting up TTM entity 
(%d)\n", r);
>  			return;
>  		}
> 
> -		r = amdgpu_ttm_buffer_entity_init(&adev-
>mman.clear_entity,
> -						  
DRM_SCHED_PRIORITY_NORMAL, &sched, 1,
> -						  r, 1);
> +		r = amdgpu_ttm_buffer_entity_init(&adev->mman.gtt_mgr,
> +						  &adev-
>mman.clear_entity,
> +						  
DRM_SCHED_PRIORITY_NORMAL,
> +						  &sched, 1, 
1);
>  		if (r < 0) {
>  			dev_err(adev->dev,
>  				"Failed setting up TTM BO clear 
entity (%d)\n", r);
>  			goto error_free_default_entity;
>  		}
> 
> -		r = amdgpu_ttm_buffer_entity_init(&adev-
>mman.move_entity,
> -						  
DRM_SCHED_PRIORITY_NORMAL, &sched, 1,
> -						  r, 2);
> +		r = amdgpu_ttm_buffer_entity_init(&adev->mman.gtt_mgr,
> +						  &adev-
>mman.move_entity,
> +						  
DRM_SCHED_PRIORITY_NORMAL,
> +						  &sched, 1, 
2);
>  		if (r < 0) {
>  			dev_err(adev->dev,
>  				"Failed setting up TTM BO move 
entity (%d)\n", r);
>  			goto error_free_clear_entity;
>  		}
>  	} else {
> -		amdgpu_ttm_buffer_entity_fini(&adev-
>mman.default_entity);
> -		amdgpu_ttm_buffer_entity_fini(&adev->mman.clear_entity);
> -		amdgpu_ttm_buffer_entity_fini(&adev->mman.move_entity);
> +		amdgpu_ttm_buffer_entity_fini(&adev->mman.gtt_mgr,
> +					      &adev-
>mman.default_entity);
> +		amdgpu_ttm_buffer_entity_fini(&adev->mman.gtt_mgr,
> +					      &adev-
>mman.clear_entity);
> +		amdgpu_ttm_buffer_entity_fini(&adev->mman.gtt_mgr,
> +					      &adev-
>mman.move_entity);
>  		/* Drop all the old fences since re-creating the 
scheduler entities
>  		 * will allocate new contexts.
>  		 */
> @@ -2390,9 +2406,11 @@ void amdgpu_ttm_set_buffer_funcs_status(struct
> amdgpu_device *adev, bool enable) return;
> 
>  error_free_clear_entity:
> -	amdgpu_ttm_buffer_entity_fini(&adev->mman.clear_entity);
> +	amdgpu_ttm_buffer_entity_fini(&adev->mman.gtt_mgr,
> +				      &adev->mman.clear_entity);
>  error_free_default_entity:
> -	amdgpu_ttm_buffer_entity_fini(&adev->mman.default_entity);
> +	amdgpu_ttm_buffer_entity_fini(&adev->mman.gtt_mgr,
> +				      &adev->mman.default_entity);
>  }
> 
>  static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 871388b86503..c8284cb2d22c
> 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> @@ -40,7 +40,6 @@
>  #define __AMDGPU_PL_NUM	(TTM_PL_PRIV + 6)
> 
>  #define AMDGPU_GTT_MAX_TRANSFER_SIZE	512
> -#define AMDGPU_GTT_NUM_TRANSFER_WINDOWS	3
> 
>  extern const struct attribute_group amdgpu_vram_mgr_attr_group;
>  extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
> @@ -56,6 +55,7 @@ struct amdgpu_gtt_mgr {
>  struct amdgpu_ttm_buffer_entity {
>  	struct drm_sched_entity base;
>  	struct mutex		lock;
> +	struct drm_mm_node	node;
>  	u64			gart_window_offs[2];
>  };
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index a7d8f1ce6ac2..eb4a15db2ef2
> 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
> @@ -450,24 +450,6 @@ void amdgpu_vce_free_handles(struct amdgpu_device
> *adev, struct drm_file *filp) }
>  }
> 
> -/**
> - * amdgpu_vce_required_gart_pages() - gets number of GART pages required by
> VCE - *
> - * @adev: amdgpu_device pointer
> - *
> - * Returns how many GART pages we need before GTT for the VCE IP block.
> - * For VCE1, see vce_v1_0_ensure_vcpu_bo_32bit_addr for details.
> - * For VCE2+, this is not needed so return zero.
> - */
> -u32 amdgpu_vce_required_gart_pages(struct amdgpu_device *adev)
> -{
> -	/* VCE IP block not added yet, so can't use amdgpu_ip_version */
> -	if (adev->family == AMDGPU_FAMILY_SI)
> -		return 512;
> -
> -	return 0;
> -}
> -
>  /**
>   * amdgpu_vce_get_create_msg - generate a VCE create msg
>   *
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h index 1c3464ce5037..a59d87e09004
> 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
> @@ -52,6 +52,7 @@ struct amdgpu_vce {
>  	uint32_t                srbm_soft_reset;
>  	unsigned		num_rings;
>  	uint32_t		keyselect;
> +	struct drm_mm_node	node;
>  };
> 
>  int amdgpu_vce_early_init(struct amdgpu_device *adev);
> @@ -61,7 +62,6 @@ int amdgpu_vce_entity_init(struct amdgpu_device *adev,
> struct amdgpu_ring *ring) int amdgpu_vce_suspend(struct amdgpu_device
> *adev);
>  int amdgpu_vce_resume(struct amdgpu_device *adev);
>  void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file
> *filp); -u32 amdgpu_vce_required_gart_pages(struct amdgpu_device *adev);
>  int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, struct amdgpu_job
> *job, struct amdgpu_ib *ib);
>  int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p,
> diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c
> b/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c index 9ae424618556..bd47fda52e7e
> 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c
> @@ -47,11 +47,6 @@
>  #define VCE_V1_0_DATA_SIZE	(7808 * (AMDGPU_MAX_VCE_HANDLES + 1))
>  #define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02
> 
> -#define VCE_V1_0_GART_PAGE_START \
> -	(AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS)
> -#define VCE_V1_0_GART_ADDR_START \
> -	(VCE_V1_0_GART_PAGE_START * AMDGPU_GPU_PAGE_SIZE)
> -
>  static void vce_v1_0_set_ring_funcs(struct amdgpu_device *adev);
>  static void vce_v1_0_set_irq_funcs(struct amdgpu_device *adev);
> 
> @@ -541,6 +536,16 @@ static int vce_v1_0_ensure_vcpu_bo_32bit_addr(struct
> amdgpu_device *adev) u64 num_pages = ALIGN(bo_size, AMDGPU_GPU_PAGE_SIZE) /
> AMDGPU_GPU_PAGE_SIZE; u64 pa = amdgpu_gmc_vram_pa(adev, adev->vce.vcpu_bo);
>  	u64 flags = AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | 
AMDGPU_PTE_VALID;
> +	u64 vce_gart_start;
> +	int r;
> +
> +	r = amdgpu_gtt_mgr_alloc_entries(&adev->mman.gtt_mgr,
> +					 &adev->vce.node, 
num_pages,
> +					 DRM_MM_INSERT_LOW);
> +	if (r)
> +		return r;
> +
> +	vce_gart_start = adev->vce.node.start * AMDGPU_GPU_PAGE_SIZE;

When the VCPU BO already has a 32-bit address, VCE does not need to use any 
GART space at all. I think you can just safely remove the "Check if the VCPU 
BO already has a 32-bit address". That is practically never going to be case 
anyway.

> 
>  	/*
>  	 * Check if the VCPU BO already has a 32-bit address.
> @@ -550,12 +555,12 @@ static int vce_v1_0_ensure_vcpu_bo_32bit_addr(struct
> amdgpu_device *adev) return 0;
> 
>  	/* Check if we can map the VCPU BO in GART to a 32-bit address. */
> -	if (adev->gmc.gart_start + VCE_V1_0_GART_ADDR_START > 
max_vcpu_bo_addr)
> +	if (adev->gmc.gart_start + vce_gart_start > max_vcpu_bo_addr)
>  		return -EINVAL;
> 
> -	amdgpu_gart_map_vram_range(adev, pa, VCE_V1_0_GART_PAGE_START,
> +	amdgpu_gart_map_vram_range(adev, pa, adev->vce.node.start,
>  				   num_pages, flags, adev-
>gart.ptr);
> -	adev->vce.gpu_addr = adev->gmc.gart_start + 
VCE_V1_0_GART_ADDR_START;
> +	adev->vce.gpu_addr = adev->gmc.gart_start + vce_gart_start;
>  	if (adev->vce.gpu_addr > max_vcpu_bo_addr)
>  		return -EINVAL;
> 
> @@ -610,7 +615,11 @@ static int vce_v1_0_sw_fini(struct amdgpu_ip_block
> *ip_block) if (r)
>  		return r;
> 
> -	return amdgpu_vce_sw_fini(adev);
> +	r = amdgpu_vce_sw_fini(adev);
> +
> +	amdgpu_gtt_mgr_free_entries(&adev->mman.gtt_mgr, &adev->vce.node);
> +
> +	return r;
>  }
> 
>  /**

next prev parent reply	other threads:[~2026-01-23 11:16 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-01-22 17:01 [PATCH v5 00/10] drm/amdgpu: preparation patchs for the use all SDMA instances series Pierre-Eric Pelloux-Prayer
2026-01-22 17:01 ` [PATCH v5 01/10] drm/amdgpu: remove gart_window_lock usage from gmc v12_1 Pierre-Eric Pelloux-Prayer
2026-01-22 19:52   ` Alex Deucher
2026-01-23 14:05   ` Christian König
2026-01-22 17:01 ` [PATCH v5 02/10] drm/amdgpu: statically assign gart windows to ttm entities Pierre-Eric Pelloux-Prayer
2026-01-22 17:02 ` [PATCH v5 03/10] drm/amdgpu: add amdgpu_ttm_buffer_entity_fini func Pierre-Eric Pelloux-Prayer
2026-01-23 14:32   ` Christian König
2026-01-22 17:02 ` [PATCH v5 04/10] drm/amdgpu: remove AMDGPU_GTT_NUM_TRANSFER_WINDOWS Pierre-Eric Pelloux-Prayer
2026-01-23 11:16   ` Timur Kristóf [this message]
2026-01-23 14:51   ` Christian König
2026-01-22 17:02 ` [PATCH v5 05/10] drm/amdgpu: add missing lock in amdgpu_benchmark_do_move Pierre-Eric Pelloux-Prayer
2026-01-22 17:02 ` [PATCH v5 06/10] drm/amdgpu: check entity lock is held in amdgpu_ttm_job_submit Pierre-Eric Pelloux-Prayer
2026-01-22 17:02 ` [PATCH v5 07/10] drm/amdgpu: double AMDGPU_GTT_MAX_TRANSFER_SIZE Pierre-Eric Pelloux-Prayer
2026-01-22 17:02 ` [PATCH v5 08/10] drm/amdgpu: use larger gart window when possible Pierre-Eric Pelloux-Prayer
2026-01-23 15:56   ` Christian König
2026-01-22 17:02 ` [PATCH v5 09/10] drm/amdgpu: introduce amdgpu_sdma_set_vm_pte_scheds Pierre-Eric Pelloux-Prayer
2026-01-22 17:02 ` [PATCH v5 10/10] drm/amdgpu: move sched status check inside amdgpu_ttm_set_buffer_funcs_status Pierre-Eric Pelloux-Prayer
2026-01-23 15:59   ` Christian König

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=3698448.dWV9SEqChM@timur-hyperion \
    --to=timur.kristof@gmail.com \
    --cc=airlied@gmail.com \
    --cc=alexander.deucher@amd.com \
    --cc=amd-gfx@lists.freedesktop.org \
    --cc=christian.koenig@amd.com \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=pierre-eric.pelloux-prayer@amd.com \
    --cc=simona@ffwll.ch \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.