Re: [PATCH] drm/xe: Implement clear VRAM on free

Intel-XE Archive on lore.kernel.org
 help / color / mirror / Atom feed

From: Matthew Auld <matthew.auld@intel.com>
To: Matthew Brost <matthew.brost@intel.com>, intel-xe@lists.freedesktop.org
Cc: thomas.hellstrom@linux.intel.com
Subject: Re: [PATCH] drm/xe: Implement clear VRAM on free
Date: Fri, 20 Jun 2025 14:08:04 +0100	[thread overview]
Message-ID: <52e8452f-cad6-47c0-9a4c-0e7ff333641c@intel.com> (raw)
In-Reply-To: <20250611054235.3540936-1-matthew.brost@intel.com>

On 11/06/2025 06:42, Matthew Brost wrote:
> Clearing on free should hide latency of BO clears on new user BO
> allocations.
> 
> Implemented via calling xe_migrate_clear in release notify and updating
> iterator in xe_migrate_clear to skip cleared buddy blocks. Only user BOs
> cleared in release notify as kernel BOs could still be in use (e.g., PT
> BOs need to wait for dma-resv to be idle).
> 
> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> ---
>   drivers/gpu/drm/xe/xe_bo.c           | 47 ++++++++++++++++++++++++++++
>   drivers/gpu/drm/xe/xe_migrate.c      | 14 ++++++---
>   drivers/gpu/drm/xe/xe_migrate.h      |  1 +
>   drivers/gpu/drm/xe/xe_res_cursor.h   | 26 +++++++++++++++
>   drivers/gpu/drm/xe/xe_ttm_vram_mgr.c |  5 ++-
>   drivers/gpu/drm/xe/xe_ttm_vram_mgr.h |  6 ++++
>   6 files changed, 94 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
> index 4e39188a021a..74470f4d418d 100644
> --- a/drivers/gpu/drm/xe/xe_bo.c
> +++ b/drivers/gpu/drm/xe/xe_bo.c
> @@ -1434,6 +1434,51 @@ static bool xe_ttm_bo_lock_in_destructor(struct ttm_buffer_object *ttm_bo)
>   	return locked;
>   }
>   
> +static void xe_ttm_bo_release_clear(struct ttm_buffer_object *ttm_bo)
> +{
> +	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
> +	struct dma_fence *fence;
> +	int err, idx;
> +
> +	xe_bo_assert_held(ttm_to_xe_bo(ttm_bo));
> +
> +	if (ttm_bo->type != ttm_bo_type_device)
> +		return;
> +
> +	if (xe_device_wedged(xe))
> +		return;
> +
> +	if (!ttm_bo->resource || !mem_type_is_vram(ttm_bo->resource->mem_type))
> +		return;
> +
> +	if (!drm_dev_enter(&xe->drm, &idx))
> +		return;
> +
> +	if (!xe_pm_runtime_get_if_active(xe))
> +		goto unbind;
> +
> +	err = dma_resv_reserve_fences(&ttm_bo->base._resv, 1);
> +	if (err)
> +		goto put_pm;
> +
> +	fence = xe_migrate_clear(mem_type_to_migrate(xe, ttm_bo->resource->mem_type),
> +				 ttm_to_xe_bo(ttm_bo), ttm_bo->resource,
> +				 XE_MIGRATE_CLEAR_FLAG_FULL |
> +				 XE_MIGRATE_CLEAR_NON_DIRTY);
> +	if (XE_WARN_ON(IS_ERR(fence)))
> +		goto put_pm;
> +
> +	xe_ttm_vram_mgr_resource_set_cleared(ttm_bo->resource);
> +	dma_resv_add_fence(&ttm_bo->base._resv, fence,
> +			   DMA_RESV_USAGE_KERNEL);
> +	dma_fence_put(fence);
> +
> +put_pm:
> +	xe_pm_runtime_put(xe);
> +unbind:
> +	drm_dev_exit(idx);
> +}
> +
>   static void xe_ttm_bo_release_notify(struct ttm_buffer_object *ttm_bo)
>   {
>   	struct dma_resv_iter cursor;
> @@ -1478,6 +1523,8 @@ static void xe_ttm_bo_release_notify(struct ttm_buffer_object *ttm_bo)
>   	}
>   	dma_fence_put(replacement);
>   
> +	xe_ttm_bo_release_clear(ttm_bo);
> +
>   	dma_resv_unlock(ttm_bo->base.resv);
>   }
>   
> diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
> index 8f8e9fdfb2a8..39d7200cb366 100644
> --- a/drivers/gpu/drm/xe/xe_migrate.c
> +++ b/drivers/gpu/drm/xe/xe_migrate.c
> @@ -1063,7 +1063,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
>   	struct xe_gt *gt = m->tile->primary_gt;
>   	struct xe_device *xe = gt_to_xe(gt);
>   	bool clear_only_system_ccs = false;
> -	struct dma_fence *fence = NULL;
> +	struct dma_fence *fence = dma_fence_get_stub();


>   	u64 size = bo->size;
>   	struct xe_res_cursor src_it;
>   	struct ttm_resource *src = dst;
> @@ -1075,10 +1075,13 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
>   	if (!clear_bo_data && clear_ccs && !IS_DGFX(xe))
>   		clear_only_system_ccs = true;
>   
> -	if (!clear_vram)
> +	if (!clear_vram) {
>   		xe_res_first_sg(xe_bo_sg(bo), 0, bo->size, &src_it);
> -	else
> +	} else {
>   		xe_res_first(src, 0, bo->size, &src_it);
> +		if (!(clear_flags & XE_MIGRATE_CLEAR_NON_DIRTY))
> +			size -= xe_res_next_dirty(&src_it);
> +	}
>   
>   	while (size) {
>   		u64 clear_L0_ofs;
> @@ -1125,6 +1128,9 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
>   			emit_pte(m, bb, clear_L0_pt, clear_vram, clear_only_system_ccs,
>   				 &src_it, clear_L0, dst);
>   
> +		if (clear_vram && !(clear_flags & XE_MIGRATE_CLEAR_NON_DIRTY))
> +			size -= xe_res_next_dirty(&src_it);
> +
>   		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
>   		update_idx = bb->len;
>   
> @@ -1146,7 +1152,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
>   		}
>   
>   		xe_sched_job_add_migrate_flush(job, flush_flags);
> -		if (!fence) {
> +		if (fence == dma_fence_get_stub()) {
>   			/*
>   			 * There can't be anything userspace related at this
>   			 * point, so we just need to respect any potential move
> diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h
> index fb9839c1bae0..58a7b747ef11 100644
> --- a/drivers/gpu/drm/xe/xe_migrate.h
> +++ b/drivers/gpu/drm/xe/xe_migrate.h
> @@ -118,6 +118,7 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo,
>   
>   #define XE_MIGRATE_CLEAR_FLAG_BO_DATA		BIT(0)
>   #define XE_MIGRATE_CLEAR_FLAG_CCS_DATA		BIT(1)
> +#define XE_MIGRATE_CLEAR_NON_DIRTY		BIT(2)
>   #define XE_MIGRATE_CLEAR_FLAG_FULL	(XE_MIGRATE_CLEAR_FLAG_BO_DATA | \
>   					XE_MIGRATE_CLEAR_FLAG_CCS_DATA)
>   struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
> diff --git a/drivers/gpu/drm/xe/xe_res_cursor.h b/drivers/gpu/drm/xe/xe_res_cursor.h
> index d1a403cfb628..630082e809ba 100644
> --- a/drivers/gpu/drm/xe/xe_res_cursor.h
> +++ b/drivers/gpu/drm/xe/xe_res_cursor.h
> @@ -315,6 +315,32 @@ static inline void xe_res_next(struct xe_res_cursor *cur, u64 size)
>   	}
>   }
>   
> +/**
> + * xe_res_next_dirty - advance the cursor to next dirty buddy block
> + *
> + * @cur: the cursor to advance
> + *
> + * Move the cursor until dirty buddy block is found.
> + *
> + * Return: Number of bytes cursor has been advanced
> + */
> +static inline u64 xe_res_next_dirty(struct xe_res_cursor *cur)
> +{
> +	struct drm_buddy_block *block = cur->node;
> +	u64 bytes = 0;
> +
> +	XE_WARN_ON(cur->mem_type != XE_PL_VRAM0 &&
> +		   cur->mem_type != XE_PL_VRAM1);
> +
> +	while (cur->remaining && drm_buddy_block_is_clear(block)) {
> +		bytes += cur->size;
> +		xe_res_next(cur, cur->size);
> +		block = cur->node;
> +	}
> +
> +	return bytes;
> +}
> +
>   /**
>    * xe_res_dma - return dma address of cursor at current position
>    *
> diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
> index 9e375a40aee9..120046941c1e 100644
> --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
> +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
> @@ -84,6 +84,9 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man,
>   	if (place->fpfn || lpfn != man->size >> PAGE_SHIFT)
>   		vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
>   
> +	if (tbo->type == ttm_bo_type_device)
> +		vres->flags |= DRM_BUDDY_CLEAR_ALLOCATION;

Would it make sense to also check whether the BO has tt pages at this
stage and, if so, skip asking for cleared memory? Having tt pages would
mean we are moving to VRAM and are about to copy over the memory anyway,
so ideally we would leave any pre-cleared pages for another user.

> +
>   	if (WARN_ON(!vres->base.size)) {
>   		err = -EINVAL;
>   		goto error_fini;
> @@ -187,7 +190,7 @@ static void xe_ttm_vram_mgr_del(struct ttm_resource_manager *man,
>   	struct drm_buddy *mm = &mgr->mm;
>   
>   	mutex_lock(&mgr->lock);
> -	drm_buddy_free_list(mm, &vres->blocks, 0);
> +	drm_buddy_free_list(mm, &vres->blocks, vres->flags);
>   	mgr->visible_avail += vres->used_visible_size;
>   	mutex_unlock(&mgr->lock);
>   
> diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h
> index cc76050e376d..dfc0e6890b3c 100644
> --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h
> +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h
> @@ -36,6 +36,12 @@ to_xe_ttm_vram_mgr_resource(struct ttm_resource *res)
>   	return container_of(res, struct xe_ttm_vram_mgr_resource, base);
>   }
>   
> +static inline void
> +xe_ttm_vram_mgr_resource_set_cleared(struct ttm_resource *res)
> +{
> +	to_xe_ttm_vram_mgr_resource(res)->flags |= DRM_BUDDY_CLEARED;
> +}
> +
>   static inline struct xe_ttm_vram_mgr *
>   to_xe_ttm_vram_mgr(struct ttm_resource_manager *man)
>   {

next prev parent reply	other threads:[~2025-06-20 13:08 UTC|newest]

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-06-11  5:42 [PATCH] drm/xe: Implement clear VRAM on free Matthew Brost
2025-06-11  5:47 ` ✓ CI.Patch_applied: success for " Patchwork
2025-06-11  5:47 ` ✓ CI.checkpatch: " Patchwork
2025-06-11  5:48 ` ✓ CI.KUnit: " Patchwork
2025-06-11  5:59 ` ✓ CI.Build: " Patchwork
2025-06-11  6:02 ` ✓ CI.Hooks: " Patchwork
2025-06-11  6:03 ` ✓ CI.checksparse: " Patchwork
2025-06-11  6:51 ` ✓ Xe.CI.BAT: " Patchwork
2025-06-11  8:09 ` ✗ Xe.CI.Full: failure " Patchwork
2025-06-11 16:26 ` [PATCH] " Summers, Stuart
2025-06-11 16:46   ` Matthew Brost
2025-06-11 17:04     ` Summers, Stuart
2025-06-11 17:57       ` Matthew Brost
2025-06-11 18:05         ` Summers, Stuart
2025-06-11 18:20           ` Matthew Brost
2025-06-11 18:23             ` Summers, Stuart
2025-06-11 19:01               ` Matthew Brost
2025-06-11 19:03                 ` Matthew Brost
2025-06-11 19:12                   ` Summers, Stuart
2025-06-11 21:23                     ` Matthew Brost
2025-06-11 23:12                       ` Summers, Stuart
2025-06-12 12:53 ` Thomas Hellström
2025-06-12 17:11   ` Matthew Brost
2025-06-13  8:07     ` Thomas Hellström
2025-06-13 16:21       ` Matthew Brost
2025-06-13 20:02         ` Matthew Brost
2025-06-16  7:40           ` Thomas Hellström
2025-06-16  7:56             ` Matthew Brost
2025-06-16  8:53               ` Thomas Hellström
2025-06-16  9:16                 ` Matthew Brost
2025-06-16  9:28                   ` Thomas Hellström
2025-06-20 13:08 ` Matthew Auld [this message]
2025-06-23 11:28   ` Matthew Brost

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=52e8452f-cad6-47c0-9a4c-0e7ff333641c@intel.com \
    --to=matthew.auld@intel.com \
    --cc=intel-xe@lists.freedesktop.org \
    --cc=matthew.brost@intel.com \
    --cc=thomas.hellstrom@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox