Re: [PATCH v4 2/2] drm/xe/vf: Use drm mm instead of drm sa for CCS read/write

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Matthew Brost <matthew.brost@intel.com>
To: Satyanarayana K V P <satyanarayana.k.v.p@intel.com>
Cc: intel-xe@lists.freedesktop.org,
	"Thomas Hellström" <thomas.hellstrom@linux.intel.com>,
	"Maarten Lankhorst" <dev@lankhorst.se>,
	"Michal Wajdeczko" <michal.wajdeczko@intel.com>
Subject: Re: [PATCH v4 2/2] drm/xe/vf: Use drm mm instead of drm sa for CCS read/write
Date: Fri, 10 Apr 2026 18:45:15 -0700	[thread overview]
Message-ID: <admnqyMMtvifsPiK@gsse-cloud1.jf.intel.com> (raw)
In-Reply-To: <20260408110145.1639937-6-satyanarayana.k.v.p@intel.com>

On Wed, Apr 08, 2026 at 11:01:48AM +0000, Satyanarayana K V P wrote:
> The suballocator algorithm tracks a hole cursor at the last allocation
> and tries to allocate after it. This is optimized for fence-ordered
> progress, where older allocations are expected to become reusable first.
> 
> In fence-enabled mode, that ordering assumption holds. In fence-disabled
> mode, allocations may be freed in arbitrary order, so limiting allocation
> to the current hole window can miss valid free space and fail allocations
> despite sufficient total space.
> 
> Use DRM memory manager instead of sub-allocator to get rid of this issue
> as CCS read/write operations do not use fences.
> 
> Fixes: 864690cf4dd6 ("drm/xe/vf: Attach and detach CCS copy commands with BO")
> Signed-off-by: Satyanarayana K V P <satyanarayana.k.v.p@intel.com>
> Cc: Matthew Brost <matthew.brost@intel.com>

Reviewed-by: Matthew Brost <matthew.brost@intel.com>

> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> Cc: Maarten Lankhorst <dev@lankhorst.se>
> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
> 
> ---
> V3 -> V4:
> - Updated changes as per xe_mem_pool.
> - Updated Fixes: field (Thomas)
> 
> V2 -> V3:
> - Used xe_mem_pool_init() and xe_mem_pool_shadow_init() to allocate BB
> pools.
> 
> V1 -> V2:
> - Renamed xe_drm_mm to xe_mm_suballoc (Thomas)
> ---
>  drivers/gpu/drm/xe/xe_bo_types.h           |  3 +-
>  drivers/gpu/drm/xe/xe_migrate.c            | 56 ++++++++++++----------
>  drivers/gpu/drm/xe/xe_sriov_vf_ccs.c       | 54 +++++++++++----------
>  drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h |  5 +-
>  4 files changed, 63 insertions(+), 55 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
> index ff8317bfc1ae..9d19940b8fc0 100644
> --- a/drivers/gpu/drm/xe/xe_bo_types.h
> +++ b/drivers/gpu/drm/xe/xe_bo_types.h
> @@ -18,6 +18,7 @@
>  #include "xe_ggtt_types.h"
>  
>  struct xe_device;
> +struct xe_mem_pool_node;
>  struct xe_vm;
>  
>  #define XE_BO_MAX_PLACEMENTS	3
> @@ -88,7 +89,7 @@ struct xe_bo {
>  	bool ccs_cleared;
>  
>  	/** @bb_ccs: BB instructions of CCS read/write. Valid only for VF */
> -	struct xe_bb *bb_ccs[XE_SRIOV_VF_CCS_CTX_COUNT];
> +	struct xe_mem_pool_node *bb_ccs[XE_SRIOV_VF_CCS_CTX_COUNT];
>  
>  	/**
>  	 * @cpu_caching: CPU caching mode. Currently only used for userspace
> diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
> index fc918b4fba54..5fdc89ed5256 100644
> --- a/drivers/gpu/drm/xe/xe_migrate.c
> +++ b/drivers/gpu/drm/xe/xe_migrate.c
> @@ -29,6 +29,7 @@
>  #include "xe_hw_engine.h"
>  #include "xe_lrc.h"
>  #include "xe_map.h"
> +#include "xe_mem_pool.h"
>  #include "xe_mocs.h"
>  #include "xe_printk.h"
>  #include "xe_pt.h"
> @@ -1166,11 +1167,12 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
>  	u32 batch_size, batch_size_allocated;
>  	struct xe_device *xe = gt_to_xe(gt);
>  	struct xe_res_cursor src_it, ccs_it;
> +	struct xe_mem_pool *bb_pool;
>  	struct xe_sriov_vf_ccs_ctx *ctx;
> -	struct xe_sa_manager *bb_pool;
>  	u64 size = xe_bo_size(src_bo);
> -	struct xe_bb *bb = NULL;
> +	struct xe_mem_pool_node *bb;
>  	u64 src_L0, src_L0_ofs;
> +	struct xe_bb xe_bb_tmp;
>  	u32 src_L0_pt;
>  	int err;
>  
> @@ -1208,18 +1210,18 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
>  		size -= src_L0;
>  	}
>  
> -	bb = xe_bb_alloc(gt);
> +	bb = xe_mem_pool_alloc_node();
>  	if (IS_ERR(bb))
>  		return PTR_ERR(bb);
>  
>  	bb_pool = ctx->mem.ccs_bb_pool;
> -	scoped_guard(mutex, xe_sa_bo_swap_guard(bb_pool)) {
> -		xe_sa_bo_swap_shadow(bb_pool);
> +	scoped_guard(mutex, xe_mem_pool_bo_swap_guard(bb_pool)) {
> +		xe_mem_pool_swap_shadow_locked(bb_pool);
>  
> -		err = xe_bb_init(bb, bb_pool, batch_size);
> +		err = xe_mem_pool_insert_node(bb_pool, bb, batch_size * sizeof(u32));
>  		if (err) {
>  			xe_gt_err(gt, "BB allocation failed.\n");
> -			xe_bb_free(bb, NULL);
> +			kfree(bb);
>  			return err;
>  		}
>  
> @@ -1227,6 +1229,7 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
>  		size = xe_bo_size(src_bo);
>  		batch_size = 0;
>  
> +		xe_bb_tmp = (struct xe_bb){ .cs = xe_mem_pool_node_cpu_addr(bb), .len = 0 };
>  		/*
>  		 * Emit PTE and copy commands here.
>  		 * The CCS copy command can only support limited size. If the size to be
> @@ -1255,24 +1258,27 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
>  			xe_assert(xe, IS_ALIGNED(ccs_it.start, PAGE_SIZE));
>  			batch_size += EMIT_COPY_CCS_DW;
>  
> -			emit_pte(m, bb, src_L0_pt, false, true, &src_it, src_L0, src);
> +			emit_pte(m, &xe_bb_tmp, src_L0_pt, false, true, &src_it, src_L0, src);
>  
> -			emit_pte(m, bb, ccs_pt, false, false, &ccs_it, ccs_size, src);
> +			emit_pte(m, &xe_bb_tmp, ccs_pt, false, false, &ccs_it, ccs_size, src);
>  
> -			bb->len = emit_flush_invalidate(bb->cs, bb->len, flush_flags);
> -			flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs, src_is_pltt,
> +			xe_bb_tmp.len = emit_flush_invalidate(xe_bb_tmp.cs, xe_bb_tmp.len,
> +							      flush_flags);
> +			flush_flags = xe_migrate_ccs_copy(m, &xe_bb_tmp, src_L0_ofs, src_is_pltt,
>  							  src_L0_ofs, dst_is_pltt,
>  							  src_L0, ccs_ofs, true);
> -			bb->len = emit_flush_invalidate(bb->cs, bb->len, flush_flags);
> +			xe_bb_tmp.len = emit_flush_invalidate(xe_bb_tmp.cs, xe_bb_tmp.len,
> +							      flush_flags);
>  
>  			size -= src_L0;
>  		}
>  
> -		xe_assert(xe, (batch_size_allocated == bb->len));
> +		xe_assert(xe, (batch_size_allocated == xe_bb_tmp.len));
> +		xe_assert(xe, bb->sa_node.size == xe_bb_tmp.len * sizeof(u32));
>  		src_bo->bb_ccs[read_write] = bb;
>  
>  		xe_sriov_vf_ccs_rw_update_bb_addr(ctx);
> -		xe_sa_bo_sync_shadow(bb->bo);
> +		xe_mem_pool_sync_shadow_locked(bb);
>  	}
>  
>  	return 0;
> @@ -1297,10 +1303,10 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
>  void xe_migrate_ccs_rw_copy_clear(struct xe_bo *src_bo,
>  				  enum xe_sriov_vf_ccs_rw_ctxs read_write)
>  {
> -	struct xe_bb *bb = src_bo->bb_ccs[read_write];
> +	struct xe_mem_pool_node *bb = src_bo->bb_ccs[read_write];
>  	struct xe_device *xe = xe_bo_device(src_bo);
> +	struct xe_mem_pool *bb_pool;
>  	struct xe_sriov_vf_ccs_ctx *ctx;
> -	struct xe_sa_manager *bb_pool;
>  	u32 *cs;
>  
>  	xe_assert(xe, IS_SRIOV_VF(xe));
> @@ -1308,17 +1314,17 @@ void xe_migrate_ccs_rw_copy_clear(struct xe_bo *src_bo,
>  	ctx = &xe->sriov.vf.ccs.contexts[read_write];
>  	bb_pool = ctx->mem.ccs_bb_pool;
>  
> -	guard(mutex) (xe_sa_bo_swap_guard(bb_pool));
> -	xe_sa_bo_swap_shadow(bb_pool);
> -
> -	cs = xe_sa_bo_cpu_addr(bb->bo);
> -	memset(cs, MI_NOOP, bb->len * sizeof(u32));
> -	xe_sriov_vf_ccs_rw_update_bb_addr(ctx);
> +	scoped_guard(mutex, xe_mem_pool_bo_swap_guard(bb_pool)) {
> +		xe_mem_pool_swap_shadow_locked(bb_pool);
>  
> -	xe_sa_bo_sync_shadow(bb->bo);
> +		cs = xe_mem_pool_node_cpu_addr(bb);
> +		memset(cs, MI_NOOP, bb->sa_node.size);
> +		xe_sriov_vf_ccs_rw_update_bb_addr(ctx);
>  
> -	xe_bb_free(bb, NULL);
> -	src_bo->bb_ccs[read_write] = NULL;
> +		xe_mem_pool_sync_shadow_locked(bb);
> +		xe_mem_pool_free_node(bb);
> +		src_bo->bb_ccs[read_write] = NULL;
> +	}
>  }
>  
>  /**
> diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
> index db023fb66a27..09b99fb2608b 100644
> --- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
> +++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
> @@ -14,9 +14,9 @@
>  #include "xe_guc.h"
>  #include "xe_guc_submit.h"
>  #include "xe_lrc.h"
> +#include "xe_mem_pool.h"
>  #include "xe_migrate.h"
>  #include "xe_pm.h"
> -#include "xe_sa.h"
>  #include "xe_sriov_printk.h"
>  #include "xe_sriov_vf.h"
>  #include "xe_sriov_vf_ccs.h"
> @@ -141,43 +141,47 @@ static u64 get_ccs_bb_pool_size(struct xe_device *xe)
>  
>  static int alloc_bb_pool(struct xe_tile *tile, struct xe_sriov_vf_ccs_ctx *ctx)
>  {
> +	struct xe_mem_pool *pool;
>  	struct xe_device *xe = tile_to_xe(tile);
> -	struct xe_sa_manager *sa_manager;
> +	u32 *pool_cpu_addr, *last_dw_addr;
>  	u64 bb_pool_size;
> -	int offset, err;
> +	int err;
>  
>  	bb_pool_size = get_ccs_bb_pool_size(xe);
>  	xe_sriov_info(xe, "Allocating %s CCS BB pool size = %lldMB\n",
>  		      ctx->ctx_id ? "Restore" : "Save", bb_pool_size / SZ_1M);
>  
> -	sa_manager = __xe_sa_bo_manager_init(tile, bb_pool_size, SZ_4K, SZ_16,
> -					     XE_SA_BO_MANAGER_FLAG_SHADOW);
> -
> -	if (IS_ERR(sa_manager)) {
> -		xe_sriov_err(xe, "Suballocator init failed with error: %pe\n",
> -			     sa_manager);
> -		err = PTR_ERR(sa_manager);
> +	pool = xe_mem_pool_init(tile, bb_pool_size, sizeof(u32),
> +				XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY);
> +	if (IS_ERR(pool)) {
> +		xe_sriov_err(xe, "xe_mem_pool_init failed with error: %pe\n",
> +			     pool);
> +		err = PTR_ERR(pool);
>  		return err;
>  	}
>  
> -	offset = 0;
> -	xe_map_memset(xe, &sa_manager->bo->vmap, offset, MI_NOOP,
> -		      bb_pool_size);
> -	xe_map_memset(xe, &sa_manager->shadow->vmap, offset, MI_NOOP,
> -		      bb_pool_size);
> +	pool_cpu_addr = xe_mem_pool_cpu_addr(pool);
> +	memset(pool_cpu_addr, 0, bb_pool_size);
>  
> -	offset = bb_pool_size - sizeof(u32);
> -	xe_map_wr(xe, &sa_manager->bo->vmap, offset, u32, MI_BATCH_BUFFER_END);
> -	xe_map_wr(xe, &sa_manager->shadow->vmap, offset, u32, MI_BATCH_BUFFER_END);
> +	last_dw_addr = pool_cpu_addr + (bb_pool_size / sizeof(u32)) - 1;
> +	*last_dw_addr = MI_BATCH_BUFFER_END;
>  
> -	ctx->mem.ccs_bb_pool = sa_manager;
> +	/**
> +	 * Sync the main copy and shadow copy so that the shadow copy is
> +	 * replica of main copy. We sync only BBs after init part. So, we
> +	 * need to make sure the main pool and shadow copy are in sync after
> +	 * this point. This is needed as GuC may read the BB commands from
> +	 * shadow copy.
> +	 */
> +	xe_mem_pool_sync(pool);
>  
> +	ctx->mem.ccs_bb_pool = pool;
>  	return 0;
>  }
>  
>  static void ccs_rw_update_ring(struct xe_sriov_vf_ccs_ctx *ctx)
>  {
> -	u64 addr = xe_sa_manager_gpu_addr(ctx->mem.ccs_bb_pool);
> +	u64 addr = xe_mem_pool_gpu_addr(ctx->mem.ccs_bb_pool);
>  	struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q);
>  	u32 dw[10], i = 0;
>  
> @@ -388,7 +392,7 @@ int xe_sriov_vf_ccs_init(struct xe_device *xe)
>  #define XE_SRIOV_VF_CCS_RW_BB_ADDR_OFFSET	(2 * sizeof(u32))
>  void xe_sriov_vf_ccs_rw_update_bb_addr(struct xe_sriov_vf_ccs_ctx *ctx)
>  {
> -	u64 addr = xe_sa_manager_gpu_addr(ctx->mem.ccs_bb_pool);
> +	u64 addr = xe_mem_pool_gpu_addr(ctx->mem.ccs_bb_pool);
>  	struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q);
>  	struct xe_device *xe = gt_to_xe(ctx->mig_q->gt);
>  
> @@ -412,8 +416,8 @@ int xe_sriov_vf_ccs_attach_bo(struct xe_bo *bo)
>  	struct xe_device *xe = xe_bo_device(bo);
>  	enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
>  	struct xe_sriov_vf_ccs_ctx *ctx;
> +	struct xe_mem_pool_node *bb;
>  	struct xe_tile *tile;
> -	struct xe_bb *bb;
>  	int err = 0;
>  
>  	xe_assert(xe, IS_VF_CCS_READY(xe));
> @@ -445,7 +449,7 @@ int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo)
>  {
>  	struct xe_device *xe = xe_bo_device(bo);
>  	enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
> -	struct xe_bb *bb;
> +	struct xe_mem_pool_node *bb;
>  
>  	xe_assert(xe, IS_VF_CCS_READY(xe));
>  
> @@ -471,8 +475,8 @@ int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo)
>   */
>  void xe_sriov_vf_ccs_print(struct xe_device *xe, struct drm_printer *p)
>  {
> -	struct xe_sa_manager *bb_pool;
>  	enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
> +	struct xe_mem_pool *bb_pool;
>  
>  	if (!IS_VF_CCS_READY(xe))
>  		return;
> @@ -485,7 +489,7 @@ void xe_sriov_vf_ccs_print(struct xe_device *xe, struct drm_printer *p)
>  
>  		drm_printf(p, "ccs %s bb suballoc info\n", ctx_id ? "write" : "read");
>  		drm_printf(p, "-------------------------\n");
> -		drm_suballoc_dump_debug_info(&bb_pool->base, p, xe_sa_manager_gpu_addr(bb_pool));
> +		xe_mem_pool_dump(bb_pool, p);
>  		drm_puts(p, "\n");
>  	}
>  }
> diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h b/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h
> index 22c499943d2a..6fc8f97ef3f4 100644
> --- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h
> +++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h
> @@ -17,9 +17,6 @@ enum xe_sriov_vf_ccs_rw_ctxs {
>  	XE_SRIOV_VF_CCS_CTX_COUNT
>  };
>  
> -struct xe_migrate;
> -struct xe_sa_manager;
> -
>  /**
>   * struct xe_sriov_vf_ccs_ctx - VF CCS migration context data.
>   */
> @@ -33,7 +30,7 @@ struct xe_sriov_vf_ccs_ctx {
>  	/** @mem: memory data */
>  	struct {
>  		/** @mem.ccs_bb_pool: Pool from which batch buffers are allocated. */
> -		struct xe_sa_manager *ccs_bb_pool;
> +		struct xe_mem_pool *ccs_bb_pool;
>  	} mem;
>  };
>  
> -- 
> 2.43.0
>

next prev parent reply	other threads:[~2026-04-11  1:45 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-08 11:01 [PATCH v4 0/2] USE drm mm instead of drm SA for CCS read/write Satyanarayana K V P
2026-04-08 11:01 ` [PATCH v4 1/2] drm/xe: Add memory pool with shadow support Satyanarayana K V P
2026-04-11  1:43   ` Matthew Brost
2026-04-08 11:01 ` [PATCH v4 2/2] drm/xe/vf: Use drm mm instead of drm sa for CCS read/write Satyanarayana K V P
2026-04-11  1:45   ` Matthew Brost [this message]
2026-04-08 11:08 ` ✗ CI.checkpatch: warning for USE drm mm instead of drm SA for CCS read/write (rev4) Patchwork
2026-04-08 11:09 ` ✗ CI.KUnit: failure " Patchwork
2026-04-10  5:55 ` ✗ CI.checkpatch: warning for USE drm mm instead of drm SA for CCS read/write (rev5) Patchwork
2026-04-10  5:56 ` ✓ CI.KUnit: success " Patchwork
2026-04-10  6:32 ` ✓ Xe.CI.BAT: " Patchwork
2026-04-10 13:24 ` ✗ Xe.CI.FULL: failure " Patchwork
2026-04-13  6:47   ` K V P, Satyanarayana

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=admnqyMMtvifsPiK@gsse-cloud1.jf.intel.com \
    --to=matthew.brost@intel.com \
    --cc=dev@lankhorst.se \
    --cc=intel-xe@lists.freedesktop.org \
    --cc=michal.wajdeczko@intel.com \
    --cc=satyanarayana.k.v.p@intel.com \
    --cc=thomas.hellstrom@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.