public inbox for intel-xe@lists.freedesktop.org
From: Matthew Brost <matthew.brost@intel.com>
To: Satyanarayana K V P <satyanarayana.k.v.p@intel.com>
Cc: intel-xe@lists.freedesktop.org,
	"Thomas Hellström" <thomas.hellstrom@linux.intel.com>,
	"Maarten Lankhorst" <dev@lankhorst.se>,
	"Michal Wajdeczko" <michal.wajdeczko@intel.com>
Subject: Re: [PATCH v4 2/2] drm/xe/vf: Use drm mm instead of drm sa for CCS read/write
Date: Fri, 10 Apr 2026 18:45:15 -0700	[thread overview]
Message-ID: <admnqyMMtvifsPiK@gsse-cloud1.jf.intel.com> (raw)
In-Reply-To: <20260408110145.1639937-6-satyanarayana.k.v.p@intel.com>

On Wed, Apr 08, 2026 at 11:01:48AM +0000, Satyanarayana K V P wrote:
> The suballocator algorithm tracks a hole cursor at the last allocation
> and tries to allocate after it. This is optimized for fence-ordered
> progress, where older allocations are expected to become reusable first.
> 
> In fence-enabled mode, that ordering assumption holds. In fence-disabled
> mode, allocations may be freed in arbitrary order, so limiting allocation
> to the current hole window can miss valid free space and fail allocations
> despite sufficient total space.
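
The failure mode described above can be sketched with a toy model. This
is illustrative only, not the actual drm_suballoc code: it models just
the "allocate only after the cursor" behavior, so freed space behind the
cursor becomes invisible when frees arrive out of order.

```python
# Toy hole-cursor allocator (illustrative, not drm_suballoc): space is
# only carved from [cursor, size), mirroring the fence-ordered assumption
# that older allocations free up first.
class HoleCursorPool:
    def __init__(self, size):
        self.size = size
        self.cursor = 0   # next free offset after the last allocation
        self.live = {}    # offset -> length of outstanding allocations

    def alloc(self, length):
        # Only the window after the cursor is considered; holes behind
        # the cursor are not reclaimed here.
        if self.cursor + length > self.size:
            return None   # fails despite free space earlier in the pool
        offset = self.cursor
        self.cursor += length
        self.live[offset] = length
        return offset

    def free(self, offset):
        # An out-of-order free leaves a hole behind the cursor that this
        # allocator never revisits.
        del self.live[offset]

pool = HoleCursorPool(size=4)
a = pool.alloc(2)   # occupies offsets 0..1
b = pool.alloc(2)   # occupies offsets 2..3, cursor now at 4
pool.free(a)        # out-of-order free: 2 units free at offset 0
c = pool.alloc(2)   # fails: the hole at offset 0 is invisible
assert c is None and sum(pool.live.values()) == 2
```

With fence-disabled, arbitrary-order frees, exactly this happens: the
pool has enough total free space but the allocation still fails, which
is why a range manager that can reuse any free hole (such as drm_mm) is
the better fit here.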
> 
> Use the DRM memory manager instead of the sub-allocator to fix this
> issue, as CCS read/write operations do not use fences.
> 
> Fixes: 864690cf4dd6 ("drm/xe/vf: Attach and detach CCS copy commands with BO")
> Signed-off-by: Satyanarayana K V P <satyanarayana.k.v.p@intel.com>
> Cc: Matthew Brost <matthew.brost@intel.com>

Reviewed-by: Matthew Brost <matthew.brost@intel.com>

> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> Cc: Maarten Lankhorst <dev@lankhorst.se>
> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
> 
> ---
> V3 -> V4:
> - Updated changes as per xe_mem_pool.
> - Updated Fixes: field (Thomas)
> 
> V2 -> V3:
> - Used xe_mem_pool_init() and xe_mem_pool_shadow_init() to allocate BB
> pools.
> 
> V1 -> V2:
> - Renamed xe_drm_mm to xe_mm_suballoc (Thomas)
> ---
>  drivers/gpu/drm/xe/xe_bo_types.h           |  3 +-
>  drivers/gpu/drm/xe/xe_migrate.c            | 56 ++++++++++++----------
>  drivers/gpu/drm/xe/xe_sriov_vf_ccs.c       | 54 +++++++++++----------
>  drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h |  5 +-
>  4 files changed, 63 insertions(+), 55 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
> index ff8317bfc1ae..9d19940b8fc0 100644
> --- a/drivers/gpu/drm/xe/xe_bo_types.h
> +++ b/drivers/gpu/drm/xe/xe_bo_types.h
> @@ -18,6 +18,7 @@
>  #include "xe_ggtt_types.h"
>  
>  struct xe_device;
> +struct xe_mem_pool_node;
>  struct xe_vm;
>  
>  #define XE_BO_MAX_PLACEMENTS	3
> @@ -88,7 +89,7 @@ struct xe_bo {
>  	bool ccs_cleared;
>  
>  	/** @bb_ccs: BB instructions of CCS read/write. Valid only for VF */
> -	struct xe_bb *bb_ccs[XE_SRIOV_VF_CCS_CTX_COUNT];
> +	struct xe_mem_pool_node *bb_ccs[XE_SRIOV_VF_CCS_CTX_COUNT];
>  
>  	/**
>  	 * @cpu_caching: CPU caching mode. Currently only used for userspace
> diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
> index fc918b4fba54..5fdc89ed5256 100644
> --- a/drivers/gpu/drm/xe/xe_migrate.c
> +++ b/drivers/gpu/drm/xe/xe_migrate.c
> @@ -29,6 +29,7 @@
>  #include "xe_hw_engine.h"
>  #include "xe_lrc.h"
>  #include "xe_map.h"
> +#include "xe_mem_pool.h"
>  #include "xe_mocs.h"
>  #include "xe_printk.h"
>  #include "xe_pt.h"
> @@ -1166,11 +1167,12 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
>  	u32 batch_size, batch_size_allocated;
>  	struct xe_device *xe = gt_to_xe(gt);
>  	struct xe_res_cursor src_it, ccs_it;
> +	struct xe_mem_pool *bb_pool;
>  	struct xe_sriov_vf_ccs_ctx *ctx;
> -	struct xe_sa_manager *bb_pool;
>  	u64 size = xe_bo_size(src_bo);
> -	struct xe_bb *bb = NULL;
> +	struct xe_mem_pool_node *bb;
>  	u64 src_L0, src_L0_ofs;
> +	struct xe_bb xe_bb_tmp;
>  	u32 src_L0_pt;
>  	int err;
>  
> @@ -1208,18 +1210,18 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
>  		size -= src_L0;
>  	}
>  
> -	bb = xe_bb_alloc(gt);
> +	bb = xe_mem_pool_alloc_node();
>  	if (IS_ERR(bb))
>  		return PTR_ERR(bb);
>  
>  	bb_pool = ctx->mem.ccs_bb_pool;
> -	scoped_guard(mutex, xe_sa_bo_swap_guard(bb_pool)) {
> -		xe_sa_bo_swap_shadow(bb_pool);
> +	scoped_guard(mutex, xe_mem_pool_bo_swap_guard(bb_pool)) {
> +		xe_mem_pool_swap_shadow_locked(bb_pool);
>  
> -		err = xe_bb_init(bb, bb_pool, batch_size);
> +		err = xe_mem_pool_insert_node(bb_pool, bb, batch_size * sizeof(u32));
>  		if (err) {
>  			xe_gt_err(gt, "BB allocation failed.\n");
> -			xe_bb_free(bb, NULL);
> +			kfree(bb);
>  			return err;
>  		}
>  
> @@ -1227,6 +1229,7 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
>  		size = xe_bo_size(src_bo);
>  		batch_size = 0;
>  
> +		xe_bb_tmp = (struct xe_bb){ .cs = xe_mem_pool_node_cpu_addr(bb), .len = 0 };
>  		/*
>  		 * Emit PTE and copy commands here.
>  		 * The CCS copy command can only support limited size. If the size to be
> @@ -1255,24 +1258,27 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
>  			xe_assert(xe, IS_ALIGNED(ccs_it.start, PAGE_SIZE));
>  			batch_size += EMIT_COPY_CCS_DW;
>  
> -			emit_pte(m, bb, src_L0_pt, false, true, &src_it, src_L0, src);
> +			emit_pte(m, &xe_bb_tmp, src_L0_pt, false, true, &src_it, src_L0, src);
>  
> -			emit_pte(m, bb, ccs_pt, false, false, &ccs_it, ccs_size, src);
> +			emit_pte(m, &xe_bb_tmp, ccs_pt, false, false, &ccs_it, ccs_size, src);
>  
> -			bb->len = emit_flush_invalidate(bb->cs, bb->len, flush_flags);
> -			flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs, src_is_pltt,
> +			xe_bb_tmp.len = emit_flush_invalidate(xe_bb_tmp.cs, xe_bb_tmp.len,
> +							      flush_flags);
> +			flush_flags = xe_migrate_ccs_copy(m, &xe_bb_tmp, src_L0_ofs, src_is_pltt,
>  							  src_L0_ofs, dst_is_pltt,
>  							  src_L0, ccs_ofs, true);
> -			bb->len = emit_flush_invalidate(bb->cs, bb->len, flush_flags);
> +			xe_bb_tmp.len = emit_flush_invalidate(xe_bb_tmp.cs, xe_bb_tmp.len,
> +							      flush_flags);
>  
>  			size -= src_L0;
>  		}
>  
> -		xe_assert(xe, (batch_size_allocated == bb->len));
> +		xe_assert(xe, (batch_size_allocated == xe_bb_tmp.len));
> +		xe_assert(xe, bb->sa_node.size == xe_bb_tmp.len * sizeof(u32));
>  		src_bo->bb_ccs[read_write] = bb;
>  
>  		xe_sriov_vf_ccs_rw_update_bb_addr(ctx);
> -		xe_sa_bo_sync_shadow(bb->bo);
> +		xe_mem_pool_sync_shadow_locked(bb);
>  	}
>  
>  	return 0;
> @@ -1297,10 +1303,10 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
>  void xe_migrate_ccs_rw_copy_clear(struct xe_bo *src_bo,
>  				  enum xe_sriov_vf_ccs_rw_ctxs read_write)
>  {
> -	struct xe_bb *bb = src_bo->bb_ccs[read_write];
> +	struct xe_mem_pool_node *bb = src_bo->bb_ccs[read_write];
>  	struct xe_device *xe = xe_bo_device(src_bo);
> +	struct xe_mem_pool *bb_pool;
>  	struct xe_sriov_vf_ccs_ctx *ctx;
> -	struct xe_sa_manager *bb_pool;
>  	u32 *cs;
>  
>  	xe_assert(xe, IS_SRIOV_VF(xe));
> @@ -1308,17 +1314,17 @@ void xe_migrate_ccs_rw_copy_clear(struct xe_bo *src_bo,
>  	ctx = &xe->sriov.vf.ccs.contexts[read_write];
>  	bb_pool = ctx->mem.ccs_bb_pool;
>  
> -	guard(mutex) (xe_sa_bo_swap_guard(bb_pool));
> -	xe_sa_bo_swap_shadow(bb_pool);
> -
> -	cs = xe_sa_bo_cpu_addr(bb->bo);
> -	memset(cs, MI_NOOP, bb->len * sizeof(u32));
> -	xe_sriov_vf_ccs_rw_update_bb_addr(ctx);
> +	scoped_guard(mutex, xe_mem_pool_bo_swap_guard(bb_pool)) {
> +		xe_mem_pool_swap_shadow_locked(bb_pool);
>  
> -	xe_sa_bo_sync_shadow(bb->bo);
> +		cs = xe_mem_pool_node_cpu_addr(bb);
> +		memset(cs, MI_NOOP, bb->sa_node.size);
> +		xe_sriov_vf_ccs_rw_update_bb_addr(ctx);
>  
> -	xe_bb_free(bb, NULL);
> -	src_bo->bb_ccs[read_write] = NULL;
> +		xe_mem_pool_sync_shadow_locked(bb);
> +		xe_mem_pool_free_node(bb);
> +		src_bo->bb_ccs[read_write] = NULL;
> +	}
>  }
>  
>  /**
> diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
> index db023fb66a27..09b99fb2608b 100644
> --- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
> +++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
> @@ -14,9 +14,9 @@
>  #include "xe_guc.h"
>  #include "xe_guc_submit.h"
>  #include "xe_lrc.h"
> +#include "xe_mem_pool.h"
>  #include "xe_migrate.h"
>  #include "xe_pm.h"
> -#include "xe_sa.h"
>  #include "xe_sriov_printk.h"
>  #include "xe_sriov_vf.h"
>  #include "xe_sriov_vf_ccs.h"
> @@ -141,43 +141,47 @@ static u64 get_ccs_bb_pool_size(struct xe_device *xe)
>  
>  static int alloc_bb_pool(struct xe_tile *tile, struct xe_sriov_vf_ccs_ctx *ctx)
>  {
> +	struct xe_mem_pool *pool;
>  	struct xe_device *xe = tile_to_xe(tile);
> -	struct xe_sa_manager *sa_manager;
> +	u32 *pool_cpu_addr, *last_dw_addr;
>  	u64 bb_pool_size;
> -	int offset, err;
> +	int err;
>  
>  	bb_pool_size = get_ccs_bb_pool_size(xe);
>  	xe_sriov_info(xe, "Allocating %s CCS BB pool size = %lldMB\n",
>  		      ctx->ctx_id ? "Restore" : "Save", bb_pool_size / SZ_1M);
>  
> -	sa_manager = __xe_sa_bo_manager_init(tile, bb_pool_size, SZ_4K, SZ_16,
> -					     XE_SA_BO_MANAGER_FLAG_SHADOW);
> -
> -	if (IS_ERR(sa_manager)) {
> -		xe_sriov_err(xe, "Suballocator init failed with error: %pe\n",
> -			     sa_manager);
> -		err = PTR_ERR(sa_manager);
> +	pool = xe_mem_pool_init(tile, bb_pool_size, sizeof(u32),
> +				XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY);
> +	if (IS_ERR(pool)) {
> +		xe_sriov_err(xe, "xe_mem_pool_init failed with error: %pe\n",
> +			     pool);
> +		err = PTR_ERR(pool);
>  		return err;
>  	}
>  
> -	offset = 0;
> -	xe_map_memset(xe, &sa_manager->bo->vmap, offset, MI_NOOP,
> -		      bb_pool_size);
> -	xe_map_memset(xe, &sa_manager->shadow->vmap, offset, MI_NOOP,
> -		      bb_pool_size);
> +	pool_cpu_addr = xe_mem_pool_cpu_addr(pool);
> +	memset(pool_cpu_addr, 0, bb_pool_size);
>  
> -	offset = bb_pool_size - sizeof(u32);
> -	xe_map_wr(xe, &sa_manager->bo->vmap, offset, u32, MI_BATCH_BUFFER_END);
> -	xe_map_wr(xe, &sa_manager->shadow->vmap, offset, u32, MI_BATCH_BUFFER_END);
> +	last_dw_addr = pool_cpu_addr + (bb_pool_size / sizeof(u32)) - 1;
> +	*last_dw_addr = MI_BATCH_BUFFER_END;
>  
> -	ctx->mem.ccs_bb_pool = sa_manager;
> +	/*
> +	 * Sync the main copy and the shadow copy so that the shadow is an
> +	 * exact replica of the main copy. Only the BBs are synced after
> +	 * this init step, so the two copies must be brought in sync here:
> +	 * the GuC may read the BB commands from the shadow copy at any
> +	 * time.
> +	 */
> +	xe_mem_pool_sync(pool);
>  
> +	ctx->mem.ccs_bb_pool = pool;
>  	return 0;
>  }
>  
>  static void ccs_rw_update_ring(struct xe_sriov_vf_ccs_ctx *ctx)
>  {
> -	u64 addr = xe_sa_manager_gpu_addr(ctx->mem.ccs_bb_pool);
> +	u64 addr = xe_mem_pool_gpu_addr(ctx->mem.ccs_bb_pool);
>  	struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q);
>  	u32 dw[10], i = 0;
>  
> @@ -388,7 +392,7 @@ int xe_sriov_vf_ccs_init(struct xe_device *xe)
>  #define XE_SRIOV_VF_CCS_RW_BB_ADDR_OFFSET	(2 * sizeof(u32))
>  void xe_sriov_vf_ccs_rw_update_bb_addr(struct xe_sriov_vf_ccs_ctx *ctx)
>  {
> -	u64 addr = xe_sa_manager_gpu_addr(ctx->mem.ccs_bb_pool);
> +	u64 addr = xe_mem_pool_gpu_addr(ctx->mem.ccs_bb_pool);
>  	struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q);
>  	struct xe_device *xe = gt_to_xe(ctx->mig_q->gt);
>  
> @@ -412,8 +416,8 @@ int xe_sriov_vf_ccs_attach_bo(struct xe_bo *bo)
>  	struct xe_device *xe = xe_bo_device(bo);
>  	enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
>  	struct xe_sriov_vf_ccs_ctx *ctx;
> +	struct xe_mem_pool_node *bb;
>  	struct xe_tile *tile;
> -	struct xe_bb *bb;
>  	int err = 0;
>  
>  	xe_assert(xe, IS_VF_CCS_READY(xe));
> @@ -445,7 +449,7 @@ int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo)
>  {
>  	struct xe_device *xe = xe_bo_device(bo);
>  	enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
> -	struct xe_bb *bb;
> +	struct xe_mem_pool_node *bb;
>  
>  	xe_assert(xe, IS_VF_CCS_READY(xe));
>  
> @@ -471,8 +475,8 @@ int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo)
>   */
>  void xe_sriov_vf_ccs_print(struct xe_device *xe, struct drm_printer *p)
>  {
> -	struct xe_sa_manager *bb_pool;
>  	enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
> +	struct xe_mem_pool *bb_pool;
>  
>  	if (!IS_VF_CCS_READY(xe))
>  		return;
> @@ -485,7 +489,7 @@ void xe_sriov_vf_ccs_print(struct xe_device *xe, struct drm_printer *p)
>  
>  		drm_printf(p, "ccs %s bb suballoc info\n", ctx_id ? "write" : "read");
>  		drm_printf(p, "-------------------------\n");
> -		drm_suballoc_dump_debug_info(&bb_pool->base, p, xe_sa_manager_gpu_addr(bb_pool));
> +		xe_mem_pool_dump(bb_pool, p);
>  		drm_puts(p, "\n");
>  	}
>  }
> diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h b/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h
> index 22c499943d2a..6fc8f97ef3f4 100644
> --- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h
> +++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h
> @@ -17,9 +17,6 @@ enum xe_sriov_vf_ccs_rw_ctxs {
>  	XE_SRIOV_VF_CCS_CTX_COUNT
>  };
>  
> -struct xe_migrate;
> -struct xe_sa_manager;
> -
>  /**
>   * struct xe_sriov_vf_ccs_ctx - VF CCS migration context data.
>   */
> @@ -33,7 +30,7 @@ struct xe_sriov_vf_ccs_ctx {
>  	/** @mem: memory data */
>  	struct {
>  		/** @mem.ccs_bb_pool: Pool from which batch buffers are allocated. */
> -		struct xe_sa_manager *ccs_bb_pool;
> +		struct xe_mem_pool *ccs_bb_pool;
>  	} mem;
>  };
>  
> -- 
> 2.43.0
> 


Thread overview: 11+ messages
2026-04-08 11:01 [PATCH v4 0/2] USE drm mm instead of drm SA for CCS read/write Satyanarayana K V P
2026-04-08 11:01 ` [PATCH v4 1/2] drm/xe: Add memory pool with shadow support Satyanarayana K V P
2026-04-11  1:43   ` Matthew Brost
2026-04-08 11:01 ` [PATCH v4 2/2] drm/xe/vf: Use drm mm instead of drm sa for CCS read/write Satyanarayana K V P
2026-04-11  1:45   ` Matthew Brost [this message]
2026-04-08 11:08 ` ✗ CI.checkpatch: warning for USE drm mm instead of drm SA for CCS read/write (rev4) Patchwork
2026-04-08 11:09 ` ✗ CI.KUnit: failure " Patchwork
2026-04-10  5:55 ` ✗ CI.checkpatch: warning for USE drm mm instead of drm SA for CCS read/write (rev5) Patchwork
2026-04-10  5:56 ` ✓ CI.KUnit: success " Patchwork
2026-04-10  6:32 ` ✓ Xe.CI.BAT: " Patchwork
2026-04-10 13:24 ` ✗ Xe.CI.FULL: failure " Patchwork
